monitoring: add Prometheus + Grafana stack (kube-prometheus-stack)
Deploys kube-prometheus-stack via helm-controller: Prometheus, Grafana (internal-only), node-exporter, and kube-state-metrics. Adds prometheus-pve-exporter for Proxmox (token in an out-of-band Secret), scrape configs for external hosts (.48/.70/.71/.49), community dashboards (Node Exporter Full, Proxmox via Prometheus), and a custom Hardware Temperatures dashboard. The Grafana liveness probe is made tolerant of slow first-boot migrations.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
a064f41250
commit
0a02da49a4
3 changed files with 275 additions and 0 deletions
91
monitoring/grafana-temps-dashboard.yaml
Normal file
91
monitoring/grafana-temps-dashboard.yaml
Normal file
|
|
@@ -0,0 +1,91 @@
|
|||
# ConfigMap carrying the "Hardware Temperatures" Grafana dashboard (uid: hw-temps).
# Two panels, both driven by node_hwmon_temp_celsius: a stat panel with the max
# temperature per instance and a timeseries of every individual sensor.
# The panels resolve their Prometheus datasource through the `datasource`
# template variable declared under "templating".
# NOTE(review): the grafana_dashboard label is presumably what the Grafana
# dashboard sidecar selects on -- confirm it matches the sidecar's configured
# label/value for this stack.
# The body of the `|` block scalar is the dashboard JSON itself and is stored
# verbatim, so it must stay strict JSON (no comments inside it).
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-dashboard-temps
  namespace: monitoring
  labels:
    grafana_dashboard: "1"
data:
  hardware-temperatures.json: |
    {
      "annotations": {"list": []},
      "editable": true,
      "graphTooltip": 0,
      "links": [],
      "panels": [
        {
          "datasource": {"type": "prometheus", "uid": "${datasource}"},
          "fieldConfig": {
            "defaults": {
              "color": {"mode": "thresholds"},
              "unit": "celsius",
              "thresholds": {"mode": "absolute", "steps": [
                {"color": "green", "value": null},
                {"color": "yellow", "value": 60},
                {"color": "orange", "value": 75},
                {"color": "red", "value": 85}
              ]}
            },
            "overrides": []
          },
          "gridPos": {"h": 8, "w": 12, "x": 0, "y": 0},
          "id": 1,
          "options": {
            "colorMode": "background",
            "graphMode": "area",
            "orientation": "auto",
            "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false},
            "textMode": "auto"
          },
          "pluginVersion": "11.0.0",
          "title": "Max temperature per host (now)",
          "type": "stat",
          "targets": [
            {"datasource": {"type": "prometheus", "uid": "${datasource}"},
             "expr": "max by (instance) (node_hwmon_temp_celsius)",
             "legendFormat": "{{instance}}", "refId": "A"}
          ]
        },
        {
          "datasource": {"type": "prometheus", "uid": "${datasource}"},
          "fieldConfig": {
            "defaults": {
              "color": {"mode": "palette-classic"},
              "unit": "celsius",
              "custom": {"drawStyle": "line", "lineWidth": 1, "fillOpacity": 10, "showPoints": "never"},
              "thresholds": {"mode": "absolute", "steps": [
                {"color": "green", "value": null},
                {"color": "red", "value": 85}
              ]}
            },
            "overrides": []
          },
          "gridPos": {"h": 8, "w": 12, "x": 12, "y": 0},
          "id": 2,
          "options": {
            "legend": {"displayMode": "table", "placement": "bottom", "calcs": ["last", "max"]},
            "tooltip": {"mode": "multi"}
          },
          "title": "All temperature sensors",
          "type": "timeseries",
          "targets": [
            {"datasource": {"type": "prometheus", "uid": "${datasource}"},
             "expr": "node_hwmon_temp_celsius",
             "legendFormat": "{{instance}} / {{chip}} / {{sensor}}", "refId": "A"}
          ]
        }
      ],
      "refresh": "30s",
      "schemaVersion": 39,
      "tags": ["hardware", "temperature"],
      "templating": {"list": [
        {"current": {}, "hide": 0, "includeAll": false, "label": "Datasource",
         "multi": false, "name": "datasource", "options": [], "query": "prometheus",
         "refresh": 1, "type": "datasource"}
      ]},
      "time": {"from": "now-6h", "to": "now"},
      "timezone": "Europe/Oslo",
      "title": "Hardware Temperatures",
      "uid": "hw-temps",
      "version": 1
    }
|
||||
Loading…
Add table
Add a link
Reference in a new issue