464 lines
13 KiB
Nix
464 lines
13 KiB
Nix
{ lib, pkgs }:
|
|
let
|
|
# UID of the Grafana datasource referenced by every mkPanel panel and by the
# `host` template variable (both declare it with type "prometheus").
# NOTE(review): presumably this must match the uid of a datasource provisioned
# elsewhere — confirm against the Grafana provisioning config.
datasourceUid = "vm-datasource-uid";
|
|
|
|
# Build a Grafana panel attrset with the shared defaults filled in.
#
# Required: id, title, type, gridPos, targets.
# Optional: options (default { }) and fieldConfig (default { }); a missing
# fieldConfig.defaults becomes { } and a missing fieldConfig.overrides
# becomes [ ].
# Any additional attribute (e.g. `transformations`) is copied onto the panel
# unchanged and, because it is merged last, wins over the defaults set here.
mkPanel = { id, title, type, gridPos, targets, options ? { }, fieldConfig ? { }, ... }@args:
  let
    # Attributes consumed explicitly below; everything else is passed through.
    handledKeys = [ "id" "title" "type" "gridPos" "targets" "options" "fieldConfig" ];
    passthrough = builtins.removeAttrs args handledKeys;

    panelBase = {
      inherit id title type gridPos targets options;
      datasource = {
        type = "prometheus";
        uid = datasourceUid;
      };
      fieldConfig = {
        defaults = fieldConfig.defaults or { };
        overrides = fieldConfig.overrides or [ ];
      };
    };
  in
  panelBase // passthrough;
|
|
|
|
# Dashboard definition
#
# Grafana dashboard model for S.M.A.R.T disk metrics and mdadm RAID state.
# Layout (24-column grid): an overview row (alerts, health, temperature, RAID
# state, sector table), a detailed-metrics row, and a RAID-details row.
#
# Local helpers below only factor out literals that were repeated verbatim;
# the generated JSON is the same as before except for the Sector Health
# queries (see `sectorTarget`).
dashboard =
  let
    # Selector for `metric`, restricted to the host(s) chosen in the `host`
    # template variable.
    forHost = metric: ''${metric}{instance=~"$host"}'';

    # Reduce every series to its most recent non-null sample.
    lastNotNull = { values = false; calcs = [ "lastNotNull" ]; fields = ""; };

    # Absolute-mode threshold block from a list of { color, value } steps.
    absoluteThresholds = steps: { mode = "absolute"; inherit steps; };

    # Shared by the temperature gauge and the temperature time series so both
    # panels switch colour at the same values.
    temperatureSteps = [
      { color = "green"; value = null; }
      { color = "yellow"; value = 45; }
      { color = "red"; value = 55; }
    ];

    # Red below 1, green from 1 upwards; used by the 0/1 status panels.
    okSteps = [
      { color = "red"; value = null; }
      { color = "green"; value = 1; }
    ];

    # Options shared by the stat panels that render a mapped 0/1 state as a
    # coloured tile.
    statusTileOptions = {
      reduceOptions = lastNotNull;
      orientation = "horizontal";
      textMode = "auto";
      colorMode = "background";
      graphMode = "none";
    };

    # Legend/tooltip options shared by the time series panels.
    timeseriesOptions = {
      legend = { displayMode = "list"; placement = "bottom"; showLegend = true; };
      tooltip = { mode = "multi"; sort = "desc"; };
    };

    # Full-width, expanded section header placed at grid row `y`.
    mkRow = { id, title, y }: {
      inherit id title;
      type = "row";
      collapsed = false;
      gridPos = { x = 0; inherit y; w = 24; h = 1; };
      panels = [ ];
    };

    # Instant table query for one sector counter.
    #
    # The trailing `+ 0` is deliberate: a binary operation drops the metric
    # name, so the result has no `__name__` label. The "merge" transformation
    # combines rows only when every column shared by all queries is equal; a
    # per-query `__name__` column kept the three results from collapsing into
    # one row per device.
    sectorTarget = refId: metric: {
      expr = "${forHost metric} + 0";
      legendFormat = "{{device}}";
      inherit refId;
      format = "table";
      instant = true;
    };
  in
  {
    uid = "smart-disk-health";
    title = "S.M.A.R.T Disk Health";
    description = "S.M.A.R.T metrics and RAID array status";
    tags = [ "disk" "smart" "storage" "nas" ];
    timezone = "browser";
    editable = false;
    refresh = "5m";
    schemaVersion = 39;
    version = 1;

    # Variables
    templating.list = [
      {
        name = "host";
        label = "Host";
        type = "query";
        datasource = { uid = datasourceUid; type = "prometheus"; };
        query = "label_values(smart_health_passed, instance)";
        regex = "";
        sort = 1;
        refresh = 1;
        includeAll = true;
        multi = false;
        current = { selected = true; text = "All"; value = "$__all"; };
      }
    ];

    # Panels
    panels = [
      # === OVERVIEW ROW ===
      (mkRow { id = 1; title = "Overview"; y = 0; })

      # Alert Status - Shows firing disk alerts
      # Built without mkPanel: it has no targets and no datasource.
      {
        id = 5;
        title = "Alert Status";
        type = "alertlist";
        gridPos = { x = 0; y = 1; w = 6; h = 5; };
        options = {
          alertInstanceLabelFilter = "";
          alertName = "Disk";
          dashboardAlerts = false;
          groupBy = [ ];
          groupMode = "default";
          maxItems = 20;
          sortOrder = 1;
          stateFilter = {
            "error" = true;
            firing = true;
            noData = false;
            normal = false;
            pending = false;
          };
          viewMode = "list";
        };
      }

      # Health Status - Stat panel
      (mkPanel {
        id = 2;
        title = "Disk Health Status";
        type = "stat";
        gridPos = { x = 6; y = 1; w = 6; h = 5; };
        targets = [{
          expr = forHost "smart_health_passed";
          legendFormat = "{{device}}";
          refId = "A";
        }];
        options = statusTileOptions;
        fieldConfig = {
          defaults = {
            mappings = [
              { type = "value"; options."1" = { text = "PASSED"; color = "green"; index = 0; }; }
              { type = "value"; options."0" = { text = "FAILED"; color = "red"; index = 1; }; }
            ];
            thresholds = absoluteThresholds okSteps;
          };
        };
      })

      # Temperature Gauge
      (mkPanel {
        id = 3;
        title = "Disk Temperatures";
        type = "gauge";
        gridPos = { x = 12; y = 1; w = 6; h = 8; };
        targets = [{
          expr = forHost "smart_temperature_celsius";
          legendFormat = "{{device}}";
          refId = "A";
        }];
        options = {
          reduceOptions = lastNotNull;
          orientation = "auto";
          showThresholdLabels = false;
          showThresholdMarkers = true;
        };
        fieldConfig = {
          defaults = {
            unit = "celsius";
            min = 0;
            max = 70;
            thresholds = absoluteThresholds temperatureSteps;
          };
        };
      })

      # RAID Status - Stat panel
      (mkPanel {
        id = 4;
        title = "RAID Array Status";
        type = "stat";
        gridPos = { x = 18; y = 1; w = 6; h = 8; };
        targets = [{
          expr = forHost "mdadm_array_state";
          legendFormat = "{{array}}";
          refId = "A";
        }];
        options = statusTileOptions;
        fieldConfig = {
          defaults = {
            mappings = [
              { type = "value"; options."1" = { text = "Healthy"; color = "green"; index = 0; }; }
              { type = "value"; options."0" = { text = "Degraded"; color = "red"; index = 1; }; }
            ];
            thresholds = absoluteThresholds okSteps;
          };
        };
      })

      # Sector Health Table - Promoted to overview for visibility
      (mkPanel {
        id = 13;
        title = "Sector Health";
        type = "table";
        gridPos = { x = 0; y = 6; w = 12; h = 4; };
        targets = [
          (sectorTarget "A" "smart_reallocated_sector_ct")
          (sectorTarget "B" "smart_current_pending_sector")
          (sectorTarget "C" "smart_offline_uncorrectable")
        ];
        options = {
          showHeader = true;
          cellHeight = "sm";
        };
        # `transformations` is not a named mkPanel argument; it reaches the
        # panel through mkPanel's pass-through of extra attributes.
        transformations = [
          { id = "merge"; options = { }; }
          {
            id = "organize";
            options = {
              excludeByName = { Time = true; __name__ = true; instance = true; job = true; serial = true; };
              renameByName = {
                device = "Device";
                "Value #A" = "Reallocated Sectors";
                "Value #B" = "Pending Sectors";
                "Value #C" = "Offline Uncorrectable";
              };
            };
          }
        ];
        fieldConfig = {
          defaults = {
            thresholds = absoluteThresholds [
              { color = "green"; value = null; }
              { color = "yellow"; value = 1; }
              { color = "red"; value = 10; }
            ];
            custom = { displayMode = "color-background-solid"; };
          };
        };
      })

      # === DETAILED METRICS ROW ===
      (mkRow { id = 10; title = "Detailed Metrics"; y = 10; })

      # Temperature Time Series
      (mkPanel {
        id = 11;
        title = "Temperature Over Time";
        type = "timeseries";
        gridPos = { x = 0; y = 11; w = 12; h = 8; };
        targets = [{
          expr = forHost "smart_temperature_celsius";
          legendFormat = "{{device}}";
          refId = "A";
        }];
        options = timeseriesOptions;
        fieldConfig = {
          defaults = {
            unit = "celsius";
            custom = {
              drawStyle = "line";
              lineInterpolation = "smooth";
              fillOpacity = 10;
              pointSize = 5;
              showPoints = "auto";
            };
            thresholds = absoluteThresholds temperatureSteps;
          };
        };
      })

      # Power On Hours
      (mkPanel {
        id = 12;
        title = "Power On Hours";
        type = "stat";
        gridPos = { x = 12; y = 11; w = 12; h = 8; };
        targets = [{
          expr = forHost "smart_power_on_hours";
          legendFormat = "{{device}}";
          refId = "A";
        }];
        options = {
          reduceOptions = lastNotNull;
          orientation = "horizontal";
          textMode = "value_and_name";
          colorMode = "none";
          graphMode = "none";
        };
        fieldConfig = {
          defaults = {
            unit = "h";
          };
        };
      })

      # === RAID DETAILS ROW ===
      (mkRow { id = 20; title = "RAID Details"; y = 19; })

      # RAID Devices
      (mkPanel {
        id = 21;
        title = "RAID Array Devices";
        type = "stat";
        gridPos = { x = 0; y = 20; w = 12; h = 4; };
        targets = [
          {
            expr = forHost "mdadm_array_devices_active";
            legendFormat = "{{array}} Active";
            refId = "A";
          }
          {
            expr = forHost "mdadm_array_devices_total";
            legendFormat = "{{array}} Total";
            refId = "B";
          }
        ];
        options = {
          reduceOptions = lastNotNull;
          orientation = "horizontal";
          textMode = "value_and_name";
          colorMode = "value";
          graphMode = "none";
        };
        fieldConfig = {
          defaults = {
            unit = "short";
          };
        };
      })

      # UDMA CRC Errors
      (mkPanel {
        id = 22;
        title = "UDMA CRC Errors";
        type = "timeseries";
        gridPos = { x = 12; y = 20; w = 12; h = 4; };
        targets = [{
          expr = forHost "smart_udma_crc_error_count";
          legendFormat = "{{device}}";
          refId = "A";
        }];
        options = timeseriesOptions;
        fieldConfig = {
          defaults = {
            unit = "short";
            custom = {
              drawStyle = "line";
              lineInterpolation = "stepAfter";
              fillOpacity = 0;
              pointSize = 5;
              showPoints = "auto";
            };
          };
        };
      })

      # Last Update Timestamp
      # Shows the age of the newest sample in seconds; turns yellow after
      # 1800 s and red after 3600 s.
      (mkPanel {
        id = 30;
        title = "Last Metrics Update";
        type = "stat";
        gridPos = { x = 0; y = 24; w = 6; h = 5; };
        targets = [{
          expr = "time() - ${forHost "disk_metrics_last_update"}";
          legendFormat = "Age";
          refId = "A";
        }];
        options = {
          reduceOptions = lastNotNull;
          orientation = "horizontal";
          textMode = "value";
          colorMode = "value";
          graphMode = "none";
        };
        fieldConfig = {
          defaults = {
            unit = "s";
            thresholds = absoluteThresholds [
              { color = "green"; value = null; }
              { color = "yellow"; value = 1800; }
              { color = "red"; value = 3600; }
            ];
          };
        };
      })

      # Device Activity Status
      (mkPanel {
        id = 31;
        title = "Device Activity";
        type = "stat";
        gridPos = { x = 6; y = 24; w = 18; h = 5; };
        targets = [{
          expr = forHost "smart_device_active";
          legendFormat = "{{device}}";
          refId = "A";
        }];
        options = statusTileOptions;
        fieldConfig = {
          defaults = {
            mappings = [
              { type = "value"; options."1" = { text = "Active"; color = "green"; index = 0; }; }
              { type = "value"; options."0" = { text = "Standby"; color = "blue"; index = 1; }; }
            ];
            thresholds = absoluteThresholds [
              { color = "blue"; value = null; }
              { color = "green"; value = 1; }
            ];
          };
        };
      })
    ];
  };
|
|
in
|
|
# Serialise the dashboard attrset to JSON and write it to a store file named
# "smart-dashboard.json"; that store path is the value of this expression.
let
  dashboardJson = builtins.toJSON dashboard;
in
pkgs.writeText "smart-dashboard.json" dashboardJson
|