feat: refactor Grafana alerting rules into a consolidated system module and update individual alert files

This commit is contained in:
2025-05-31 09:57:03 +02:00
parent 8b5fb0861d
commit 35fa61ef34
7 changed files with 281 additions and 299 deletions

View File

@@ -1,12 +1,6 @@
{ lib, pkgs, config, ... }: { lib, pkgs, config, ... }:
{ {
services.grafana.provision.alerting.rules.settings.groups = [ grafanaAlertRuleDefinitions = [
{
name = "CPUUsageAlerts";
folder = "System Alerts";
interval = "1m";
rules = [
{ {
uid = "high-cpu-usage-alert-uid"; uid = "high-cpu-usage-alert-uid";
title = "HighCPUUsage"; title = "HighCPUUsage";
@@ -61,6 +55,4 @@
}; };
} }
]; ];
}
];
} }

View File

@@ -1,18 +1,11 @@
{ lib, pkgs, config, ... }: { lib, pkgs, config, ... }:
{ {
services.grafana.provision.alerting.rules.settings.groups = [ grafanaAlertRuleDefinitions = [
{
# orgId = 1; # Defaults to 1 for provisioned rules
name = "DiskUsageAlerts"; # Name of the rule group
folder = "System Alerts"; # The folder these rules belong to in Grafana UI
interval = "1m"; # How often to evaluate rules in this group
rules = [
{ {
uid = "high-disk-usage-alert-uid"; # Optional: provide a stable UID for the rule itself uid = "high-disk-usage-alert-uid"; # Optional: provide a stable UID for the rule itself
title = "HighDiskUsage"; # Name of the alert rule (was 'alert' in vmalert) title = "HighDiskUsage"; # Name of the alert rule (was 'alert' in vmalert)
# Condition for the alert to fire. 'C' refers to the refId of the threshold expression. # Condition for the alert to fire. 'D' refers to the refId of the threshold expression.
condition = "D"; # Condition is now D condition = "D"; # Condition is now D
# Removed rule-level relativeTimeRange # Removed rule-level relativeTimeRange
@@ -80,6 +73,4 @@
# isPaused = false; # Default is not paused # isPaused = false; # Default is not paused
} }
]; ];
}
];
} }

View File

@@ -1,12 +1,6 @@
{ lib, pkgs, config, ... }: { lib, pkgs, config, ... }:
{ {
services.grafana.provision.alerting.rules.settings.groups = [ grafanaAlertRuleDefinitions = [
{
name = "HostStatusAlerts";
folder = "System Alerts";
interval = "1m";
rules = [
{ {
uid = "host-down-alert-uid"; uid = "host-down-alert-uid";
title = "HostDown"; title = "HostDown";
@@ -57,6 +51,4 @@
}; };
} }
]; ];
}
];
} }

View File

@@ -1,12 +1,6 @@
{ lib, pkgs, config, ... }: { lib, pkgs, config, ... }:
{ {
services.grafana.provision.alerting.rules.settings.groups = [ grafanaAlertRuleDefinitions = [
{
name = "InodeUsageAlerts";
folder = "System Alerts";
interval = "1m";
rules = [
{ {
uid = "high-inode-usage-alert-uid"; uid = "high-inode-usage-alert-uid";
title = "HighInodeUsage"; title = "HighInodeUsage";
@@ -66,6 +60,4 @@
}; };
} }
]; ];
}
];
} }

View File

@@ -1,12 +1,6 @@
{ lib, pkgs, config, ... }: { lib, pkgs, config, ... }:
{ {
services.grafana.provision.alerting.rules.settings.groups = [ grafanaAlertRuleDefinitions = [
{
name = "RAMUsageAlerts";
folder = "System Alerts";
interval = "1m";
rules = [
{ {
uid = "high-ram-usage-alert-uid"; uid = "high-ram-usage-alert-uid";
title = "HighRAMUsage"; title = "HighRAMUsage";
@@ -64,6 +58,4 @@
}; };
} }
]; ];
}
];
} }

View File

@@ -0,0 +1,21 @@
# Consolidated Grafana "System Alerts" rule group.
#
# Every file listed in `ruleFiles` is a function of `{ lib, pkgs, config, ... }`
# whose result carries a `grafanaAlertRuleDefinitions` list. Those lists are
# concatenated, in the order given, into the `rules` of one provisioned group.
{ lib, pkgs, config, ... }:
let
  # Evaluate a single rule file with this module's arguments and pull out its rule list.
  rulesFrom = path: (import path { inherit lib pkgs config; }).grafanaAlertRuleDefinitions;

  # Rule files in the parent 'alerting' directory; list order determines rule order.
  ruleFiles = [
    ../cpu_usage.nix
    ../disk_usage.nix
    ../host_down.nix
    ../inode_usage.nix
    ../ram_usage.nix
  ];
in
{
  services.grafana.provision.alerting.rules.settings.groups = [
    {
      name = "System Alerts"; # Grafana alert group name
      folder = "System Alerts"; # Grafana folder name
      interval = "1m";
      rules = lib.concatMap rulesFrom ruleFiles;
    }
  ];
}

View File

@@ -28,11 +28,13 @@ let
in in
{ {
imports = [ imports = [
./alerting/disk_usage.nix # Individual alert files are no longer imported here directly; alerting/system/default.nix imports them and builds the rule group
./alerting/cpu_usage.nix # ./alerting/disk_usage.nix
./alerting/host_down.nix # ./alerting/cpu_usage.nix
./alerting/inode_usage.nix # ./alerting/host_down.nix
./alerting/ram_usage.nix # ./alerting/inode_usage.nix
# ./alerting/ram_usage.nix
./alerting/system/default.nix # Added: Imports the consolidated system alerts module
# ... other rule files can be added here ... # ... other rule files can be added here ...
./datasources/victoriametrics.nix ./datasources/victoriametrics.nix
]; ];
@@ -99,7 +101,7 @@ in
}; };
provision = { provision = {
alerting = { alerting = {
rules.settings.groups = lib.mkMerge []; # Allows rule groups to be merged rules.settings.groups = lib.mkMerge []; # Allows rule groups to be merged (including the one from system/default.nix)
contactPoints = { contactPoints = {
settings = { settings = {
apiVersion = 1; # As per Grafana provisioning API apiVersion = 1; # As per Grafana provisioning API