feat: refactor Grafana alerting rules into a consolidated system module and update individual alert files
This commit is contained in:
@@ -1,12 +1,6 @@
|
||||
{ lib, pkgs, config, ... }:
|
||||
{
|
||||
services.grafana.provision.alerting.rules.settings.groups = [
|
||||
{
|
||||
name = "CPUUsageAlerts";
|
||||
folder = "System Alerts";
|
||||
interval = "1m";
|
||||
|
||||
rules = [
|
||||
grafanaAlertRuleDefinitions = [
|
||||
{
|
||||
uid = "high-cpu-usage-alert-uid";
|
||||
title = "HighCPUUsage";
|
||||
@@ -62,5 +56,3 @@
|
||||
}
|
||||
];
|
||||
}
|
||||
];
|
||||
}
|
||||
|
||||
@@ -1,18 +1,11 @@
|
||||
{ lib, pkgs, config, ... }:
|
||||
{
|
||||
services.grafana.provision.alerting.rules.settings.groups = [
|
||||
{
|
||||
# orgId = 1; # Defaults to 1 for provisioned rules
|
||||
name = "DiskUsageAlerts"; # Name of the rule group
|
||||
folder = "System Alerts"; # The folder these rules belong to in Grafana UI
|
||||
interval = "1m"; # How often to evaluate rules in this group
|
||||
|
||||
rules = [
|
||||
grafanaAlertRuleDefinitions = [
|
||||
{
|
||||
uid = "high-disk-usage-alert-uid"; # Optional: provide a stable UID for the rule itself
|
||||
title = "HighDiskUsage"; # Name of the alert rule (was 'alert' in vmalert)
|
||||
|
||||
# Condition for the alert to fire. 'C' refers to the refId of the threshold expression.
|
||||
# Condition for the alert to fire. 'D' refers to the refId of the threshold expression.
|
||||
condition = "D"; # Condition is now D
|
||||
# Removed rule-level relativeTimeRange
|
||||
|
||||
@@ -81,5 +74,3 @@
|
||||
}
|
||||
];
|
||||
}
|
||||
];
|
||||
}
|
||||
|
||||
@@ -1,12 +1,6 @@
|
||||
{ lib, pkgs, config, ... }:
|
||||
{
|
||||
services.grafana.provision.alerting.rules.settings.groups = [
|
||||
{
|
||||
name = "HostStatusAlerts";
|
||||
folder = "System Alerts";
|
||||
interval = "1m";
|
||||
|
||||
rules = [
|
||||
grafanaAlertRuleDefinitions = [
|
||||
{
|
||||
uid = "host-down-alert-uid";
|
||||
title = "HostDown";
|
||||
@@ -58,5 +52,3 @@
|
||||
}
|
||||
];
|
||||
}
|
||||
];
|
||||
}
|
||||
|
||||
@@ -1,12 +1,6 @@
|
||||
{ lib, pkgs, config, ... }:
|
||||
{
|
||||
services.grafana.provision.alerting.rules.settings.groups = [
|
||||
{
|
||||
name = "InodeUsageAlerts";
|
||||
folder = "System Alerts";
|
||||
interval = "1m";
|
||||
|
||||
rules = [
|
||||
grafanaAlertRuleDefinitions = [
|
||||
{
|
||||
uid = "high-inode-usage-alert-uid";
|
||||
title = "HighInodeUsage";
|
||||
@@ -67,5 +61,3 @@
|
||||
}
|
||||
];
|
||||
}
|
||||
];
|
||||
}
|
||||
|
||||
@@ -1,12 +1,6 @@
|
||||
{ lib, pkgs, config, ... }:
|
||||
{
|
||||
services.grafana.provision.alerting.rules.settings.groups = [
|
||||
{
|
||||
name = "RAMUsageAlerts";
|
||||
folder = "System Alerts";
|
||||
interval = "1m";
|
||||
|
||||
rules = [
|
||||
grafanaAlertRuleDefinitions = [
|
||||
{
|
||||
uid = "high-ram-usage-alert-uid";
|
||||
title = "HighRAMUsage";
|
||||
@@ -65,5 +59,3 @@
|
||||
}
|
||||
];
|
||||
}
|
||||
];
|
||||
}
|
||||
|
||||
21
hosts/web-arm/modules/grafana/alerting/system/default.nix
Normal file
21
hosts/web-arm/modules/grafana/alerting/system/default.nix
Normal file
@@ -0,0 +1,21 @@
|
||||
# Consolidated "System Alerts" rule group.
#
# Each file listed in `ruleFiles` is a function taking `{ lib, pkgs, config, ... }`
# and returning an attribute set with a `grafanaAlertRuleDefinitions` list.
# This module calls every one of them with the module arguments it received,
# concatenates the resulting lists in the order given, and provisions them as
# a single Grafana alert rule group.
{ lib, pkgs, config, ... }:
let
  # Arguments forwarded unchanged to every rule-definition file.
  ruleFileArgs = { inherit lib pkgs config; };

  # Rule-definition files in the parent 'alerting' directory.
  # The order here is the order of the rules inside the group.
  ruleFiles = [
    ../cpu_usage.nix
    ../disk_usage.nix
    ../host_down.nix
    ../inode_usage.nix
    ../ram_usage.nix
  ];

  # Evaluate one rule file and pull out its list of rule definitions.
  loadRuleDefinitions = path: (import path ruleFileArgs).grafanaAlertRuleDefinitions;

  # All rule definitions flattened into one list.
  allSystemRules = lib.concatMap loadRuleDefinitions ruleFiles;
in
{
  services.grafana.provision.alerting.rules.settings.groups = [
    {
      name = "System Alerts"; # Grafana alert group name
      folder = "System Alerts"; # Grafana folder name
      interval = "1m";
      rules = allSystemRules;
    }
  ];
}
|
||||
@@ -28,11 +28,13 @@ let
|
||||
in
|
||||
{
|
||||
imports = [
|
||||
./alerting/disk_usage.nix
|
||||
./alerting/cpu_usage.nix
|
||||
./alerting/host_down.nix
|
||||
./alerting/inode_usage.nix
|
||||
./alerting/ram_usage.nix
|
||||
# Individual alert files removed, now handled by alerting/system/default.nix
|
||||
# ./alerting/disk_usage.nix
|
||||
# ./alerting/cpu_usage.nix
|
||||
# ./alerting/host_down.nix
|
||||
# ./alerting/inode_usage.nix
|
||||
# ./alerting/ram_usage.nix
|
||||
./alerting/system/default.nix # Added: Imports the consolidated system alerts module
|
||||
# ... other rule files can be added here ...
|
||||
./datasources/victoriametrics.nix
|
||||
];
|
||||
@@ -99,7 +101,7 @@ in
|
||||
};
|
||||
provision = {
|
||||
alerting = {
|
||||
rules.settings.groups = lib.mkMerge []; # Allows rule groups to be merged
|
||||
rules.settings.groups = lib.mkMerge []; # Allows rule groups to be merged (including the one from system/default.nix)
|
||||
contactPoints = {
|
||||
settings = {
|
||||
apiVersion = 1; # As per Grafana provisioning API
|
||||
|
||||
Reference in New Issue
Block a user