feat: add cleanup for grafana alerting rules
This commit is contained in:
99
hosts/web-arm/modules/grafana/alert-cleanup.nix
Normal file
99
hosts/web-arm/modules/grafana/alert-cleanup.nix
Normal file
@@ -0,0 +1,99 @@
|
||||
{ lib, pkgs, config, ... }:
|
||||
|
||||
let
|
||||
cfg = config.services.grafana;
|
||||
|
||||
# extractRuleUids :: [ group ] -> [ uid ]
# Gathers the `uid` attribute of every rule in every given group and returns
# the de-duplicated list.
extractRuleUids = groups:
  let
    # UIDs of the rules declared in one group.
    uidsOfGroup = group: map (rule: rule.uid) group.rules;
  in
  lib.unique (lib.flatten (map uidsOfGroup groups));
|
||||
|
||||
# Alert rule groups taken from the Grafana provisioning settings; evaluates
# to an empty list when that option path cannot be selected.
allGroups = cfg.provision.alerting.rules.settings.groups or [];

# UIDs of all rules declared in those groups.
expectedUids = extractRuleUids allGroups;

# Store file holding the expected UIDs as JSON: { "expected_uids": [ ... ] }.
# The cleanup script reads this file at runtime.
cleanupManifest =
  let
    manifestJson = builtins.toJSON { expected_uids = expectedUids; };
  in
  pkgs.writeTextFile {
    name = "grafana-alert-cleanup-manifest.json";
    text = manifestJson;
  };
|
||||
|
||||
# Cleanup script: deletes rows from the `alert_rule` table of the "grafana"
# PostgreSQL database (reached through the /run/postgresql socket) whose
# `updated_by` is 'service' and whose UID is not listed in the manifest.
#
# The script is best-effort: a failed query or a failed delete is reported on
# stderr but never produces a non-zero exit status, so it cannot keep the
# caller from proceeding.
cleanupScript = pkgs.writeShellScriptBin "grafana-alert-cleanup" ''
  # writeShellScriptBin already emits the shebang, so none is repeated here.
  set -euo pipefail

  MANIFEST="${cleanupManifest}"
  DB_NAME="grafana"
  PSQL="${pkgs.postgresql}/bin/psql"
  JQ="${pkgs.jq}/bin/jq"

  echo "=== Grafana Alert Rule Cleanup (PostgreSQL) ==="
  echo "Loading expected UIDs from manifest..."

  EXPECTED_UIDS=$("$JQ" -r '.expected_uids[]' "$MANIFEST")
  # Count inside jq: piping an empty list through `wc -l` would report 1.
  EXPECTED_COUNT=$("$JQ" -r '.expected_uids | length' "$MANIFEST")
  echo "Expected UIDs count: $EXPECTED_COUNT"

  echo "Querying database for current provisioned alert rules..."

  # Query database for all provisioned rule UIDs. A failing query is reported
  # as such instead of being mistaken for "no rules"; it still exits 0.
  if ! CURRENT_UIDS=$("$PSQL" -h /run/postgresql -d "$DB_NAME" -t -A -c \
      "SELECT uid FROM alert_rule WHERE updated_by = 'service';"); then
    echo "Could not query alert_rule table; skipping cleanup." >&2
    exit 0
  fi

  if [[ -z "$CURRENT_UIDS" ]]; then
    echo "No provisioned rules found in database."
    exit 0
  fi

  CURRENT_COUNT=$(wc -l <<< "$CURRENT_UIDS")
  echo "Current provisioned UIDs count: $CURRENT_COUNT"

  # Find orphaned UIDs (in database but not in expected list).
  # -F matches the UID literally rather than as a regex. Feeding grep from a
  # here-string instead of `echo | grep -q` avoids a spurious pipeline failure
  # under pipefail, which would wrongly classify a valid rule as orphaned.
  ORPHANED_UIDS=()
  while IFS= read -r uid; do
    [[ -n "$uid" ]] || continue
    if ! grep -Fqx -- "$uid" <<< "$EXPECTED_UIDS"; then
      ORPHANED_UIDS+=("$uid")
    fi
  done <<< "$CURRENT_UIDS"
  ORPHAN_COUNT=''${#ORPHANED_UIDS[@]}

  if [[ $ORPHAN_COUNT -eq 0 ]]; then
    echo "No orphaned alert rules found. All rules match configuration."
    exit 0
  fi

  echo "Found $ORPHAN_COUNT orphaned rule(s)"

  # Delete orphaned rules. The psql call sits directly in the `if` condition:
  # under `set -e` a separate `$?` check would never run after a failure.
  # The UID is passed as a psql variable and quoted by psql via :'uid'.
  # Variable interpolation is not available with -c, so the statement is fed
  # on stdin, and ON_ERROR_STOP makes SQL errors yield a non-zero exit status.
  FAILED_COUNT=0
  for uid in "''${ORPHANED_UIDS[@]}"; do
    echo "Deleting orphaned rule: $uid"

    if "$PSQL" -h /run/postgresql -d "$DB_NAME" -v ON_ERROR_STOP=1 -v uid="$uid" \
        >/dev/null 2>&1 \
        <<< "DELETE FROM alert_rule WHERE uid = :'uid' AND updated_by = 'service';"; then
      echo "  ✓ Deleted $uid"
    else
      echo "  ✗ Failed to delete $uid" >&2
      FAILED_COUNT=$((FAILED_COUNT + 1))
    fi
  done

  if [[ $FAILED_COUNT -gt 0 ]]; then
    echo "$FAILED_COUNT orphaned rule(s) could not be deleted" >&2
  fi

  echo "=== Cleanup Complete ==="
'';
|
||||
|
||||
in
|
||||
{
  # Only active when services.grafana is enabled.
  config = lib.mkIf cfg.enable {
    systemd.services.grafana = {
      serviceConfig = {
        # Pre-start hook on the grafana unit: announces the cleanup and then
        # runs the grafana-alert-cleanup script.
        ExecStartPre = pkgs.writeShellScript "grafana-alert-cleanup-pre" ''
          echo "Running Grafana alert rule cleanup..."
          ${cleanupScript}/bin/grafana-alert-cleanup
        '';
      };
    };
  };
}
|
||||
@@ -11,8 +11,7 @@ let
|
||||
httpsDomains = lib.map (d: "https://${d}") filteredDomains;
|
||||
websiteAlertRules = lib.map (target:
|
||||
let
|
||||
domain = lib.replaceStrings ["://" "." "-" "/" ] ["-" "-" "_" "_"] target + "-down-alert";
|
||||
uid = builtins.hashString "sha1" domain;
|
||||
uid = "website-" + (builtins.replaceStrings ["https://" "http://" "." "/"] ["" "" "-" "-"] target);
|
||||
in {
|
||||
uid = uid;
|
||||
title = "Website " + target + " Down";
|
||||
|
||||
@@ -34,6 +34,8 @@ in
|
||||
|
||||
./datasources/victoriametrics.nix
|
||||
./datasources/loki.nix
|
||||
|
||||
./alert-cleanup.nix
|
||||
];
|
||||
|
||||
systemd.services.grafana.script = lib.mkBefore ''
|
||||
|
||||
Reference in New Issue
Block a user