From db25b2bfbbbc655735245d3a07dc09ce5478d209 Mon Sep 17 00:00:00 2001
From: Dominik Polakovics
Date: Sat, 1 Nov 2025 11:09:05 +0100
Subject: [PATCH] feat: add cleanup for grafana alerting rules

---
 .../web-arm/modules/grafana/alert-cleanup.nix | 112 ++++++++++++++++++
 .../grafana/alerting/websites/default.nix     |   3 +-
 hosts/web-arm/modules/grafana/default.nix     |   2 +
 3 files changed, 115 insertions(+), 2 deletions(-)
 create mode 100644 hosts/web-arm/modules/grafana/alert-cleanup.nix

diff --git a/hosts/web-arm/modules/grafana/alert-cleanup.nix b/hosts/web-arm/modules/grafana/alert-cleanup.nix
new file mode 100644
index 0000000..827bc67
--- /dev/null
+++ b/hosts/web-arm/modules/grafana/alert-cleanup.nix
@@ -0,0 +1,112 @@
+{ lib, pkgs, config, ... }:
+
+let
+  cfg = config.services.grafana;
+
+  # Extract all UIDs from configured alert rules
+  extractRuleUids = groups:
+    lib.unique (lib.flatten (
+      map (group: map (rule: rule.uid) group.rules) groups
+    ));
+
+  # Collect all alert rule groups from the three modules
+  allGroups = cfg.provision.alerting.rules.settings.groups or [];
+
+  expectedUids = extractRuleUids allGroups;
+
+  # Generate manifest JSON
+  cleanupManifest = pkgs.writeTextFile {
+    name = "grafana-alert-cleanup-manifest.json";
+    text = builtins.toJSON {
+      expected_uids = expectedUids;
+    };
+  };
+
+  # Cleanup script using PostgreSQL.
+  # Deletes rows from alert_rule that are marked updated_by = 'service' but whose
+  # UID is no longer part of the Nix configuration. Database errors are reported
+  # but never make the script fail, so they cannot block Grafana from starting.
+  cleanupScript = pkgs.writeShellScriptBin "grafana-alert-cleanup" ''
+    set -euo pipefail
+
+    MANIFEST="${cleanupManifest}"
+    DB_NAME="grafana"
+    PSQL="${pkgs.postgresql}/bin/psql"
+
+    echo "=== Grafana Alert Rule Cleanup (PostgreSQL) ==="
+    echo "Loading expected UIDs from manifest..."
+
+    EXPECTED_UIDS=$(${pkgs.jq}/bin/jq -r '.expected_uids[]' "$MANIFEST")
+    # Let jq count: piping an empty list through wc -l would report 1
+    EXPECTED_COUNT=$(${pkgs.jq}/bin/jq -r '.expected_uids | length' "$MANIFEST")
+    echo "Expected UIDs count: $EXPECTED_COUNT"
+
+    echo "Querying database for current provisioned alert rules..."
+
+    # Query database for all provisioned rule UIDs. A failing query (database not
+    # reachable, table missing) skips the cleanup instead of aborting the start.
+    if ! CURRENT_UIDS=$("$PSQL" -h /run/postgresql -d "$DB_NAME" -t -A -c \
+      "SELECT uid FROM alert_rule WHERE updated_by = 'service';"); then
+      echo "Could not query alert rules; skipping cleanup." >&2
+      exit 0
+    fi
+
+    if [[ -z "$CURRENT_UIDS" ]]; then
+      echo "No provisioned rules found in database."
+      exit 0
+    fi
+
+    CURRENT_COUNT=$(echo "$CURRENT_UIDS" | wc -l)
+    echo "Current provisioned UIDs count: $CURRENT_COUNT"
+
+    # Find orphaned UIDs (in database but not in expected list)
+    ORPHANED_UIDS=()
+
+    while IFS= read -r uid; do
+      [[ -n "$uid" ]] || continue
+      # -F matches the UID literally; the here-string avoids a pipeline, where
+      # pipefail could turn an early grep -q exit (SIGPIPE) into a false "orphan".
+      if ! grep -qxF -- "$uid" <<< "$EXPECTED_UIDS"; then
+        ORPHANED_UIDS+=("$uid")
+      fi
+    done <<< "$CURRENT_UIDS"
+
+    ORPHAN_COUNT=''${#ORPHANED_UIDS[@]}
+
+    if [[ $ORPHAN_COUNT -eq 0 ]]; then
+      echo "No orphaned alert rules found. All rules match configuration."
+      exit 0
+    fi
+
+    echo "Found $ORPHAN_COUNT orphaned rule(s)"
+
+    # Delete orphaned rules
+    for uid in "''${ORPHANED_UIDS[@]}"; do
+      echo "Deleting orphaned rule: $uid"
+
+      # psql runs as the if-condition because under set -e a bare failing command
+      # would end the script before its status could be checked. The statement is
+      # sent on stdin: psql substitutes :'uid' (as a quoted literal) there, not in -c.
+      if "$PSQL" -h /run/postgresql -d "$DB_NAME" -q -v ON_ERROR_STOP=1 -v uid="$uid" \
+        <<< "DELETE FROM alert_rule WHERE uid = :'uid' AND updated_by = 'service';" \
+        >/dev/null 2>&1; then
+        echo "  ✓ Deleted $uid"
+      else
+        echo "  ✗ Failed to delete $uid" >&2
+      fi
+    done
+
+    echo "=== Cleanup Complete ==="
+  '';
+
+in
+{
+  config = lib.mkIf cfg.enable {
+
+    # Run the cleanup as an ExecStartPre step of the Grafana unit
+    systemd.services.grafana.serviceConfig.ExecStartPre = pkgs.writeShellScript "grafana-alert-cleanup-pre" ''
+      echo "Running Grafana alert rule cleanup..."
+      ${cleanupScript}/bin/grafana-alert-cleanup
+    '';
+  };
+}
diff --git a/hosts/web-arm/modules/grafana/alerting/websites/default.nix b/hosts/web-arm/modules/grafana/alerting/websites/default.nix
index 7e65cf2..fe17a9c 100644
--- a/hosts/web-arm/modules/grafana/alerting/websites/default.nix
+++ b/hosts/web-arm/modules/grafana/alerting/websites/default.nix
@@ -11,8 +11,7 @@ let
   httpsDomains = lib.map (d: "https://${d}") filteredDomains;
   websiteAlertRules = lib.map (target:
     let
-      domain = lib.replaceStrings ["://" "." "-" "/" ] ["-" "-" "_" "_"] target + "-down-alert";
-      uid = builtins.hashString "sha1" domain;
+      uid = "website-" + (builtins.replaceStrings ["https://" "http://" "." "/"] ["" "" "-" "-"] target);
     in {
       uid = uid;
       title = "Website " + target + " Down";
diff --git a/hosts/web-arm/modules/grafana/default.nix b/hosts/web-arm/modules/grafana/default.nix
index 75d7113..3ac5e10 100644
--- a/hosts/web-arm/modules/grafana/default.nix
+++ b/hosts/web-arm/modules/grafana/default.nix
@@ -34,6 +34,8 @@ in
 
     ./datasources/victoriametrics.nix
     ./datasources/loki.nix
+
+    ./alert-cleanup.nix
   ];
 
   systemd.services.grafana.script = lib.mkBefore ''