feat: add cleanup for grafana alerting rules
This commit is contained in:
parent
819bfc1531
commit
db25b2bfbb
3 changed files with 102 additions and 2 deletions
99
hosts/web-arm/modules/grafana/alert-cleanup.nix
Normal file
99
hosts/web-arm/modules/grafana/alert-cleanup.nix
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
{ lib, pkgs, config, ... }:
|
||||
|
||||
let
|
||||
cfg = config.services.grafana;
|
||||
|
||||
# extractRuleUids :: [ group ] -> [ uid ]
# Takes a list of alert rule groups (each carrying a `rules` list whose
# entries have a `uid` attribute) and returns every rule UID exactly once.
extractRuleUids = groups:
  let
    uidsOfGroup = ruleGroup: map (r: r.uid) ruleGroup.rules;
  in
  lib.unique (lib.concatMap uidsOfGroup groups);
|
||||
|
||||
# Rule groups taken from services.grafana.provision.alerting.rules.settings.groups;
# an empty list is used when that attribute path cannot be selected.
allGroups = cfg.provision.alerting.rules.settings.groups or [ ];

# De-duplicated UIDs of every rule found in those groups.
expectedUids = extractRuleUids allGroups;
|
||||
|
||||
# Store file holding the JSON manifest read by the cleanup script:
#   { "expected_uids": [ "<uid>", ... ] }
cleanupManifest =
  pkgs.writeText "grafana-alert-cleanup-manifest.json"
    (builtins.toJSON { expected_uids = expectedUids; });
|
||||
|
||||
# Cleanup script (PostgreSQL backend).
#
# Compares the UIDs listed in the manifest with the rows of `alert_rule`
# whose `updated_by` is 'service', and deletes the rows whose UID is not in
# the manifest. The script is best-effort: a failed query or a failed DELETE
# is reported on stderr but never produces a non-zero exit status, so it does
# not fail the unit step that invokes it.
#
# writeShellScriptBin already emits the shebang line, so none is repeated here.
cleanupScript = pkgs.writeShellScriptBin "grafana-alert-cleanup" ''
  set -euo pipefail

  MANIFEST="${cleanupManifest}"
  DB_NAME="grafana"
  PSQL="${pkgs.postgresql}/bin/psql"

  echo "=== Grafana Alert Rule Cleanup (PostgreSQL) ==="
  echo "Loading expected UIDs from manifest..."

  EXPECTED_UIDS=$(${pkgs.jq}/bin/jq -r '.expected_uids[]' "$MANIFEST")
  # Count non-empty lines so that an empty manifest reports 0 (piping an empty
  # string through wc -l would report 1). grep exits 1 when it counts zero
  # lines, hence the "|| true" to keep set -e from aborting.
  EXPECTED_COUNT=$(grep -c . <<< "$EXPECTED_UIDS" || true)
  echo "Expected UIDs count: $EXPECTED_COUNT"

  echo "Querying database for current provisioned alert rules..."

  # Query database for all provisioned rule UIDs. A failing query (database
  # unreachable, table missing, ...) is reported instead of being mistaken
  # for "no rules"; the script still exits 0 in that case.
  if ! CURRENT_UIDS=$("$PSQL" -h /run/postgresql -d "$DB_NAME" -t -A -c \
    "SELECT uid FROM alert_rule WHERE updated_by = 'service';"); then
    echo "Warning: could not query alert_rule; skipping cleanup." >&2
    exit 0
  fi

  if [[ -z "$CURRENT_UIDS" ]]; then
    echo "No provisioned rules found in database."
    exit 0
  fi

  CURRENT_COUNT=$(grep -c . <<< "$CURRENT_UIDS" || true)
  echo "Current provisioned UIDs count: $CURRENT_COUNT"

  # Find orphaned UIDs (in database but not in expected list).
  # -F matches the UID literally rather than as a regular expression, and the
  # here-string avoids an "echo | grep -q" pipeline, which under pipefail can
  # report a false non-match when grep exits before echo has finished writing.
  ORPHANED_UIDS=()
  while IFS= read -r uid; do
    [[ -n "$uid" ]] || continue
    if ! grep -Fxq -- "$uid" <<< "$EXPECTED_UIDS"; then
      ORPHANED_UIDS+=("$uid")
    fi
  done <<< "$CURRENT_UIDS"
  ORPHAN_COUNT=''${#ORPHANED_UIDS[@]}

  if [[ $ORPHAN_COUNT -eq 0 ]]; then
    echo "No orphaned alert rules found. All rules match configuration."
    exit 0
  fi

  echo "Found $ORPHAN_COUNT orphaned rule(s)"

  # Delete orphaned rules. The psql call is the condition of the "if" so that
  # set -e does not abort on the first failure and the failure branch is
  # actually reachable. The UID is handed over as a psql variable and expanded
  # with :'uid', which quotes it as an SQL literal; ON_ERROR_STOP makes psql
  # return a non-zero status when a statement read from stdin fails.
  for uid in "''${ORPHANED_UIDS[@]}"; do
    echo "Deleting orphaned rule: $uid"

    if "$PSQL" -h /run/postgresql -d "$DB_NAME" \
      -v ON_ERROR_STOP=1 -v uid="$uid" >/dev/null 2>&1 \
      <<< "DELETE FROM alert_rule WHERE uid = :'uid' AND updated_by = 'service';"; then
      echo " ✓ Deleted $uid"
    else
      echo " ✗ Failed to delete $uid" >&2
    fi
  done

  echo "=== Cleanup Complete ==="
'';
|
||||
|
||||
in
|
||||
{
  # Only active when services.grafana.enable is set.
  config = lib.mkIf cfg.enable {
    systemd.services.grafana.serviceConfig = {
      # Pre-start step of the grafana unit: logs one line, then runs the
      # alert rule cleanup script.
      ExecStartPre = pkgs.writeShellScript "grafana-alert-cleanup-pre" ''
        echo "Running Grafana alert rule cleanup..."
        ${cleanupScript}/bin/grafana-alert-cleanup
      '';
    };
  };
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue