feat: add alerting for amz ebs server and websites blackbox

This commit is contained in:
2025-11-14 23:08:27 +01:00
parent 01d3ab1357
commit 8a2a68a91c
7 changed files with 270 additions and 1 deletions

View File

@@ -0,0 +1,58 @@
# Grafana alert rule definition: mysql.service on amzebs-01 is not active.
#
# The arguments are accepted so this file can be imported the same way as the
# other alert-rule files; none of them is read here.
{ lib, pkgs, config, ... }:
let
  host = "amzebs-01";
  unit = "mysql.service";

  # Step A: query the "active" state sample of the unit over the last 300 s.
  # `OR on() vector(0)` substitutes a constant 0 when the selector matches
  # nothing, so a missing series evaluates like an inactive unit.
  unitStateQuery = {
    refId = "A";
    datasourceUid = "vm-datasource-uid";
    relativeTimeRange = {
      from = 300;
      to = 0;
    };
    model = {
      refId = "A";
      editorMode = "code";
      expr = "node_systemd_unit_state{state=\"active\", name=\"${unit}\", instance=\"${host}:9100\"} OR on() vector(0)";
      range = true;
      hide = false;
      intervalMs = 1000;
      maxDataPoints = 43200;
      legendFormat = "__auto";
    };
  };

  # Builds one server-side expression step (datasource "__expr__").
  exprStep = refId: model: {
    inherit refId model;
    datasourceUid = "__expr__";
  };
in
{
  grafanaAlertRuleDefinitions = [
    {
      uid = "amzebs-mysql-service-down-alert-uid";
      title = "MySQL Service Down on ${host}";

      # The rule fires on the result of step C.
      condition = "C";
      data = [
        unitStateQuery
        # Step B: collapse the queried range to its most recent value.
        (exprStep "B" {
          type = "reduce";
          expression = "A";
          reducer = "last";
        })
        # Step C: true when that value is below 1.
        (exprStep "C" {
          type = "math";
          expression = "$B < 1";
        })
      ];

      # Missing data and evaluation errors are both treated as alerting.
      noDataState = "Alerting";
      execErrState = "Alerting";
      for = "5m";

      labels = {
        severity = "critical";
        inherit host;
      };
      annotations = {
        summary = "MySQL Service Down on ${host}";
        description = "MySQL service is down on ${host}";
      };
    }
  ];
}

View File

@@ -0,0 +1,58 @@
# Grafana alert rule definition: nginx.service on amzebs-01 is not active.
#
# lib/pkgs/config are part of the shared import signature of the alert-rule
# files and are not used in this file.
{ lib, pkgs, config, ... }:
let
  host = "amzebs-01";
  unit = "nginx.service";
  # Datasource uid used by the reduce/math expression steps.
  exprSource = "__expr__";
in
{
  grafanaAlertRuleDefinitions = [
    {
      uid = "amzebs-nginx-service-down-alert-uid";
      title = "Nginx Service Down on ${host}";

      # Alert on step C; no-data and execution errors also count as alerting.
      condition = "C";
      for = "5m";
      noDataState = "Alerting";
      execErrState = "Alerting";

      labels.severity = "critical";
      labels.host = host;

      annotations.summary = "Nginx Service Down on ${host}";
      annotations.description = "Nginx service is down on ${host}";

      data = [
        # A: "active" state sample of the unit over the last 300 s; the
        # `OR on() vector(0)` tail yields 0 when no series matches.
        {
          refId = "A";
          datasourceUid = "vm-datasource-uid";
          relativeTimeRange.from = 300;
          relativeTimeRange.to = 0;
          model.refId = "A";
          model.editorMode = "code";
          model.expr = "node_systemd_unit_state{state=\"active\", name=\"${unit}\", instance=\"${host}:9100\"} OR on() vector(0)";
          model.range = true;
          model.hide = false;
          model.intervalMs = 1000;
          model.maxDataPoints = 43200;
          model.legendFormat = "__auto";
        }
        # B: keep only the latest value of A.
        {
          refId = "B";
          datasourceUid = exprSource;
          model.type = "reduce";
          model.expression = "A";
          model.reducer = "last";
        }
        # C: condition is true when B is below 1.
        {
          refId = "C";
          datasourceUid = exprSource;
          model.type = "math";
          model.expression = "$B < 1";
        }
      ];
    }
  ];
}

View File

@@ -0,0 +1,58 @@
# Grafana alert rule definition: a phpfpm-*.service unit on amzebs-01 is not
# active.
#
# lib/pkgs/config are accepted to match how the alert-rule files are imported;
# they are not used in this file.
{ lib, pkgs, config, ... }:
{
  grafanaAlertRuleDefinitions = [
    {
      uid = "amzebs-phpfpm-service-down-alert-uid";
      title = "PHP-FPM Service Down on amzebs-01";
      # The rule fires on the result of step C.
      condition = "C";
      data = [
        {
          # Step A: "active" state sample of every matching unit over the
          # last 300 s. The selector can return one series per unit.
          refId = "A";
          relativeTimeRange = {
            from = 300;
            to = 0;
          };
          datasourceUid = "vm-datasource-uid";
          model = {
            editorMode = "code";
            # Nix turns `\\\\.` into `\\.` in the query text; the query
            # language's string unescaping then yields the regex `\.`
            # (a literal dot). `OR on() vector(0)` supplies a constant 0
            # when no unit matches at all.
            expr = "node_systemd_unit_state{state=\"active\", name=~\"phpfpm-.*\\\\.service\", instance=\"amzebs-01:9100\"} OR on() vector(0)";
            hide = false;
            intervalMs = 1000;
            legendFormat = "__auto";
            maxDataPoints = 43200;
            range = true;
            refId = "A";
          };
        }
        {
          # Step B: reduce each series to its most recent value.
          # Reduce works per series over the query window, not across
          # series, so "min" would not aggregate units; it would only keep
          # the condition true for the whole window after one 0 sample.
          # "last" reflects the current state and leaves debouncing to
          # `for` below.
          refId = "B";
          datasourceUid = "__expr__";
          model = {
            type = "reduce";
            expression = "A";
            reducer = "last";
          };
        }
        {
          # Step C: true for every series whose current value is below 1.
          refId = "C";
          datasourceUid = "__expr__";
          model = {
            type = "math";
            expression = "$B < 1";
          };
        }
      ];
      # Missing data and evaluation errors are both treated as alerting.
      noDataState = "Alerting";
      execErrState = "Alerting";
      # Condition must hold for 5 minutes before the alert fires.
      for = "5m";
      annotations = {
        description = "One or more PHP-FPM services are down on amzebs-01";
        summary = "PHP-FPM Service Down on amzebs-01";
      };
      labels = {
        severity = "critical";
        host = "amzebs-01";
      };
    }
  ];
}

View File

@@ -7,12 +7,20 @@ let
openldapDownAlertRules = (import ./openldap_down.nix { inherit lib pkgs config; }).grafanaAlertRuleDefinitions;
wireguardDownAlertRules = (import ./wireguard_down.nix { inherit lib pkgs config; }).grafanaAlertRuleDefinitions;
# amzebs-01 service alerts: each file exposes its rules as
# `grafanaAlertRuleDefinitions`. Binding names use the same `amzebs` spelling
# as the imported file names.
amzebsMysqlDownAlertRules = (import ./amzebs_mysql_down.nix { inherit lib pkgs config; }).grafanaAlertRuleDefinitions;
amzebsNginxDownAlertRules = (import ./amzebs_nginx_down.nix { inherit lib pkgs config; }).grafanaAlertRuleDefinitions;
amzebsPhpfpmDownAlertRules = (import ./amzebs_phpfpm_down.nix { inherit lib pkgs config; }).grafanaAlertRuleDefinitions;
allServiceRules = giteaDownAlertRules
++ giteaRunnerDownAlertRules
++ postfixDownAlertRules
++ dovecotDownAlertRules
++ openldapDownAlertRules
++ wireguardDownAlertRules;
++ wireguardDownAlertRules
++ amzebsMysqlDownAlertRules
++ amzebsNginxDownAlertRules
++ amzebsPhpfpmDownAlertRules;
in
{
services.grafana.provision.alerting.rules.settings.groups = [

View File

@@ -6,6 +6,9 @@ let
# All candidate domains: every attribute name of nginxVHosts plus the
# hand-listed hosts below.
# NOTE(review): presumably these feed the blackbox website checks — confirm
# against the rest of this module.
allDomains =
(lib.attrNames nginxVHosts) ++ [
"foundry-vtt.cloonar.com"
# amzebs-01 domains (listed here by hand, not taken from nginxVHosts)
"ebs.cloonar.dev"
"api.ebs.cloonar.dev"
];
# Drop every domain that appears in cfg.blacklistDomains.
filteredDomains = builtins.filter (d: !builtins.elem d cfg.blacklistDomains) allDomains;
# Prefix each remaining domain with the https:// scheme.
httpsDomains = lib.map (d: "https://${d}") filteredDomains;