diff --git a/hosts/fw/channel b/hosts/fw/channel index ced117e..93f5df5 100644 --- a/hosts/fw/channel +++ b/hosts/fw/channel @@ -1 +1 @@ -https://channels.nixos.org/nixos-24.11 +https://channels.nixos.org/nixos-25.05 diff --git a/hosts/fw/configuration.nix b/hosts/fw/configuration.nix index 5bd0338..fc47be8 100644 --- a/hosts/fw/configuration.nix +++ b/hosts/fw/configuration.nix @@ -65,8 +65,9 @@ # setup network ./modules/setupnetwork.nix ./modules/set-nix-channel.nix # Automatically manage nix-channel from /var/bento/channel - - + ./modules/grafana-monitor.nix # Grafana online status monitor + + ./hardware-configuration.nix ]; diff --git a/hosts/fw/modules/dnsmasq.nix b/hosts/fw/modules/dnsmasq.nix index 8110621..8be86a7 100644 --- a/hosts/fw/modules/dnsmasq.nix +++ b/hosts/fw/modules/dnsmasq.nix @@ -70,7 +70,6 @@ "24:df:a7:b1:1b:74,${config.networkPrefix}.96.101,rmproplus-b1-1b-74" "1a:c4:04:6e:29:bd,${config.networkPrefix}.97.2,omada" - "02:00:00:00:00:03,${config.networkPrefix}.97.5,web-02" "02:00:00:00:00:04,${config.networkPrefix}.97.6,matrix" "ea:db:d4:c1:18:ba,${config.networkPrefix}.97.50,git" "c2:4f:64:dd:13:0c,${config.networkPrefix}.97.20,home-assistant" diff --git a/hosts/fw/modules/gitea-vm.nix b/hosts/fw/modules/gitea-vm.nix index b9c65b5..d202cd4 100644 --- a/hosts/fw/modules/gitea-vm.nix +++ b/hosts/fw/modules/gitea-vm.nix @@ -1,4 +1,4 @@ -{ lib, nixpkgs, pkgs, ... }: let +{ config, lib, nixpkgs, pkgs, ... 
}: let # hostname = "git-02"; # json = pkgs.formats.json { }; runners = ["git-runner-1" "git-runner-2"]; @@ -38,6 +38,13 @@ in { ]; }; + systemd.network.networks."10-lan" = { + matchConfig.PermanentMACAddress = "02:00:00:00:00:0${toString idx}"; + address = [ "${config.networkPrefix}.97.5${toString idx}/24" ]; + gateway = [ "${config.networkPrefix}.97.1" ]; + dns = [ "${config.networkPrefix}.97.1" ]; + }; + networking.hostName = runner; virtualisation.podman.enable = true; diff --git a/hosts/fw/modules/grafana-monitor.nix b/hosts/fw/modules/grafana-monitor.nix new file mode 100644 index 0000000..c99b9fc --- /dev/null +++ b/hosts/fw/modules/grafana-monitor.nix @@ -0,0 +1,193 @@ +{ config, pkgs, lib, ... }: + +let + grafanaMonitorUser = "grafana-monitor"; + grafanaMonitorGroup = "grafana-monitor"; + stateDir = "/var/lib/${grafanaMonitorUser}"; + + # Monitoring script will be defined here later + monitorScript = pkgs.writeShellScriptBin "grafana-online-check" '' + #!${pkgs.bash}/bin/bash + set -euo pipefail + + GRAFANA_URL="https://grafana.cloonar.com/api/health" + STATE_FILE="${stateDir}/status.env" + PUSHOVER_API_TOKEN_FILE="/run/secrets/pushover-api-token" + PUSHOVER_USER_KEY_FILE="/run/secrets/pushover-user-key" + MAX_FAILURES=5 + + # Ensure state directory exists (NixOS creates $HOME for the user, which is stateDir) + # The script runs as grafanaMonitorUser, so $HOME will be /var/lib/grafana-monitor + mkdir -p "''${HOME}" + + # Load current state or initialize + CONSECUTIVE_FAILURES=0 + ALERT_SENT="false" + LAST_KNOWN_STATUS="UP" # Assume UP initially if no state file + + # Note: STATE_FILE uses $stateDir which is /var/lib/grafana-monitor. + # The script will run with HOME=/var/lib/grafana-monitor. + # So, using ''${HOME}/status.env or ''${STATE_FILE} should resolve to the same path. + # Let's stick to ''${STATE_FILE} for consistency with its definition. + if [[ -f "''${STATE_FILE}" ]]; then + source "''${STATE_FILE}" + fi + + # Check secrets + if [[ ! 
-f "''${PUSHOVER_API_TOKEN_FILE}" ]] || [[ ! -r "''${PUSHOVER_API_TOKEN_FILE}" ]]; then + echo "Error: Pushover API token file (''${PUSHOVER_API_TOKEN_FILE}) not found or not readable." >&2 + exit 1 + fi + PUSHOVER_API_TOKEN=$(cat "''${PUSHOVER_API_TOKEN_FILE}") + + if [[ ! -f "''${PUSHOVER_USER_KEY_FILE}" ]] || [[ ! -r "''${PUSHOVER_USER_KEY_FILE}" ]]; then + echo "Error: Pushover user key file (''${PUSHOVER_USER_KEY_FILE}) not found or not readable." >&2 + exit 1 + fi + PUSHOVER_USER_KEY=$(cat "''${PUSHOVER_USER_KEY_FILE}") + +# Internet connectivity check + INTERNET_CHECK_URL="https://1.1.1.1" # Using a reliable IP to bypass potential DNS issues for the check itself + echo "Performing internet connectivity check to ''${INTERNET_CHECK_URL}..." + if ! ${pkgs.curl}/bin/curl --head --silent --fail --connect-timeout 3 --max-time 5 "''${INTERNET_CHECK_URL}" > /dev/null 2>&1; then + echo "Internet connectivity check failed. Cannot reach ''${INTERNET_CHECK_URL}. Skipping Grafana check and exiting successfully." + exit 0 + else + echo "Internet connectivity check successful. Proceeding with Grafana check." + fi + echo "" # Add a blank line for readability before Grafana check logs + echo "Checking Grafana at ''${GRAFANA_URL}..." 
+    ACTUAL_HTTP_CODE="000" # Default if curl doesn't provide one
+    CURL_ERROR_MESSAGE=""
+    CURL_STDERR_OUTPUT=$(mktemp)
+    # Clean up the temp file on exit. Only EXIT is trapped: a handler on TERM/INT/HUP that does not exit would swallow the signal and let the script keep running.
+    trap 'rm -f "''${CURL_STDERR_OUTPUT}"' EXIT
+
+    # -L: follow redirects
+    # -sS: silent mode, but show errors
+    # --fail: curl exits with 22 on server errors (4xx, 5xx)
+    # --connect-timeout 5: max time to connect
+    # --max-time 10: max total time for operation
+    # --stderr: redirect stderr to a file to capture detailed errors
+    # -o /dev/null: discard response body
+    # --write-out "%{http_code}": output the HTTP status code
+    if ACTUAL_HTTP_CODE=$(${pkgs.curl}/bin/curl -L -sS --fail --connect-timeout 5 --max-time 10 \
+                                --stderr "''${CURL_STDERR_OUTPUT}" \
+                                -o /dev/null --write-out "%{http_code}" "''${GRAFANA_URL}"); then
+      # Curl exited with 0. With --fail, this means HTTP status was 2xx.
+      echo "Grafana is UP (HTTP ''${ACTUAL_HTTP_CODE})."
+      CURRENT_STATUS="UP"
+      if [[ "''${LAST_KNOWN_STATUS}" == "DOWN" && "''${ALERT_SENT}" == "true" ]]; then
+        echo "Grafana recovered. Sending recovery notification."
+        ${pkgs.curl}/bin/curl -sS -X POST \
+          --form-string "token=''${PUSHOVER_API_TOKEN}" \
+          --form-string "user=''${PUSHOVER_USER_KEY}" \
+          --form-string "message=Grafana at ''${GRAFANA_URL} is back online (HTTP ''${ACTUAL_HTTP_CODE})." \
+          --form-string "title=Grafana Recovered (fw)" \
+          --form-string "priority=0" \
+          https://api.pushover.net/1/messages.json
+        ALERT_SENT="false"
+      fi
+      CONSECUTIVE_FAILURES=0
+    else
+      # Curl exited with a non-zero status; $? still holds the exit code of the failed condition.
+      CURL_EXIT_CODE=$?
+      CURL_ERROR_MESSAGE=$(tr -d '\n' < "''${CURL_STDERR_OUTPUT}") # Flatten to one line; sent as a literal form field, so no quote escaping is needed
+
+      echo "Grafana check failed. Curl Exit Code: ''${CURL_EXIT_CODE}. HTTP Code reported: ''${ACTUAL_HTTP_CODE}."
+      echo "Curl Stderr: ''${CURL_ERROR_MESSAGE}"
+      CURRENT_STATUS="DOWN"
+      CONSECUTIVE_FAILURES=$(( ''${CONSECUTIVE_FAILURES} + 1 ))
+      echo "Consecutive failures: ''${CONSECUTIVE_FAILURES}"
+
+      if [[ ''${CONSECUTIVE_FAILURES} -ge ''${MAX_FAILURES} && "''${ALERT_SENT}" == "false" ]]; then
+        echo "Grafana has been offline for ''${CONSECUTIVE_FAILURES} checks (>= ''${MAX_FAILURES}). Sending alert."
+        PUSHOVER_TITLE="Grafana OFFLINE (fw)"
+        PUSHOVER_MSG="Grafana ''${GRAFANA_URL} offline for ''${MAX_FAILURES}+ min. HTTP:''${ACTUAL_HTTP_CODE}, CurlExit:''${CURL_EXIT_CODE}."
+        if [[ -n "''${CURL_ERROR_MESSAGE}" ]]; then
+          PUSHOVER_MSG+=" Err: ''${CURL_ERROR_MESSAGE}"
+        fi
+        # Truncate message if too long for Pushover (1024 chars)
+        PUSHOVER_MSG=$(echo "''${PUSHOVER_MSG}" | cut -c 1-1024)
+
+        ${pkgs.curl}/bin/curl -sS -X POST \
+          --form-string "token=''${PUSHOVER_API_TOKEN}" \
+          --form-string "user=''${PUSHOVER_USER_KEY}" \
+          --form-string "message=''${PUSHOVER_MSG}" \
+          --form-string "title=''${PUSHOVER_TITLE}" \
+          --form-string "priority=1" \
+          https://api.pushover.net/1/messages.json
+        ALERT_SENT="true"
+      fi
+    fi
+    # Temp file is removed by trap
+
+    # Save current state
+    echo "Saving state: CONSECUTIVE_FAILURES=''${CONSECUTIVE_FAILURES}, ALERT_SENT=''${ALERT_SENT}, LAST_KNOWN_STATUS=''${CURRENT_STATUS}"
+    (
+      echo "CONSECUTIVE_FAILURES=''${CONSECUTIVE_FAILURES}"
+      echo "ALERT_SENT=''${ALERT_SENT}"
+      echo "LAST_KNOWN_STATUS=''${CURRENT_STATUS}"
+    ) > "''${STATE_FILE}" # Using STATE_FILE which is ${stateDir}/status.env
+    chmod 600 "''${STATE_FILE}"
+
+    echo "Grafana check finished."
+ ''; +in +{ + # Module is now implicitly enabled when imported + config = { + users.users.${grafanaMonitorUser} = { + isSystemUser = true; + group = grafanaMonitorGroup; + home = stateDir; # Home directory for state + createHome = true; # NixOS will create this directory + description = "User for Grafana online monitoring service"; + }; + users.groups.${grafanaMonitorGroup} = {}; + + # Sops secrets for Pushover + sops.secrets."pushover-api-token" = { + owner = grafanaMonitorUser; + group = grafanaMonitorGroup; + mode = "0400"; # Read-only for the user + }; + sops.secrets."pushover-user-key" = { + owner = grafanaMonitorUser; + group = grafanaMonitorGroup; + mode = "0400"; # Read-only for the user + }; + + environment.systemPackages = [ + pkgs.curl + pkgs.coreutils # for mkdir, cat, echo, rm used in script (though bash builtins are often used) + ]; + + systemd.services.grafana-online-check = { + description = "Grafana Online Check Service"; + wantedBy = [ "multi-user.target" ]; # Or timers.target if only started by timer + after = [ "network-online.target" ]; # Ensure network is up and secrets are available + requires = [ "network-online.target" ]; + + serviceConfig = { + Type = "oneshot"; + User = grafanaMonitorUser; + Group = grafanaMonitorGroup; + ExecStart = "${monitorScript}/bin/grafana-online-check"; + # Permissions to write to its own home directory (stateDir) are implicit + # If using StateDirectory= in systemd, it would be different. + # For home directory usage, ensure the user has rights. `createHome = true` helps. 
+ }; + }; + + systemd.timers.grafana-online-check = { + description = "Timer to periodically check Grafana's online status"; + wantedBy = [ "timers.target" ]; + timerConfig = { + OnBootSec = "2min"; # Wait a bit after boot + OnUnitActiveSec = "1min"; # Run every 1 minute after the last run + Unit = "grafana-online-check.service"; + }; + }; + }; +} diff --git a/hosts/fw/modules/web/default.nix b/hosts/fw/modules/web/default.nix index 14b06c5..3fcfad8 100644 --- a/hosts/fw/modules/web/default.nix +++ b/hosts/fw/modules/web/default.nix @@ -52,7 +52,6 @@ in { ../network-prefix.nix ../../utils/modules/sops.nix ../../utils/modules/lego/lego.nix - ../../modules/tinder-api.nix # ../../utils/modules/borgbackup.nix ./zammad.nix diff --git a/hosts/fw/secrets.yaml b/hosts/fw/secrets.yaml index 1605aaa..a372ff3 100644 --- a/hosts/fw/secrets.yaml +++ b/hosts/fw/secrets.yaml @@ -1,18 +1,20 @@ +ai-mailer-imap-password: ENC[AES256_GCM,data:kMxDPUK9rk7mbel5JDT03m3Y2w==,iv:cbnkNIVRXd7OLqueSrfYRzfaW9TzI+FauuQD8lgYIy0=,tag:63W7seIgt5TPVFQc84semQ==,type:str] +ai-mailer-openrouter-key: ENC[AES256_GCM,data:PCe8kt/M+7g087AKzYMY2H5WO4L+NGkHLsh47fMK36kz+Ju5kd/kpmM4GQcDbI3LgWm/P+T0/mv7kGGOL6KLmBFaFmGV/88cGw==,iv:ruVftGvnv+PX1Zd92tfOezpyaMbYrqCrexelyPUYFMc=,tag:z4JVUCfz/frehar6y+fOlQ==,type:str] borg-passphrase: ENC[AES256_GCM,data:jHb+yXK0RqNdVYtWiueztZFlHC/xQ6ZiAOUcLt6BxmZQewuL3mh4AZ+lQdmA/4EaaTTIhVMR3xFx5fU6b2CtNLiGb/0=,iv:IW09B1EE1OupMCOvv13MXRYiMsD4VmIfyYONUyrPX1c=,tag:3ankeLOaDJkwRUGCd72DuA==,type:str] borg-ssh-key: 
ENC[AES256_GCM,data:ir25XfzLBb/H/YWzxP501hCaLBB4jpiLW7WUcnvguzosT9QeOtBdJ0WB1IndEMtiEgQyE9kyGOJ3QJwzbQNkX6CG96Uzt2mKw8gw8ayUqC+B9zR8eIRYiDKOYs+YREVo7nA5pLLzIc/9jaRicDFMmw1Thmk7UUJKB1DNV49nU9K+nAfrCzk7ZQieY8oaasFD0cvNb4Ndj6f9PWSXkNBwKK52ig4hDeNBs1bdy8nDE8VqlwOo8H2DcYMzdMjKCZDBRccy8NofHEhakCW5OdliFyIHsLkcBHca3Bp46JN7wbo8avPPd9bXGuRiOSWYq50RcyZUovnB3g7Dk3swCyuiFztnStN63+g7ZnGFdYLYDYfuDSPN1W2HCkknmaoT910VNE8sEAMyfXk4tqJv4eW4qmFk2UwPlRCrsk9GtdRQ5wm8muNPHEZ8s2dGkn4WDcjy7SUpgF4UJJZV8iJe74W9BK1Ef+AWWNsNjYfZde3iw1+8Fz1u65u4seFWqQMok/noADpszbpk+YYRoM+5D/YVMx+KeDtoFqnZfULM/BqvAqdYYZtRzojndeNW6Ea4sxDE+XQ5b1OwGFlNAlnuS1fYYPvKojrKNgT9KMwbsvPijU5vFddY8Qpz2h6GKEv/OW87j5UeyDW4l32lvyawBuzczBfiFgCElggGSZHM5rjE4Deb06eQleTioZ79EDXTv5UsPQ6Bc1v5Wvnu8DvxJe4B10vxH70JIGIlmjwo0yhMkxDTN7BkAGQC0QAPhwtURDq+XVufQNjlTUjjH1Q1E4u0Vy19clMs8SStqFeMN02BfWZdS9mbueF5Ehc+8wTfAs43CQFublJ4wfG1PzEbqj9LZdimFe4hCnE2y6Gbf591shugVSAMA3UXQUuvFQmm69i9gz88YSYrkLlVStM+dtXCugZho72xgHtnI+5o19wuoZPRoxe47W0T2kJZZeomtqoAsSo5yr5JeYzYdaHYcK2fgRY0HWgWzOxnVEfX/gRPR3b20Tko6yp9lIDECkXVDQSxptxqIYk+VuETnD9YF2OpYeHZLGoo9OLdEHVZRcuy1S74aAOJGO9SAHLw3eukxG//AZlwcOYjOsYDVt3BjhYZEkYCLg8GkAqV/7bGsxT7pgckNEB2NRYQI9ckqEcEw9CdkYre67HwfPCvAble68VnRzgp+v5s0koVjTURF9FTxvVOXQEbvSpY828idyx6nOaAIHoqpIOFz4jsGE9L4FKamqnlnjzj2Ri/MboT9JQBj8bnIF/ej+dQGpfqZo7zqtu3d0B/9e0xuVTcqI9Bxlqn3D4108I8R37Ctr5OFKloeOZ8HHMsHcBUAzZC6/fWrOspru14YHW2YNj8nBxHve/P3oiTQ/nlXLcBGLoFfI+hOpofccQB8FnkKfTbLSRUGrGY6NJt9RCnZgm2+RUgel77XpsCsT/Q5ZGclBdyk8mSaqVjiNyHCbCV5tF/tWnuvf859S0tcmqbJ0FhIRAvwxFucmfi6FSPX5HEMdRbNV7szrHKSX60u7YA2DBBzv3c/+C2bxq70vhwFelqz7FqpVKwebbE4/a59lZpibzefCoji/TPDJB62/ox5NHHE5qenv7IPcEj3dEmdasbrApAw1UFsFlRCnlg4JIYley/AQx7OzUSImqkG8JWvSJ4JXijhsr9dPFR/cb0srUO88aFNh/ZUQhELZCVnzAsF81Y4w6LTGApMfUVN/yx9MqENGvObywzMls1UJphvzDZzvb+Ue6eqELogN1QcEI/WOirwVtJO6E7IevEtK4xxWsLfRHVjtbLc4QjCWuiyszAPTTttKJ+iC2h14Wj1XoiMpWRiVnj+jI9iWRen96P4glYEfuCYQS6vbGkNDEoZt/FnkLJDbLdjXatmhUoRpvExOtp26ULR/f1lwzLMJBt1qPvhuGur1ru2B1e8+AVte1Cfjmk+xrnxNwkTFLGe89Qjd77wPyQv9h0YrhZ6uDi2zLemhZs
2LjW5ZvzV5P4thMDxkhezJHatPHAGa8OfclJOyrRTyW2azdz2A45MNzZtCQcnQdQxBXf+XRskLnhquZfgv66hFITjuF/HeI9cq4HJcrgaOcVj+tBdK1bTCyL2kqKkCpSCbh/Pv6FuAlDXgLjsWwZgOKz8gfTIfXMapPLDYVTbS/PPPABylZflN98FFyeFDHB3Fwn1a6qAJ0mC7+4sowVZ1DIAoflaHqNs5TXyb3KeZGgXj5ZQwhv1z6NySvOS6cHxx0PvkFo99T1NHztxCRERNvBdWSwsr32DTwEvZo5iNPy3lvKI5A+rXc7jlQkUbufbddtLw2iPtt29XyMDOysK010fXzzQRjaz4R8ZaDtHNjqPrynvqFPXRB0VSIrwXS2utU7bmD+0dGX26t9k5qRBi7Gm+iZNKGMnSRsm17bVk5o8q0tb1P1eGL9mexZJJvxolfXVFJJtR8m6vLmUX1LSht/JhoWFElrINl0hviwd1dehmTqdQqWz5/imjF+pVOasrt7XVZ+7T/rDpuwNl375qSZptM1pMUExJ3CvzigpnarXXQxEBYkf0haGvQwPWNVHe/bR/1VooSQkH/mGg1g+rcTqp4yB5hsFu1lNK4ph04WQOqaafg40HBv6e5cOjLkFdEtYNpjyd6sRS+WHk7zzFlfPVlzijq8f+oDH9ALRzNnL1Y2DrX53wx4dBBWvxE1Yhb6Kj6Er4ZDiRLLXo+wJOGCpnNTPJMVaYskZ+LN2e9nS2/ZwbsNBnPHxSqCc1oP4d3yXH0j90VKnWg79aIEOagRvTF/9F6SkkGL9zVuUnoVSPwq97etWWtjGoEORMGY7jkGOK+U391p7Z69Hrv2AejS1BoSDeGcxXasFvINpmc+Hl2c+zOlFBySu2zA39cVlcStUFICA5GCmE5Eum4ED9DXP6RAuicD7YE0qSKbMkfLxIWMCZ6wBcwVUjdt43SI/ZqdpDm3E1kTRg07dE0R091rtfzEiIwBM4xFPJBafOx0L/Do61YMOHGzi6wgIQO7P7wIslv62M8MD1KKa/eH0tE2vhG/GyEGtKkg3P9vZRJwioifyshS1hvrt5pLinuCaDYyqMAl8Ro0OOm8di7+mBvXib0nRLfW7wBGDA4ADTipizNWAmbspQQl89kH5gdxgXO5U+N/qc0zXbpB+qeHVkPIK1DmrJ8pHLOE8mOpLy7eHUsSku/WtTt/RP4pcDbBU/43MCbk7NXKu/LjKjkQBjAL49LxnYmhEU7X//jtwSPE3gdx0x+wRJxzlbehM6rpfDRV5WQGSFf7yjLc/Ga1KwsgVdAstJEzDdv2vWSsjNzfJvHVBLrQPIC9fggi3DeLiHTAryCUcLUhNj4xtZWhSS1qmx07E4VzfjDJLMOsLY0vlimgngZ3YYCjC3Sw0frfQH2SZvmbLd3XfBdud67ZaMUobcRhnKzQnilldyD1jWVWLdVTup4RVxT4GYek9nmYflzpWWmwbXatz9Sgcw==,iv:9E1uiPqM3Hh4KWtL8haxm6PRm2VPc+DggrA135FvfB8=,tag:QSOgzVH9IBMgZxJvUhvY2w==,type:str] ddclient: ENC[AES256_GCM,data:EaXjXS/bwL3S/Fr+rzQ7dXA1eIzeFpHH7H+SvoNhVSg=,iv:3BzjnJG5yT1W8ob2nm0oUlr+sSJ73W/ctl48xyxeeWM=,tag:TqKSwfxF0V1v5T8VT/qblw==,type:str] +gitea-mailer-password: ENC[AES256_GCM,data:M4qCWNt1oQVJzxThIjocm2frwuVMyx+69TBpke25RwxJxEQnvHL1CM579OVroTm7+gGE/oOJqAwDIepfiDtyM1xm,iv:jayFZMbu3uDimS/rIKZSeoU0MsYwWp880iEMs1oQE4k=,tag:qGDncRkyuCWaELhcxUrqtQ==,type:str] +gitea-runner: 
ENC[AES256_GCM,data:NYG3qRLiMjmfA+oHYBXBbxpuX2ZjB/VgvLaS7yr5kJeDN/NukB/B3OZcEfsUWgbBS5IsLENESngWTFmK4W3htN4lSqdg/g4UsUr20beNov+pbyPN05rkBYmSCZZFwZ1L9POEE4GF4LuuoNpDlWIw0mrA8oV8MoI4W5QS2IGranBTIQQaYXU5TEGYa4XMVo4oC75iuH6DIq1KD6OgFAfMhm/wlbP8CP/Iaw2K8CNPxktk93pm3OSmggf22Z4JPEnvV25sc9iBkxLkDk9FXYFys0g=,iv:UzL5ncVOC/loJwcFSG1QJHnzLp3il4Hf3qDwLWxrIlo=,tag:w0Zn/E+02KyAsPXZdOLrew==,type:str] +gitea-runner-token: ENC[AES256_GCM,data:HpBjLS10w78ihbnAUrlCRGvwrXLBYKH5v/P7XggoUSWLoAazSVQArABxaK7PJas=,iv:q3Y6jV0gmug06O0EYqGVyIJ4AvMGr2ydwY17YKxo0Qw=,tag:Ws5HLbdaeYGGXzDZW/FX4w==,type:str] +home-assistant-ldap: ENC[AES256_GCM,data:uZEPbSnkgQYSd8ev6FD8TRHWWr+vusadtMcvP7KKL2AZAV0h1hga5fODN6I5u0DNL9hq2pNM+FwU0E/svWLRww==,iv:IhmUgSu34NaAY+kUZehx40uymydUYYAyte1aGqQ33/8=,tag:BKFCJPr7Vz4EG78ry/ZD7g==,type:str] +home-assistant-secrets.yaml: ENC[AES256_GCM,data:m7uOVo7hPk/RmqqRS6y7NKoMKsR9Bdi1ntatsZdDOAbJMjZmZL2FgPEHi/zF73zCfRfTOca3dwpulR3WXZ9Ic1sbUIggmusJMg4Gellw1CUhx7SbQN5nieAbPbB9GVxMuV4OakD1u7Swz8JggDT6IwojSnuD5omCRCyUH1wvKB+Re59q6EStderlm5MJNVFlVrbKVbLKLcw4yRgTh34BGnTTjcJmgSlQjO1ciu2B7YQmdl0Fw6d8AdbEzgB5TFG5ONc85UhJDE8Wlw==,iv:GCtpcVChN2UMWtfnWURozCfVj2YbRPqp/bH4Jjntybs=,tag:pcxP7gTBtXMNT5iyW5YXTw==,type:str] +pushover-api-token: ENC[AES256_GCM,data:W2ILPksaNeDvbSlSJztu1vu23kQKLDRHYKoUIvyd,iv:RYFAN6AU+DALphpqpiifhOoEQ8++6DEgo2wETSwxBCg=,tag:pRfaNuz4564LvRuaLggatg==,type:str] +pushover-user-key: ENC[AES256_GCM,data:mh3u3FAdFkGD1d4UKcTwLOsCB2vfhEADI5cd1aT4,iv:4bkR7ZNJwWAYBdu435SPZUovGsfb8qivuDOQdGkPd/U=,tag:5UO4vGt75CCFEM5jxTGkGg==,type:str] wrwks_vpn_key: ENC[AES256_GCM,data:gGipXC8JJO59b4KWMSo0+r761raQl7RzgBuUbXmPEKlZR21bs5XRAQalzDCFNtjcpNkXiGqAHCLkDTtjPagMsw==,iv:MH1EBJEOdQDEgm9E0F884fynhsH8KiS5QSc605XbASQ=,tag:FUM1eptHS0rpt6ILyQjGOg==,type:str] wg_cloonar_key: ENC[AES256_GCM,data:Dtp6I5J0jU5LLVwEFU4DFCpUngPRmFMebGXnk2oSwsKtsir/DtRBFG7ictM=,iv:1Abx/EAZRJrRQURljofzUYDgJpuREriX0nSrFbH5Npw=,tag:l4uFl9Uc+W0XeLVfLGmgZA==,type:str] wg_epicenter_works_key: 
ENC[AES256_GCM,data:LeLjfwfaz+loWyHYRgIMIPzHzlOnhl9tluKcQFgdes6r+deft1JfnUzDuF0=,iv:DKrc3I+U2hWDH8nnc8ZQeaVtA1eVXu7SXdTn1fxHoH4=,tag:V0PL0GrL2NEPVslAZa801A==,type:str] wg_epicenter_works_psk: ENC[AES256_GCM,data:Den3NDWdP013Or6/2Vll1igUahuRSNW4hu+nDa5vkr93bbveQTaWFT4TD4U=,iv:r3UsD3+3lUIP2X3Grti7wpXTQBXtu1/MdrycEmpZfsI=,tag:ghbAcxmjGVOe9jCZsmFzjA==,type:str] wg_ghetto_at_key: ENC[AES256_GCM,data:OIHmoy3SpIi9aefZnZ1PzpyHbEso18ceoTULf2eQkx1rJbaxC6PD1lma7eQ=,iv:u0eFjHHOBzPTmBvBEQsYY5flcBayiAQKd6e7RyiPwJI=,tag:731C9wvv8bA5fuuQq+weVQ==,type:str] -gitea-mailer-password: ENC[AES256_GCM,data:M4qCWNt1oQVJzxThIjocm2frwuVMyx+69TBpke25RwxJxEQnvHL1CM579OVroTm7+gGE/oOJqAwDIepfiDtyM1xm,iv:jayFZMbu3uDimS/rIKZSeoU0MsYwWp880iEMs1oQE4k=,tag:qGDncRkyuCWaELhcxUrqtQ==,type:str] -ai-mailer-imap-password: ENC[AES256_GCM,data:kMxDPUK9rk7mbel5JDT03m3Y2w==,iv:cbnkNIVRXd7OLqueSrfYRzfaW9TzI+FauuQD8lgYIy0=,tag:63W7seIgt5TPVFQc84semQ==,type:str] -ai-mailer-openrouter-key: ENC[AES256_GCM,data:PCe8kt/M+7g087AKzYMY2H5WO4L+NGkHLsh47fMK36kz+Ju5kd/kpmM4GQcDbI3LgWm/P+T0/mv7kGGOL6KLmBFaFmGV/88cGw==,iv:ruVftGvnv+PX1Zd92tfOezpyaMbYrqCrexelyPUYFMc=,tag:z4JVUCfz/frehar6y+fOlQ==,type:str] -gitea-runner: ENC[AES256_GCM,data:NYG3qRLiMjmfA+oHYBXBbxpuX2ZjB/VgvLaS7yr5kJeDN/NukB/B3OZcEfsUWgbBS5IsLENESngWTFmK4W3htN4lSqdg/g4UsUr20beNov+pbyPN05rkBYmSCZZFwZ1L9POEE4GF4LuuoNpDlWIw0mrA8oV8MoI4W5QS2IGranBTIQQaYXU5TEGYa4XMVo4oC75iuH6DIq1KD6OgFAfMhm/wlbP8CP/Iaw2K8CNPxktk93pm3OSmggf22Z4JPEnvV25sc9iBkxLkDk9FXYFys0g=,iv:UzL5ncVOC/loJwcFSG1QJHnzLp3il4Hf3qDwLWxrIlo=,tag:w0Zn/E+02KyAsPXZdOLrew==,type:str] -gitea-runner-token: ENC[AES256_GCM,data:HpBjLS10w78ihbnAUrlCRGvwrXLBYKH5v/P7XggoUSWLoAazSVQArABxaK7PJas=,iv:q3Y6jV0gmug06O0EYqGVyIJ4AvMGr2ydwY17YKxo0Qw=,tag:Ws5HLbdaeYGGXzDZW/FX4w==,type:str] -home-assistant-ldap: ENC[AES256_GCM,data:uZEPbSnkgQYSd8ev6FD8TRHWWr+vusadtMcvP7KKL2AZAV0h1hga5fODN6I5u0DNL9hq2pNM+FwU0E/svWLRww==,iv:IhmUgSu34NaAY+kUZehx40uymydUYYAyte1aGqQ33/8=,tag:BKFCJPr7Vz4EG78ry/ZD7g==,type:str] 
-home-assistant-secrets.yaml: ENC[AES256_GCM,data:m7uOVo7hPk/RmqqRS6y7NKoMKsR9Bdi1ntatsZdDOAbJMjZmZL2FgPEHi/zF73zCfRfTOca3dwpulR3WXZ9Ic1sbUIggmusJMg4Gellw1CUhx7SbQN5nieAbPbB9GVxMuV4OakD1u7Swz8JggDT6IwojSnuD5omCRCyUH1wvKB+Re59q6EStderlm5MJNVFlVrbKVbLKLcw4yRgTh34BGnTTjcJmgSlQjO1ciu2B7YQmdl0Fw6d8AdbEzgB5TFG5ONc85UhJDE8Wlw==,iv:GCtpcVChN2UMWtfnWURozCfVj2YbRPqp/bH4Jjntybs=,tag:pcxP7gTBtXMNT5iyW5YXTw==,type:str] matrix-shared-secret: ENC[AES256_GCM,data:67imd3m6WBeGP/5Msmjy8B6sP983jMyWzRIzWgNVV5jZslX+GBJyEYzm3OTDs1iTZf4ScvuYheTH0QFPfw==,iv:7ElCpESWumbIHmmFaedcpkFm5M58ZT3vW9wb9e1Sbh4=,tag:wr4FIymtJBtCerVqae+Xlw==,type:str] palworld: ENC[AES256_GCM,data:rdqChPt4gSJHS1D60+HJ+4m5mg35JbC+pOmevK21Y95QyAIeyBLVGhRYlOaUcqdZM2e4atyTTSf6z4nHsm539ddCbW7J2DCdF5PQkrAGDmmdTVq+jyJAT8gTrbXXCglT1wvFYY5dbf2NKA4ASJIA8bdVNuwRZU0CtFiishzLuc9m8ZcGCNwQ/+xkMZgkUAHYRlEJAZyMpXR6KkFftiR05JRAFczD4N7GXPPe+vyvgXg7QBGtf20Qd4SGBUw0zI/SNTRmifHUuc4Z6+Fe9JHgvTc3uFcTMVnty0fEuL+a29liaVdAFq8BnqJfc5CNV401ZSUeMbG41lCn1cegP/WChs9J6HXNrhWDgiXa6ln++NoKcfOHIfZVbYOCoOxFR6+YWeBU2+sHmdwI9j5XQf5Ly2hmg12j0Ds2Cn8k4PG5aQP+HT2bedqyxwSt6fi97A0Osnh4ig7+DzYAjSNLewbYLzVdK39VdvB9hqLto+yFS3gAaeYOHwPwtqa+COI85c55lHiyKHlSwPhBqYaaiDu00lQTUzq9R5vz6F/l+T3bUjuna5RryUu8yhnk5DyK834KycTOg4ETcZTqro6prfiEBxc+Utsc9JvEtZgwFv6fsVLOu7nHxuiYuvseZ4YA8LlYdwPJboMPO2XsuhwWtT1uz/rh2orH7/vsXvzA/kF8NFemWBEMVLYA8byC5ze8doiGDYp4T5AAf10nJB1ceQ==,iv:gs78fxhvo9KlTaR5nzs12/LdgPChSFPHD2k4VQp3ARo=,tag:lpWBOi9xh2cWkS+71KD/UQ==,type:str] ark: ENC[AES256_GCM,data:YYGyzoVIKI9Ac1zGOr0BEpd3fgBsvp1hSwAvfO07/EQdg8ufMWUkNvqNHDKN62ZK5A1NnY3JTA1p4gyZ4ryQeAOsbwqU1GSk2YKHFyPeEnpLz/Ml82KMsv7XPGXuKRXZ4v3UcLu0R8k1Q0gQsMWo4FjCs3FF5mVtJG/YWxxbCYHoBLJ/di5p0DgjuFgJBQknYBpuLzr+yIoeqEyN7XcGYAJO53trEJuOOxLILULifkqISHjZ66i5F1fHW0iUdRbmeWV4aOAeOrsQqXYv,iv:gJwV5ip84zHqpU0l0uESfWWOtcgihMvEEdLaeI+twcU=,tag:sy8udVQsKxV/jOqwhJmWAg==,type:str] @@ -21,10 +23,6 @@ knot-tsig-key: ENC[AES256_GCM,data:H2jEkRSVSIJl1dSolAXj9uUmzD6eEh9zPpoajZLxfuuFt mopidy-spotify: 
ENC[AES256_GCM,data:O3s6UvTP8z5KZPCq10GaaEQntWAEoxGFMnTkeUz9AfobrpsGZJcQgyazFX2u4DgAaIjNb34032MISotmuVQDJ14mi8xI5vC9w/Vf16v3TFu/dSKGZNb5ZPQwTUQ+iMJf7chgwOV9guThhutVJokb6pLxzt7fSht7,iv:j8+X1AmuWzIJdafzgrE7WBIlZ7coNNi0/Zn6JObR6rw=,tag:fiw6M2/6nfEPqEgV2YOWLg==,type:str] lms-spotify: ENC[AES256_GCM,data:gh5kx/MDSefNLbZsnovRc3rNWxp/RTrJ4A2WIs1QMi4JVGFj9SppdsErMXW4y/IFj/YxH1X7JtwvhptO/p3P2CFK0XL2I1vFVqPuj7LavDHJK7GXPAV6+x17ldvPXgym5NqHjzHi4gtj7U/bMJlz0NxrFsrrjMcY9nmNX2vVwKlINUFqWb1JRvQsJ8ujSutjJbGtAY/bVQI8OFtU29QGKw1CU3RH/bgXIzxGiLQsUd68w7N17oKYj8MiTpGVcovMCRKwwUbd9w==,iv:4aVy+r//s1Cs9q4GasR3vSAb8b/VB/8Mx5E1jWAUA+E=,tag:TgTSLLH1OG9ySi2tZ+hK1Q==,type:str] sops: - kms: [] - gcp_kms: [] - azure_kv: [] - hc_vault: [] age: - recipient: age14grjcxaq4h55yfnjxvnqhtswxhj9sfdcvyas4lwvpa8py27pjy2sv3g6v7 enc: | @@ -62,8 +60,7 @@ sops: WXJpUUxadERyYUExRFMzNzBXaUVET3cKG9ZwWy5YvTr/BAw/i+ZJos5trwRvaW5j eV/SHiEteZZtCuCVFAp3iolE/mJyu97nA2yFwWaLN86h+/xkOJsdqA== -----END AGE ENCRYPTED FILE----- - lastmodified: "2025-05-29T18:23:13Z" - mac: ENC[AES256_GCM,data:19U1KlPoC/hj8sGRjO3j/ONYcFvmUTul6qP6CaRE0BhJfpeaVYq5OvqdErVnw8UA/zBJ+zpSX/N13jcsx8QVqTljMha2fbx7iZxMbpVgzGZ+fhwICLri6PwT/sNLXKFrv8VZqNUYR5q+PWSlKCu8QQarDPvGR6qj4gm7VN7tVsI=,iv:udieJwN63LEeCRhZrLpMN6VCHBzAYt8BeJhbbLVxwCM=,tag:M6iYQb/b7vMoM+9e5is3hw==,type:str] - pgp: [] + lastmodified: "2025-05-31T08:08:02Z" + mac: ENC[AES256_GCM,data:p6FHDa6Xfd66pH4zB8s6nhGGk2Ha2YTC/wUsCrqu+9M01VQ7qv9tha1MpKMj9TUxSPSxPOI++5zkNi5LJbs4Y4q0KH4yd9w/guMmJB2+d2YUwNCTofvmQp3wS1KtaRbaai6mAXZELaVEsRkmwUdkdApNbSZkTZgDc+CMH7OmHbs=,iv:w/kv2wRO6N4k1U7y8efS7LXhrpMxkZ9kTs3lFo23MA8=,tag:F4rZGG00AQZLfGU3djgW8Q==,type:str] unencrypted_suffix: _unencrypted - version: 3.9.4 + version: 3.10.2 diff --git a/hosts/mail/channel b/hosts/mail/channel index ced117e..93f5df5 100644 --- a/hosts/mail/channel +++ b/hosts/mail/channel @@ -1 +1 @@ -https://channels.nixos.org/nixos-24.11 +https://channels.nixos.org/nixos-25.05 diff --git a/hosts/mail/modules/dovecot.nix 
b/hosts/mail/modules/dovecot.nix index a9ef486..cf8e8c1 100644 --- a/hosts/mail/modules/dovecot.nix +++ b/hosts/mail/modules/dovecot.nix @@ -88,6 +88,7 @@ in { environment.systemPackages = with pkgs; [ doveSync + dovecot_pigeonhole ]; services.dovecot2 = { @@ -215,9 +216,6 @@ in # Read multiple mails in parallel, improves performance mail_prefetch_count = 20 ''; - modules = [ - pkgs.dovecot_pigeonhole - ]; protocols = [ "sieve" ]; diff --git a/hosts/nb/configuration.nix b/hosts/nb/configuration.nix index 0e2bb86..c708175 100644 --- a/hosts/nb/configuration.nix +++ b/hosts/nb/configuration.nix @@ -146,6 +146,7 @@ in { "/var/lib/bluetooth" "/var/lib/docker" "/var/lib/flatpak" + "/var/lib/fprint" "/var/lib/nixos" "/var/lib/mysql" "/etc/NetworkManager/system-connections" diff --git a/hosts/nb/modules/fingerprint.nix b/hosts/nb/modules/fingerprint.nix index d26a50d..5ff0be2 100644 --- a/hosts/nb/modules/fingerprint.nix +++ b/hosts/nb/modules/fingerprint.nix @@ -5,7 +5,6 @@ security.pam.services.login.fprintAuth = true; security.pam.services.sudo.fprintAuth = true; - security.pam.services.sddm.fprintAuth = true; # If you use swaylock and want fingerprint auth for it: security.pam.services.swaylock.fprintAuth = true; # Add Polkit rule to allow locally active users to manage their own fingerprints diff --git a/hosts/web-arm/channel b/hosts/web-arm/channel index ced117e..93f5df5 100644 --- a/hosts/web-arm/channel +++ b/hosts/web-arm/channel @@ -1 +1 @@ -https://channels.nixos.org/nixos-24.11 +https://channels.nixos.org/nixos-25.05 diff --git a/hosts/web-arm/configuration.nix b/hosts/web-arm/configuration.nix index ac3d270..54c74d9 100644 --- a/hosts/web-arm/configuration.nix +++ b/hosts/web-arm/configuration.nix @@ -14,7 +14,7 @@ ./modules/nextcloud ./modules/rustdesk.nix ./modules/postgresql.nix - ./modules/grafana.nix + ./modules/grafana/default.nix ./modules/loki.nix ./modules/victoriametrics.nix ./modules/updns.nix diff --git a/hosts/web-arm/modules/grafana.nix 
b/hosts/web-arm/modules/grafana.nix deleted file mode 100644 index f8ef660..0000000 --- a/hosts/web-arm/modules/grafana.nix +++ /dev/null @@ -1,146 +0,0 @@ -{ lib, pkgs, config, ...}: -let - ldap = pkgs.writeTextFile { - name = "ldap.toml"; - text = '' - [[servers]] - host = "ldap.cloonar.com" - port = 636 - use_ssl = true - bind_dn = "cn=grafana,ou=system,ou=users,dc=cloonar,dc=com" - bind_password = "$__file{/run/secrets/grafana-ldap-password}" - search_filter = "(&(objectClass=cloonarUser)(mail=%s))" - search_base_dns = ["ou=users,dc=cloonar,dc=com"] - - [servers.attributes] - name = "givenName" - surname = "sn" - username = "mail" - email = "mail" - member_of = "memberOf" - - [[servers.group_mappings]] - group_dn = "cn=Administrators,ou=groups,dc=cloonar,dc=com" - org_role = "Admin" - grafana_admin = true # Available in Grafana v5.3 and above - ''; - }; -in -{ - systemd.services.grafana.script = lib.mkBefore '' - export GF_AUTH_GENERIC_OAUTH_CLIENT_SECRET=$(cat /run/secrets/grafana-oauth-secret) - export PUSHOVER_API_TOKEN=$(cat /run/secrets/pushover-api-token) - export PUSHOVER_USER_KEY=$(cat /run/secrets/pushover-user-key) - ''; - services.grafana = { - enable = true; - settings = { - analytics.reporting_enabled = false; - "auth.ldap".enabled = true; - "auth.ldap".config_file = toString ldap; - - "auth.generic_oauth" = { - enabled = true; - name = "Authelia"; - icon = "signin"; - client_id = "grafana"; - scopes = "openid profile email groups"; - empty_scopes = false; - auth_url = "https://auth.cloonar.com/api/oidc/authorization"; - token_url = "https://auth.cloonar.com/api/oidc/token"; - api_url = "https://auth.cloonar.com/api/oidc/userinfo"; - login_attribute_path = "preferred_username"; - groups_attribute_path = "groups"; - role_attribute_path = "contains(groups, 'Administrators') && 'Admin' || contains(groups, 'editor') && 'Editor' || 'Viewer'"; - allow_assign_grafana_admin = true; - name_attribute_path = "name"; - use_pkce = true; - }; - - 
"auth.anonymous".enabled = true; - "auth.anonymous".org_name = "Cloonar e.U."; - "auth.anonymous".org_role = "Viewer"; - - server = { - root_url = "https://grafana.cloonar.com"; - domain = "grafana.cloonar.com"; - enforce_domain = true; - enable_gzip = true; - http_addr = "0.0.0.0"; - http_port = 3001; - }; - - smtp = { - enabled = true; - host = "mail.cloonar.com:587"; - user = "grafana@cloonar.com"; - password = "$__file{${config.sops.secrets.grafana-ldap-password.path}}"; - fromAddress = "grafana@cloonar.com"; - }; - - database = { - type = "postgres"; - name = "grafana"; - host = "/run/postgresql"; - user = "grafana"; - }; - - security.admin_password = "$__file{${config.sops.secrets.grafana-admin-password.path}}"; - }; - provision = { - alerting = { - contactPoints.settings = { - apiVersion = 1; - - contactPoints = [{ - orgId = 1; - name = "cp_dominik"; - receivers = [{ - uid = "dominik"; - type = "pushover"; - settings = { - security.apiToken = "$__file{${config.sops.secrets.pushover-api-token.path}}"; - security.userKey = "$__file{${config.sops.secrets.pushover-user-key.path}}"; - apiToken = "\${PUSHOVER_API_TOKEN}"; - userKey = "\${PUSHOVER_USER_KEY}"; - device = "iphone"; - priority = "2"; - retry = "30"; - expire = "120"; - sound = "siren"; - okSound = "magic"; - message = '' - {{ template "default.message" . 
}} - ''; - }; - }]; - }]; - }; - }; - }; - }; - - services.nginx.virtualHosts."grafana.cloonar.com" = { - forceSSL = true; - enableACME = true; - acmeRoot = null; - locations."/".extraConfig = "proxy_pass http://localhost:3001;"; - }; - - services.postgresql.ensureUsers = [ - { - name = "grafana"; - ensureDBOwnership = true; - } - ]; - services.postgresql.ensureDatabases = [ "grafana" ]; - services.postgresqlBackup.databases = [ "grafana" ]; - - sops.secrets = { - grafana-admin-password.owner = "grafana"; - grafana-ldap-password.owner = "grafana"; - grafana-oauth-secret.owner = "grafana"; - pushover-api-token.owner = "grafana"; - pushover-user-key.owner = "grafana"; - }; -} diff --git a/hosts/web-arm/modules/grafana/alerting/cpu_usage.nix b/hosts/web-arm/modules/grafana/alerting/cpu_usage.nix new file mode 100644 index 0000000..9c09881 --- /dev/null +++ b/hosts/web-arm/modules/grafana/alerting/cpu_usage.nix @@ -0,0 +1,58 @@ +{ lib, pkgs, config, ... }: +{ + grafanaAlertRuleDefinitions = [ + { + uid = "high-cpu-usage-alert-uid"; + title = "HighCPUUsage"; + condition = "D"; # Condition is now D + + data = [ + # Query A: Calculate CPU usage percentage + { + refId = "A"; + datasourceUid = "vm-datasource-uid"; + queryType = "prometheus"; + relativeTimeRange = { from = 60; to = 0; }; # Query over the last minute + model = { + # Calculate average CPU usage over 1m, grouped by instance and job + expr = ''(1 - avg by (instance, job) (rate(node_cpu_seconds_total{mode="idle"}[1m]))) * 100''; + legendFormat = "CPU usage on {{instance}} ({{job}})"; + instant = false; # This is a range query + }; + } + # Expression C: Reduce Query A to its last value, preserving labels + { + refId = "C"; + datasourceUid = "__expr__"; + model = { + type = "reduce"; + expression = "A"; # Input is Query A + reducer = "last"; # Get the last value of each series in A + }; + } + # Expression D: Apply math condition to the reduced values from C + { + refId = "D"; + datasourceUid = "__expr__"; + model = 
{
+            type = "math";
+            expression = "$C > 90"; # Alert if CPU usage from C is > 90%
+          };
+        }
+      ];
+
+      for = "5m"; # Duration the condition must be met
+      noDataState = "NoData";
+      execErrState = "Error";
+
+      annotations = {
+        summary = "High CPU usage on {{ $labels.instance }}";
+        description = ''CPU usage on {{ $labels.instance }} (job: {{ $labels.job }}) has been above 90% for more than 5 minutes. Current value: {{ if $values.C }}{{ printf "%.1f" $values.C.Value }}{{ else }}N/A{{ end }}%.''; # Query A already yields 0-100, so print the number as-is instead of using humanizePercentage (which expects a 0-1 ratio)
+      };
+      labels = {
+        severity = "warning";
+        category = "performance";
+      };
+    }
+  ];
+}
diff --git a/hosts/web-arm/modules/grafana/alerting/disk_usage.nix b/hosts/web-arm/modules/grafana/alerting/disk_usage.nix
new file mode 100644
index 0000000..020947b
--- /dev/null
+++ b/hosts/web-arm/modules/grafana/alerting/disk_usage.nix
@@ -0,0 +1,76 @@
+{ lib, pkgs, config, ... }:
+{
+  grafanaAlertRuleDefinitions = [
+    {
+      uid = "high-disk-usage-alert-uid"; # Optional: provide a stable UID for the rule itself
+      title = "HighDiskUsage"; # Name of the alert rule (was 'alert' in vmalert)
+
+      # Condition for the alert to fire. 'D' refers to the refId of the threshold expression.
+ condition = "D"; # Condition is now D + # Removed rule-level relativeTimeRange + + # Data queries and expressions + data = [ + # Query A: Calculate disk usage percentage + { + refId = "A"; + datasourceUid = "vm-datasource-uid"; # UID of the VictoriaMetrics datasource + queryType = "prometheus"; # Explicitly set, though often inferred + relativeTimeRange = { from = 60; to = 0; }; # Query-level, integer seconds + model = { + expr = '' + ( + node_filesystem_size_bytes{fstype!~"tmpfs|rootfs",mountpoint!=""} - node_filesystem_avail_bytes{fstype!~"tmpfs|rootfs",mountpoint!=""} + ) / (node_filesystem_size_bytes{fstype!~"tmpfs|rootfs",mountpoint!=""} > 0) * 100 + and node_filesystem_size_bytes{fstype!~"tmpfs|rootfs",mountpoint!=""} + and node_filesystem_avail_bytes{fstype!~"tmpfs|rootfs",mountpoint!=""} + ''; + legendFormat = "{{mountpoint}} on {{instance}}"; # Example legend + instant = false; # For range queries, default is false + }; + } + # Expression C: Reduce Query A to its last value, preserving labels + { + refId = "C"; + datasourceUid = "__expr__"; + model = { + type = "reduce"; + expression = "A"; # Input is Query A + reducer = "last"; # Get the last value of each series in A + }; + } + # Expression D: Apply math condition to the reduced values from C + { + refId = "D"; + datasourceUid = "__expr__"; + model = { + type = "math"; + expression = "$C > 85"; # Check if the last value from each series in C is > 85 + }; + } + ]; + + for = "15m"; # Duration the condition must be met (same as vmalert) + + # How to handle states where data is missing or query errors + noDataState = "NoData"; # Options: NoData, Alerting, OK + execErrState = "Error"; # Options: Error, Alerting, OK + + annotations = { + summary = "High disk usage on {{ $labels.instance }} at {{ $labels.mountpoint }}"; + description = '' + Disk usage on {{ $labels.instance }} for mount point {{ $labels.mountpoint }} + (fstype: {{ $labels.fstype }}) has been above 85% for more than 15 minutes. 
+ Current value: {{ if $values.C }}{{ printf "%.1f" $values.C.Value }}%{{ else }}N/A{{ end }}. + ''; # $values.C is the reduced value fed into math condition D; it is already a 0-100 percentage, so .Value is printed as-is + }; + labels = { + severity = "warning"; + category = "capacity"; + # Grafana automatically adds labels from the query result (instance, mountpoint, etc.) + # and labels from the rule group/folder. + }; + # isPaused = false; # Default is not paused + } + ]; +} diff --git a/hosts/web-arm/modules/grafana/alerting/host_down.nix b/hosts/web-arm/modules/grafana/alerting/host_down.nix new file mode 100644 index 0000000..1910b23 --- /dev/null +++ b/hosts/web-arm/modules/grafana/alerting/host_down.nix @@ -0,0 +1,54 @@ +{ lib, pkgs, config, ... }: +{ + grafanaAlertRuleDefinitions = [ + { + uid = "host-down-alert-uid"; + title = "HostDown"; + condition = "C"; # The alert condition is evaluated on expression C + + data = [ + { + refId = "A"; + datasourceUid = "vm-datasource-uid"; + queryType = "prometheus"; + relativeTimeRange = { from = 60; to = 0; }; # Query over the last minute + model = { + expr = ''up''; + legendFormat = "{{instance}} ({{job}})"; + instant = false; # Range query, because relativeTimeRange is used + }; + } + { # Expression B: Reduce Query A + refId = "B"; + datasourceUid = "__expr__"; + model = { + type = "reduce"; + expression = "A"; # Input is Query A + reducer = "last"; # Get the last value of each series in A + }; + } + { # Expression C: Math condition based on B + refId = "C"; + datasourceUid = "__expr__"; + model = { + type = "math"; + expression = "$B == 0"; # Check if the last value from B is 0 + }; + } + ]; + + for = "2m"; + noDataState = "Alerting"; # Missing data is treated as alerting for this rule + execErrState = "Error"; + + annotations = { + summary = "Host {{ $labels.instance }} is down"; + description = ''Host {{ $labels.instance }} (job: {{ $labels.job }}) has been down for more than 2 minutes.''; + }; + labels = { + severity = "critical"; + category = "availability"; + }; + } + ]; +} diff --git a/hosts/web-arm/modules/grafana/alerting/inode_usage.nix 
b/hosts/web-arm/modules/grafana/alerting/inode_usage.nix new file mode 100644 index 0000000..ba73f30 --- /dev/null +++ b/hosts/web-arm/modules/grafana/alerting/inode_usage.nix @@ -0,0 +1,63 @@ +{ lib, pkgs, config, ... }: +{ + grafanaAlertRuleDefinitions = [ + { + uid = "high-inode-usage-alert-uid"; + title = "HighInodeUsage"; + condition = "D"; # The alert condition is evaluated on expression D (the threshold check below) + + data = [ + # Query A: inode usage in percent, (files - files_free) / files * 100; the "> 0" filter drops filesystems that report 0 inodes + { + refId = "A"; + datasourceUid = "vm-datasource-uid"; + queryType = "prometheus"; + relativeTimeRange = { from = 60; to = 0; }; + model = { + expr = '' + ( + node_filesystem_files{fstype!~"tmpfs|rootfs",mountpoint!=""} - node_filesystem_files_free{fstype!~"tmpfs|rootfs",mountpoint!=""} + ) / (node_filesystem_files{fstype!~"tmpfs|rootfs",mountpoint!=""} > 0) * 100 + and node_filesystem_files{fstype!~"tmpfs|rootfs",mountpoint!=""} + and node_filesystem_files_free{fstype!~"tmpfs|rootfs",mountpoint!=""} + ''; + legendFormat = "{{mountpoint}} on {{instance}}"; + instant = false; + }; + } + # Expression C: Reduce Query A to its last value, preserving labels + { + refId = "C"; + datasourceUid = "__expr__"; + model = { + type = "reduce"; + expression = "A"; # Input is Query A + reducer = "last"; # Get the last value of each series in A + }; + } + # Expression D: Apply math condition to the reduced values from C + { + refId = "D"; + datasourceUid = "__expr__"; + model = { + type = "math"; + expression = "$C > 80"; # Alert if inode usage from C is > 80% + }; + } + ]; + + for = "30m"; # Duration the condition must be met + noDataState = "NoData"; + execErrState = "Error"; + + annotations = { + summary = "High inode usage on {{ $labels.instance }} at {{ $labels.mountpoint }}"; + description = ''Inode usage on {{ $labels.instance }} for mount point {{ $labels.mountpoint }} (fstype: {{ $labels.fstype }}) has been above 80% for more than 30 minutes. 
Current value: {{ if $values.C }}{{ printf "%.1f" $values.C.Value }}%{{ else }}N/A{{ end }}.''; # C is compared against 80, i.e. already a 0-100 percentage, so .Value is printed as-is (humanizePercentage expects a 0-1 float) + }; + labels = { + severity = "warning"; + category = "capacity"; + }; + } + ]; +} diff --git a/hosts/web-arm/modules/grafana/alerting/ram_usage.nix b/hosts/web-arm/modules/grafana/alerting/ram_usage.nix new file mode 100644 index 0000000..14a2ea8 --- /dev/null +++ b/hosts/web-arm/modules/grafana/alerting/ram_usage.nix @@ -0,0 +1,61 @@ +{ lib, pkgs, config, ... }: +{ + grafanaAlertRuleDefinitions = [ + { + uid = "high-ram-usage-alert-uid"; + title = "HighRAMUsage"; + condition = "D"; # The alert condition is evaluated on expression D (the threshold check below) + + data = [ + # Query A: RAM usage in percent, (1 - MemAvailable / MemTotal) * 100; the "> 0" filter drops series with MemTotal 0 + { + refId = "A"; + datasourceUid = "vm-datasource-uid"; + queryType = "prometheus"; + relativeTimeRange = { from = 60; to = 0; }; + model = { + expr = '' + (1 - node_memory_MemAvailable_bytes / (node_memory_MemTotal_bytes > 0)) * 100 + and node_memory_MemAvailable_bytes + and node_memory_MemTotal_bytes + ''; + legendFormat = "RAM usage on {{instance}} ({{job}})"; + instant = false; + }; + } + # Expression C: Reduce Query A to its last value, preserving labels + { + refId = "C"; + datasourceUid = "__expr__"; + model = { + type = "reduce"; + expression = "A"; # Input is Query A + reducer = "last"; # Get the last value of each series in A + }; + } + # Expression D: Apply math condition to the reduced values from C + { + refId = "D"; + datasourceUid = "__expr__"; + model = { + type = "math"; + expression = "$C > 90"; # Alert if RAM usage from C is > 90% + }; + } + ]; + + for = "10m"; # Duration the condition must be met + noDataState = "NoData"; + execErrState = "Error"; + + annotations = { + summary = "High RAM usage on {{ $labels.instance }}"; + description = ''RAM usage on {{ $labels.instance }} (job: {{ $labels.job }}) has been above 90% for more than 10 minutes. 
Current value: {{ if $values.C }}{{ printf "%.1f" $values.C.Value }}%{{ else }}N/A{{ end }}.''; # C is compared against 90, i.e. already a 0-100 percentage, so .Value is printed as-is (humanizePercentage expects a 0-1 float) + }; + labels = { + severity = "warning"; + category = "performance"; + }; + } + ]; +} diff --git a/hosts/web-arm/modules/grafana/alerting/system/default.nix b/hosts/web-arm/modules/grafana/alerting/system/default.nix new file mode 100644 index 0000000..26db06d --- /dev/null +++ b/hosts/web-arm/modules/grafana/alerting/system/default.nix @@ -0,0 +1,21 @@ +{ lib, pkgs, config, ... }: +let + # Each rule file in the parent 'alerting' directory is a function returning { grafanaAlertRuleDefinitions = [ ... ]; }; import it directly and take that list + cpuAlertRules = (import ../cpu_usage.nix { inherit lib pkgs config; }).grafanaAlertRuleDefinitions; + diskAlertRules = (import ../disk_usage.nix { inherit lib pkgs config; }).grafanaAlertRuleDefinitions; + hostDownAlertRules = (import ../host_down.nix { inherit lib pkgs config; }).grafanaAlertRuleDefinitions; + inodeAlertRules = (import ../inode_usage.nix { inherit lib pkgs config; }).grafanaAlertRuleDefinitions; + ramAlertRules = (import ../ram_usage.nix { inherit lib pkgs config; }).grafanaAlertRuleDefinitions; + + allSystemRules = cpuAlertRules ++ diskAlertRules ++ hostDownAlertRules ++ inodeAlertRules ++ ramAlertRules; +in +{ + services.grafana.provision.alerting.rules.settings.groups = [ + { + name = "System Alerts"; # This is the Grafana alert group name + folder = "System Alerts"; # This is the Grafana folder name + interval = "1m"; + rules = allSystemRules; + } + ]; +} \ No newline at end of file diff --git a/hosts/web-arm/modules/grafana/datasources/victoriametrics.nix b/hosts/web-arm/modules/grafana/datasources/victoriametrics.nix new file mode 100644 index 0000000..57ea78a --- /dev/null +++ b/hosts/web-arm/modules/grafana/datasources/victoriametrics.nix @@ -0,0 +1,18 @@ +{ lib, pkgs, config, ... 
}: +{ + services.grafana.provision.datasources.settings.datasources = [ + { + name = "VictoriaMetrics"; + uid = "vm-datasource-uid"; # Stable UID; the alert rules reference it via datasourceUid + type = "prometheus"; + url = "http://localhost:8428"; # URL of VictoriaMetrics + access = "proxy"; # Grafana proxies requests + isDefault = true; # Optional: make this the default datasource + jsonData = { + # timeInterval = "30s"; # Optional: Scrape interval if different from Grafana's default + # httpMethod = "POST"; # Optional: if VictoriaMetrics prefers POST for queries + }; + editable = false; # Recommended for provisioned datasources + } + ]; +} \ No newline at end of file diff --git a/hosts/web-arm/modules/grafana/default.nix b/hosts/web-arm/modules/grafana/default.nix new file mode 100644 index 0000000..6f48794 --- /dev/null +++ b/hosts/web-arm/modules/grafana/default.nix @@ -0,0 +1,201 @@ +{ lib, pkgs, config, ...}: +let + ldap = pkgs.writeTextFile { + name = "ldap.toml"; + text = '' + [[servers]] + host = "ldap.cloonar.com" + port = 636 + use_ssl = true + bind_dn = "cn=grafana,ou=system,ou=users,dc=cloonar,dc=com" + bind_password = "$__file{/run/secrets/grafana-ldap-password}" + search_filter = "(&(objectClass=cloonarUser)(mail=%s))" + search_base_dns = ["ou=users,dc=cloonar,dc=com"] + + [servers.attributes] + name = "givenName" + surname = "sn" + username = "mail" + email = "mail" + member_of = "memberOf" + + [[servers.group_mappings]] + group_dn = "cn=Administrators,ou=groups,dc=cloonar,dc=com" + org_role = "Admin" + grafana_admin = true # Available in Grafana v5.3 and above + ''; + }; +in +{ + imports = [ + # The individual rule files are not NixOS modules and must not be imported here; alerting/system/default.nix imports them itself + # ./alerting/disk_usage.nix + # ./alerting/cpu_usage.nix + # ./alerting/host_down.nix + # ./alerting/inode_usage.nix + # ./alerting/ram_usage.nix + ./alerting/system/default.nix # Consolidated system alert rules (CPU, disk, host down, inode, RAM) + # ... other rule files can be added here ... 
+ ./datasources/victoriametrics.nix + ]; + + systemd.services.grafana.script = lib.mkBefore '' + export GF_AUTH_GENERIC_OAUTH_CLIENT_SECRET=$(cat /run/secrets/grafana-oauth-secret) + export PUSHOVER_API_TOKEN=$(cat /run/secrets/pushover-api-token) + export PUSHOVER_USER_KEY=$(cat /run/secrets/pushover-user-key) + ''; + services.grafana = { + enable = true; + settings = { + analytics.reporting_enabled = false; + "auth.ldap".enabled = true; + "auth.ldap".config_file = toString ldap; + + "auth.generic_oauth" = { + enabled = true; + name = "Authelia"; + icon = "signin"; + client_id = "grafana"; + scopes = "openid profile email groups"; + empty_scopes = false; + auth_url = "https://auth.cloonar.com/api/oidc/authorization"; + token_url = "https://auth.cloonar.com/api/oidc/token"; + api_url = "https://auth.cloonar.com/api/oidc/userinfo"; + login_attribute_path = "preferred_username"; + groups_attribute_path = "groups"; + role_attribute_path = "contains(groups, 'Administrators') && 'Admin' || contains(groups, 'editor') && 'Editor' || 'Viewer'"; + allow_assign_grafana_admin = true; + name_attribute_path = "name"; + use_pkce = true; + }; + + "auth.anonymous".enabled = true; + "auth.anonymous".org_name = "Cloonar e.U."; + "auth.anonymous".org_role = "Viewer"; + + server = { + root_url = "https://grafana.cloonar.com"; + domain = "grafana.cloonar.com"; + enforce_domain = true; + enable_gzip = true; + http_addr = "0.0.0.0"; # NOTE(review): listens on all interfaces although nginx proxies to localhost:3001 — confirm direct access is intended + http_port = 3001; + }; + + smtp = { + enabled = true; + host = "mail.cloonar.com:587"; + user = "grafana@cloonar.com"; + password = "$__file{${config.sops.secrets.grafana-ldap-password.path}}"; # NOTE(review): reuses the LDAP bind secret for SMTP auth — confirm this is intended + from_address = "grafana@cloonar.com"; # Grafana's [smtp] key is from_address; a camelCase fromAddress key is not read by Grafana + }; + + database = { + type = "postgres"; + name = "grafana"; + host = "/run/postgresql"; + user = "grafana"; + }; + + security.admin_password = "$__file{${config.sops.secrets.grafana-admin-password.path}}"; + }; + provision = { + alerting = { + rules.settings.groups = lib.mkMerge []; # Allows rule groups to be merged 
(including the one from system/default.nix) + contactPoints = { + settings = { + apiVersion = 1; # As per Grafana provisioning API + contactPoints = [{ + orgId = 1; + name = "cp_dominik"; + receivers = [{ + uid = "dominik_pushover_cp_receiver"; # Made UID even more specific + type = "pushover"; + settings = { + apiToken = "\${PUSHOVER_API_TOKEN}"; + userKey = "\${PUSHOVER_USER_KEY}"; + device = "iphone"; + priority = 2; # Pushover emergency priority: requires retry and expire below + retry = 30; # Seconds between re-notifications; must be an integer (Pushover minimum is 30), not a duration string + expire = 120; # Seconds after which retries stop; must be an integer, not a duration string + sound = "siren"; + okSound = "magic"; + message = '' + {{ template "default.message" . }} + ''; + }; + }]; + }]; + }; + }; + + policies = { # Corrected from notificationPolicies to policies + settings = { + apiVersion = 1; # As per Grafana provisioning API + + # Grafana's new unified alerting expects a single policy tree per org. + # For OrgID 1 (default), this defines the root of that tree. + # The NixOS module should translate this into the correct YAML structure. + # The `policies` attribute within `settings` usually takes a list of policy trees. + # For a single default organization, we define one policy tree. + # Grafana's own YAML examples show a top-level 'route' for the default policy, + # or a list under 'policies' if you're managing multiple policy sets (less common for basic setup). + # Given the NixOS option `services.grafana.provision.alerting.policies.settings.policies`, + # it's likely expecting a list here. + policies = [{ # This outer list corresponds to the `policies` option + # orgId = 1; # Usually implicit for the default policy file, but can be specified + receiver = "cp_dominik"; # This sets the default receiver for the root route + + # The actual routing tree starts here. + # For a simple setup where all alerts go to one receiver, + # just setting the top-level 'receiver' is often enough. + # If more complex routing is needed, 'routes' would be defined here. + # Example: + # route = { + # receiver = "cp_dominik"; + # group_by = [ "alertname", "job" ]; + # # ... 
other root route settings + # routes = [ + # { + # matcher_re = { severity = "critical"; }; # NOTE(review): field name is illustrative — verify against Grafana's notification policy schema + # receiver = "critical_alerts_receiver"; # Another contact point + # continue = false; + # } + # # ... other specific routes + # ]; + # }; + # For the simplest case, just defining the receiver at this level should work + # as the root policy for the default organization. + }]; + # resetPolicies = false; # Default, set to true to remove existing policies not in this config. + }; + }; + }; + datasources.settings.datasources = lib.mkMerge []; # Empty merge: adds no entries here; datasources are defined by imported modules + }; + }; + + services.nginx.virtualHosts."grafana.cloonar.com" = { + forceSSL = true; + enableACME = true; + acmeRoot = null; + locations."/".extraConfig = "proxy_pass http://localhost:3001;"; + }; + + services.postgresql.ensureUsers = [ + { + name = "grafana"; + ensureDBOwnership = true; + } + ]; + services.postgresql.ensureDatabases = [ "grafana" ]; + services.postgresqlBackup.databases = [ "grafana" ]; + + sops.secrets = { + grafana-admin-password.owner = "grafana"; + grafana-ldap-password.owner = "grafana"; + grafana-oauth-secret.owner = "grafana"; + pushover-api-token.owner = "grafana"; + pushover-user-key.owner = "grafana"; + }; +} diff --git a/utils/modules/netdata.nix b/utils/modules/netdata.nix index 1aab534..2160f1d 100644 --- a/utils/modules/netdata.nix +++ b/utils/modules/netdata.nix @@ -1,10 +1,14 @@ -{ config, pkgs, ... }: +{ config, lib, pkgs, ... }: let unstable = import (fetchTarball https://nixos.org/channels/nixos-unstable/nixexprs.tar.xz) { config = { allowUnfree = true; }; }; in { + nixpkgs.config.allowUnfreePredicate = pkg: builtins.elem (lib.getName pkg) [ + "netdata" + ]; + services.netdata.configDir."python.d.conf" = pkgs.writeText "python.d.conf" '' postfix: yes ''; @@ -14,7 +18,7 @@ in python.enable = true; package = pkgs.netdata.override { - withCloud = true; + withCloudUi = true; }; config = {