nixos/hosts/fw/modules/nas-wake-on-access.nix

196 lines
7.2 KiB
Nix

# NAS wake-on-access (fw side)
#
# Detects traffic aimed at the NAS (10.42.97.11) and sends a WOL magic
# packet so the machine comes back up on demand after it has powered itself
# off (see hosts/nas/modules/auto-shutdown.nix).
#
# Shape:
#
# 1. Detector: nftables' forward chain logs packets headed to the NAS with
# a "nas-wake: " prefix. A journal follower translates each log line
# into a wake invocation. Only cross-VLAN traffic hits this path, which
# is exactly what we want — same-VLAN ARP bursts from stale neighbor
# entries are not user intent and must not wake the NAS.
#
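# 2. Reachability probe: a systemd timer pings the NAS every 5s and
# writes the current epoch to last-seen-up on success. The wake script
# consults this timestamp and skips the WOL if the NAS was seen up
# recently — this both saves redundant WOLs and closes the UGREEN N100
# PSU-ramp-down race where a magic packet arriving right after
# poweroff makes the board boot into BIOS setup instead of the OS.
#
# 3. HTTP endpoint: a socat listener on the fw's address in the NAS
# subnet (port 9800) runs the same wake script for every connection and
# answers with a 503 "waking up" page, so a reverse proxy can trigger a
# wake when a NAS-backed vhost is unreachable.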
{ config, lib, pkgs, ... }:
let
# --- Timing gates (seconds), both enforced by wakeScript ---
# holdoffSeconds: no WOL while the probe saw the NAS up this recently.
# cooldownSeconds: minimum spacing between two WOL packets.
holdoffSeconds = 60;
cooldownSeconds = 30;

# --- Runtime state (same name as the services' RuntimeDirectory) ---
# last-seen-up is written by the probe, last-wake by the wake script;
# both hold a single epoch timestamp.
stateDir = "/run/nas-wake-on-access";
lastSeenFile = "${stateDir}/last-seen-up";
lastWakeFile = "${stateDir}/last-wake";

# --- NAS addressing ---
# The magic packet is addressed to nasMac and sent to the subnet
# broadcast address; nasIp is what the reachability probe pings.
nasMac = "6c:1f:f7:8e:a9:86";
nasIp = "${config.networkPrefix}.97.11";
serverBroadcast = "${config.networkPrefix}.97.255";
# Shared wake routine, invoked by the journal follower and by the HTTP
# endpoint. Sends one WOL magic packet unless one of two gates says no:
#   1. cooldown - a WOL was sent less than cooldownSeconds ago
#   2. hold-off - the probe saw the NAS up less than holdoffSeconds ago
# Always exits 0 when a gate skips the send or when the send itself fails;
# a failed send is logged to stderr and does NOT start the cooldown.
wakeScript = pkgs.writeShellScript "nas-wake" ''
  set -euo pipefail
  mkdir -p "${stateDir}"
  now=$(date +%s)

  # Cooldown gate: at most one WOL every ${toString cooldownSeconds}s.
  # Second line of defense against burst triggers.
  if [[ -f "${lastWakeFile}" ]]; then
    last_wake=$(cat "${lastWakeFile}" 2>/dev/null || echo 0)
    if (( now - last_wake < ${toString cooldownSeconds} )); then
      echo "nas-wake: cooldown active ($((now - last_wake))s < ${toString cooldownSeconds}s), skipping WOL"
      exit 0
    fi
  fi

  # Hold-off gate: if the NAS was probed up within the last
  # ${toString holdoffSeconds}s, skip WOL. Two cases covered:
  #   a) NAS is still up: WOL would be wasted (harmless but noisy).
  #   b) NAS just started powering off: PSU is ramping down and a magic
  #      packet arriving now is the UGREEN BIOS-cold-boot race window.
  # Missing file => first boot of fw or probe has never run; fall through
  # and send the WOL (state unknown, default to waking).
  if [[ -f "${lastSeenFile}" ]]; then
    last_seen=$(cat "${lastSeenFile}" 2>/dev/null || echo 0)
    age=$(( now - last_seen ))
    if (( age < ${toString holdoffSeconds} )); then
      echo "nas-wake: NAS seen up ''${age}s ago (< ${toString holdoffSeconds}s), skipping WOL"
      exit 0
    fi
  fi

  echo "nas-wake: sending WOL to ${nasMac} via ${serverBroadcast}"
  # Record the wake time only when the packet really went out. Stamping
  # last-wake after a failed send would arm the cooldown and suppress the
  # next ${toString cooldownSeconds}s of triggers although nothing was sent.
  if ${pkgs.wol}/bin/wol -i ${serverBroadcast} ${nasMac}; then
    echo "$now" > "${lastWakeFile}"
  else
    # Still best-effort: report the failure but keep the exit status 0.
    echo "nas-wake: WOL send failed, cooldown not started" >&2
  fi
'';
# Detector for routed (cross-VLAN) traffic. Follows new kernel messages in
# the journal, keeps only lines containing the literal "nas-wake:" marker
# (the prefix the nftables log rule is expected to emit; the rule itself
# lives outside this file) and runs the wake script once per matching line.
# A failing wake run never terminates the follower.
journalFollowerScript = pkgs.writeShellScript "nas-wake-journal-follower" ''
  set -euo pipefail
  ${pkgs.systemd}/bin/journalctl --dmesg --follow --output=cat --since=now |
    ${pkgs.gnugrep}/bin/grep --line-buffered --fixed-strings "nas-wake:" |
    while IFS= read -r _event; do
      # The line content is irrelevant; each match is just a trigger.
      ${wakeScript} || true
    done
'';
# Periodic reachability probe. One-shot: send a single ping (1s reply
# timeout, no DNS lookups) to the NAS and, on success, store the current
# epoch in lastSeenFile. On failure the file is left alone so the
# timestamp ages out naturally past holdoffSeconds.
nasProbeScript = pkgs.writeShellScript "nas-probe" ''
  set -euo pipefail
  if ${pkgs.iputils}/bin/ping -c1 -W1 -n ${nasIp} >/dev/null 2>&1; then
    # Write to a sibling temp file and rename it into place. A plain
    # "> file" truncates first, so a wake script reading at that instant
    # would see an empty timestamp (treated as 0 by bash arithmetic) and
    # skip the hold-off gate. rename within one directory is atomic.
    tmp="${lastSeenFile}.tmp"
    date +%s > "$tmp"
    mv -f "$tmp" "${lastSeenFile}"
  fi
'';
# Address of this firewall in the same .97 subnet as the NAS; the wake
# HTTP listener binds to it.
fwIp = "${config.networkPrefix}.97.1";
# Static "waking up" page served as the body of the wake HTTP endpoint's
# response. The 15s meta refresh matches the Retry-After value sent by
# nasWakeHttpScript, so the browser retries the original URL on its own.
# The markup is plain ASCII (the ellipsis is written as &hellip;).
nasWakeHtml = pkgs.writeText "nas-wake.html" ''
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta http-equiv="refresh" content="15">
<title>Waking up NAS...</title>
<style>
body { font-family: -apple-system, BlinkMacSystemFont, sans-serif; display: flex; justify-content: center; align-items: center; min-height: 100vh; margin: 0; background: #1a1a2e; color: #e0e0e0; }
.container { text-align: center; padding: 2rem; }
h1 { font-size: 1.8em; margin-bottom: 0.5em; }
p { font-size: 1.1em; color: #aaa; }
</style>
</head>
<body>
<div class="container">
<h1>NAS is waking up&hellip;</h1>
<p>A wake-on-LAN packet has been sent.<br>This page will refresh automatically in 15 seconds.</p>
</div>
</body>
</html>
'';
# Per-connection handler for the wake HTTP endpoint (run by socat with
# stdin/stdout attached to the client). Triggers the wake script, then
# writes a fixed "503 Service Unavailable" response carrying the
# nasWakeHtml page. The request itself is never inspected.
nasWakeHttpScript = pkgs.writeShellScript "nas-wake-http" ''
  # Trigger WOL (reuses cooldown/holdoff from wakeScript). Output is sent
  # to stderr so it cannot end up inside the HTTP response on stdout.
  ${wakeScript} >&2 || true

  BODY=$(cat ${nasWakeHtml})
  # Content-Length is a count of bytes. The string-length expansion of
  # bash counts characters under a UTF-8 locale, which would under-report
  # the size (and truncate the page) as soon as the HTML contains a
  # non-ASCII character. wc -c always counts bytes.
  LENGTH=$(printf "%s" "$BODY" | wc -c)

  printf "HTTP/1.1 503 Service Unavailable\r\n"
  printf "Content-Type: text/html; charset=utf-8\r\n"
  printf "Content-Length: %d\r\n" "$LENGTH"
  printf "Retry-After: 15\r\n"
  printf "Connection: close\r\n"
  printf "\r\n"
  printf "%s" "$BODY"
'';
in
{
# Long-running detector: keeps journalFollowerScript alive so that kernel
# log lines produced for NAS-bound traffic are turned into wake attempts.
systemd.services.nas-wake-journal = {
  description = "Wake NAS on cross-VLAN traffic (nftables log follower)";
  wantedBy = [ "multi-user.target" ];

  # Start only once the firewall rules and the journal are available.
  requires = [ "systemd-journald.service" ];
  after = [ "nftables.service" "systemd-journald.service" ];

  path = [ pkgs.coreutils pkgs.wol pkgs.systemd pkgs.gnugrep ];

  serviceConfig = {
    Type = "simple";
    ExecStart = "${journalFollowerScript}";

    # The follower is restarted whenever its pipeline ends.
    Restart = "always";
    RestartSec = "5s";

    # State directory is shared with the probe and HTTP services, so it
    # must survive this unit stopping.
    RuntimeDirectory = "nas-wake-on-access";
    RuntimeDirectoryPreserve = "yes";
  };
};
# One-shot unit behind the nas-probe timer: a single run of nasProbeScript,
# which refreshes last-seen-up when the NAS answers a ping.
systemd.services.nas-probe = {
  description = "Probe NAS reachability for wake-on-access hold-off";

  wants = [ "network-online.target" ];
  after = [ "network-online.target" ];

  path = [ pkgs.coreutils pkgs.iputils ];

  serviceConfig = {
    Type = "oneshot";
    ExecStart = "${nasProbeScript}";
    # The ping waits at most 1s for a reply; 3s bounds the whole run.
    TimeoutStartSec = "3s";

    # Keep the shared state directory after each run finishes.
    RuntimeDirectory = "nas-wake-on-access";
    RuntimeDirectoryPreserve = "yes";
  };
};
# Schedule for the probe: first run 10s after boot, then 5s after each
# activation of the matching nas-probe service, with 1s timer accuracy.
systemd.timers.nas-probe = {
  description = "Run NAS reachability probe every 5s";
  timerConfig = {
    AccuracySec = "1s";
    OnBootSec = "10s";
    OnUnitActiveSec = "5s";
  };
  wantedBy = [ "timers.target" ];
};
# Open the wake endpoint's port on the "server" interface so that web-02
# (bridged to server) can reach it.
networking.firewall.interfaces."server".allowedTCPPorts = [ 9800 ];

# HTTP endpoint for nginx error_page → WOL trigger.
# When nginx on web-arm gets a 502/504 from a NAS-proxied vhost, it
# proxies the request here. Every accepted connection is handed to a
# forked nasWakeHttpScript, which sends the WOL (subject to the wake
# script's gates) and replies with the "waking up" page.
systemd.services.nas-wake-http = {
  description = "HTTP endpoint to wake NAS on reverse-proxy failure";
  wantedBy = [ "multi-user.target" ];

  # The listener binds to a specific address, so wait for the network.
  wants = [ "network-online.target" ];
  after = [ "network-online.target" ];

  path = [ pkgs.coreutils ];

  serviceConfig = {
    Type = "simple";
    ExecStart = lib.concatStringsSep " " [
      "${pkgs.socat}/bin/socat"
      "TCP-LISTEN:9800,bind=${fwIp},reuseaddr,fork"
      "EXEC:${nasWakeHttpScript}"
    ];

    Restart = "always";
    RestartSec = "5s";

    # Shared state directory; must outlive restarts of this unit.
    RuntimeDirectory = "nas-wake-on-access";
    RuntimeDirectoryPreserve = "yes";
  };
};
}