fix: remove ARP-based wake-up for the NAS and limit how often WOL packets are sent

This commit is contained in:
Dominik Polakovics Polakovics 2026-04-11 11:59:32 +02:00
parent 46f42dab4b
commit f3cecd422f

View file

@ -4,27 +4,32 @@
# packet so the machine comes back up on demand after it has powered itself
# off (see hosts/nas/modules/auto-shutdown.nix).
#
# Traffic reaches the NAS via two paths, so we need two detectors that feed
# the same wake script:
# Shape:
#
# 1. Cross-VLAN traffic is routed through fw and hits nftables' forward
# chain. A logging rule tags these packets and a journal follower
# translates the log line into a wake invocation.
# 1. Detector: nftables' forward chain logs packets headed to the NAS with
# a "nas-wake: " prefix. A journal follower translates each log line
# into a wake invocation. Only cross-VLAN traffic hits this path, which
# is exactly what we want — same-VLAN ARP bursts from stale neighbor
# entries are not user intent and must not wake the NAS.
#
# 2. Same-VLAN (server) traffic stays on the bridge and never reaches
# nftables. A tcpdump follower watches ARP-who-has for 10.42.97.11 on
# the server interface and triggers the wake from there.
# 2. Reachability probe: a systemd timer pings the NAS every 5s and
# writes the current epoch to last-seen-up on success. The wake script
# consults this timestamp and skips the WOL if the NAS was seen up
# recently — this both saves redundant WOLs and closes the UGREEN N100
# PSU-ramp-down race where a magic packet arriving right after
# poweroff makes the board boot into BIOS setup instead of the OS.
{ config, lib, pkgs, ... }:
let
nasIp = "${config.networkPrefix}.97.11";
nasMac = "6c:1f:f7:8e:a9:86";
serverBroadcast = "${config.networkPrefix}.97.255";
serverIface = "server";
stateDir = "/run/nas-wake-on-access";
lastWakeFile = "${stateDir}/last-wake";
lastSeenFile = "${stateDir}/last-seen-up";
cooldownSeconds = 30;
holdoffSeconds = 60;
wakeScript = pkgs.writeShellScript "nas-wake" ''
set -euo pipefail
@ -33,19 +38,29 @@ let
now=$(date +%s)
# Cooldown gate: at most one WOL every ${toString cooldownSeconds}s.
# Second line of defense against burst triggers.
if [[ -f "${lastWakeFile}" ]]; then
last=$(cat "${lastWakeFile}" 2>/dev/null || echo 0)
if (( now - last < ${toString cooldownSeconds} )); then
last_wake=$(cat "${lastWakeFile}" 2>/dev/null || echo 0)
if (( now - last_wake < ${toString cooldownSeconds} )); then
echo "nas-wake: cooldown active ($((now - last_wake))s < ${toString cooldownSeconds}s), skipping WOL"
exit 0
fi
fi
# If the NAS answers ping it is already up; skip WOL but refresh
# the cooldown so repeated probes don't spin the CPU.
if ${pkgs.iputils}/bin/ping -c1 -W1 -n ${nasIp} >/dev/null 2>&1; then
echo "nas-wake: NAS already up, not sending WOL"
echo "$now" > "${lastWakeFile}"
exit 0
# Hold-off gate: if the NAS was probed up within the last
# ${toString holdoffSeconds}s, skip WOL. Two cases covered:
# a) NAS is still up — WOL would be wasted (harmless but noisy).
# b) NAS just started powering off — PSU is ramping down and a magic
# packet arriving now is the UGREEN BIOS-cold-boot race window.
# Missing file => fw has just booted or the probe has not succeeded yet;
# fall through and send the WOL (state unknown, default to waking).
if [[ -f "${lastSeenFile}" ]]; then
last_seen=$(cat "${lastSeenFile}" 2>/dev/null || echo 0)
age=$(( now - last_seen ))
if (( age < ${toString holdoffSeconds} )); then
echo "nas-wake: NAS seen up ''${age}s ago (< ${toString holdoffSeconds}s), skipping WOL"
exit 0
fi
fi
echo "nas-wake: sending WOL to ${nasMac} via ${serverBroadcast}"
@ -65,17 +80,14 @@ let
done
'';
# ARP follower for same-VLAN traffic. Clients on the server VLAN talk to
# the NAS directly via the bridge, so their packets never hit nftables.
# An ARP "who-has 10.42.97.11" is the reliable early signal that someone
# wants to reach the NAS.
arpFollowerScript = pkgs.writeShellScript "nas-wake-arp-follower" ''
# Periodic reachability probe. One-shot: ping the NAS, on success write
# the current epoch to lastSeenFile. On failure, leave the file alone so
# the timestamp ages out naturally past holdoffSeconds.
nasProbeScript = pkgs.writeShellScript "nas-probe" ''
set -euo pipefail
${pkgs.tcpdump}/bin/tcpdump -i ${serverIface} -l -n -p -Q in \
'arp and host ${nasIp}' \
| while IFS= read -r _line; do
${wakeScript} || true
done
if ${pkgs.iputils}/bin/ping -c1 -W1 -n ${nasIp} >/dev/null 2>&1; then
date +%s > "${lastSeenFile}"
fi
'';
in
{
@ -84,27 +96,38 @@ in
after = [ "nftables.service" "systemd-journald.service" ];
requires = [ "systemd-journald.service" ];
wantedBy = [ "multi-user.target" ];
path = with pkgs; [ coreutils iputils wol systemd gnugrep ];
path = with pkgs; [ coreutils wol systemd gnugrep ];
serviceConfig = {
Type = "simple";
ExecStart = "${journalFollowerScript}";
Restart = "always";
RestartSec = "5s";
RuntimeDirectory = "nas-wake-on-access";
RuntimeDirectoryPreserve = "yes";
};
};
systemd.services.nas-wake-arp = {
description = "Wake NAS on same-VLAN ARP (server bridge)";
systemd.services.nas-probe = {
description = "Probe NAS reachability for wake-on-access hold-off";
after = [ "network-online.target" ];
wants = [ "network-online.target" ];
wantedBy = [ "multi-user.target" ];
path = with pkgs; [ coreutils iputils wol tcpdump ];
path = with pkgs; [ coreutils iputils ];
serviceConfig = {
Type = "simple";
ExecStart = "${arpFollowerScript}";
Restart = "always";
RestartSec = "5s";
AmbientCapabilities = [ "CAP_NET_RAW" "CAP_NET_ADMIN" ];
Type = "oneshot";
ExecStart = "${nasProbeScript}";
TimeoutStartSec = "3s";
RuntimeDirectory = "nas-wake-on-access";
RuntimeDirectoryPreserve = "yes";
};
};
# Timer for the NAS reachability probe (presumably activates nas-probe.service
# via the matching unit name — confirm against the NixOS systemd module).
# Schedule: first run 10s after boot, then 5s after each activation of the
# triggered unit, with timer accuracy tightened to 1s.
systemd.timers.nas-probe = {
  wantedBy = [ "timers.target" ];
  description = "Run NAS reachability probe every 5s";
  timerConfig.AccuracySec = "1s";
  timerConfig.OnBootSec = "10s";
  timerConfig.OnUnitActiveSec = "5s";
};
}