diff --git a/hosts/fw/configuration.nix b/hosts/fw/configuration.nix index 8caf67a..b43c3c9 100644 --- a/hosts/fw/configuration.nix +++ b/hosts/fw/configuration.nix @@ -27,6 +27,7 @@ ./modules/podman.nix ./modules/omada.nix ./modules/ddclient.nix + ./modules/nas-wake-on-access.nix # ./modules/wol.nix @@ -94,7 +95,6 @@ nixpkgs.config.allowUnfreePredicate = pkg: builtins.elem (lib.getName pkg) [ "mongodb" "ai-mailer" - "filebot" "claude-code" ]; diff --git a/hosts/fw/modules/firewall.nix b/hosts/fw/modules/firewall.nix index 31f1fbc..33d904d 100644 --- a/hosts/fw/modules/firewall.nix +++ b/hosts/fw/modules/firewall.nix @@ -85,6 +85,12 @@ chain forward { type filter hook forward priority filter; policy drop; + # Wake-on-access: flag new traffic aimed at the NAS so + # nas-wake-journal.service can fire a WOL. No verdict => falls + # through, does not broaden policy. ct state new skips ongoing + # flows; rate limit caps journal spam before it leaves the kernel. + ip daddr ${config.networkPrefix}.97.11 ct state new limit rate 30/minute log prefix "nas-wake: " comment "trigger wake-on-access" + iifname "wg_cloonar" counter accept comment "test wireguard" iifname "wg_cloonar" oifname lo counter accept comment "wireguard to server" diff --git a/hosts/fw/modules/nas-wake-on-access.nix b/hosts/fw/modules/nas-wake-on-access.nix new file mode 100644 index 0000000..c44eef8 --- /dev/null +++ b/hosts/fw/modules/nas-wake-on-access.nix @@ -0,0 +1,110 @@ +# NAS wake-on-access (fw side) +# +# Detects traffic aimed at the NAS (10.42.97.11) and sends a WOL magic +# packet so the machine comes back up on demand after it has powered itself +# off (see hosts/nas/modules/auto-shutdown.nix). +# +# Traffic reaches the NAS via two paths, so we need two detectors that feed +# the same wake script: +# +# 1. Cross-VLAN traffic is routed through fw and hits nftables' forward +# chain. A logging rule tags these packets and a journal follower +# translates the log line into a wake invocation. 
+#
+#   2. Same-VLAN (server) traffic stays on the bridge and never reaches
+#      nftables. A tcpdump follower watches ARP-who-has for 10.42.97.11 on
+#      the server interface and triggers the wake from there.
+{ config, lib, pkgs, ... }:
+
+let
+  nasIp = "${config.networkPrefix}.97.11";
+  nasMac = "6c:1f:f7:8e:a9:86"; # MAC the magic packet is built for
+  serverBroadcast = "${config.networkPrefix}.97.255"; # destination address handed to wol -i
+  serverIface = "server";
+
+  stateDir = "/run/nas-wake-on-access";
+  lastWakeFile = "${stateDir}/last-wake"; # epoch second of the last run that got past the cooldown
+  cooldownSeconds = 30;
+
+  # Shared wake routine for both followers: cooldown gate, liveness ping, WOL broadcast.
+  wakeScript = pkgs.writeShellScript "nas-wake" ''
+    set -euo pipefail
+
+    mkdir -p "${stateDir}"
+    now=$(date +%s)
+
+    # Cooldown gate: max one WOL per ${toString cooldownSeconds}s; an absent, empty or non-numeric stamp means "not cooling down".
+    if [[ -f "${lastWakeFile}" ]]; then
+      last=$(cat "${lastWakeFile}" 2>/dev/null || true)
+      if [[ "$last" =~ ^[1-9][0-9]*$ ]] && (( now - last < ${toString cooldownSeconds} )); then
+        exit 0
+      fi
+    fi
+
+    # NAS already answers ping: skip the WOL but refresh the stamp so repeated probes don't spin the CPU.
+    if ${pkgs.iputils}/bin/ping -c1 -W1 -n ${nasIp} >/dev/null 2>&1; then
+      echo "nas-wake: NAS already up, not sending WOL"
+      echo "$now" > "${lastWakeFile}"
+      exit 0
+    fi
+
+    echo "nas-wake: sending WOL to ${nasMac} via ${serverBroadcast}"
+    ${pkgs.wol}/bin/wol -i ${serverBroadcast} ${nasMac} || echo "nas-wake: WOL send failed" >&2
+    echo "$now" > "${lastWakeFile}" # stamp even after a failed send so errors cannot cause a retry storm
+  '';
+
+  # Journal follower for cross-VLAN (routed) traffic. nftables logs a line
+  # prefixed with "nas-wake: " into the kernel ring buffer for every new
+  # packet headed to the NAS (rate-limited kernel-side).
+  journalFollowerScript = pkgs.writeShellScript "nas-wake-journal-follower" ''
+    set -euo pipefail
+    ${pkgs.systemd}/bin/journalctl -kf -o cat --since now \
+      | ${pkgs.gnugrep}/bin/grep --line-buffered -F "nas-wake:" \
+      | while IFS= read -r _line; do
+          ${wakeScript} || true
+        done
+  '';
+
+  # ARP follower for same-VLAN traffic. Clients on the server VLAN talk to
+  # the NAS directly via the bridge, so their packets never hit nftables.
+  # The earliest dependable hint that a client wants the NAS is an ARP
+  # "who-has 10.42.97.11", so that is what this follower reacts to.
+  arpFollowerScript = pkgs.writeShellScript "nas-wake-arp-follower" ''
+    set -euo pipefail
+    ${pkgs.tcpdump}/bin/tcpdump -l -n -p -Q in -i ${serverIface} \
+      'arp and host ${nasIp}' \
+      | while IFS= read -r _pkt; do
+          ${wakeScript} || true
+        done
+  '';
+in
+{
+  systemd.services.nas-wake-journal = {
+    description = "Wake NAS on cross-VLAN traffic (nftables log follower)";
+    wantedBy = [ "multi-user.target" ];
+    requires = [ "systemd-journald.service" ];
+    after = [ "nftables.service" "systemd-journald.service" ];
+    path = [ pkgs.coreutils pkgs.iputils pkgs.wol pkgs.systemd pkgs.gnugrep ];
+    serviceConfig = {
+      ExecStart = "${journalFollowerScript}";
+      Type = "simple";
+      Restart = "always"; # bring the follower back whenever its pipeline exits
+      RestartSec = "5s";
+    };
+  };
+
+  systemd.services.nas-wake-arp = {
+    description = "Wake NAS on same-VLAN ARP (server bridge)";
+    wantedBy = [ "multi-user.target" ];
+    wants = [ "network-online.target" ];
+    after = [ "network-online.target" ];
+    path = [ pkgs.coreutils pkgs.iputils pkgs.wol pkgs.tcpdump ];
+    serviceConfig = {
+      ExecStart = "${arpFollowerScript}";
+      Type = "simple";
+      Restart = "always"; # bring the follower back whenever its pipeline exits
+      RestartSec = "5s";
+      AmbientCapabilities = [ "CAP_NET_RAW" "CAP_NET_ADMIN" ]; # granted for tcpdump's capture
+    };
+  };
+}
diff --git a/hosts/nas/configuration.nix b/hosts/nas/configuration.nix index 2b43210..eba4ffc 100644 --- a/hosts/nas/configuration.nix +++ b/hosts/nas/configuration.nix @@ -21,6 +21,7 @@ in ./modules/audiobookshelf.nix ./modules/power-management.nix ./modules/disk-monitoring.nix + ./modules/auto-shutdown.nix ./modules/ugreen-leds.nix ./hardware-configuration.nix @@ -45,6 +46,10 @@ in networking.firewall.enable = true; networking.firewall.allowedTCPPorts = [ 22 ]; + # Wake-on-LAN: fw re-wakes the NAS on demand after auto-shutdown. + # Assumes WOL is enabled in BIOS; translates to `ethtool -s enp2s0 wol g`.
+ networking.interfaces.enp2s0.wakeOnLan.enable = true; + # SOPS configuration sops.age.sshKeyPaths = [ "/etc/ssh/ssh_host_ed25519_key" ]; sops.defaultSopsFile = ./secrets.yaml;
diff --git a/hosts/nas/modules/auto-shutdown.nix b/hosts/nas/modules/auto-shutdown.nix
new file mode 100644
index 0000000..5940cfd
--- /dev/null
+++ b/hosts/nas/modules/auto-shutdown.nix
@@ -0,0 +1,113 @@
+# NAS auto-shutdown
+# The machine powers itself off once every condition below holds:
+#   1. No active SSH session
+#   2. pyload is not downloading (no non-local TCP peers)
+#   3. pyload has no hook children (extraction, filebot, unrar, ...)
+#   4. Both spinning HDDs are in standby
+#   5. At least 15 minutes have passed since boot (via OnBootSec)
+# Waking it again is the fw host's job, done on demand via WOL
+# (see hosts/fw/modules/nas-wake-on-access.nix).
+{ config, lib, pkgs, ... }:
+
+let
+  # The two Toshiba HDDs are the only drives consulted for the shutdown
+  # decision; NVMe drives do not spin down.
+  hdds = [
+    "/dev/disk/by-id/ata-TOSHIBA_MG10ACA20TE_8582A01SF4MJ"
+    "/dev/disk/by-id/ata-TOSHIBA_MG10ACA20TE_75V2A0H3F4MJ"
+  ];
+
+  # Peers matching this pattern are not "real download traffic": loopback,
+  # the internal 10.42.0.0/16 range, and IPv6 loopback / link-local.
+  localPeerRegex = "^(127\\.|10\\.42\\.|\\[::1\\]|\\[fe80)";
+
+  autoShutdownScript = pkgs.writeShellScript "nas-auto-shutdown" ''
+    set -euo pipefail
+    log() { echo "auto-shutdown: $*"; }
+
+    # 1. SSH sessions. The cheapest test, and it keeps the box up while an
+    #    admin is connected. Looking at sockets via ss also covers
+    #    forwarding-only sessions and `ssh host 'cmd'` runs that utmp misses.
+    #    NOTE: SSH port hardcoded to 22 — keep in sync with configuration.nix.
+    ssh_peers=$(${pkgs.iproute2}/bin/ss -Htn state established '( sport = :22 )' || true)
+    [[ -z "$ssh_peers" ]] || {
+      log "active SSH session present, staying up"
+      exit 0
+    }
+
+    # 2. 
pyload downloads in flight: any pyload-owned TCP socket that is established
+    #    or still handshaking towards a non-local peer counts as live traffic.
+    remote_peers=$(${pkgs.iproute2}/bin/ss -Htnp \
+      state established state syn-sent state syn-recv 2>/dev/null \
+      | grep -F '"pyload"' \
+      | awk '{print $5}' \
+      | grep -Ev '${localPeerRegex}' || true)
+    [[ -z "$remote_peers" ]] || {
+      log "pyload has active non-local connections, staying up"
+      exit 0
+    }
+
+    # 3. pyload hook children (extraction, filebot, unrar, 7z, java, ...).
+    #    pyload's ExternalScripts plugin starts the package_finished hook, so
+    #    every child sits in pyload.service's cgroup. cgroup.procs lists
+    #    PIDs/TGIDs only, never TIDs, hence pyload's internal thread pool
+    #    cannot false-positive. Fail-safe: an unreadable file keeps us up.
+    procs_file=/sys/fs/cgroup/system.slice/pyload.service/cgroup.procs
+    [[ -r "$procs_file" ]] || {
+      log "pyload cgroup procs file unreadable ($procs_file), staying up"
+      exit 0
+    }
+    pyload_pid=$(${pkgs.systemd}/bin/systemctl show -p MainPID --value pyload.service)
+    hook_pids=$(grep -vxF "$pyload_pid" "$procs_file" || true)
+    [[ -z "$hook_pids" ]] || {
+      log "pyload hook children running ($(echo "$hook_pids" | tr '\n' ' ')), staying up"
+      exit 0
+    }
+
+    # 4. Every spinning HDD has to report standby. hdparm -C is non-disturbing
+    #    (does not wake the disk). The udev rule in power-management.nix runs
+    #    hdparm -S 180, so standby implies >= 15 min of firmware-level idle —
+    #    which is why no shell-level debounce is layered on top.
+    for hdd in ${lib.concatStringsSep " " hdds}; do
+      [[ -e "$hdd" ]] || {
+        log "disk $hdd missing, staying up"
+        exit 0
+      }
+      dev=$(readlink -f "$hdd")
+      state=$(${pkgs.hdparm}/bin/hdparm -C "$dev" 2>/dev/null \
+        | grep -oP '(standby|active/idle|active|idle)' | head -1 || echo "unknown")
+      [[ "$state" == "standby" ]] || {
+        log "$hdd is $state, staying up"
+        exit 0
+      }
+    done
+
+    # 5. All clear.
+    log "all checks clear, powering off"
+    ${pkgs.systemd}/bin/systemctl poweroff
+  '';
+in
+{
+  systemd.services.nas-auto-shutdown = {
+    description = "Power off NAS when idle (HDD standby + pyload/filebot quiet + no SSH)";
+    path = [ pkgs.coreutils pkgs.gawk pkgs.gnugrep pkgs.iproute2 pkgs.hdparm pkgs.systemd ];
+    serviceConfig = {
+      ExecStart = "${autoShutdownScript}";
+      User = "root";
+      Type = "oneshot"; # one idle check per activation; the timer below re-triggers it
+    };
+  };
+
+  systemd.timers.nas-auto-shutdown = {
+    description = "Run NAS idle check every minute";
+    wantedBy = [ "timers.target" ];
+    timerConfig = {
+      # The first check is held back until 15 min after boot so that a
+      # WOL-triggered wake is not immediately followed by another poweroff.
+      OnBootSec = "15min";
+      OnUnitActiveSec = "1min"; # afterwards: one minute after each activation
+      AccuracySec = "10s";
+      Persistent = false;
+    };
+  };
+}