nixos/hosts/nas/modules/auto-shutdown.nix

113 lines
4.4 KiB
Nix

# NAS auto-shutdown
# Powers the machine off when all of the following are true:
# 1. No active SSH session
# 2. pyload is not downloading (no non-local TCP peers)
# 3. pyload has no hook children (extraction, filebot, unrar, ...)
# 4. Both spinning HDDs are in standby
# 5. At least 15 minutes have passed since boot (via OnBootSec)
# The fw host re-wakes the NAS on demand via WOL
# (see hosts/fw/modules/nas-wake-on-access.nix).
{ config, lib, pkgs, ... }:
let
# Stable by-id paths of the spinning disks. The shutdown decision is gated
# on the power state of every entry in this list.
# Only the Toshiba HDDs are listed; NVMe drives do not spin down.
hdds = map (diskId: "/dev/disk/by-id/${diskId}") [
  "ata-TOSHIBA_MG10ACA20TE_8582A01SF4MJ"
  "ata-TOSHIBA_MG10ACA20TE_75V2A0H3F4MJ"
];
# Extended regex (anchored at the start of an ss "addr:port" field) for peers
# that must NOT be treated as real download traffic. Alternatives, in order:
# IPv4 loopback, the internal VLAN (10.42.0.0/16), IPv6 loopback, and
# IPv6 link-local.
localPeerRegex =
  let
    localPrefixes = [
      "127\\."
      "10\\.42\\."
      "\\[::1\\]"
      "\\[fe80"
    ];
  in
  "^(" + builtins.concatStringsSep "|" localPrefixes + ")";
# Idle-check script executed by nas-auto-shutdown.service.
# It walks through the checks below in order and exits 0 ("stay up") as soon
# as one of them indicates activity or cannot be evaluated reliably. Only
# when every check passes does it call `systemctl poweroff`.
autoShutdownScript =
  let
    # TCP ports sshd listens on, taken from the NixOS openssh module so this
    # check cannot drift from the real SSH configuration. Falls back to the
    # previous hard-coded port 22 if the option is an empty list.
    sshPorts =
      if config.services.openssh.ports == [ ]
      then [ 22 ]
      else config.services.openssh.ports;
    # ss filter expression matching a local (source) port equal to any of
    # the SSH ports, e.g. "( sport = :22 )" or "( sport = :22 or sport = :2222 )".
    sshPortFilter =
      "( ${lib.concatMapStringsSep " or " (port: "sport = :${toString port}") sshPorts} )";
  in
  pkgs.writeShellScript "nas-auto-shutdown" ''
    set -euo pipefail

    log() { echo "auto-shutdown: $*"; }

    # 1. SSH sessions. Cheapest check and it prevents shutting down while an
    #    admin is logged in. Uses ss at the socket layer so it catches
    #    forwarding-only sessions and `ssh host 'cmd'` runs that utmp misses.
    #    Fail-safe: if ss itself fails we cannot tell whether anyone is
    #    logged in, so stay up instead of treating the error as "no sessions".
    if ! ssh_sessions=$(${pkgs.iproute2}/bin/ss -H -t -n state established ${lib.escapeShellArg sshPortFilter}); then
      log "ss failed while checking SSH sessions, staying up"
      exit 0
    fi
    if [[ -n "$ssh_sessions" ]]; then
      log "active SSH session present, staying up"
      exit 0
    fi

    # 2. pyload active downloads: any established/outgoing pyload-owned socket
    #    to a non-local peer means a download is in flight.
    #    With several states selected, ss prints the State column first, so
    #    field 5 is the peer "addr:port".
    #    Fail-safe: an ss failure means the socket table is unknown, stay up.
    #    NOTE(review): the match relies on ss -p reporting the process name
    #    as exactly "pyload" - confirm against `ss -tnp` on the running host.
    if ! tcp_sockets=$(${pkgs.iproute2}/bin/ss -H -t -n -p \
        state established state syn-sent state syn-recv 2>/dev/null); then
      log "ss failed while checking pyload connections, staying up"
      exit 0
    fi
    # grep exits 1 when nothing matches; that is the normal idle case, hence
    # the trailing "|| true" under pipefail.
    pyload_conns=$(grep -F '"pyload"' <<<"$tcp_sockets" \
      | awk '{print $5}' \
      | grep -Ev '${localPeerRegex}' || true)
    if [[ -n "$pyload_conns" ]]; then
      log "pyload has active non-local connections, staying up"
      exit 0
    fi

    # 3. pyload hook children (extraction, filebot, unrar, 7z, java, ...).
    #    The package_finished hook is launched by pyload's ExternalScripts
    #    plugin, so every child lives in pyload.service's cgroup. cgroup.procs
    #    contains PIDs/TGIDs only, not TIDs, so pyload's internal thread pool
    #    cannot false-positive. Fail-safe: if the file is unreadable, stay up.
    cgroup_procs=/sys/fs/cgroup/system.slice/pyload.service/cgroup.procs
    if [[ ! -r "$cgroup_procs" ]]; then
      log "pyload cgroup procs file unreadable ($cgroup_procs), staying up"
      exit 0
    fi
    main_pid=$(${pkgs.systemd}/bin/systemctl show -p MainPID --value pyload.service)
    # Every PID in the cgroup other than the service's main PID is a child.
    children=$(grep -v -x -F "$main_pid" "$cgroup_procs" || true)
    if [[ -n "$children" ]]; then
      log "pyload hook children running ($(echo "$children" | tr '\n' ' ')), staying up"
      exit 0
    fi

    # 4. Both spinning HDDs must be in standby. hdparm -C is non-disturbing
    #    (does not wake the disk). The udev rule in power-management.nix runs
    #    hdparm -S 180, so standby implies >= 15 min of firmware-level idle;
    #    no extra shell-level debounce is needed.
    for disk in ${lib.escapeShellArgs hdds}; do
      if [[ ! -e "$disk" ]]; then
        log "disk $disk missing, staying up"
        exit 0
      fi
      device=$(readlink -f "$disk")
      # A failing hdparm or unparsable output is reported as "unknown",
      # which is not "standby" and therefore keeps the machine up.
      power_state=""
      if hdparm_out=$(${pkgs.hdparm}/bin/hdparm -C "$device" 2>/dev/null); then
        power_state=$(grep -oP '(standby|active/idle|active|idle)' <<<"$hdparm_out" \
          | head -1 || true)
      fi
      [[ -n "$power_state" ]] || power_state="unknown"
      if [[ "$power_state" != "standby" ]]; then
        log "$disk is $power_state, staying up"
        exit 0
      fi
    done

    # 5. All clear.
    log "all checks clear, powering off"
    ${pkgs.systemd}/bin/systemctl poweroff
  '';
in
{
# One-shot unit that runs the idle-check script once per activation.
systemd.services.nas-auto-shutdown = {
  description = "Power off NAS when idle (HDD standby + pyload/filebot quiet + no SSH)";
  # Packages placed on the unit's PATH; the script calls some tools
  # (readlink, head, tr, awk, grep) by bare name.
  path = [
    pkgs.coreutils
    pkgs.gawk
    pkgs.gnugrep
    pkgs.iproute2
    pkgs.hdparm
    pkgs.systemd
  ];
  serviceConfig.Type = "oneshot";
  serviceConfig.User = "root";
  serviceConfig.ExecStart = "${autoShutdownScript}";
};
# Timer driving nas-auto-shutdown.service.
systemd.timers.nas-auto-shutdown = {
  wantedBy = [ "timers.target" ];
  description = "Run NAS idle check every minute";
  # Requirement: the earliest possible shutdown is 15 min after boot, so a
  # WOL-triggered wake is not immediately followed by another poweroff.
  timerConfig.OnBootSec = "15min";
  # After the first run, re-check one minute after each activation of the
  # service.
  timerConfig.OnUnitActiveSec = "1min";
  timerConfig.AccuracySec = "10s";
  timerConfig.Persistent = false;
};
}