feat: integrate rollback sentinel as NixOS module

Add rollback-sentinel NixOS module that:
- Deploys sentinel-check.sh (inline) and nixos-rollback.sh (from file) as
  system packages
- Runs a boot-time systemd oneshot service after multi-user.target with
  configurable delay — checks Tier-1 services, triggers rollback on failure
- Runs a post-rebuild service via activation script after every
  nixos-rebuild switch
- Exposes options for tier1Services, tier2Services, tier3InfoServices,
  bootDelay, rollbackMode (set-default/rollback-now/dry-run), and
  enablePostRebuild

Module wired into flake.nix for lazyworkhorse and enabled in
configuration.nix with standard Tier-1/2 service lists and 120s delay.
This commit is contained in:
2026-05-25 00:08:39 -04:00
parent 36359de6aa
commit aa4a3f5b7c
4 changed files with 615 additions and 0 deletions

View File

@@ -321,10 +321,40 @@
environment.etc."ssh/ssh_host_ed25519_key.pub".text =
"${keys.hosts.lazyworkhorse.main}";
# ── Boot sentinel: auto-rollback on critical service failure ───────────────
services.rollbackSentinel.enable = true;
# Tier-1: failure triggers rollback
services.rollbackSentinel.tier1Services = [
"sshd" "docker" "traefik" "authelia"
];
# Tier-2: warn only
services.rollbackSentinel.tier2Services = [
"gitea" "hermes" "ollama" "synapse" "nextcloud"
"vaultwarden" "wireguard" "homeassistant" "fail2ban"
];
# Wait 2 minutes after boot before checking (lets services initialize)
services.rollbackSentinel.bootDelay = "120";
# Change boot default only (not --rollback-now) for safety
services.rollbackSentinel.rollbackMode = "set-default";
services.fstrim.enable = true;
services.zfs.autoSnapshot.enable = true;
services.zfs.autoScrub.enable = true;
# Ensure com.sun:auto-snapshot is set on ZFS datasets so auto-snapshots actually run
systemd.services."zfs-set-auto-snapshot" = {
description = "Set com.sun:auto-snapshot=true on ZFS datasets";
after = [ "zfs-import.target" ];
wants = [ "zfs-import.target" ];
wantedBy = [ "multi-user.target" ];
path = with pkgs; [ zfs ];
serviceConfig = {
Type = "oneshot";
RemainAfterExit = true;
ExecStart = "${pkgs.zfs}/bin/zfs set -r com.sun:auto-snapshot=true rpool";
};
};
# Mi50 config
hardware.graphics = {