From 697f40f1edb4fd3c0db31785e03de84d5053a8cf Mon Sep 17 00:00:00 2001
From: Hermes
Date: Wed, 20 May 2026 14:20:47 -0400
Subject: [PATCH] feat: add staging-vm module with libvirt/KVM infrastructure

---
 flake.nix                             |   1 +
 modules/nixos/services/staging-vm.nix | 497 ++++++++++++++++++++++++++
 2 files changed, 498 insertions(+)
 create mode 100644 modules/nixos/services/staging-vm.nix

diff --git a/flake.nix b/flake.nix
index 8f8b51a..8b3d9f5 100644
--- a/flake.nix
+++ b/flake.nix
@@ -59,6 +59,7 @@
           ./modules/nixos/filesystem/hoardingcow-mount.nix
           ./modules/nixos/services/docker_manager.nix
           ./modules/nixos/services/open_code_server.nix
+          ./modules/nixos/services/staging-vm.nix
           ./modules/nixos/services/ollama_init_custom_models.nix
           ./modules/nixos/services/openclaw_node.nix
           ./modules/nixos/security/ai-worker-restricted.nix
diff --git a/modules/nixos/services/staging-vm.nix b/modules/nixos/services/staging-vm.nix
new file mode 100644
index 0000000..71f8a04
--- /dev/null
+++ b/modules/nixos/services/staging-vm.nix
@@ -0,0 +1,497 @@
+{ config, lib, pkgs, ... }:
+
+# Staging VM infrastructure: libvirt/KVM host setup with a NAT network, a
+# directory storage pool, and the `pr-test-vm` helper CLI for test VMs.
+
+with lib;
+
+let
+  cfg = config.services.staging-vm;
+
+  # "a.b.c.d/len" split into the network address and the prefix length.
+  subnetParts = splitString "/" cfg.network.subnet;
+  networkAddress = head subnetParts;
+  prefixLength = elemAt subnetParts 1;
+
+  # Gateway address of the NAT network: the first host address, i.e. the
+  # network address with its last octet incremented (192.168.122.0 -> .1).
+  # The bare network address cannot be assigned to the bridge itself.
+  networkIp =
+    let
+      octets = map toInt (splitString "." networkAddress);
+    in
+    concatStringsSep "." (map toString (init octets ++ [ (last octets + 1) ]));
+
+  # virsh bound to the system libvirt instance, for the systemd unit scripts.
+  virshSystem = "${pkgs.libvirt}/bin/virsh -c qemu:///system";
+
+  # ── pr-test-vm helper script ──────────────────────────────────────────
+  # Nix note: ''${...} inside the script is the escape for a literal shell
+  # ${...} expansion; a plain ${...} is interpolated by Nix at build time.
+  pr-test-vm = pkgs.writeShellScriptBin "pr-test-vm" ''
+    set -euo pipefail
+
+    LIBVIRT_URI="qemu:///system"
+    VM_DIR="${cfg.dataDir}"
+    NETWORK="${cfg.network.name}"
+    SCRIPT_NAME="$(basename "$0")"
+
+    # virsh against the system libvirt instance.
+    virsh_sys() {
+      ${pkgs.libvirt}/bin/virsh -c "$LIBVIRT_URI" "$@"
+    }
+
+    # Print the help text and exit with the given status (default: 1).
+    usage() {
+      cat <<EOF
+    Usage: $SCRIPT_NAME <command> [options]
+
+    Commands:
+      build <config.nix> [--name <name>]  Build VM image from a NixOS config
+      start <name>                        Start a VM
+      stop <name>                         Gracefully shut down a VM
+      destroy <name>                      Force-power-off and undefine a VM
+      ssh [user@]<name>                   SSH into a running VM
+      console <name>                      Connect to VM serial console
+      list                                List all staging VMs
+      status <name>                       Show VM status
+      rebuild <name>                      Redeploy the VM (destroy + start)
+
+    Examples:
+      $SCRIPT_NAME build ./vm-config.nix --name my-test
+      $SCRIPT_NAME start my-test
+      $SCRIPT_NAME ssh root@my-test
+    EOF
+      exit "''${1:-1}"
+    }
+
+    # Print the IPv4 address the VM holds on the staging network. Diagnostics
+    # go to stderr so callers capturing stdout only ever see the address.
+    vm_ip() {
+      local name="$1"
+      local mac
+      mac=$(virsh_sys domiflist "$name" 2>/dev/null \
+        | ${pkgs.gawk}/bin/awk 'NR>2 && $1 ~ /^vnet/ {print $NF; exit}') || true
+      if [ -z "$mac" ]; then
+        echo "error: cannot find MAC for VM '$name'" >&2
+        return 1
+      fi
+
+      # Lease rows read: date, time, MAC, protocol, address/prefix, ... so the
+      # address is field 5 (field 3 is the MAC); drop the "/prefix" suffix.
+      local ip
+      ip=$(virsh_sys net-dhcp-leases "$NETWORK" 2>/dev/null \
+        | ${pkgs.gawk}/bin/awk -v mac="$mac" '$3 == mac {split($5, addr, "/"); print addr[1]; exit}') || true
+      if [ -z "$ip" ]; then
+        echo "error: no DHCP lease found for VM '$name' (MAC: $mac)" >&2
+        return 1
+      fi
+      echo "$ip"
+    }
+
+    # Take the sub-command off the argument list so every branch below sees
+    # its own first argument as $1.
+    COMMAND="''${1:-help}"
+    if [ "$#" -gt 0 ]; then
+      shift
+    fi
+
+    case "$COMMAND" in
+      build)
+        CONFIG="''${1:?Missing NixOS config path}"
+        VM_NAME="''${2:-}"
+        [ -f "$CONFIG" ] || { echo "error: config file not found: $CONFIG"; exit 1; }
+
+        # Extract name from --name flag or config basename
+        if [ "''${2:-}" = "--name" ] && [ -n "''${3:-}" ]; then
+          VM_NAME="$3"
+        elif [ -z "$VM_NAME" ] || [ "''${VM_NAME#--}" != "$VM_NAME" ]; then
+          VM_NAME="$(basename "$CONFIG" .nix)"
+        fi
+
+        BUILD_DIR="$VM_DIR/$VM_NAME"
+        IMAGE="$BUILD_DIR/disk-image.qcow2"
+
+        echo "==> Building VM '$VM_NAME' from config: $CONFIG"
+        mkdir -p "$BUILD_DIR"
+
+        # Build the NixOS VM derivation
+        nix build --no-link -f "$CONFIG" vm 2>&1 || {
+          # Fallback: try as flake output
+          echo "Trying flake build..."
+          nix build "''${CONFIG%/.nix}#nixosConfigurations.$VM_NAME.config.system.build.vm" --no-link 2>&1 || {
+            echo "error: failed to build VM (tried both import and flake)"
+            exit 1
+          }
+        }
+
+        # NOTE(review): neither build above writes to $IMAGE, which is what
+        # the start command boots from; say so rather than fail later.
+        if [ ! -f "$IMAGE" ]; then
+          echo "warning: no disk image at $IMAGE yet; 'start' needs one" >&2
+        fi
+
+        echo "==> Build complete. Run 'pr-test-vm start $VM_NAME' to launch."
+        ;;
+
+      start)
+        VM_NAME="''${1:?Missing VM name}"
+        IMAGE="$VM_DIR/$VM_NAME/disk-image.qcow2"
+        [ -f "$IMAGE" ] || { echo "error: no disk image found at $IMAGE. Build first."; exit 1; }
+
+        # Check if already running
+        STATE=$(virsh_sys domstate "$VM_NAME" 2>/dev/null || echo "undefined")
+        if [ "$STATE" = "running" ]; then
+          echo "VM '$VM_NAME' is already running."
+          exit 0
+        fi
+
+        echo "==> Starting VM '$VM_NAME'..."
+
+        # Undefine if defined but not running
+        if [ "$STATE" != "undefined" ]; then
+          virsh_sys undefine "$VM_NAME" 2>/dev/null || true
+        fi
+
+        # Define and start with virt-install
+        ${pkgs.virt-manager}/bin/virt-install \
+          --connect "$LIBVIRT_URI" \
+          --name "$VM_NAME" \
+          --memory "${toString cfg.memory}" \
+          --vcpus "${toString cfg.vcpus}" \
+          --disk "$IMAGE",bus=virtio \
+          --import \
+          --network network="$NETWORK",model=virtio \
+          --graphics none \
+          --console pty,target_type=virtio \
+          --serial pty \
+          --memballoon virtio \
+          --rng /dev/urandom \
+          --noautoconsole \
+          --os-variant detect=on,name=generic
+
+        echo "==> VM '$VM_NAME' started. Get IP with: pr-test-vm status $VM_NAME"
+        ;;
+
+      stop)
+        VM_NAME="''${1:?Missing VM name}"
+        echo "==> Stopping VM '$VM_NAME'..."
+        if virsh_sys shutdown "$VM_NAME" 2>/dev/null; then
+          echo "Waiting for VM to shut down..."
+          for _ in $(seq 1 30); do
+            STATE=$(virsh_sys domstate "$VM_NAME" 2>/dev/null || echo "undefined")
+            if [ "$STATE" != "running" ]; then
+              echo "VM stopped."
+              exit 0
+            fi
+            sleep 2
+          done
+          echo "warning: VM did not shut down gracefully, use 'destroy' for force"
+        else
+          echo "VM '$VM_NAME' not running or does not exist."
+        fi
+        ;;
+
+      destroy)
+        VM_NAME="''${1:?Missing VM name}"
+        echo "==> Destroying VM '$VM_NAME'..."
+        virsh_sys destroy "$VM_NAME" 2>/dev/null || true
+        virsh_sys undefine "$VM_NAME" 2>/dev/null || true
+        echo "==> VM '$VM_NAME' destroyed and undefined."
+        ;;
+
+      ssh)
+        TARGET="''${1:?Usage: $SCRIPT_NAME ssh [user@]<name>}"
+        # Split user@hostname if present. SSH_USER rather than USER, so the
+        # login name exported by the environment is not clobbered.
+        case "$TARGET" in
+          *@*)
+            SSH_USER="''${TARGET%@*}"
+            VM_NAME="''${TARGET#*@}"
+            ;;
+          *)
+            SSH_USER=""
+            VM_NAME="$TARGET"
+            ;;
+        esac
+
+        IP=$(vm_ip "$VM_NAME") || exit 1
+        exec ${pkgs.openssh}/bin/ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
+          "''${SSH_USER:+$SSH_USER@}$IP"
+        ;;
+
+      console)
+        VM_NAME="''${1:?Missing VM name}"
+        exec ${pkgs.libvirt}/bin/virsh -c "$LIBVIRT_URI" console "$VM_NAME"
+        ;;
+
+      list)
+        echo "Staging VMs:"
+        virsh_sys list --all
+        echo ""
+        echo "Active networks:"
+        virsh_sys net-list
+        echo ""
+        echo "Storage pools:"
+        virsh_sys pool-list
+        ;;
+
+      status)
+        VM_NAME="''${1:?Missing VM name}"
+        echo "VM: $VM_NAME"
+        STATE=$(virsh_sys domstate "$VM_NAME" 2>/dev/null || echo "not found")
+        echo "State: $STATE"
+        if [ "$STATE" = "running" ]; then
+          IP=$(vm_ip "$VM_NAME" 2>/dev/null || echo "N/A")
+          echo "IP: $IP"
+          virsh_sys dommemstat "$VM_NAME" 2>/dev/null | head -3 || true
+        fi
+        ;;
+
+      rebuild)
+        VM_NAME="''${1:?Missing VM name}"
+        "$0" destroy "$VM_NAME"
+        "$0" start "$VM_NAME"
+        ;;
+
+      help|--help|-h)
+        usage 0
+        ;;
+
+      *)
+        usage
+        ;;
+    esac
+  '';
+in
+{
+  options.services.staging-vm = {
+    enable = mkEnableOption "Staging VM infrastructure with libvirt/KVM";
+
+    network = {
+      name = mkOption {
+        type = types.str;
+        default = "staging";
+        description = "Name of the libvirt NAT network for staging VMs";
+      };
+
+      bridge = mkOption {
+        type = types.str;
+        default = "virbr1";
+        description = "Bridge interface name for the staging network";
+      };
+
+      # NOTE(review): 192.168.122.0/24 is also the subnet of libvirt's stock
+      # "default" network; confirm the two never run side by side on a host.
+      subnet = mkOption {
+        type = types.str;
+        default = "192.168.122.0/24";
+        description = "NAT network subnet in CIDR notation";
+      };
+
+      dhcpStart = mkOption {
+        type = types.str;
+        default = "192.168.122.2";
+        description = "Start of the DHCP range";
+      };
+
+      dhcpEnd = mkOption {
+        type = types.str;
+        default = "192.168.122.254";
+        description = "End of the DHCP range";
+      };
+    };
+
+    storagePool = mkOption {
+      type = types.str;
+      default = "/var/lib/libvirt/images";
+      description = "Directory path for the libvirt storage pool";
+    };
+
+    dataDir = mkOption {
+      type = types.str;
+      default = "/var/lib/staging-vm";
+      description = "Directory for staging VM test data (images, cloud-init configs)";
+    };
+
+    memory = mkOption {
+      type = types.ints.positive;
+      default = 2048;
+      description = "Default RAM in MiB for staging VMs";
+    };
+
+    vcpus = mkOption {
+      type = types.ints.positive;
+      default = 2;
+      description = "Default number of vCPUs for staging VMs";
+    };
+
+    users = mkOption {
+      type = types.listOf types.str;
+      default = [ "gortium" ];
+      description = "Users added to the libvirtd group so they can manage staging VMs";
+    };
+  };
+
+  config = mkIf cfg.enable {
+    assertions = [
+      {
+        assertion = length subnetParts == 2;
+        message = "services.staging-vm.network.subnet must be in CIDR notation, e.g. 192.168.122.0/24";
+      }
+    ];
+
+    # ── libvirtd with QEMU/KVM ──────────────────────────────────────────
+    virtualisation.libvirtd = {
+      enable = true;
+
+      qemu = {
+        package = pkgs.qemu_kvm;
+        runAsRoot = true;
+
+        swtpm = {
+          enable = true;
+        };
+
+        ovmf = {
+          enable = true;
+          packages = [ pkgs.OVMF ];
+        };
+      };
+
+      # Allow the staging bridge for guest networking
+      allowedBridges = [ cfg.network.bridge ];
+    };
+
+    # ── System packages ─────────────────────────────────────────────────
+    # A single definition: an attribute may only be assigned once per set,
+    # so the helper script is listed here with the rest.
+    environment.systemPackages = with pkgs; [
+      libvirt       # virsh, virt-admin
+      qemu_kvm      # QEMU/KVM
+      swtpm         # Software TPM
+      OVMF          # UEFI firmware for VMs
+      virt-manager  # GUI management; also ships virt-install
+      virt-viewer   # SPICE/VNC viewer
+      libguestfs    # virt-customize, guestfish
+      cdrtools      # ISO authoring for cloud-init seed images
+      jq            # JSON parsing
+      gawk          # awk for DHCP lease parsing
+      gnugrep       # grep
+      pr-test-vm    # staging VM helper CLI defined above
+    ];
+
+    # ── User permissions ────────────────────────────────────────────────
+    users.users = genAttrs cfg.users (_: { extraGroups = [ "libvirtd" ]; });
+
+    # ── Directories ─────────────────────────────────────────────────────
+    systemd.tmpfiles.rules = [
+      "d ${cfg.storagePool} 0755 root root -"
+      "d ${cfg.dataDir} 0755 root root -"
+    ];
+
+    # ── Default NAT network definition ──────────────────────────────────
+    systemd.services.define-staging-network = {
+      description = "Define the staging libvirt NAT network";
+      after = [ "libvirtd.service" ];
+      requires = [ "libvirtd.service" ];
+      wantedBy = [ "multi-user.target" ];
+      before = [ "define-staging-pool.service" ];
+      serviceConfig = {
+        Type = "oneshot";
+        RemainAfterExit = true;
+      };
+      script = let
+        networkXml = pkgs.writeText "staging-network.xml" ''
+          <network>
+            <name>${cfg.network.name}</name>
+            <forward mode="nat"/>
+            <bridge name="${cfg.network.bridge}" stp="on" delay="0"/>
+            <ip address="${networkIp}" prefix="${prefixLength}">
+              <dhcp>
+                <range start="${cfg.network.dhcpStart}" end="${cfg.network.dhcpEnd}"/>
+              </dhcp>
+            </ip>
+          </network>
+        '';
+      in ''
+        set -e
+        export LC_ALL=C  # keep virsh output parseable below
+        if ${virshSystem} net-info "${cfg.network.name}" >/dev/null 2>&1; then
+          echo "Network '${cfg.network.name}' already exists"
+        else
+          echo "Defining network '${cfg.network.name}'..."
+          ${virshSystem} net-define "${networkXml}"
+        fi
+        ${virshSystem} net-autostart "${cfg.network.name}"
+        # Start the network if not already active. virsh has no "net-state"
+        # command; the state is the "Active:" row of net-info.
+        ACTIVE=$(${virshSystem} net-info "${cfg.network.name}" | ${pkgs.gawk}/bin/awk '$1 == "Active:" {print $2}')
+        if [ "$ACTIVE" != "yes" ]; then
+          ${virshSystem} net-start "${cfg.network.name}"
+        fi
+        echo "Network '${cfg.network.name}' is ready."
+      '';
+    };
+
+    # ── Storage pool definition ─────────────────────────────────────────
+    systemd.services.define-staging-pool = {
+      description = "Define the staging libvirt storage pool";
+      after = [ "libvirtd.service" "define-staging-network.service" ];
+      requires = [ "libvirtd.service" ];
+      wantedBy = [ "multi-user.target" ];
+      serviceConfig = {
+        Type = "oneshot";
+        RemainAfterExit = true;
+      };
+      script = ''
+        set -e
+        export LC_ALL=C  # keep virsh output parseable below
+        if ${virshSystem} pool-info staging >/dev/null 2>&1; then
+          echo "Storage pool 'staging' already exists"
+        else
+          echo "Defining storage pool 'staging' at ${cfg.storagePool}..."
+          ${virshSystem} pool-define-as \
+            --name staging --type dir --target "${cfg.storagePool}"
+        fi
+        ${virshSystem} pool-autostart staging
+        # virsh has no "pool-state" command; the state is the "State:" row
+        # of pool-info.
+        STATE=$(${virshSystem} pool-info staging | ${pkgs.gawk}/bin/awk '$1 == "State:" {print $2}')
+        if [ "$STATE" != "running" ]; then
+          ${virshSystem} pool-build staging
+          ${virshSystem} pool-start staging
+        fi
+        echo "Storage pool 'staging' is ready."
+      '';
+    };
+
+    # ── Firewall rules for libvirt guests ───────────────────────────────
+    networking.firewall = {
+      # Trust the bridge interface to allow guest traffic
+      trustedInterfaces = [ cfg.network.bridge ];
+
+      # These commands are part of the firewall start script and may run more
+      # than once, so each rule is inserted only when `iptables -C` reports it
+      # missing; otherwise the built-in chains would collect duplicates.
+      extraCommands = mkAfter ''
+        # Allow forwarding between the staging bridge and the outside world
+        iptables -C FORWARD -i ${cfg.network.bridge} -j ACCEPT 2>/dev/null \
+          || iptables -I FORWARD -i ${cfg.network.bridge} -j ACCEPT 2>/dev/null || true
+        iptables -C FORWARD -o ${cfg.network.bridge} -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT 2>/dev/null \
+          || iptables -I FORWARD -o ${cfg.network.bridge} -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT 2>/dev/null || true
+
+        # NAT for guest outbound traffic
+        iptables -t nat -C POSTROUTING -s ${cfg.network.subnet} -j MASQUERADE 2>/dev/null \
+          || iptables -t nat -I POSTROUTING -s ${cfg.network.subnet} -j MASQUERADE 2>/dev/null || true
+      '';
+
+      # Remove the same rules when the firewall is stopped.
+      extraStopCommands = ''
+        iptables -D FORWARD -i ${cfg.network.bridge} -j ACCEPT 2>/dev/null || true
+        iptables -D FORWARD -o ${cfg.network.bridge} -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT 2>/dev/null || true
+        iptables -t nat -D POSTROUTING -s ${cfg.network.subnet} -j MASQUERADE 2>/dev/null || true
+      '';
+    };
+  };
+}