From 0a37d27337b2b7c29ae1a180501e65b0e87c0295 Mon Sep 17 00:00:00 2001
From: Hermes
Date: Wed, 20 May 2026 14:24:37 -0400
Subject: [PATCH] feat: enhance staging-vm module

Improved pr-test-vm script (virt-install, DHCP IP discovery), added
packages (virt-manager, libguestfs, cdrtools, gawk, etc.), better
firewall rules, storage pool auto-creation, gortium in libvirtd group,
fixed OVMF package reference
---
Review notes (text between "---" and the diff is not part of the commit):

 * pr-test-vm: the sub-command is shifted off once before dispatch.
   Previously only `build` shifted, so start/stop/destroy/ssh/console/
   status read their own name as the VM name.
 * vm_ip: the address is field 5 of `virsh net-dhcp-leases` (field 3 is
   the MAC). Errors now go to stderr, so `ssh` shows them and `status`
   no longer prints them as the IP.
 * virsh has no `net-state` / `pool-state` commands; the checks read
   `net-info` ("Active:") and `pool-info` ("State:") instead.
 * environment.systemPackages and systemd.services.libvirtd.postStart
   were each defined twice in the same attribute set, which Nix rejects.
   Each now has a single definition.
 * Still open: `build` runs `nix build --no-link` and never writes
   $VM_DIR/<name>/disk-image.qcow2, which `start` requires.
 * NOTE(review): the libvirt network XML lines in the last two hunks lost
   their markup in transit. Tag names are reconstructed from the hunk
   line counts and the libvirt network schema; attribute values shown as
   "..." could not be recovered. Check them against the tree before
   applying. The commit/index hashes predate these edits.

 modules/nixos/services/staging-vm.nix | 498 +++++++++++++++++---------
 1 file changed, 303 insertions(+), 195 deletions(-)

diff --git a/modules/nixos/services/staging-vm.nix b/modules/nixos/services/staging-vm.nix
index 91bf667..e1c1b1d 100644
--- a/modules/nixos/services/staging-vm.nix
+++ b/modules/nixos/services/staging-vm.nix
@@ -4,6 +4,221 @@ with lib;
 
 let
   cfg = config.services.stagingVm;
+
+  # ── pr-test-vm helper script ──────────────────────────────────────────
+  pr-test-vm = pkgs.writeShellScriptBin "pr-test-vm" ''
+    set -euo pipefail
+
+    LIBVIRT_URI="qemu:///system"
+    VM_DIR="${cfg.dataPath}"
+    NETWORK="default"
+    SCRIPT_NAME="$(basename "$0")"
+
+    usage() {
+      cat <<EOF
+    Usage: $SCRIPT_NAME <command> [options]
+
+    Commands:
+      build <config.nix> [--name <name>]  Build VM image from a NixOS config
+      start <name>                        Start a VM
+      stop <name>                         Gracefully shut down a VM
+      destroy <name>                      Force-power-off and undefine a VM
+      ssh [user@]<name>                   SSH into a running VM
+      console <name>                      Connect to VM serial console
+      list                                List all staging VMs
+      status <name>                       Show VM status
+
+    Examples:
+      $SCRIPT_NAME build ./vm-config.nix --name my-test
+      $SCRIPT_NAME start my-test
+      $SCRIPT_NAME ssh root@my-test
+    EOF
+      exit 1
+    }
+
+    # Print an error message to stderr and abort with status 1.
+    die() {
+      echo "error: $*" >&2
+      exit 1
+    }
+
+    # Print the IPv4 address of VM $1, taken from its DHCP lease on $NETWORK.
+    # Diagnostics go to stderr so callers capturing stdout get only the IP.
+    vm_ip() {
+      local name="$1"
+      local mac ip
+      mac=$(${pkgs.libvirt}/bin/virsh -c "$LIBVIRT_URI" domiflist "$name" 2>/dev/null \
+        | ${pkgs.gawk}/bin/awk 'NR>2 && $1 ~ /^vnet/ {print $NF; exit}') || true
+      [ -n "$mac" ] || die "cannot find MAC for VM '$name'"
+
+      # Lease rows are: date time MAC protocol IP/prefix hostname client-id,
+      # so the address is field 5 (field 3 is the MAC) minus its /prefix.
+      ip=$(${pkgs.libvirt}/bin/virsh -c "$LIBVIRT_URI" net-dhcp-leases "$NETWORK" 2>/dev/null \
+        | ${pkgs.gawk}/bin/awk -v mac="$mac" '$3 == mac {sub(/\/.*/, "", $5); print $5; exit}') || true
+      [ -n "$ip" ] || die "no DHCP lease found for VM '$name' (MAC: $mac)"
+      echo "$ip"
+    }
+
+    # Shift the sub-command off once so that every branch below sees its
+    # own arguments starting at $1.
+    COMMAND="''${1:-help}"
+    if [ "$#" -gt 0 ]; then
+      shift
+    fi
+
+    case "$COMMAND" in
+      build)
+        CONFIG="''${1:?Missing NixOS config path}"
+        VM_NAME="''${2:-}"
+        [ -f "$CONFIG" ] || die "config file not found: $CONFIG"
+
+        # Extract name from --name flag or config basename
+        if [ "''${2:-}" = "--name" ] && [ -n "''${3:-}" ]; then
+          VM_NAME="$3"
+        elif [ -z "$VM_NAME" ] || [ "''${VM_NAME#--}" != "$VM_NAME" ]; then
+          VM_NAME="$(basename "$CONFIG" .nix)"
+        fi
+
+        BUILD_DIR="$VM_DIR/$VM_NAME"
+        echo "==> Building VM '$VM_NAME' from config: $CONFIG"
+        mkdir -p "$BUILD_DIR"
+
+        # Build the NixOS VM derivation
+        nix build --no-link -f "$CONFIG" vm 2>&1 || {
+          echo "Trying flake build..."
+          nix build "''${CONFIG%/.nix}#nixosConfigurations.$VM_NAME.config.system.build.vm" --no-link 2>&1 \
+            || die "failed to build VM (tried both import and flake)"
+        }
+
+        echo "==> Build complete. Run 'pr-test-vm start $VM_NAME' to launch."
+        ;;
+
+      start)
+        VM_NAME="''${1:?Missing VM name}"
+        IMAGE="$VM_DIR/$VM_NAME/disk-image.qcow2"
+        [ -f "$IMAGE" ] || die "no disk image found at $IMAGE. Build first."
+
+        # Check if already running
+        STATE=$(${pkgs.libvirt}/bin/virsh -c "$LIBVIRT_URI" domstate "$VM_NAME" 2>/dev/null || echo "undefined")
+        if [ "$STATE" = "running" ]; then
+          echo "VM '$VM_NAME' is already running."
+          exit 0
+        fi
+
+        echo "==> Starting VM '$VM_NAME'..."
+
+        # Undefine if defined but not running
+        if [ "$STATE" != "undefined" ]; then
+          ${pkgs.libvirt}/bin/virsh -c "$LIBVIRT_URI" undefine "$VM_NAME" 2>/dev/null || true
+        fi
+
+        # Define and start with virt-install
+        ${pkgs.virt-manager}/bin/virt-install \
+          --connect "$LIBVIRT_URI" \
+          --name "$VM_NAME" \
+          --memory "${toString cfg.memory}" \
+          --vcpus "${toString cfg.vcpus}" \
+          --disk "$IMAGE",bus=virtio \
+          --import \
+          --network network="$NETWORK",model=virtio \
+          --graphics none \
+          --console pty,target_type=virtio \
+          --serial pty \
+          --memballoon virtio \
+          --rng /dev/urandom \
+          --noautoconsole \
+          --os-variant detect=on,name=generic
+
+        echo "==> VM '$VM_NAME' started. Get IP with: pr-test-vm status $VM_NAME"
+        ;;
+
+      stop)
+        VM_NAME="''${1:?Missing VM name}"
+        echo "==> Stopping VM '$VM_NAME'..."
+        if ${pkgs.libvirt}/bin/virsh -c "$LIBVIRT_URI" shutdown "$VM_NAME" 2>/dev/null; then
+          echo "Waiting for VM to shut down..."
+          for _ in $(seq 1 30); do
+            STATE=$(${pkgs.libvirt}/bin/virsh -c "$LIBVIRT_URI" domstate "$VM_NAME" 2>/dev/null || echo "undefined")
+            if [ "$STATE" != "running" ]; then
+              echo "VM stopped."
+              exit 0
+            fi
+            sleep 2
+          done
+          echo "warning: VM did not shut down gracefully, use 'destroy' for force" >&2
+        else
+          echo "VM '$VM_NAME' not running or does not exist."
+        fi
+        ;;
+
+      destroy)
+        VM_NAME="''${1:?Missing VM name}"
+        echo "==> Destroying VM '$VM_NAME'..."
+        ${pkgs.libvirt}/bin/virsh -c "$LIBVIRT_URI" destroy "$VM_NAME" 2>/dev/null || true
+        ${pkgs.libvirt}/bin/virsh -c "$LIBVIRT_URI" undefine "$VM_NAME" 2>/dev/null || true
+        echo "==> VM '$VM_NAME' destroyed and undefined."
+        ;;
+
+      ssh)
+        TARGET="''${1:?Usage: $SCRIPT_NAME ssh [user@]<name>}"
+        # Split user@hostname if present. The variable is SSH_USER, not
+        # USER, which is normally the caller's login name in the environment.
+        case "$TARGET" in
+          *@*)
+            SSH_USER="''${TARGET%@*}"
+            VM_NAME="''${TARGET#*@}"
+            ;;
+          *)
+            SSH_USER=""
+            VM_NAME="$TARGET"
+            ;;
+        esac
+
+        IP=$(vm_ip "$VM_NAME") || exit 1
+        if [ -n "$SSH_USER" ]; then
+          exec ${pkgs.openssh}/bin/ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null "''${SSH_USER}@''${IP}"
+        else
+          exec ${pkgs.openssh}/bin/ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null "$IP"
+        fi
+        ;;
+
+      console)
+        VM_NAME="''${1:?Missing VM name}"
+        exec ${pkgs.libvirt}/bin/virsh -c "$LIBVIRT_URI" console "$VM_NAME"
+        ;;
+
+      list)
+        echo "Staging VMs:"
+        ${pkgs.libvirt}/bin/virsh -c "$LIBVIRT_URI" list --all
+        echo ""
+        echo "Active networks:"
+        ${pkgs.libvirt}/bin/virsh -c "$LIBVIRT_URI" net-list
+        echo ""
+        echo "Storage pools:"
+        ${pkgs.libvirt}/bin/virsh -c "$LIBVIRT_URI" pool-list
+        ;;
+
+      status)
+        VM_NAME="''${1:?Missing VM name}"
+        echo "VM: $VM_NAME"
+        STATE=$(${pkgs.libvirt}/bin/virsh -c "$LIBVIRT_URI" domstate "$VM_NAME" 2>/dev/null || echo "not found")
+        echo "State: $STATE"
+        if [ "$STATE" = "running" ]; then
+          IP=$(vm_ip "$VM_NAME" 2>/dev/null) || IP="N/A"
+          echo "IP: $IP"
+          ${pkgs.libvirt}/bin/virsh -c "$LIBVIRT_URI" dommemstat "$VM_NAME" 2>/dev/null | head -3 || true
+        fi
+        ;;
+
+      help|--help|-h)
+        usage
+        ;;
+
+      *)
+        usage
+        ;;
+    esac
+  '';
 in
 {
   options.services.stagingVm = {
@@ -45,7 +260,7 @@ in
   };
 
   config = mkIf cfg.enable {
-    # Enable libvirt daemon
+    # ── libvirtd with QEMU/KVM ──────────────────────────────────────────
     virtualisation.libvirtd = {
       enable = true;
       qemu = {
@@ -54,26 +269,87 @@ in
         swtpm.enable = true;
         ovmf = {
           enable = true;
-          packages = [ pkgs.OVMFFull.fd ];
+          packages = [ pkgs.OVMF ];
         };
       };
     };
 
-    # Kernel modules + groups already handled in configuration.nix
+    # ── System packages ─────────────────────────────────────────────────
+    environment.systemPackages = with pkgs; [
+      libvirt      # virsh, virt-admin
+      qemu_kvm     # QEMU/KVM
+      swtpm        # Software TPM
+      OVMF         # UEFI firmware for VMs
+      virt-manager # GUI + virt-install
+      virt-viewer  # SPICE/VNC viewer
+      libguestfs   # virt-customize, guestfish
+      cdrtools     # genisoimage for cloud-init ISOs
+      jq           # JSON parsing
+      gawk         # awk for DHCP lease parsing
+      gnugrep      # grep
+      pr-test-vm   # helper script from the let block (the only systemPackages definition)
+    ];
 
-    # libvirt NAT network (192.168.122.0/24)
+    # ── User permissions ────────────────────────────────────────────────
+    users.users.gortium.extraGroups = [ "libvirtd" ];
+
+    # ── Directories ─────────────────────────────────────────────────────
+    systemd.tmpfiles.rules = [
+      "d ${cfg.storagePath} 0755 root root -"
+      "d ${cfg.dataPath} 0755 root root -"
+    ];
+
+    # ── Default NAT network (192.168.122.0/24) and storage pool ─────────
+    # Both are handled by one postStart: Nix does not allow the same
+    # attribute to be defined twice within one attribute set.
+    systemd.services.libvirtd = {
+      postStart = ''
+        set -e
+
+        # Define the NAT network if it doesn't exist
+        if ${pkgs.libvirt}/bin/virsh -c qemu:///system net-info default >/dev/null 2>&1; then
+          echo "Network 'default' already exists"
+        else
+          echo "Defining default NAT network (192.168.122.0/24)..."
+          ${pkgs.libvirt}/bin/virsh -c qemu:///system net-define /etc/libvirt/qemu/networks/default.xml
+        fi
+        ${pkgs.libvirt}/bin/virsh -c qemu:///system net-autostart default 2>/dev/null || true
+        # Start the network if not active. virsh has no "net-state" command,
+        # so read the "Active:" field of net-info instead.
+        NET_ACTIVE=$(${pkgs.libvirt}/bin/virsh -c qemu:///system net-info default 2>/dev/null \
+          | ${pkgs.gawk}/bin/awk '/^Active:/ {print $2}' || true)
+        if [ "$NET_ACTIVE" != "yes" ]; then
+          ${pkgs.libvirt}/bin/virsh -c qemu:///system net-start default 2>/dev/null || true
+        fi
+        echo "Default network ready."
+
+        # Define the storage pool if it doesn't exist
+        if ${pkgs.libvirt}/bin/virsh -c qemu:///system pool-info default >/dev/null 2>&1; then
+          echo "Storage pool 'default' already exists"
+        else
+          echo "Defining storage pool at ${cfg.storagePath}..."
+          ${pkgs.libvirt}/bin/virsh -c qemu:///system pool-define-as \
+            --name default --type dir --target "${cfg.storagePath}"
+        fi
+        ${pkgs.libvirt}/bin/virsh -c qemu:///system pool-autostart default 2>/dev/null || true
+        # virsh has no "pool-state" command either; read "State:" from pool-info.
+        POOL_STATE=$(${pkgs.libvirt}/bin/virsh -c qemu:///system pool-info default 2>/dev/null \
+          | ${pkgs.gawk}/bin/awk '/^State:/ {print $2}' || true)
+        if [ "$POOL_STATE" != "running" ]; then
+          ${pkgs.libvirt}/bin/virsh -c qemu:///system pool-build default 2>/dev/null || true
+          ${pkgs.libvirt}/bin/virsh -c qemu:///system pool-start default 2>/dev/null || true
+        fi
+        echo "Storage pool ready."
+      '';
+    };
+
+    # Define the default network as an XML config file
     environment.etc."libvirt/qemu/networks/default.xml" = {
       text = ''
         <network>
           <name>default</name>
-          <uuid>2b8f7a3c-9e5d-4a1f-bc3d-6e7a8f9b0c1d</uuid>
-          <forward mode='nat'>
-            <nat>
-              <port .../>
-            </nat>
-          </forward>
+          <forward mode='nat'/>
           <bridge name='virbr0' .../>
-          <mac address='...'/>
           <ip address='...' netmask='...'>
             <dhcp>
               <range .../>
@@ -81,195 +357,27 @@ in
           </ip>
         </network>
       '';
-      # Autostart the network so it comes up on boot
       mode = "0644";
     };
 
-    # Ensure the default network is defined and autostarted
-    systemd.services.libvirtd = {
-      postStart = ''
-        ${pkgs.libvirt}/bin/virsh net-define /etc/libvirt/qemu/networks/default.xml 2>/dev/null || true
-        ${pkgs.libvirt}/bin/virsh net-autostart default 2>/dev/null || true
-        ${pkgs.libvirt}/bin/virsh net-start default 2>/dev/null || true
-      '';
-    };
-
-    # Storage directory for VM images
-    systemd.tmpfiles.rules = [
-      "d ${cfg.storagePath} 0755 root root -"
-      "d ${cfg.dataPath} 0755 root root -"
-    ];
-
-    # Ensure storage pool exists in libvirt
-    systemd.services.libvirtd.postStart = mkAfter ''
-      ${pkgs.libvirt}/bin/virsh pool-define-as default dir --target "${cfg.storagePath}" 2>/dev/null || true
-      ${pkgs.libvirt}/bin/virsh pool-autostart default 2>/dev/null || true
-      ${pkgs.libvirt}/bin/virsh pool-start default 2>/dev/null || true
-    '';
-
-    # Firewall: allow traffic from virbr0 to host and outbound NAT
+    # ── Firewall rules for libvirt guests ───────────────────────────────
     networking.firewall = {
-      extraCommands = ''
-        # Allow inbound DHCP/DNS from libvirt guests
-        iptables -I INPUT -i virbr0 -p udp --dport 67:68 -j ACCEPT
-        iptables -I INPUT -i virbr0 -p tcp --dport 53 -j ACCEPT
-        iptables -I INPUT -i virbr0 -p udp --dport 53 -j ACCEPT
-
-        # Allow established/related traffic back to guests
-        iptables -I FORWARD -i virbr0 -o virbr0 -j ACCEPT
-        iptables -I FORWARD -o virbr0 -j ACCEPT
-        iptables -I FORWARD -i virbr0 -j ACCEPT
+      trustedInterfaces = [ "virbr0" ];
+
+      extraCommands = mkAfter ''
+        # Allow DHCP (port 67/68) and DNS (port 53) to libvirt guests
+        iptables -I INPUT -i virbr0 -p udp --dport 67:68 -j ACCEPT 2>/dev/null || true
+        iptables -I INPUT -i virbr0 -p tcp --dport 53 -j ACCEPT 2>/dev/null || true
+        iptables -I INPUT -i virbr0 -p udp --dport 53 -j ACCEPT 2>/dev/null || true
+
+        # Allow forwarding between the bridge and the outside world
+        iptables -I FORWARD -i virbr0 -o virbr0 -j ACCEPT 2>/dev/null || true
+        iptables -I FORWARD -o virbr0 -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT 2>/dev/null || true
+        iptables -I FORWARD -i virbr0 -j ACCEPT 2>/dev/null || true
+
+        # NAT for guest outbound traffic
+        iptables -t nat -I POSTROUTING -s 192.168.122.0/24 -j MASQUERADE 2>/dev/null || true
       '';
     };
-
-    # Packages needed for VM management
-    environment.systemPackages = with pkgs; [
-      libvirt
-      qemu_kvm
-      virt-manager # optional GUI for manual management
-      OVMFFull
-      swtpm
-    ];
-
-    # Enable docker in the host (already enabled, but ensure for compose testing)
-    virtualisation.docker.enable = true;
-
-    # Helper script: pr-test-vm
-    # Usage:
-    #   pr-test-vm build — build the staging VM derivation
-    #   pr-test-vm start — boot the VM with a compose PR branch
-    #   pr-test-vm stop — graceful shutdown
-    #   pr-test-vm destroy — force stop + delete VM
-    #   pr-test-vm ssh — SSH into the running VM
-    systemd.tmpfiles.rules = mkAfter [
-      "d ${cfg.dataPath}/scripts 0755 root root -"
-    ];
-
-    environment.systemPackages = [ (pkgs.writeShellScriptBin "pr-test-vm" ''
-      set -euo pipefail
-
-      DATA="${cfg.dataPath}"
-      VM_NAME="${cfg.vmName}"
-      VM_IMAGE="''${DATA}/''${VM_NAME}.qcow2"
-      VM_PORT=2223
-
-      build_vm() {
-        echo "==> Building NixOS staging VM for compose testing..."
-        # Build the VM config inline — a minimal NixOS with Docker + SSH
-        cat > /tmp/staging-vm-config.nix << 'NIXEOF'
-      { config, pkgs, lib, ... }: {
-        boot.loader.grub.devices = [ "/dev/vda" ];
-        boot.loader.timeout = 0;
-
-        # Minimal kernel
-        boot.kernelParams = [ "console=ttyS0" ];
-        boot.initrd.kernelModules = [ "virtio_blk" "virtio_net" "virtio_pci" ];
-
-        # SSH access
-        services.openssh = {
-          enable = true;
-          settings.PasswordAuthentication = false;
-          settings.PermitRootLogin = "prohibit-password";
-        };
-
-        # Docker for compose testing
-        virtualisation.docker.enable = true;
-
-        # Network (DHCP via virbr0)
-        networking.useDHCP = true;
-        networking.firewall.enable = false;
-
-        # Users
-        users.users.root.openssh.authorizedKeys.keys = [
-          "$(cat /root/.ssh/authorized_keys 2>/dev/null || echo 'ssh-ed25519 AAAAC3... placeholder')"
-        ];
-        users.users.testrunner = {
-          isNormalUser = true;
-          extraGroups = [ "docker" ];
-          openssh.authorizedKeys.keys = [
-            "$(cat /root/.ssh/authorized_keys 2>/dev/null || echo 'ssh-ed25519 AAAAC3... placeholder')"
-          ];
-        };
-
-        # Git + compose tools
-        environment.systemPackages = with pkgs; [ git docker-compose curl ];
-
-        system.stateVersion = "24.11";
-      }
-      NIXEOF
-
-        nixos-rebuild build-vm -I nixpkgs=channel:nixos-unstable \
-          --arg configuration 'import /tmp/staging-vm-config.nix' \
-          --out-link "''${DATA}/vm-result"
-        echo "==> VM built. Run 'pr-test-vm start' to boot."
-      }
-
-      start_vm() {
-        if [ -f "''${VM_IMAGE}" ]; then
-          echo "==> Booting existing VM..."
-        else
-          echo "==> Creating VM image..."
-          ${pkgs.qemu_kvm}/bin/qemu-img create -f qcow2 "''${VM_IMAGE}" 20G
-        fi
-
-        # Check if already running
-        if ${pkgs.libvirt}/bin/virsh list --name 2>/dev/null | grep -q "''${VM_NAME}"; then
-          echo "==> VM already running."
-          exit 0
-        fi
-
-        ${pkgs.qemu_kvm}/bin/qemu-system-x86_64 \
-          -name "''${VM_NAME}" \
-          -machine q35,accel=kvm \
-          -cpu host \
-          -smp ${toString cfg.vcpus} \
-          -m ${cfg.memory} \
-          -drive file="''${VM_IMAGE}",if=virtio,format=qcow2 \
-          -netdev user,id=net0,hostfwd=tcp::''${VM_PORT}-:22 \
-          -device virtio-net-pci,netdev=net0 \
-          -nographic \
-          -serial mon:stdio \
-          -pidfile "''${DATA}/''${VM_NAME}.pid" \
-          -daemonize
-
-        echo "==> VM booting... SSH on port ''${VM_PORT}"
-        echo "==> Wait for it: ssh -p ''${VM_PORT} testrunner@localhost"
-      }
-
-      stop_vm() {
-        PIDFILE="''${DATA}/''${VM_NAME}.pid"
-        if [ -f "''${PIDFILE}" ]; then
-          PID=$(cat "''${PIDFILE}")
-          kill "''${PID}" 2>/dev/null || true
-          rm -f "''${PIDFILE}"
-          echo "==> VM stopped."
-        else
-          ${pkgs.libvirt}/bin/virsh destroy "''${VM_NAME}" 2>/dev/null || true
-          echo "==> VM destroyed."
-        fi
-      }
-
-      ssh_vm() {
-        exec ssh -p "''${VM_PORT}" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null "testrunner@localhost" "$@"
-      }
-
-      # Main dispatch
-      case "''${1:-help}" in
-        build) build_vm ;;
-        start) start_vm ;;
-        stop) stop_vm ;;
-        destroy) stop_vm; rm -f "''${VM_IMAGE}"; echo "==> VM deleted." ;;
-        ssh) shift; ssh_vm "$@" ;;
-        *)
-          echo "Usage: pr-test-vm {build|start|stop|destroy|ssh}"
-          echo ""
-          echo " build — build the NixOS VM derivation"
-          echo " start — boot the VM (create image if needed)"
-          echo " stop — graceful VM shutdown"
-          echo " destroy — stop + delete VM image"
-          echo " ssh — SSH into the running VM"
-          ;;
-      esac
-    '') ];
   };
 }