feat: add KVM/libvirt support for staging VM #48

Open
Hermes wants to merge 6 commits from kvm-pr into master
7 changed files with 772 additions and 8 deletions
Showing only changes of commit 0a37d27337 - Show all commits

View File

@@ -4,6 +4,202 @@ with lib;
let
cfg = config.services.stagingVm;
# ── pr-test-vm helper script ──────────────────────────────────────────
pr-test-vm = pkgs.writeShellScriptBin "pr-test-vm" ''
# Abort on command failure, on use of an unset variable, and when any
# stage of a pipeline fails.
set -euo pipefail
# Name this script was invoked as; shown in usage and error text.
SCRIPT_NAME="''${0##*/}"
# libvirt connection used by every virsh / virt-install call below.
LIBVIRT_URI="qemu:///system"
# libvirt network whose DHCP leases are consulted for VM addresses.
NETWORK="default"
# Root directory; each VM uses the sub-directory "$VM_DIR/<vm-name>".
VM_DIR="${cfg.dataPath}"
# Print the command summary to stdout and terminate the script.
# Globals: SCRIPT_NAME (read) - program name substituted into the text
# Exits:   always, with status 1
usage()
{
  cat <<USAGE_TEXT
Usage: $SCRIPT_NAME <command> [options]
Commands:
build <nixos-config> [--name <name>] Build VM image from a NixOS config
start <vm-name> Start a VM
stop <vm-name> Gracefully shut down a VM
destroy <vm-name> Force-power-off and undefine a VM
ssh [user@]<vm-name> SSH into a running VM
console <vm-name> Connect to VM serial console
list List all staging VMs
status <vm-name> Show VM status
Examples:
$SCRIPT_NAME build ./vm-config.nix --name my-test
$SCRIPT_NAME start my-test
$SCRIPT_NAME ssh root@my-test
USAGE_TEXT
  exit 1
}
# Resolve a VM's IP address from the libvirt DHCP lease table.
#
# Arguments: $1 - libvirt domain name
# Globals:   LIBVIRT_URI, NETWORK (read)
# Outputs:   the address, without its /prefix, on stdout; diagnostics on
#            stderr so that callers using $(vm_ip ...) capture only the address
# Returns:   0 on success, 1 when no MAC or no lease can be found
vm_ip() {
  local name="$1"
  local mac ip

  # domiflist prints a header and a separator line first; the MAC is the
  # last column of the first vnet* interface row. "|| true" keeps whatever
  # was captured even if virsh fails or is cut short by awk's early exit.
  mac=$(${pkgs.libvirt}/bin/virsh -c "$LIBVIRT_URI" domiflist "$name" 2>/dev/null \
    | ${pkgs.gawk}/bin/awk 'NR>2 && $1 ~ /^vnet/ {print $NF; exit}') || true
  if [ -z "$mac" ]; then
    echo "error: cannot find MAC for VM '$name'" >&2
    return 1
  fi

  # net-dhcp-leases rows are: date, time, MAC, protocol, address/prefix, ...
  # so the address is field 5 (field 3 is the MAC itself). The MAC is
  # matched as a fixed string rather than as a regular expression.
  ip=$(${pkgs.libvirt}/bin/virsh -c "$LIBVIRT_URI" net-dhcp-leases "$NETWORK" 2>/dev/null \
    | ${pkgs.gawk}/bin/awk -v mac="$mac" \
        'index($0, mac) { split($5, parts, "/"); print parts[1]; exit }') || true
  if [ -z "$ip" ]; then
    echo "error: no DHCP lease found for VM '$name' (MAC: $mac)" >&2
    return 1
  fi

  echo "$ip"
}
case "''${1:-help}" in
build)
  # build <nixos-config> [--name <name>]
  # Builds the "vm" attribute of the given Nix file and, if that fails,
  # retries as a flake build of nixosConfigurations.<name>.
  shift
  CONFIG="''${1:?Missing NixOS config path}"
  if [ ! -f "$CONFIG" ]; then
    echo "error: config file not found: $CONFIG" >&2
    exit 1
  fi

  # VM name: the value after --name, else a bare second argument, else the
  # config file's basename without ".nix". A second argument that starts
  # with "--" is treated as a stray flag, not as a name.
  VM_NAME="''${2:-}"
  if [ "$VM_NAME" = "--name" ] && [ -n "''${3:-}" ]; then
    VM_NAME="$3"
  elif [ -z "$VM_NAME" ] || [ "''${VM_NAME#--}" != "$VM_NAME" ]; then
    VM_NAME="$(basename "$CONFIG" .nix)"
  fi

  BUILD_DIR="$VM_DIR/$VM_NAME"
  echo "==> Building VM '$VM_NAME' from config: $CONFIG"
  mkdir -p "$BUILD_DIR"
  # NOTE(review): both builds use --no-link and nothing is copied into
  # $BUILD_DIR, while "start" expects $BUILD_DIR/disk-image.qcow2 — confirm
  # how the image is meant to get there.

  # Build the NixOS VM derivation
  nix build --no-link -f "$CONFIG" vm 2>&1 || {
    echo "Trying flake build..."
    # A flake reference must name the directory that holds flake.nix, so
    # use the config file's directory rather than the file path itself.
    nix build "$(dirname "$CONFIG")#nixosConfigurations.$VM_NAME.config.system.build.vm" --no-link 2>&1 || {
      echo "error: failed to build VM (tried both import and flake)" >&2
      exit 1
    }
  }
  echo "==> Build complete. Run 'pr-test-vm start $VM_NAME' to launch."
  ;;
start)
  # start <vm-name>
  # Imports the VM's disk image as a libvirt domain and boots it.
  # Drop the subcommand so that $1 is the VM name; without this shift the
  # name would always be the literal word "start".
  shift
  VM_NAME="''${1:?Missing VM name}"
  IMAGE="$VM_DIR/$VM_NAME/disk-image.qcow2"
  if [ ! -f "$IMAGE" ]; then
    echo "error: no disk image found at $IMAGE. Build first." >&2
    exit 1
  fi

  # domstate fails for an unknown domain; map that to "undefined".
  STATE=$(${pkgs.libvirt}/bin/virsh -c "$LIBVIRT_URI" domstate "$VM_NAME" 2>/dev/null || echo "undefined")
  if [ "$STATE" = "running" ]; then
    echo "VM '$VM_NAME' is already running."
    exit 0
  fi

  echo "==> Starting VM '$VM_NAME'..."
  # A domain that is defined but not running is undefined first, because
  # virt-install below defines it again under the same name.
  if [ "$STATE" != "undefined" ]; then
    ${pkgs.libvirt}/bin/virsh -c "$LIBVIRT_URI" undefine "$VM_NAME" 2>/dev/null || true
  fi

  # Define and start with virt-install; --import boots the existing disk
  # instead of running an installer, --noautoconsole returns immediately.
  ${pkgs.virt-manager}/bin/virt-install \
    --connect "$LIBVIRT_URI" \
    --name "$VM_NAME" \
    --memory "${toString cfg.memory}" \
    --vcpus "${toString cfg.vcpus}" \
    --disk "$IMAGE",bus=virtio \
    --import \
    --network network="$NETWORK",model=virtio \
    --graphics none \
    --console pty,target_type=virtio \
    --serial pty \
    --memballoon virtio \
    --rng /dev/urandom \
    --noautoconsole \
    --os-variant detect=on,name=generic
  echo "==> VM '$VM_NAME' started. Get IP with: pr-test-vm status $VM_NAME"
  ;;
stop)
  # stop <vm-name>
  # Requests a guest shutdown and polls for up to 30 x 2 seconds.
  # Drop the subcommand so that $1 is the VM name rather than "stop".
  shift
  VM_NAME="''${1:?Missing VM name}"
  echo "==> Stopping VM '$VM_NAME'..."
  # An explicit if/else: with "cmd && { ... } || { ... }" the second group
  # would also run whenever the first group ended with a non-zero status.
  if ${pkgs.libvirt}/bin/virsh -c "$LIBVIRT_URI" shutdown "$VM_NAME" 2>/dev/null; then
    echo "Waiting for VM to shut down..."
    for _ in $(seq 1 30); do
      STATE=$(${pkgs.libvirt}/bin/virsh -c "$LIBVIRT_URI" domstate "$VM_NAME" 2>/dev/null || echo "undefined")
      if [ "$STATE" != "running" ]; then
        echo "VM stopped."
        exit 0
      fi
      sleep 2
    done
    echo "warning: VM did not shut down gracefully, use 'destroy' for force" >&2
  else
    echo "VM '$VM_NAME' not running or does not exist."
  fi
  ;;
destroy)
  # destroy <vm-name>
  # Force-powers-off the domain and removes its libvirt definition.
  # Drop the subcommand so that $1 is the VM name rather than "destroy".
  shift
  VM_NAME="''${1:?Missing VM name}"
  echo "==> Destroying VM '$VM_NAME'..."
  # Both steps are best-effort: the domain may already be stopped or
  # may not be defined at all.
  ${pkgs.libvirt}/bin/virsh -c "$LIBVIRT_URI" destroy "$VM_NAME" 2>/dev/null || true
  ${pkgs.libvirt}/bin/virsh -c "$LIBVIRT_URI" undefine "$VM_NAME" 2>/dev/null || true
  echo "==> VM '$VM_NAME' destroyed and undefined."
  ;;
ssh)
  # ssh [user@]<vm-name>
  # Looks up the VM's address and replaces this process with ssh to it.
  # Drop the subcommand so that $1 is the target rather than "ssh".
  shift
  TARGET="''${1:?Usage: $SCRIPT_NAME ssh [user@]<vm-name>}"
  # Split user@hostname if present. A dedicated variable is used for the
  # login so the exported USER environment variable is left untouched.
  case "$TARGET" in
    *@*)
      SSH_USER="''${TARGET%@*}"
      VM_NAME="''${TARGET#*@}"
      ;;
    *)
      SSH_USER=""
      VM_NAME="$TARGET"
      ;;
  esac
  IP=$(vm_ip "$VM_NAME") || exit 1
  # Host-key checking is disabled and no known_hosts file is written.
  if [ -n "$SSH_USER" ]; then
    exec ${pkgs.openssh}/bin/ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null "''${SSH_USER}@''${IP}"
  else
    exec ${pkgs.openssh}/bin/ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null "$IP"
  fi
  ;;
console)
  # console <vm-name>
  # Replaces this process with "virsh console" for the given domain.
  # Drop the subcommand so that $1 is the VM name rather than "console".
  shift
  VM_NAME="''${1:?Missing VM name}"
  exec ${pkgs.libvirt}/bin/virsh -c "$LIBVIRT_URI" console "$VM_NAME"
  ;;
list)
  # list
  # Three titled sections separated by blank lines: all libvirt domains
  # (running or not), then virsh's network list, then its pool list.
  printf '%s\n' "Staging VMs:"
  ${pkgs.libvirt}/bin/virsh -c "$LIBVIRT_URI" list --all
  printf '\n%s\n' "Active networks:"
  ${pkgs.libvirt}/bin/virsh -c "$LIBVIRT_URI" net-list
  printf '\n%s\n' "Storage pools:"
  ${pkgs.libvirt}/bin/virsh -c "$LIBVIRT_URI" pool-list
  ;;
status)
  # status <vm-name>
  # Prints the domain state and, for a running VM, its IP address and the
  # first three lines of dommemstat.
  # Drop the subcommand so that $1 is the VM name rather than "status".
  shift
  VM_NAME="''${1:?Missing VM name}"
  echo "VM: $VM_NAME"
  STATE=$(${pkgs.libvirt}/bin/virsh -c "$LIBVIRT_URI" domstate "$VM_NAME" 2>/dev/null || echo "not found")
  echo "State: $STATE"
  if [ "$STATE" = "running" ]; then
    # Test the substitution's own exit status. A fallback written inside
    # $( ... ) never runs if the lookup leaves the subshell with "exit",
    # and the failed assignment would then abort the script under set -e.
    if ! IP=$(vm_ip "$VM_NAME" 2>/dev/null); then
      IP="N/A"
    fi
    echo "IP: $IP"
    ${pkgs.libvirt}/bin/virsh -c "$LIBVIRT_URI" dommemstat "$VM_NAME" 2>/dev/null | head -3 || true
  fi
  ;;
*)
  # help, --help, -h and any unrecognised command all end here:
  # print the usage text and exit with status 1.
  usage
  ;;
esac
'';
in
{
options.services.stagingVm = {
@@ -45,7 +241,7 @@ in
};
config = mkIf cfg.enable {
# Enable libvirt daemon
# ── libvirtd with QEMU/KVM ──────────────────────────────────────────
virtualisation.libvirtd = {
enable = true;
qemu = {
@@ -54,26 +250,64 @@ in
swtpm.enable = true;
ovmf = {
enable = true;
packages = [ pkgs.OVMFFull.fd ];
packages = [ pkgs.OVMF ];
};
};
};
# Kernel modules + groups already handled in configuration.nix
# ── System packages ─────────────────────────────────────────────────
environment.systemPackages = with pkgs; [
libvirt # virsh, virt-admin
qemu_kvm # QEMU/KVM
swtpm # Software TPM
OVMF # UEFI firmware for VMs
virt-manager # GUI + virt-install
virt-viewer # SPICE/VNC viewer
libguestfs # virt-customize, guestfish
cdrtools # genisoimage for cloud-init ISOs
jq # JSON parsing
gawk # awk for DHCP lease parsing
gnugrep # grep
];
# libvirt NAT network (192.168.122.0/24)
# ── User permissions ────────────────────────────────────────────────
users.users.gortium.extraGroups = [ "libvirtd" ];
# ── Directories ─────────────────────────────────────────────────────
systemd.tmpfiles.rules = [
"d ${cfg.storagePath} 0755 root root -"
"d ${cfg.dataPath} 0755 root root -"
];
# ── Default NAT network (192.168.122.0/24) ──────────────────────────
# Define the default libvirt NAT network using virsh postStart hook
systemd.services.libvirtd = {
  # Runs after libvirtd has started: make sure the "default" NAT network is
  # defined, marked for autostart, and active.
  # NOTE(review): this file also assigns systemd.services.libvirtd.postStart
  # as a separate attribute path (storage pool section). Nix rejects one
  # attribute path being defined twice in the same attribute set — confirm
  # the two scripts are combined (e.g. with mkMerge) in the final file.
  postStart = ''
    set -e
    # Define the NAT network if it doesn't exist
    if ${pkgs.libvirt}/bin/virsh -c qemu:///system net-info default >/dev/null 2>&1; then
      echo "Network 'default' already exists"
    else
      echo "Defining default NAT network (192.168.122.0/24)..."
      ${pkgs.libvirt}/bin/virsh -c qemu:///system net-define /etc/libvirt/qemu/networks/default.xml
    fi
    ${pkgs.libvirt}/bin/virsh -c qemu:///system net-autostart default 2>/dev/null || true
    # Start the network if not active. virsh has no "net-state" subcommand;
    # the state is the "Active:" row of net-info (yes / no).
    ACTIVE=$(${pkgs.libvirt}/bin/virsh -c qemu:///system net-info default 2>/dev/null \
      | ${pkgs.gawk}/bin/awk '/^Active:/ {print $2}') || true
    if [ "$ACTIVE" != "yes" ]; then
      ${pkgs.libvirt}/bin/virsh -c qemu:///system net-start default 2>/dev/null || true
    fi
    echo "Default network ready."
  '';
};
# Define the default network as an XML config file
environment.etc."libvirt/qemu/networks/default.xml" = {
text = ''
<network>
<name>default</name>
<uuid>2b8f7a3c-9e5d-4a1f-bc3d-6e7a8f9b0c1d</uuid>
<forward mode='nat'>
<nat>
<port start='1024' end='65535'/>
</nat>
</forward>
<forward mode='nat'/>
<bridge name='virbr0' stp='on' delay='0'/>
<mac address='52:54:00:12:34:56'/>
<ip address='192.168.122.1' netmask='255.255.255.0'>
<dhcp>
<range start='192.168.122.2' end='192.168.122.254'/>
@@ -81,195 +315,49 @@ in
</ip>
</network>
'';
# Autostart the network so it comes up on boot
mode = "0644";
};
# Ensure the default network is defined and autostarted
systemd.services.libvirtd = {
postStart = ''
${pkgs.libvirt}/bin/virsh net-define /etc/libvirt/qemu/networks/default.xml 2>/dev/null || true
${pkgs.libvirt}/bin/virsh net-autostart default 2>/dev/null || true
${pkgs.libvirt}/bin/virsh net-start default 2>/dev/null || true
'';
};
# Storage directory for VM images
systemd.tmpfiles.rules = [
"d ${cfg.storagePath} 0755 root root -"
"d ${cfg.dataPath} 0755 root root -"
];
# Ensure storage pool exists in libvirt
# ── Storage pool ────────────────────────────────────────────────────
# Appended (mkAfter) to libvirtd's postStart script: make sure the "default"
# directory-backed storage pool exists at cfg.storagePath, is marked for
# autostart, and is running.
systemd.services.libvirtd.postStart = mkAfter ''
  set -e
  # Define the pool only when libvirt does not know it yet.
  if ${pkgs.libvirt}/bin/virsh -c qemu:///system pool-info default >/dev/null 2>&1; then
    echo "Storage pool 'default' already exists"
  else
    echo "Defining storage pool at ${cfg.storagePath}..."
    ${pkgs.libvirt}/bin/virsh -c qemu:///system pool-define-as \
      --name default --type dir --target "${cfg.storagePath}"
  fi
  ${pkgs.libvirt}/bin/virsh -c qemu:///system pool-autostart default 2>/dev/null || true
  # virsh has no "pool-state" subcommand; the state is the "State:" row of
  # pool-info (running / inactive).
  POOL_STATE=$(${pkgs.libvirt}/bin/virsh -c qemu:///system pool-info default 2>/dev/null \
    | ${pkgs.gawk}/bin/awk '/^State:/ {print $2}') || true
  if [ "$POOL_STATE" != "running" ]; then
    # Best-effort: pool-build creates the target directory if it is missing.
    ${pkgs.libvirt}/bin/virsh -c qemu:///system pool-build default 2>/dev/null || true
    ${pkgs.libvirt}/bin/virsh -c qemu:///system pool-start default 2>/dev/null || true
  fi
  echo "Storage pool ready."
'';
# Firewall: allow traffic from virbr0 to host and outbound NAT
# ── Firewall rules for libvirt guests ───────────────────────────────
networking.firewall = {
extraCommands = ''
# Allow inbound DHCP/DNS from libvirt guests
iptables -I INPUT -i virbr0 -p udp --dport 67:68 -j ACCEPT
iptables -I INPUT -i virbr0 -p tcp --dport 53 -j ACCEPT
iptables -I INPUT -i virbr0 -p udp --dport 53 -j ACCEPT
# Allow established/related traffic back to guests
iptables -I FORWARD -i virbr0 -o virbr0 -j ACCEPT
iptables -I FORWARD -o virbr0 -j ACCEPT
iptables -I FORWARD -i virbr0 -j ACCEPT
trustedInterfaces = [ "virbr0" ];
extraCommands = mkAfter ''
# Allow DHCP (port 67/68) and DNS (port 53) to libvirt guests
iptables -I INPUT -i virbr0 -p udp --dport 67:68 -j ACCEPT 2>/dev/null || true
iptables -I INPUT -i virbr0 -p tcp --dport 53 -j ACCEPT 2>/dev/null || true
iptables -I INPUT -i virbr0 -p udp --dport 53 -j ACCEPT 2>/dev/null || true
# Allow forwarding between the bridge and the outside world
iptables -I FORWARD -i virbr0 -o virbr0 -j ACCEPT 2>/dev/null || true
iptables -I FORWARD -o virbr0 -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT 2>/dev/null || true
iptables -I FORWARD -i virbr0 -j ACCEPT 2>/dev/null || true
# NAT for guest outbound traffic
iptables -t nat -I POSTROUTING -s 192.168.122.0/24 -j MASQUERADE 2>/dev/null || true
'';
};
# Packages needed for VM management
environment.systemPackages = with pkgs; [
libvirt
qemu_kvm
virt-manager # optional GUI for manual management
OVMFFull
swtpm
];
# Enable docker in the host (already enabled, but ensure for compose testing)
virtualisation.docker.enable = true;
# Helper script: pr-test-vm
# Usage:
# pr-test-vm build — build the staging VM derivation
# pr-test-vm start — boot the VM with a compose PR branch
# pr-test-vm stop — graceful shutdown
# pr-test-vm destroy — force stop + delete VM
# pr-test-vm ssh — SSH into the running VM
systemd.tmpfiles.rules = mkAfter [
"d ${cfg.dataPath}/scripts 0755 root root -"
];
environment.systemPackages = [ (pkgs.writeShellScriptBin "pr-test-vm" ''
set -euo pipefail
DATA="${cfg.dataPath}"
VM_NAME="${cfg.vmName}"
VM_IMAGE="''${DATA}/''${VM_NAME}.qcow2"
VM_PORT=2223
build_vm() {
echo "==> Building NixOS staging VM for compose testing..."
# Build the VM config inline: a minimal NixOS with Docker + SSH
cat > /tmp/staging-vm-config.nix << 'NIXEOF'
{ config, pkgs, lib, ... }: {
boot.loader.grub.devices = [ "/dev/vda" ];
boot.loader.timeout = 0;
# Minimal kernel
boot.kernelParams = [ "console=ttyS0" ];
boot.initrd.kernelModules = [ "virtio_blk" "virtio_net" "virtio_pci" ];
# SSH access
services.openssh = {
enable = true;
settings.PasswordAuthentication = false;
settings.PermitRootLogin = "prohibit-password";
};
# Docker for compose testing
virtualisation.docker.enable = true;
# Network (DHCP via virbr0)
networking.useDHCP = true;
networking.firewall.enable = false;
# Users
users.users.root.openssh.authorizedKeys.keys = [
"$(cat /root/.ssh/authorized_keys 2>/dev/null || echo 'ssh-ed25519 AAAAC3... placeholder')"
];
users.users.testrunner = {
isNormalUser = true;
extraGroups = [ "docker" ];
openssh.authorizedKeys.keys = [
"$(cat /root/.ssh/authorized_keys 2>/dev/null || echo 'ssh-ed25519 AAAAC3... placeholder')"
];
};
# Git + compose tools
environment.systemPackages = with pkgs; [ git docker-compose curl ];
system.stateVersion = "24.11";
}
NIXEOF
nixos-rebuild build-vm -I nixpkgs=channel:nixos-unstable \
--arg configuration 'import /tmp/staging-vm-config.nix' \
--out-link "''${DATA}/vm-result"
echo "==> VM built. Run 'pr-test-vm start' to boot."
}
start_vm() {
if [ -f "''${VM_IMAGE}" ]; then
echo "==> Booting existing VM..."
else
echo "==> Creating VM image..."
${pkgs.qemu_kvm}/bin/qemu-img create -f qcow2 "''${VM_IMAGE}" 20G
fi
# Check if already running
if ${pkgs.libvirt}/bin/virsh list --name 2>/dev/null | grep -q "''${VM_NAME}"; then
echo "==> VM already running."
exit 0
fi
${pkgs.qemu_kvm}/bin/qemu-system-x86_64 \
-name "''${VM_NAME}" \
-machine q35,accel=kvm \
-cpu host \
-smp ${toString cfg.vcpus} \
-m ${cfg.memory} \
-drive file="''${VM_IMAGE}",if=virtio,format=qcow2 \
-netdev user,id=net0,hostfwd=tcp::''${VM_PORT}-:22 \
-device virtio-net-pci,netdev=net0 \
-nographic \
-serial mon:stdio \
-pidfile "''${DATA}/''${VM_NAME}.pid" \
-daemonize
echo "==> VM booting... SSH on port ''${VM_PORT}"
echo "==> Wait for it: ssh -p ''${VM_PORT} testrunner@localhost"
}
stop_vm() {
PIDFILE="''${DATA}/''${VM_NAME}.pid"
if [ -f "''${PIDFILE}" ]; then
PID=$(cat "''${PIDFILE}")
kill "''${PID}" 2>/dev/null || true
rm -f "''${PIDFILE}"
echo "==> VM stopped."
else
${pkgs.libvirt}/bin/virsh destroy "''${VM_NAME}" 2>/dev/null || true
echo "==> VM destroyed."
fi
}
ssh_vm() {
exec ssh -p "''${VM_PORT}" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null "testrunner@localhost" "$@"
}
# Main dispatch
case "''${1:-help}" in
build) build_vm ;;
start) start_vm ;;
stop) stop_vm ;;
destroy) stop_vm; rm -f "''${VM_IMAGE}"; echo "==> VM deleted." ;;
ssh) shift; ssh_vm "$@" ;;
*)
echo "Usage: pr-test-vm {build|start|stop|destroy|ssh}"
echo ""
echo " build build the NixOS VM derivation"
echo " start boot the VM (create image if needed)"
echo " stop graceful VM shutdown"
echo " destroy stop + delete VM image"
echo " ssh SSH into the running VM"
;;
esac
'') ];
# ── pr-test-vm helper script ────────────────────────────────────────
environment.systemPackages = [ pr-test-vm ];
};
}