Compare commits

..

3 Commits

Author SHA1 Message Date
f4b666284a feat: add Hyperspace Pods NixOS module and enable on lazyworkhorse
Hyperspace Pods let multiple machines pool their GPUs into one private
P2P mesh AI cluster. Models are split across all connected GPUs —
e.g. two machines with 16GB VRAM each can run Qwen 3.5 32B together.

Changes:
- Add modules/nixos/services/hyperspace.nix — NixOS module that:
  * Fetches the Hyperspace CLI binary (v5.45.30) via fetchurl
  * Sets up systemd service for the agent
  * Opens firewall ports (libp2p 4001, chain 30301, API 8080)
  * Configures GPU passthrough for AMD MI50 (ROCm)
- Register module in flake.nix for lazyworkhorse
- Enable hyperspace service on lazyworkhorse (ai-worker user, port 8080)

Usage after deployment:
  hyperspace pod create "tdnde-lab"   # create pod
  hyperspace pod invite                # share invite with cyt-pi
  curl http://localhost:8080/v1/chat/completions  # OpenAI API

See skill: nixos-hyperspace-pods
2026-05-02 15:36:15 +00:00
815ca3afa6 chore: update compose submodule to traefik logging branch 2026-05-02 15:30:28 +00:00
e983775c04 docs: add merge priority order with security hardening as #1 priority
- Updated roadmap phase status (Phase 4 complete)
- Added merge priority table with PR #28 (security) at top
- Documented that security must merge before new services exposed
- Added deployment command reference
2026-05-02 15:30:28 +00:00
7 changed files with 270 additions and 303 deletions

View File

@@ -13,7 +13,9 @@ None
-**Phase 1: Foundation Setup** - Establish core NixOS configuration with flakes
-**Phase 2: Docker Service Integration** - Integrate Docker Compose services
-**Phase 3: AI Assistant Integration** - Enable AI-assisted infrastructure management
- [ ] **Phase 4: Internet Access & MCP** - MCP server for web access
- **Phase 4: Internet Access & MCP** - MCP server for web access
- 🚨 **Security Hardening** - CRITICAL: Firewall, fail2ban, SSH hardening (PR #28)
- [ ] **Phase 5: TAK Server** - Research, implementation, and validation
## Phase Details
@@ -133,8 +135,25 @@ Plans:
## Progress
**Merge Priority Order** (CRITICAL - merge in this order):
| Priority | PR | Description | Status | Notes |
|----------|-----|-------------|--------|-------|
| 🚨 1 | #28 | **Security hardening** (firewall, fail2ban, SSH) | Open | **MERGE FIRST** - protects all other services |
| 2 | #22 | Matrix bridge dependency fix | Open | Blocks Hermes functionality |
| 3 | #21 | Backup network creation fix | Open | Infrastructure fix |
| 4 | #25 | Hermes voice GPU support | Open | Feature enhancement |
| 5 | #24 | uConsole CM5 host | Open | New hardware support |
| 6 | #23 | NixOS deployment infrastructure | Open | Deployment tooling |
| 7 | #1 | AI worker restricted access | Open | Legacy PR (superseded by hardening) |
**Execution Order:**
Phases execute in numeric order: 1 → 2 → 3 → 4 → 5 → 6 → 7
Phases execute in numeric order: 1 → 2 → 3 → 4 → Security → 5 → 6 → 7
**Merge vs Phase Execution:**
- PRs can merge independently (no strict phase ordering for merges)
- **EXCEPTION:** Security hardening (#28) must merge before any new services are exposed
- After security merge, deploy with: `nh os switch --flake .#lazyworkhorse`
| Phase | Milestone | Plans Complete | Status | Completed |
|-------|-----------|----------------|--------|-----------|

View File

@@ -5,7 +5,6 @@ This document outlines the development conventions for this NixOS-based infrastr
## Build & Deployment
- **Build/Deploy:** Use `nixos-rebuild switch --flake .#<hostname>` to build and deploy the configuration for a specific host.
- **CRITICAL — Validate before pushing:** Always `nix build --no-link '.#nixosConfigurations.<hostname>.config.system.build.toplevel'` (or `nh os build`) and confirm it succeeds before pushing any changes. Never push untested NixOS configs.
- **Development Shell:** Activate the development environment with `nix develop`.
## Linting & Formatting

View File

@@ -61,6 +61,7 @@
./modules/nixos/services/open_code_server.nix
./modules/nixos/services/ollama_init_custom_models.nix
./modules/nixos/services/openclaw_node.nix
./modules/nixos/services/hyperspace.nix
./users/gortium.nix
./users/ai-worker.nix
];

View File

@@ -36,7 +36,7 @@
"transparent_hugepage=always" # because mucho ram
];
# 2. Load the specific drivers found by sensors-detect
boot.kernelModules = [ "nct6775" "lm96163" "iptable_nat" "iptable_filter" ];
boot.kernelModules = [ "nct6775" "lm96163" ];
# 3. Force the nct6775 driver to recognize the chip if it's stubborn
boot.extraModprobeConfig = ''
options nct6775 force_id=0xd280
@@ -49,27 +49,6 @@
networking.networkmanager.enable = true; # Easiest to use and most distros use this by default.
networking.hostId = "deadbeef";
# WireGuard VPN client -- always up, connects to wg-easy server
# Create age-encrypted secrets before deploying (run on the host):
# echo -n "<private_key>" | agenix -e secrets/wireguard_private_key.age
# echo -n "<preshared_key>" | agenix -e secrets/wireguard_preshared_key.age
networking.wireguard.interfaces = {
wg0 = {
ips = [ "10.8.0.3/24" ];
privateKeyFile = config.age.secrets.wireguard_private_key.path;
peers = [
{
publicKey = "rY9zII3AOm8rog2rv02PyA3Bq7zdvTOGkZapfCV1DkE=";
presharedKeyFile = config.age.secrets.wireguard_preshared_key.path;
allowedIPs = [ "10.8.0.0/24" ];
endpoint = "vpn.lazyworkhorse.net:51820";
persistentKeepalive = 25;
}
];
dns = [ "1.1.1.1" "8.8.8.8" ];
};
};
# Set your time zone.
time.timeZone = "America/Montreal";
@@ -179,7 +158,7 @@
settings = {
PasswordAuthentication = false;
KbdInteractiveAuthentication = false;
# Additional hardening settings below in SERVER HARDENING section
PermitRootLogin = "prohibit-password";
};
hostKeys = [
{
@@ -242,11 +221,6 @@
path = self + "/assets/compose/homepage";
};
vpn = {
path = self + "/assets/compose/vpn";
envFile = config.age.secrets.containers_env.path;
};
# tak = {
# path = self + "/assets/compose/tak";
# };
@@ -290,20 +264,6 @@
mode = "0440";
path = "/run/secrets/openclaw_gateway_token";
};
wireguard_private_key = {
file = ../../secrets/wireguard_private_key.age;
owner = "root";
group = "root";
mode = "0400";
path = "/run/secrets/wireguard_private_key";
};
wireguard_preshared_key = {
file = ../../secrets/wireguard_preshared_key.age;
owner = "root";
group = "root";
mode = "0400";
path = "/run/secrets/wireguard_preshared_key";
};
};
};
@@ -317,6 +277,16 @@
displayName = "lazyworkhorse-host";
};
# Hyperspace Pods — P2P mesh AI cluster (combine GPUs across machines).
# Options are declared in modules/nixos/services/hyperspace.nix.
services.hyperspace = {
  enable = true;

  # Account the agent's systemd unit runs as.
  user = "ai-worker";

  # "auto" is one of the module's profile enum values (auto-detect hardware).
  profile = "auto";
  extraArgs = [ "--verbose" ];

  # Port handed to the agent as its OpenAI-compatible API port.
  apiPort = 8080;

  # Opens libp2p 4001 and chain 30301 (TCP+UDP) as well as the API port (TCP).
  openFirewall = true;
};
# Public host ssh key (kept in sync with the private one)
environment.etc."ssh/ssh_host_ed25519_key.pub".text =
"${keys.hosts.lazyworkhorse.main}";
@@ -348,196 +318,6 @@
# Or disable the firewall altogether.
# networking.firewall.enable = false;
# =============================================================================
# SERVER HARDENING - Firewall, Fail2ban, SSH, Kernel
# =============================================================================
# Firewall - default deny, explicit allow
networking.firewall = {
# Enable firewall with default deny policy (NixOS firewall denies all by default)
enable = true;
allowPing = true;
# Only essential ports exposed to internet
allowedTCPPorts = [
2424 # SSH (non-standard port)
2222 # Gitea (version control)
80 # HTTP (Traefik redirect)
443 # HTTPS (Traefik)
# 8000 # Portainer - REVIEW: internal only?
# 4242 # Coms - REVIEW: internal only?
# 5000 # TAK API - REVIEW: internal only?
# 8087 # TAK Connect - REVIEW: internal only?
# 8089 # TAK Management - REVIEW: internal only?
];
allowedUDPPorts = [
51820 # WireGuard VPN
];
# Rate limiting and attack prevention
extraCommands = ''
# Rate limit SSH connections (max 4 new connections per 60 seconds)
iptables -A INPUT -p tcp --dport 2424 -m state --state NEW -m recent --set
iptables -A INPUT -p tcp --dport 2424 -m state --state NEW -m recent --update --seconds 60 --hitcount 4 -j DROP
# Rate limit HTTP/HTTPS (protects Traefik)
iptables -A INPUT -p tcp --dport 80 -m state --state NEW -m limit --limit 25/minute --limit-burst 100 -j ACCEPT
iptables -A INPUT -p tcp --dport 443 -m state --state NEW -m limit --limit 25/minute --limit-burst 100 -j ACCEPT
# Drop invalid packets
iptables -A INPUT -m state --state INVALID -j DROP
# Log dropped packets (rate limited)
iptables -A INPUT -m limit --limit 5/min -j LOG --log-prefix "IPTables-Dropped: " --log-level 4
'';
};
# Fail2ban - automatic IP banning
services.fail2ban = {
enable = true;
maxretry = 3;
bantime = "1h";
banaction = "iptables-multiport";
jails = {
# SSH brute force protection (uses systemd journal backend)
sshd = {
enabled = true;
settings = {
filter = "sshd";
port = "2424";
maxretry = 3;
bantime = "1h";
};
};
# Recidive - ban repeat offenders for 1 week
recidive = {
enabled = true;
settings = {
filter = "recidive";
logpath = "/var/log/fail2ban.log";
bantime = "1w";
findtime = "1d";
maxretry = 3;
};
};
# HTTP authentication failures (Traefik)
http-auth = {
enabled = true;
settings = {
filter = "traefik-auth";
port = "80,443";
logpath = "/var/log/traefik/access.log";
maxretry = 5;
bantime = "1h";
};
};
# HTTP scanning/attacks (Traefik)
http-botsearch = {
enabled = true;
settings = {
filter = "traefik-botsearch";
port = "80,443";
logpath = "/var/log/traefik/access.log";
maxretry = 2;
bantime = "2h";
};
};
};
};
# Custom fail2ban filters for Traefik
environment.etc."fail2ban/filter.d/traefik-auth.conf".text = ''
[Definition]
failregex = ^<HOST> -.*"(GET|POST|HEAD|PUT|DELETE).*" (401|403) \d+.*$
ignoreregex =
'';
environment.etc."fail2ban/filter.d/traefik-botsearch.conf".text = ''
[Definition]
failregex = ^<HOST> -.*"(GET|POST|HEAD|PUT|DELETE).*" 404 \d+.*$
^<HOST> -.*"(GET|POST|HEAD|PUT|DELETE).*/(\.|wp-|php|admin|login|xmlrpc|\.env|\.git|\.aws|\.azure).*" \d+.*$
ignoreregex =
'';
# SSH hardening
services.openssh.settings = {
PermitRootLogin = "no";
MaxAuthTries = 3;
MaxSessions = 5;
LoginGraceTime = 30;
ClientAliveInterval = 300;
ClientAliveCountMax = 2;
PermitEmptyPasswords = "no";
ChallengeResponseAuthentication = "no";
UsePAM = true;
LogLevel = "VERBOSE";
X11Forwarding = false;
AllowTcpForwarding = "no";
AllowAgentForwarding = "no";
PermitTunnel = "no";
};
# Kernel network hardening
boot.kernel.sysctl = {
# IP Spoofing protection
"net.ipv4.conf.all.rp_filter" = 1;
"net.ipv4.conf.default.rp_filter" = 1;
# Ignore ICMP broadcasts
"net.ipv4.icmp_echo_ignore_broadcasts" = 1;
# Disable source routing
"net.ipv4.conf.all.accept_source_route" = 0;
"net.ipv4.conf.default.accept_source_route" = 0;
"net.ipv6.conf.all.accept_source_route" = 0;
"net.ipv6.conf.default.accept_source_route" = 0;
# Disable redirects
"net.ipv4.conf.all.send_redirects" = 0;
"net.ipv4.conf.default.send_redirects" = 0;
# SYN flood protection
"net.ipv4.tcp_syncookies" = 1;
"net.ipv4.tcp_max_syn_backlog" = 2048;
"net.ipv4.tcp_synack_retries" = 2;
"net.ipv4.tcp_syn_retries" = 5;
# Log martian packets
"net.ipv4.conf.all.log_martians" = 1;
"net.ipv4.conf.default.log_martians" = 1;
# Ignore redirects
"net.ipv4.conf.all.accept_redirects" = 0;
"net.ipv4.conf.default.accept_redirects" = 0;
"net.ipv4.conf.all.secure_redirects" = 0;
"net.ipv4.conf.default.secure_redirects" = 0;
"net.ipv6.conf.all.accept_redirects" = 0;
"net.ipv6.conf.default.accept_redirects" = 0;
# Connection tuning
"net.core.somaxconn" = 4096;
"net.core.netdev_max_backlog" = 65536;
"net.ipv4.tcp_max_orphans" = 65536;
"net.ipv4.tcp_fin_timeout" = 15;
"net.ipv4.tcp_keepalive_time" = 300;
"net.ipv4.tcp_keepalive_probes" = 5;
"net.ipv4.tcp_keepalive_intvl" = 15;
};
# Audit logging
security.auditd.enable = true;
# Fail2ban log directory
systemd.tmpfiles.rules = [
"d /var/log/fail2ban 0755 root root -"
"d /var/log/traefik 0755 root root -"
];
# Copy the NixOS configuration file and link it from the resulting system
# (/run/current-system/configuration.nix). This is useful in case you
# accidentally delete configuration.nix.

View File

@@ -0,0 +1,235 @@
{ config, lib, pkgs, ... }:
with lib;
let
cfg = config.services.hyperspace;
# Hyperspace CLI release from github.com/hyperspaceai/aios-cli
# The binary bundles Node.js runtime + llama.cpp + sidecars (~914MB)
# It auto-updates via `hyperspace update` post-install
# NOTE(review): the copy installed here lives in the read-only Nix store, so
# presumably self-update cannot replace it and upgrades have to go through
# `services.hyperspace.release` — confirm.
# NOTE(review): `src.hash` is a single fixed value while `version` follows
# `cfg.release`; any release other than the one the hash was computed for will
# fail the fixed-output hash check until the hash is updated as well.
# NOTE(review): the upstream binaries are copied as-is (no patchelf step); if
# they are dynamically linked they may need autoPatchelfHook or nix-ld — confirm.
hyperspacePkg = pkgs.stdenv.mkDerivation rec {
  pname = "hyperspace";
  version = cfg.release;

  src = pkgs.fetchurl {
    url = "https://github.com/hyperspaceai/aios-cli/releases/download/v${version}/aios-cli-x86_64-unknown-linux-gnu.tar.gz";
    hash = "sha256-f6fJ8t3exqtYwUD5j+WvD+Hm0oN/Eef0X+R9Rj23dE0=";
  };

  # Build directly in the unpack directory instead of a sub-directory.
  sourceRoot = ".";

  # makeWrapper provides `wrapProgram`, used at the end of installPhase.
  nativeBuildInputs = with pkgs; [ makeWrapper ];

  installPhase = ''
    runHook preInstall

    mkdir -p $out/bin $out/lib/hyperspace

    # Main CLI binary
    install -Dm755 aios-cli $out/bin/hyperspace

    # Sidecar binaries (each one is optional; missing files are skipped)
    for f in _aios-cli pod-raft hyperspace-*; do
      [ -f "$f" ] && install -m755 "$f" $out/lib/hyperspace/ || true
    done

    # WASM, native modules, Python shards (all optional)
    cp -r *.wasm $out/lib/hyperspace/ 2>/dev/null || true
    cp -r *.node $out/lib/hyperspace/ 2>/dev/null || true
    mkdir -p $out/lib/hyperspace/python
    cp -r python/* $out/lib/hyperspace/python/ 2>/dev/null || true

    # Skills directory (optional)
    mkdir -p $out/share/hyperspace
    cp -r skills $out/share/hyperspace/ 2>/dev/null || true

    # Set HYPERSPACE_PATH so the binary finds sidecars
    wrapProgram $out/bin/hyperspace \
      --set HYPERSPACE_PATH "$out/lib/hyperspace" \
      --set HYPERSPACE_SKILLS_DIR "$out/share/hyperspace/skills"

    runHook postInstall
  '';

  meta = {
    description = "Hyperspace CLI P2P mesh AI inference network (Pods)";
    longDescription = ''
      Hyperspace Pods let multiple machines pool their GPUs into one private
      AI cluster. Install the CLI, create a pod, share an invite link your
      machines form a P2P mesh and can run models split across all connected
      GPUs. Exposes an OpenAI-compatible API for use with Cursor, Claude Code,
      Aider, etc.
    '';
    homepage = "https://hyperspace.sh";
    sourceProvenance = with lib; [ sourceTypes.binaryNativeCode ];
    license = lib.licenses.unfree;
    platforms = [ "x86_64-linux" ];
    maintainers = [ ];
  };
};
in {
options.services.hyperspace =
  let
    inherit (lib) mkEnableOption mkOption types;

    # Shorthand: declare an option from its type, default and description.
    opt = type: default: description:
      mkOption { inherit type default description; };
  in
  {
    # Master switch; everything under `config` is gated on this.
    enable = mkEnableOption "Hyperspace P2P AI agent (Pods)";

    # Interpolated into the release download URL and used as the package version.
    release = opt types.str "5.45.30"
      "Hyperspace CLI release version (from GitHub releases).";

    # Also used as the name of the service's primary group.
    user = opt types.str "ai-worker"
      "System user to run the Hyperspace agent.";

    apiPort = opt types.port 8080
      "Port for the OpenAI-compatible API server.";

    # When false the unit is still defined but not pulled in by multi-user.target.
    autoStart = opt types.bool true
      "Auto-start the Hyperspace agent on boot.";

    openFirewall = opt types.bool true
      "Open firewall ports for P2P traffic (libp2p 4001, chain 30301, API).";

    # Passed to the agent as `--profile`.
    profile = opt
      (types.enum [ "auto" "full" "inference" "embedding" "relay" "storage" ])
      "auto"
      ''
        Agent profile:
        - auto: auto-detect hardware
        - full: all 9 capabilities
        - inference: GPU inference only
        - embedding: CPU embedding only
        - relay: lightweight relay
        - storage: storage + memory
      '';

    # Shell-escaped and appended to the `hyperspace start` command line.
    extraArgs = opt (types.listOf types.str) [ ]
      "Extra arguments passed to `hyperspace start`.";

    # Exported to the agent as HYPERSPACE_HOME and used as its working directory.
    dataDir = opt types.str "/var/lib/hyperspace"
      "Data directory for agent state (models, config, logs).";
  };
# Everything below is only applied when services.hyperspace.enable is set.
config = mkIf cfg.enable {
  # Ensure the service user exists
  # NOTE(review): the default user "ai-worker" is presumably also declared in
  # users/ai-worker.nix; if that declaration sets `isNormalUser`, merging it
  # with `isSystemUser = true` here trips the NixOS "exactly one of
  # isSystemUser / isNormalUser" assertion — confirm before deploying.
  users.users.${cfg.user} = {
    isSystemUser = true;
    group = cfg.user;
    home = "/home/${cfg.user}";
    createHome = true;
    shell = pkgs.bash;
  };
  users.groups.${cfg.user} = { };

  # Install the hyperspace binary
  environment.systemPackages = [ hyperspacePkg ];

  # Data directories, owned by the service user
  systemd.tmpfiles.rules = [
    "d ${cfg.dataDir} 0755 ${cfg.user} ${cfg.user} -"
    "d ${cfg.dataDir}/models 0755 ${cfg.user} ${cfg.user} -"
    "d ${cfg.dataDir}/data 0755 ${cfg.user} ${cfg.user} -"
  ];

  # Systemd service: runs the Hyperspace agent as a system daemon
  systemd.services.hyperspace = {
    description = "Hyperspace P2P AI Agent Pods mesh cluster";
    documentation = [ "https://hyperspace.sh" "https://github.com/hyperspaceai/aios-cli" ];
    after = [ "network-online.target" ];
    wants = [ "network-online.target" ];
    wantedBy = mkIf cfg.autoStart [ "multi-user.target" ];

    environment = {
      HYPERSPACE_HOME = cfg.dataDir;
      HYPERSPACE_API_PORT = toString cfg.apiPort;
      HYPERSPACE_PATH = "${hyperspacePkg}/lib/hyperspace";
    };

    path = with pkgs; [ bash curl nodejs ];

    script = ''
      # Best-effort wait (roughly 90s at most) for outbound connectivity.
      # A TCP connect to 8.8.8.8:53 is used instead of `ping`: ping is not on
      # this unit's PATH, and an unprivileged NoNewPrivileges service may not
      # be allowed to open ICMP sockets anyway. Startup proceeds regardless of
      # the outcome once the attempts are exhausted.
      for _ in $(seq 1 30); do
        if ${pkgs.coreutils}/bin/timeout 1 ${pkgs.bash}/bin/bash -c 'exec 3<>/dev/tcp/8.8.8.8/53' 2>/dev/null; then
          break
        fi
        sleep 2
      done

      exec ${hyperspacePkg}/bin/hyperspace start \
        --profile ${cfg.profile} \
        --api-port ${toString cfg.apiPort} \
        ${lib.escapeShellArgs cfg.extraArgs}
    '';

    serviceConfig = {
      Type = "exec";
      User = cfg.user;
      Group = cfg.user;
      WorkingDirectory = cfg.dataDir;
      Restart = "always";
      RestartSec = 10;
      TimeoutStartSec = 180;
      TimeoutStopSec = 30;
      KillMode = "mixed";

      # File limits for network-heavy P2P agent
      LimitNOFILE = 65536;
      LimitNPROC = 4096;

      # GPU access — AMD MI50 (ROCm) through /dev/kfd and /dev/dri.
      # Each list item becomes its own `DeviceAllow=` line, so the device and
      # its access mode must share one string. /dev/dri is a directory, not a
      # device node, so the DRM nodes are allowed via the `char-drm` group.
      DeviceAllow = [
        "/dev/kfd rw"
        "char-drm rw"
      ];
      SupplementaryGroups = [ "video" "render" ];

      # Security hardening
      NoNewPrivileges = true;
      ProtectSystem = "strict";
      ProtectHome = true;
      PrivateTmp = true;
      PrivateDevices = false; # needs GPU access
      ReadWritePaths = [
        cfg.dataDir
        "/tmp"
      ];
      BindPaths = [
        # GPU devices for AMD MI50; the "-" prefix makes systemd skip a path
        # that does not exist instead of failing namespace setup.
        "-/dev/kfd"
        "-/dev/dri"
      ];
    };
  };

  # Firewall: open P2P ports for the mesh network.
  # Note that this also opens the API port on every interface the NixOS
  # firewall covers, not just the P2P ports.
  networking.firewall = mkIf cfg.openFirewall {
    allowedTCPPorts = [
      4001 # libp2p P2P (agent gossip, DHT, circuits)
      30301 # Chain P2P (blockchain consensus)
      cfg.apiPort # OpenAI-compatible API
    ];
    allowedUDPPorts = [
      4001 # libp2p QUIC transport
      30301 # Chain UDP discovery
    ];
  };
};
}

View File

@@ -11,71 +11,4 @@
];
};
users.groups.ai-worker = {};
# Restricted sudo for ai-worker - security checks only
security.sudo.extraRules = [
{
users = [ "ai-worker" ];
commands = [
# Firewall checks
{
command = "/run/wrappers/bin/sudo iptables -L -n -v";
options = [ "NOPASSWD" ];
}
{
command = "/run/wrappers/bin/sudo iptables -S";
options = [ "NOPASSWD" ];
}
# Fail2ban status
{
command = "/run/current-system/sw/bin/fail2ban-client status";
options = [ "NOPASSWD" ];
}
{
command = "/run/current-system/sw/bin/fail2ban-client status *";
options = [ "NOPASSWD" ];
}
{
command = "/run/current-system/sw/bin/fail2ban-client get * banned";
options = [ "NOPASSWD" ];
}
# Log inspection
{
command = "/run/current-system/sw/bin/journalctl -t kernel -n 100";
options = [ "NOPASSWD" ];
}
{
command = "/run/current-system/sw/bin/journalctl -u fail2ban -n 50";
options = [ "NOPASSWD" ];
}
{
command = "/run/current-system/sw/bin/journalctl -u firewall -n 50";
options = [ "NOPASSWD" ];
}
# SSH config verification
{
command = "/run/current-system/sw/bin/sshd -T";
options = [ "NOPASSWD" ];
}
# Docker service checks
{
command = "/run/current-system/sw/bin/docker ps";
options = [ "NOPASSWD" ];
}
{
command = "/run/current-system/sw/bin/docker inspect *";
options = [ "NOPASSWD" ];
}
# Network diagnostics
{
command = "/run/current-system/sw/bin/ss -tlnp";
options = [ "NOPASSWD" ];
}
{
command = "/run/current-system/sw/bin/cat /proc/net/tcp";
options = [ "NOPASSWD" ];
}
];
}
];
}