Compare commits
1 Commits
feat/hyper
...
feat/kvm-l
| Author | SHA1 | Date | |
|---|---|---|---|
| 37d690e4de |
Submodule assets/compose updated: d3f2e3b7b9...6b82a26c25
@@ -61,7 +61,6 @@
|
|||||||
./modules/nixos/services/open_code_server.nix
|
./modules/nixos/services/open_code_server.nix
|
||||||
./modules/nixos/services/ollama_init_custom_models.nix
|
./modules/nixos/services/ollama_init_custom_models.nix
|
||||||
./modules/nixos/services/openclaw_node.nix
|
./modules/nixos/services/openclaw_node.nix
|
||||||
./modules/nixos/services/hyperspace.nix
|
|
||||||
./modules/nixos/security/ai-worker-restricted.nix
|
./modules/nixos/security/ai-worker-restricted.nix
|
||||||
./users/gortium.nix
|
./users/gortium.nix
|
||||||
./users/ai-worker.nix
|
./users/ai-worker.nix
|
||||||
|
|||||||
@@ -36,7 +36,7 @@
|
|||||||
"transparent_hugepage=always" # because mucho ram
|
"transparent_hugepage=always" # because mucho ram
|
||||||
];
|
];
|
||||||
# 2. Load the specific drivers found by sensors-detect
|
# 2. Load the specific drivers found by sensors-detect
|
||||||
boot.kernelModules = [ "nct6775" "lm96163" "iptable_nat" "iptable_filter" ];
|
boot.kernelModules = [ "nct6775" "lm96163" "iptable_nat" "iptable_filter" "kvm-intel" "kvm" ];
|
||||||
# 3. Force the nct6775 driver to recognize the chip if it's stubborn
|
# 3. Force the nct6775 driver to recognize the chip if it's stubborn
|
||||||
boot.extraModprobeConfig = ''
|
boot.extraModprobeConfig = ''
|
||||||
options nct6775 force_id=0xd280
|
options nct6775 force_id=0xd280
|
||||||
@@ -207,7 +207,6 @@
|
|||||||
ai = {
|
ai = {
|
||||||
path = self + "/assets/compose/ai";
|
path = self + "/assets/compose/ai";
|
||||||
envFile = config.age.secrets.containers_env.path;
|
envFile = config.age.secrets.containers_env.path;
|
||||||
ports = [ 22000 ]; # Syncthing TCP sync
|
|
||||||
};
|
};
|
||||||
|
|
||||||
cloudstorage = {
|
cloudstorage = {
|
||||||
@@ -329,20 +328,21 @@
|
|||||||
# Mi50 config
|
# Mi50 config
|
||||||
hardware.graphics = {
|
hardware.graphics = {
|
||||||
enable = true;
|
enable = true;
|
||||||
enable32Bit = true; # Useful for some compatibility layers
|
enable32Bit = true;
|
||||||
extraPackages = with pkgs; [
|
extraPackages = with pkgs; [
|
||||||
rocmPackages.clr.icd # OpenCL/HIP runtime
|
rocmPackages.clr.icd
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
nixpkgs.config.rocmTargets = [ "gfx906" ];
|
nixpkgs.config.rocmTargets = [ "gfx906" ];
|
||||||
environment.variables = {
|
environment.variables = {
|
||||||
# This "tricks" ROCm into supporting the MI50 if using newer versions
|
|
||||||
HSA_OVERRIDE_GFX_VERSION = "9.0.6";
|
HSA_OVERRIDE_GFX_VERSION = "9.0.6";
|
||||||
# Ensures the system sees both GPUs
|
|
||||||
HIP_VISIBLE_DEVICES = "0,1";
|
HIP_VISIBLE_DEVICES = "0,1";
|
||||||
};
|
};
|
||||||
|
|
||||||
# Open ports in the firewall.
|
# KVM/libvirt for staging VM
|
||||||
|
virtualisation.libvirtd.enable = true;
|
||||||
|
|
||||||
|
# Open ports in the firewall.
|
||||||
# networking.firewall.allowedTCPPorts = [ ... ];
|
# networking.firewall.allowedTCPPorts = [ ... ];
|
||||||
# networking.firewall.allowedUDPPorts = [ ... ];
|
# networking.firewall.allowedUDPPorts = [ ... ];
|
||||||
# Or disable the firewall altogether.
|
# Or disable the firewall altogether.
|
||||||
@@ -475,7 +475,7 @@
|
|||||||
services.openssh.settings = {
|
services.openssh.settings = {
|
||||||
PermitRootLogin = "no";
|
PermitRootLogin = "no";
|
||||||
MaxAuthTries = 3;
|
MaxAuthTries = 3;
|
||||||
MaxSessions = 20;
|
MaxSessions = 10;
|
||||||
LoginGraceTime = 30;
|
LoginGraceTime = 30;
|
||||||
ClientAliveInterval = 300;
|
ClientAliveInterval = 300;
|
||||||
ClientAliveCountMax = 2;
|
ClientAliveCountMax = 2;
|
||||||
|
|||||||
@@ -1,134 +0,0 @@
|
|||||||
{ config, lib, pkgs, ... }:
|
|
||||||
|
|
||||||
let
|
|
||||||
cfg = config.services.hyperspace;
|
|
||||||
|
|
||||||
hyperspacePkg = pkgs.stdenv.mkDerivation {
|
|
||||||
name = "hyperspace-pods-${cfg.version}";
|
|
||||||
src = pkgs.fetchurl {
|
|
||||||
url = "https://github.com/hyperspaceai/aios-cli/releases/download/v${cfg.version}/aios-cli-x86_64-unknown-linux-gnu.tar.gz";
|
|
||||||
hash = cfg.packageHash;
|
|
||||||
};
|
|
||||||
sourceRoot = ".";
|
|
||||||
installPhase = ''
|
|
||||||
mkdir -p $out/libexec $out/bin
|
|
||||||
cp -r * $out/libexec/
|
|
||||||
chmod +x $out/libexec/aios-cli
|
|
||||||
ln -s $out/libexec/aios-cli $out/bin/hyperspace
|
|
||||||
'';
|
|
||||||
};
|
|
||||||
in {
|
|
||||||
options.services.hyperspace = {
|
|
||||||
enable = lib.mkEnableOption "Hyperspace Pods P2P AI cluster agent";
|
|
||||||
|
|
||||||
version = lib.mkOption {
|
|
||||||
type = lib.types.str;
|
|
||||||
default = "5.45.30";
|
|
||||||
description = "Hyperspace CLI version to download.";
|
|
||||||
};
|
|
||||||
|
|
||||||
packageHash = lib.mkOption {
|
|
||||||
type = lib.types.str;
|
|
||||||
default = "sha256-f6fJ8t3exqtYwUD5j+WvD+Hm0oN/Eef0X+R9Rj23dE0=";
|
|
||||||
description = ''
|
|
||||||
SRI hash of the hyperspace release tarball (sha256-<base64>).
|
|
||||||
Must be updated when version changes. Generate with:
|
|
||||||
nix store prefetch-file --hash-algo sha256 \\
|
|
||||||
https://github.com/hyperspaceai/aios-cli/releases/download/v{version}/aios-cli-x86_64-unknown-linux-gnu.tar.gz
|
|
||||||
'';
|
|
||||||
};
|
|
||||||
|
|
||||||
user = lib.mkOption {
|
|
||||||
type = lib.types.str;
|
|
||||||
default = "ai-worker";
|
|
||||||
description = "System user to run the Hyperspace agent.";
|
|
||||||
};
|
|
||||||
|
|
||||||
apiPort = lib.mkOption {
|
|
||||||
type = lib.types.port;
|
|
||||||
default = 8080;
|
|
||||||
description = "OpenAI-compatible API port (configurable via --api-port).";
|
|
||||||
};
|
|
||||||
|
|
||||||
profile = lib.mkOption {
|
|
||||||
type = lib.types.str;
|
|
||||||
default = "auto";
|
|
||||||
description = ''
|
|
||||||
Agent profile. Options: auto (auto-detect hardware), full (all capabilities),
|
|
||||||
inference (GPU inference only), embedding (CPU embedding only),
|
|
||||||
relay (lightweight relay), storage (storage + memory).
|
|
||||||
'';
|
|
||||||
};
|
|
||||||
|
|
||||||
autoStart = lib.mkOption {
|
|
||||||
type = lib.types.bool;
|
|
||||||
default = true;
|
|
||||||
description = "Start the agent automatically on boot.";
|
|
||||||
};
|
|
||||||
|
|
||||||
openFirewall = lib.mkOption {
|
|
||||||
type = lib.types.bool;
|
|
||||||
default = true;
|
|
||||||
description = "Open P2P mesh (4001 TCP+UDP, 30301 TCP) and API port in the firewall.";
|
|
||||||
};
|
|
||||||
|
|
||||||
extraArgs = lib.mkOption {
|
|
||||||
type = lib.types.listOf lib.types.str;
|
|
||||||
default = [ ];
|
|
||||||
description = "Extra arguments to pass to 'hyperspace start'.";
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
config = lib.mkIf cfg.enable {
|
|
||||||
systemd.services.hyperspace = {
|
|
||||||
description = "Hyperspace Pods P2P AI Cluster Agent";
|
|
||||||
after = [ "network.target" "network-online.target" ];
|
|
||||||
wants = [ "network-online.target" ];
|
|
||||||
wantedBy = lib.mkIf cfg.autoStart [ "multi-user.target" ];
|
|
||||||
|
|
||||||
path = with pkgs; [ bash coreutils ];
|
|
||||||
|
|
||||||
serviceConfig = {
|
|
||||||
Type = "simple";
|
|
||||||
User = cfg.user;
|
|
||||||
Group = cfg.user;
|
|
||||||
WorkingDirectory = "${hyperspacePkg}/libexec";
|
|
||||||
ExecStart = "${hyperspacePkg}/bin/hyperspace start --profile ${cfg.profile} --api-port ${toString cfg.apiPort} ${lib.escapeShellArgs cfg.extraArgs}";
|
|
||||||
Restart = "on-failure";
|
|
||||||
RestartSec = 5;
|
|
||||||
|
|
||||||
# AMD MI50 (ROCm) device access
|
|
||||||
DeviceAllow = [ "/dev/kfd rw" "/dev/dri rw" ];
|
|
||||||
|
|
||||||
# Supplementary groups for GPU/accelerator access
|
|
||||||
SupplementaryGroups = [ "video" "render" ];
|
|
||||||
|
|
||||||
# Hardening
|
|
||||||
NoNewPrivileges = true;
|
|
||||||
ProtectHome = "tmpfs";
|
|
||||||
ProtectSystem = "strict";
|
|
||||||
PrivateTmp = true;
|
|
||||||
PrivateDevices = false; # Needs /dev/kfd and /dev/dri
|
|
||||||
};
|
|
||||||
|
|
||||||
environment = {
|
|
||||||
HSA_OVERRIDE_GFX_VERSION = "9.0.6";
|
|
||||||
HOME = "/home/${cfg.user}";
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
# Firewall ports for P2P mesh (libp2p 4001, chain 30301) and API
|
|
||||||
networking.firewall.allowedTCPPorts = lib.mkIf cfg.openFirewall [ 4001 30301 cfg.apiPort ];
|
|
||||||
networking.firewall.allowedUDPPorts = lib.mkIf cfg.openFirewall [ 4001 ];
|
|
||||||
|
|
||||||
# Add GPU/accelerator groups to the service user (persistent beyond service restarts)
|
|
||||||
users.users = lib.mkIf (cfg.user == "ai-worker") {
|
|
||||||
ai-worker = {
|
|
||||||
extraGroups = [ "video" "render" ];
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
# ROCm override for AMD MI50 (gfx906) compatibility
|
|
||||||
environment.variables.HSA_OVERRIDE_GFX_VERSION = "9.0.6";
|
|
||||||
};
|
|
||||||
}
|
|
||||||
@@ -4,7 +4,7 @@
|
|||||||
group = "ai-worker";
|
group = "ai-worker";
|
||||||
home = "/home/ai-worker";
|
home = "/home/ai-worker";
|
||||||
createHome = true;
|
createHome = true;
|
||||||
extraGroups = [ "docker" ];
|
extraGroups = [ "docker" "libvirtd" ];
|
||||||
shell = pkgs.bashInteractive;
|
shell = pkgs.bashInteractive;
|
||||||
openssh.authorizedKeys.keys = [
|
openssh.authorizedKeys.keys = [
|
||||||
keys.users.ai-worker.main
|
keys.users.ai-worker.main
|
||||||
|
|||||||
Reference in New Issue
Block a user