docs: add AGENTS.md with dev workflow and YAML conventions

feat: add Hermes worker anchor and provisioning script for Paperclip employees
- Add x-hermes-worker YAML anchor template in compose.yml (CPU-only workers, no GPU passthrough, OpenCode Go provider) - Add commented worker example with env vars placeholder - Create scripts/provision-hermes-worker.sh for automated worker provisioning (generates port, API key, volume dir, appends service) - Workers connect to Discord only, isolated per container - Volumes under /mnt/HoardingCow_docker_data/Hermes/<name>/
2026-05-19 15:27:21 -04:00 · 2026-05-19 14:13:02 -04:00
5 changed files with 274 additions and 97 deletions
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -0,0 +1,31 @@
+# AGENTS.md
+
+Development conventions for the compose repository (Docker Compose stacks).
+
+## Build & Deploy
+
+- The ai stack is managed via `systemctl restart ai_stack.service` on the NixOS host
+- Compose files are built from the git repo; apply via `nh os switch` or `systemctl restart ai_stack.service`
+- Never run `docker compose up -d` directly — it bypasses the systemd env file and breaks secrets
+
+## Hermes Workers
+
+- Paperclip Hermes workers are added via `ai/scripts/provision-hermes-worker.sh`
+- The script only inserts a new service block (just before the top-level `networks:` key) — it never deletes or modifies existing content
+- Workers are CPU-only containers on the `ai_backend` network with no GPU passthrough
+
+## Workflow
+
+- New feature → clean branch from `origin/master` → push → PR on Gitea
+- Branch naming: `feat/description` (features), `fix/description` (bugs/docs)
+- Always branch from `origin/master`, never from another feature branch
+- Submodule changes (when this repo is consumed as a submodule): commit the submodule update in the parent repo
+- PR title should describe the change; body should explain motivation + summary
+- After PR merge, delete the feature branch
+
+## YAML Conventions
+
+- Use `x-*` extension fields for reusable anchors
+- Comment out inactive services rather than deleting them
+- Environment variables in dictionary format (`KEY: value`) for anchor compatibility; `<<` merges are shallow, so merge inside `environment:` itself
+- List format (`- KEY=value`) works for standard services but doesn't merge with anchors
--- a/ai/compose.yml
+++ b/ai/compose.yml
@@ -1,4 +1,35 @@
 version: "3.8"
+
+# ── Hermes Worker Template ──────────────────────────────────
+# Used by paperclip-worker-* Hermes containers via YAML anchor; each worker
+# is one isolated Hermes agent for a Paperclip employee.
+# Override per service: container_name, volumes, and environment
+# (API_SERVER_PORT, API_SERVER_KEY, DISCORD_BOT_TOKEN). Merge keys are shallow:
+# start that environment with "<<: *hermes-worker-env" or the shared vars are lost.
+x-hermes-worker: &hermes-worker
+  build:
+    context: ./hermes
+    ssh:
+      - default
+  entrypoint: ["/bin/bash", "-c",
+    "bash /opt/data/hermes-tools/install.sh && exec /usr/bin/tini -g -- /opt/hermes/docker/entrypoint.sh \"$@\"",
+    "hermes-entrypoint"]
+  command: gateway run
+  restart: always
+  environment: &hermes-worker-env
+    API_SERVER_ENABLED: "true"
+    API_SERVER_HOST: "0.0.0.0"
+    OLLAMA_HOST: "http://ollama:11434"
+    OPENROUTER_API_KEY: ${OPENROUTER_API_KEY}
+    # Meant to be one OpenCode Go key per worker; this is the shared .env default — TODO confirm
+    OPENCODE_GO_API_KEY: ${OPENCODE_GO_API_KEY}
+    GATEWAY_ALLOW_ALL_USERS: "true"
+    TZ: "America/Montreal"
+  networks:
+    ai_backend:
+  # NO devices — workers are CPU-only (no GPU passthrough); they use OpenCode Go or remote providers
+# ─────────────────────────────────────────────────────────────
+
 services:

  # webui:
@@ -54,10 +85,6 @@ services:
      - TZ=America/Montreal
    volumes:
      - /mnt/HoardingCow_docker_data/Hermes/data:/opt/data
-      # Syncthing-shared org files — read-only view of user's agenda
-      - /mnt/HoardingCow_docker_data/Syncthing/telos-ro:/opt/data/telos-ro:ro
-      # Syncthing-shared inbox — write tasks here, they sync to user's laptop
-      - /mnt/HoardingCow_docker_data/Syncthing/telos-rw:/opt/data/telos-rw:rw
    devices:
      - /dev/kfd:/dev/kfd
      - /dev/dri:/dev/dri
@@ -67,35 +94,6 @@ services:
    networks:
      - ai_backend

-  syncthing:
-    image: syncthing/syncthing:latest
-    container_name: syncthing
-    hostname: syncthing
-    restart: always
-    ports:
-      - "8384:8384"
-      - "22000:22000"
-      - "21027:21027/udp"
-    environment:
-      - TZ=America/Montreal
-    volumes:
-      - /mnt/HoardingCow_docker_data/Syncthing/config:/var/syncthing/config
-      - /mnt/HoardingCow_docker_data/Syncthing/telos-ro:/telos-ro
-      - /mnt/HoardingCow_docker_data/Syncthing/telos-rw:/telos-rw
-    networks:
-      - ai_backend
-      - ai_net
-    labels:
-      - "traefik.enable=true"
-      - "traefik.http.routers.syncthing-http.rule=Host(`syncthing.lazyworkhorse.net`)"
-      - "traefik.http.routers.syncthing-http.entrypoints=web"
-      - "traefik.http.routers.syncthing-http.middlewares=redirect-to-https"
-      - "traefik.http.routers.syncthing-https.rule=Host(`syncthing.lazyworkhorse.net`)"
-      - "traefik.http.routers.syncthing-https.entrypoints=websecure"
-      - "traefik.http.routers.syncthing-https.tls=true"
-      - "traefik.http.routers.syncthing-https.tls.certresolver=njalla"
-      - "traefik.http.services.syncthing.loadbalancer.server.port=8384"
-
  ollama:
    build:
      context: ./ollama
@@ -112,7 +110,40 @@ services:
      - /mnt/HoardingCow_docker_data/Ollama/ollama:/root/.ollama
    environment:
      - OLLAMA_VULKAN=0
+      - HSA_OVERRIDE_GFX_VERSION=9.0.6
+      - HCC_AMDGPU_TARGET=gfx906
+      - HIP_VISIBLE_DEVICES=0,1
+      - ROCR_VISIBLE_DEVICES=0,1
+      - HSA_ENABLE_SDMA=0
       - OLLAMA_HOST=0.0.0.0
+      - OLLAMA_DEBUG=1  # NOTE(review): debug logging left enabled — confirm this is intentional
+      - OLLAMA_FLASH_ATTENTION=1
+      - OLLAMA_NUM_PARALLEL=2
+    devices:
+      # Map the render nodes and KFD for ROCm to work inside the container
+      - /dev/kfd:/dev/kfd
+      - /dev/dri:/dev/dri
+    group_add:
+      - "303"
+      - "26"
+
+# ── Paperclip Worker Hermes Agents ──────────────────────────
+# Each worker is an isolated Hermes agent for a Paperclip employee.
+# Add new workers with: ./scripts/provision-hermes-worker.sh <name> <discord_bot_token_var>  (the .env variable NAME, not the token)
+# The API server key and port are generated automatically.
+# Workers are CPU-only — they use OpenCode Go or remote providers.
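+# NOTE: a service-level `environment:` replaces the anchor's one wholesale (YAML merge keys are shallow).
+  # ── Worker Template (commented — uncomment + configure to activate) ──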
+  # hermes-worker-1:
+  #   <<: *hermes-worker
+  #   container_name: hermes-worker-1
+  #   environment:
+  #     API_SERVER_PORT: "8651"
+  #     API_SERVER_KEY: "generated-by-provision-script"
+  #     DISCORD_BOT_TOKEN: ${WORKER_1_DISCORD_BOT_TOKEN}
+  #   volumes:
+  #     - /mnt/HoardingCow_docker_data/Hermes/worker-1:/opt/data
+# ─────────────────────────────────────────────────────────────

 networks:
  ai_net:
@@ -122,40 +153,47 @@ networks:
    driver: bridge
    name: ai_backend
    
-  llama-cpp-hermes:
-    image: llama-cpp:rocm-gfx906
-    container_name: llama-cpp-hermes
-    restart: unless-stopped
-    networks:
-      - ai_backend
-    ports:
-      - "127.0.0.1:8300:8080"
-    ipc: host
-    devices:
-      - /dev/kfd:/dev/kfd
-      - /dev/dri:/dev/dri
-    group_add:
-      - "303"
-      - "26"
-    environment:
-      - HSA_OVERRIDE_GFX_VERSION=9.0.6
-      - HSA_ENABLE_SDMA=0
-      - HIP_VISIBLE_DEVICES=0,1
-      - LLAMA_CACHE=/models
-    volumes:
-      - /mnt/HoardingCow_docker_data/Llama_cpp/models:/models
-      - /mnt/HoardingCow_docker_data/Ollama/ollama/models/blobs/sha256-17823599694fa3503ef54bf748d5078c6ce881f4d01616cafa255dc05d215a08:/model.gguf:ro
-    command: >
-      -m /model.gguf
-      --host 0.0.0.0
-      --port 8080
-      --gpu-layers 99
-      --ctx-size 163840
-      -ctk f16 -ctv f16
-      --flash-attn on
-      --split-mode layer
-      --no-mmap
-      --n-predict -1
+  # llama_cpp_devstral:
+  #   image: ghcr.io/ggml-org/llama.cpp:server-rocm
+  #   container_name: llama_cpp_devstral
+  #   restart: unless-stopped
+  #   networks:
+  #     - ai_backend
+  #   ports:
+  #     - "8300:8080"
+  #   ipc: host
+  #   devices:
+  #     - "/dev/kfd:/dev/kfd"
+  #     - "/dev/dri:/dev/dri"
+  #   group_add:
+  #     - "303" # video
+  #     - "26"  # render
+  #   environment:
+  #     HSA_OVERRIDE_GFX_VERSION: 9.0.6
+  #     HIP_VISIBLE_DEVICES: 0,1
+  #     LLAMA_CACHE: /models
+  #   volumes:
+  #     - /mnt/HoardingCow_docker_data/Llama_cpp/models:/models
+  #     - /mnt/HoardingCow_docker_data/Llama_cpp/devstral-agent.jinja:/template.jinja
+  #   command: >
+  #     -hf unsloth/Devstral-Small-2-24B-Instruct-2512-GGUF:Devstral-Small-2-24B-Instruct-2512-Q8_0.gguf
+  #     -a devstral-2-small-llama_cpp
+  #     --chat-template-file /template.jinja
+  #     --host 0.0.0.0
+  #     --port 8080
+  #     --n-gpu-layers 99
+  #     --ctx-size 163840
+  #     --batch-size 4096
+  #     --ubatch-size 4096
+  #     --cache-type-k f16
+  #     --cache-type-v f16
+  #     --cache-reuse 256
+  #     --flash-attn on
+  #     --context-shift
+  #     --split-mode layer
+  #     --no-mmap
+  #     --n-predict -1
+  #     --parallel 2

  # vllm:
  #   image: nalanzeyu/vllm-gfx906:v0.9.0-rocm6.3
--- a/ai/llama-cpp/Dockerfile
+++ b/ai/llama-cpp/Dockerfile
@@ -1,30 +0,0 @@
-# llama-cpp-rocm6/Dockerfile
-# Custom llama.cpp server with ROCm 6.1 + gfx906 (MI50) support.
-# Build: docker build -t llama-cpp:rocm-gfx906 .
-
-FROM rocm/dev-ubuntu-22.04:6.1.2-complete AS builder
-RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y curl git build-essential pkg-config cmake make && rm -rf /var/lib/apt/lists/*
-ARG LLAMACPP_VERSION=b9596
-RUN git clone --depth 1 --branch ${LLAMACPP_VERSION} https://github.com/ggml-org/llama.cpp.git /build
-WORKDIR /build
-ENV HIP_PATH=/opt/rocm ROCM_PATH=/opt/rocm PATH=/opt/rocm/bin:/opt/rocm/llvm/bin:${PATH} CMAKE_PREFIX_PATH=/opt/rocm
-RUN mkdir build && cd build && \
-    cmake .. -DGGML_HIP=ON -DCMAKE_BUILD_TYPE=Release \
-      -DAMDGPU_TARGETS="gfx906:xnack-" \
-      -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
-      -DGGML_CUDA=OFF -DGGML_VULKAN=OFF -DGGML_METAL=OFF \
-      -DBUILD_SHARED_LIBS=OFF && \
-    cmake --build . --target llama-server -- -j $(nproc)
-
-FROM ubuntu:24.04
-RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
-    ca-certificates curl libstdc++6 libgomp1 libopenblas0 \
-    libnuma1 libelf1 libdrm2 libdrm-amdgpu1 \
-    && rm -rf /var/lib/apt/lists/*
-COPY --from=builder /opt/rocm/lib/ /opt/rocm/lib/
-COPY --from=builder /opt/rocm/share/ /opt/rocm/share/
-COPY --from=builder /build/build/bin/llama-server /usr/local/bin/llama-server
-RUN echo /opt/rocm/lib > /etc/ld.so.conf.d/rocm.conf && ldconfig
-ENV HSA_OVERRIDE_GFX_VERSION=9.0.6 HCC_AMDGPU_TARGET=gfx906 HSA_ENABLE_SDMA=0
-EXPOSE 8080
-ENTRYPOINT ["/usr/local/bin/llama-server"]
--- a/ai/scripts/provision-hermes-worker.sh
+++ b/ai/scripts/provision-hermes-worker.sh
@@ -0,0 +1,176 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# ── Hermes Worker Provisioner ──────────────────────────────
+# Adds a new Paperclip Hermes worker to the ai compose stack.
+#
+# Usage:
+#   ./provision-hermes-worker.sh <name> <discord_bot_token_var>
+#
+# Example:
+#   ./provision-hermes-worker.sh worker-1 WORKER_1_DISCORD_BOT_TOKEN
+#
+# The script only INSERTS a new service block just before the
+# top-level "networks:" key — it never modifies or removes
+# existing content, even commented lines.
+#
+# Post-provision steps (manual):
+#   1. Add secrets to agenix .env file
+#   2. systemctl restart ai_stack.service
+#   3. Configure Paperclip agent
+# ─────────────────────────────────────────────────────────────
+
+NAME="${1:?Usage: $0 <name> <discord_bot_token_var>}"
+TOKEN_VAR="${2:?Usage: $0 <name> <discord_bot_token_var>}"
+
+# ── Paths ───────────────────────────────────────────────────
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+COMPOSE_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
+COMPOSE_FILE="${COMPOSE_DIR}/compose.yml"
+
+# Each Hermes worker gets its own volume on the NFS HoardingCow
+VOLUME_BASE="/mnt/HoardingCow_docker_data/Hermes"
+VOLUME_DIR="${VOLUME_BASE}/${NAME}"
+
+# The Hermes container runs as UID 10000 (hermes user from Dockerfile)
+HERMES_UID=10000
+
+# ── Validation ──────────────────────────────────────────────
+# NAME becomes a YAML key, a grep pattern and a path component;
+# TOKEN_VAR is emitted inside ${...}. Reject anything outside a safe set.
+if ! [[ "$NAME" =~ ^[A-Za-z0-9][A-Za-z0-9_-]*$ ]]; then
+  echo "❌ Invalid worker name '${NAME}' (allowed: letters, digits, '_' and '-')" >&2
+  exit 1
+fi
+
+if ! [[ "$TOKEN_VAR" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
+  echo "❌ '${TOKEN_VAR}' is not a valid environment variable name" >&2
+  exit 1
+fi
+
+if ! [ -f "$COMPOSE_FILE" ]; then
+  echo "❌ compose.yml not found at $COMPOSE_FILE"
+  exit 1
+fi
+
+# The new service is inserted in front of this line; without it awk
+# would write nothing and the script would still report success.
+if ! grep -q '^networks:' "$COMPOSE_FILE"; then
+  echo "❌ No top-level 'networks:' key found in ${COMPOSE_FILE}" >&2
+  exit 1
+fi
+
+if grep -q "^  ${NAME}:" "$COMPOSE_FILE"; then
+  echo "❌ Service '${NAME}' already exists in ${COMPOSE_FILE}"
+  exit 1
+fi
+
+# YAML merge keys are shallow: the service-level "environment:" written
+# below replaces the one inherited through *hermes-worker. Re-merge the
+# shared variables when compose.yml defines an anchor for them.
+ENV_MERGE=""
+if grep -q '&hermes-worker-env' "$COMPOSE_FILE"; then
+  ENV_MERGE="<<: *hermes-worker-env"
+else
+  echo "⚠ No '&hermes-worker-env' anchor in compose.yml — '${NAME}' will not inherit the shared environment of x-hermes-worker" >&2
+fi
+
+# ── Generate unique API key ─────────────────────────────────
+# Used by Paperclip to authenticate against this worker's
+# Hermes API server (/v1/chat/completions)
+# NOTE(review): the key is written in clear text into compose.yml —
+# consider referencing it from the agenix .env instead.
+API_KEY="pc_worker_$(openssl rand -hex 16)"
+
+# ── Find next available API port ────────────────────────────
+# Workers get sequential ports starting at 8650.
+# Scans compose.yml for existing API_SERVER_PORT values (commented-out
+# entries included, so their ports stay reserved) and picks the next one.
+BASE_PORT=8650
+MAX_PORT=0
+while IFS= read -r port; do
+  if [ "$port" -gt "$MAX_PORT" ]; then
+    MAX_PORT="$port"
+  fi
+done < <(sed -nE 's/.*API_SERVER_PORT:[[:space:]]*"([0-9]+)".*/\1/p' "$COMPOSE_FILE")
+
+NEW_PORT=$((MAX_PORT + 1))
+if [ "$NEW_PORT" -lt "$BASE_PORT" ]; then
+  NEW_PORT=$BASE_PORT
+fi
+
+# ── Create volume directory (on NFS) ────────────────────────
+# NOTE(review): the x-hermes-worker entrypoint runs
+# /opt/data/hermes-tools/install.sh, which a freshly created volume
+# does not contain — confirm how that directory gets seeded.
+echo "📁 Creating volume directory: ${VOLUME_DIR}"
+mkdir -p "$VOLUME_DIR"
+
+# Hermes container runs as UID 10000 — set ownership so the
+# container can write its config, sessions, skills
+chown -R "${HERMES_UID}:${HERMES_UID}" "$VOLUME_DIR" 2>/dev/null || \
+  echo "⚠ Could not chown ${VOLUME_DIR} — run with sudo if needed"
+
+# Mode 755: writable by the owner, readable by everyone (for debugging)
+chmod 755 "$VOLUME_DIR" 2>/dev/null || true
+
+# ── Insert service into compose.yml ─────────────────────────
+echo "📝 Appending service '${NAME}' to compose.yml ..."
+
+TMPFILE=$(mktemp)
+# Never leave the temp file behind, whatever the exit path
+trap 'rm -f "$TMPFILE"' EXIT
+
+awk -v name="$NAME" \
+    -v port="$NEW_PORT" \
+    -v api_key="$API_KEY" \
+    -v token_var="$TOKEN_VAR" \
+    -v volume_dir="$VOLUME_DIR" \
+    -v env_merge="$ENV_MERGE" \
+    '
+    # Insert new worker service block just before the networks: section
+    /^networks:/ && !inserted {
+      inserted = 1
+      print ""
+      print "  " name ":"
+      print "    <<: *hermes-worker"
+      print "    container_name: " name
+      print "    environment:"
+      if (env_merge != "") print "      " env_merge
+      print "      API_SERVER_PORT: \"" port "\""
+      print "      API_SERVER_KEY: \"" api_key "\""
+      print "      DISCORD_BOT_TOKEN: ${" token_var "}"
+      print "    volumes:"
+      print "      - " volume_dir ":/opt/data"
+      print ""
+    }
+    { print }
+' "$COMPOSE_FILE" > "$TMPFILE"
+
+# Overwrite in place instead of mv: mktemp files are mode 0600, and
+# mv would replace compose.yml's owner and permissions with the temp file's.
+cat "$TMPFILE" > "$COMPOSE_FILE"
+
+# ── Done ────────────────────────────────────────────────────
+echo ""
+echo "✅ Worker '${NAME}' provisioned successfully"
+echo ""
+echo "────────────────────────────────────────────"
+echo "  NEXT STEPS"
+echo "────────────────────────────────────────────"
+echo ""
+echo "1. Add secrets to the agenix .env stack file:"
+echo ""
+echo "   # ${NAME}"
+echo "   ${TOKEN_VAR}=<paste-discord-bot-token-here>"
+echo ""
+echo "2. Restart the AI stack:"
+echo ""
+echo "   systemctl restart ai_stack.service"
+echo ""
+echo "3. In Paperclip, create an agent with HTTP adapter:"
+echo ""
+echo "   Endpoint: http://${NAME}:${NEW_PORT}/v1/chat/completions"
+echo "   API Key:  ${API_KEY}"
+echo ""
+echo "────────────────────────────────────────────"
--- a/versioncontrol/compose.yml
+++ b/versioncontrol/compose.yml
@@ -8,10 +8,13 @@ services:
      - USER_GID=1000
      - GITEA__server__ROOT_URL=https://code.lazyworkhorse.net
      - GITEA__actions__ENABLED=true
+      - GITEA__actions__DEFAULT_ACTIONS_URL=off
      - SSH_PORT=2222
      - SSH_LISTEN_PORT=2222
      # Enable Gitea Actions (act_runner required on host)
      - GITEA__actions__ENABLED=true
+      # Don't fetch actions from GitHub (offline mode + local only)
+      - GITEA__actions__DEFAULT_ACTIONS_URL=off
    volumes:
      - /mnt/HoardingCow_docker_data/Gitea:/data
    networks: