Compare commits

..

2 Commits

Author SHA1 Message Date
1ec22ff3fc docs: add AGENTS.md with dev workflow and YAML conventions 2026-05-19 15:27:21 -04:00
b1dbdb9f2d feat: add Hermes worker anchor and provisioning script for Paperclip employees
- Add x-hermes-worker YAML anchor template in compose.yml
  (CPU-only workers, no GPU passthrough, OpenCode Go provider)
- Add commented worker example with env vars placeholder
- Create scripts/provision-hermes-worker.sh for automated worker
  provisioning (generates port, API key, volume dir, appends service)
- Workers connect to Discord only, isolated per container
- Volumes under /mnt/HoardingCow_docker_data/Hermes/<name>/
2026-05-19 14:13:02 -04:00
5 changed files with 218 additions and 187 deletions

31
AGENTS.md Normal file
View File

@@ -0,0 +1,31 @@
# AGENTS.md
Development conventions for the compose repository (Docker Compose stacks).
## Build & Deploy
- The ai stack is managed via `systemctl restart ai_stack.service` on the NixOS host
- Compose files are built from the git repo; apply via `nh os switch` or `systemctl restart ai_stack.service`
- Never run `docker compose up -d` directly — it bypasses the systemd env file and breaks secrets
## Hermes Workers
- Paperclip Hermes workers are added via `ai/scripts/provision-hermes-worker.sh`
- The script only adds content: it inserts the new service block just before the top-level `networks:` section and never deletes or edits existing lines
- Workers are CPU-only containers on the `ai_backend` network with no GPU passthrough
## Workflow
- New feature → clean branch from `origin/master` → push → PR on Gitea
- Branch naming: `feat/description` (features), `fix/description` (bugs/docs)
- Always branch from `origin/master`, never from another feature branch
- Submodule changes (when this repo is consumed as a submodule): commit the submodule update in the parent repo
- PR title should describe the change; body should explain motivation + summary
- After PR merge, delete the feature branch
## YAML Conventions
- Use `x-*` extension fields for reusable anchors
- Comment out inactive services rather than deleting them
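- Environment variables in dictionary format (`KEY: value`) so they can take part in `<<` merges
- YAML `<<` merges are shallow: a service that sets its own `environment:` replaces the anchor's mapping entirely, so merge the shared variables explicitly inside it (`environment: { <<: *shared-env, KEY: value }`)
- List format (`- KEY=value`) works for standard services but cannot be merged with anchors at all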

View File

@@ -1,4 +1,35 @@
version: "3.8"
# ── Hermes Worker Template ──────────────────────────────────
# Shared base for the Paperclip worker services (one isolated Hermes agent
# per Paperclip employee), pulled in with `<<: *hermes-worker`.
#
# Set per service: container_name, volumes, and an `environment:` block with
# API_SERVER_PORT, API_SERVER_KEY and DISCORD_BOT_TOKEN.
#
# NOTE: the YAML merge key is SHALLOW. A service that declares its own
# `environment:` replaces the whole mapping below instead of extending it.
# To keep the shared variables, merge them explicitly:
#
#   environment:
#     <<: *hermes-worker-env
#     API_SERVER_PORT: "8651"
#
# Workers have NO GPU — they use OpenCode Go or remote providers.
x-hermes-worker: &hermes-worker
  build:
    context: ./hermes
    ssh:
      - default
  entrypoint: ["/bin/bash", "-c",
    "bash /opt/data/hermes-tools/install.sh && exec /usr/bin/tini -g -- /opt/hermes/docker/entrypoint.sh \"$@\"",
    "hermes-entrypoint"]
  command: gateway run
  restart: always
  # Anchored separately so service-level environment blocks can merge it.
  environment: &hermes-worker-env
    API_SERVER_ENABLED: "true"
    API_SERVER_HOST: "0.0.0.0"
    OLLAMA_HOST: "http://ollama:11434"
    OPENROUTER_API_KEY: ${OPENROUTER_API_KEY}
    # Shared by every worker as written; give a worker its own OpenCode Go
    # key by overriding this variable in that service's environment block.
    OPENCODE_GO_API_KEY: ${OPENCODE_GO_API_KEY}
    GATEWAY_ALLOW_ALL_USERS: "true"
    TZ: "America/Montreal"
  networks:
    ai_backend:
  # NO devices — workers are CPU-only, no GPU passthrough
# ─────────────────────────────────────────────────────────────
services:
# webui:
@@ -54,10 +85,6 @@ services:
- TZ=America/Montreal
volumes:
- /mnt/HoardingCow_docker_data/Hermes/data:/opt/data
# Syncthing-shared org files — read-only view of user's agenda
- /mnt/HoardingCow_docker_data/Syncthing/telos-ro:/opt/data/telos-ro:ro
# Syncthing-shared inbox — write tasks here, they sync to user's laptop
- /mnt/HoardingCow_docker_data/Syncthing/telos-rw:/opt/data/telos-rw:rw
devices:
- /dev/kfd:/dev/kfd
- /dev/dri:/dev/dri
@@ -67,35 +94,6 @@ services:
networks:
- ai_backend
# Syncthing service: publishes its ports on the host and is routed through
# Traefik at syncthing.lazyworkhorse.net (see labels below).
syncthing:
image: syncthing/syncthing:latest
container_name: syncthing
hostname: syncthing
restart: always
# 8384 is the port Traefik forwards to (see the loadbalancer label).
# NOTE(review): 22000 and 21027/udp are presumably Syncthing's sync and
# local-discovery ports — confirm against the Syncthing documentation.
ports:
- "8384:8384"
- "22000:22000"
- "21027:21027/udp"
environment:
- TZ=America/Montreal
# config holds Syncthing's own state; telos-ro / telos-rw are the same host
# directories that appear as /opt/data/telos-ro and /opt/data/telos-rw mounts
# elsewhere in this compose file.
volumes:
- /mnt/HoardingCow_docker_data/Syncthing/config:/var/syncthing/config
- /mnt/HoardingCow_docker_data/Syncthing/telos-ro:/telos-ro
- /mnt/HoardingCow_docker_data/Syncthing/telos-rw:/telos-rw
networks:
- ai_backend
- ai_net
# Two Traefik routers for the same host: the "web" router only applies the
# redirect-to-https middleware, the "websecure" router terminates TLS using
# the njalla certificate resolver.
labels:
- "traefik.enable=true"
- "traefik.http.routers.syncthing-http.rule=Host(`syncthing.lazyworkhorse.net`)"
- "traefik.http.routers.syncthing-http.entrypoints=web"
- "traefik.http.routers.syncthing-http.middlewares=redirect-to-https"
- "traefik.http.routers.syncthing-https.rule=Host(`syncthing.lazyworkhorse.net`)"
- "traefik.http.routers.syncthing-https.entrypoints=websecure"
- "traefik.http.routers.syncthing-https.tls=true"
- "traefik.http.routers.syncthing-https.tls.certresolver=njalla"
- "traefik.http.services.syncthing.loadbalancer.server.port=8384"
ollama:
build:
context: ./ollama
@@ -129,6 +127,24 @@ services:
- "303"
- "26"
# ── Paperclip Worker Hermes Agents ──────────────────────────
# Each worker is an isolated Hermes agent for a Paperclip employee.
# Add new workers with: ./scripts/provision-hermes-worker.sh <name> <discord_bot_token_var>
# (pass the NAME of the env var that holds the Discord token, not the token itself)
# The API server key and port are generated automatically.
# Workers are CPU-only — they use OpenCode Go or remote providers.
# ── Worker Template (commented — uncomment + configure to activate) ──
# hermes-worker-1:
# <<: *hermes-worker
# container_name: hermes-worker-1
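# # NOTE: YAML "<<" is a shallow merge — this environment: block REPLACES the
# # template's environment, so shared variables must be merged or repeated here.
# environment: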
# API_SERVER_PORT: "8651"
# API_SERVER_KEY: "generated-by-provision-script"
# DISCORD_BOT_TOKEN: ${WORKER_1_DISCORD_BOT_TOKEN}
# volumes:
# - /mnt/HoardingCow_docker_data/Hermes/worker-1:/opt/data
# ─────────────────────────────────────────────────────────────
networks:
ai_net:
external: true

View File

@@ -1,154 +0,0 @@
#!/bin/bash
# ── Hermes Workspace Combined Entrypoint ──
# Waits for the Hermes gateway container (hermes:8642) to become healthy,
# then starts the Hermes Workspace web UI in the foreground.
# Supports graceful shutdown via SIGTERM/SIGINT.
# ──────────────────────────────────────────
set -euo pipefail
# ── Configuration ──────────────────────────────────────────────
# Gateway location. Host and port may be overridden from the environment;
# GATEWAY_URL is always derived from them.
GATEWAY_HOST="${GATEWAY_HOST:-hermes}"
GATEWAY_PORT="${GATEWAY_PORT:-8642}"
GATEWAY_URL="http://${GATEWAY_HOST}:${GATEWAY_PORT}"
# Path appended to GATEWAY_URL when probing gateway health.
HEALTH_ENDPOINT="${HEALTH_ENDPOINT:-/health}"
# Probe budget: number of attempts and seconds to sleep between them.
# Note the overriding env vars carry a HEALTH_ prefix
# (HEALTH_MAX_RETRIES / HEALTH_RETRY_INTERVAL).
MAX_RETRIES="${HEALTH_MAX_RETRIES:-60}"
RETRY_INTERVAL="${HEALTH_RETRY_INTERVAL:-2}"
# Workspace app: node is started on ${WORKSPACE_DIR}/${WORKSPACE_ENTRY}.
WORKSPACE_DIR="${WORKSPACE_DIR:-/workspace}"
WORKSPACE_ENTRY="${WORKSPACE_ENTRY:-server-entry.js}"
# File that receives the workspace PID; removed again during shutdown.
PID_FILE="${PID_FILE:-/tmp/workspace.pid}"
# ── Logging ────────────────────────────────────────────────────
# Shared formatter: prints "[YYYY-MM-DD HH:MM:SS] [LEVEL] message" to stdout.
_log() {
  local level="$1"
  shift
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] [${level}] $*"
}
# Level-specific wrappers; all three write to stdout.
log_info() { _log INFO "$@"; }
log_warn() { _log WARN "$@"; }
log_error() { _log ERROR "$@"; }
# ── Graceful Shutdown ──────────────────────────────────────────
# State shared between start_workspace and the signal handler.
_workspace_pid=""
_shutting_down=false

# Signal handler: stops the workspace process (TERM first, KILL if it is still
# alive after ten one-second polls), removes the PID file and exits 0.
cleanup() {
  # Ignore repeated signals once a shutdown is already under way.
  [ "$_shutting_down" = true ] && return
  _shutting_down=true
  log_info "Shutdown signal received, cleaning up..."

  if [ -n "$_workspace_pid" ] && kill -0 "$_workspace_pid" 2>/dev/null; then
    log_info "Stopping workspace (PID: $_workspace_pid)..."
    kill -TERM "$_workspace_pid" 2>/dev/null || true

    # Poll once per second until the process is gone or the grace period ends.
    local grace=10
    until ! kill -0 "$_workspace_pid" 2>/dev/null || [ "$grace" -le 0 ]; do
      sleep 1
      grace=$((grace - 1))
    done

    # Still alive after the grace period: escalate.
    if kill -0 "$_workspace_pid" 2>/dev/null; then
      log_warn "Workspace did not shut down gracefully, force killing..."
      kill -KILL "$_workspace_pid" 2>/dev/null || true
    fi
  fi

  if [ -f "$PID_FILE" ]; then
    rm -f "$PID_FILE"
  fi
  log_info "Shutdown complete."
  exit 0
}

# Route termination signals through the handler above.
trap cleanup TERM INT
# ── Gateway Health Check ───────────────────────────────────────
# Polls the gateway health endpoint until one probe succeeds.
# Reads:   GATEWAY_URL, HEALTH_ENDPOINT, MAX_RETRIES, RETRY_INTERVAL and the
#          optional HEALTH_REQUEST_TIMEOUT (seconds per request, default 5).
# Returns: 0 as soon as a probe succeeds, 1 once every attempt has failed.
wait_for_gateway() {
  local url="${GATEWAY_URL}${HEALTH_ENDPOINT}"
  local retries="$MAX_RETRIES"
  local interval="$RETRY_INTERVAL"
  local timeout="${HEALTH_REQUEST_TIMEOUT:-5}"
  local attempt=0

  log_info "Waiting for Hermes gateway at ${GATEWAY_URL}..."
  log_info "Max retries: ${retries}, interval: ${interval}s"

  while [ "$attempt" -lt "$retries" ]; do
    attempt=$((attempt + 1))
    # --max-time bounds each probe so a hung connection cannot stall the
    # loop far beyond the retries x interval budget.
    if curl -fsS --max-time "$timeout" "${url}" >/dev/null 2>&1; then
      log_info "Gateway is healthy after ${attempt} attempt(s) (${GATEWAY_URL})"
      return 0
    fi
    if [ "$attempt" -lt "$retries" ]; then
      log_info "Gateway not ready yet (attempt ${attempt}/${retries}), retrying in ${interval}s..."
      sleep "$interval"
    fi
  done

  # sleep accepts fractional intervals but shell arithmetic does not; only
  # compute the total when the interval is a plain integer.
  local total
  if [[ "$interval" =~ ^[0-9]+$ ]]; then
    total="$((retries * interval))s"
  else
    total="${retries} x ${interval}s"
  fi
  log_error "Gateway did not become healthy after ${retries} attempts (${total})"
  return 1
}
# ── Workspace Startup ──────────────────────────────────────────
# Starts the workspace web UI with node and blocks until it exits.
# Reads:   WORKSPACE_DIR, WORKSPACE_ENTRY, PID_FILE.
# Writes:  _workspace_pid (read by the signal handler) and the PID file.
# Returns: 1 if the directory or entry point is missing or cannot be entered,
#          otherwise the exit status of the node process.
start_workspace() {
  local entry="${WORKSPACE_DIR}/${WORKSPACE_ENTRY}"
  local exit_code=0

  if [ ! -d "$WORKSPACE_DIR" ]; then
    log_error "Workspace directory not found: ${WORKSPACE_DIR}"
    return 1
  fi
  if [ ! -f "$entry" ]; then
    log_error "Workspace entry point not found: ${entry}"
    return 1
  fi

  log_info "Starting Hermes Workspace web UI..."
  log_info " Directory: ${WORKSPACE_DIR}"
  log_info " Entry: ${entry}"

  # Checked explicitly so the failure is reported even when the caller runs
  # this function in a context where errexit is suppressed.
  if ! cd "$WORKSPACE_DIR"; then
    log_error "Cannot enter workspace directory: ${WORKSPACE_DIR}"
    return 1
  fi

  # Start workspace in background so signals can be trapped while waiting.
  node --max-old-space-size=2048 "${entry}" &
  _workspace_pid=$!
  echo "$_workspace_pid" > "$PID_FILE"
  log_info "Workspace started (PID: ${_workspace_pid})"

  # A bare `wait` returning non-zero would trip `set -e` and end the script
  # before the status is captured or logged; `||` keeps control here.
  wait "$_workspace_pid" || exit_code=$?
  log_info "Workspace exited with code ${exit_code}"
  return "$exit_code"
}
# ── Main ───────────────────────────────────────────────────────
# Entry point: logs the effective configuration, waits for the gateway
# (continuing with a warning if it never becomes healthy), then runs the
# workspace in the foreground.
# Returns: the status reported by start_workspace.
main() {
  local exit_code=0

  log_info "=== Hermes Workspace Combined Entrypoint ==="
  log_info "Gateway: ${GATEWAY_URL}"
  log_info "Workspace: ${WORKSPACE_DIR}/${WORKSPACE_ENTRY}"
  log_info "PID file: ${PID_FILE}"

  # Gateway health is best-effort: the workspace is started either way.
  if ! wait_for_gateway; then
    log_warn "Proceeding without confirmed gateway health..."
  fi

  # Under `set -e` a bare failing call would terminate the script before the
  # status could be captured and logged. Note that `||` also suppresses
  # errexit inside start_workspace, so it must check its own failures.
  start_workspace || exit_code=$?
  log_info "Entrypoint exiting with code ${exit_code}"
  return "$exit_code"
}
# Run main; exit with its return code
main "$@"

View File

@@ -0,0 +1,135 @@
#!/usr/bin/env bash
set -euo pipefail
# ── Hermes Worker Provisioner ──────────────────────────────
# Adds a new Paperclip Hermes worker to the ai compose stack.
#
# Usage:
# ./provision-hermes-worker.sh <name> <discord_bot_token_var>
#
# Example:
# ./provision-hermes-worker.sh worker-1 WORKER_1_DISCORD_BOT_TOKEN
#
# The script APPENDS only — never modifies or removes existing
# content, even commented lines.
#
# Post-provision steps (manual):
# 1. Add secrets to agenix .env file
# 2. systemctl restart ai_stack.service
# 3. Configure Paperclip agent
# ─────────────────────────────────────────────────────────────
# Positional arguments: worker/service name, and the NAME of the environment
# variable that holds the Discord bot token (never the token itself).
NAME="${1:?Usage: $0 <name> <discord_bot_token_var>}"
TOKEN_VAR="${2:?Usage: $0 <name> <discord_bot_token_var>}"
# ── Paths ───────────────────────────────────────────────────
# compose.yml is expected one directory above this script.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
COMPOSE_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
COMPOSE_FILE="${COMPOSE_DIR}/compose.yml"
# Each Hermes worker gets its own volume on the NFS HoardingCow
VOLUME_BASE="/mnt/HoardingCow_docker_data/Hermes"
VOLUME_DIR="${VOLUME_BASE}/${NAME}"
# The Hermes container runs as UID 10000 (hermes user from Dockerfile)
HERMES_UID=10000
# ── Validation ──────────────────────────────────────────────
# NAME becomes a YAML key, a container name, a directory name and part of a
# grep pattern, so restrict it to characters that are safe in all of those.
if ! [[ "$NAME" =~ ^[a-zA-Z0-9][a-zA-Z0-9_-]*$ ]]; then
  echo "❌ Invalid worker name '${NAME}' — use letters, digits, '-' or '_'" >&2
  exit 1
fi
# TOKEN_VAR is written as \${TOKEN_VAR} into compose.yml. Rejecting anything
# that is not an identifier also catches a pasted raw token; the value is
# deliberately not echoed back.
if ! [[ "$TOKEN_VAR" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
  echo "❌ Second argument must be the NAME of an environment variable (e.g. WORKER_1_DISCORD_BOT_TOKEN), not the token itself" >&2
  exit 1
fi
if ! [ -f "$COMPOSE_FILE" ]; then
  echo "❌ compose.yml not found at $COMPOSE_FILE" >&2
  exit 1
fi
# Refuse to add a key that already exists at any indentation level.
if grep -Eq "^[[:space:]]+${NAME}:[[:space:]]*(#.*)?$" "$COMPOSE_FILE"; then
  echo "❌ Service '${NAME}' already exists in ${COMPOSE_FILE}" >&2
  exit 1
fi
# ── Generate unique API key ─────────────────────────────────
# Used by Paperclip to authenticate against this worker's
# Hermes API server (/v1/chat/completions)
# Random 128-bit key (32 hex chars) with a recognisable prefix.
API_KEY="pc_worker_$(openssl rand -hex 16)"
# ── Find next available API port ────────────────────────────
# Workers get sequential ports starting at 8650.
# Scans compose.yml for existing API_SERVER_PORT: "<digits>" values and
# picks the highest one plus one. Commented-out examples are counted too,
# which keeps their ports reserved.
BASE_PORT=8650
MAX_PORT=0
# \K drops the key from the match and the lookahead drops the closing quote,
# so each line read is just the digits regardless of spacing after the colon.
while IFS= read -r port; do
  # 10# forces base 10 so a value with leading zeros is not read as octal.
  if [ -n "$port" ] && [ "$((10#$port))" -gt "$MAX_PORT" ]; then
    MAX_PORT="$((10#$port))"
  fi
done < <(grep -oP 'API_SERVER_PORT:\s*"\K\d+(?=")' "$COMPOSE_FILE" 2>/dev/null || true)
NEW_PORT=$((MAX_PORT + 1))
if [ "$NEW_PORT" -lt "$BASE_PORT" ]; then
  NEW_PORT=$BASE_PORT
fi
# ── Create volume directory (on NFS) ────────────────────────
printf '%s\n' "📁 Creating volume directory: ${VOLUME_DIR}"
mkdir -p "$VOLUME_DIR"
# Hand the directory to the Hermes container user (UID/GID HERMES_UID) so the
# container can write into it. Failure is reported but not fatal.
if command -v chown &>/dev/null; then
  if ! chown -R "${HERMES_UID}:${HERMES_UID}" "$VOLUME_DIR" 2>/dev/null; then
    printf '%s\n' "⚠ Could not chown ${VOLUME_DIR} — run with sudo if needed"
  fi
fi
# Mode 0755: owner rwx, group and others r-x. Best-effort, errors ignored.
chmod 755 "$VOLUME_DIR" 2>/dev/null || true
# ── Append service to compose.yml ───────────────────────────
echo "📝 Appending service '${NAME}' to compose.yml ..."
# The block is inserted in front of the top-level networks: key; without that
# key nothing would be written, so fail loudly instead of reporting success.
if ! grep -q '^networks:' "$COMPOSE_FILE"; then
  echo "❌ No top-level 'networks:' section in ${COMPOSE_FILE} — nothing inserted" >&2
  exit 1
fi
# YAML "<<" merges are shallow: the environment: block written below replaces
# the template's environment. If compose.yml exposes the shared variables as
# a &hermes-worker-env anchor, merge it; otherwise warn.
ENV_ANCHOR=""
if grep -q '&hermes-worker-env' "$COMPOSE_FILE"; then
  ENV_ANCHOR="hermes-worker-env"
else
  echo "⚠ No &hermes-worker-env anchor found — the new service will NOT inherit the template's environment variables" >&2
fi
# Temp file next to compose.yml so the final mv is a same-filesystem rename.
TMPFILE="$(mktemp "${COMPOSE_FILE}.XXXXXX")"
trap 'rm -f "$TMPFILE"' EXIT
awk -v name="$NAME" \
    -v port="$NEW_PORT" \
    -v api_key="$API_KEY" \
    -v token_var="$TOKEN_VAR" \
    -v env_anchor="$ENV_ANCHOR" \
    '
  # Insert the new worker service once, just before the networks: section.
  /^networks:/ && !inserted {
    print ""
    print "  " name ":"
    print "    <<: *hermes-worker"
    print "    container_name: " name
    print "    environment:"
    if (env_anchor != "") print "      <<: *" env_anchor
    print "      API_SERVER_PORT: \"" port "\""
    print "      API_SERVER_KEY: \"" api_key "\""
    print "      DISCORD_BOT_TOKEN: ${" token_var "}"
    print "    volumes:"
    print "      - /mnt/HoardingCow_docker_data/Hermes/" name ":/opt/data"
    print ""
    inserted = 1
  }
  { print }
' "$COMPOSE_FILE" > "$TMPFILE"
# Separate statements (not `awk … && mv`) so `set -e` aborts on any failure.
# mktemp creates the file 0600; copy the original mode before replacing it.
chmod --reference="$COMPOSE_FILE" "$TMPFILE"
mv "$TMPFILE" "$COMPOSE_FILE"
trap - EXIT
# ── Done ────────────────────────────────────────────────────
# Final summary: success line followed by the three manual follow-up steps.
_rule="────────────────────────────────────────────"
printf '%s\n' \
  "" \
  "✅ Worker '${NAME}' provisioned successfully" \
  "" \
  "$_rule" \
  " NEXT STEPS" \
  "$_rule" \
  "" \
  "1. Add secrets to the agenix .env stack file:" \
  "" \
  " # ${NAME}" \
  " ${TOKEN_VAR}=<paste-discord-bot-token-here>" \
  "" \
  "2. Restart the AI stack:" \
  "" \
  " systemctl restart ai_stack.service" \
  "" \
  "3. In Paperclip, create an agent with HTTP adapter:" \
  "" \
  " Endpoint: http://${NAME}:${NEW_PORT}/v1/chat/completions" \
  " API Key: ${API_KEY}" \
  "" \
  "$_rule"

View File

@@ -8,10 +8,13 @@ services:
- USER_GID=1000
- GITEA__server__ROOT_URL=https://code.lazyworkhorse.net
- GITEA__actions__ENABLED=true
- GITEA__actions__DEFAULT_ACTIONS_URL=off
- SSH_PORT=2222
- SSH_LISTEN_PORT=2222
# Enable Gitea Actions (act_runner required on host)
- GITEA__actions__ENABLED=true
# Don't fetch actions from GitHub (offline mode + local only)
- GITEA__actions__DEFAULT_ACTIONS_URL=off
volumes:
- /mnt/HoardingCow_docker_data/Gitea:/data
networks: