diff --git a/Makefile b/Makefile index 2c25ee3..6929a63 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ COMPOSE_PATH=~/Projects/AltNet/docker-compose # List of services (folder names) -SERVICES=monitoring ai cloudstorage crm_tp crm_cf mediacenter homeautomation network backup homepage passwordmanager +SERVICES=monitoring hermes ollama cloudstorage crm_tp crm_cf mediacenter homeautomation network backup homepage passwordmanager # Bring up all services all_up: diff --git a/ai/compose.yml b/ai/compose.yml deleted file mode 100644 index e5d9c4b..0000000 --- a/ai/compose.yml +++ /dev/null @@ -1,303 +0,0 @@ -version: "3.8" -services: - - # webui: - # image: ghcr.io/open-webui/open-webui:main - # volumes: - # - /mnt/HoardingCow_docker_data/Ollama/open-webui:/app/backend/data - # restart: always - # environment: - # - OLLAMA_API_BASE_URL=http://ollama:11434/api - # networks: - # - ai_net - # - ai_backend - # labels: - # - "traefik.enable=true" - - # # Router for HTTP + redirection to HTTPS - # - "traefik.http.routers.webui-http.rule=Host(`ai.lazyworkhorse.net`)" - # - "traefik.http.routers.webui-http.entrypoints=web" - # - "traefik.http.routers.webui-http.middlewares=redirect-to-https" - - # # Router for HTTPS with TLS - # - "traefik.http.routers.webui-https.rule=Host(`ai.lazyworkhorse.net`)" - # - "traefik.http.routers.webui-https.entrypoints=websecure" - # - "traefik.http.routers.webui-https.tls=true" - # - "traefik.http.routers.webui-https.tls.certresolver=njalla" - - hermes: - build: ./ - container_name: hermes - restart: always - # Gateway run enables the internal API server on port 8642 - command: gateway run - environment: - - OLLAMA_HOST=http://ollama:11434 - - API_SERVER_ENABLED=true - - API_SERVER_PORT=8642 - - API_SERVER_HOST=0.0.0.0 - - API_SERVER_KEY=hermes_local_key - - GATEWAY_ALLOW_ALL_USERS=true - - OPENROUTER_API_KEY=${OPENROUTER_API_KEY} - # ROCm for GPU-accelerated faster-whisper STT - - HSA_OVERRIDE_GFX_VERSION=9.0.6 - - HCC_AMDGPU_TARGET=gfx906 - 
- HIP_VISIBLE_DEVICES=0,1 - - ROCR_VISIBLE_DEVICES=0,1 - - HSA_ENABLE_SDMA=0 - - TZ=America/Montreal - volumes: - - /mnt/HoardingCow_docker_data/Hermes/data:/opt/data - devices: - - /dev/kfd:/dev/kfd - - /dev/dri:/dev/dri - group_add: - - "303" - - "26" - networks: - - ai_backend - - ollama: - build: - context: ./ollama - dockerfile: Dockerfile - image: ollama/ollama:rocm-gfx906 - container_name: ollama - privileged: true - tty: true - restart: always - ports: - - "127.0.0.1:11434:11434" - networks: - - ai_backend - volumes: - - /mnt/HoardingCow_docker_data/Ollama/ollama:/root/.ollama - environment: - - OLLAMA_VULKAN=0 - - HSA_OVERRIDE_GFX_VERSION=9.0.6 - - HCC_AMDGPU_TARGET=gfx906 - - HIP_VISIBLE_DEVICES=0,1 - - ROCR_VISIBLE_DEVICES=0,1 - - HSA_ENABLE_SDMA=0 - - OLLAMA_HOST=0.0.0.0 - - OLLAMA_DEBUG=1 - - OLLAMA_FLASH_ATTENTION=0 - - OLLAMA_NUM_PARALLEL=2 - devices: - # Map the render nodes and KFD for ROCm to work inside the container - - /dev/kfd:/dev/kfd - - /dev/dri:/dev/dri - group_add: - - "303" - - "26" - -networks: - ai_net: - external: true - name: ai_net - ai_backend: - driver: bridge - name: ai_backend - - # llama_cpp_devstral: - # image: ghcr.io/ggml-org/llama.cpp:server-rocm - # container_name: llama_cpp_devstral - # restart: unless-stopped - # networks: - # - ai_backend - # ports: - # - "8300:8080" - # ipc: host - # devices: - # - "/dev/kfd:/dev/kfd" - # - "/dev/dri:/dev/dri" - # group_add: - # - "303" # video - # - "26" # render - # environment: - # HSA_OVERRIDE_GFX_VERSION: 9.0.6 - # HIP_VISIBLE_DEVICES: 0,1 - # LLAMA_CACHE: /models - # volumes: - # - /mnt/HoardingCow_docker_data/Llama_cpp/models:/models - # - /mnt/HoardingCow_docker_data/Llama_cpp/devstral-agent.jinja:/template.jinja - # command: > - # -hf unsloth/Devstral-Small-2-24B-Instruct-2512-GGUF:Devstral-Small-2-24B-Instruct-2512-Q8_0.gguf - # -a devstral-2-small-llama_cpp - # --chat-template-file /template.jinja - # --host 0.0.0.0 - # --port 8080 - # --n-gpu-layers 99 - # --ctx-size 163840 
- # --batch-size 4096 - # --ubatch-size 4096 - # --cache-type-k f16 - # --cache-type-v f16 - # --cache-reuse 256 - # --flash-attn on - # --context-shift - # --split-mode layer - # --no-mmap - # --n-predict -1 - # --parallel 2 - - # vllm: - # image: nalanzeyu/vllm-gfx906:v0.9.0-rocm6.3 - # container_name: vllm - # # Required for multi-GPU communication (NCCL) - # ipc: host - # init: true - # shm_size: '2g' - # networks: - # - ai_backend - # ports: - # - "8300:8000" - # devices: - # - "/dev/kfd:/dev/kfd" - # - "/dev/dri:/dev/dri" - # group_add: - # - "303" - # - "26" - # environment: - # HSA_OVERRIDE_GFX_VERSION: 9.0.6 - # HSA_ENABLE_SDMA: 0 - # HIP_VISIBLE_DEVICES: 0,1 - # NCCL_P2P_DISABLE: 1 - # VLLM_WORKER_MULTIPROC_METHOD: spawn - # VLLM_USE_TRITON_FLASH_ATTN: 0 - # VLLM_USE_ROCM_CUSTOM_PAGED_ATTN: 0 - # VLLM_ATTENTION_BACKEND: ROPE_NAIVE - # VLLM_SKIP_WARMUP: 1 - # VLLM_USE_V1: 0 - # HF_TOKEN: ${HF_TOKEN} - # command: > - # vllm serve "mistralai/Devstral-Small-2-24B-Instruct-2512" - # --tensor-parallel-size 2 - # --max-model-len 8192 - # --gpu-memory-utilization 0.90 - # --tokenizer_mode mistral - # --config_format auto - # --load-format auto - # --enforce-eager - # --disable-custom-all-reduce - # --trust-remote-code - # --task generate - # --block-size 16 - # volumes: - # - /mnt/HoardingCow_docker_data/vllm/models:/root/.cache/huggingface - # restart: unless-stopped - - # n8n: - # image: n8nio/n8n:latest - # container_name: n8n - # restart: unless-stopped - # networks: - # - ai_net - # environment: - # - N8N_HOST=n8n.lazyworkhorse.net - # - N8N_PORT=5678 - # - N8N_PROTOCOL=https - # - NODE_ENV=production - # - N8N_ENCRYPTION_KEY=${N8N_ENCRYPTION_KEY} - # - WEBHOOK_URL=https://n8n.lazyworkhorse.net/ - # - GENERIC_TIMEZONE=America/New_York # Adjust to your timezone - # - N8N_BLOCK_EXTERNAL_STORAGE_ACCESS=false - # - N8N_NODES_PYTHON_CAN_IMPORT_MODULES=true - # - N8N_NATIVE_PYTHON_RUNNER=true - # - N8N_PYTHON_ALLOW_STDLIB=uuid,re,os,json - # - 
N8N_PYTHON_ALLOW_EXTERNAL=requests,pandas - # - NODE_FUNCTION_ALLOW_EXTERNAL=uuid,requests - # volumes: - # - /mnt/HoardingCow_docker_data/n8n:/home/node/.n8n - # labels: - # - "traefik.enable=true" - - # # Router for HTTP + redirection to HTTPS - # - "traefik.http.routers.n8n-http.rule=Host(`n8n.lazyworkhorse.net`)" - # - "traefik.http.routers.n8n-http.entrypoints=web" - # - "traefik.http.routers.n8n-http.middlewares=redirect-to-https" - - # # Router for HTTPS with TLS - # - "traefik.http.routers.n8n-https.rule=Host(`n8n.lazyworkhorse.net`)" - # - "traefik.http.routers.n8n-https.entrypoints=websecure" - # - "traefik.http.routers.n8n-https.tls=true" - # - "traefik.http.routers.n8n-https.tls.certresolver=njalla" - - # # Service Loadbalancer (n8n default port) - # - "traefik.http.services.n8n.loadbalancer.server.port=5678" - - # openclaw: - # image: coollabsio/openclaw:latest - # container_name: openclaw - # restart: unless-stopped - # expose: - # - "8080" # WebUI - # - "18789" # Gateway/WebSocket - # - "8788" # Nextcloud Webhook - # networks: - # - ai_net - # - ai_backend - # volumes: - # - /mnt/HoardingCow_docker_data/openclaw/data:/data - # - /home/gortium/infra:/data/workspace/infra - # environment: - # - TZ=America/Toronto - # - OPENCLAW_GATEWAY_TOKEN=${OPENCLAW_GATEWAY_TOKEN} - # - OPENROUTER_API_KEY=${OPENROUTER_API_KEY} - # # Point to the sidecar browser - # - BROWSER_CDP_URL=http://openclaw-browser:9222 - # - BROWSER_EVALUATE_ENABLED=true - # - OPENCLAW_GATEWAY_HOST=0.0.0.0 - # - OPENCLAW_ALLOWED_ORIGINS=https://claw.lazyworkhorse.net - # labels: - # - "traefik.enable=true" - - # - "traefik.http.routers.openclaw-http.rule=Host(`claw.lazyworkhorse.net`)" - # - "traefik.http.routers.openclaw-http.entrypoints=web" - # - "traefik.http.routers.openclaw-http.middlewares=redirect-to-https" - - # - "traefik.http.routers.openclaw-https.rule=Host(`claw.lazyworkhorse.net`)" - # - "traefik.http.routers.openclaw-https.priority=50" - # - 
"traefik.http.routers.openclaw-https.entrypoints=websecure" - # - "traefik.http.routers.openclaw-https.tls=true" - # - "traefik.http.routers.openclaw-https.tls.certresolver=njalla" - # - "traefik.http.services.openclaw.loadbalancer.server.port=8080" - # depends_on: - # - openclaw-browser - - # openclaw-browser: - # image: ghcr.io/browserless/chromium:latest - # restart: always - # expose: - # - "3000" - # environment: - # - MAX_CONCURRENT_SESSIONS=10 - # - CONNECTION_TIMEOUT=300000 - # - PREBOOT_CHROME=true - # - DEMO_MODE=false - # networks: - # ai_backend: - # aliases: - # - browser - - # openclaw-ssh: - # image: linuxserver/openssh-server:latest - # container_name: openclaw-ssh - # environment: - # - PUID=1000 - # - PGID=1000 - # - PUBLIC_KEY_FILE=/config/ssh/authorized_keys - # - SUDO_ACCESS=false - # - PASSWORD_ACCESS=false - # volumes: - # - /mnt/HoardingCow_docker_data/openclaw/ssh-config:/config - # - /home/gortium/infra:/data/workspace/infra:ro - # restart: unless-stopped - # networks: - # - ai_backend - # labels: - # - "traefik.enable=true" - # - "traefik.tcp.routers.openclaw-ssh.rule=HostSNI(*)" - # - "traefik.tcp.routers.openclaw-ssh.entrypoints=sshnode" - # - "traefik.tcp.routers.openclaw-ssh.tls.passthrough=false" - # - "traefik.tcp.services.openclaw-ssh.loadbalancer.server.port=2222" diff --git a/ai/Dockerfile b/hermes/Dockerfile similarity index 100% rename from ai/Dockerfile rename to hermes/Dockerfile diff --git a/hermes/compose.yml b/hermes/compose.yml new file mode 100644 index 0000000..64fed90 --- /dev/null +++ b/hermes/compose.yml @@ -0,0 +1,38 @@ +version: "3.8" +services: + + hermes: + build: ./ + container_name: hermes + restart: always + # Gateway run enables the internal API server on port 8642 + command: gateway run + environment: + - OLLAMA_HOST=http://ollama:11434 + - API_SERVER_ENABLED=true + - API_SERVER_PORT=8642 + - API_SERVER_HOST=0.0.0.0 + - API_SERVER_KEY=hermes_local_key + - GATEWAY_ALLOW_ALL_USERS=true + - 
OPENROUTER_API_KEY=${OPENROUTER_API_KEY} + # ROCm for GPU-accelerated faster-whisper STT + - HSA_OVERRIDE_GFX_VERSION=9.0.6 + - HCC_AMDGPU_TARGET=gfx906 + - HIP_VISIBLE_DEVICES=0,1 + - ROCR_VISIBLE_DEVICES=0,1 + - HSA_ENABLE_SDMA=0 + volumes: + - /mnt/HoardingCow_docker_data/Hermes/data:/opt/data + devices: + - /dev/kfd:/dev/kfd + - /dev/dri:/dev/dri + group_add: + - "303" + - "26" + networks: + - ai_backend + +networks: + ai_backend: + external: true + name: ai_backend diff --git a/ai/fix-permissions.sh b/hermes/fix-permissions.sh similarity index 100% rename from ai/fix-permissions.sh rename to hermes/fix-permissions.sh diff --git a/ai/patch_tts_tool.py b/hermes/patch_tts_tool.py similarity index 100% rename from ai/patch_tts_tool.py rename to hermes/patch_tts_tool.py diff --git a/ai/ollama/Dockerfile b/ollama/Dockerfile similarity index 74% rename from ai/ollama/Dockerfile rename to ollama/Dockerfile index aca5f3a..c6fc0ff 100644 --- a/ai/ollama/Dockerfile +++ b/ollama/Dockerfile @@ -1,18 +1,19 @@ -# ollama-gfx906/Dockerfile +# ollama/Dockerfile # -# Custom ollama image with ROCm 6.1 + gfx906 (MI50) support. -# The default ollama/rocm image ships ROCm 7.2 which dropped gfx906 support. -# This builds ollama from source targeting AMDGPU_TARGETS=gfx906. +# Custom ollama image with ROCm + gfx906 (MI50) support. +# The default ollama/rocm image ships ROCm 7.2 which drops gfx906 support. +# This builds ollama and its llama.cpp runner from source, targeting gfx906. # -# Build: docker build -t ollama/ollama:rocm-gfx906 . 
+# Build: +# docker build -t ollama/ollama:rocm-gfx906 ./ollama FROM rocm/dev-ubuntu-22.04:6.1.2-complete AS builder RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ - git golang cmake build-essential pkg-config \ + git golang-go cmake build-essential pkg-config \ && rm -rf /var/lib/apt/lists/* -ARG OLLAMA_VERSION=v0.13.5 +ARG OLLAMA_VERSION=v0.23.2 RUN git clone --depth 1 --branch ${OLLAMA_VERSION} https://github.com/ollama/ollama.git /build WORKDIR /build @@ -20,7 +21,7 @@ ENV HIP_PATH=/opt/rocm ENV ROCM_PATH=/opt/rocm ENV PATH=/opt/rocm/bin:/opt/rocm/hip/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin RUN cd llama.cpp && \ - mkdir build && cd build && \ + mkdir -p build && cd build && \ cmake .. \ -DLLAMA_HIPBLAS=ON \ -DCMAKE_C_COMPILER=clang \ @@ -52,6 +53,9 @@ RUN ldconfig ENV LD_LIBRARY_PATH=/opt/rocm/lib:/usr/lib/ollama ENV HSA_OVERRIDE_GFX_VERSION=9.0.6 +ENV HCC_AMDGPU_TARGET=gfx906 +ENV HSA_ENABLE_SDMA=0 EXPOSE 11434 +ENTRYPOINT ["/bin/ollama"] CMD ["serve"] diff --git a/ollama/compose.yml b/ollama/compose.yml new file mode 100644 index 0000000..e769203 --- /dev/null +++ b/ollama/compose.yml @@ -0,0 +1,41 @@ +version: "3.8" + +services: + ollama: + build: + context: ./ + dockerfile: Dockerfile + image: ollama/ollama:rocm-gfx906 + container_name: ollama + privileged: true + tty: true + restart: always + ports: + - "127.0.0.1:11434:11434" + networks: + - ai_backend + volumes: + - /mnt/HoardingCow_docker_data/Ollama/ollama:/root/.ollama + environment: + - OLLAMA_VULKAN=0 + - OLLAMA_HOST=0.0.0.0 + - OLLAMA_DEBUG=1 + - OLLAMA_FLASH_ATTENTION=0 + - OLLAMA_NUM_PARALLEL=2 + # ROCm / gfx906 configuration + - HSA_OVERRIDE_GFX_VERSION=9.0.6 + - HCC_AMDGPU_TARGET=gfx906 + - HIP_VISIBLE_DEVICES=0,1 + - ROCR_VISIBLE_DEVICES=0,1 + - HSA_ENABLE_SDMA=0 + devices: + - /dev/kfd:/dev/kfd + - /dev/dri:/dev/dri + group_add: + - "303" + - "26" + +networks: + ai_backend: + external: true + name: ai_backend