diff --git a/ai/Dockerfile b/ai/Dockerfile
index 1edd524..6c8ddeb 100644
--- a/ai/Dockerfile
+++ b/ai/Dockerfile
@@ -1,71 +1,73 @@
-FROM ghcr.io/astral-sh/uv:0.11.6-python3.13-trixie@sha256:b3c543b6c4f23a5f2df22866bd7857e5d304b67a564f4feab6ac22044dde719b AS uv_source
-FROM tianon/gosu:1.19-trixie@sha256:3b176695959c71e123eb390d427efc665eeb561b1540e82679c15e992006b8b9 AS gosu_source
-FROM debian:13.4
+# 1. uv binaries, pinned to the release this file used before (no :latest drift)
+FROM ghcr.io/astral-sh/uv:0.11.6 AS uv_source
 
-# Disable Python stdout buffering to ensure logs are printed immediately
-ENV PYTHONUNBUFFERED=1
+# 2. Official NousResearch Hermes Agent image. TODO(review): pin a tag/digest; patch_tts_tool.py matches upstream source text exactly.
+# Already ships: Python, Node.js, npm, Playwright/Chromium, venv, tts_tool.py, etc.
+FROM nousresearch/hermes-agent:latest
 
-# Store Playwright browsers outside the volume mount so the build-time
-# install survives the /opt/data volume overlay at runtime.
-ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright
-
-# Install system dependencies in one layer, clear APT cache
-# tini reaps orphaned zombie processes (MCP stdio subprocesses, git, bun, etc.)
-# that would otherwise accumulate when hermes runs as PID 1. See #15012.
+# ---------- System dependencies ----------
+# The official hermes-agent image already has: git, curl, ffmpeg, python3,
+# gcc, build-essential, openssh-client, procps, tini, ripgrep, docker-cli,
+# libportaudio2, ca-certificates, etc.
+#
+# These extras we need to add back:
+# - poppler-utils, imagemagick (PDF/image processing)
+# - texlive-* (LaTeX typesetting for reports)
+# - qemu-user-static, binfmt-support (QEMU cross-compilation)
+# - emacs-nox (text editing in container)
+USER root
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
-        build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli tini \
-        curl poppler-utils imagemagick \
-        chromium xvfb fonts-noto-color-emoji fonts-unifont fonts-liberation fonts-ipafont-gothic fonts-wqy-zenhei fonts-tlwg-loma-otf fonts-freefont-ttf \
-        libasound2t64 libatk-bridge2.0-0t64 libatk1.0-0t64 libatspi2.0-0t64 libcairo2 libcups2t64 libdbus-1-3 libdrm2 libgbm1 libglib2.0-0t64 libnspr4 libnss3 libpango-1.0-0 libx11-6 libxcb1 libxcomposite1 libxdamage1 libxext6 libxfixes3 libxkbcommon0 libxrandr2 \
-        texlive-latex-base texlive-latex-extra texlive-fonts-recommended texlive-xetex texlive-science \
-        qemu-user-static binfmt-support qemu-user-binfmt \
-        emacs-nox \
-        libportaudio2 && \
+        libportaudio2 \
+        ca-certificates \
+        poppler-utils \
+        imagemagick \
+        texlive-latex-base \
+        texlive-latex-extra \
+        texlive-fonts-recommended \
+        texlive-xetex \
+        texlive-science \
+        qemu-user-static \
+        binfmt-support \
+        emacs-nox && \
     rm -rf /var/lib/apt/lists/*
 
-# Non-root user for runtime; UID can be overridden via HERMES_UID at runtime
-RUN useradd -u 10000 -m -d /opt/data hermes
-
-COPY --chmod=0755 --from=gosu_source /gosu /usr/local/bin/
-COPY --chmod=0755 --from=uv_source /usr/local/bin/uv /usr/local/bin/uvx /usr/local/bin/
+# ---------- UV (hyperfast pip alternative) ----------
+COPY --chmod=0755 --from=uv_source /uv /usr/local/bin/
 
 WORKDIR /opt/hermes
 
-# ---------- Layer-cached dependency install ----------
-# Copy only package manifests first so npm install + Playwright are cached
-# unless the lockfiles themselves change.
-COPY package.json package-lock.json ./
-COPY web/package.json web/package-lock.json web/
+# ---------- Piper TTS in the existing venv ----------
+# The base image's venv is root-owned, so the install has to run as root too
+RUN . /opt/hermes/.venv/bin/activate && \
+    uv pip install --no-cache-dir piper-tts sounddevice numpy
 
-RUN npm install --prefer-offline --no-audit && \
-    npx playwright install --with-deps chromium --only-shell && \
-    (cd web && npm install --prefer-offline --no-audit) && \
-    npm cache clean --force
+# ---------- Download the Piper Ryan voice (high quality) ----------
+RUN mkdir -p /opt/hermes/.venv/share/piper/voices && \
+    /opt/hermes/.venv/bin/python3 /dev/stdin << 'PYEOF'
+import urllib.request
+base = '/opt/hermes/.venv/share/piper/voices'
+url = 'https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/ryan/high/en_US-ryan-high.onnx'
+urllib.request.urlretrieve(url, base + '/en_US-ryan-high.onnx')
+urllib.request.urlretrieve(url + '.json', base + '/en_US-ryan-high.onnx.json')
+PYEOF
 
-# ---------- Source code ----------
-# .dockerignore excludes node_modules, so the installs above survive.
-COPY --chown=hermes:hermes . .
-
-# Build web dashboard (Vite outputs to hermes_cli/web_dist/)
-RUN cd web && npm run build
-
-# ---------- Permissions ----------
-# Make install dir world-readable so any HERMES_UID can read it at runtime.
-# The venv needs to be traversable too.
-USER root
-RUN chmod -R a+rX /opt/hermes
-# Start as root so the entrypoint can usermod/groupmod + gosu.
-# If HERMES_UID is unset, the entrypoint drops to the default hermes user (10000).
-
-# ---------- Python virtualenv ----------
-RUN uv venv && \
-    uv pip install --no-cache-dir -e ".[all]" && \
-    uv pip install --no-cache-dir sounddevice numpy faster-whisper
+# ---------- Patch tts_tool.py: replace Edge TTS with Piper ----------
+# Edge TTS calls Microsoft servers; Piper runs locally on CPU, no cloud, no data leaves the host.
+# The script is kept at /opt/hermes (not /tmp) because fix-permissions.sh re-runs it from there at startup.
+COPY patch_tts_tool.py /opt/hermes/patch_tts_tool.py
+RUN /opt/hermes/.venv/bin/python3 /opt/hermes/patch_tts_tool.py
 
 # ---------- Runtime ----------
-ENV HERMES_WEB_DIST=/opt/hermes/hermes_cli/web_dist
+# Stay root here on purpose: fix-permissions.sh must chown the data volume as root,
+# then chains to the upstream entrypoint, which is what drops to the hermes user.
+
 ENV HERMES_HOME=/opt/data
 ENV PATH="/opt/data/.local/bin:${PATH}"
+
 VOLUME [ "/opt/data" ]
-ENTRYPOINT [ "/usr/bin/tini", "-g", "--", "/opt/hermes/docker/entrypoint.sh" ]
+
+# Startup script: permission repair + TTS patch, then the upstream entrypoint
+COPY --chmod=0755 fix-permissions.sh /opt/hermes/fix-permissions.sh
+
+ENTRYPOINT [ "/usr/bin/tini", "-g", "--", "/opt/hermes/fix-permissions.sh" ]
diff --git a/ai/compose.yml b/ai/compose.yml
index 5780324..f7e4d8f 100644
--- a/ai/compose.yml
+++ b/ai/compose.yml
@@ -39,6 +39,12 @@ services:
       - API_SERVER_KEY=hermes_local_key
       - GATEWAY_ALLOW_ALL_USERS=true
       - OPENROUTER_API_KEY=${OPENROUTER_API_KEY}
+      # ROCm for GPU-accelerated faster-whisper STT
+      - HSA_OVERRIDE_GFX_VERSION=9.0.6
+      - HCC_AMDGPU_TARGET=gfx906
+      - HIP_VISIBLE_DEVICES=0,1
+      - ROCR_VISIBLE_DEVICES=0,1
+      - HSA_ENABLE_SDMA=0
     volumes:
       - /mnt/HoardingCow_docker_data/Hermes/data:/opt/data
     devices:
diff --git a/ai/fix-permissions.sh b/ai/fix-permissions.sh
new file mode 100644
index 0000000..7af8d0c
--- /dev/null
+++ b/ai/fix-permissions.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+# Startup permission fix + TTS patch.
+# Runs as root before the entrypoint drops to the hermes user.
+set -e
+
+HERMES_HOME="${HERMES_HOME:-/opt/data}"
+
+# Fix ownership on critical writable directories (best effort: errors are ignored)
+chown -R hermes:hermes \
+    "$HERMES_HOME/sessions" \
+    "$HERMES_HOME/checkpoints" \
+    "$HERMES_HOME/skills" \
+    "$HERMES_HOME/memories" \
+    "$HERMES_HOME/workspace" \
+    "$HERMES_HOME/pastes" \
+    "$HERMES_HOME/logs" \
+    "$HERMES_HOME/cron" \
+    "$HERMES_HOME/plans" \
+    "$HERMES_HOME/hooks" \
+    "$HERMES_HOME/cache" \
+    2>/dev/null || true
+
+# Fix data volume root ownership (non-recursive; only when the owner UID differs)
+if [ "$(stat -c %u "$HERMES_HOME" 2>/dev/null)" != "$(id -u hermes)" ]; then
+    chown hermes:hermes "$HERMES_HOME" 2>/dev/null || true
+fi
+
+# ---------- Patch tts_tool.py: replace Edge TTS with Piper ----------
+# Fallback runtime patch in case the volume's site-packages differ from the image.
+# Idempotent: if already patched, the script does nothing.
+PATCH_SCRIPT="/opt/hermes/patch_tts_tool.py"
+if [ -f "$PATCH_SCRIPT" ]; then
+    echo "Applying TTS patch (Piper only, no Edge fallback)..."
+    /opt/hermes/.venv/bin/python3 "$PATCH_SCRIPT" 2>&1 || true
+fi
+
+# Chain to the official Hermes entrypoint
+exec /opt/hermes/docker/entrypoint.sh "$@"
diff --git a/ai/patch_tts_tool.py b/ai/patch_tts_tool.py
new file mode 100644
index 0000000..0aa056b
--- /dev/null
+++ b/ai/patch_tts_tool.py
@@ -0,0 +1,181 @@
+#!/usr/bin/env python3
+"""Patch Hermes TTS tool: add Piper TTS provider, remove Edge TTS as default.
+
+Patches ALL copies of tts_tool.py found (venv site-packages + /opt/hermes/tools/).
+
+Searches multiple paths for tts_tool.py so it works both at build time
+(in the image venv) and at runtime (on the mounted data volume).
+
+Idempotent: if already patched, does nothing.
+"""
+
+import sys
+import os
+
+# ---------------------------------------------------------------------------
+# Search for all copies of tts_tool.py
+# ---------------------------------------------------------------------------
+CANDIDATE_PATHS = [
+    "/opt/hermes/.venv/lib/python3.13/site-packages/tools/tts_tool.py",
+    "/opt/hermes/tools/tts_tool.py",
+]
+
+found_paths = []
+
+for p in CANDIDATE_PATHS:
+    if os.path.exists(p):
+        found_paths.append(p)
+        print(f"Found tts_tool.py at: {p}")
+
+# Also try to find via Python import (covers interpreters other than python3.13)
+import subprocess
+try:
+    result = subprocess.run(
+        [sys.executable, "-c", "import tools.tts_tool; print(tools.tts_tool.__file__)"],
+        capture_output=True, text=True, timeout=5
+    )
+    if result.returncode == 0:
+        p = result.stdout.strip()
+        if os.path.exists(p) and p not in found_paths:
+            found_paths.append(p)
+            print(f"Found tts_tool.py via import at: {p}")
+except Exception:
+    pass
+
+if not found_paths:
+    print("WARNING: tts_tool.py not found anywhere. Patching deferred to runtime.")
+    print(f"Searched: {CANDIDATE_PATHS}")
+    sys.exit(0)
+
+# ---------------------------------------------------------------------------
+# Old else block (Edge TTS default). NOTE(review): must match upstream byte-for-byte, indentation included — confirm.
+# ---------------------------------------------------------------------------
+old_else = '''        else:
+            # Default: Edge TTS (free), with NeuTTS as local fallback
+            edge_available = True
+            try:
+                _import_edge_tts()
+            except ImportError:
+                edge_available = False
+
+            if edge_available:
+                logger.info("Generating speech with Edge TTS...")
+                try:
+                    import concurrent.futures
+                    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
+                        pool.submit(
+                            lambda: asyncio.run(_generate_edge_tts(text, file_str, tts_config))
+                        ).result(timeout=60)
+                except RuntimeError:
+                    asyncio.run(_generate_edge_tts(text, file_str, tts_config))
+            elif _check_neutts_available():
+                logger.info("Edge TTS not available, falling back to NeuTTS (local)...")
+                provider = "neutts"
+                _generate_neutts(text, file_str, tts_config)
+            else:
+                return json.dumps({
+                    "success": False,
+                    "error": "No TTS provider available. Install edge-tts (pip install edge-tts) "
+                             "or set up NeuTTS for local synthesis."
+                }, ensure_ascii=False)'''
+
+# ---------------------------------------------------------------------------
+# New block: elif provider == "piper" + else: fallback with Piper only (default voice = the one the image ships)
+# ---------------------------------------------------------------------------
+new_block = '''        elif provider == "piper":
+            # Piper TTS (local, CPU, no cloud, no Microsoft)
+            piper_binary = "/opt/hermes/.venv/bin/piper"
+            piper_config = tts_config.get("piper", {})
+            voice = piper_config.get("voice", "en_US-ryan-high")
+            model_dir = piper_config.get("model_dir", "/opt/hermes/.venv/share/piper/voices")
+            model_path = os.path.join(model_dir, f"{voice}.onnx")
+            if not os.path.exists(model_path):
+                return json.dumps({
+                    "success": False,
+                    "error": "Piper TTS voice model not found. "
+                             "Install Piper TTS and download a voice model."
+                }, ensure_ascii=False)
+            logger.info("Generating speech with Piper TTS (local, CPU)...")
+            import subprocess as _sp
+            cmd = [piper_binary, "--model", model_path, "--output-raw"]
+            try:
+                proc = _sp.Popen(cmd, stdin=_sp.PIPE, stdout=_sp.PIPE, stderr=_sp.PIPE)
+                raw_audio, stderr = proc.communicate(input=text.encode(), timeout=60)
+                if proc.returncode != 0:
+                    raise RuntimeError(f"Piper TTS failed: {stderr.decode()[:200]}")
+                ffmpeg_cmd = ["ffmpeg", "-f", "s16le", "-ar", "22050", "-ac", "1", "-i", "-", "-y", file_str]
+                _sp.run(ffmpeg_cmd, input=raw_audio, capture_output=True, timeout=30, check=True)
+            except Exception as e:
+                return json.dumps({
+                    "success": False,
+                    "error": f"Piper TTS failed: {e}"
+                }, ensure_ascii=False)
+
+        else:
+            # Default: Piper TTS (local, CPU, no cloud, no Microsoft)
+            piper_binary = "/opt/hermes/.venv/bin/piper"
+            piper_config = tts_config.get("piper", {})
+            voice = piper_config.get("voice", "en_US-ryan-high")
+            model_dir = piper_config.get("model_dir", "/opt/hermes/.venv/share/piper/voices")
+            model_path = os.path.join(model_dir, f"{voice}.onnx")
+            if os.path.exists(model_path) and os.path.exists(piper_binary):
+                logger.info("Generating speech with Piper TTS (local, CPU)...")
+                import subprocess as _sp
+                cmd = [piper_binary, "--model", model_path, "--output-raw"]
+                try:
+                    proc = _sp.Popen(cmd, stdin=_sp.PIPE, stdout=_sp.PIPE, stderr=_sp.PIPE)
+                    raw_audio, stderr = proc.communicate(input=text.encode(), timeout=60)
+                    if proc.returncode != 0:
+                        raise RuntimeError(stderr.decode()[:200])
+                    ffmpeg_cmd = ["ffmpeg", "-f", "s16le", "-ar", "22050", "-ac", "1", "-i", "-", "-y", file_str]
+                    _sp.run(ffmpeg_cmd, input=raw_audio, capture_output=True, timeout=30, check=True)
+                except Exception as e:
+                    return json.dumps({"success": False, "error": f"Piper TTS failed: {e}"}, ensure_ascii=False)
+            else:
+                return json.dumps({
+                    "success": False,
+                    "error": "Piper TTS not available. Install piper-tts and download a voice model."
+                }, ensure_ascii=False)'''
+
+# ---------------------------------------------------------------------------
+# Apply the patch to all copies found
+# ---------------------------------------------------------------------------
+patched_any = False
+
+for tts_path in found_paths:
+    with open(tts_path) as f:
+        code = f.read()
+
+    if 'provider == "piper"' in code:
+        print(f"ALREADY PATCHED: {tts_path}")
+        continue
+
+    if old_else in code:
+        code = code.replace(old_else, new_block, 1)
+        with open(tts_path, 'w') as f:
+            f.write(code)
+        print(f"PATCHED: {tts_path}")
+        patched_any = True
+    else:
+        print(f"SKIP {tts_path}: Edge fallback pattern not found")
+        import re
+        for m in re.finditer(r'        else:\n            # Default:', code):
+            start = max(0, m.start() - 100)
+            end = min(len(code), m.end() + 300)
+            print(f" Found 'else:/# Default:' at position {m.start()}:")
+            print(f" {code[start:end]}")
+            print(" ---")
+        # Don't exit with error — if one copy isn't patchable, try the others
+
+if not patched_any:
+    all_patched = all(
+        'provider == "piper"' in open(p).read()
+        for p in found_paths
+    )
+    if all_patched:
+        print("All copies already patched.")
+        sys.exit(0)
+    print("WARNING: Could not patch any copy of tts_tool.py")
+    sys.exit(1)
+
+print("tts_tool.py patched successfully across all copies.")