fix: clean Dockerfile with Piper TTS, external patch script

2026-05-09 13:41:37 +00:00
parent 28213eec5c
commit 25d7611043
3 changed files with 161 additions and 39 deletions
--- a/ai/Dockerfile
+++ b/ai/Dockerfile
@@ -38,49 +38,24 @@ USER hermes
 # On copie tout le projet d'un coup sans assumer la présence de fichiers de lock spécifiques
 COPY --chown=hermes:hermes . .

-# ---------- Python virtualenv ----------
+# ---------- Python virtualenv avec Piper TTS ----------
 RUN uv venv && \
-    uv pip install --no-cache-dir sounddevice numpy faster-whisper
+    uv pip install --no-cache-dir piper-tts sounddevice numpy faster-whisper

-# ---------- Patch tts_tool.py to add Coqui provider ----------
-RUN /opt/hermes/.venv/bin/python3 -c "
-tts_path = '/opt/hermes/.venv/lib/python3.13/site-packages/tools/tts_tool.py'
-with open(tts_path) as f:
-    code = f.read()
-coqui_block = '''
-        elif provider == \"coqui\":
-            logger.info(\"Generating speech with Coqui TTS (GPU, local)...\")
-            import subprocess
-            coqui_python = \"/opt/coqui-tts/bin/python3\"
-            coqui_script = \"/opt/coqui-tts/bin/coqui_synth.py\"
-            coqui_config = tts_config.get(\"coqui\", {})
-            model = coqui_config.get(\"model\", \"tts_models/en/vctk/vits\")
-            use_gpu = coqui_config.get(\"use_gpu\", True)
-            speaker = coqui_config.get(\"speaker\", \"\")
-            cmd = [
-                coqui_python, coqui_script,
-                \"--text\", text,
-                \"--out\", file_str,
-                \"--model\", model,
-            ]
-            if use_gpu:
-                cmd.append(\"--gpu\")
-            if speaker:
-                cmd.extend([\"--speaker\", speaker])
-            result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
-            if result.returncode != 0:
-                stderr = result.stderr.strip()
-                raise RuntimeError(f\"Coqui TTS failed: {stderr or 'unknown error'}\")
-            logger.info(\"Coqui TTS audio saved: %s\", file_str)
-'''
-code = code.replace(
-    '        else:\n            # Default: Edge TTS (free), with NeuTTS as local fallback',
-    coqui_block + '        else:\n            # Default: Edge TTS (free), with NeuTTS as local fallback'
-)
-with open(tts_path, 'w') as f:
-    f.write(code)
+# ---------- Download the Piper ryan voice (model + .onnx.json config); lines end in "\" because RUN stops at a bare newline ----------
+RUN mkdir -p /opt/hermes/.venv/share/piper/voices && \
+    /opt/hermes/.venv/bin/python3 -c "\
+import urllib.request; \
+base = '/opt/hermes/.venv/share/piper/voices'; \
+url = 'https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/ryan/high/en_US-ryan-high.onnx'; \
+urllib.request.urlretrieve(url + '?download=true', base + '/en_US-ryan-high.onnx'); \
+urllib.request.urlretrieve(url + '.json?download=true', base + '/en_US-ryan-high.onnx.json') \
 "

+# ---------- Patch tts_tool.py: replace Coqui with Piper, remove Edge (--chown lets USER hermes rm the script from /tmp) ----------
+COPY --chown=hermes:hermes ai/patch_tts_tool.py /tmp/patch_tts_tool.py
+RUN /opt/hermes/.venv/bin/python3 /tmp/patch_tts_tool.py && rm /tmp/patch_tts_tool.py
+
 # ---------- Runtime ----------
 ENV HERMES_HOME=/opt/data
 ENV PATH="/opt/data/.local/bin:${PATH}"
--- a/ai/pycache/patch_tts_tool.cpython-313.pyc
+++ b/ai/pycache/patch_tts_tool.cpython-313.pyc
--- a/ai/patch_tts_tool.py
+++ b/ai/patch_tts_tool.py
@@ -0,0 +1,147 @@
+#!/usr/bin/env python3
+"""Patch the installed Hermes tools/tts_tool.py: add a Piper provider and swap the Edge TTS default branch for Piper."""
+import sys
+
+tts_path = '/opt/hermes/.venv/lib/python3.13/site-packages/tools/tts_tool.py'  # read here, rewritten in place at the end
+
+with open(tts_path) as f:
+    code = f.read()
+
+# Coqui branch header to look for, and the Piper branch text (default voice en_US-ryan-high: the only one the Dockerfile downloads).
+old_coqui = '        elif provider == "coqui":'
+new_piper = '''        elif provider == "piper":
+            logger.info("Generating speech with Piper TTS (local, CPU)...")
+            import subprocess
+            piper_config = tts_config.get("piper", {})
+            voice = piper_config.get("voice", "en_US-ryan-high")
+            model_dir = piper_config.get("model_dir", "/opt/hermes/.venv/share/piper/voices")
+            model_path = os.path.join(model_dir, f"{voice}.onnx")
+            if not os.path.exists(model_path):
+                raise FileNotFoundError(f"Piper voice model not found: {model_path}")
+            cmd = ["/opt/hermes/.venv/bin/piper", "--model", model_path, "--output-raw"]
+            piper = subprocess.run(cmd, input=text.encode(), capture_output=True, timeout=60)
+            if piper.returncode != 0:
+                raise RuntimeError(f"Piper TTS failed: {piper.stderr.decode()[:200]}")
+            ffmpeg_cmd = ["ffmpeg", "-f", "s16le", "-ar", "22050", "-ac", "1", "-i", "-", "-y", file_str]
+            ffmpeg = subprocess.run(ffmpeg_cmd, input=piper.stdout, capture_output=True, timeout=30)
+            if ffmpeg.returncode != 0:
+                raise RuntimeError(f"ffmpeg conversion failed: {ffmpeg.stderr.decode()[:200]}")
+            logger.info("Piper TTS audio saved: %s", file_str)'''
+
+# Splice the Piper branch into the provider if/elif chain (its branches sit at 8 spaces).
+# Line-based so that a whole branch, header and body, can be replaced or inserted.
+lines = code.split('\n')
+chain = '        '
+coqui_idx = next((i for i, ln in enumerate(lines) if ln.startswith(old_coqui)), None)
+if coqui_idx is not None:
+    # Replace the whole Coqui branch: swapping only the header line would leave
+    # the old Coqui body running right after the Piper code.
+    branch_starts = (chain + 'elif ', chain + 'else:')
+    end = next((i for i in range(coqui_idx + 1, len(lines)) if lines[i].startswith(branch_starts)), None)
+    if end is None:
+        sys.exit("ERROR: could not find the end of the Coqui branch")
+    lines[coqui_idx:end] = new_piper.split('\n')
+    code = '\n'.join(lines)
+    print("Coqui -> Piper replaced")
+elif 'provider == "piper"' in code:
+    print("Piper already present, skipping coqui replacement")
+else:
+    # Fresh Hermes install - add Piper after the last provider (kittentts)
+    print("Stock Hermes - adding Piper provider after kittentts...")
+    kit_idx = next((i for i, ln in enumerate(lines) if ln.startswith(chain + 'elif provider == "kittentts":')), None)
+    # Only an else: at chain indentation closes the chain; a bare strip() match
+    # could hit an else: nested inside the kittentts branch itself.
+    else_idx = None if kit_idx is None else next((i for i in range(kit_idx + 1, len(lines)) if lines[i].rstrip() == chain + 'else:'), None)
+    if else_idx is None:
+        print("WARNING: kittentts branch or its else: not found, Piper provider NOT added")
+    else:
+        lines[else_idx:else_idx] = new_piper.split('\n') + ['']
+        code = '\n'.join(lines)
+        print("Piper provider added after kittentts")
+
+# Exact text of the stock default (else) branch in tts_tool.py; it is matched verbatim below, so every space matters.
+old_edge = '''        else:
+            # Default: Edge TTS (free), with NeuTTS as local fallback
+            edge_available = True
+            try:
+                _import_edge_tts()
+            except ImportError:
+                edge_available = False
+
+            if edge_available:
+                logger.info("Generating speech with Edge TTS...")
+                try:
+                    import concurrent.futures
+                    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
+                        pool.submit(
+                            lambda: asyncio.run(_generate_edge_tts(text, file_str, tts_config))
+                        ).result(timeout=60)
+                except RuntimeError:
+                    asyncio.run(_generate_edge_tts(text, file_str, tts_config))
+            elif _check_neutts_available():
+                logger.info("Edge TTS not available, falling back to NeuTTS (local)...")
+                provider = "neutts"
+                _generate_neutts(text, file_str, tts_config)
+            else:
+                return json.dumps({
+                    "success": False,
+                    "error": "No TTS provider available. Install edge-tts (pip install edge-tts) "
+                             "or set up NeuTTS for local synthesis."
+                }, ensure_ascii=False)'''
+
+# Default (else) branch written into tts_tool.py in place of the Edge TTS one.
+# Piper settings are resolved once and the model-file check alone decides availability.
+# The default voice is en_US-ryan-high, the only voice the Dockerfile downloads, and a
+# failed ffmpeg conversion raises instead of being logged as saved audio.
+# The "Default: Piper TTS" comment must stay: this script looks for it to detect an earlier run.
+new_piper_fallback = '''        else:
+            # Default: Piper TTS (local, CPU, no cloud)
+            piper_binary = "/opt/hermes/.venv/bin/piper"
+            # "or {}" also covers a piper entry that is present but empty (None)
+            piper_config = tts_config.get("piper") or {}
+            voice = piper_config.get("voice", "en_US-ryan-high")
+            model_dir = piper_config.get("model_dir", "/opt/hermes/.venv/share/piper/voices")
+            model_path = os.path.join(model_dir, f"{voice}.onnx")
+
+            if os.path.exists(model_path):
+                logger.info("Generating speech with Piper TTS (local, CPU)...")
+                import subprocess
+                cmd = [piper_binary, "--model", model_path, "--output-raw"]
+                # subprocess.run kills the piper process itself when the timeout expires
+                piper = subprocess.run(cmd, input=text.encode(), capture_output=True, timeout=60)
+                if piper.returncode != 0:
+                    raise RuntimeError(f"Piper TTS failed: {piper.stderr.decode()[:200]}")
+                # The raw piper output is handed to ffmpeg as 16-bit little-endian
+                # mono PCM at 22050 Hz and converted into file_str
+                ffmpeg_cmd = ["ffmpeg", "-f", "s16le", "-ar", "22050", "-ac", "1", "-i", "-", "-y", file_str]
+                ffmpeg = subprocess.run(ffmpeg_cmd, input=piper.stdout, capture_output=True, timeout=30)
+                if ffmpeg.returncode != 0:
+                    raise RuntimeError(f"ffmpeg conversion failed: {ffmpeg.stderr.decode()[:200]}")
+                logger.info("Piper TTS audio saved: %s", file_str)
+            else:
+                # No model file for the selected voice: return the JSON error payload
+                return json.dumps({
+                    "success": False,
+                    "error": "No TTS provider available. Install Piper TTS (pip install piper-tts) "
+                             "and download a voice model."
+                }, ensure_ascii=False)'''
+
+# Swap the stock Edge default branch for the Piper one, or stop if neither variant is present.
+if old_edge in code:
+    code = code.replace(old_edge, new_piper_fallback)
+    print("Edge fallback replaced with Piper")
+else:
+    print("Edge fallback NOT found, checking if already Piper...")
+    if 'Default: Piper TTS' not in code:
+        print("ERROR: Could not find Edge fallback")
+        # Debug aid: report the offset of any default branch that does exist
+        default_pos = code.find('        else:\n            # Default:')
+        if default_pos != -1:
+            print("Found else block at", default_pos)
+        sys.exit(1)
+    print("Piper fallback already present, skipping")
+
+# Write the patched source back over the installed module
+with open(tts_path, 'w') as f:
+    f.write(code)
+print("tts_tool.py patched successfully")