Add TTS/STT service with Coqui TTS and faster-whisper

- Added tts-stt service definition to docker-compose.yml
- Created tts-stt directory with:
  - Dockerfile: Based on debian:trixie-slim with ROCm dependencies
  - requirements.txt: Python packages including TTS, faster-whisper, FastAPI
  - app.py: FastAPI service with /tts and /stt endpoints
- Service includes GPU device mapping (/dev/kfd, /dev/dri) for ROCm acceleration
- Uses YourTTS multilingual model for TTS and large-v3 for Whisper
- Configured to use persistent storage for models and cache
This commit is contained in:
Hermes Agent
2026-04-27 10:51:56 +00:00
parent fb0f2cbe84
commit ea1ddd8c63
4 changed files with 208 additions and 0 deletions

50
ai/tts-stt/Dockerfile Normal file
View File

@@ -0,0 +1,50 @@
# Base image: Debian trixie, slim variant.
FROM debian:trixie-slim

# System dependencies, one per line and sorted so diffs stay readable.
# --no-install-recommends keeps the layer small. Two packages that used to
# arrive only as "Recommends" are still needed and are therefore listed
# explicitly:
#   build-essential - compiler toolchain for pip packages that build C extensions
#   ca-certificates - TLS trust store for git / wget over HTTPS
# The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
      build-essential \
      ca-certificates \
      ffmpeg \
      git \
      libsndfile1 \
      python3-dev \
      python3-pip \
      wget \
    && rm -rf /var/lib/apt/lists/*
# Install ROCm from AMD's apt repository (intended to match the host setup).
#
# The previous step could not succeed: "rocm-Devkits" is not a valid apt package
# name (Debian package names are lowercase) and no AMD package source was
# configured. The release is now selected through the repository path
# (ROCM_VERSION) instead of an exact "=6.3.0" pin.
#
# NOTE(review): package versions in AMD's repo appear to carry a build suffix,
#   which an exact "=6.3.0" pin would not match - confirm with `apt-cache policy`.
# NOTE(review): AMD documents this repository for Ubuntu and Debian 12 (using
#   the "jammy" suite); confirm that dependencies resolve on trixie, or move
#   the base image to a supported release.
# NOTE(review): rocm-dev is the development meta-package; if only runtime
#   libraries are needed, a smaller meta-package may be sufficient.
ARG ROCM_VERSION=6.3
RUN install -d -m 0755 /etc/apt/keyrings \
    && wget -qO /etc/apt/keyrings/rocm.asc https://repo.radeon.com/rocm/rocm.gpg.key \
    && echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.asc] https://repo.radeon.com/rocm/apt/${ROCM_VERSION} jammy main" \
         > /etc/apt/sources.list.d/rocm.list \
    && printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600\n' \
         > /etc/apt/preferences.d/rocm-pin-600 \
    && apt-get update && apt-get install -y \
      rocm-dev \
    && rm -rf /var/lib/apt/lists/*
# Unprivileged account the service runs as (fixed UID 1000).
RUN useradd -m -u 1000 appuser
WORKDIR /app

# Python dependencies, installed once as root into the system interpreter.
# Debian marks its Python as "externally managed" (PEP 668), so pip refuses to
# install outside a virtualenv unless --break-system-packages is passed; inside
# a single-purpose container that override is acceptable.
# TTS and faster-whisper are named here, next to requirements.txt, rather than
# being reinstalled later as appuser: a per-user install would place the `tts`
# console script in ~/.local/bin, which is not on PATH.
# Only the manifest is copied before this step so the layer stays cached until
# requirements.txt changes.
COPY requirements.txt .
RUN pip install --no-cache-dir --break-system-packages \
      -r requirements.txt \
      TTS \
      faster-whisper

# Model directory, owned by the service account.
RUN mkdir -p /app/models && chown appuser:appuser /app/models
USER appuser

# Best-effort warm-up: running the CLI once as appuser lets Coqui TTS fetch the
# YourTTS multilingual model at build time. A failure no longer passes
# silently - it is reported on stderr - but it still does not abort the build.
# The throw-away WAV is removed in the same layer.
# NOTE(review): the tts CLI presumably stores models in its own per-user cache,
#   not in /app/models - confirm where app.py and the compose volume expect them.
RUN (tts --text "Initializing model..." \
         --model_name tts_models/multilingual/multi-dataset/your_tts \
         --out_path /tmp/init.wav \
     || echo "WARNING: TTS warm-up exited non-zero; the model may not be cached in the image" >&2) \
    ; rm -f /tmp/init.wav
# Port the service is reached on (same port the health check probes).
# EXPOSE is documentation only; publishing is done by the container runtime.
EXPOSE 8000

# Application code. Without --chown the file is owned by root, so the service
# account can read it but not modify it.
COPY app.py .

# Make sure the final image runs as the unprivileged account.
USER appuser

# Health check: curl is not installed in this image, so probe /health with the
# Python standard library instead. urlopen raises on connection errors and on
# HTTP error statuses, which makes the interpreter exit non-zero (unhealthy).
# NOTE(review): start-period=5s may be too short if app.py loads the TTS and
#   Whisper models at startup - confirm and raise if needed.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD ["python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health', timeout=5)"]

# Start the service. Debian ships the interpreter as `python3`; there is no
# `python` executable unless python-is-python3 is installed.
CMD ["python3", "app.py"]