diff --git a/ai/honcho/Dockerfile b/ai/honcho/Dockerfile index a4d43d9..e124482 100644 --- a/ai/honcho/Dockerfile +++ b/ai/honcho/Dockerfile @@ -71,4 +71,4 @@ COPY honcho-nginx.conf /etc/nginx/conf.d/default.conf EXPOSE 80 -CMD ["sh", "-c", "nginx -g 'daemon off;' & fastapi run --host 127.0.0.1 --port 8000 src/main.py"] +CMD ["sh", "-c", "nginx -g 'daemon off;' & fastapi run --host 127.0.0.1 --port 8000 src/main.py & python3 -m src.deriver & wait -n"] diff --git a/ai/honcho/config.toml b/ai/honcho/config.toml index 3c474d1..8c61033 100644 --- a/ai/honcho/config.toml +++ b/ai/honcho/config.toml @@ -29,15 +29,15 @@ URL = "redis://honcho-redis:6379/0" [llm] DEFAULT_MAX_TOKENS = 4096 -# Embeddings via Ollama +# Embeddings via Ollama — bge-m3 provides 1024-dim [embedding] VECTOR_DIMENSIONS = 1024 MAX_INPUT_TOKENS = 8192 [embedding.model_config] transport = "openai" -model = "nomic-embed-text" -base_url = "http://ollama:11434/v1" +model = "bge-m3" +overrides = {base_url = "http://ollama:11434/v1", api_key = "ollama"} # --- Deriver --- [deriver] @@ -47,10 +47,9 @@ POLLING_SLEEP_INTERVAL_SECONDS = 5.0 FLUSH_ENABLED = true [deriver.model_config] +overrides = {base_url = "https://opencode.ai/zen/go/v1", api_key_env = "HONCHO_OPENAI_API_KEY"} transport = "openai" model = "deepseek-v4-flash" -base_url = "https://opencode.ai/zen/go/v1" -api_key_env = "HONCHO_OPENAI_API_KEY" # --- Dialectic --- [dialectic] @@ -61,42 +60,37 @@ SESSION_HISTORY_MAX_TOKENS = 8192 MAX_TOOL_ITERATIONS = 1 MAX_OUTPUT_TOKENS = 512 [dialectic.levels.minimal.model_config] +overrides = {base_url = "https://opencode.ai/zen/go/v1", api_key_env = "HONCHO_OPENAI_API_KEY"} transport = "openai" model = "deepseek-v4-flash" -base_url = "https://opencode.ai/zen/go/v1" -api_key_env = "HONCHO_OPENAI_API_KEY" [dialectic.levels.low] MAX_TOOL_ITERATIONS = 3 [dialectic.levels.low.model_config] +overrides = {base_url = "https://opencode.ai/zen/go/v1", api_key_env = "HONCHO_OPENAI_API_KEY"} transport = "openai" model = "deepseek-v4-flash" -base_url = "https://opencode.ai/zen/go/v1" -api_key_env = "HONCHO_OPENAI_API_KEY" [dialectic.levels.medium] MAX_TOOL_ITERATIONS = 2 [dialectic.levels.medium.model_config] +overrides = {base_url = "https://opencode.ai/zen/go/v1", api_key_env = "HONCHO_OPENAI_API_KEY"} transport = "openai" model = "deepseek-v4-flash" -base_url = "https://opencode.ai/zen/go/v1" -api_key_env = "HONCHO_OPENAI_API_KEY" [dialectic.levels.high] MAX_TOOL_ITERATIONS = 4 [dialectic.levels.high.model_config] +overrides = {base_url = "https://opencode.ai/zen/go/v1", api_key_env = "HONCHO_OPENAI_API_KEY"} transport = "openai" model = "deepseek-v4-flash" -base_url = "https://opencode.ai/zen/go/v1" -api_key_env = "HONCHO_OPENAI_API_KEY" [dialectic.levels.max] MAX_TOOL_ITERATIONS = 10 [dialectic.levels.max.model_config] +overrides = {base_url = "https://opencode.ai/zen/go/v1", api_key_env = "HONCHO_OPENAI_API_KEY"} transport = "openai" model = "deepseek-v4-flash" -base_url = "https://opencode.ai/zen/go/v1" -api_key_env = "HONCHO_OPENAI_API_KEY" # --- Summary --- [summary] @@ -105,10 +99,9 @@ MESSAGES_PER_SHORT_SUMMARY = 20 MESSAGES_PER_LONG_SUMMARY = 60 [summary.model_config] +overrides = {base_url = "https://opencode.ai/zen/go/v1", api_key_env = "HONCHO_OPENAI_API_KEY"} transport = "openai" model = "deepseek-v4-flash" -base_url = "https://opencode.ai/zen/go/v1" -api_key_env = "HONCHO_OPENAI_API_KEY" # --- Dream --- [dream] @@ -121,4 +114,4 @@ ENABLED = true # --- Vector Store --- [vector_store] TYPE = "pgvector" -DIMENSIONS = 1024 +# DIMENSIONS is deprecated — EMBEDDING.VECTOR_DIMENSIONS is authoritative