Summary of this patch to ai/compose.yml (environment list of one service):
  * HSA_ENABLE_SDMA          0 -> 1
  * OLLAMA_FLASH_ATTENTION   0 -> 1
  * OLLAMA_NUM_PARALLEL      2 -> 1
  * OLLAMA_MAX_LOADED_MODELS added, set to 1
  * OLLAMA_MAX_QUEUE         added, set to 512

NOTE(review): the leading whitespace of the hunk lines below was reconstructed
assuming 2-space Compose nesting (list items at 6 spaces, `devices:` at 4).
Confirm against ai/compose.yml before applying, or apply with
`git apply --ignore-whitespace`.

diff --git a/ai/compose.yml b/ai/compose.yml
index 948c868..49f66fc 100644
--- a/ai/compose.yml
+++ b/ai/compose.yml
@@ -68,11 +68,13 @@ services:
       - HCC_AMDGPU_TARGET=gfx906
       - HIP_VISIBLE_DEVICES=0,1
       - ROCR_VISIBLE_DEVICES=0,1
-      - HSA_ENABLE_SDMA=0
+      - HSA_ENABLE_SDMA=1
+      - OLLAMA_MAX_LOADED_MODELS=1
+      - OLLAMA_MAX_QUEUE=512
       - OLLAMA_HOST=0.0.0.0
       - OLLAMA_DEBUG=1
-      - OLLAMA_FLASH_ATTENTION=0
-      - OLLAMA_NUM_PARALLEL=2
+      - OLLAMA_FLASH_ATTENTION=1
+      - OLLAMA_NUM_PARALLEL=1
     devices:
       # Map the render nodes and KFD for ROCm to work inside the container
       - /dev/kfd:/dev/kfd