# yatri-ai / docker-compose.yml
# Source provenance: uploaded by siddharthnavnath7, commit d639fb1
# (page residue converted to comments so the file parses as YAML)
# Compose stack for the Kumbh AI API: a GPU-backed FastAPI/LLM server plus an
# optional Ollama backend (enabled via `docker compose --profile ollama up`).
version: "3.9"  # NOTE: `version` is obsolete in the Compose Specification; kept for older docker-compose binaries

services:
  # ---- Main API Server ----
  kumbh-api:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: kumbh-api
    ports:
      - "8000:8000"  # quoted: avoids YAML 1.1 sexagesimal parsing of low port pairs
    volumes:
      - ./models:/app/models  # Fine-tuned GGUF model
      - ./vectordb/chroma_db:/app/vectordb/chroma_db
      - ./data:/app/data
    environment:
      - CUDA_VISIBLE_DEVICES=0
      - MODEL_PATH=/app/models/kumbh_model_q4_k_m.gguf
      - CHROMA_DB_PATH=/app/vectordb/chroma_db
      - LOG_LEVEL=INFO
    deploy:
      resources:
        reservations:
          devices:
            # Reserve one NVIDIA GPU (requires nvidia-container-toolkit on the host).
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/api/v1/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      # Grace period so a slow GGUF model load doesn't exhaust `retries`
      # and flag the container unhealthy during startup.
      start_period: 60s

  # ---- Ollama (Alternative LLM backend) ----
  ollama:
    image: ollama/ollama:latest
    container_name: kumbh-ollama
    ports:
      - "11434:11434"
    volumes:
      - ollama_data:/root/.ollama  # persist pulled models across restarts
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped
    profiles: ["ollama"]  # Only start when --profile ollama is specified

volumes:
  # Named volume with default (local) driver — empty value is idiomatic here.
  ollama_data: