# CUDA 12.1 runtime image on Ubuntu 22.04; the PyTorch wheels installed later
# in this file are the matching cu121 builds.
FROM nvidia/cuda:12.1.1-runtime-ubuntu22.04

# Avoid interactive prompts during package installation.
# Declared as ARG rather than ENV: the value is visible to the RUN steps of
# this build stage but is not persisted into the environment of containers
# started from the image.
ARG DEBIAN_FRONTEND=noninteractive

# Install Python 3.10 and system dependencies.
# Pinned ESPnet commit requires Python 3.10 (uses distutils removed in 3.12).
# `apt-get update` and `install` share one layer so the package index is never
# stale, and the index is removed in that same layer to keep it out of the image.
# Packages are listed alphabetically, one per line, to keep diffs small.
RUN apt-get update && apt-get install -y --no-install-recommends \
    ffmpeg \
    g++ \
    gcc \
    git \
    libsndfile1 \
    python3-pip \
    python3.10 \
    python3.10-dev \
    python3.10-venv \
    && rm -rf /var/lib/apt/lists/*

# Point the unversioned interpreter names at Python 3.10:
#   /usr/bin/python3 -> /usr/bin/python3.10
#   /usr/bin/python  -> /usr/bin/python3
RUN ln -sf /usr/bin/python3.10 /usr/bin/python3 \
 && ln -sf /usr/bin/python3 /usr/bin/python

# Let pip install directly into the system interpreter; this file does not
# create a virtual environment for the packages installed below.
ENV PIP_BREAK_SYSTEM_PACKAGES=1

# /app is the working directory for the remaining build steps and the
# directory containers start in.
WORKDIR /app

# PyTorch and torchaudio, taken from the PyTorch cu121 wheel index so the
# builds match the CUDA 12.1 base image.
RUN pip install --no-cache-dir \
        --index-url https://download.pytorch.org/whl/cu121 \
        "torch==2.5.1" \
        "torchaudio==2.5.1"

# Shared Python requirements, listed alphabetically.
# torch is already present from the previous step, so pip should treat any
# dependency on it as satisfied rather than installing it again.
# NOTE(review): only datasets and transformers are pinned; the other packages
# resolve to whatever is current at build time — consider pinning them too.
RUN pip install --no-cache-dir \
        "datasets==2.19.0" \
        evaluate \
        jiwer \
        librosa \
        num2words \
        peft \
        "transformers==4.47.1"

# ESPnet and the ESPnet Model Zoo, each installed straight from GitHub at a
# fixed commit hash so the build does not follow upstream branch changes.
RUN pip install --no-cache-dir \
        "espnet @ git+https://github.com/espnet/espnet@29a6e5a62b5d850686df8a1454efec69e2861332" \
        "espnet_model_zoo @ git+https://github.com/espnet/espnet_model_zoo@8b7301923c1c529a126c86c16fc73ce356b94d62"

# Extra dependency: the soundfile Python package.
RUN pip install --no-cache-dir soundfile

# Intended to make `datasets` decode audio through soundfile.
# NOTE(review): confirm that the pinned datasets version actually reads this
# variable; nothing in this file shows what consumes it.
ENV HF_AUDIO_DECODER_BACKEND=soundfile

# Copy the full repository (the whole build context) into the working directory.
# NOTE(review): make sure a .dockerignore keeps .git, caches and any credentials
# out of the context — everything copied here is also reachable through the
# default HTTP server below, which serves the container's working directory.
COPY . /app

# Port the default command listens on. EXPOSE is documentation only; the port
# still has to be published when the container is run.
EXPOSE 7860

# bash is the entrypoint, so arguments given to `docker run` are handed to bash
# (for example a script path, or `-c "<command>"`).
ENTRYPOINT ["bash"]

# Default arguments: serve the working directory over HTTP on port 7860.
# `exec` replaces the bash process with the Python server, so the server
# itself is the process that receives the container's stop signal; this no
# longer relies on bash implicitly exec'ing a lone `-c` command.
# NOTE(review): the image runs as root because no USER is set; consider a
# dedicated non-root user once the paths the workloads write to are known.
CMD ["-c", "exec python3 -m http.server 7860"]