# Dockerfile.gpu

# Stage "pytorch-install": Debian slim image that receives a conda installation
# under /opt/conda with PyTorch installed into it.
FROM debian:bullseye-slim AS pytorch-install

# Build-time knobs; override any of them with --build-arg.
ARG PYTORCH_VERSION=2.0.1
ARG PYTHON_VERSION=3.9
ARG CUDA_VERSION=11.8.0
# Miniforge release tag; used to build the installer download URL.
ARG MAMBA_VERSION=23.1.0-4
# conda channels passed with -c when PyTorch and its CUDA runtime are installed.
ARG CUDA_CHANNEL=nvidia
ARG INSTALL_CHANNEL=pytorch
# NOTE(review): not referenced by any instruction visible in this file;
# presumably consumed by CUDA extension builds - confirm before removing.
ARG TORCH_CUDA_ARCH_LIST="3.7;5.0;6.0;7.0;7.5;8.0;8.6+PTX"
# Automatically set by buildx
ARG TARGETPLATFORM

# Put the conda binaries first on PATH.
# key=value form: the space-separated `ENV key value` syntax is deprecated.
ENV PATH=/opt/conda/bin:$PATH

# OS-level build dependencies: compiler toolchain, TLS root certificates,
# ccache, curl and git. The apt package lists are removed in the same layer
# so they do not persist in the image.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
        build-essential \
        ca-certificates \
        ccache \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*

# Install conda (Miniforge) into /opt/conda.
# Docker's TARGETPLATFORM is translated into the architecture suffix used in
# the Miniforge release file names; anything other than linux/arm64 is
# treated as x86_64.
# Download, install and delete happen in ONE layer so the installer script is
# not kept in an intermediate layer. Only -o is passed to curl: the extra -O
# had no URL to apply to.
RUN case "${TARGETPLATFORM}" in \
        "linux/arm64")  MAMBA_ARCH=aarch64  ;; \
        *)              MAMBA_ARCH=x86_64   ;; \
    esac && \
    curl -fsSL -v -o ~/miniforge.sh "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Miniforge3-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" && \
    bash ~/miniforge.sh -b -p /opt/conda && \
    rm ~/miniforge.sh

# Install pytorch
# Guard clause: a linux/arm64 target aborts the build with exit status 1.
# Every other platform updates conda, installs PyTorch with the matching
# pytorch-cuda meta-package (CUDA_VERSION truncated to major.minor), and then
# cleans the conda caches.
RUN if [ "${TARGETPLATFORM}" = "linux/arm64" ]; then exit 1; fi && \
    /opt/conda/bin/conda update -qy conda && \
    cuda_major_minor="$(echo $CUDA_VERSION | cut -d'.' -f 1-2)" && \
    /opt/conda/bin/conda install -c "${INSTALL_CHANNEL}" -c "${CUDA_CHANNEL}" -yq \
        "python=${PYTHON_VERSION}" \
        pytorch==$PYTORCH_VERSION \
        torchvision \
        torchaudio \
        "pytorch-cuda=${cuda_major_minor}" && \
    /opt/conda/bin/conda clean -yqa

# CUDA kernels builder image
# Built on top of the pytorch-install stage, so it inherits /opt/conda and the
# OS packages installed there.
FROM pytorch-install AS kernel-builder

# ninja-build is added on top of the inherited toolchain; the apt lists are
# removed in the same layer.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    ninja-build \
    && rm -rf /var/lib/apt/lists/*

# Install the `cuda` conda package pinned to CUDA_VERSION from NVIDIA's
# version-specific channel label, then clean the conda caches in the same layer.
# CUDA_VERSION is the ARG declared in the pytorch-install stage this stage is
# built FROM.
# -y confirms the transaction explicitly, matching the other conda calls in
# this file, instead of relying on how the prompt behaves without a terminal.
RUN /opt/conda/bin/conda install -yq -c "nvidia/label/cuda-${CUDA_VERSION}" "cuda==${CUDA_VERSION}" && \
    /opt/conda/bin/conda clean -yqa

# Stage "base": starts again from a clean Debian slim image and receives only
# the /opt/conda tree from the kernel-builder stage.
FROM debian:bullseye-slim AS base

# Make the copied conda installation the default Python environment.
ENV PATH=/opt/conda/bin:$PATH \
    CONDA_PREFIX=/opt/conda

LABEL com.nvidia.volumes.needed="nvidia_driver"

# Copy conda with PyTorch installed
COPY --from=kernel-builder /opt/conda /opt/conda

# OS packages for this stage: compiler toolchain, cmake, git and zlib headers.
# --no-install-recommends keeps optional packages out of the image.
# ca-certificates is therefore listed explicitly: a later step clones a
# repository over HTTPS, and it is no longer pulled in implicitly
# (presumably it arrived as a recommended package before - verify).
# The interactive `apt-get autoremove` (no -y) is dropped, and the apt package
# lists are removed in the same layer that created them.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        git \
        zlib1g-dev && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /llm-api

# Only the dependency manifest is copied before the install step, so this
# layer is rebuilt when requirements.txt changes rather than on every
# application source change.
COPY ./requirements.txt /llm-api/requirements.txt

# Python dependencies, installed in a fixed order:
#   1. the project requirements;
#   2. pinned accelerate / packaging / ninja, which must be present before
#      step 3 because that step disables build isolation;
#   3. flash-attn, built against the already-installed environment;
#   4. the prebuilt AutoAWQ wheel (CUDA 11.8, CPython 3.9, x86_64).
# Every install uses --no-cache-dir, so no pip cache is written and the former
# `pip3 cache purge` step is unnecessary.
RUN pip3 install --no-cache-dir --upgrade -r requirements.txt && \
    pip3 install --no-cache-dir accelerate==0.20.3 packaging==23.0 ninja==1.11.1 && \
    pip3 install --no-cache-dir --no-build-isolation flash-attn==v2.3.3 && \
    pip3 install --no-cache-dir https://github.com/casper-hansen/AutoAWQ/releases/download/v0.1.6/autoawq-0.1.6+cu118-cp39-cp39-linux_x86_64.whl && \
    /opt/conda/bin/conda clean -ya

# Application sources.
COPY ./app /llm-api/app
# Make /llm-api an import root for Python.
# key=value form: the space-separated `ENV key value` syntax is deprecated.
ENV PYTHONPATH="/llm-api"

# Vendor GPTQ-for-LLaMa into the application tree:
#   - clone the repository and install its Python requirements;
#   - add an __init__.py and rename the directory to GPTQforLLaMa
#     (the original name contains hyphens);
#   - move it under /llm-api/app/llms/gptq_llama/.
# --depth 1 fetches only the latest commit, so the full history is not stored
# in the image; --no-cache-dir keeps pip's cache out of the layer.
# NOTE(review): the clone is not pinned to a tag or commit, so rebuilds may
# pick up different upstream code - consider pinning.
RUN git clone --depth 1 https://github.com/qwopqwop200/GPTQ-for-LLaMa && \
    cd GPTQ-for-LLaMa && \
    pip3 install --no-cache-dir -r requirements.txt && \
    cd .. && \
    touch GPTQ-for-LLaMa/__init__.py && \
    mv GPTQ-for-LLaMa GPTQforLLaMa && \
    mv -f GPTQforLLaMa /llm-api/app/llms/gptq_llama/

# Final image: the base stage unchanged, plus the default start command.
FROM base

# Exec-form CMD (no shell wrapper). The relative script path is resolved
# against the working directory, which the base stage sets to /llm-api.
CMD ["python3", "./app/main.py"]