forked from generativelabs/exllama-runpod-serverless
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDockerfile
23 lines (18 loc) · 741 Bytes
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# Base image: RunPod's PyTorch image, pinned to an explicit tag.
# NOTE(review): the tag presumably encodes Python 3.10 / PyTorch 2.0.0 /
# CUDA 11.7 — confirm against the image's published tags.
FROM runpod/pytorch:3.10-2.0.0-117

# Run all following RUN steps under bash with pipefail, so a failure in any
# stage of a shell pipeline fails the build step.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# WORKDIR creates /data when it does not exist, so no separate mkdir is needed.
# (A relative `mkdir data` would be created in whatever directory the base
# image starts in, which is not necessarily /.)
WORKDIR /data
# Install Python dependencies (Worker Template).
# --no-cache-dir keeps pip's download/wheel cache out of the image layer.
# NOTE(review): sentencepiece and huggingface_hub are unpinned, and the runpod
# fork is tracked by branch name (streaming_job_dev), so rebuilds are not
# reproducible — consider pinning versions and a commit SHA.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir \
        safetensors==0.3.1 \
        sentencepiece \
        huggingface_hub \
        git+https://github.com/winglian/runpod-python.git@streaming_job_dev

# Fetch exllama into ./exllama under the current working directory.
# --depth 1 skips the repository history, which nothing in this file uses.
# NOTE(review): the clone follows the default branch head; pin a commit or tag
# if reproducible builds are required.
RUN git clone --depth 1 https://github.com/turboderp/exllama

# Install exllama's own requirements from the fresh checkout.
RUN pip install --no-cache-dir -r exllama/requirements.txt
# Add the handler module (the one launched by CMD) to /data; the trailing
# slash marks the destination as a directory, so the file keeps its name.
COPY handler.py /data/
# The build-context file is named `__init.py__` and is renamed to
# `__init__.py` inside the image.
# NOTE(review): the source name looks like a typo that this rename papers
# over — confirm against the repository before changing either side.
COPY __init.py__ /data/__init__.py
# Runtime environment, declared in a single ENV instruction.
#   PYTHONPATH             puts the exllama checkout on Python's import path.
#   MODEL_REPO,
#   PROMPT_PREFIX,
#   PROMPT_SUFFIX          default to empty strings; presumably read by
#                          handler.py and overridden at deploy time — confirm.
#   HUGGINGFACE_HUB_CACHE,
#   TRANSFORMERS_CACHE     both point at the same directory under
#                          /runpod-volume.
ENV PYTHONPATH=/data/exllama \
    MODEL_REPO="" \
    PROMPT_PREFIX="" \
    PROMPT_SUFFIX="" \
    HUGGINGFACE_HUB_CACHE="/runpod-volume/huggingface-cache/hub" \
    TRANSFORMERS_CACHE="/runpod-volume/huggingface-cache/hub"
# Default command, in exec (JSON-array) form so no shell wraps the process:
# run the `handler` module with `python -m`.
CMD ["python", "-m", "handler"]