-
Notifications
You must be signed in to change notification settings - Fork 0
/
Dockerfile.gpu
38 lines (28 loc) · 983 Bytes
/
Dockerfile.gpu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# CUDA version used to pick both the build base image and the runtime base
# image. It has no default, so it must be passed with
# `--build-arg CUDA_VERSION=...`.
ARG CUDA_VERSION

# ---------------------------------------------------------------------------
# Build stage: downloads the model files and compiles the release binary with
# the candle-core "cuda" feature enabled.
# ---------------------------------------------------------------------------
FROM ghcr.io/weaviate/t2v-transformers-models-rs/t2v-rs-root-gpu-${CUDA_VERSION}:0.0.1 AS build

# NOTE(review): sourcing the activate script only affects the shell of this
# single RUN instruction; later RUN steps start a fresh shell and do not
# inherit it. Kept unchanged because the venv location inside the base image
# is not visible from this file — confirm whether it can be replaced by an
# ENV PATH entry or dropped.
RUN . venv/bin/activate

WORKDIR /app

# Automatic platform build argument (target architecture of the image).
ARG TARGETARCH
# Name of the model to download; supplied at build time.
ARG MODEL_NAME

# Settings consumed by the download step (presumably read by ci.download —
# verify against that script).
ENV MODEL_NAME=${MODEL_NAME} \
    ONNX_RUNTIME=false \
    ONNX_CPU=${TARGETARCH} \
    MODEL_DIR="./models/model"

# Download the models first, with only the ci/ helpers in the image, so that
# edits to the Rust sources or manifests do not invalidate this layer.
COPY ./ci ./ci
RUN python3 -m ci.download

# Copy the Cargo manifests and the crate sources.
COPY ./Cargo.lock ./Cargo.lock
COPY ./Cargo.toml ./Cargo.toml
COPY ./src ./src

# Add candle-core with its "cuda" feature to the manifest, then build the
# whole crate in release mode.
RUN cargo add candle-core --features "cuda"
RUN cargo build --release
# ---------------------------------------------------------------------------
# Release stage: CUDA runtime base image that carries only the downloaded
# model directory and the compiled binary from the build stage.
# ---------------------------------------------------------------------------
FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu22.04 AS release

# Model files produced by the download step of the build stage.
COPY --from=build /app/models/model /usr/local/models/model
# Release binary produced by `cargo build --release`, installed as t2v-rs.
COPY --from=build /app/target/release/bin /usr/local/bin/t2v-rs

# Runtime configuration: CUDA switch and the locations of the copied model
# artifacts.
ENV ENABLE_CUDA=true \
    MODEL_PATH=/usr/local/models/model/model.safetensors \
    TOKENIZER_PATH=/usr/local/models/model/tokenizer.json \
    CONFIG_PATH=/usr/local/models/model/config.json

# NOTE(review): no USER instruction is set, so the container runs as the base
# image's default user (presumably root) — consider a dedicated non-root user
# once it is confirmed the service needs no root privileges.
CMD ["/usr/local/bin/t2v-rs"]