-
Notifications
You must be signed in to change notification settings - Fork 109
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #53 from makaveli10/upgrade/tensorrt_llm_0_10_0
Upgrade to TensorRT-LLM v0.10.0.
- Loading branch information
Showing
18 changed files
with
140 additions
and
215 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,27 +1,23 @@ | ||
# Base CUDA image; both parts can be overridden with --build-arg.
ARG BASE_IMAGE=nvcr.io/nvidia/cuda
ARG BASE_TAG=12.4.0-runtime-ubuntu22.04

# --- base: CUDA image plus the OS packages used by the later stages ---
FROM ${BASE_IMAGE}:${BASE_TAG} AS base
# CUDA_ARCH is supplied at build time and exported into the image environment.
ARG CUDA_ARCH
ENV CUDA_ARCH=${CUDA_ARCH}

# Update, install, and remove the apt lists in a single layer.
# NOTE(review): --no-install-recommends is intentionally not used here; the
# recommended packages of python3-pip may be needed by the pip install below
# -- confirm before adding the flag.
RUN apt-get update && apt-get install -y \
    python3.10 python3-pip openmpi-bin libopenmpi-dev git wget \
    xz-utils curl && \
    rm -rf /var/lib/apt/lists/*

# --- devel: TensorRT-LLM 0.10.0 wheel plus the matching examples directory ---
FROM base AS devel
WORKDIR /root/
RUN pip3 install --no-cache-dir -U tensorrt_llm==0.10.0 --extra-index-url https://pypi.nvidia.com
# Shallow-clone the v0.10.0 tag, keep only examples/ (as TensorRT-LLM-examples),
# and delete the rest of the checkout in the same layer.
RUN git clone -b v0.10.0 --depth 1 https://github.com/NVIDIA/TensorRT-LLM.git && \
    mv TensorRT-LLM/examples ./TensorRT-LLM-examples && \
    rm -rf TensorRT-LLM

# --- release: run the WhisperFusion setup script on top of devel ---
FROM devel AS release
WORKDIR /root/
COPY scripts/setup-whisperfusion.sh /root/
RUN ./setup-whisperfusion.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
#!/bin/bash -e
#
# Build an fp16 TensorRT-LLM engine for a Microsoft Phi model and stage the
# engine, the tokenizer files, and the original Hugging Face snapshot under
# /root/scratch-space/models.
#
# Usage: <this script> MODEL_TYPE
#   MODEL_TYPE  Model name under the "microsoft" Hugging Face organisation.
#               It is also passed to convert_checkpoint.py as --model_type
#               and used as the name of the engine output directory.

# The "-e" on the shebang line is ignored when the script is started as
# "bash <script>", so enable strict mode explicitly as well.
set -euo pipefail

if [ "$#" -lt 1 ] || [ -z "$1" ]; then
    echo "Usage: $0 MODEL_TYPE" >&2
    exit 1
fi

## Note: Phi is only available in the main branch and hasn't been released yet. So, make sure to build TensorRT-LLM from the main branch.
## NOTE(review): this script works from /root/TensorRT-LLM-examples; if that
## directory comes from a release tag, the note above may be stale -- confirm.

cd /root/TensorRT-LLM-examples/phi

## Build TensorRT for Phi-2 with `fp16`

MODEL_TYPE=$1
echo "Download $MODEL_TYPE Huggingface models..."

# The stdout of huggingface-cli is captured and used as the local model directory.
phi_path=$(huggingface-cli download --repo-type model "microsoft/$MODEL_TYPE")
echo "Building TensorRT Engine..."
# The engine directory is named after the model type.
name=$MODEL_TYPE
pip install -r requirements.txt

# Step 1: convert the Hugging Face checkpoint to a float16 TensorRT-LLM checkpoint.
python3 ./convert_checkpoint.py --model_type "$MODEL_TYPE" \
    --model_dir "$phi_path" \
    --output_dir ./phi-checkpoint \
    --dtype float16

# Step 2: build the engine from the converted checkpoint.
trtllm-build \
    --checkpoint_dir ./phi-checkpoint \
    --output_dir "$name" \
    --gpt_attention_plugin float16 \
    --context_fmha enable \
    --gemm_plugin float16 \
    --max_batch_size 1 \
    --max_input_len 1024 \
    --max_output_len 1024 \
    --tp_size 1 \
    --pp_size 1

# Step 3: stage the engine, tokenizer files, and original model.
dest=/root/scratch-space/models
mkdir -p "$dest/$name/tokenizer"
cp -r "$name" "$dest"
# Subshell so the working directory change does not leak into the rest of the script.
(cd "$phi_path" && cp config.json tokenizer_config.json tokenizer.json special_tokens_map.json added_tokens.json vocab.json merges.txt "$dest/$name/tokenizer")
# NOTE(review): if the downloaded snapshot consists of symlinks, "cp -r" copies
# the links rather than the file contents -- confirm whether "cp -rL" is wanted.
cp -r "$phi_path" "$dest/phi-orig-model"
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.