-
Notifications
You must be signed in to change notification settings - Fork 96
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
TRT-LLM Multi-Node Tutorial: initial check-in
This change creates the TensorRT-LLM Multi-Node tutorial and guide. It includes: instructions with explanations (README), a Helm chart, container definitions, a server-side Python script, and various helpful YAML files.
- Loading branch information
Showing
27 changed files
with
3,115 additions
and
0 deletions.
There are no files selected for viewing
5 changes: 5 additions & 0 deletions
5
Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/.gitignore
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
.vscode/ | ||
**/.vscode/ | ||
|
||
dev_* | ||
**/dev_* |
678 changes: 678 additions & 0 deletions
678
Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/README.md
Large diffs are not rendered by default.
Oops, something went wrong.
1 change: 1 addition & 0 deletions
1
Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/.gitignore
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
dev_values.yaml |
20 changes: 20 additions & 0 deletions
20
Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/Chart.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
apiVersion: v2 | ||
appVersion: 0.1.0 | ||
description: Generative AI Multi-Node w/ Triton and TensorRT-LLM Guide/Tutorial | ||
icon: https://www.nvidia.com/content/dam/en-zz/Solutions/about-nvidia/logo-and-brand/01-nvidia-logo-vert-500x200-2c50-d@2x.png | ||
name: triton_trt-llm_multi-node_example | ||
version: 0.1.0 |
18 changes: 18 additions & 0 deletions
18
Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/gpt2_values.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
gpu: Tesla-V100-SXM2-16GB | ||
|
||
model: | ||
name: gpt2 |
26 changes: 26 additions & 0 deletions
26
...yment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-2-70b_values.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
# See values.yaml for reference values. | ||
|
||
gpu: NVIDIA-A10G | ||
|
||
model: | ||
name: llama-2-70b | ||
tensorrtLlm: | ||
conversion: | ||
gpu: 8 | ||
memory: 256Gi | ||
parallelism: | ||
tensor: 8 |
26 changes: 26 additions & 0 deletions
26
...t/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-2-7b-chat_values.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
# See values.yaml for reference values. | ||
|
||
gpu: Tesla-V100-SXM2-16GB | ||
|
||
model: | ||
name: llama-2-7b-chat | ||
tensorrtLlm: | ||
conversion: | ||
gpu: 2 | ||
memory: 64Gi | ||
parallelism: | ||
tensor: 2 |
26 changes: 26 additions & 0 deletions
26
...oyment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-2-7b_values.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
# See values.yaml for reference values. | ||
|
||
gpu: Tesla-V100-SXM2-16GB | ||
|
||
model: | ||
name: llama-2-7b | ||
tensorrtLlm: | ||
conversion: | ||
gpu: 2 | ||
memory: 64Gi | ||
parallelism: | ||
tensor: 2 |
26 changes: 26 additions & 0 deletions
26
...ernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-3-70b-instruct_values.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
# See values.yaml for reference values. | ||
|
||
gpu: NVIDIA-A10G | ||
|
||
model: | ||
name: llama-3-70b-instruct | ||
tensorrtLlm: | ||
conversion: | ||
gpu: 8 | ||
memory: 256Gi | ||
parallelism: | ||
tensor: 8 |
26 changes: 26 additions & 0 deletions
26
...bernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-3-8b-instruct_values.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
# See values.yaml for reference values. | ||
|
||
gpu: Tesla-V100-SXM2-16GB | ||
|
||
model: | ||
name: llama-3-8b-instruct | ||
tensorrtLlm: | ||
conversion: | ||
gpu: 4 | ||
memory: 128Gi | ||
parallelism: | ||
tensor: 4 |
26 changes: 26 additions & 0 deletions
26
...oyment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/llama-3-8b_values.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
# See values.yaml for reference values. | ||
|
||
gpu: Tesla-V100-SXM2-16GB | ||
|
||
model: | ||
name: llama-3-8b | ||
tensorrtLlm: | ||
conversion: | ||
gpu: 2 | ||
memory: 64Gi | ||
parallelism: | ||
tensor: 2 |
20 changes: 20 additions & 0 deletions
20
Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/opt125m_values.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
# See values.yaml for reference values. | ||
|
||
gpu: Tesla-V100-SXM2-16GB | ||
|
||
model: | ||
name: opt125m |
48 changes: 48 additions & 0 deletions
48
Deployment/Kubernetes/TensorRT-LLM_Multi-Node_Distributed_Models/chart/templates/NOTES.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
{{- $create_account := true }} | ||
{{- $create_job := true }} | ||
{{- $create_service := true }} | ||
{{- with $.Values.model }} | ||
{{- if .skipConversion }} | ||
{{- $create_job = false }} | ||
{{- end }} | ||
{{- end }} | ||
{{- with $.Values.kubernetes }} | ||
{{- if .noService }} | ||
{{- $create_service = false }} | ||
{{- end }} | ||
{{- if .serviceAccount}} | ||
{{- $create_account = false }} | ||
{{- end }} | ||
{{- end }} | ||
|
||
{{ $.Chart.Name }} ({{ $.Chart.Version }}) installation complete. | ||
|
||
Release Name: {{ $.Release.Name }} | ||
Namespace: {{ $.Release.Namespace }} | ||
Deployment Name: {{ $.Release.Name }} | ||
{{- if $create_job }} | ||
Conversion Job: {{ $.Release.Name }} | ||
{{- end }} | ||
{{- if $create_service }} | ||
Service Name: {{ $.Release.Name }} | ||
{{- end }} | ||
{{- if $create_account }} | ||
ServiceAccount Name: {{ $.Release.Name }} | ||
{{- end }} | ||
|
||
Helpful commands: | ||
|
||
$ helm status --namespace={{ $.Release.Namespace }} {{ $.Release.Name }} | ||
$ helm get --namespace={{ $.Release.Namespace }} all {{ $.Release.Name }} | ||
$ kubectl get --namespace={{ $.Release.Namespace }} --selector='app={{ $.Release.Name }}' deployments | ||
{{- if $create_job -}} | ||
,jobs | ||
{{- end -}} | ||
,pods | ||
{{- if $create_service -}} | ||
,services | ||
{{- end -}} | ||
,podmonitors | ||
{{- if $create_account -}} | ||
,serviceAccounts | ||
{{- end -}} |
Oops, something went wrong.