Skip to content

Commit

Permalink
TRT-LLM Multi-Node Tutorial: initial check-in
Browse files Browse the repository at this point in the history
This change creates the TensorRT-LLM Multi-Node tutorial and guide.

Includes:
- Instructions w/ explanations (README)
- Helm chart
- Container definitions
- Server-side Python script
- Various helpful YAML files.
  • Loading branch information
whoisj committed Jun 12, 2024
1 parent 0f920e0 commit a76a036
Show file tree
Hide file tree
Showing 27 changed files with 3,115 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# A pattern with no leading or middle slash matches at every directory level,
# so a single entry covers both the repository root and nested folders.

# Any directory named .vscode
.vscode/

# Any file or directory whose name starts with dev_
dev_*

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
dev_values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Helm chart metadata (chart API v2).
apiVersion: v2
name: triton_trt-llm_multi-node_example
description: Generative AI Multi-Node w/ Triton and TensorRT-LLM Guide/Tutorial
# Version strings are quoted so they are always read as strings.
version: '0.1.0'
appVersion: '0.1.0'
# NOTE(review): the icon file name contains "[email protected]", which looks like
# an e-mail-obfuscation artifact; confirm the real asset name before publishing.
icon: https://www.nvidia.com/content/dam/en-zz/Solutions/about-nvidia/logo-and-brand/[email protected]
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# GPU product name this configuration targets (presumably used to pick nodes;
# confirm against the chart templates).
gpu: Tesla-V100-SXM2-16GB

# Model settings; `name` must be nested under `model` so that
# `.Values.model.name` resolves.
model:
  name: gpt2
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# See values.yaml for reference values.

# GPU product name this configuration targets (presumably used to pick nodes;
# confirm against the chart templates).
gpu: NVIDIA-A10G

# Model settings. Nesting matters: without it `model` is null and the
# conversion `gpu` key collides with the top-level `gpu` key above.
# NOTE(review): hierarchy reconstructed from flattened text; confirm
# against the reference values.yaml.
model:
  name: llama-2-70b
  tensorrtLlm:
    # Resources for the model conversion step.
    conversion:
      gpu: 8
      memory: 256Gi
    # Tensor-parallel degree.
    parallelism:
      tensor: 8
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# See values.yaml for reference values.

# GPU product name this configuration targets (presumably used to pick nodes;
# confirm against the chart templates).
gpu: Tesla-V100-SXM2-16GB

# Model settings. Nesting matters: without it `model` is null and the
# conversion `gpu` key collides with the top-level `gpu` key above.
# NOTE(review): hierarchy reconstructed from flattened text; confirm
# against the reference values.yaml.
model:
  name: llama-2-7b-chat
  tensorrtLlm:
    # Resources for the model conversion step.
    conversion:
      gpu: 2
      memory: 64Gi
    # Tensor-parallel degree.
    parallelism:
      tensor: 2
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# See values.yaml for reference values.

# GPU product name this configuration targets (presumably used to pick nodes;
# confirm against the chart templates).
gpu: Tesla-V100-SXM2-16GB

# Model settings. Nesting matters: without it `model` is null and the
# conversion `gpu` key collides with the top-level `gpu` key above.
# NOTE(review): hierarchy reconstructed from flattened text; confirm
# against the reference values.yaml.
model:
  name: llama-2-7b
  tensorrtLlm:
    # Resources for the model conversion step.
    conversion:
      gpu: 2
      memory: 64Gi
    # Tensor-parallel degree.
    parallelism:
      tensor: 2
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# See values.yaml for reference values.

# GPU product name this configuration targets (presumably used to pick nodes;
# confirm against the chart templates).
gpu: NVIDIA-A10G

# Model settings. Nesting matters: without it `model` is null and the
# conversion `gpu` key collides with the top-level `gpu` key above.
# NOTE(review): hierarchy reconstructed from flattened text; confirm
# against the reference values.yaml.
model:
  name: llama-3-70b-instruct
  tensorrtLlm:
    # Resources for the model conversion step.
    conversion:
      gpu: 8
      memory: 256Gi
    # Tensor-parallel degree.
    parallelism:
      tensor: 8
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# See values.yaml for reference values.

# GPU product name this configuration targets (presumably used to pick nodes;
# confirm against the chart templates).
gpu: Tesla-V100-SXM2-16GB

# Model settings. Nesting matters: without it `model` is null and the
# conversion `gpu` key collides with the top-level `gpu` key above.
# NOTE(review): hierarchy reconstructed from flattened text; confirm
# against the reference values.yaml.
model:
  name: llama-3-8b-instruct
  tensorrtLlm:
    # Resources for the model conversion step.
    conversion:
      gpu: 4
      memory: 128Gi
    # Tensor-parallel degree.
    parallelism:
      tensor: 4
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# See values.yaml for reference values.

# GPU product name this configuration targets (presumably used to pick nodes;
# confirm against the chart templates).
gpu: Tesla-V100-SXM2-16GB

# Model settings. Nesting matters: without it `model` is null and the
# conversion `gpu` key collides with the top-level `gpu` key above.
# NOTE(review): hierarchy reconstructed from flattened text; confirm
# against the reference values.yaml.
model:
  name: llama-3-8b
  tensorrtLlm:
    # Resources for the model conversion step.
    conversion:
      gpu: 2
      memory: 64Gi
    # Tensor-parallel degree.
    parallelism:
      tensor: 2
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# See values.yaml for reference values.

# GPU product name this configuration targets (presumably used to pick nodes;
# confirm against the chart templates).
gpu: Tesla-V100-SXM2-16GB

# Model settings; `name` must be nested under `model` so that
# `.Values.model.name` resolves.
model:
  name: opt125m
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
{{- /*
Post-install notes: prints a summary of the release followed by a few
handy commands. Optional resources are listed only when enabled:
  - Conversion Job : omitted when model.skipConversion is truthy
  - Service        : omitted when kubernetes.noService is truthy
  - ServiceAccount : omitted when kubernetes.serviceAccount is truthy
*/ -}}
{{- $release := $.Release.Name }}
{{- $namespace := $.Release.Namespace }}
{{- $model := $.Values.model }}
{{- $k8s := $.Values.kubernetes }}
{{- $create_job := true }}
{{- $create_service := true }}
{{- $create_account := true }}
{{- if $model }}
{{- $create_job = not $model.skipConversion }}
{{- end }}
{{- if $k8s }}
{{- $create_service = not $k8s.noService }}
{{- $create_account = not $k8s.serviceAccount }}
{{- end }}
{{- /* Resource kinds for the kubectl example, kept in display order. */ -}}
{{- $kinds := list "deployments" }}
{{- if $create_job }}
{{- $kinds = append $kinds "jobs" }}
{{- end }}
{{- $kinds = append $kinds "pods" }}
{{- if $create_service }}
{{- $kinds = append $kinds "services" }}
{{- end }}
{{- $kinds = append $kinds "podmonitors" }}
{{- if $create_account }}
{{- $kinds = append $kinds "serviceAccounts" }}
{{- end }}

{{ $.Chart.Name }} ({{ $.Chart.Version }}) installation complete.

Release Name: {{ $release }}
Namespace: {{ $namespace }}
Deployment Name: {{ $release }}
{{- if $create_job }}
Conversion Job: {{ $release }}
{{- end }}
{{- if $create_service }}
Service Name: {{ $release }}
{{- end }}
{{- if $create_account }}
ServiceAccount Name: {{ $release }}
{{- end }}

Helpful commands:

$ helm status --namespace={{ $namespace }} {{ $release }}
$ helm get --namespace={{ $namespace }} all {{ $release }}
$ kubectl get --namespace={{ $namespace }} --selector='app={{ $release }}' {{ join "," $kinds -}}
Loading

0 comments on commit a76a036

Please sign in to comment.