-
Notifications
You must be signed in to change notification settings - Fork 27
/
docker-compose.yml
46 lines (44 loc) · 966 Bytes
/
docker-compose.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
---
# Compose stack with two services: the openai_trtllm front end and the
# Triton Inference Server container it connects to.
version: '3'

services:
  # Front-end service, built from the Dockerfile in this directory.
  openai_trtllm:
    image: openai_trtllm
    build:
      context: .
      dockerfile: Dockerfile
    # Arguments handed to the image's entrypoint. The Triton endpoint uses
    # the backend's Compose service name as its hostname.
    command:
      - '--host'
      - '0.0.0.0'
      - '--port'
      - '3000'
      - '--triton-endpoint'
      - 'http://tensorrtllm_backend:8001'
    ports:
      - '3000:3000'
    # Short-form depends_on only orders container start-up; it does not
    # wait for the backend to report ready.
    depends_on:
      - tensorrtllm_backend
    restart: on-failure

  # Triton backend for TensorRT LLM
  tensorrtllm_backend:
    image: nvcr.io/nvidia/tritonserver:24.03-trtllm-python-py3
    command:
      - 'tritonserver'
      - '--model-repository=/models'
    volumes:
      # Placeholder host path: point this at the real model repository.
      - /path/to/model_repository:/models
    ports:
      # 8001 is the port the front end's --triton-endpoint targets.
      # NOTE(review): 8000 and 8002 are presumably Triton's HTTP and
      # metrics ports -- confirm against the Triton documentation.
      - '8000:8000'
      - '8001:8001'
      - '8002:8002'
    deploy:
      replicas: 1
      resources:
        reservations:
          # Reserve one NVIDIA GPU for this container.
          devices:
            - driver: nvidia
              count: 1
              capabilities:
                - gpu
    shm_size: '2g'
    ulimits:
      # -1 lifts the locked-memory limit; the stack limit is 64 MiB.
      memlock: -1
      stack: 67108864
    restart: on-failure