Makefile — forked from huggingface/optimum-benchmark
191 lines (140 loc) · 5.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
# Declare every command-style target as phony so a stray file with the same
# name can never shadow it. NOTE(review): the previous list named targets that
# do not exist in this file (build_docker_cpu, run_docker_cuda, ...); it now
# mirrors the actual target names defined below.
.PHONY: quality style install \
	build_cpu_image build_cuda_118_image build_cuda_121_image build_rocm_image \
	run_cpu_container run_cuda_118_container run_cuda_121_container run_rocm_container \
	install_api_misc install_api_cpu install_api_cuda install_api_rocm \
	install_cli_misc install_cli_cpu_pytorch install_cli_cpu_openvino \
	install_cli_cpu_onnxruntime install_cli_cpu_neural_compressor \
	install_cli_cuda_pytorch install_cli_rocm_pytorch \
	install_cli_cuda_torch_ort install_cli_cuda_onnxruntime \
	test_api_misc test_api_cpu test_api_cuda test_api_rocm \
	test_cli_misc test_cli_cpu_pytorch test_cli_cpu_openvino \
	test_cli_cpu_onnxruntime test_cli_cpu_neural_compressor \
	test_cli_cuda_onnxruntime \
	test_cli_cuda_pytorch_multi_gpu test_cli_cuda_pytorch_single_gpu \
	test_cli_cuda_torch_ort_multi_gpu test_cli_cuda_torch_ort_single_gpu \
	test_cli_rocm_pytorch_multi_gpu test_cli_rocm_pytorch_single_gpu \
	install_llm_perf_cuda_pytorch \
	run_llm_perf_cuda_pytorch_unquantized run_llm_perf_cuda_pytorch_bnb \
	run_llm_perf_cuda_pytorch_gptq run_llm_perf_cuda_pytorch_awq

# Host-side identity, used to build/run docker images whose in-container user
# matches the invoking user (so files written into the bind mount stay owned
# by the caller). $(CURDIR) is make's built-in absolute cwd — no shell fork.
PWD := $(CURDIR)
USER_ID := $(shell id -u)
GROUP_ID := $(shell id -g)
# Lint and formatting gate (read-only): fails if ruff finds violations or
# files that are not formatted. Recipe tabs restored (the scraped source had
# them stripped, which is a make syntax error).
quality:
	ruff check .
	ruff format --check .

# Auto-fix counterpart of `quality`: rewrites formatting, then applies
# fixable lint rules in place.
style:
	ruff format .
	ruff check --fix .

# Editable (development) install of the package itself, no extras.
install:
	pip install -e .
## Build docker images
# Each image embeds the caller's UID/GID (see USER_ID/GROUP_ID above) so the
# container user maps cleanly onto the host user for bind mounts.
build_cpu_image:
	docker build --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t opt-bench-cpu:22.04 docker/cpu

# CUDA 11.8 image; TORCH_CUDA selects the matching PyTorch wheel index tag.
build_cuda_118_image:
	docker build --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) --build-arg TORCH_CUDA=cu118 --build-arg CUDA_VERSION=11.8.0 -t opt-bench-cuda:11.8.0 docker/cuda

# CUDA 12.1 image, same Dockerfile as 11.8 with different build args.
build_cuda_121_image:
	docker build --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) --build-arg TORCH_CUDA=cu121 --build-arg CUDA_VERSION=12.1.1 -t opt-bench-cuda:12.1.1 docker/cuda

# ROCm 5.7.1 image for AMD GPUs.
build_rocm_image:
	docker build --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t opt-bench-rocm:5.7.1 docker/rocm
# Run docker containers
# Common flags: interactive shell (-it), auto-remove on exit (--rm), host PID
# namespace (--pid host, so host tools can see benchmark processes), and the
# repo bind-mounted at /workspace as the working directory.
run_cpu_container:
	docker run \
	-it \
	--rm \
	--pid host \
	--volume $(PWD):/workspace \
	--entrypoint /bin/bash \
	--workdir /workspace \
	opt-bench-cpu:22.04

# CUDA 11.8 container: all host GPUs exposed; large shm for dataloader /
# multiprocess tensor sharing.
run_cuda_118_container:
	docker run \
	-it \
	--rm \
	--pid host \
	--gpus all \
	--shm-size 64G \
	--volume $(PWD):/workspace \
	--entrypoint /bin/bash \
	--workdir /workspace \
	opt-bench-cuda:11.8.0

# CUDA 12.1 container, identical to the 11.8 one apart from the image tag.
run_cuda_121_container:
	docker run \
	-it \
	--rm \
	--pid host \
	--gpus all \
	--shm-size 64G \
	--volume $(PWD):/workspace \
	--entrypoint /bin/bash \
	--workdir /workspace \
	opt-bench-cuda:12.1.1

# ROCm container: AMD GPUs are exposed via the /dev/kfd (compute) and
# /dev/dri (render) device nodes rather than --gpus.
run_rocm_container:
	docker run \
	-it \
	--rm \
	--pid host \
	--shm-size 64G \
	--device /dev/kfd \
	--device /dev/dri \
	--volume $(PWD):/workspace \
	--entrypoint /bin/bash \
	--workdir /workspace \
	opt-bench-rocm:5.7.1
## Install extras
# Editable installs with the extras each CI/test lane needs. The api/*
# targets are intentionally identical today (same extras on every device);
# they are kept separate so a lane can diverge without renaming.
install_api_misc:
	pip install -e .[testing,timm,diffusers,peft]

install_api_cpu:
	pip install -e .[testing,timm,diffusers,peft]

install_api_cuda:
	pip install -e .[testing,timm,diffusers,peft]

install_api_rocm:
	pip install -e .[testing,timm,diffusers,peft]

install_cli_misc:
	pip install -e .[testing,timm,diffusers,peft]

install_cli_cpu_pytorch:
	pip install -e .[testing,peft,timm,diffusers]

# CPU backends: one extra per inference engine on top of the common set.
install_cli_cpu_openvino:
	pip install -e .[testing,peft,timm,diffusers,openvino]

install_cli_cpu_onnxruntime:
	pip install -e .[testing,peft,timm,diffusers,onnxruntime]

install_cli_cpu_neural_compressor:
	pip install -e .[testing,peft,timm,diffusers,neural-compressor]

# CUDA adds the quantization/distributed extras (awq, gptq, bnb, deepspeed).
install_cli_cuda_pytorch:
	pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq,bitsandbytes,deepspeed]

# ROCm: same as CUDA minus bitsandbytes (not installed on this lane).
install_cli_rocm_pytorch:
	pip install -e .[testing,timm,diffusers,peft,autoawq,auto-gptq,deepspeed]

# torch-ort needs a post-install configure step to build its ORT extensions.
install_cli_cuda_torch_ort:
	pip install -e .[testing,timm,diffusers,peft,torch-ort,deepspeed]
	python -m torch_ort.configure

install_cli_cuda_onnxruntime:
	pip install -e .[testing,timm,diffusers,peft,onnxruntime-gpu]
# Run tests — each target selects its lane via pytest's -k keyword expression.
# BUG FIX: test_api_misc was missing the closing double quote on its -k
# expression, which would make the shell swallow the rest of the line.
test_api_misc:
	pytest -s -k "api and not (cpu or cuda)"

test_api_cpu:
	pytest -s -k "api and cpu"

test_api_cuda:
	pytest -s -k "api and cuda"

# NOTE(review): matches "cuda", not "rocm" — presumably because PyTorch on
# ROCm reports the device as cuda, so the same tests cover it. Confirm.
test_api_rocm:
	pytest -s -k "api and cuda"

test_cli_misc:
	pytest -s -k "cli and not (cpu or cuda)"

test_cli_cpu_pytorch:
	pytest -s -k "cli and cpu and pytorch"

test_cli_cpu_openvino:
	pytest -s -k "cli and cpu and openvino"

test_cli_cpu_onnxruntime:
	pytest -s -k "cli and cpu and onnxruntime"

test_cli_cpu_neural_compressor:
	pytest -s -k "cli and cpu and neural-compressor"

test_cli_cuda_onnxruntime:
	pytest -s -k "cli and cuda and onnxruntime"

# Multi-GPU lanes select the distributed configs (dp/ddp/device_map/deepspeed);
# single-GPU lanes negate the same expression. awq is excluded on CUDA,
# bnb and awq on ROCm.
test_cli_cuda_pytorch_multi_gpu:
	pytest -s -k "cli and cuda and pytorch and (dp or ddp or device_map or deepspeed) and not awq"

test_cli_cuda_pytorch_single_gpu:
	pytest -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not awq"

test_cli_cuda_torch_ort_multi_gpu:
	pytest -s -k "cli and cuda and torch-ort and (dp or ddp or device_map or deepspeed) and not peft"

test_cli_cuda_torch_ort_single_gpu:
	pytest -s -k "cli and cuda and torch-ort and not (dp or ddp or device_map or deepspeed) and not peft"

test_cli_rocm_pytorch_multi_gpu:
	pytest -s -k "cli and rocm and pytorch and (dp or ddp or device_map or deepspeed) and not (bnb or awq)"

test_cli_rocm_pytorch_single_gpu:
	pytest -s -k "cli and rocm and pytorch and not (dp or ddp or device_map or deepspeed) and not (bnb or awq)"
# llm-perf — standalone leaderboard-style benchmark driver.
# packaging is installed first because flash-attn's setup imports it at build
# time; the benchmark script reads its quantization subset from $SUBSET.
install_llm_perf_cuda_pytorch:
	pip install packaging && pip install flash-attn einops scipy auto-gptq optimum bitsandbytes autoawq
	pip install -U transformers huggingface_hub[hf_transfer]
	pip install -e .[codecarbon]

run_llm_perf_cuda_pytorch_unquantized:
	SUBSET=unquantized python llm_perf/benchmark_cuda_pytorch.py

run_llm_perf_cuda_pytorch_bnb:
	SUBSET=bnb python llm_perf/benchmark_cuda_pytorch.py

run_llm_perf_cuda_pytorch_gptq:
	SUBSET=gptq python llm_perf/benchmark_cuda_pytorch.py

run_llm_perf_cuda_pytorch_awq:
	SUBSET=awq python llm_perf/benchmark_cuda_pytorch.py