Skip to content

Commit 962f893

Browse files
authored
Add ./torchbench.py --fast option (#198)
1 parent 4f916f5 commit 962f893

File tree

3 files changed

+42
-12
lines changed

3 files changed

+42
-12
lines changed

Makefile

+5-7
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ test: develop
1515
pytest tests
1616

1717
torchbench: develop
18-
python torchbench.py
18+
python torchbench.py --fast
1919

2020
overhead: develop
2121
python torchbench.py --overhead
@@ -129,12 +129,10 @@ baseline-gpu: develop
129129

130130
baseline-gpu-inductor: develop
131131
rm -f baseline_*.csv
132-
python torchbench.py -dcuda --float32 --isolate -n50 --inductor
133-
python torchbench.py -dcuda --float32 --isolate -n50 --backend=cudagraphs && mv speedup_cudagraphs.csv baseline_cudagraphs.csv
134-
python torchbench.py -dcuda --float32 --isolate -n50 --backend=cudagraphs_ts --nvfuser && mv speedup_cudagraphs_ts.csv baseline_cg_nvfuser.csv
135-
python torchbench.py -dcuda --float32 --isolate -n50 --backend=cudagraphs_ts && mv speedup_cudagraphs_ts.csv baseline_cg_nnc.csv
136-
# python torchbench.py -dcuda --float32 --isolate -n50 --speedup-ts --nvfuser && mv baseline_ts.csv baseline_ts_nvfuser.csv
137-
# python torchbench.py -dcuda --float32 --isolate -n50 --speedup-ts && mv baseline_ts.csv baseline_ts_nnc.csv
132+
python torchbench.py --cosine -dcuda --float32 --isolate -n50 --inductor
133+
python torchbench.py --cosine -dcuda --float32 --isolate -n50 --backend=cudagraphs && mv speedup_cudagraphs.csv baseline_cudagraphs.csv
134+
python torchbench.py --cosine -dcuda --float32 --isolate -n50 --backend=cudagraphs_ts --nvfuser && mv speedup_cudagraphs_ts.csv baseline_cg_nvfuser.csv
135+
python torchbench.py --cosine -dcuda --float32 --isolate -n50 --backend=cudagraphs_ts && mv speedup_cudagraphs_ts.csv baseline_cg_nnc.csv
138136
paste -d, inductor.csv baseline_cudagraphs.csv baseline_cg_nvfuser.csv baseline_cg_nnc.csv > baseline_all.csv
139137

140138

README.md

+1-1
Original file line number | Diff line number | Diff line change
@@ -376,7 +376,7 @@ cd ../torchdynamo
376376
make lint-deps
377377
378378
# make sure it works
379-
./torchbench.py
379+
./torchbench.py --fast
380380
```
381381

382382
## Tests

torchbench.py

+36-4
Original file line number | Diff line number | Diff line change
@@ -105,6 +105,32 @@
105105
"timm_efficientdet": 1,
106106
}
107107

108+
# These benchmarks took >600s on an i9-11900K CPU
109+
VERY_SLOW_BENCHMARKS = {
110+
"hf_BigBird", # 3339s
111+
"hf_Longformer", # 3062s
112+
"hf_T5", # 930s
113+
}
114+
115+
# These benchmarks took >60s on an i9-11900K CPU
116+
SLOW_BENCHMARKS = {
117+
*{
118+
"BERT_pytorch", # 137s
119+
"demucs", # 116s
120+
"fastNLP_Bert", # 242s
121+
"hf_Albert", # 221s
122+
"hf_Bart", # 400s
123+
"hf_Bert", # 334s
124+
"hf_DistilBert", # 187s
125+
"hf_GPT2", # 470s
126+
"hf_Reformer", # 141s
127+
"speech_transformer", # 317s
128+
"vision_maskrcnn", # 99s
129+
},
130+
*VERY_SLOW_BENCHMARKS,
131+
}
132+
133+
108134
current_name = ""
109135
current_device = ""
110136
output_filename = None
@@ -646,6 +672,9 @@ def main():
646672
parser.add_argument("--float16", action="store_true", help="cast model to fp16")
647673
parser.add_argument("--float32", action="store_true", help="cast model to fp32")
648674
parser.add_argument("--cosine", action="store_true", help="use cosine similarity")
675+
parser.add_argument(
676+
"--fast", "-f", action="store_true", help="skip slow benchmarks"
677+
)
649678
parser.add_argument("--only", help="used by --isolate to run just one model")
650679
parser.add_argument(
651680
"--minimum-call-count", type=int, help="filter out graphs with too few ops"
@@ -808,9 +837,6 @@ def main():
808837
}
809838
)
810839

811-
if args.no_skip:
812-
SKIP.clear()
813-
814840
if args.nvfuser:
815841
torch._C._jit_override_can_fuse_on_cpu(False)
816842
torch._C._jit_override_can_fuse_on_gpu(False)
@@ -835,6 +861,12 @@ def main():
835861
else:
836862
model_iter_fn = forward_pass
837863

864+
if args.fast:
865+
SKIP.update(SLOW_BENCHMARKS)
866+
867+
if args.devices == ["cpu"]:
868+
SKIP.update(VERY_SLOW_BENCHMARKS)
869+
838870
if args.no_skip:
839871
SKIP.clear()
840872

@@ -1173,7 +1205,7 @@ def run_one_model(
11731205

11741206
if output_filename and "coverage" in output_filename:
11751207
results.append(
1176-
f"{ok:3}/{total:3} +{frames_third_pass} frames {time.perf_counter()-t0:.0f}s"
1208+
f"{ok:3}/{total:3} +{frames_third_pass} frames {time.perf_counter()-t0:3.0f}s"
11771209
)
11781210

11791211
results.append(experiment(model, example_inputs))

0 commit comments

Comments
 (0)