set input tensor dtype #3
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Benchmarks | |
on: | |
push: | |
branches: | |
- master | |
- update_benchmark | |
jobs: | |
testmacbenchmark: | |
name: Mac Benchmark | |
runs-on: [self-hosted, macOS] | |
if: github.repository_owner == 'tinygrad' | |
env: | |
PYTHONPATH: . | |
steps: | |
- name: Checkout Code | |
uses: actions/checkout@v3 | |
- name: Symlink models and datasets | |
run: | | |
ln -s ~/tinygrad/disassemblers/applegpu disassemblers/applegpu | |
ln -s ~/tinygrad/weights/sd-v1-4.ckpt weights/sd-v1-4.ckpt | |
ln -s ~/tinygrad/weights/bpe_simple_vocab_16e6.txt.gz weights/bpe_simple_vocab_16e6.txt.gz | |
ln -s ~/tinygrad/weights/LLaMA weights/LLaMA | |
ln -s ~/tinygrad/extra/datasets/cifar-10-python.tar.gz extra/datasets/cifar-10-python.tar.gz | |
- name: Run model inference benchmark | |
run: python3 test/external/external_model_benchmark.py | |
- name: Test speed vs torch | |
run: BIG=2 MPS=1 python3 test/test_speed_v_torch.py | tee torch_speed.txt | |
shell: bash | |
- name: Run Tensor Core GEMM | |
run: DEBUG=2 python3 extra/gemm/simple_matmul.py | tee matmul.txt | |
shell: bash | |
- name: Run Stable Diffusion | |
run: python3 examples/stable_diffusion.py --noshow --timing | tee sd.txt | |
shell: bash | |
- name: Run LLaMA | |
run: | | |
JIT=0 python3 examples/llama.py --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_unjitted.txt | |
JIT=1 python3 examples/llama.py --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_jitted.txt | |
shell: bash | |
- name: Run GPT2 | |
run: | | |
JIT=0 python3 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_unjitted.txt | |
JIT=1 python3 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_jitted.txt | |
shell: bash | |
- name: Run 10 CIFAR training steps | |
run: STEPS=10 python3 examples/hlb_cifar10.py | tee train_cifar.txt | |
shell: bash | |
- name: Run 10 CIFAR training steps w winograd | |
run: WINO=1 STEPS=10 python3 examples/hlb_cifar10.py | tee train_cifar_wino.txt | |
shell: bash | |
- uses: actions/upload-artifact@v3 | |
with: | |
name: Speed (Mac) | |
path: | | |
onnx_inference_speed.csv | |
torch_speed.txt | |
train_cifar.txt | |
train_cifar_wino.txt | |
llama_unjitted.txt | |
llama_jitted.txt | |
gpt2_unjitted.txt | |
gpt2_jitted.txt | |
matmul.txt | |
sd.txt | |
testnvidiabenchmark: | |
name: NVIDIA Benchmark | |
runs-on: [self-hosted, Linux, CUDA] | |
if: github.repository_owner == 'tinygrad' | |
env: | |
PYTHONPATH: . | |
steps: | |
- name: Checkout Code | |
uses: actions/checkout@v3 | |
- name: Run model inference benchmark | |
run: CUDA=1 python3 test/external/external_model_benchmark.py | |
- name: Test speed vs torch | |
run: CUDA=1 BIG=2 TORCHCUDA=1 python3 test/test_speed_v_torch.py | tee torch_speed.txt | |
shell: bash | |
- name: Run GPT2 | |
run: | | |
CUDA=1 JIT=0 python3 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_unjitted.txt | |
CUDA=1 JIT=1 python3 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_jitted.txt | |
shell: bash | |
- uses: actions/upload-artifact@v3 | |
with: | |
name: Speed (NVIDIA) | |
path: | | |
onnx_inference_speed.csv | |
torch_speed.txt | |
gpt2_unjitted.txt | |
gpt2_jitted.txt | |
testamdbenchmark: | |
name: AMD Benchmark | |
runs-on: [self-hosted, Linux, ROCM] | |
if: github.repository_owner == 'tinygrad' | |
env: | |
PYTHONPATH: . | |
steps: | |
- name: Checkout Code | |
uses: actions/checkout@v3 | |
- name: Symlink models and datasets | |
run: | | |
ln -s ~/tinygrad/weights/sd-v1-4.ckpt weights/sd-v1-4.ckpt | |
ln -s ~/tinygrad/weights/bpe_simple_vocab_16e6.txt.gz weights/bpe_simple_vocab_16e6.txt.gz | |
ln -s ~/tinygrad/weights/LLaMA weights/LLaMA | |
ln -s ~/tinygrad/extra/datasets/cifar-10-python.tar.gz extra/datasets/cifar-10-python.tar.gz | |
- name: Run model inference benchmark | |
run: python3 test/external/external_model_benchmark.py | |
- name: Test speed vs torch | |
run: BIG=2 TORCHCUDA=1 python3 test/test_speed_v_torch.py | tee torch_speed.txt | |
shell: bash | |
- name: Run Tensor Core GEMM | |
run: HIP=1 HALF=1 DEBUG=2 python3 extra/gemm/simple_matmul.py | tee matmul.txt | |
shell: bash | |
- name: Run Stable Diffusion | |
run: python3 examples/stable_diffusion.py --noshow --timing | tee sd.txt | |
shell: bash | |
- name: Run LLaMA | |
run: | | |
JIT=0 python3 examples/llama.py --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_unjitted.txt | |
JIT=1 python3 examples/llama.py --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_jitted.txt | |
shell: bash | |
- name: Run GPT2 | |
run: | | |
JIT=0 python3 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_unjitted.txt | |
JIT=1 python3 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_jitted.txt | |
shell: bash | |
- name: Run 10 CIFAR training steps | |
run: STEPS=10 python3 examples/hlb_cifar10.py | tee train_cifar.txt | |
- name: Run 10 CIFAR training steps w winograd | |
run: WINO=1 STEPS=10 python3 examples/hlb_cifar10.py | tee train_cifar_wino.txt | |
shell: bash | |
- name: Run 10 CIFAR training steps w WINO/HALF/HIP | |
run: HALF=1 HIP=1 WINO=1 STEPS=10 python3 examples/hlb_cifar10.py | tee train_cifar_wino_half_hip.txt | |
- uses: actions/upload-artifact@v3 | |
with: | |
name: Speed (AMD) | |
path: | | |
onnx_inference_speed.csv | |
torch_speed.txt | |
train_cifar.txt | |
train_cifar_wino.txt | |
train_cifar_wino_half_hip.txt | |
llama_unjitted.txt | |
llama_jitted.txt | |
gpt2_unjitted.txt | |
gpt2_jitted.txt | |
matmul.txt | |
sd.txt |