From d4d9dc3e28b1da9c4d2aa335f9006393e9f36ff7 Mon Sep 17 00:00:00 2001
From: "Igoshev, Iaroslav"
Date: Wed, 18 Sep 2024 15:15:06 +0000
Subject: [PATCH] Align computation of GB/s and TFLOP/s across tutorials

Express every throughput estimate in the same explicit form:
bytes * 1e-9 / (ms * 1e-3) for GB/s and flops * 1e-12 / (ms * 1e-3)
for TFLOP/s, so the unit conversions (bytes -> GB, FLOPs -> TFLOPs,
ms -> s) are visible at a glance. The computed values are unchanged,
since e.g. bytes * 1e-9 / (ms * 1e-3) == bytes / ms * 1e-6. Also use
numel() instead of its alias nelement() in 02-fused-softmax.py for
consistency with the other tutorials.

Signed-off-by: Igoshev, Iaroslav
---
 python/tutorials/01-vector-add.py      | 2 +-
 python/tutorials/02-fused-softmax.py   | 2 +-
 python/tutorials/05-layer-norm.py      | 4 ++--
 python/tutorials/06-fused-attention.py | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/python/tutorials/01-vector-add.py b/python/tutorials/01-vector-add.py
index 5ada4d4d5e58..e0220a45ce04 100644
--- a/python/tutorials/01-vector-add.py
+++ b/python/tutorials/01-vector-add.py
@@ -123,7 +123,7 @@ def benchmark(size, provider):
         ms, min_ms, max_ms = triton.testing.do_bench(lambda: x + y, quantiles=quantiles)
     if provider == 'triton':
         ms, min_ms, max_ms = triton.testing.do_bench(lambda: add(x, y), quantiles=quantiles)
-    gbps = lambda ms: 3 * x.numel() * x.element_size() / ms * 1e-6
+    gbps = lambda ms: 3 * x.numel() * x.element_size() * 1e-9 / (ms * 1e-3)
     return gbps(ms), gbps(max_ms), gbps(min_ms)
 
 
diff --git a/python/tutorials/02-fused-softmax.py b/python/tutorials/02-fused-softmax.py
index 57f0272779c6..359ae3a09176 100644
--- a/python/tutorials/02-fused-softmax.py
+++ b/python/tutorials/02-fused-softmax.py
@@ -231,7 +231,7 @@ def benchmark(M, N, provider):
         ms = triton.testing.do_bench(lambda: torch.softmax(x, axis=-1))
     if provider == 'triton':
         ms = triton.testing.do_bench(lambda: softmax(x))
-    gbps = lambda ms: 2 * x.nelement() * x.element_size() * 1e-9 / (ms * 1e-3)
+    gbps = lambda ms: 2 * x.numel() * x.element_size() * 1e-9 / (ms * 1e-3)
     return gbps(ms)
 
 
diff --git a/python/tutorials/05-layer-norm.py b/python/tutorials/05-layer-norm.py
index fb9ce3711f78..a234153a047e 100644
--- a/python/tutorials/05-layer-norm.py
+++ b/python/tutorials/05-layer-norm.py
@@ -353,12 +353,12 @@ def y_fwd():
 
     # forward pass
     if mode == 'forward':
-        gbps = lambda ms: 2 * x.numel() * x.element_size() / ms * 1e-6
+        gbps = lambda ms: 2 * x.numel() * x.element_size() * 1e-9 / (ms * 1e-3)
         ms, min_ms, max_ms = triton.testing.do_bench(y_fwd, quantiles=quantiles, rep=500)
     # backward pass
     if mode == 'backward':
         y = y_fwd()
-        gbps = lambda ms: 3 * x.numel() * x.element_size() / ms * 1e-6  # noqa: F811, E704
+        gbps = lambda ms: 3 * x.numel() * x.element_size() * 1e-9 / (ms * 1e-3)  # noqa: F811, E704
         ms, min_ms, max_ms = triton.testing.do_bench(lambda: y.backward(dy, retain_graph=True), quantiles=quantiles,
                                                      grad_to_none=[x], rep=500)
     return gbps(ms), gbps(max_ms), gbps(min_ms)
diff --git a/python/tutorials/06-fused-attention.py b/python/tutorials/06-fused-attention.py
index e533576d467b..8aaf210d90bf 100644
--- a/python/tutorials/06-fused-attention.py
+++ b/python/tutorials/06-fused-attention.py
@@ -633,7 +633,7 @@ def bench_flash_attention(BATCH, H, N_CTX, HEAD_DIM, causal, mode, provider, dev
         total_flops *= 0.5
     if mode == "bwd":
         total_flops *= 2.5  # 2.0(bwd) + 0.5(recompute)
-    return total_flops / ms * 1e-9
+    return total_flops * 1e-12 / (ms * 1e-3)
 
 
 if __name__ == "__main__":
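
For reference, a quick sanity check (not part of the patch) that the rewritten
expressions are pure refactors of the old ones. The bytes_moved, flops, and ms
values below are hypothetical examples, not taken from the tutorials:

import math

# Hypothetical example values: bytes moved, FLOPs executed, kernel time in ms.
bytes_moved = 3 * 98_432 * 4   # e.g. 3 fp32 tensors of 98_432 elements each
flops = 2.0e12
ms = 1.5

# GB/s: old form vs. the aligned form used by the patch.
assert math.isclose(bytes_moved / ms * 1e-6,
                    bytes_moved * 1e-9 / (ms * 1e-3), rel_tol=1e-12)

# TFLOP/s: old form vs. the aligned form used by the patch.
assert math.isclose(flops / ms * 1e-9,
                    flops * 1e-12 / (ms * 1e-3), rel_tol=1e-12)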