diff --git a/main/.buildinfo b/main/.buildinfo index 07c1cacb3e6e..566fba1058ce 100644 --- a/main/.buildinfo +++ b/main/.buildinfo @@ -1,4 +1,4 @@ # Sphinx build info version 1 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. -config: 1466ed63583833c34ee1142b9ed470c5 +config: fe32c7cb5b97ad0f8d9995fdc7770612 tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/main/.doctrees/environment.pickle b/main/.doctrees/environment.pickle index 6ba33ce862dc..cad16e82d9bb 100644 Binary files a/main/.doctrees/environment.pickle and b/main/.doctrees/environment.pickle differ diff --git a/main/.doctrees/getting-started/tutorials/01-vector-add.doctree b/main/.doctrees/getting-started/tutorials/01-vector-add.doctree index febc837592b8..02a6af780898 100644 Binary files a/main/.doctrees/getting-started/tutorials/01-vector-add.doctree and b/main/.doctrees/getting-started/tutorials/01-vector-add.doctree differ diff --git a/main/.doctrees/getting-started/tutorials/02-fused-softmax.doctree b/main/.doctrees/getting-started/tutorials/02-fused-softmax.doctree index 08588d7a05d6..96560fb470d3 100644 Binary files a/main/.doctrees/getting-started/tutorials/02-fused-softmax.doctree and b/main/.doctrees/getting-started/tutorials/02-fused-softmax.doctree differ diff --git a/main/.doctrees/getting-started/tutorials/03-matrix-multiplication.doctree b/main/.doctrees/getting-started/tutorials/03-matrix-multiplication.doctree index a45be7f2d752..427d4ab17984 100644 Binary files a/main/.doctrees/getting-started/tutorials/03-matrix-multiplication.doctree and b/main/.doctrees/getting-started/tutorials/03-matrix-multiplication.doctree differ diff --git a/main/.doctrees/getting-started/tutorials/04-low-memory-dropout.doctree b/main/.doctrees/getting-started/tutorials/04-low-memory-dropout.doctree index 69998af13cfa..ceff8774107b 100644 Binary files a/main/.doctrees/getting-started/tutorials/04-low-memory-dropout.doctree and b/main/.doctrees/getting-started/tutorials/04-low-memory-dropout.doctree differ diff --git a/main/.doctrees/getting-started/tutorials/05-layer-norm.doctree b/main/.doctrees/getting-started/tutorials/05-layer-norm.doctree index 2993dd0fd981..28c92ac7f883 100644 Binary files a/main/.doctrees/getting-started/tutorials/05-layer-norm.doctree and b/main/.doctrees/getting-started/tutorials/05-layer-norm.doctree differ diff --git a/main/.doctrees/getting-started/tutorials/06-fused-attention.doctree b/main/.doctrees/getting-started/tutorials/06-fused-attention.doctree index 3bcf2532994f..911e3946a095 100644 Binary files a/main/.doctrees/getting-started/tutorials/06-fused-attention.doctree and b/main/.doctrees/getting-started/tutorials/06-fused-attention.doctree differ diff --git a/main/.doctrees/getting-started/tutorials/07-extern-functions.doctree b/main/.doctrees/getting-started/tutorials/07-extern-functions.doctree index 9e043c5e78ab..690e9165bedf 100644 Binary files a/main/.doctrees/getting-started/tutorials/07-extern-functions.doctree and b/main/.doctrees/getting-started/tutorials/07-extern-functions.doctree differ diff --git a/main/.doctrees/getting-started/tutorials/08-grouped-gemm.doctree b/main/.doctrees/getting-started/tutorials/08-grouped-gemm.doctree index aff63f93bfc9..53365c776668 100644 Binary files a/main/.doctrees/getting-started/tutorials/08-grouped-gemm.doctree and b/main/.doctrees/getting-started/tutorials/08-grouped-gemm.doctree differ diff --git a/main/.doctrees/getting-started/tutorials/09-persistent-matmul.doctree b/main/.doctrees/getting-started/tutorials/09-persistent-matmul.doctree index d8e7c775155c..2de7c63c6447 100644 Binary files a/main/.doctrees/getting-started/tutorials/09-persistent-matmul.doctree and b/main/.doctrees/getting-started/tutorials/09-persistent-matmul.doctree differ diff --git a/main/.doctrees/getting-started/tutorials/sg_execution_times.doctree b/main/.doctrees/getting-started/tutorials/sg_execution_times.doctree index e11b38292d16..0f20a16afa29 100644 Binary files a/main/.doctrees/getting-started/tutorials/sg_execution_times.doctree and b/main/.doctrees/getting-started/tutorials/sg_execution_times.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.Config.doctree b/main/.doctrees/python-api/generated/triton.Config.doctree index 1b78c1b1b3b8..0f4d90e406b6 100644 Binary files a/main/.doctrees/python-api/generated/triton.Config.doctree and b/main/.doctrees/python-api/generated/triton.Config.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.autotune.doctree b/main/.doctrees/python-api/generated/triton.autotune.doctree index 272ac90f87ac..7b20e45add06 100644 Binary files a/main/.doctrees/python-api/generated/triton.autotune.doctree and b/main/.doctrees/python-api/generated/triton.autotune.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.heuristics.doctree b/main/.doctrees/python-api/generated/triton.heuristics.doctree index d02dfa8958e0..9c2f0386d662 100644 Binary files a/main/.doctrees/python-api/generated/triton.heuristics.doctree and b/main/.doctrees/python-api/generated/triton.heuristics.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.jit.doctree b/main/.doctrees/python-api/generated/triton.jit.doctree index 7a1a71802370..aa5856a0eb71 100644 Binary files a/main/.doctrees/python-api/generated/triton.jit.doctree and b/main/.doctrees/python-api/generated/triton.jit.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.abs.doctree b/main/.doctrees/python-api/generated/triton.language.abs.doctree index 28eaa06b3434..fc71d33ed9dc 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.abs.doctree and b/main/.doctrees/python-api/generated/triton.language.abs.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.advance.doctree b/main/.doctrees/python-api/generated/triton.language.advance.doctree index 2ca196ee4b88..d351108da1f1 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.advance.doctree and b/main/.doctrees/python-api/generated/triton.language.advance.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.arange.doctree b/main/.doctrees/python-api/generated/triton.language.arange.doctree index f877fdaac9e0..affe3c579bcd 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.arange.doctree and b/main/.doctrees/python-api/generated/triton.language.arange.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.argmax.doctree b/main/.doctrees/python-api/generated/triton.language.argmax.doctree index 87a844260e17..1fadc3fc28ea 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.argmax.doctree and b/main/.doctrees/python-api/generated/triton.language.argmax.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.argmin.doctree b/main/.doctrees/python-api/generated/triton.language.argmin.doctree index 989905148b64..f8b381b5c3b2 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.argmin.doctree and b/main/.doctrees/python-api/generated/triton.language.argmin.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.associative_scan.doctree b/main/.doctrees/python-api/generated/triton.language.associative_scan.doctree index 5530efaf1d1a..7abd1655663c 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.associative_scan.doctree and b/main/.doctrees/python-api/generated/triton.language.associative_scan.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.atomic_add.doctree b/main/.doctrees/python-api/generated/triton.language.atomic_add.doctree index 149b29a93c60..e174c6b799fc 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.atomic_add.doctree and b/main/.doctrees/python-api/generated/triton.language.atomic_add.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.atomic_and.doctree b/main/.doctrees/python-api/generated/triton.language.atomic_and.doctree index ab872be7e7e2..607400920ca7 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.atomic_and.doctree and b/main/.doctrees/python-api/generated/triton.language.atomic_and.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.atomic_cas.doctree b/main/.doctrees/python-api/generated/triton.language.atomic_cas.doctree index bd1f7cb2dc87..778ef5bae7b4 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.atomic_cas.doctree and b/main/.doctrees/python-api/generated/triton.language.atomic_cas.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.atomic_max.doctree b/main/.doctrees/python-api/generated/triton.language.atomic_max.doctree index 5ac546f6fdfe..c4a1f4fb3374 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.atomic_max.doctree and b/main/.doctrees/python-api/generated/triton.language.atomic_max.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.atomic_min.doctree b/main/.doctrees/python-api/generated/triton.language.atomic_min.doctree index 7cdfc665735d..7b392c88f5ce 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.atomic_min.doctree and b/main/.doctrees/python-api/generated/triton.language.atomic_min.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.atomic_or.doctree b/main/.doctrees/python-api/generated/triton.language.atomic_or.doctree index ca59464b182b..3568df977c23 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.atomic_or.doctree and b/main/.doctrees/python-api/generated/triton.language.atomic_or.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.atomic_xchg.doctree b/main/.doctrees/python-api/generated/triton.language.atomic_xchg.doctree index 7ea5507782ee..e0d2073ecb4d 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.atomic_xchg.doctree and b/main/.doctrees/python-api/generated/triton.language.atomic_xchg.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.atomic_xor.doctree b/main/.doctrees/python-api/generated/triton.language.atomic_xor.doctree index 958b4afe1ca7..6b7a2e37576b 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.atomic_xor.doctree and b/main/.doctrees/python-api/generated/triton.language.atomic_xor.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.broadcast.doctree b/main/.doctrees/python-api/generated/triton.language.broadcast.doctree index b9216a654811..7e8b2a79f796 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.broadcast.doctree and b/main/.doctrees/python-api/generated/triton.language.broadcast.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.broadcast_to.doctree b/main/.doctrees/python-api/generated/triton.language.broadcast_to.doctree index b044123d7c2a..baa8f0a7329f 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.broadcast_to.doctree and b/main/.doctrees/python-api/generated/triton.language.broadcast_to.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.cast.doctree b/main/.doctrees/python-api/generated/triton.language.cast.doctree index 017d10bd683d..14c329bf3a82 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.cast.doctree and b/main/.doctrees/python-api/generated/triton.language.cast.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.cat.doctree b/main/.doctrees/python-api/generated/triton.language.cat.doctree index 05f0b315a746..dc7cf9325f77 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.cat.doctree and b/main/.doctrees/python-api/generated/triton.language.cat.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.cdiv.doctree b/main/.doctrees/python-api/generated/triton.language.cdiv.doctree index 2343122f1d23..4fdc0fbc34a5 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.cdiv.doctree and b/main/.doctrees/python-api/generated/triton.language.cdiv.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.ceil.doctree b/main/.doctrees/python-api/generated/triton.language.ceil.doctree index 0c38bce310e2..c5d8bdcb7c51 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.ceil.doctree and b/main/.doctrees/python-api/generated/triton.language.ceil.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.clamp.doctree b/main/.doctrees/python-api/generated/triton.language.clamp.doctree index e2ba57d9512c..1bcd7ff3f2bb 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.clamp.doctree and b/main/.doctrees/python-api/generated/triton.language.clamp.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.cos.doctree b/main/.doctrees/python-api/generated/triton.language.cos.doctree index f0ecdb23ca32..537455d8b709 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.cos.doctree and b/main/.doctrees/python-api/generated/triton.language.cos.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.cumprod.doctree b/main/.doctrees/python-api/generated/triton.language.cumprod.doctree index d36d94fa122b..c68c2d95db39 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.cumprod.doctree and b/main/.doctrees/python-api/generated/triton.language.cumprod.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.cumsum.doctree b/main/.doctrees/python-api/generated/triton.language.cumsum.doctree index b2e7e7afc663..5b801155d8d1 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.cumsum.doctree and b/main/.doctrees/python-api/generated/triton.language.cumsum.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.debug_barrier.doctree b/main/.doctrees/python-api/generated/triton.language.debug_barrier.doctree index 76e4760a4288..aad805b047f7 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.debug_barrier.doctree and b/main/.doctrees/python-api/generated/triton.language.debug_barrier.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.device_assert.doctree b/main/.doctrees/python-api/generated/triton.language.device_assert.doctree index 65fd230d9482..77eb3c4a8ab7 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.device_assert.doctree and b/main/.doctrees/python-api/generated/triton.language.device_assert.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.device_print.doctree b/main/.doctrees/python-api/generated/triton.language.device_print.doctree index 3556f916911a..66b8eab9174a 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.device_print.doctree and b/main/.doctrees/python-api/generated/triton.language.device_print.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.div_rn.doctree b/main/.doctrees/python-api/generated/triton.language.div_rn.doctree index 7de481e6f355..caa01aee6f4e 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.div_rn.doctree and b/main/.doctrees/python-api/generated/triton.language.div_rn.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.dot.doctree b/main/.doctrees/python-api/generated/triton.language.dot.doctree index 097c04695657..548cf7fa27c1 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.dot.doctree and b/main/.doctrees/python-api/generated/triton.language.dot.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.erf.doctree b/main/.doctrees/python-api/generated/triton.language.erf.doctree index 22f150453a9f..e2ead929a895 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.erf.doctree and b/main/.doctrees/python-api/generated/triton.language.erf.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.exp.doctree b/main/.doctrees/python-api/generated/triton.language.exp.doctree index 7fac30bd441a..5bfcc82da858 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.exp.doctree and b/main/.doctrees/python-api/generated/triton.language.exp.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.exp2.doctree b/main/.doctrees/python-api/generated/triton.language.exp2.doctree index f6060aacac9c..3f0cc027f1e2 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.exp2.doctree and b/main/.doctrees/python-api/generated/triton.language.exp2.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.expand_dims.doctree b/main/.doctrees/python-api/generated/triton.language.expand_dims.doctree index aedc8e310e92..88d10ccf8336 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.expand_dims.doctree and b/main/.doctrees/python-api/generated/triton.language.expand_dims.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.fdiv.doctree b/main/.doctrees/python-api/generated/triton.language.fdiv.doctree index 4f5ee7f53d9d..580e94f336db 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.fdiv.doctree and b/main/.doctrees/python-api/generated/triton.language.fdiv.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.flip.doctree b/main/.doctrees/python-api/generated/triton.language.flip.doctree index a67c04f82de4..dc4783c31ca4 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.flip.doctree and b/main/.doctrees/python-api/generated/triton.language.flip.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.floor.doctree b/main/.doctrees/python-api/generated/triton.language.floor.doctree index 1a269684ec2a..f611a99cb34f 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.floor.doctree and b/main/.doctrees/python-api/generated/triton.language.floor.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.fma.doctree b/main/.doctrees/python-api/generated/triton.language.fma.doctree index 78e08aa69b51..301059aebee3 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.fma.doctree and b/main/.doctrees/python-api/generated/triton.language.fma.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.full.doctree b/main/.doctrees/python-api/generated/triton.language.full.doctree index 2213a7d39abc..acc8d34c4b91 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.full.doctree and b/main/.doctrees/python-api/generated/triton.language.full.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.histogram.doctree b/main/.doctrees/python-api/generated/triton.language.histogram.doctree index a5c60642e4ff..caac8f84e3a1 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.histogram.doctree and b/main/.doctrees/python-api/generated/triton.language.histogram.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.inline_asm_elementwise.doctree b/main/.doctrees/python-api/generated/triton.language.inline_asm_elementwise.doctree index 3d299832a4d9..bb7cace7c491 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.inline_asm_elementwise.doctree and b/main/.doctrees/python-api/generated/triton.language.inline_asm_elementwise.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.interleave.doctree b/main/.doctrees/python-api/generated/triton.language.interleave.doctree index 702ce4f21e74..11b7e24f8645 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.interleave.doctree and b/main/.doctrees/python-api/generated/triton.language.interleave.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.join.doctree b/main/.doctrees/python-api/generated/triton.language.join.doctree index ac9206731e8e..c5f4ad42452a 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.join.doctree and b/main/.doctrees/python-api/generated/triton.language.join.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.load.doctree b/main/.doctrees/python-api/generated/triton.language.load.doctree index 28fccd6f5942..aae2b36482c7 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.load.doctree and b/main/.doctrees/python-api/generated/triton.language.load.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.log.doctree b/main/.doctrees/python-api/generated/triton.language.log.doctree index 96ba0390a812..a0f6908b76ce 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.log.doctree and b/main/.doctrees/python-api/generated/triton.language.log.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.log2.doctree b/main/.doctrees/python-api/generated/triton.language.log2.doctree index 72ab6e5d87f2..d5f12d7cb037 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.log2.doctree and b/main/.doctrees/python-api/generated/triton.language.log2.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.make_block_ptr.doctree b/main/.doctrees/python-api/generated/triton.language.make_block_ptr.doctree index 5df5ea3b7542..6c481a524862 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.make_block_ptr.doctree and b/main/.doctrees/python-api/generated/triton.language.make_block_ptr.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.max.doctree b/main/.doctrees/python-api/generated/triton.language.max.doctree index 9e0a9e564059..f579cb75d6f7 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.max.doctree and b/main/.doctrees/python-api/generated/triton.language.max.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.max_constancy.doctree b/main/.doctrees/python-api/generated/triton.language.max_constancy.doctree index db7f550a9192..c3c171a47fbc 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.max_constancy.doctree and b/main/.doctrees/python-api/generated/triton.language.max_constancy.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.max_contiguous.doctree b/main/.doctrees/python-api/generated/triton.language.max_contiguous.doctree index bd2912b69da8..c23432dd5c73 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.max_contiguous.doctree and b/main/.doctrees/python-api/generated/triton.language.max_contiguous.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.maximum.doctree b/main/.doctrees/python-api/generated/triton.language.maximum.doctree index 8e8f91f2e9d8..4e331e030e1f 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.maximum.doctree and b/main/.doctrees/python-api/generated/triton.language.maximum.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.min.doctree b/main/.doctrees/python-api/generated/triton.language.min.doctree index f5f01855af41..1e3dc5e3b6e0 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.min.doctree and b/main/.doctrees/python-api/generated/triton.language.min.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.minimum.doctree b/main/.doctrees/python-api/generated/triton.language.minimum.doctree index 079de3cb2637..3d5df3eb20d0 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.minimum.doctree and b/main/.doctrees/python-api/generated/triton.language.minimum.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.multiple_of.doctree b/main/.doctrees/python-api/generated/triton.language.multiple_of.doctree index 99bf7283261c..cde86604d251 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.multiple_of.doctree and b/main/.doctrees/python-api/generated/triton.language.multiple_of.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.num_programs.doctree b/main/.doctrees/python-api/generated/triton.language.num_programs.doctree index 7341f8cc91b8..9679eae75e55 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.num_programs.doctree and b/main/.doctrees/python-api/generated/triton.language.num_programs.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.permute.doctree b/main/.doctrees/python-api/generated/triton.language.permute.doctree index edae2b4ee010..5d36b7e0b513 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.permute.doctree and b/main/.doctrees/python-api/generated/triton.language.permute.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.program_id.doctree b/main/.doctrees/python-api/generated/triton.language.program_id.doctree index 2d7b692ed52d..01a6babae31c 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.program_id.doctree and b/main/.doctrees/python-api/generated/triton.language.program_id.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.rand.doctree b/main/.doctrees/python-api/generated/triton.language.rand.doctree index 55768c41a2ab..b63b968b33c9 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.rand.doctree and b/main/.doctrees/python-api/generated/triton.language.rand.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.randint.doctree b/main/.doctrees/python-api/generated/triton.language.randint.doctree index 671e4208b11b..e2a7b783712f 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.randint.doctree and b/main/.doctrees/python-api/generated/triton.language.randint.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.randint4x.doctree b/main/.doctrees/python-api/generated/triton.language.randint4x.doctree index ee5a905f7e94..a6526efdddb7 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.randint4x.doctree and b/main/.doctrees/python-api/generated/triton.language.randint4x.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.randn.doctree b/main/.doctrees/python-api/generated/triton.language.randn.doctree index c93ef073dae1..c62bcee65866 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.randn.doctree and b/main/.doctrees/python-api/generated/triton.language.randn.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.range.doctree b/main/.doctrees/python-api/generated/triton.language.range.doctree index 72e6bd47200b..a9290824d9bc 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.range.doctree and b/main/.doctrees/python-api/generated/triton.language.range.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.ravel.doctree b/main/.doctrees/python-api/generated/triton.language.ravel.doctree index 822cd8990f7e..80df06b5db87 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.ravel.doctree and b/main/.doctrees/python-api/generated/triton.language.ravel.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.reduce.doctree b/main/.doctrees/python-api/generated/triton.language.reduce.doctree index 708a105ffaa4..357ba7710cbb 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.reduce.doctree and b/main/.doctrees/python-api/generated/triton.language.reduce.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.reshape.doctree b/main/.doctrees/python-api/generated/triton.language.reshape.doctree index 81923389536a..5c4224290949 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.reshape.doctree and b/main/.doctrees/python-api/generated/triton.language.reshape.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.rsqrt.doctree b/main/.doctrees/python-api/generated/triton.language.rsqrt.doctree index c25f2c486427..d15fdd1e3bd8 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.rsqrt.doctree and b/main/.doctrees/python-api/generated/triton.language.rsqrt.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.sigmoid.doctree b/main/.doctrees/python-api/generated/triton.language.sigmoid.doctree index 7e9123afd121..deded43d76e3 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.sigmoid.doctree and b/main/.doctrees/python-api/generated/triton.language.sigmoid.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.sin.doctree b/main/.doctrees/python-api/generated/triton.language.sin.doctree index cbd0272695be..3236b6ea5514 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.sin.doctree and b/main/.doctrees/python-api/generated/triton.language.sin.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.softmax.doctree b/main/.doctrees/python-api/generated/triton.language.softmax.doctree index ecf5b5f5cb75..2931e830c75e 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.softmax.doctree and b/main/.doctrees/python-api/generated/triton.language.softmax.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.sort.doctree b/main/.doctrees/python-api/generated/triton.language.sort.doctree index 3109b42ce93a..eaef0b05ee0e 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.sort.doctree and b/main/.doctrees/python-api/generated/triton.language.sort.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.split.doctree b/main/.doctrees/python-api/generated/triton.language.split.doctree index 1924e43748dd..e3bf066321f3 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.split.doctree and b/main/.doctrees/python-api/generated/triton.language.split.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.sqrt.doctree b/main/.doctrees/python-api/generated/triton.language.sqrt.doctree index 4615cda75a4b..1d26cd2e4a4e 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.sqrt.doctree and b/main/.doctrees/python-api/generated/triton.language.sqrt.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.sqrt_rn.doctree b/main/.doctrees/python-api/generated/triton.language.sqrt_rn.doctree index 56f34852c83f..571f63e8e137 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.sqrt_rn.doctree and b/main/.doctrees/python-api/generated/triton.language.sqrt_rn.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.static_assert.doctree b/main/.doctrees/python-api/generated/triton.language.static_assert.doctree index 682c014f789d..d2a62c306a1e 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.static_assert.doctree and b/main/.doctrees/python-api/generated/triton.language.static_assert.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.static_print.doctree b/main/.doctrees/python-api/generated/triton.language.static_print.doctree index afb8c53a9d93..00cc401ef945 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.static_print.doctree and b/main/.doctrees/python-api/generated/triton.language.static_print.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.static_range.doctree b/main/.doctrees/python-api/generated/triton.language.static_range.doctree index b1acfda1040a..215d7beb37c2 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.static_range.doctree and b/main/.doctrees/python-api/generated/triton.language.static_range.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.store.doctree b/main/.doctrees/python-api/generated/triton.language.store.doctree index 9703c6d07692..83a74813992d 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.store.doctree and b/main/.doctrees/python-api/generated/triton.language.store.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.sum.doctree b/main/.doctrees/python-api/generated/triton.language.sum.doctree index 6ddeaf06650f..9b2e8940864b 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.sum.doctree and b/main/.doctrees/python-api/generated/triton.language.sum.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.swizzle2d.doctree b/main/.doctrees/python-api/generated/triton.language.swizzle2d.doctree index dd3cd58ef503..69b367df4d36 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.swizzle2d.doctree and b/main/.doctrees/python-api/generated/triton.language.swizzle2d.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.tensor.doctree b/main/.doctrees/python-api/generated/triton.language.tensor.doctree index 68dc13fb2c00..a0cea05c2d92 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.tensor.doctree and b/main/.doctrees/python-api/generated/triton.language.tensor.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.trans.doctree b/main/.doctrees/python-api/generated/triton.language.trans.doctree index 6ed50370051f..f42926f50592 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.trans.doctree and b/main/.doctrees/python-api/generated/triton.language.trans.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.umulhi.doctree b/main/.doctrees/python-api/generated/triton.language.umulhi.doctree index 3e26bb7b865a..3d38edab6518 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.umulhi.doctree and b/main/.doctrees/python-api/generated/triton.language.umulhi.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.view.doctree b/main/.doctrees/python-api/generated/triton.language.view.doctree index c1e29622a5eb..86604a9b7c03 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.view.doctree and b/main/.doctrees/python-api/generated/triton.language.view.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.where.doctree b/main/.doctrees/python-api/generated/triton.language.where.doctree index 288fb0702b6d..628f988a2526 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.where.doctree and b/main/.doctrees/python-api/generated/triton.language.where.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.xor_sum.doctree b/main/.doctrees/python-api/generated/triton.language.xor_sum.doctree index 0fb5d203c884..8bc5d8e89f5a 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.xor_sum.doctree and b/main/.doctrees/python-api/generated/triton.language.xor_sum.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.zeros.doctree b/main/.doctrees/python-api/generated/triton.language.zeros.doctree index 4efed57763ff..1b20653cb707 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.zeros.doctree and b/main/.doctrees/python-api/generated/triton.language.zeros.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.language.zeros_like.doctree b/main/.doctrees/python-api/generated/triton.language.zeros_like.doctree index 7ffa8aea2d33..1aa8d6a8741d 100644 Binary files a/main/.doctrees/python-api/generated/triton.language.zeros_like.doctree and b/main/.doctrees/python-api/generated/triton.language.zeros_like.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.testing.Benchmark.doctree b/main/.doctrees/python-api/generated/triton.testing.Benchmark.doctree index ac876b47de1c..4d8e64cf87d1 100644 Binary files a/main/.doctrees/python-api/generated/triton.testing.Benchmark.doctree and b/main/.doctrees/python-api/generated/triton.testing.Benchmark.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.testing.assert_close.doctree b/main/.doctrees/python-api/generated/triton.testing.assert_close.doctree index a466140d9615..03c02fceab6e 100644 Binary files a/main/.doctrees/python-api/generated/triton.testing.assert_close.doctree and b/main/.doctrees/python-api/generated/triton.testing.assert_close.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.testing.do_bench.doctree b/main/.doctrees/python-api/generated/triton.testing.do_bench.doctree index 7af8e24c3085..b0c7af4819c5 100644 Binary files a/main/.doctrees/python-api/generated/triton.testing.do_bench.doctree and b/main/.doctrees/python-api/generated/triton.testing.do_bench.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.testing.do_bench_cudagraph.doctree b/main/.doctrees/python-api/generated/triton.testing.do_bench_cudagraph.doctree index af26c0644d3e..a905d96aadf3 100644 Binary files a/main/.doctrees/python-api/generated/triton.testing.do_bench_cudagraph.doctree and b/main/.doctrees/python-api/generated/triton.testing.do_bench_cudagraph.doctree differ diff --git a/main/.doctrees/python-api/generated/triton.testing.perf_report.doctree b/main/.doctrees/python-api/generated/triton.testing.perf_report.doctree index 231f8ec2213d..2b9676a78d6f 100644 Binary files a/main/.doctrees/python-api/generated/triton.testing.perf_report.doctree and b/main/.doctrees/python-api/generated/triton.testing.perf_report.doctree differ diff --git a/main/.doctrees/python-api/triton.doctree b/main/.doctrees/python-api/triton.doctree index d64df63b3a27..21bcabaf6813 100644 Binary files a/main/.doctrees/python-api/triton.doctree and b/main/.doctrees/python-api/triton.doctree differ diff --git a/main/.doctrees/python-api/triton.language.doctree b/main/.doctrees/python-api/triton.language.doctree index 3962faba9657..299e2de0b363 100644 Binary files a/main/.doctrees/python-api/triton.language.doctree and b/main/.doctrees/python-api/triton.language.doctree differ diff --git a/main/.doctrees/python-api/triton.testing.doctree b/main/.doctrees/python-api/triton.testing.doctree index fa35dcf60697..88d50400ee85 100644 Binary files a/main/.doctrees/python-api/triton.testing.doctree and b/main/.doctrees/python-api/triton.testing.doctree differ diff --git a/main/.doctrees/sg_execution_times.doctree b/main/.doctrees/sg_execution_times.doctree index 0001f4d295be..17fb8cf9c8bd 100644 Binary files a/main/.doctrees/sg_execution_times.doctree and b/main/.doctrees/sg_execution_times.doctree differ diff --git a/main/_downloads/662999063954282841dc90b8945f85ce/tutorials_jupyter.zip b/main/_downloads/662999063954282841dc90b8945f85ce/tutorials_jupyter.zip index 92a2591a6f05..e4818f06fb32 100644 Binary files a/main/_downloads/662999063954282841dc90b8945f85ce/tutorials_jupyter.zip and b/main/_downloads/662999063954282841dc90b8945f85ce/tutorials_jupyter.zip differ diff --git a/main/_downloads/763344228ae6bc253ed1a6cf586aa30d/tutorials_python.zip b/main/_downloads/763344228ae6bc253ed1a6cf586aa30d/tutorials_python.zip index f573dc0bf430..398fd24459fc 100644 Binary files a/main/_downloads/763344228ae6bc253ed1a6cf586aa30d/tutorials_python.zip and b/main/_downloads/763344228ae6bc253ed1a6cf586aa30d/tutorials_python.zip differ diff --git a/main/_images/sphx_glr_01-vector-add_001.png b/main/_images/sphx_glr_01-vector-add_001.png index 5755351e4673..37813d6db76a 100644 Binary files a/main/_images/sphx_glr_01-vector-add_001.png and b/main/_images/sphx_glr_01-vector-add_001.png differ diff --git a/main/_images/sphx_glr_01-vector-add_thumb.png b/main/_images/sphx_glr_01-vector-add_thumb.png index 7d5c254470e4..3c3ae501c291 100644 Binary files a/main/_images/sphx_glr_01-vector-add_thumb.png and b/main/_images/sphx_glr_01-vector-add_thumb.png differ diff --git a/main/_images/sphx_glr_02-fused-softmax_001.png b/main/_images/sphx_glr_02-fused-softmax_001.png index d58189214721..278539df75af 100644 Binary files a/main/_images/sphx_glr_02-fused-softmax_001.png and b/main/_images/sphx_glr_02-fused-softmax_001.png differ diff --git a/main/_images/sphx_glr_02-fused-softmax_thumb.png b/main/_images/sphx_glr_02-fused-softmax_thumb.png index 86a7b0405c16..af3cec389654 100644 Binary files a/main/_images/sphx_glr_02-fused-softmax_thumb.png and b/main/_images/sphx_glr_02-fused-softmax_thumb.png differ diff --git a/main/_images/sphx_glr_03-matrix-multiplication_001.png b/main/_images/sphx_glr_03-matrix-multiplication_001.png index 38d0aef0df73..d3e9738a9afc 100644 Binary files a/main/_images/sphx_glr_03-matrix-multiplication_001.png and b/main/_images/sphx_glr_03-matrix-multiplication_001.png differ diff --git a/main/_images/sphx_glr_03-matrix-multiplication_002.png b/main/_images/sphx_glr_03-matrix-multiplication_002.png index 8166b10987a7..357665bf5bfb 100644 Binary files a/main/_images/sphx_glr_03-matrix-multiplication_002.png and b/main/_images/sphx_glr_03-matrix-multiplication_002.png differ diff --git a/main/_images/sphx_glr_03-matrix-multiplication_thumb.png b/main/_images/sphx_glr_03-matrix-multiplication_thumb.png index c449be6c2f10..883e6d781268 100644 Binary files a/main/_images/sphx_glr_03-matrix-multiplication_thumb.png and b/main/_images/sphx_glr_03-matrix-multiplication_thumb.png differ diff --git a/main/_images/sphx_glr_05-layer-norm_001.png b/main/_images/sphx_glr_05-layer-norm_001.png index 36671f461a2e..97c32e159770 100644 Binary files a/main/_images/sphx_glr_05-layer-norm_001.png and b/main/_images/sphx_glr_05-layer-norm_001.png differ diff --git a/main/_images/sphx_glr_05-layer-norm_thumb.png b/main/_images/sphx_glr_05-layer-norm_thumb.png index bfa5476d6d76..a562d3578c1c 100644 Binary files a/main/_images/sphx_glr_05-layer-norm_thumb.png and b/main/_images/sphx_glr_05-layer-norm_thumb.png differ diff --git a/main/_images/sphx_glr_06-fused-attention_001.png b/main/_images/sphx_glr_06-fused-attention_001.png index 58dcc298bfae..85963be24676 100644 Binary files a/main/_images/sphx_glr_06-fused-attention_001.png and b/main/_images/sphx_glr_06-fused-attention_001.png differ diff --git a/main/_images/sphx_glr_06-fused-attention_002.png b/main/_images/sphx_glr_06-fused-attention_002.png index ded656fed243..c5573451dd76 100644 Binary files a/main/_images/sphx_glr_06-fused-attention_002.png and b/main/_images/sphx_glr_06-fused-attention_002.png differ diff --git a/main/_images/sphx_glr_06-fused-attention_003.png b/main/_images/sphx_glr_06-fused-attention_003.png index 4bd0e5ff0922..f91212be5f82 100644 Binary files a/main/_images/sphx_glr_06-fused-attention_003.png and b/main/_images/sphx_glr_06-fused-attention_003.png differ diff --git a/main/_images/sphx_glr_06-fused-attention_thumb.png b/main/_images/sphx_glr_06-fused-attention_thumb.png index 57757ef3aa9f..23cacc77ba80 100644 Binary files a/main/_images/sphx_glr_06-fused-attention_thumb.png and b/main/_images/sphx_glr_06-fused-attention_thumb.png differ diff --git a/main/_images/sphx_glr_08-grouped-gemm_001.png b/main/_images/sphx_glr_08-grouped-gemm_001.png index fb198e8bcac0..f885afbbc4cb 100644 Binary files a/main/_images/sphx_glr_08-grouped-gemm_001.png and b/main/_images/sphx_glr_08-grouped-gemm_001.png differ diff --git a/main/_images/sphx_glr_08-grouped-gemm_thumb.png b/main/_images/sphx_glr_08-grouped-gemm_thumb.png index 9dddab83415b..d41f0e2e7ad0 100644 Binary files a/main/_images/sphx_glr_08-grouped-gemm_thumb.png and b/main/_images/sphx_glr_08-grouped-gemm_thumb.png differ diff --git a/main/_sources/getting-started/tutorials/01-vector-add.rst.txt b/main/_sources/getting-started/tutorials/01-vector-add.rst.txt index 732ce339efce..b4b2516287b8 100644 --- a/main/_sources/getting-started/tutorials/01-vector-add.rst.txt +++ b/main/_sources/getting-started/tutorials/01-vector-add.rst.txt @@ -231,8 +231,8 @@ We can now run the decorated function above. Pass `print_data=True` to see the p vector-add-performance: size Triton Torch 0 4096.0 8.000000 8.000000 - 1 8192.0 15.999999 19.200000 - 2 16384.0 38.400001 31.999999 + 1 8192.0 19.200000 19.200000 + 2 16384.0 31.999999 31.999999 3 32768.0 63.999998 63.999998 4 65536.0 127.999995 127.999995 5 131072.0 219.428568 219.428568 @@ -240,12 +240,12 @@ We can now run the decorated function above. Pass `print_data=True` to see the p 7 524288.0 614.400016 614.400016 8 1048576.0 819.200021 819.200021 9 2097152.0 1068.521715 1023.999964 - 10 4194304.0 1260.307736 1260.307736 - 11 8388608.0 1424.695621 1404.342820 + 10 4194304.0 1228.800031 1228.800031 + 11 8388608.0 1424.695621 1424.695621 12 16777216.0 1560.380965 1560.380965 13 33554432.0 1631.601649 1624.859540 14 67108864.0 1669.706983 1662.646960 - 15 134217728.0 1684.910539 1678.616907 + 15 134217728.0 1685.813499 1678.616907 @@ -253,7 +253,7 @@ We can now run the decorated function above. Pass `print_data=True` to see the p .. rst-class:: sphx-glr-timing - **Total running time of the script:** (0 minutes 12.374 seconds) + **Total running time of the script:** (0 minutes 21.245 seconds) .. _sphx_glr_download_getting-started_tutorials_01-vector-add.py: diff --git a/main/_sources/getting-started/tutorials/02-fused-softmax.rst.txt b/main/_sources/getting-started/tutorials/02-fused-softmax.rst.txt index 91d34dceb36d..68a8edbff30d 100644 --- a/main/_sources/getting-started/tutorials/02-fused-softmax.rst.txt +++ b/main/_sources/getting-started/tutorials/02-fused-softmax.rst.txt @@ -330,104 +330,104 @@ We will then compare its performance against (1) :code:`torch.softmax` and (2) t softmax-performance: N Triton Torch - 0 256.0 476.139488 696.788258 - 1 384.0 618.827725 827.479292 - 2 512.0 760.221748 933.050692 - 3 640.0 797.744455 964.590312 - 4 768.0 881.327202 1018.776646 - 5 896.0 931.629145 1073.534358 - 6 1024.0 989.121470 1115.624655 - 7 1152.0 1111.718456 610.957464 - 8 1280.0 1147.279460 665.683488 - 9 1408.0 1164.170094 724.584712 - 10 1536.0 1184.197350 780.834636 - 11 1664.0 1209.547564 813.845228 - 12 1792.0 1237.864193 858.475756 - 13 1920.0 1254.234428 908.944638 - 14 2048.0 1272.763984 959.063498 - 15 2176.0 1264.692131 977.239079 - 16 2304.0 1275.217537 1009.596217 - 17 2432.0 1294.768054 1056.159369 - 18 2560.0 1300.226170 1083.970309 - 19 2688.0 1312.048325 1104.527304 - 20 2816.0 1328.970602 1134.579440 - 21 2944.0 1324.895257 1169.377885 - 22 3072.0 1346.332719 1184.220808 - 23 3200.0 1350.957286 1191.658986 - 24 3328.0 1353.230317 1221.801758 - 25 3456.0 1370.634023 1249.755593 - 26 3584.0 1374.222742 1256.588724 - 27 3712.0 1378.666064 1265.859437 - 28 3840.0 1389.815263 1301.320415 - 29 3968.0 1391.592430 1313.186082 - 30 4096.0 1392.912426 1324.932316 - 31 4224.0 1331.948936 1162.355169 - 32 4352.0 1332.251661 1174.758168 - 33 4480.0 1354.332825 1183.392495 - 34 4608.0 1362.559726 1193.508869 - 35 4736.0 1361.412494 1199.610872 - 36 4864.0 1374.494003 1225.630352 - 37 4992.0 1375.903904 1233.775130 - 38 5120.0 1374.668680 1250.263491 - 39 5248.0 1372.858541 1257.322491 - 40 5376.0 1375.811956 1288.827038 - 41 5504.0 1381.364907 1298.268824 - 42 5632.0 1386.009236 1312.864735 - 43 5760.0 1391.944559 1327.558803 - 44 5888.0 1392.849132 1345.422003 - 45 6016.0 1396.644789 1351.073081 - 46 6144.0 1407.104309 1375.616484 - 47 6272.0 1418.086793 1375.157333 - 48 6400.0 1417.980384 1387.342354 - 49 6528.0 1415.249498 1391.602704 - 50 6656.0 1420.331310 1405.582361 - 51 6784.0 1417.202111 1412.663309 - 52 6912.0 1432.381749 1423.158102 - 53 7040.0 1419.793560 1431.854784 - 54 7168.0 1430.581340 1436.330930 - 55 7296.0 1429.945826 1443.592076 - 56 7424.0 1428.330462 1447.402937 - 57 7552.0 1424.607809 1455.469007 - 58 7680.0 1435.101889 1458.109689 - 59 7808.0 1431.190718 1463.371140 - 60 7936.0 1437.080733 1469.151768 - 61 8064.0 1440.554513 1475.989169 - 62 8192.0 1438.506525 1483.113065 - 63 8320.0 1388.839432 1404.856624 - 64 8448.0 1380.906669 1404.087829 - 65 8576.0 1397.216183 1394.726484 - 66 8704.0 1391.153016 1400.492698 - 67 8832.0 1385.181960 1403.439253 - 68 8960.0 1396.270444 1409.371359 - 69 9088.0 1409.370731 1415.615460 - 70 9216.0 1403.434935 1422.651793 - 71 9344.0 1402.856149 1424.026237 - 72 9472.0 1396.917353 1432.801002 - 73 9600.0 1396.123339 1432.422704 - 74 9728.0 1399.535917 1439.689956 - 75 9856.0 1414.048642 1442.948339 - 76 9984.0 1401.676171 1451.736028 - 77 10112.0 1414.126083 1457.734155 - 78 10240.0 1420.305433 1466.943900 - 79 10368.0 1411.127263 1463.104160 - 80 10496.0 1416.005610 1466.565466 - 81 10624.0 1409.151943 1470.820498 - 82 10752.0 1406.087477 1473.168060 - 83 10880.0 1401.270291 1482.425922 - 84 11008.0 1424.021362 1478.111364 - 85 11136.0 1422.071915 1484.134850 - 86 11264.0 1433.532950 1490.315245 - 87 11392.0 1420.309112 1489.868394 - 88 11520.0 1422.613285 1495.584278 - 89 11648.0 1429.311198 1497.151665 - 90 11776.0 1430.830235 1501.968013 - 91 11904.0 1440.067196 1507.183144 - 92 12032.0 1420.403459 1506.206378 - 93 12160.0 1419.023122 1511.268039 - 94 12288.0 1433.554729 1393.015506 - 95 12416.0 1449.698764 1392.734374 - 96 12544.0 1441.583664 1393.369388 - 97 12672.0 1445.716152 1392.544565 + 0 256.0 478.028491 682.211788 + 1 384.0 613.236913 801.876368 + 2 512.0 754.182178 911.741697 + 3 640.0 798.135845 958.299627 + 4 768.0 870.664640 1010.572452 + 5 896.0 928.360671 1058.014237 + 6 1024.0 984.002664 1120.445299 + 7 1152.0 1100.139529 610.393022 + 8 1280.0 1149.246505 669.128065 + 9 1408.0 1163.366768 720.776311 + 10 1536.0 1181.114345 779.522923 + 11 1664.0 1208.491767 813.867402 + 12 1792.0 1239.280240 855.664171 + 13 1920.0 1252.974956 908.944638 + 14 2048.0 1281.808976 958.445467 + 15 2176.0 1257.374456 974.126326 + 16 2304.0 1274.510204 1012.575192 + 17 2432.0 1299.346292 1054.240204 + 18 2560.0 1301.864916 1085.033216 + 19 2688.0 1315.793905 1099.785142 + 20 2816.0 1329.956456 1127.646529 + 21 2944.0 1320.420864 1163.865922 + 22 3072.0 1352.366259 1186.801725 + 23 3200.0 1348.247102 1194.823745 + 24 3328.0 1356.513581 1220.708485 + 25 3456.0 1373.092948 1245.892659 + 26 3584.0 1371.185145 1257.701248 + 27 3712.0 1381.051681 1270.816849 + 28 3840.0 1384.313603 1297.482545 + 29 3968.0 1386.132294 1316.563732 + 30 4096.0 1396.938877 1326.867313 + 31 4224.0 1331.989715 1158.815535 + 32 4352.0 1337.681856 1178.178747 + 33 4480.0 1356.518707 1181.381781 + 34 4608.0 1363.974264 1194.697773 + 35 4736.0 1357.126451 1200.045185 + 36 4864.0 1374.458060 1221.553039 + 37 4992.0 1367.842993 1235.787250 + 38 5120.0 1371.398724 1249.380283 + 39 5248.0 1371.351976 1259.701601 + 40 5376.0 1381.782947 1286.871473 + 41 5504.0 1381.364118 1295.814312 + 42 5632.0 1381.296323 1313.771361 + 43 5760.0 1394.546758 1325.800167 + 44 5888.0 1391.519427 1341.321506 + 45 6016.0 1398.167209 1353.458566 + 46 6144.0 1408.655146 1371.358857 + 47 6272.0 1416.639707 1378.268662 + 48 6400.0 1415.189418 1391.046639 + 49 6528.0 1413.696324 1396.664113 + 50 6656.0 1426.651691 1402.676188 + 51 6784.0 1410.454865 1412.084452 + 52 6912.0 1425.137374 1424.746635 + 53 7040.0 1418.304077 1429.993348 + 54 7168.0 1429.365415 1435.127952 + 55 7296.0 1427.991956 1440.786081 + 56 7424.0 1428.931798 1443.814394 + 57 7552.0 1423.834653 1454.145375 + 58 7680.0 1434.387630 1457.834798 + 59 7808.0 1433.907820 1466.229329 + 60 7936.0 1435.403687 1466.092109 + 61 8064.0 1440.171123 1475.208482 + 62 8192.0 1439.579975 1483.881006 + 63 8320.0 1387.635765 1404.007749 + 64 8448.0 1378.185540 1405.672848 + 65 8576.0 1392.153760 1394.156614 + 66 8704.0 1391.319871 1399.946213 + 67 8832.0 1378.923691 1404.064141 + 68 8960.0 1397.175502 1412.823133 + 69 9088.0 1407.773026 1413.353068 + 70 9216.0 1402.477995 1423.756587 + 71 9344.0 1399.845155 1422.123571 + 72 9472.0 1395.258197 1432.316744 + 73 9600.0 1395.873814 1430.873450 + 74 9728.0 1400.509255 1440.503042 + 75 9856.0 1414.313838 1441.375337 + 76 9984.0 1398.687599 1452.498519 + 77 10112.0 1411.513656 1454.502324 + 78 10240.0 1420.416437 1462.857218 + 79 10368.0 1414.151638 1466.318724 + 80 10496.0 1413.116185 1465.349377 + 81 10624.0 1410.750742 1467.233318 + 82 10752.0 1409.067946 1472.827147 + 83 10880.0 1402.241689 1480.663732 + 84 11008.0 1421.528681 1473.972025 + 85 11136.0 1420.534256 1487.110952 + 86 11264.0 1429.942133 1484.253568 + 87 11392.0 1415.399403 1489.279925 + 88 11520.0 1424.292151 1495.239122 + 89 11648.0 1423.625174 1495.959349 + 90 11776.0 1429.891952 1503.753380 + 91 11904.0 1443.761397 1505.134156 + 92 12032.0 1422.024153 1507.162510 + 93 12160.0 1417.047651 1509.910523 + 94 12288.0 1434.709538 1389.347092 + 95 12416.0 1448.814565 1391.463286 + 96 12544.0 1443.017867 1392.016397 + 97 12672.0 1450.992449 1392.449930 @@ -442,7 +442,7 @@ In the above plot, we can see that: .. rst-class:: sphx-glr-timing - **Total running time of the script:** (0 minutes 23.352 seconds) + **Total running time of the script:** (0 minutes 25.227 seconds) .. _sphx_glr_download_getting-started_tutorials_02-fused-softmax.py: diff --git a/main/_sources/getting-started/tutorials/03-matrix-multiplication.rst.txt b/main/_sources/getting-started/tutorials/03-matrix-multiplication.rst.txt index f0f9c05614f7..4f97bc6332ed 100644 --- a/main/_sources/getting-started/tutorials/03-matrix-multiplication.rst.txt +++ b/main/_sources/getting-started/tutorials/03-matrix-multiplication.rst.txt @@ -580,58 +580,58 @@ but feel free to arrange this script as you wish to benchmark any other matrix s 8 1280.0 1280.0 1280.0 157.538463 163.840004 9 1408.0 1408.0 1408.0 155.765024 132.970149 10 1536.0 1536.0 1536.0 176.947204 157.286398 - 11 1664.0 1664.0 1664.0 179.978245 179.978245 - 12 1792.0 1792.0 1792.0 172.914215 208.137481 + 11 1664.0 1664.0 1664.0 183.651271 179.978245 + 12 1792.0 1792.0 1792.0 172.914215 200.703997 13 1920.0 1920.0 1920.0 200.347822 166.554219 - 14 2048.0 2048.0 2048.0 226.719125 190.650180 - 15 2176.0 2176.0 2176.0 211.827867 209.621326 - 16 2304.0 2304.0 2304.0 229.691080 227.503545 - 17 2432.0 2432.0 2432.0 203.583068 200.674737 - 18 2560.0 2560.0 2560.0 222.911566 219.919464 - 19 2688.0 2688.0 2688.0 198.602388 198.602388 - 20 2816.0 2816.0 2816.0 212.752230 210.696652 - 21 2944.0 2944.0 2944.0 221.493479 223.479969 - 22 3072.0 3072.0 3072.0 210.494802 211.280236 - 23 3200.0 3200.0 3200.0 214.046818 215.488222 - 24 3328.0 3328.0 3328.0 207.467716 205.689424 - 25 3456.0 3456.0 3456.0 217.308808 219.677297 - 26 3584.0 3584.0 3584.0 210.082692 214.595213 - 27 3712.0 3712.0 3712.0 208.990259 218.593757 - 28 3840.0 3840.0 3840.0 207.879708 210.250955 - 29 3968.0 3968.0 3968.0 208.587935 214.077090 - 30 4096.0 4096.0 4096.0 219.668951 216.829933 + 14 2048.0 2048.0 2048.0 223.696203 190.650180 + 15 2176.0 2176.0 2176.0 211.827867 211.827867 + 16 2304.0 2304.0 2304.0 227.503545 225.357284 + 17 2432.0 2432.0 2432.0 205.069087 199.251522 + 18 2560.0 2560.0 2560.0 222.911566 217.006622 + 19 2688.0 2688.0 2688.0 199.647657 196.544332 + 20 2816.0 2816.0 2816.0 210.696652 208.680416 + 21 2944.0 2944.0 2944.0 218.579083 219.541994 + 22 3072.0 3072.0 3072.0 205.902197 211.280236 + 23 3200.0 3200.0 3200.0 214.046818 219.178074 + 24 3328.0 3328.0 3328.0 206.871539 208.067338 + 25 3456.0 3456.0 3456.0 216.724640 219.080343 + 26 3584.0 3584.0 3584.0 215.624440 212.565943 + 27 3712.0 3712.0 3712.0 209.868376 213.455857 + 28 3840.0 3840.0 3840.0 209.851994 208.271176 + 29 3968.0 3968.0 3968.0 211.114084 216.354501 + 30 4096.0 4096.0 4096.0 221.481394 218.240199 matmul-performance-fp8: M N K Triton 0 256.0 256.0 256.0 3.276800 - 1 384.0 384.0 384.0 9.216000 + 1 384.0 384.0 384.0 10.053818 2 512.0 512.0 512.0 20.164923 3 640.0 640.0 640.0 34.133334 4 768.0 768.0 768.0 42.130286 5 896.0 896.0 896.0 58.538665 - 6 1024.0 1024.0 1024.0 63.550060 + 6 1024.0 1024.0 1024.0 61.680940 7 1152.0 1152.0 1152.0 80.702267 8 1280.0 1280.0 1280.0 102.400003 9 1408.0 1408.0 1408.0 82.602666 - 10 1536.0 1536.0 1536.0 99.688560 + 10 1536.0 1536.0 1536.0 98.303997 11 1664.0 1664.0 1664.0 116.868992 - 12 1792.0 1792.0 1792.0 135.414749 + 12 1792.0 1792.0 1792.0 133.802668 13 1920.0 1920.0 1920.0 99.453240 14 2048.0 2048.0 2048.0 114.130722 15 2176.0 2176.0 2176.0 121.226797 - 16 2304.0 2304.0 2304.0 134.201527 + 16 2304.0 2304.0 2304.0 134.959733 17 2432.0 2432.0 2432.0 131.898888 - 18 2560.0 2560.0 2560.0 146.285712 + 18 2560.0 2560.0 2560.0 146.941707 19 2688.0 2688.0 2688.0 118.171514 20 2816.0 2816.0 2816.0 129.036114 21 2944.0 2944.0 2944.0 139.596724 - 22 3072.0 3072.0 3072.0 144.079147 - 23 3200.0 3200.0 3200.0 138.828637 - 24 3328.0 3328.0 3328.0 131.611151 - 25 3456.0 3456.0 3456.0 138.287420 - 26 3584.0 3584.0 3584.0 148.375230 - 27 3712.0 3712.0 3712.0 141.297511 + 22 3072.0 3072.0 3072.0 144.446699 + 23 3200.0 3200.0 3200.0 139.130432 + 24 3328.0 3328.0 3328.0 130.537253 + 25 3456.0 3456.0 3456.0 138.763456 + 26 3584.0 3584.0 3584.0 148.620481 + 27 3712.0 3712.0 3712.0 139.716570 28 3840.0 3840.0 3840.0 138.240003 - 29 3968.0 3968.0 3968.0 145.961642 + 29 3968.0 3968.0 3968.0 145.613293 30 4096.0 4096.0 4096.0 155.165002 @@ -640,7 +640,7 @@ but feel free to arrange this script as you wish to benchmark any other matrix s .. rst-class:: sphx-glr-timing - **Total running time of the script:** (2 minutes 17.000 seconds) + **Total running time of the script:** (2 minutes 17.281 seconds) .. _sphx_glr_download_getting-started_tutorials_03-matrix-multiplication.py: diff --git a/main/_sources/getting-started/tutorials/04-low-memory-dropout.rst.txt b/main/_sources/getting-started/tutorials/04-low-memory-dropout.rst.txt index 9ef1a6919f69..8159ab928353 100644 --- a/main/_sources/getting-started/tutorials/04-low-memory-dropout.rst.txt +++ b/main/_sources/getting-started/tutorials/04-low-memory-dropout.rst.txt @@ -242,7 +242,7 @@ References .. rst-class:: sphx-glr-timing - **Total running time of the script:** (0 minutes 0.677 seconds) + **Total running time of the script:** (0 minutes 0.684 seconds) .. _sphx_glr_download_getting-started_tutorials_04-low-memory-dropout.py: diff --git a/main/_sources/getting-started/tutorials/05-layer-norm.rst.txt b/main/_sources/getting-started/tutorials/05-layer-norm.rst.txt index 44174c5bada9..d38d9378694d 100644 --- a/main/_sources/getting-started/tutorials/05-layer-norm.rst.txt +++ b/main/_sources/getting-started/tutorials/05-layer-norm.rst.txt @@ -431,34 +431,34 @@ Specifically, one can set :code:`'mode': 'backward'` to benchmark the backward p layer-norm-backward: N Triton Torch - 0 1024.0 133.565214 378.092307 - 1 1536.0 203.668513 449.560983 - 2 2048.0 277.694924 517.389457 - 3 2560.0 341.333328 574.205608 - 4 3072.0 494.818794 614.400016 - 5 3584.0 569.642383 547.872604 - 6 4096.0 588.646687 561.737163 - 7 4608.0 704.407633 567.138460 - 8 5120.0 772.830175 568.888888 - 9 5632.0 824.195135 565.556483 - 10 6144.0 767.999973 562.809189 - 11 6656.0 907.636357 566.468098 + 0 1024.0 124.751268 378.092307 + 1 1536.0 188.081639 449.560983 + 2 2048.0 255.999991 517.389457 + 3 2560.0 305.671638 574.205608 + 4 3072.0 382.010363 614.400016 + 5 3584.0 457.531902 547.872604 + 6 4096.0 511.999982 561.737163 + 7 4608.0 699.949388 567.138460 + 8 5120.0 587.942569 568.888888 + 9 5632.0 640.606617 563.200014 + 10 6144.0 842.605744 562.809189 + 11 6656.0 907.636357 568.484014 12 7168.0 945.230752 540.981122 - 13 7680.0 945.230767 548.571433 + 13 7680.0 940.408194 548.571433 14 8192.0 968.512300 549.184373 15 8704.0 684.904931 561.548373 - 16 9216.0 722.823525 567.138460 - 17 9728.0 743.541391 570.836186 - 18 10240.0 744.727250 562.379850 + 16 9216.0 722.823525 565.687967 + 17 9728.0 743.541391 569.443892 + 18 10240.0 746.990876 562.379850 19 10752.0 789.137646 553.751076 20 11264.0 816.725104 559.701851 - 21 11776.0 807.497172 567.518063 - 22 12288.0 819.199988 572.644636 - 23 12800.0 850.969498 577.443635 - 24 13312.0 894.924340 578.782596 - 25 13824.0 904.021797 578.006963 - 26 14336.0 919.957230 568.700819 - 27 14848.0 923.191711 569.252402 + 21 11776.0 805.196592 567.518063 + 22 12288.0 819.199988 571.534916 + 23 12800.0 848.618804 577.443635 + 24 13312.0 902.508503 578.782596 + 25 13824.0 901.565197 578.006963 + 26 14336.0 917.503992 568.700819 + 27 14848.0 920.806184 569.252402 28 15360.0 930.909118 579.622631 29 15872.0 913.496396 580.682936 @@ -475,7 +475,7 @@ References .. rst-class:: sphx-glr-timing - **Total running time of the script:** (0 minutes 28.824 seconds) + **Total running time of the script:** (0 minutes 28.981 seconds) .. _sphx_glr_download_getting-started_tutorials_05-layer-norm.py: diff --git a/main/_sources/getting-started/tutorials/06-fused-attention.rst.txt b/main/_sources/getting-started/tutorials/06-fused-attention.rst.txt index ea1f1590f33a..240b22ebf556 100644 --- a/main/_sources/getting-started/tutorials/06-fused-attention.rst.txt +++ b/main/_sources/getting-started/tutorials/06-fused-attention.rst.txt @@ -63,25 +63,25 @@ Extra Credits: fused-attention-batch4-head32-d64-fwd-causal=True: N_CTX Triton [FP16] Triton [FP8] - 0 1024.0 113.385494 93.231589 - 1 2048.0 141.236211 112.587231 - 2 4096.0 154.912658 124.788749 - 3 8192.0 167.409449 131.746001 - 4 16384.0 170.826127 135.577739 + 0 1024.0 114.576118 93.770834 + 1 2048.0 140.554006 110.461577 + 2 4096.0 155.833908 122.716015 + 3 8192.0 165.748638 129.162773 + 4 16384.0 171.207451 133.933123 fused-attention-batch4-head32-d64-fwd-causal=False: N_CTX Triton [FP16] Triton [FP8] - 0 1024.0 156.440266 129.196888 - 1 2048.0 166.409771 140.830441 - 2 4096.0 169.408340 136.333865 - 3 8192.0 172.762207 143.414153 - 4 16384.0 175.455178 143.562768 + 0 1024.0 157.252966 129.125552 + 1 2048.0 166.577245 141.413943 + 2 4096.0 170.047206 137.960651 + 3 8192.0 171.741761 143.775832 + 4 16384.0 175.229696 144.606824 fused-attention-batch4-head32-d64-bwd-causal=True: N_CTX Triton [FP16] Triton [FP8] - 0 1024.0 73.565420 72.779406 - 1 2048.0 97.957491 97.942745 - 2 4096.0 117.538940 116.205826 - 3 8192.0 126.157821 128.499504 - 4 16384.0 132.658984 135.099245 + 0 1024.0 73.653940 72.609228 + 1 2048.0 98.056100 97.946040 + 2 4096.0 116.551437 116.186967 + 3 8192.0 126.632440 128.444165 + 4 16384.0 132.052073 135.095851 @@ -725,7 +725,7 @@ Extra Credits: .. rst-class:: sphx-glr-timing - **Total running time of the script:** (0 minutes 52.713 seconds) + **Total running time of the script:** (0 minutes 54.109 seconds) .. _sphx_glr_download_getting-started_tutorials_06-fused-attention.py: diff --git a/main/_sources/getting-started/tutorials/07-extern-functions.rst.txt b/main/_sources/getting-started/tutorials/07-extern-functions.rst.txt index 41896534ac1a..a95c60203afe 100644 --- a/main/_sources/getting-started/tutorials/07-extern-functions.rst.txt +++ b/main/_sources/getting-started/tutorials/07-extern-functions.rst.txt @@ -169,7 +169,7 @@ We can also customize the libdevice library path by passing the path to the `lib .. rst-class:: sphx-glr-timing - **Total running time of the script:** (0 minutes 0.223 seconds) + **Total running time of the script:** (0 minutes 0.224 seconds) .. _sphx_glr_download_getting-started_tutorials_07-extern-functions.py: diff --git a/main/_sources/getting-started/tutorials/08-grouped-gemm.rst.txt b/main/_sources/getting-started/tutorials/08-grouped-gemm.rst.txt index 63728b02dfb0..231b81fd0a75 100644 --- a/main/_sources/getting-started/tutorials/08-grouped-gemm.rst.txt +++ b/main/_sources/getting-started/tutorials/08-grouped-gemm.rst.txt @@ -39,7 +39,7 @@ of gemms. The scheduling is static and we do it on device. group-gemm-performance: N cuBLAS Triton - 0 128.0 0.020480 0.014336 + 0 128.0 0.021504 0.013312 1 256.0 0.023552 0.018432 2 512.0 0.032768 0.026624 3 1024.0 0.071680 0.087040 @@ -336,7 +336,7 @@ of gemms. The scheduling is static and we do it on device. .. rst-class:: sphx-glr-timing - **Total running time of the script:** (0 minutes 3.370 seconds) + **Total running time of the script:** (0 minutes 3.443 seconds) .. _sphx_glr_download_getting-started_tutorials_08-grouped-gemm.py: diff --git a/main/_sources/getting-started/tutorials/09-persistent-matmul.rst.txt b/main/_sources/getting-started/tutorials/09-persistent-matmul.rst.txt index ae3be609affe..560927ad1eba 100644 --- a/main/_sources/getting-started/tutorials/09-persistent-matmul.rst.txt +++ b/main/_sources/getting-started/tutorials/09-persistent-matmul.rst.txt @@ -519,7 +519,7 @@ Users can pass command-line arguments to specify matrix dimensions and iteration .. rst-class:: sphx-glr-timing - **Total running time of the script:** (0 minutes 1.700 seconds) + **Total running time of the script:** (0 minutes 1.746 seconds) .. _sphx_glr_download_getting-started_tutorials_09-persistent-matmul.py: diff --git a/main/_sources/getting-started/tutorials/sg_execution_times.rst.txt b/main/_sources/getting-started/tutorials/sg_execution_times.rst.txt index fa2635055e4f..7571b1a353c4 100644 --- a/main/_sources/getting-started/tutorials/sg_execution_times.rst.txt +++ b/main/_sources/getting-started/tutorials/sg_execution_times.rst.txt @@ -6,7 +6,7 @@ Computation times ================= -**04:20.234** total execution time for 9 files **from getting-started/tutorials**: +**04:32.940** total execution time for 9 files **from getting-started/tutorials**: .. container:: @@ -33,29 +33,29 @@ Computation times - Time - Mem (MB) * - :ref:`sphx_glr_getting-started_tutorials_03-matrix-multiplication.py` (``03-matrix-multiplication.py``) - - 02:17.000 + - 02:17.281 - 0.0 * - :ref:`sphx_glr_getting-started_tutorials_06-fused-attention.py` (``06-fused-attention.py``) - - 00:52.713 + - 00:54.109 - 0.0 * - :ref:`sphx_glr_getting-started_tutorials_05-layer-norm.py` (``05-layer-norm.py``) - - 00:28.824 + - 00:28.981 - 0.0 * - :ref:`sphx_glr_getting-started_tutorials_02-fused-softmax.py` (``02-fused-softmax.py``) - - 00:23.352 + - 00:25.227 - 0.0 * - :ref:`sphx_glr_getting-started_tutorials_01-vector-add.py` (``01-vector-add.py``) - - 00:12.374 + - 00:21.245 - 0.0 * - :ref:`sphx_glr_getting-started_tutorials_08-grouped-gemm.py` (``08-grouped-gemm.py``) - - 00:03.370 + - 00:03.443 - 0.0 * - :ref:`sphx_glr_getting-started_tutorials_09-persistent-matmul.py` (``09-persistent-matmul.py``) - - 00:01.700 + - 00:01.746 - 0.0 * - :ref:`sphx_glr_getting-started_tutorials_04-low-memory-dropout.py` (``04-low-memory-dropout.py``) - - 00:00.677 + - 00:00.684 - 0.0 * - :ref:`sphx_glr_getting-started_tutorials_07-extern-functions.py` (``07-extern-functions.py``) - - 00:00.223 + - 00:00.224 - 0.0 diff --git a/main/_sources/sg_execution_times.rst.txt b/main/_sources/sg_execution_times.rst.txt index 34e795524916..0299e27a6a74 100644 --- a/main/_sources/sg_execution_times.rst.txt +++ b/main/_sources/sg_execution_times.rst.txt @@ -6,7 +6,7 @@ Computation times ================= -**04:20.234** total execution time for 9 files **from all galleries**: +**04:32.940** total execution time for 9 files **from all galleries**: .. container:: @@ -33,29 +33,29 @@ Computation times - Time - Mem (MB) * - :ref:`sphx_glr_getting-started_tutorials_03-matrix-multiplication.py` (``../python/tutorials/03-matrix-multiplication.py``) - - 02:17.000 + - 02:17.281 - 0.0 * - :ref:`sphx_glr_getting-started_tutorials_06-fused-attention.py` (``../python/tutorials/06-fused-attention.py``) - - 00:52.713 + - 00:54.109 - 0.0 * - :ref:`sphx_glr_getting-started_tutorials_05-layer-norm.py` (``../python/tutorials/05-layer-norm.py``) - - 00:28.824 + - 00:28.981 - 0.0 * - :ref:`sphx_glr_getting-started_tutorials_02-fused-softmax.py` (``../python/tutorials/02-fused-softmax.py``) - - 00:23.352 + - 00:25.227 - 0.0 * - :ref:`sphx_glr_getting-started_tutorials_01-vector-add.py` (``../python/tutorials/01-vector-add.py``) - - 00:12.374 + - 00:21.245 - 0.0 * - :ref:`sphx_glr_getting-started_tutorials_08-grouped-gemm.py` (``../python/tutorials/08-grouped-gemm.py``) - - 00:03.370 + - 00:03.443 - 0.0 * - :ref:`sphx_glr_getting-started_tutorials_09-persistent-matmul.py` (``../python/tutorials/09-persistent-matmul.py``) - - 00:01.700 + - 00:01.746 - 0.0 * - :ref:`sphx_glr_getting-started_tutorials_04-low-memory-dropout.py` (``../python/tutorials/04-low-memory-dropout.py``) - - 00:00.677 + - 00:00.684 - 0.0 * - :ref:`sphx_glr_getting-started_tutorials_07-extern-functions.py` (``../python/tutorials/07-extern-functions.py``) - - 00:00.223 + - 00:00.224 - 0.0 diff --git a/main/_static/searchtools.js b/main/_static/searchtools.js index 92da3f8b22cf..b08d58c9b9b9 100644 --- a/main/_static/searchtools.js +++ b/main/_static/searchtools.js @@ -178,7 +178,7 @@ const Search = { htmlToText: (htmlString, anchor) => { const htmlElement = new DOMParser().parseFromString(htmlString, 'text/html'); - for (const removalQuery of [".headerlinks", "script", "style"]) { + for (const removalQuery of [".headerlink", "script", "style"]) { htmlElement.querySelectorAll(removalQuery).forEach((el) => { el.remove() }); } if (anchor) { @@ -328,13 +328,14 @@ const Search = { for (const [title, foundTitles] of Object.entries(allTitles)) { if (title.toLowerCase().trim().includes(queryLower) && (queryLower.length >= title.length/2)) { for (const [file, id] of foundTitles) { - let score = Math.round(100 * queryLower.length / title.length) + const score = Math.round(Scorer.title * queryLower.length / title.length); + const boost = titles[file] === title ? 1 : 0; // add a boost for document titles normalResults.push([ docNames[file], titles[file] !== title ? `${titles[file]} > ${title}` : title, id !== null ? "#" + id : "", null, - score, + score + boost, filenames[file], ]); } diff --git a/main/getting-started/tutorials/01-vector-add.html b/main/getting-started/tutorials/01-vector-add.html index 915d0711a4fb..e2b3ae0c1a19 100644 --- a/main/getting-started/tutorials/01-vector-add.html +++ b/main/getting-started/tutorials/01-vector-add.html @@ -239,8 +239,8 @@

Benchmark
vector-add-performance:
            size       Triton        Torch
 0        4096.0     8.000000     8.000000
-1        8192.0    15.999999    19.200000
-2       16384.0    38.400001    31.999999
+1        8192.0    19.200000    19.200000
+2       16384.0    31.999999    31.999999
 3       32768.0    63.999998    63.999998
 4       65536.0   127.999995   127.999995
 5      131072.0   219.428568   219.428568
@@ -248,15 +248,15 @@ 

BenchmarkTotal running time of the script: (0 minutes 12.374 seconds)

+

Total running time of the script: (0 minutes 21.245 seconds)

-

Total running time of the script: (0 minutes 52.713 seconds)

+

Total running time of the script: (0 minutes 54.109 seconds)