From 67ad7616e07eb9e52a90a4a785c57f9e05929db1 Mon Sep 17 00:00:00 2001 From: LeiWang1999 Date: Mon, 19 Aug 2024 12:52:34 +0000 Subject: [PATCH] fix scale only lop3 tensorize instructions. --- bitblas/gpu/intrin/lop3.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bitblas/gpu/intrin/lop3.py b/bitblas/gpu/intrin/lop3.py index a6a7011a0..aee3eac8b 100644 --- a/bitblas/gpu/intrin/lop3.py +++ b/bitblas/gpu/intrin/lop3.py @@ -1127,7 +1127,7 @@ def fast_decode_desc(compressed: T.handle, decompressed: T.handle, scale: T.hand 1, ], dtype=target_dtype, - scope="global", + scope="local", ) with T.block("root"): T.reads(Compressed[0:n_storage_elems], Scale[0:1]) @@ -1173,7 +1173,7 @@ def fast_decode_impl(compressed: T.handle, decompressed: T.handle, scale: T.hand dtype=target_dtype, offset_factor=1, strides=[s0], - scope="global", + scope="local", ) with T.block("root"): T.reads(Compressed[0:n_storage_elems], Scale[0:1])