disable block reduce for int8

microsoft · Aug 13, 2024 · b81a3a8 · b81a3a8
1 parent 1816526
commit b81a3a8
Show file tree

Hide file tree

Showing 2 changed files with 3 additions and 2 deletions.
diff --git a/3rdparty/tvm b/3rdparty/tvm
diff --git a/bitblas/gpu/matmul_analysis.py b/bitblas/gpu/matmul_analysis.py
@@ -623,7 +623,8 @@ def check_last_trait(region: List[Range]):
         # Currently, we only support block reduction depth 2 for small M
         # When the func is a dequantize-like op, we should also take M into account
         require_block_reduce = False
-        if hasattr(func.attrs, "dequantize_info"):
+        # Block reduction is currently only enabled for float16 inputs
+        if hasattr(func.attrs, "dequantize_info") and in_dtype == "float16":
             for arg in func.params:
                 inp_shape = func.buffer_map[arg].shape
                 M = inp_shape[0]