Use try/except to handle non-contiguous tensors

ROCm · Oct 4, 2024 · cfe23d8 · cfe23d8
1 parent 2b25a0c
commit cfe23d8
Showing 1 changed file with 5 additions and 2 deletions.
diff --git a/vllm/model_executor/layers/tuned_gemm.py b/vllm/model_executor/layers/tuned_gemm.py
@@ -84,8 +84,11 @@ def mm(self, inp, weights, bias=None):
         # uses this for linear units. However, sampler
         # will use torch.matmul with 2 dimensions only
         if inp.dim() == 3:
-            inp_view = inp.reshape(-1, inp.size(-1))
-            batched = True
+            try:
+                inp_view = inp.view(-1, inp.size(-1))
+                batched = True
+            except RuntimeError:
+                return F.linear(inp, weights, bias)
         else:
             inp_view = inp
             batched = False