
Commit 882198d

qixiang-99 authored and symphonylyh committed
fix: extend attention mask type handling in MHARunnerFixedParams
Added support for additional attention mask types (BIDIRECTIONAL, BIDIRECTIONALGLM, BLOCKSPARSE) in the MHARunnerFixedParams structure to fix the mapping issue between ContextAttentionMaskType and AttentionMaskType.

Signed-off-by: Qixiang Lin <[email protected]>
1 parent: f4b4a35

1 file changed (+9, -0 lines)


cpp/tensorrt_llm/kernels/contextFusedMultiHeadAttention/fused_multihead_attention_common.h

Lines changed: 9 additions & 0 deletions
@@ -202,6 +202,15 @@ struct MHARunnerFixedParams
         case 2: // tensorrt_llm::kernels::AttentionMaskType::SLIDING_WINDOW_CAUSAL
             attentionMaskType = ContextAttentionMaskType::SLIDING_WINDOW_CAUSAL;
             break;
+        case 3: // tensorrt_llm::kernels::AttentionMaskType::BIDIRECTIONAL
+            attentionMaskType = ContextAttentionMaskType::CAUSAL;
+            break;
+        case 4: // tensorrt_llm::kernels::AttentionMaskType::BIDIRECTIONALGLM
+            attentionMaskType = ContextAttentionMaskType::CAUSAL;
+            break;
+        case 5: // tensorrt_llm::kernels::AttentionMaskType::BLOCKSPARSE
+            attentionMaskType = ContextAttentionMaskType::CAUSAL;
+            break;
         case 6: // tensorrt_llm::kernels::AttentionMaskType::CUSTOM_MASK
             attentionMaskType = ContextAttentionMaskType::CUSTOM_MASK;
             break;
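
For readers without the TensorRT-LLM sources at hand, here is a minimal, self-contained C++ sketch of the enum mapping this hunk establishes. The two enums below are simplified stand-ins, not the real headers: only the enumerator values spelled out in the diff comments are assumed (cases 0 and 1 are not shown in the hunk), and toContextMaskType is a hypothetical free function rather than the actual MHARunnerFixedParams member.

#include <iostream>

// Stand-in for tensorrt_llm::kernels::AttentionMaskType; values taken from the diff comments.
enum class AttentionMaskType : int
{
    SLIDING_WINDOW_CAUSAL = 2,
    BIDIRECTIONAL = 3,
    BIDIRECTIONALGLM = 4,
    BLOCKSPARSE = 5,
    CUSTOM_MASK = 6,
};

// Stand-in for the context FMHA mask type enum.
enum class ContextAttentionMaskType
{
    CAUSAL,
    SLIDING_WINDOW_CAUSAL,
    CUSTOM_MASK,
};

// Mirrors the patched switch: the three newly handled mask types all resolve to the
// plain CAUSAL context mask.
ContextAttentionMaskType toContextMaskType(AttentionMaskType maskType)
{
    switch (static_cast<int>(maskType))
    {
    case 2: // SLIDING_WINDOW_CAUSAL
        return ContextAttentionMaskType::SLIDING_WINDOW_CAUSAL;
    case 3: // BIDIRECTIONAL
    case 4: // BIDIRECTIONALGLM
    case 5: // BLOCKSPARSE
        return ContextAttentionMaskType::CAUSAL;
    case 6: // CUSTOM_MASK
        return ContextAttentionMaskType::CUSTOM_MASK;
    default: // cases 0/1 are outside this hunk; CAUSAL here is a placeholder, not the real default
        return ContextAttentionMaskType::CAUSAL;
    }
}

int main()
{
    // After this commit, BIDIRECTIONALGLM maps to CAUSAL instead of being unhandled.
    std::cout << static_cast<int>(toContextMaskType(AttentionMaskType::BIDIRECTIONALGLM)) << "\n";
    return 0;
}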
