intel · yucai-intel · Nov 11, 2024 · Nov 12, 2024 · Nov 12, 2024 · Nov 13, 2024
diff --git a/test/xpu/xpu_test_utils.py b/test/xpu/xpu_test_utils.py
@@ -219,6 +219,7 @@
     "nn.functional.ctc_loss",
     "nn.functional.channel_shuffle",
     "nn.functional.multi_head_attention_forward",
+    "nn.TransformerEncoderLayer",
     "sigmoid",
     "logsigmoid",
     "sgn",

diff --git a/yaml/native/native_functions.yaml b/yaml/native/native_functions.yaml
@@ -5859,6 +5859,13 @@
     XPU: _dirichlet_grad_xpu
   autogen: _dirichlet_grad.out
 
+# NOTE: this op is named "_fwd" rather than "_forward" because, apparently, having "forward" in the name causes its Python bindings to be skipped.
+- func: _transformer_encoder_layer_fwd(Tensor src, int embed_dim, int num_heads, Tensor qkv_weight, Tensor qkv_bias, Tensor proj_weight, Tensor proj_bias, bool use_gelu, bool norm_first, float eps, Tensor norm_weight_1, Tensor norm_bias_1, Tensor norm_weight_2, Tensor norm_bias_2, Tensor ffn_weight_1, Tensor ffn_bias_1, Tensor ffn_weight_2, Tensor ffn_bias_2, Tensor? mask=None, int? mask_type=None) -> Tensor
+  variants: function
+  dispatch:
+    XPU: transformer_encoder_layer_forward
+  autogen: _transformer_encoder_layer_fwd.out
+
 # Fused implementation detail for transformers. Adds in-projection bias to QKV and divides Q by sqrt(D/num_heads).
 - func: _transform_bias_rescale_qkv(Tensor qkv, Tensor qkv_bias, int num_heads) -> (Tensor, Tensor, Tensor)
   dispatch: