Add huggingface from_pretrained / save_pretrained tests

pstjohn · pstjohn · commit cee769286091 · 2025-05-14T14:22:28.000-07:00
Adds integration tests to ensure models containing TransformerLayer
objects can be saved and loaded using the from_pretrained and
save_pretrained methods.

Signed-off-by: Peter St. John <pstjohn@nvidia.com>
diff --git a/setup.py b/setup.py
@@ -123,7 +123,7 @@ def setup_requirements() -> Tuple[List[str], List[str], List[str]]:
             )
             # Blackwell is not supported as of Triton 3.2.0, need custom internal build
             # install_reqs.append("triton")
-            test_reqs.extend(["numpy", "torchvision", "prettytable", "PyYAML"])
+            test_reqs.extend(["numpy", "torchvision", "prettytable", "PyYAML", "transformers"])
         if "jax" in frameworks:
             setup_reqs.extend(["jax[cuda12]", "flax>=0.7.1"])
             install_reqs.extend(["jax", "flax>=0.7.1"])
diff --git a/tests/pytorch/test_hf_integration.py b/tests/pytorch/test_hf_integration.py
@@ -0,0 +1,44 @@
+import pytest
+import torch
+from transformers.configuration_utils import PretrainedConfig
+from transformers.modeling_utils import PreTrainedModel
+
+from transformer_engine.pytorch.transformer import TransformerLayer
+from transformer_engine.pytorch.utils import is_bf16_compatible
+
+param_types = [torch.float32, torch.float16]
+if is_bf16_compatible():  # bf16 requires sm_80 or higher
+    param_types.append(torch.bfloat16)
+
+
+all_activations = ["gelu", "relu"]
+all_normalizations = ["LayerNorm", "RMSNorm"]
+
+
+@pytest.mark.parametrize("dtype", param_types)
+@pytest.mark.parametrize("activation", all_activations)
+@pytest.mark.parametrize("normalization", all_normalizations)
+def test_save_and_load_hf_model(tmp_path, dtype, activation, normalization):
+    class SimpleTEModel(PreTrainedModel):
+        config_class = PretrainedConfig
+
+        def __init__(self, config: PretrainedConfig):
+            super().__init__(config)
+            self.my_layer = TransformerLayer(
+                hidden_size=320,
+                num_attention_heads=16,
+                ffn_hidden_size=1024,
+                layer_number=None,
+                params_dtype=dtype,
+                activation=activation,
+                normalization=normalization,
+            )
+
+        def forward(self, hidden_states, attention_mask):
+            return self.my_layer(hidden_states, attention_mask)
+
+    model = SimpleTEModel(PretrainedConfig())
+
+    model.save_pretrained(tmp_path / "simple_te_model")
+    del model
+    SimpleTEModel.from_pretrained(tmp_path / "simple_te_model")

Original file line number	Diff line number	Diff line change
`@@ -123,7 +123,7 @@ def setup_requirements() -> Tuple[List[str], List[str], List[str]]:`
`123`	`123`	`)`
`124`	`124`	`# Blackwell is not supported as of Triton 3.2.0, need custom internal build`
`125`	`125`	`# install_reqs.append("triton")`
`126`		`- test_reqs.extend(["numpy", "torchvision", "prettytable", "PyYAML"])`
	`126`	`+ test_reqs.extend(["numpy", "torchvision", "prettytable", "PyYAML", "transformers"])`
`127`	`127`	`if "jax" in frameworks:`
`128`	`128`	`setup_reqs.extend(["jax[cuda12]", "flax>=0.7.1"])`
`129`	`129`	`install_reqs.extend(["jax", "flax>=0.7.1"])`