update
Chao1Han committed Jan 17, 2025
1 parent e087a80 commit 71c0347
Showing 18 changed files with 26 additions and 22 deletions.
2 changes: 1 addition & 1 deletion test/distributed/fsdp/test_distributed_checkpoint.py
@@ -86,6 +86,6 @@ def test_distributed_checkpoint(self, state_dict_type) -> None:


devices = ("cuda", "hpu", "xpu")
-instantiate_device_type_tests(TestDistributedCheckpoint, globals(), only_for=devices)
+instantiate_device_type_tests(TestDistributedCheckpoint, globals(), only_for=devices, allow_xpu=True)
if __name__ == "__main__":
run_tests()
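
Note: every test-file change in this commit is the same one-line edit shown above, passing allow_xpu=True to instantiate_device_type_tests so that listing "xpu" in only_for actually generates the XPU variants of the device-generic test classes. A minimal sketch of the pattern follows; the class name MyDeviceGenericTest is hypothetical, while the helpers are PyTorch's real device-type test utilities, assuming a build with XPU support.

```python
# Hypothetical stand-alone test module illustrating the pattern used in this commit.
import torch
from torch.testing._internal.common_device_type import instantiate_device_type_tests
from torch.testing._internal.common_utils import TestCase, run_tests


class MyDeviceGenericTest(TestCase):
    def test_add(self, device):
        # `device` is injected per generated class, e.g. "cuda:0" or "xpu:0".
        x = torch.ones(2, device=device)
        self.assertEqual((x + x).sum().item(), 4.0)


devices = ("cuda", "hpu", "xpu")
# Without allow_xpu=True, the "xpu" entry in only_for does not produce an
# XPU test class; with it, a MyDeviceGenericTestXPU variant is generated.
instantiate_device_type_tests(
    MyDeviceGenericTest, globals(), only_for=devices, allow_xpu=True
)

if __name__ == "__main__":
    run_tests()
```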
2 changes: 1 addition & 1 deletion test/distributed/fsdp/test_fsdp_apply.py
@@ -110,6 +110,6 @@ def test_apply_in_summon_raises_error(self):


devices = ("cuda", "hpu", "xpu")
-instantiate_device_type_tests(TestApply, globals(), only_for=devices)
+instantiate_device_type_tests(TestApply, globals(), only_for=devices, allow_xpu=True)
if __name__ == "__main__":
run_tests()
2 changes: 1 addition & 1 deletion test/distributed/fsdp/test_fsdp_checkpoint.py
@@ -335,6 +335,6 @@ def test_checkpoint_submodule(self, device, use_reentrant: bool):


devices = ("cuda", "hpu", "xpu")
-instantiate_device_type_tests(TestFSDPCheckpointSubmodule, globals(), only_for=devices)
+instantiate_device_type_tests(TestFSDPCheckpointSubmodule, globals(), only_for=devices, allow_xpu=True)
if __name__ == "__main__":
run_tests()
2 changes: 1 addition & 1 deletion test/distributed/fsdp/test_fsdp_clip_grad_norm.py
@@ -339,6 +339,6 @@ def _test_no_gradients(self, device, use_orig_params: bool):


devices = ("cuda", "hpu", "xpu")
-instantiate_device_type_tests(TestClipGradNorm, globals(), only_for=devices)
+instantiate_device_type_tests(TestClipGradNorm, globals(), only_for=devices, allow_xpu=True)
if __name__ == "__main__":
run_tests()
10 changes: 5 additions & 5 deletions test/distributed/fsdp/test_fsdp_core.py
@@ -513,10 +513,10 @@ def _patch_use_unsharded_views(self, new_use_unsharded_views: Callable):


devices = ("cuda", "hpu", "xpu")
-instantiate_device_type_tests(TestHooks, globals(), only_for=devices)
-instantiate_device_type_tests(TestParityWithDDP, globals(), only_for=devices)
-instantiate_device_type_tests(TestNoGrad, globals(), only_for=devices)
-instantiate_device_type_tests(TestParamInit, globals(), only_for=devices)
-instantiate_device_type_tests(TestAutograd, globals(), only_for=devices)
+instantiate_device_type_tests(TestHooks, globals(), only_for=devices, allow_xpu=True)
+instantiate_device_type_tests(TestParityWithDDP, globals(), only_for=devices, allow_xpu=True)
+instantiate_device_type_tests(TestNoGrad, globals(), only_for=devices, allow_xpu=True)
+instantiate_device_type_tests(TestParamInit, globals(), only_for=devices, allow_xpu=True)
+instantiate_device_type_tests(TestAutograd, globals(), only_for=devices, allow_xpu=True)
if __name__ == "__main__":
run_tests()
2 changes: 1 addition & 1 deletion test/distributed/fsdp/test_fsdp_dtensor_state_dict.py
@@ -287,7 +287,7 @@ def test_raises_warning_or_errors(self):

devices = ("cuda", "hpu", "xpu")
instantiate_device_type_tests(
-    TestFSDPWithDeviceMeshAndDTensor, globals(), only_for=devices
+    TestFSDPWithDeviceMeshAndDTensor, globals(), only_for=devices, allow_xpu=True
)
if __name__ == "__main__":
run_tests()
2 changes: 1 addition & 1 deletion test/distributed/fsdp/test_fsdp_exec_order.py
@@ -212,6 +212,6 @@ def test_train_eval(self, device, sharding_strategy: ShardingStrategy):


devices = ("cuda", "hpu", "xpu")
-instantiate_device_type_tests(TestFSDPExecOrder, globals(), only_for=devices)
+instantiate_device_type_tests(TestFSDPExecOrder, globals(), only_for=devices, allow_xpu=True)
if __name__ == "__main__":
run_tests()
2 changes: 1 addition & 1 deletion test/distributed/fsdp/test_fsdp_fine_tune.py
@@ -405,6 +405,6 @@ def _test_parity_with_non_frozen_fsdp(


devices = ("cuda", "hpu", "xpu")
-instantiate_device_type_tests(TestFSDPFineTune, globals(), only_for=devices)
+instantiate_device_type_tests(TestFSDPFineTune, globals(), only_for=devices, allow_xpu=True)
if __name__ == "__main__":
run_tests()
2 changes: 1 addition & 1 deletion test/distributed/fsdp/test_fsdp_fx.py
@@ -114,6 +114,6 @@ def test_symbolic_tracing_outputs(self):


devices = ("cuda", "hpu", "xpu")
-instantiate_device_type_tests(TestSymbolicTracing, globals(), only_for=devices)
+instantiate_device_type_tests(TestSymbolicTracing, globals(), only_for=devices, allow_xpu=True)
if __name__ == "__main__":
run_tests()
2 changes: 1 addition & 1 deletion test/distributed/fsdp/test_fsdp_input.py
@@ -71,6 +71,6 @@ def forward(self, input):


devices = ("cuda", "hpu", "xpu")
-instantiate_device_type_tests(TestInput, globals(), only_for=devices)
+instantiate_device_type_tests(TestInput, globals(), only_for=devices, allow_xpu=True)
if __name__ == "__main__":
run_tests()
2 changes: 1 addition & 1 deletion test/distributed/fsdp/test_fsdp_multiple_wrapping.py
@@ -62,6 +62,6 @@ def test_multiple_wrapping(self, device):


devices = ("cuda", "hpu", "xpu")
-instantiate_device_type_tests(TestMultipleWrapping, globals(), only_for=devices)
+instantiate_device_type_tests(TestMultipleWrapping, globals(), only_for=devices, allow_xpu=True)
if __name__ == "__main__":
run_tests()
2 changes: 1 addition & 1 deletion test/distributed/fsdp/test_fsdp_overlap.py
@@ -258,7 +258,7 @@ def world_size(self):

devices = ("cuda", "hpu", "xpu")
instantiate_device_type_tests(
-    TestForwardOverlapWorldSizeOne, globals(), only_for=devices
+    TestForwardOverlapWorldSizeOne, globals(), only_for=devices, allow_xpu=True
)
if __name__ == "__main__":
run_tests()
2 changes: 1 addition & 1 deletion test/distributed/fsdp/test_fsdp_pure_fp16.py
@@ -152,6 +152,6 @@ def _test_fp16_dtypes(


devices = ("cuda", "hpu", "xpu")
-instantiate_device_type_tests(TestPureFP16, globals(), only_for=devices)
+instantiate_device_type_tests(TestPureFP16, globals(), only_for=devices, allow_xpu=True)
if __name__ == "__main__":
run_tests()
2 changes: 1 addition & 1 deletion test/distributed/fsdp/test_fsdp_traversal.py
@@ -57,6 +57,6 @@ def test_fsdp_modules(self):


devices = ("cuda", "hpu", "xpu")
-instantiate_device_type_tests(TestTraversal, globals(), only_for=devices)
+instantiate_device_type_tests(TestTraversal, globals(), only_for=devices, allow_xpu=True)
if __name__ == "__main__":
run_tests()
2 changes: 1 addition & 1 deletion test/distributed/fsdp/test_fsdp_uneven.py
@@ -69,6 +69,6 @@ def test_one_iteration(self, device):


devices = ("cuda", "hpu", "xpu")
-instantiate_device_type_tests(TestUnevenParamShard, globals(), only_for=devices)
+instantiate_device_type_tests(TestUnevenParamShard, globals(), only_for=devices, allow_xpu=True)
if __name__ == "__main__":
run_tests()
2 changes: 1 addition & 1 deletion test/distributed/fsdp/test_hsdp_dtensor_state_dict.py
@@ -326,7 +326,7 @@ def forward(self, x):

devices = ("cuda", "hpu", "xpu")
instantiate_device_type_tests(
-    TestHSDPWithDeviceMeshAndDTensor, globals(), only_for=devices
+    TestHSDPWithDeviceMeshAndDTensor, globals(), only_for=devices, allow_xpu=True
)
if __name__ == "__main__":
run_tests()
2 changes: 1 addition & 1 deletion test/distributed/fsdp/test_utils.py
@@ -131,6 +131,6 @@ def fill_fn(x):


devices = ("cuda", "hpu", "xpu")
-instantiate_device_type_tests(TestUtils, globals(), only_for=devices)
+instantiate_device_type_tests(TestUtils, globals(), only_for=devices, allow_xpu=True)
if __name__ == "__main__":
run_tests()
6 changes: 5 additions & 1 deletion torch/testing/_internal/common_distributed.py
@@ -44,6 +44,7 @@
     TestCase,
     run_tests,
     TEST_HPU,
+    TEST_XPU,
 )
 from torch.testing._internal.distributed.multi_threaded_pg import (
     _install_threaded_pg,
@@ -199,6 +200,8 @@ def wrapper(*args, **kwargs):
                 return func(*args, **kwargs)
             if TEST_HPU and torch.hpu.device_count() >= x:
                 return func(*args, **kwargs)
+            if TEST_XPU and torch.xpu.device_count() >= x:
+                return func(*args, **kwargs)
             sys.exit(TEST_SKIPS[f"multi-gpu-{x}"].exit_code)

         return wrapper
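
The hunk above extends a multi-device guard so that machines exposing XPU devices also satisfy the "at least x devices" requirement. A self-contained sketch of that guard pattern is below; it is not the actual PyTorch decorator, and require_n_devices and _device_count are illustrative names (the real helper exits with a skip code rather than raising SkipTest).

```python
# Illustrative sketch of an "at least N accelerator devices" test guard.
import functools
import unittest

import torch


def _device_count() -> int:
    # Count whichever accelerator is present; ordering mirrors the checks above.
    if torch.cuda.is_available():
        return torch.cuda.device_count()
    if hasattr(torch, "xpu") and torch.xpu.is_available():
        return torch.xpu.device_count()
    return 0


def require_n_devices(x: int):
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            if _device_count() >= x:
                return func(*args, **kwargs)
            raise unittest.SkipTest(f"requires at least {x} accelerator devices")

        return wrapper

    return decorator


class ExampleMultiDeviceTest(unittest.TestCase):
    @require_n_devices(2)
    def test_needs_two_devices(self):
        self.assertTrue(True)
```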
@@ -510,7 +513,8 @@ def init_multigpu_helper(world_size: int, backend: str):
     nGPUs = torch.cuda.device_count()
     if TEST_HPU:
         nGPUs = torch.hpu.device_count()
-
+    if TEST_XPU:
+        nGPUs = torch.xpu.device_count()
     visible_devices = range(nGPUs)

     # If rank is less than or equal to number of available GPU's
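
The hunk above makes the device count used by the world-size helper XPU-aware. A rough, self-contained sketch of the kind of rank-to-device partitioning such a helper produces is shown below; devices_per_rank is a hypothetical name and the even split is an assumption for illustration, only the XPU-aware device counting mirrors the change above.

```python
# Hypothetical sketch: partition visible accelerator devices across ranks.
import torch


def devices_per_rank(world_size: int) -> dict[int, list[int]]:
    # Count devices for whichever accelerator is available (CUDA or XPU here).
    if torch.cuda.is_available():
        n_devices = torch.cuda.device_count()
    elif hasattr(torch, "xpu") and torch.xpu.is_available():
        n_devices = torch.xpu.device_count()
    else:
        n_devices = 0

    visible = list(range(n_devices))
    if n_devices == 0:
        return {rank: [] for rank in range(world_size)}

    per_rank = max(n_devices // world_size, 1)
    return {
        rank: visible[rank * per_rank : (rank + 1) * per_rank]
        for rank in range(world_size)
    }


# Example: with 8 devices and world_size=4, rank 0 -> [0, 1], rank 1 -> [2, 3], ...
print(devices_per_rank(world_size=4))
```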
