Skip to content

Commit 9d89a1f

Browse files
committed
enable xpu tests
1 parent 3d97844 commit 9d89a1f

File tree

2 files changed

+23
-12
lines changed

2 files changed

+23
-12
lines changed

torch/testing/_internal/common_utils.py

Lines changed: 12 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -5179,14 +5179,18 @@ def get_cycles_per_ms() -> float:
51795179
"""
51805180

51815181
def measure() -> float:
5182-
start = torch.cuda.Event(enable_timing=True)
5183-
end = torch.cuda.Event(enable_timing=True)
5184-
start.record()
5185-
torch.cuda._sleep(1000000)
5186-
end.record()
5187-
end.synchronize()
5188-
cycles_per_ms = 1000000 / start.elapsed_time(end)
5189-
return cycles_per_ms
5182+
if torch.cuda.is_available():
5183+
start = torch.cuda.Event(enable_timing=True)
5184+
end = torch.cuda.Event(enable_timing=True)
5185+
start.record()
5186+
torch.cuda._sleep(1000000)
5187+
end.record()
5188+
end.synchronize()
5189+
cycles_per_ms = 1000000 / start.elapsed_time(end)
5190+
return cycles_per_ms
5191+
elif torch.xpu.is_available():
5192+
cycles_per_ms = 1000000 / 1000.0
5193+
return cycles_per_ms
51905194

51915195
# Get 10 values and remove the 2 max and 2 min and return the avg.
51925196
# This is to avoid system disturbance that skew the results, e.g.

torch/testing/_internal/distributed/_tensor/common_dtensor.py

Lines changed: 11 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,7 @@
3232
from torch.testing._internal.common_utils import (
3333
TEST_HPU,
3434
TEST_CUDA,
35+
TEST_XPU
3536
)
3637
from torch.testing._internal.common_distributed import (
3738
MultiProcessTestCase,
@@ -52,6 +53,10 @@
5253
DEVICE_TYPE = "hpu"
5354
PG_BACKEND = "hccl"
5455
DEVICE_COUNT = _get_device_module("hpu").device_count()
56+
elif TEST_XPU:
57+
DEVICE_TYPE = "xpu"
58+
PG_BACKEND = "xccl"
59+
DEVICE_COUNT = _get_device_module("xpu").device_count()
5560
else:
5661
DEVICE_TYPE = "cpu"
5762
PG_BACKEND = "gloo"
@@ -325,6 +330,8 @@ def backend(self) -> str:
325330
backend = "nccl"
326331
elif TEST_HPU:
327332
backend = "hccl"
333+
elif TEST_XPU:
334+
backend = "xccl"
328335
else:
329336
backend = "gloo"
330337
return backend
@@ -396,10 +403,10 @@ def wrapper(
396403
self, *args: tuple[object], **kwargs: dict[str, Any] # type: ignore[misc]
397404
) -> None:
398405
# if enough GPU we can use GPU, otherwise we fallback to CPU
399-
if not TEST_CUDA or torch.cuda.device_count() < self.world_size:
400-
self.device_type = "cpu"
401-
else:
402-
self.device_type = DEVICE_TYPE
406+
# if not TEST_CUDA or torch.cuda.device_count() < self.world_size:
407+
# self.device_type = "cpu"
408+
# else:
409+
self.device_type = DEVICE_TYPE #zl_debug need to refine
403410

404411
self.init_pg(eager_init)
405412

0 commit comments

Comments (0)