Commit 9d89a1f: enable xpu tests

zhangxiaoli73 committed Jan 23, 2025 · 1 parent 3d97844

Showing 2 changed files with 23 additions and 12 deletions.
20 changes: 12 additions & 8 deletions torch/testing/_internal/common_utils.py
@@ -5179,14 +5179,18 @@ def get_cycles_per_ms() -> float:
"""

def measure() -> float:
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)
start.record()
torch.cuda._sleep(1000000)
end.record()
end.synchronize()
cycles_per_ms = 1000000 / start.elapsed_time(end)
return cycles_per_ms
if torch.cuda.is_available():
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)
start.record()
torch.cuda._sleep(1000000)
end.record()
end.synchronize()
cycles_per_ms = 1000000 / start.elapsed_time(end)
return cycles_per_ms
elif torch.xpu.is_available():
cycles_per_ms = 1000000 / 1000.0
return cycles_per_ms

# Get 10 values and remove the 2 max and 2 min and return the avg.
# This is to avoid system disturbance that skew the results, e.g.
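For context, `get_cycles_per_ms()` is how tests translate a wall-clock duration into a device-cycle count for the busy-wait kernel; the new XPU branch skips the measurement and assumes a fixed 1000 cycles per millisecond. A minimal sketch of typical consumption (assuming a CUDA build and the private `torch.cuda._sleep` API already shown in the diff):

```python
import torch
from torch.testing._internal.common_utils import get_cycles_per_ms

# Convert roughly 100 ms of wall-clock time into device cycles, then enqueue
# a busy-wait kernel so the stream stays occupied without blocking the host.
cycles_per_ms = get_cycles_per_ms()
torch.cuda._sleep(int(100 * cycles_per_ms))
```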
15 changes: 11 additions & 4 deletions torch/testing/_internal/distributed/_tensor/common_dtensor.py
@@ -32,6 +32,7 @@
 from torch.testing._internal.common_utils import (
     TEST_HPU,
     TEST_CUDA,
+    TEST_XPU
 )
 from torch.testing._internal.common_distributed import (
     MultiProcessTestCase,
@@ -52,6 +53,10 @@
     DEVICE_TYPE = "hpu"
     PG_BACKEND = "hccl"
     DEVICE_COUNT = _get_device_module("hpu").device_count()
+elif TEST_XPU:
+    DEVICE_TYPE = "xpu"
+    PG_BACKEND = "xccl"
+    DEVICE_COUNT = _get_device_module("xpu").device_count()
 else:
     DEVICE_TYPE = "cpu"
     PG_BACKEND = "gloo"
@@ -325,6 +330,8 @@ def backend(self) -> str:
             backend = "nccl"
         elif TEST_HPU:
             backend = "hccl"
+        elif TEST_XPU:
+            backend = "xccl"
 else:
             backend = "gloo"
         return backend
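The string returned here is ultimately fed to the process-group constructor, with "xccl" as the XPU collective backend mirroring "nccl"/"hccl". A rough sketch of the consuming call (argument values are illustrative, and an XPU build with the xccl backend registered is assumed):

```python
import torch.distributed as dist

# The harness's init_pg ends up doing something of this shape: the backend
# string chosen above selects nccl/hccl/xccl/gloo for the default group.
dist.init_process_group(
    backend="xccl",  # value the property returns on an XPU machine
    init_method="tcp://127.0.0.1:29500",
    rank=0,          # illustrative single-rank values
    world_size=1,
)
```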
@@ -396,10 +403,10 @@ def wrapper(
             self, *args: tuple[object], **kwargs: dict[str, Any]  # type: ignore[misc]
         ) -> None:
             # if enough GPU we can use GPU, otherwise we fallback to CPU
-            if not TEST_CUDA or torch.cuda.device_count() < self.world_size:
-                self.device_type = "cpu"
-            else:
-                self.device_type = DEVICE_TYPE
+            # if not TEST_CUDA or torch.cuda.device_count() < self.world_size:
+            #     self.device_type = "cpu"
+            # else:
+            self.device_type = DEVICE_TYPE #zl_debug need to refine
 
             self.init_pg(eager_init)
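The commented-out CUDA-only fallback is what the `zl_debug` note flags for refinement: it forced CPU whenever CUDA was absent, which would wrongly demote XPU runs. A hypothetical device-agnostic replacement for the wrapper body (not part of this commit; `DEVICE_TYPE` and `DEVICE_COUNT` come from the module scope above):

```python
# Fall back to CPU only when the chosen accelerator cannot supply one
# device per rank, regardless of whether it is CUDA, HPU, or XPU.
if DEVICE_TYPE != "cpu" and DEVICE_COUNT < self.world_size:
    self.device_type = "cpu"
else:
    self.device_type = DEVICE_TYPE
```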

