From 0f6f42dc0a8b5e9d533e6f45bb531d6e7d43e101 Mon Sep 17 00:00:00 2001
From: Yong Hoon Shin
Date: Tue, 16 Jan 2024 08:20:40 -0800
Subject: [PATCH] Enable DDP test

Summary: Enables DDP test to run in sandbox envs with network restrictions by using FileStore instead of TCPStore

Differential Revision: D52802276
---
 torchrec/distributed/composable/tests/test_ddp.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/torchrec/distributed/composable/tests/test_ddp.py b/torchrec/distributed/composable/tests/test_ddp.py
index d4cf33d08..10285d059 100644
--- a/torchrec/distributed/composable/tests/test_ddp.py
+++ b/torchrec/distributed/composable/tests/test_ddp.py
@@ -42,7 +42,12 @@ def _run_init_parameters(cls, path: str) -> None:
         else:
             device: torch.device = torch.device("cpu")
             backend = "gloo"
-        dist.init_process_group(backend=backend)
+        dist.init_process_group(
+            backend=backend,
+            rank=rank,
+            world_size=world_size,
+            init_method=f"file://{os.path.join(path, 'dist_rdvz')}",
+        )
 
         num_float_features = 32
         tables = [
@@ -94,7 +99,12 @@ def _run(cls, path: str) -> None:
         else:
             device: torch.device = torch.device("cpu")
             backend = "gloo"
-        dist.init_process_group(backend=backend)
+        dist.init_process_group(
+            backend=backend,
+            rank=rank,
+            world_size=world_size,
+            init_method=f"file://{os.path.join(path, 'dist_rdvz')}",
+        )
 
         num_float_features = 32
         tables = [