Back out "Separate DDP and DMP pgs to improve T16 training QPS"
Summary:
Original commit changeset: 27b5dce8fb21

Original Phabricator Diff: D58691294

Differential Revision: D58790831
henrylhtsang authored and facebook-github-bot committed Jun 20, 2024
1 parent e84a0f0 commit 45e2c90
Showing 1 changed file with 1 addition and 7 deletions: torchrec/distributed/model_parallel.py

@@ -219,7 +219,6 @@ def __init__(
         init_data_parallel: bool = True,
         init_parameters: bool = True,
         data_parallel_wrapper: Optional[DataParallelWrapper] = None,
-        separate_pg_for_ddp: bool = False,
     ) -> None:
         super().__init__()
         torch._C._log_api_usage_once(f"torchrec.distributed.{self.__class__.__name__}")
@@ -233,11 +232,6 @@ def __init__(
             assert pg is not None, "Process group is not initialized"
             env = ShardingEnv.from_process_group(pg)
         self._env: ShardingEnv = env
-        if separate_pg_for_ddp:
-            ddp_pg = dist.new_group()
-            self.ddp_env: ShardingEnv = ShardingEnv.from_process_group(ddp_pg)
-        else:
-            self.ddp_env: ShardingEnv = self._env
 
         if device is None:
             device = torch.device("cpu")
@@ -309,7 +303,7 @@ def init_data_parallel(self) -> None:
             # Allocate any 'meta' tensors
             if self.init_parameters:
                 self._init_parameters(self._dmp_wrapped_module)
-            self._data_parallel_wrapper.wrap(self, self.ddp_env, self.device)
+            self._data_parallel_wrapper.wrap(self, self._env, self.device)
             self._ddp_wrapped = True
 
     def copy(
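
For context, the backed-out option only controlled which process group the data-parallel wrapper received. Below is a minimal sketch of the two code paths, based on the removed lines above; the standalone helper name resolve_ddp_env is illustrative only and not part of the torchrec API.

    # Illustrative helper mirroring the branch removed in the diff above;
    # not an actual torchrec function.
    import torch.distributed as dist
    from torchrec.distributed.types import ShardingEnv


    def resolve_ddp_env(env: ShardingEnv, separate_pg_for_ddp: bool) -> ShardingEnv:
        """Pick the ShardingEnv handed to the data-parallel wrapper."""
        if separate_pg_for_ddp:
            # Backed-out behavior: DDP collectives ran on a dedicated process group.
            ddp_pg = dist.new_group()
            return ShardingEnv.from_process_group(ddp_pg)
        # Behavior after this revert: DDP reuses the sharding process group.
        return env

After the revert, DistributedModelParallel always passes self._env to the data-parallel wrapper's wrap() call, so only the final branch of the sketch remains.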
