Skip to content

Commit

Permalink
add oneccl env
Browse files Browse the repository at this point in the history
  • Loading branch information
Chao1Han committed Sep 12, 2024
1 parent 3530e43 commit 485ae8b
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 0 deletions.
12 changes: 12 additions & 0 deletions torch/csrc/distributed/c10d/ProcessGroupXCCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,18 @@ ProcessGroupXCCL::ProcessGroupXCCL(
: Backend(rank, size), store_(store) {
blockingWait_ = getCvarBool(TORCH_XCCL_BLOCKING_WAIT, false);
init();

{
// Publish the local rank / local size to the CCL_* environment variables.
// getXCCLEnvVar() yields -1 when the requested variable is not set.
int local_rank = getXCCLEnvVar("LOCAL_RANK");
int local_world_size = getXCCLEnvVar("LOCAL_WORLD_SIZE");
// If either value is -1, fall back to `rank` and `size` for both values.
// NOTE(review): rank/size are presumably the constructor arguments that are
// also forwarded to Backend(rank, size) — confirm against the full signature.
if (local_rank == -1 || local_world_size == -1) {
local_rank = rank;
local_world_size = size;
}
// NOTE(review): "none" presumably tells oneCCL that no external process
// launcher is in use, so it relies on the two values exported below —
// confirm against the oneCCL environment variable documentation.
setXCCLEnvVar("CCL_PROCESS_LAUNCHER", "none");
setXCCLEnvVar("CCL_LOCAL_RANK", local_rank);
setXCCLEnvVar("CCL_LOCAL_SIZE", local_world_size);
}
}

ProcessGroupXCCL::~ProcessGroupXCCL() = default;
Expand Down
26 changes: 26 additions & 0 deletions torch/csrc/distributed/c10d/ProcessGroupXCCL.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,32 @@
#include <torch/csrc/distributed/c10d/Store.hpp>
namespace c10d {

namespace {
// Reads the environment variable `envVarName` and parses it as an int.
//
// Returns the parsed value, or -1 when the variable is not set. If the
// variable is set but std::stoi cannot parse it, TORCH_CHECK fails with a
// message naming the variable.
int getXCCLEnvVar(std::string envVarName) {
  const char* rawValue = std::getenv(envVarName.c_str());
  // Unset variable: report the -1 sentinel instead of raising an error.
  if (rawValue == nullptr) {
    return -1;
  }
  try {
    return std::stoi(rawValue);
  } catch (std::exception&) {
    // std::stoi reports both malformed and out-of-range input by throwing.
    TORCH_CHECK(
        false,
        "Invalid value for environment variable: " + std::string(envVarName));
  }
}

// Sets the environment variable `envVarName` to the decimal text of `val`,
// replacing any value that is already present in the environment.
//
// The third argument of setenv() is the overwrite flag, not a value. It must
// be non-zero here: forwarding `val` would leave a pre-existing variable
// untouched whenever val == 0 (for example local rank 0).
void setXCCLEnvVar(std::string envVarName, int val) {
  const std::string text = std::to_string(val);
  setenv(envVarName.c_str(), text.c_str(), /*overwrite=*/1);
}

// Sets the environment variable `envVarName` to the string `val`.
// An existing value is always replaced (setenv overwrite flag is 1).
void setXCCLEnvVar(std::string envVarName, std::string val) {
  const char* name = envVarName.c_str();
  const char* value = val.c_str();
  constexpr int kOverwriteExisting = 1;
  setenv(name, value, kOverwriteExisting);
}
} // namespace

// Environment variable names for the blocking-wait setting. The
// ProcessGroupXCCL constructor hands this list to getCvarBool() (with a
// default of false) to initialise blockingWait_.
// NOTE(review): presumably the names are consulted in the listed order —
// confirm against getCvarBool.
static std::vector<std::string> TORCH_XCCL_BLOCKING_WAIT = {
"TORCH_XCCL_BLOCKING_WAIT",
"XCCL_BLOCKING_WAIT"};
Expand Down

0 comments on commit 485ae8b

Please sign in to comment.