Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove unused functions and arguments from comms_utils.py #152

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 0 additions & 131 deletions et_replay/comm/comms_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,49 +73,6 @@ def gracefulExit(args: Any = 0) -> None:
sys.exit(args)


def parsesize(ipValue: str) -> int:
"""
nccl-tests compatible input-size parsing.

Args:
ipValue: Contains size of input.
Returns:
size: Returns the size of input.
"""
units = 0
size = 0.0

value = ""

# This function would be invoked in a loop - once for each data-type. For first iteration, ipValue is of type string but after that,
# the type of ipValue equals the returntype of prior iteration ie; int. Hence, type check is moved up as first condition.
if isinstance(ipValue, int) or ipValue.isnumeric():
units = 1
value = ipValue

elif ipValue.find("G") != -1:
units = 1024 * 1024 * 1024
unitIdx = ipValue.find("G")
value = ipValue[0:unitIdx]

elif ipValue.find("M") != -1:
units = 1024 * 1024
unitIdx = ipValue.find("M")
value = ipValue[0:unitIdx]

elif ipValue.find("K") != -1:
units = 1024
unitIdx = ipValue.find("K")
value = ipValue[0:unitIdx]

else:
logger.error(f"Could not parse input size {ipValue}")
gracefulExit()

size = int(value) * units
return int(size)


def parseRankList(ipStr: str) -> List[int]:
"""
Parses a string into a rank list.
Expand All @@ -142,56 +99,6 @@ def parseRankList(ipStr: str) -> List[int]:
return rankList


def getAlgBW(elapsedTimeNS: float, dataSize: int, numIters: int) -> Tuple[float, float]:
"""
Similar to how algorithmic bandwidth is computed in nccl-tests.

Args:
elapsedTimeNS: Total elapsed time for run in ns.
dataSize: Size in bytes of the data being ran.
numIters: Number of iterations for run.
Returns:
(avgIterNs, algBW): Returns the average amount of time in ns per iteration, and the algBW (GBps) calculated.
"""
avgIterNS = 0.0
if numIters != 0:
avgIterNS = elapsedTimeNS / numIters

algBW = 0.0
if avgIterNS != 0:
algBW = (dataSize) / (avgIterNS) # dataSize dividied by ns gives us GBps
return (avgIterNS, algBW)


def getSizes(
beginSize: int, endSize: int, stepFactor: int, stepBytes: int
) -> List[int]:
"""
Gets the sizes of each iteration.

Args:
beginSize: Size of first iteration.
endSize: Size of last iteration.
stepFactor: Factor that each iteration increases by.
Returns:
allSizes: List that contains size of each iteration up to endSize.
"""
curSize = beginSize
numIters = 0
maxIters = 100
allSizes = []
while curSize <= endSize:
allSizes.append(curSize)
curSize = curSize * stepFactor if stepBytes == 0 else curSize + stepBytes
numIters = numIters + 1
if numIters > 100:
logger.error(
f"For finding allSizes numIters: {numIters} is greater than maxIters: {maxIters}"
)
break
return allSizes


def fixBeginSize(commsParams: commsParamsHolder, world_size: int) -> None:
"""
Validate begin size to match other parameters.
Expand Down Expand Up @@ -799,41 +706,9 @@ def __init__(self, args: Namespace) -> None:
self.enable_local_report = args.enable_local_report
self.enable_profiler = args.enable_profiler
self.use_perf_logger = args.use_perf_logger
self.ibv_devices = args.ibv_devices
self.init_only = args.init_only


class commsDlrmParamsHolder(commsParamsHolderBase):
"""Class holding object for the input parameters of DLRM benchmark."""

def __init__(
self,
args,
mpi_env_params: Dict[str, int],
) -> None:
super().__init__(args)

# extra DLRM parameters
self.numDevices = mpi_env_params["world_size"]
self.numBatches = args.num_batches + args.warmup_batches
# NOTE: Should ensure that dataSize = int(N) * numDevices * batchSize
self.numBatchesPerEpoch = args.mini_batch_size
self.dataSize = (
mpi_env_params["world_size"] * self.numBatches * self.numBatchesPerEpoch
)
self.embedLayers = [] # scaledEmbedLayers
self.mini_batch_size = args.mini_batch_size
self.arch_sparse_feature_size = args.arch_sparse_feature_size
self.nw_stack = args.nw_stack
self.warmup_batches = args.warmup_batches
self.device = args.device
self.backend = args.backend

# additional parameters used in runBench()
self.perf_debug = args.perf_debug
self.print_comms = args.print_comms


class commsParamsHolder(commsParamsHolderBase):
"""Class holding object for the input parameters from collective benchmark."""

Expand Down Expand Up @@ -1627,12 +1502,6 @@ def readArgs(self, parser: ArgumentParser) -> None:
default=None,
help="add name of custom performer loggers to use them in additional to text output, user is responsible to implement and register the custom performance logger",
) # use custom performer logger
parser.add_argument(
"--ibv-devices",
type=str,
default="",
help="list of ib devices to use for distributed communication",
) # experimental feature
parser.add_argument(
"--init-only",
action="store_true",
Expand Down
Loading