From c189dd933a5057d48d594fa568307419e02b003b Mon Sep 17 00:00:00 2001 From: Sheng Fu Date: Wed, 7 Aug 2024 18:29:50 -0700 Subject: [PATCH] Remove unused functions and arguments from comms_utils.py (#152) Summary: Remove unused functions and arguments from comms_utils.py Pull Request resolved: https://github.com/facebookresearch/param/pull/152 Test Plan: $ comm_replay --trace-type et --trace-path /home/sanshang/021_debug/000_code/param/trace/traces_megatronlm_gpt_43B_32ranks_pytnightly0703/execution_trace Reviewed By: briancoutinho Differential Revision: D60880333 Pulled By: shengfukevin fbshipit-source-id: 26e0ee457bca609356d8384992ab34ffaab0db8b --- et_replay/comm/comms_utils.py | 131 ---------------------------------- 1 file changed, 131 deletions(-) diff --git a/et_replay/comm/comms_utils.py b/et_replay/comm/comms_utils.py index 7d6727da..08a0201d 100644 --- a/et_replay/comm/comms_utils.py +++ b/et_replay/comm/comms_utils.py @@ -73,49 +73,6 @@ def gracefulExit(args: Any = 0) -> None: sys.exit(args) -def parsesize(ipValue: str) -> int: - """ - nccl-tests compatible input-size parsing. - - Args: - ipValue: Contains size of input. - Returns: - size: Returns the size of input. - """ - units = 0 - size = 0.0 - - value = "" - - # This function would be invoked in a loop - once for each data-type. For first iteration, ipValue is of type string but after that, - # the type of ipValue equals the returntype of prior iteration ie; int. Hence, type check is moved up as first condition. - if isinstance(ipValue, int) or ipValue.isnumeric(): - units = 1 - value = ipValue - - elif ipValue.find("G") != -1: - units = 1024 * 1024 * 1024 - unitIdx = ipValue.find("G") - value = ipValue[0:unitIdx] - - elif ipValue.find("M") != -1: - units = 1024 * 1024 - unitIdx = ipValue.find("M") - value = ipValue[0:unitIdx] - - elif ipValue.find("K") != -1: - units = 1024 - unitIdx = ipValue.find("K") - value = ipValue[0:unitIdx] - - else: - logger.error(f"Could not parse input size {ipValue}") - gracefulExit() - - size = int(value) * units - return int(size) - - def parseRankList(ipStr: str) -> List[int]: """ Parses a string into a rank list. @@ -142,56 +99,6 @@ def parseRankList(ipStr: str) -> List[int]: return rankList -def getAlgBW(elapsedTimeNS: float, dataSize: int, numIters: int) -> Tuple[float, float]: - """ - Similar to how algorithmic bandwidth is computed in nccl-tests. - - Args: - elapsedTimeNS: Total elapsed time for run in ns. - dataSize: Size in bytes of the data being ran. - numIters: Number of iterations for run. - Returns: - (avgIterNs, algBW): Returns the average amount of time in ns per iteration, and the algBW (GBps) calculated. - """ - avgIterNS = 0.0 - if numIters != 0: - avgIterNS = elapsedTimeNS / numIters - - algBW = 0.0 - if avgIterNS != 0: - algBW = (dataSize) / (avgIterNS) # dataSize dividied by ns gives us GBps - return (avgIterNS, algBW) - - -def getSizes( - beginSize: int, endSize: int, stepFactor: int, stepBytes: int -) -> List[int]: - """ - Gets the sizes of each iteration. - - Args: - beginSize: Size of first iteration. - endSize: Size of last iteration. - stepFactor: Factor that each iteration increases by. - Returns: - allSizes: List that contains size of each iteration up to endSize. - """ - curSize = beginSize - numIters = 0 - maxIters = 100 - allSizes = [] - while curSize <= endSize: - allSizes.append(curSize) - curSize = curSize * stepFactor if stepBytes == 0 else curSize + stepBytes - numIters = numIters + 1 - if numIters > 100: - logger.error( - f"For finding allSizes numIters: {numIters} is greater than maxIters: {maxIters}" - ) - break - return allSizes - - def fixBeginSize(commsParams: commsParamsHolder, world_size: int) -> None: """ Validate begin size to match other parameters. @@ -799,41 +706,9 @@ def __init__(self, args: Namespace) -> None: self.enable_local_report = args.enable_local_report self.enable_profiler = args.enable_profiler self.use_perf_logger = args.use_perf_logger - self.ibv_devices = args.ibv_devices self.init_only = args.init_only -class commsDlrmParamsHolder(commsParamsHolderBase): - """Class holding object for the input parameters of DLRM benchmark.""" - - def __init__( - self, - args, - mpi_env_params: Dict[str, int], - ) -> None: - super().__init__(args) - - # extra DLRM parameters - self.numDevices = mpi_env_params["world_size"] - self.numBatches = args.num_batches + args.warmup_batches - # NOTE: Should ensure that dataSize = int(N) * numDevices * batchSize - self.numBatchesPerEpoch = args.mini_batch_size - self.dataSize = ( - mpi_env_params["world_size"] * self.numBatches * self.numBatchesPerEpoch - ) - self.embedLayers = [] # scaledEmbedLayers - self.mini_batch_size = args.mini_batch_size - self.arch_sparse_feature_size = args.arch_sparse_feature_size - self.nw_stack = args.nw_stack - self.warmup_batches = args.warmup_batches - self.device = args.device - self.backend = args.backend - - # additional parameters used in runBench() - self.perf_debug = args.perf_debug - self.print_comms = args.print_comms - - class commsParamsHolder(commsParamsHolderBase): """Class holding object for the input parameters from collective benchmark.""" @@ -1627,12 +1502,6 @@ def readArgs(self, parser: ArgumentParser) -> None: default=None, help="add name of custom performer loggers to use them in additional to text output, user is responsible to implement and register the custom performance logger", ) # use custom performer logger - parser.add_argument( - "--ibv-devices", - type=str, - default="", - help="list of ib devices to use for distributed communication", - ) # experimental feature parser.add_argument( "--init-only", action="store_true",