Skip to content

Commit

Permalink
Remove unused functions and arguments from comms_utils.py (#152)
Browse files Browse the repository at this point in the history
Summary:
Remove unused functions and arguments from comms_utils.py

Pull Request resolved: #152

Test Plan: $ comm_replay --trace-type et --trace-path /home/sanshang/021_debug/000_code/param/trace/traces_megatronlm_gpt_43B_32ranks_pytnightly0703/execution_trace

Reviewed By: briancoutinho

Differential Revision: D60880333

Pulled By: shengfukevin

fbshipit-source-id: 26e0ee457bca609356d8384992ab34ffaab0db8b
  • Loading branch information
shengfukevin authored and facebook-github-bot committed Aug 8, 2024
1 parent c466b60 commit c189dd9
Showing 1 changed file with 0 additions and 131 deletions.
131 changes: 0 additions & 131 deletions et_replay/comm/comms_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,49 +73,6 @@ def gracefulExit(args: Any = 0) -> None:
sys.exit(args)


def parsesize(ipValue: str) -> int:
    """
    nccl-tests compatible input-size parsing.

    Accepts a plain number ("1024"), or a number followed by a binary unit
    suffix: K (2**10), M (2**20) or G (2**30). Suffixes are matched
    case-insensitively, anything after the unit letter is ignored (so "1GB"
    parses as 1G), and fractional values such as "1.5M" are truncated to a
    whole number of bytes.

    Args:
        ipValue: Contains size of input. Usually a string, but an int is
            accepted as well: this function may be invoked in a loop, once
            per data-type, and after the first iteration it receives its own
            previous (int) result.
    Returns:
        size: Returns the size of input, in bytes.
    """
    # Type check comes first: an int has no string methods to call.
    if isinstance(ipValue, int):
        return int(ipValue)

    multipliers = {"K": 1024, "M": 1024 * 1024, "G": 1024 * 1024 * 1024}

    text = ipValue.strip()
    number = text
    units = 1
    # The first unit letter splits the numeric part from the suffix.
    for idx, char in enumerate(text):
        multiplier = multipliers.get(char.upper())
        if multiplier is not None:
            number = text[:idx]
            units = multiplier
            break

    try:
        try:
            # Exact integer arithmetic whenever the numeric part is integral.
            return int(number) * units
        except ValueError:
            # Fall back to a fractional value such as "1.5"; truncate to bytes.
            return int(float(number) * units)
    except (ValueError, OverflowError):
        # Empty, non-numeric, NaN or infinite numeric part.
        logger.error(f"Could not parse input size {ipValue}")
        gracefulExit()


def parseRankList(ipStr: str) -> List[int]:
"""
Parses a string into a rank list.
Expand All @@ -142,56 +99,6 @@ def parseRankList(ipStr: str) -> List[int]:
return rankList


def getAlgBW(elapsedTimeNS: float, dataSize: int, numIters: int) -> Tuple[float, float]:
    """
    Compute algorithmic bandwidth the way nccl-tests does.

    Args:
        elapsedTimeNS: Total elapsed time for the run, in ns.
        dataSize: Size in bytes of the data processed per iteration.
        numIters: Number of iterations in the run.
    Returns:
        (avgIterNs, algBW): Average time per iteration in ns, and the
        resulting algBW in GBps. Either value is 0.0 when it cannot be
        computed (zero iterations or zero average time).
    """
    # Guard both divisions so an empty or instantaneous run yields zeros.
    perIterNS = elapsedTimeNS / numIters if numIters != 0 else 0.0
    # Bytes per nanosecond is numerically the same as GB per second.
    bandwidth = dataSize / perIterNS if perIterNS != 0 else 0.0
    return (perIterNS, bandwidth)


def getSizes(
    beginSize: int, endSize: int, stepFactor: int, stepBytes: int
) -> List[int]:
    """
    Build the list of sizes to sweep, from beginSize up to endSize.

    Args:
        beginSize: Size of first iteration.
        endSize: Largest size allowed in the result (inclusive).
        stepFactor: Multiplier applied between iterations; only used when
            stepBytes is 0.
        stepBytes: Fixed increment added between iterations; when non-zero
            it takes precedence over stepFactor.
    Returns:
        allSizes: List that contains size of each iteration up to endSize.
        Generation stops early, with an error logged, once more than
        maxIters sizes have been produced.
    """
    maxIters = 100
    sizes = []
    count = 0
    size = beginSize
    while size <= endSize:
        sizes.append(size)
        count += 1
        # Additive stepping wins over multiplicative when stepBytes is set.
        if stepBytes == 0:
            size = size * stepFactor
        else:
            size = size + stepBytes
        # Safety valve against non-advancing steps (e.g. a factor of 1).
        if count > maxIters:
            logger.error(
                f"For finding allSizes numIters: {count} is greater than maxIters: {maxIters}"
            )
            break
    return sizes


def fixBeginSize(commsParams: commsParamsHolder, world_size: int) -> None:
"""
Validate begin size to match other parameters.
Expand Down Expand Up @@ -799,41 +706,9 @@ def __init__(self, args: Namespace) -> None:
self.enable_local_report = args.enable_local_report
self.enable_profiler = args.enable_profiler
self.use_perf_logger = args.use_perf_logger
self.ibv_devices = args.ibv_devices
self.init_only = args.init_only


class commsDlrmParamsHolder(commsParamsHolderBase):
    """Container for the input parameters of the DLRM benchmark."""

    def __init__(
        self,
        args,
        mpi_env_params: Dict[str, int],
    ) -> None:
        """
        Copy the DLRM-specific settings from the parsed arguments.

        Args:
            args: Parsed command-line arguments; forwarded to the base class
                and read for the DLRM-specific options below.
            mpi_env_params: Environment parameters; only "world_size" is
                read here.
        """
        super().__init__(args)

        worldSize = mpi_env_params["world_size"]

        # DLRM-specific sizing: warmup batches count towards the total.
        self.numDevices = worldSize
        self.numBatches = args.num_batches + args.warmup_batches
        # NOTE: Should ensure that dataSize = int(N) * numDevices * batchSize
        self.numBatchesPerEpoch = args.mini_batch_size
        self.dataSize = worldSize * self.numBatches * self.numBatchesPerEpoch

        # Starts empty (scaledEmbedLayers).
        self.embedLayers = []

        # Options copied verbatim from the arguments.
        self.mini_batch_size = args.mini_batch_size
        self.arch_sparse_feature_size = args.arch_sparse_feature_size
        self.nw_stack = args.nw_stack
        self.warmup_batches = args.warmup_batches
        self.device = args.device
        self.backend = args.backend

        # Additional parameters used in runBench().
        self.perf_debug = args.perf_debug
        self.print_comms = args.print_comms


class commsParamsHolder(commsParamsHolderBase):
"""Class holding object for the input parameters from collective benchmark."""

Expand Down Expand Up @@ -1627,12 +1502,6 @@ def readArgs(self, parser: ArgumentParser) -> None:
default=None,
help="add name of custom performer loggers to use them in additional to text output, user is responsible to implement and register the custom performance logger",
) # use custom performer logger
parser.add_argument(
"--ibv-devices",
type=str,
default="",
help="list of ib devices to use for distributed communication",
) # experimental feature
parser.add_argument(
"--init-only",
action="store_true",
Expand Down

0 comments on commit c189dd9

Please sign in to comment.