-
Notifications
You must be signed in to change notification settings - Fork 5
/
cost_homo_cluster.py
70 lines (55 loc) · 2.98 KB
/
cost_homo_cluster.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# Copyright 2024 Samsung Electronics Co., Ltd. All Rights Reserved
import argparse
from copy import copy
from typing import List, Tuple
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from arguments import parse_args
from data_loader import ProfileDataLoader
from gpu_cluster import GPUCluster
from model.cost_estimator import HomoCostEstimator
from model.cost_validation import EstimateCostValidator
from model.activation_parameter import GPTActivationAndParam
from search_space.plan import UniformPlanGenerator
from utils import ModelConfig
from search_space.plan import UniformPlan
def cost_homo_cluster(args: argparse.Namespace, gpu_cluster: GPUCluster, cost_estimator: HomoCostEstimator,
                      device_type=None) -> List[Tuple[UniformPlan, float]]:
    """Estimate the time cost of every uniform plan whose global batch size equals ``args.gbs``.

    Args:
        args: Parsed command-line options; ``max_profiled_tp_degree`` and ``gbs`` are read.
        gpu_cluster: Cluster description; its total device count is handed to the plan generator.
        cost_estimator: Estimator whose ``get_cost(plan, device_type)`` returns a 3-tuple of
            time cost, per-stage memory cost and a third value that is not used here.
        device_type: Device type forwarded to ``cost_estimator.get_cost``. When omitted, the
            first entry of the module-level ``device_types`` (set by the script entry point)
            is used, which keeps the previous three-argument call working.

    Returns:
        A list of ``(copy of plan, time cost)`` pairs in generation order. Plans for which the
        estimator raises ``KeyError`` are reported on stdout and left out.

    Raises:
        NameError: If ``device_type`` is omitted and no module-level ``device_types`` exists
            (for example when this function is imported instead of run as a script).
    """
    if device_type is None:
        # Legacy path: previously this function always read the global populated under
        # ``if __name__ == "__main__"``; passing ``device_type`` explicitly avoids that coupling.
        device_type = device_types[0]

    estimate_costs = []
    plan_generator = UniformPlanGenerator(num_devices=gpu_cluster.get_total_num_devices(),
                                          max_tp=args.max_profiled_tp_degree, max_gbs=args.gbs)
    for plan in plan_generator:
        # The generator is bounded by max_gbs; only plans at exactly the requested gbs are costed.
        if plan.gbs != args.gbs:
            continue

        try:
            # NOTE(review): the third value was named ``OOM`` originally and looks like an
            # out-of-memory indicator, but it is not used to filter plans - confirm intended.
            time_cost, stage_memory_cost, _oom = cost_estimator.get_cost(plan, device_type)
            # Store a copy in case the generator reuses/mutates the plan object - TODO confirm.
            estimate_costs.append((copy(plan), time_cost))
            print(f'\n{plan}')
            print(f"time: {time_cost}, memory(stage): {stage_memory_cost}")
        except KeyError as e:
            print(f'KeyError: {e}')

    return estimate_costs
if __name__ == "__main__":
    # Script entry point: load the cluster description and profile data, estimate the cost
    # of every candidate plan, then print the plans ranked by ascending cost.
    args = parse_args()
    gpu_cluster = GPUCluster(hostfile_path=args.hostfile_path, clusterfile_path=args.clusterfile_path)

    # NOTE(review): the getter names and the assertion messages disagree (inter getter with an
    # "intra-bandwidth" message and vice versa). Left exactly as found; confirm against GPUCluster.
    inter_bandwidth = gpu_cluster.get_inter_bandwidth(0)
    assert 10 <= inter_bandwidth <= 500, \
        "intra-bandwidth for NVLink should exist within a range 10GB/s to 500GB/s"
    intra_bandwidth = gpu_cluster.get_intra_bandwidth(0)
    assert 1 <= intra_bandwidth <= 50, \
        "inter-bandwidth should exist within a range 1GB/s to 50GB/s"

    data_loader = ProfileDataLoader(profile_dir=args.profile_data_path,
                                    runtime_cost_dir=args.evaluation_data_path)
    # `device_types` must stay a module-level name: cost_homo_cluster reads it.
    profile_data, device_types = data_loader.load_profile_data_all()
    has_profile_data = len(profile_data.keys()) > 0
    if has_profile_data:
        print('\nProfiled data has been loaded.')
    assert has_profile_data, 'There is no profiled data at the specified path.'

    model_config = ModelConfig(
        model_name=args.model_name,
        num_layers=args.num_layers,
        sequence_length=args.sequence_length,
        vocab_size=args.vocab_size,
        hidden_size=args.hidden_size,
        attention_head_size=args.attention_head_size,
    )
    model_volume = GPTActivationAndParam(model_config, profile_data['model']['parameters'])
    cost_estimator = HomoCostEstimator(profile_data, model_config, model_volume, gpu_cluster)

    estimate_costs = cost_homo_cluster(args, gpu_cluster, cost_estimator)

    # Each entry is a (plan, cost) pair; rank them from cheapest to most expensive.
    sorted_result = sorted(estimate_costs, key=lambda entry: entry[1])
    print('rank, cost, plan')
    for rank, (plan, cost) in enumerate(sorted_result, start=1):
        print(f'{rank}, {cost}, {plan}')