Skip to content

Commit

Permalink
fix(rayTrain): scaling config args #68
Browse files Browse the repository at this point in the history
  • Loading branch information
jyaacoub committed Dec 15, 2023
1 parent 2f85c7f commit 1f1cd72
Showing 1 changed file with 8 additions and 6 deletions.
14 changes: 8 additions & 6 deletions rayTrain_Tune.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,15 @@ def train_func(config):
model = Loader.init_model(model=config["model"], pro_feature=config["feature_opt"],
pro_edge=config["edge_opt"],
# additional kwargs sent to the model class to handle
dropout=config["dropout"], dropout_prot=config["dropout_prot"]
pro_emb_dim=config["pro_emb_dim"], extra_profc_layer=config["extra_profc_layer"])
dropout=config["dropout"],
dropout_prot=config["dropout_prot"], pro_emb_dim=config["pro_emb_dim"], extra_profc_layer=config["extra_profc_layer"]
)

# prepare model with rayTrain (moves it to correct device and wraps it in DDP)
model = ray.train.torch.prepare_model(model)

# ============ Load dataset ==============
print("Loading Dataset")
loaders = Loader.load_DataLoaders(data=config['dataset'], pro_feature=config['feature_opt'],
edge_opt=config['edge_opt'],
path=cfg.DATA_ROOT,
Expand Down Expand Up @@ -73,7 +75,7 @@ def train_func(config):
search_space = {
## constants:
"epochs": 10,
"model": "SPD",
"model": "EDI",
"dataset": "davis",
"feature_opt": "nomsa",
"edge_opt": "binary",
Expand All @@ -93,10 +95,10 @@ def train_func(config):

# each worker is a node from the ray cluster.
# WARNING: SBATCH GPU directive should match num_workers*GPU_per_worker
scaling_config = ScalingConfig(num_workers=2, # number of ray actors to launch to distribute compute across
scaling_config = ScalingConfig(num_workers=4, # number of ray actors to launch to distribute compute across
use_gpu=True, # default is for each worker to have 1 GPU (overridden by resources per worker)
# resources_per_worker={"CPU": 6, "GPU": 2},
# trainer_resources={"CPU": 6, "GPU": 2},
resources_per_worker={"CPU": 2, "GPU": 1},
# trainer_resources={"CPU": 2, "GPU": 1},
# placement_strategy="PACK", # place workers on same node
)

Expand Down

0 comments on commit 1f1cd72

Please sign in to comment.