Skip to content

Commit

Permalink
Merge pull request #2244 from DARMA-tasking/2243-replay-using-phase-modulus
Browse files Browse the repository at this point in the history

2243 allow replay to repeat phases using modulus
  • Loading branch information
lifflander committed Jan 25, 2024
2 parents 26c8c78 + eee59c7 commit b736aaa
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 15 deletions.
23 changes: 16 additions & 7 deletions src/vt/vrt/collection/balance/workload_replay.cc
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ namespace vt { namespace vrt { namespace collection {
namespace balance { namespace replay {

void replayWorkloads(
PhaseType initial_phase, PhaseType phases_to_run
PhaseType initial_phase, PhaseType phases_to_run, PhaseType phase_mod
) {
// read in object loads from json files
auto const filename = theConfig()->getLBDataFileIn();
Expand All @@ -67,11 +67,11 @@ void replayWorkloads(
&LBManager::statsHandler
>(theLBManager()->getProxy());

replayWorkloads(initial_phase, phases_to_run, workloads, stats_cb);
replayWorkloads(initial_phase, phases_to_run, phase_mod, workloads, stats_cb);
}

void replayWorkloads(
PhaseType initial_phase, PhaseType phases_to_run,
PhaseType initial_phase, PhaseType phases_to_run, PhaseType phase_mod,
std::shared_ptr<LBDataHolder> workloads,
Callback<std::vector<balance::LoadData>> stats_cb
) {
Expand Down Expand Up @@ -102,6 +102,8 @@ void replayWorkloads(
// simulate the given number of phases
auto stop_phase = initial_phase + phases_to_run;
for (PhaseType phase = initial_phase; phase < stop_phase; phase++) {
PhaseType input_phase = phase_mod == 0 ? phase : phase % phase_mod;

// reapply the base load model in case we overwrote it on a previous iteration
theLBManager()->setLoadModel(base_load_model);

Expand All @@ -113,7 +115,7 @@ void replayWorkloads(

// point the load model at the workloads for the relevant phase
runInEpochCollective("WorkloadReplayDriver -> updateLoads", [=] {
base_load_model->updateLoads(phase);
base_load_model->updateLoads(input_phase);
});

if (theConfig()->vt_debug_replay) {
Expand All @@ -123,7 +125,7 @@ void replayWorkloads(
++count;
vt_debug_print(
normal, replay,
"workload for element {} is here on phase {}\n", workload_id, phase
"workload for element {} is here on input_phase {}\n", workload_id, input_phase
);
}
}
Expand Down Expand Up @@ -161,7 +163,7 @@ void replayWorkloads(
}

if (this_rank == 0) {
vt_print(replay, "Simulating phase {}...\n", phase);
vt_print(replay, "Simulating phase {} using inputs from phase {}...\n", phase, input_phase);
}

if (theConfig()->vt_debug_replay) {
Expand Down Expand Up @@ -227,12 +229,19 @@ void replayWorkloads(
auto cb = theCB()->makeFunc<ReassignmentMsg>(
vt::pipe::LifetimeEnum::Once, postLBWork
);
theLBManager()->selectStartLB(phase, cb);
auto lb = theLBManager()->decideLBToRun(phase, true);
auto const start_time = timing::getCurrentTime();
theLBManager()->startLB(input_phase, lb, cb);
auto const total_time = timing::getCurrentTime() - start_time;
if (lb != LBType::NoLB) {
vt_print(replay, "Time in load balancer: {}\n", total_time);
}
});
runInEpochCollective("WorkloadReplayDriver -> destroyLB", [&] {
theLBManager()->destroyLB();
});
auto last_phase_info = theLBManager()->getPhaseInfo();
last_phase_info->phase = phase;
thePhase()->printSummary(last_phase_info);
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/vt/vrt/collection/balance/workload_replay.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ namespace balance { namespace replay {
* object exists during any given phase.
*/
void replayWorkloads(
PhaseType initial_phase, PhaseType phases_to_run
PhaseType initial_phase, PhaseType phases_to_run, PhaseType phase_mod
);

/**
Expand All @@ -92,7 +92,7 @@ void replayWorkloads(
* same rank as the object exists during any given phase.
*/
void replayWorkloads(
PhaseType initial_phase, PhaseType phases_to_run,
PhaseType initial_phase, PhaseType phases_to_run, PhaseType phase_mod,
std::shared_ptr<LBDataHolder> workloads,
Callback<std::vector<balance::LoadData>> stats_cb
);
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/collection/test_workload_data_migrator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -878,7 +878,7 @@ TEST_F(TestWorkloadReplay, test_run_replay_verify_some_stats) {

// then replay them but allow the lb to place objects differently
vt::vrt::collection::balance::replay::replayWorkloads(
initial_phase, num_phases, lbdh, stats_cb
initial_phase, num_phases, 0, lbdh, stats_cb
);
}

Expand Down
17 changes: 12 additions & 5 deletions tools/workload_replay/simulate_replay.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,21 +50,28 @@ int main(int argc, char** argv) {
vt::initialize(argc, argv);

vtAbortIf(
argc != 3,
"Must have two app-specific arguments: <initial phase> <phases to run>\n"
argc < 3 or argc > 4,
"Must have two or three app-specific arguments:\n"
" <initial phase> <phases to run> [phase modulus]\n"
"The json workload files needs to be specified using\n"
"--vt_lb_data_file_in and --vt_lb_data_dir_in"
" --vt_lb_data_in, --vt_lb_data_file_in, and --vt_lb_data_dir_in"
);

// initial phase to simulate
PhaseType initial_phase = atoi(argv[1]);
// number of phases to simulate
PhaseType phases_to_run = atoi(argv[2]);
// phase modulus to apply to input
PhaseType phase_mod = 0;

if (argc > 3) {
phase_mod = atoi(argv[3]);
}

// the workloads used will be those specified with the command-line arguments
// --vt_lb_data_file_in and --vt_lb_data_dir_in
// --vt_lb_data_in, --vt_lb_data_file_in, and --vt_lb_data_dir_in
vt::vrt::collection::balance::replay::replayWorkloads(
initial_phase, phases_to_run
initial_phase, phases_to_run, phase_mod
);

vt::finalize();
Expand Down

0 comments on commit b736aaa

Please sign in to comment.