diff --git a/src/vt/vrt/collection/balance/workload_replay.cc b/src/vt/vrt/collection/balance/workload_replay.cc index 602d38b059..dcbe4da8d7 100644 --- a/src/vt/vrt/collection/balance/workload_replay.cc +++ b/src/vt/vrt/collection/balance/workload_replay.cc @@ -56,7 +56,7 @@ namespace vt { namespace vrt { namespace collection { namespace balance { namespace replay { void replayWorkloads( - PhaseType initial_phase, PhaseType phases_to_run + PhaseType initial_phase, PhaseType phases_to_run, PhaseType phase_mod ) { // read in object loads from json files auto const filename = theConfig()->getLBDataFileIn(); @@ -67,11 +67,11 @@ void replayWorkloads( &LBManager::statsHandler >(theLBManager()->getProxy()); - replayWorkloads(initial_phase, phases_to_run, workloads, stats_cb); + replayWorkloads(initial_phase, phases_to_run, phase_mod, workloads, stats_cb); } void replayWorkloads( - PhaseType initial_phase, PhaseType phases_to_run, + PhaseType initial_phase, PhaseType phases_to_run, PhaseType phase_mod, std::shared_ptr workloads, Callback> stats_cb ) { @@ -102,6 +102,8 @@ void replayWorkloads( // simulate the given number of phases auto stop_phase = initial_phase + phases_to_run; for (PhaseType phase = initial_phase; phase < stop_phase; phase++) { + PhaseType input_phase = phase_mod == 0 ? phase : phase % phase_mod; + // reapply the base load model if in case we overwrote it on a previous iter theLBManager()->setLoadModel(base_load_model); @@ -113,7 +115,7 @@ void replayWorkloads( // point the load model at the workloads for the relevant phase runInEpochCollective("WorkloadReplayDriver -> updateLoads", [=] { - base_load_model->updateLoads(phase); + base_load_model->updateLoads(input_phase); }); if (theConfig()->vt_debug_replay) { @@ -123,7 +125,7 @@ void replayWorkloads( ++count; vt_debug_print( normal, replay, - "workload for element {} is here on phase {}\n", workload_id, phase + "workload for element {} is here on input_phase {}\n", workload_id, input_phase ); } } @@ -161,7 +163,7 @@ void replayWorkloads( } if (this_rank == 0) { - vt_print(replay, "Simulating phase {}...\n", phase); + vt_print(replay, "Simulating phase {} using inputs from phase {}...\n", phase, input_phase); } if (theConfig()->vt_debug_replay) { @@ -227,12 +229,19 @@ void replayWorkloads( auto cb = theCB()->makeFunc( vt::pipe::LifetimeEnum::Once, postLBWork ); - theLBManager()->selectStartLB(phase, cb); + auto lb = theLBManager()->decideLBToRun(phase, true); + auto const start_time = timing::getCurrentTime(); + theLBManager()->startLB(input_phase, lb, cb); + auto const total_time = timing::getCurrentTime() - start_time; + if (lb != LBType::NoLB) { + vt_print(replay, "Time in load balancer: {}\n", total_time); + } }); runInEpochCollective("WorkloadReplayDriver -> destroyLB", [&] { theLBManager()->destroyLB(); }); auto last_phase_info = theLBManager()->getPhaseInfo(); + last_phase_info->phase = phase; thePhase()->printSummary(last_phase_info); } } diff --git a/src/vt/vrt/collection/balance/workload_replay.h b/src/vt/vrt/collection/balance/workload_replay.h index 2e36b39231..925d50d747 100644 --- a/src/vt/vrt/collection/balance/workload_replay.h +++ b/src/vt/vrt/collection/balance/workload_replay.h @@ -74,7 +74,7 @@ namespace balance { namespace replay { * object exists during any given phase. */ void replayWorkloads( - PhaseType initial_phase, PhaseType phases_to_run + PhaseType initial_phase, PhaseType phases_to_run, PhaseType phase_mod ); /** @@ -92,7 +92,7 @@ void replayWorkloads( * same rank as the object exists during any given phase. */ void replayWorkloads( - PhaseType initial_phase, PhaseType phases_to_run, + PhaseType initial_phase, PhaseType phases_to_run, PhaseType phase_mod, std::shared_ptr workloads, Callback> stats_cb ); diff --git a/tests/unit/collection/test_workload_data_migrator.cc b/tests/unit/collection/test_workload_data_migrator.cc index d30c0bbf7b..32680266f9 100644 --- a/tests/unit/collection/test_workload_data_migrator.cc +++ b/tests/unit/collection/test_workload_data_migrator.cc @@ -878,7 +878,7 @@ TEST_F(TestWorkloadReplay, test_run_replay_verify_some_stats) { // then replay them but allow the lb to place objects differently vt::vrt::collection::balance::replay::replayWorkloads( - initial_phase, num_phases, lbdh, stats_cb + initial_phase, num_phases, 0, lbdh, stats_cb ); } diff --git a/tools/workload_replay/simulate_replay.cc b/tools/workload_replay/simulate_replay.cc index 8a7045c655..76258e1004 100644 --- a/tools/workload_replay/simulate_replay.cc +++ b/tools/workload_replay/simulate_replay.cc @@ -50,21 +50,28 @@ int main(int argc, char** argv) { vt::initialize(argc, argv); vtAbortIf( - argc != 3, - "Must have two app-specific arguments: \n" + argc < 3 or argc > 4, + "Must have two or three app-specific arguments:\n" + " [phase modulus]\n" "The json workload files needs to be specified using\n" - "--vt_lb_data_file_in and --vt_lb_data_dir_in" + " --vt_lb_data_in, --vt_lb_data_file_in, and --vt_lb_data_dir_in" ); // initial phase to simulate PhaseType initial_phase = atoi(argv[1]); // number of phases to simulate PhaseType phases_to_run = atoi(argv[2]); + // phase modulus to apply to input + PhaseType phase_mod = 0; + + if (argc > 3) { + phase_mod = atoi(argv[3]); + } // the workloads used will be those specified with the command-line arguments - // --vt_lb_data_file_in and --vt_lb_data_dir_in + // --vt_lb_data_in, --vt_lb_data_file_in, and --vt_lb_data_dir_in vt::vrt::collection::balance::replay::replayWorkloads( - initial_phase, phases_to_run + initial_phase, phases_to_run, phase_mod ); vt::finalize();