Skip to content

Commit

Permalink
Merged pivoting of work(A) and trail(R)
Browse files — browse the repository at this point in the history
  • Loading branch information
TeachRaccooon committed Aug 13, 2024
1 parent f91d2ce commit 27f1fd0
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 8 deletions.
11 changes: 4 additions & 7 deletions RandLAPACK/drivers/rl_cqrrp_gpu.hh
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,6 @@ int CQRRP_blocked_GPU<T, RNG>::call(
high_resolution_clock::time_point qrcp_t_stop;
high_resolution_clock::time_point qrcp_piv_t_start;
high_resolution_clock::time_point qrcp_piv_t_stop;
high_resolution_clock::time_point r_piv_t_start;
high_resolution_clock::time_point r_piv_t_stop;
high_resolution_clock::time_point piv_A_t_start;
high_resolution_clock::time_point piv_A_t_stop;
high_resolution_clock::time_point preconditioning_t_start;
Expand All @@ -171,7 +169,6 @@ int CQRRP_blocked_GPU<T, RNG>::call(
long preallocation_t_dur = 0;
long qrcp_t_dur = 0;
long qrcp_piv_t_dur = 0;
long r_piv_t_dur = 0;
long piv_A_t_dur = 0;
long preconditioning_t_dur = 0;
long cholqr_t_dur = 0;
Expand Down Expand Up @@ -332,7 +329,7 @@ int CQRRP_blocked_GPU<T, RNG>::call(
lapack_queue.sync();
qrcp_t_stop = high_resolution_clock::now();
qrcp_t_dur += duration_cast<microseconds>(qrcp_t_stop - qrcp_t_start).count();
r_piv_t_start = high_resolution_clock::now();
piv_A_t_start = high_resolution_clock::now();
}


Expand Down Expand Up @@ -463,10 +460,10 @@ int CQRRP_blocked_GPU<T, RNG>::call(
if(this -> timing) {
total_t_stop = high_resolution_clock::now();
total_t_dur = duration_cast<microseconds>(total_t_stop - total_t_start).count();
long t_rest = total_t_dur - (preallocation_t_dur + qrcp_t_dur + r_piv_t_dur + piv_A_t_dur + preconditioning_t_dur + cholqr_t_dur + orhr_col_t_dur + updating_A_t_dur + updating_J_t_dur + updating_R_t_dur + updating_Sk_t_dur);
this -> times.resize(16);
long t_rest = total_t_dur - (preallocation_t_dur + qrcp_t_dur + piv_A_t_dur + preconditioning_t_dur + cholqr_t_dur + orhr_col_t_dur + updating_A_t_dur + updating_J_t_dur + updating_R_t_dur + updating_Sk_t_dur);
this -> times.resize(15);
auto qrcp_main_t_dur = qrcp_t_dur - qrcp_piv_t_dur;
this -> times = {n, b_sz_const, preallocation_t_dur, qrcp_main_t_dur, qrcp_piv_t_dur, r_piv_t_dur, piv_A_t_dur, preconditioning_t_dur, cholqr_t_dur, orhr_col_t_dur, updating_A_t_dur, updating_J_t_dur, updating_R_t_dur, updating_Sk_t_dur, t_rest, total_t_dur};
this -> times = {n, b_sz_const, preallocation_t_dur, qrcp_main_t_dur, qrcp_piv_t_dur, piv_A_t_dur, preconditioning_t_dur, cholqr_t_dur, orhr_col_t_dur, updating_A_t_dur, updating_J_t_dur, updating_R_t_dur, updating_Sk_t_dur, t_rest, total_t_dur};

printf("\n\n/------------ICQRRP TIMING RESULTS BEGIN------------/\n");
printf("Preallocation time: %25ld μs,\n", preallocation_t_dur);
Expand Down
2 changes: 1 addition & 1 deletion test/drivers/test_cqrrp_gpu.cu
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ class TestCQRRP : public ::testing::TestWithParam<int64_t>
//printf("RANK AS RETURNED BY CQRRP GPU %4ld\n", rank);
printf(" BLOCK SIZE = %ld TIME (MS) = %ld\n", block_size, diff);
std::ofstream file(output_filename, std::ios::app);
std::copy(CQRRP_GPU.times.data(), CQRRP_GPU.times.data() + 16, std::ostream_iterator<T>(file, ", "));
std::copy(CQRRP_GPU.times.data(), CQRRP_GPU.times.data() + 15, std::ostream_iterator<T>(file, ", "));
file << "\n";

data_regen(m_info, all_data, state);
Expand Down

0 comments on commit 27f1fd0

Please sign in to comment.