diff --git a/RandLAPACK/drivers/rl_rbki.hh b/RandLAPACK/drivers/rl_rbki.hh index 243c5829..69261b1f 100644 --- a/RandLAPACK/drivers/rl_rbki.hh +++ b/RandLAPACK/drivers/rl_rbki.hh @@ -428,7 +428,6 @@ int RBKI::call( break; } } - printf("Total iters %d\n", iter); this -> norm_R_end = norm_R; this->num_krylov_iters = iter; @@ -450,84 +449,20 @@ int RBKI::call( get_factors_t_start = high_resolution_clock::now(); } - printf("%ld, %ld\n", end_rows, end_cols); - std::ofstream file2("run_out/S_TO_DECOMPOSE.txt", std::ios::trunc); - for (int i = 0; i < n; ++i) { - for (int j = 0; j < (n+k); ++j) { - file2 << std::setprecision(20) << *(S + i * (n + k) + j) << " "; - } - file2 << "\n"; // Move to the next line after each row - } - - char name_A [] = "A"; - //RandBLAS::util::print_colmaj(m, n, A, name_A); - - char name0 [] = "S TO DECOMPOSE"; - //RandBLAS::util::print_colmaj(n + k, n, S, name0); - - - //RandBLAS::util::print_colmaj(n+k, n, S, name); if (iter % 2 != 0) { - printf("Decomposing R\n"); // [U_hat, Sigma, V_hat] = svd(R') lapack::gesdd(Job::SomeVec, end_rows, end_cols, R, n, Sigma, U_hat, end_rows, VT_hat, end_cols); } else { - printf("Decomposing S\n"); // [U_hat, Sigma, V_hat] = svd(S) lapack::gesdd(Job::SomeVec, end_rows, end_cols, S, n + k, Sigma, U_hat, end_rows, VT_hat, end_cols); } - char name2 [] = "U_hat"; - char name3 [] = "VT_hat"; - char name4 [] = "Sigma"; - - //RandBLAS::util::print_colmaj(end_rows, end_cols, U_hat, name2); - //RandBLAS::util::print_colmaj(end_cols, end_cols, VT_hat, name3); - //RandBLAS::util::print_colmaj(end_cols, 1, Sigma, name4); - - std::ofstream file5("run_out/U_hat.txt", std::ios::trunc); - for (int i = 0; i < end_cols; ++i) { - for (int j = 0; j < end_rows; ++j) { - file5 << std::setprecision(20) << *(U_hat + i * end_rows + j) << " "; - } - file5 << "\n"; // Move to the next line after each row - } - - - std::ofstream file6("run_out/VT_hat.txt", std::ios::trunc); - for (int i = 0; i < end_cols; ++i) { - for (int j = 0; j < end_cols; ++j) { - file6 << std::setprecision(20) << *(VT_hat + i * end_cols + j) << " "; - } - file6 << "\n"; // Move to the next line after each row - } - - - // U = X_ev * U_hat blas::gemm(Layout::ColMajor, Op::NoTrans, Op::NoTrans, m, end_cols, end_rows, 1.0, X_ev, m, U_hat, end_rows, 0.0, U, m); // V = Y_od * V_hat // We actually perform VT = V_hat' * Y_odd' blas::gemm(Layout::ColMajor, Op::NoTrans, Op::Trans, end_cols, n, end_cols, 1.0, VT_hat, end_cols, Y_od, n, 0.0, VT, n); - - std::ofstream file3("run_out/X_ev.txt", std::ios::trunc); - for (int i = 0; i < end_rows; ++i) { - for (int j = 0; j < m; ++j) { - file3 << std::setprecision(20) << *(X_ev + i * m + j) << " "; - } - file3 << "\n"; // Move to the next line after each row - } - - - std::ofstream file4("run_out/Y_od.txt", std::ios::trunc); - for (int i = 0; i < end_cols; ++i) { - for (int j = 0; j < n; ++j) { - file4 << std::setprecision(20) << *(Y_od + i * n + j) << " "; - } - file4 << "\n"; // Move to the next line after each row - } - if(this -> timing) { get_factors_t_stop = high_resolution_clock::now(); get_factors_t_dur = duration_cast(get_factors_t_stop - get_factors_t_start).count(); diff --git a/benchmark/bench_RBKI/RBKI_speed_comparisons.cc b/benchmark/bench_RBKI/RBKI_speed_comparisons.cc index 7f1b7ad9..7ee2903f 100644 --- a/benchmark/bench_RBKI/RBKI_speed_comparisons.cc +++ b/benchmark/bench_RBKI/RBKI_speed_comparisons.cc @@ -58,7 +58,6 @@ static void update_best_time(int iter, long &t_best, long &t_curr, T* S1, T* S2, template static T residual_error_comp(RBKI_benchmark_data &all_data, int64_t target_rank, int64_t custom_rank) { - printf("%ld\n", custom_rank); auto m = all_data.row; auto n = all_data.col; @@ -94,8 +93,6 @@ static void update_best_time(int iter, long &t_best, long &t_curr, T* S1, T* S2, T nrm1 = lapack::lange(Norm::Fro, m, custom_rank, U_cpy_dat, m); T nrm2 = lapack::lange(Norm::Fro, custom_rank, n, VT_cpy_dat, n); - printf("%e %e\n", nrm1, nrm2); - return std::sqrt(std::pow(nrm1, 2) + std::pow(nrm2, 2)); } @@ -126,7 +123,6 @@ static void call_all_algs( // These matrices will be full-rank. // Hence, target_rank = b_sz * num_krylov_iters / 2 RBKI.max_krylov_iters = (int) ((target_rank * 2) / b_sz); - printf("Max Krylov iters %d\n", RBKI.max_krylov_iters); // timing vars long dur_rbki = 0; @@ -144,32 +140,6 @@ static void call_all_algs( auto stop_rbki = high_resolution_clock::now(); dur_rbki = duration_cast(stop_rbki - start_rbki).count(); - std::ofstream file1("run_out/U.txt", std::ios::trunc); - for (int i = 0; i < target_rank; ++i) { - for (int j = 0; j < m; ++j) { - file1 << std::setprecision(20) << *(all_data.U.data() + i * m + j) << " "; - } - file1 << "\n"; // Move to the next line after each row - } - - std::ofstream file2("run_out/VT.txt", std::ios::trunc); - for (int i = 0; i < n; ++i) { - for (int j = 0; j < target_rank; ++j) { - file2 << std::setprecision(20) << *(all_data.VT.data() + i * n + j) << " "; - } - file2 << "\n"; // Move to the next line after each row - } - - char name [] = "VT"; - //RandBLAS::util::print_colmaj(n, n, all_data.VT.data(), name); - - std::ofstream file3("run_out/S.txt", std::ios::trunc); - for (int i = 0; i < target_rank; ++i) { - file3 << std::setprecision(20) << *(all_data.Sigma.data() + i) << " "; - file3 << "\n"; // Move to the next line after each row - } - - T residual_err_custom = residual_error_comp(all_data, target_rank, custom_rank); T residual_err_target = residual_error_comp(all_data, target_rank, target_rank); @@ -199,7 +169,7 @@ int main(int argc, char *argv[]) { int64_t b_sz_stop = 0; int64_t target_rank_start = 512; int64_t target_rank_curr = target_rank_start; - int64_t target_rank_stop = 512; + int64_t target_rank_stop = 4096; int64_t custom_rank = 10; double tol = std::pow(std::numeric_limits::epsilon(), 0.85); auto state = RandBLAS::RNGState(); @@ -211,15 +181,15 @@ int main(int argc, char *argv[]) { // Generate the input matrix. RandLAPACK::gen::mat_gen_info m_info(m, n, RandLAPACK::gen::custom_input); m_info.filename = argv[1]; - m_info.workspace_query_mod = 1; + m_info.workspace_query_mod = 3; // Workspace query; RandLAPACK::gen::mat_gen(m_info, NULL, state); // Update basic params. m = m_info.rows; n = m_info.cols; - b_sz_start = 16;//std::max((int64_t) 1, n / 10); - b_sz_stop = 16;//std::max((int64_t) 1, n / 10); + b_sz_start = 8;//std::max((int64_t) 1, n / 10); + b_sz_stop = 256;//std::max((int64_t) 1, n / 10); // Allocate basic workspace. RBKI_benchmark_data all_data(m, n, tol);