From 3d92940db081aeda2ef1a03f1db151c9209002bd Mon Sep 17 00:00:00 2001 From: TeachRaccooon Date: Wed, 2 Oct 2024 13:00:06 -0700 Subject: [PATCH] Update --- benchmark/bench_CQRRP/CQRRP_pivot_quality.cc | 16 ++++++++++++++-- benchmark/bench_CQRRP/CQRRP_runtime_breakdown.cc | 4 ++-- benchmark/bench_CQRRP/CQRRP_speed_comparisons.cc | 2 +- benchmark/bench_CQRRP/HQRRP_runtime_breakdown.cc | 2 +- .../bench_CQRRP/ICQRRP_subroutines_speed.cc | 4 ++-- 5 files changed, 20 insertions(+), 8 deletions(-) diff --git a/benchmark/bench_CQRRP/CQRRP_pivot_quality.cc b/benchmark/bench_CQRRP/CQRRP_pivot_quality.cc index c0a7189a..c1722b39 100644 --- a/benchmark/bench_CQRRP/CQRRP_pivot_quality.cc +++ b/benchmark/bench_CQRRP/CQRRP_pivot_quality.cc @@ -190,7 +190,7 @@ int main(int argc, char *argv[]) { int64_t m = std::stol(size); int64_t n = std::stol(size); double d_factor = 1.0; - int64_t b_sz = 1024; + int64_t b_sz = 256; double tol = std::pow(std::numeric_limits::epsilon(), 0.85); auto state = RandBLAS::RNGState(); auto state_constant1 = state; @@ -202,13 +202,25 @@ int main(int argc, char *argv[]) { // Allocate basic workspace QR_speed_benchmark_data all_data(m, n, tol, d_factor); // Generate the input matrix - gaussian suffices for performance tests. - RandLAPACK::gen::mat_gen_info m_info(m, n, RandLAPACK::gen::spiked); + RandLAPACK::gen::mat_gen_info m_info(m, n, RandLAPACK::gen::polynomial); m_info.cond_num = std::pow(10, 10); m_info.rank = n; m_info.exponent = 2.0; m_info.scaling = std::pow(10, 10); RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); + std::fstream file("A_generated_rows_" + std::to_string(m) + + "_cols_" + std::to_string(n) + + "_b_sz_" + std::to_string(b_sz) + + "_d_factor_" + std::to_string(d_factor) + + ".dat", std::fstream::app); + for (int i = 0; i < n ; ++i){ + for (int j = 0; j < m ; ++j){ + file << all_data.A[m * i + j] << ", "; + } + file << "\n"; + } + R_norm_ratio(m_info, b_sz, all_data, state_constant1); printf("R done\n"); sv_ratio(m_info, b_sz, all_data, state_constant2); diff --git a/benchmark/bench_CQRRP/CQRRP_runtime_breakdown.cc b/benchmark/bench_CQRRP/CQRRP_runtime_breakdown.cc index 9208726f..fb6d914f 100644 --- a/benchmark/bench_CQRRP/CQRRP_runtime_breakdown.cc +++ b/benchmark/bench_CQRRP/CQRRP_runtime_breakdown.cc @@ -112,7 +112,7 @@ int main(int argc, char *argv[]) { int64_t m = std::stol(size); int64_t n = std::stol(size); double d_factor = 1.0; - int64_t b_sz_start = 256; + int64_t b_sz_start = 32; int64_t b_sz_end = 2048; double tol = std::pow(std::numeric_limits::epsilon(), 0.85); auto state = RandBLAS::RNGState(); @@ -120,7 +120,7 @@ int main(int argc, char *argv[]) { // Timing results std::vector res; // Number of algorithm runs. We only record best times. - int64_t numruns = 10; + int64_t numruns = 5; // Allocate basic workspace QR_speed_benchmark_data all_data(m, n, tol, d_factor); diff --git a/benchmark/bench_CQRRP/CQRRP_speed_comparisons.cc b/benchmark/bench_CQRRP/CQRRP_speed_comparisons.cc index a5577322..5bcbd93c 100644 --- a/benchmark/bench_CQRRP/CQRRP_speed_comparisons.cc +++ b/benchmark/bench_CQRRP/CQRRP_speed_comparisons.cc @@ -202,7 +202,7 @@ int main(int argc, char *argv[]) { int64_t m = std::stol(size); int64_t n = std::stol(size); double d_factor = 1.0; - int64_t b_sz_start = 256; + int64_t b_sz_start = 32; int64_t b_sz_end = 2048; double tol = std::pow(std::numeric_limits::epsilon(), 0.85); auto state = RandBLAS::RNGState(); diff --git a/benchmark/bench_CQRRP/HQRRP_runtime_breakdown.cc b/benchmark/bench_CQRRP/HQRRP_runtime_breakdown.cc index 65ad2164..5de2fbef 100644 --- a/benchmark/bench_CQRRP/HQRRP_runtime_breakdown.cc +++ b/benchmark/bench_CQRRP/HQRRP_runtime_breakdown.cc @@ -107,7 +107,7 @@ int main(int argc, char *argv[]) { int64_t m = std::stol(size); int64_t n = std::stol(size); double d_factor = 1.0; - int64_t b_sz_start = 256; + int64_t b_sz_start = 32; int64_t b_sz_end = 2048; double tol = std::pow(std::numeric_limits::epsilon(), 0.85); auto state = RandBLAS::RNGState(); diff --git a/benchmark/bench_CQRRP/ICQRRP_subroutines_speed.cc b/benchmark/bench_CQRRP/ICQRRP_subroutines_speed.cc index 7782ad7b..be8e8f17 100644 --- a/benchmark/bench_CQRRP/ICQRRP_subroutines_speed.cc +++ b/benchmark/bench_CQRRP/ICQRRP_subroutines_speed.cc @@ -332,8 +332,8 @@ int main(int argc, char *argv[]) { int64_t i = 0; // Declare parameters int64_t m = std::stol(size); - int64_t n_start = 256; - int64_t n_stop = 1024; + int64_t n_start = 32; + int64_t n_stop = 2048; int64_t nb_start = 32; auto state = RandBLAS::RNGState(); auto state_B = RandBLAS::RNGState();