diff --git a/RandLAPACK/drivers/rl_rsvd.hh b/RandLAPACK/drivers/rl_rsvd.hh index 8fa8e98e..094bc8b9 100644 --- a/RandLAPACK/drivers/rl_rsvd.hh +++ b/RandLAPACK/drivers/rl_rsvd.hh @@ -26,7 +26,7 @@ class RSVDalg { T tol, T* &U, T* &S, - T* &VT, + T* &V, RandBLAS::RNGState &state ) = 0; }; @@ -80,7 +80,7 @@ class RSVD : public RSVDalg { /// Initially, may not have any space allocated for it. /// /// @param[in] VT - /// Buffer for the \transpose{V}-factor. + /// Buffer for the V-factor. /// Initially, may not have any space allocated for it. /// /// @param[out] U @@ -89,8 +89,8 @@ class RSVD : public RSVDalg { /// @param[out] S /// Stores k-by-k factor \Sigma. /// - /// @param[out] VT - /// Stores k-by-n factor \transpose{V}. + /// @param[out] V + /// Stores n-by-k factor V. /// /// @returns 0 if successful @@ -102,7 +102,7 @@ class RSVD : public RSVDalg { T tol, T* &U, T* &S, - T* &VT, + T* &V, RandBLAS::RNGState &state ) override; @@ -122,28 +122,28 @@ int RSVD::call( T tol, T* &U, T* &S, - T* &VT, + T* &V, RandBLAS::RNGState &state ){ T* Q = nullptr; - T* B = nullptr; + T* BT = nullptr; // Q and B sizes will be adjusted automatically - this->QB_Obj.call(m, n, A, k, this->block_sz, tol, Q, B, state); + this->QB_Obj.call(m, n, A, k, this->block_sz, tol, Q, BT, state); - T* U_buf = ( T * ) calloc(k * k, sizeof( T ) ); + T* UT_buf = ( T * ) calloc(k * k, sizeof( T ) ); // Making sure all vectors are large enough U = ( T * ) calloc(m * k, sizeof( T ) ); S = ( T * ) calloc(k, sizeof( T ) ); - VT = ( T * ) calloc(n * k, sizeof( T ) ); + V = ( T * ) calloc(n * k, sizeof( T ) ); // SVD of B - lapack::gesdd(Job::SomeVec, k, n, B, k, S, U_buf, k, VT, k); + lapack::gesdd(Job::SomeVec, n, k, BT, n, S, V, n, UT_buf, k); // Adjusting U - blas::gemm(Layout::ColMajor, Op::NoTrans, Op::NoTrans, m, k, k, 1.0, Q, m, U_buf, k, 0.0, U, m); + blas::gemm(Layout::ColMajor, Op::NoTrans, Op::Trans, m, k, k, 1.0, Q, m, UT_buf, k, 0.0, U, m); free(Q); - free(B); - free(U_buf); + free(BT); + 
free(UT_buf); return 0; } diff --git a/benchmark/bench_CQRRP/CQRRP_speed_comparisons.cc b/benchmark/bench_CQRRP/CQRRP_speed_comparisons.cc index 94e2bf95..9dee22bc 100644 --- a/benchmark/bench_CQRRP/CQRRP_speed_comparisons.cc +++ b/benchmark/bench_CQRRP/CQRRP_speed_comparisons.cc @@ -92,7 +92,7 @@ static void call_all_algs( printf("\nITERATION %d\n", i); // Testing GEQRF auto start_geqp3 = high_resolution_clock::now(); - //lapack::geqp3(m, n, all_data.A.data(), m, all_data.J.data(), all_data.tau.data()); + lapack::geqp3(m, n, all_data.A.data(), m, all_data.J.data(), all_data.tau.data()); auto stop_geqp3 = high_resolution_clock::now(); dur_geqp3 = duration_cast(stop_geqp3 - start_geqp3).count(); printf("TOTAL TIME FOR GEQP3 %ld\n", dur_geqp3); @@ -114,7 +114,7 @@ static void call_all_algs( // Testing CQRRP - best setup auto start_cqrrp = high_resolution_clock::now(); - CQRRP_blocked.call(m, n, all_data.A.data(), m, d_factor, all_data.tau.data(), all_data.J.data(), state_alg); + //CQRRP_blocked.call(m, n, all_data.A.data(), m, d_factor, all_data.tau.data(), all_data.J.data(), state_alg); auto stop_cqrrp = high_resolution_clock::now(); dur_cqrrp = duration_cast(stop_cqrrp - start_cqrrp).count(); printf("TOTAL TIME FOR CQRRP %ld\n", dur_cqrrp); @@ -123,12 +123,12 @@ static void call_all_algs( state_gen = state; state_alg = state; // Clear and re-generate data - data_regen(m_info, all_data, state_gen, 0); + //data_regen(m_info, all_data, state_gen, 0); // Testing CQRRP - using QP3 CQRRP_blocked.use_qp3 = true; auto start_cqrrp_qp3 = high_resolution_clock::now(); - CQRRP_blocked.call(m, n, all_data.A.data(), m, d_factor, all_data.tau.data(), all_data.J.data(), state_alg); + //CQRRP_blocked.call(m, n, all_data.A.data(), m, d_factor, all_data.tau.data(), all_data.J.data(), state_alg); auto stop_cqrrp_qp3 = high_resolution_clock::now(); CQRRP_blocked.use_qp3 = false; dur_cqrrp_qp3 = duration_cast(stop_cqrrp_qp3 - start_cqrrp_qp3).count(); @@ -138,11 +138,11 @@ static void 
call_all_algs( state_gen = state; state_alg = state; // Clear and re-generate data - data_regen(m_info, all_data, state_gen, 1); + //data_regen(m_info, all_data, state_gen, 1); // Testing HQRRP DEFAULT auto start_hqrrp = high_resolution_clock::now(); - RandLAPACK::hqrrp(m, n, all_data.A.data(), m, all_data.J.data(), all_data.tau.data(), b_sz, (d_factor - 1) * b_sz, panel_pivoting, 0, state_alg, (T*) nullptr); + //RandLAPACK::hqrrp(m, n, all_data.A.data(), m, all_data.J.data(), all_data.tau.data(), b_sz, (d_factor - 1) * b_sz, panel_pivoting, 0, state_alg, (T*) nullptr); auto stop_hqrrp = high_resolution_clock::now(); dur_hqrrp = duration_cast(stop_hqrrp - start_hqrrp).count(); printf("TOTAL TIME FOR HQRRP %ld\n", dur_hqrrp); @@ -151,11 +151,11 @@ static void call_all_algs( state_gen = state; state_alg = state; // Clear and re-generate data - data_regen(m_info, all_data, state_gen, 1); + //data_regen(m_info, all_data, state_gen, 1); // Testing HQRRP with GEQRF auto start_hqrrp_geqrf = high_resolution_clock::now(); - RandLAPACK::hqrrp(m, n, all_data.A.data(), m, all_data.J.data(), all_data.tau.data(), b_sz, (d_factor - 1) * b_sz, panel_pivoting, 1, state_alg, (T*) nullptr); + //RandLAPACK::hqrrp(m, n, all_data.A.data(), m, all_data.J.data(), all_data.tau.data(), b_sz, (d_factor - 1) * b_sz, panel_pivoting, 1, state_alg, (T*) nullptr); auto stop_hqrrp_geqrf = high_resolution_clock::now(); dur_hqrrp_geqrf = duration_cast(stop_hqrrp_geqrf - start_hqrrp_geqrf).count(); printf("TOTAL TIME FOR HQRRP WITH GEQRF %ld\n", dur_hqrrp_geqrf); @@ -164,11 +164,11 @@ static void call_all_algs( state_gen = state; state_alg = state; // Clear and re-generate data - data_regen(m_info, all_data, state_gen, 1); + //data_regen(m_info, all_data, state_gen, 1); // Testing HQRRP with CholQR auto start_hqrrp_cholqr = high_resolution_clock::now(); - RandLAPACK::hqrrp(m, n, all_data.A.data(), m, all_data.J.data(), all_data.tau.data(), b_sz, (d_factor - 1) * b_sz, panel_pivoting, 2, state_alg, 
(T*) nullptr); + //RandLAPACK::hqrrp(m, n, all_data.A.data(), m, all_data.J.data(), all_data.tau.data(), b_sz, (d_factor - 1) * b_sz, panel_pivoting, 2, state_alg, (T*) nullptr); auto stop_hqrrp_cholqr = high_resolution_clock::now(); dur_hqrrp_cholqr = duration_cast(stop_hqrrp_cholqr - start_hqrrp_cholqr).count(); printf("TOTAL TIME FOR HQRRP WITH CHOLQRQ %ld\n", dur_hqrrp_cholqr); @@ -177,7 +177,7 @@ static void call_all_algs( state_gen = state; state_alg = state; // Clear and re-generate data - data_regen(m_info, all_data, state_gen, 0); + //data_regen(m_info, all_data, state_gen, 0); std::ofstream file(output_filename, std::ios::app); file << dur_cqrrp << ", " << dur_cqrrp_qp3 << ", " << dur_hqrrp << ", " << dur_hqrrp_geqrf << ", " << dur_hqrrp_geqrf << ", " << dur_geqrf << ", " << dur_geqp3 << ",\n"; @@ -186,18 +186,18 @@ static void call_all_algs( int main() { // Declare parameters - int64_t m = std::pow(2, 16); - int64_t n = std::pow(2, 16); + int64_t m = 10000; + int64_t n = 10000; double d_factor = 1.25; - int64_t b_sz_start = 256; - int64_t b_sz_end = 2048; + int64_t b_sz_start = 64; + int64_t b_sz_end = 64; double tol = std::pow(std::numeric_limits::epsilon(), 0.85); auto state = RandBLAS::RNGState(); auto state_constant = state; // Timing results std::vector res; // Number of algorithm runs. We only record best times. 
- int64_t numruns = 2; + int64_t numruns = 50; // Allocate basic workspace QR_speed_benchmark_data all_data(m, n, tol, d_factor); diff --git a/test/comps/test_qb.cc b/test/comps/test_qb.cc index 5fee5455..68f281ec 100644 --- a/test/comps/test_qb.cc +++ b/test/comps/test_qb.cc @@ -23,8 +23,8 @@ class TestQB : public ::testing::Test int64_t rank; std::vector A; std::vector Q; - std::vector B; - std::vector B_cpy; + std::vector BT; + std::vector BT_cpy; std::vector A_hat; std::vector A_k; std::vector A_cpy; @@ -37,7 +37,7 @@ class TestQB : public ::testing::Test QBTestData(int64_t m, int64_t n, int64_t k) : A(m * n, 0.0), - B_cpy(k * n, 0.0), + BT_cpy(k * n, 0.0), A_hat(m * n, 0.0), A_k(m * n, 0.0), A_cpy(m * n, 0.0), @@ -119,16 +119,16 @@ class TestQB : public ::testing::Test T* S_dat = all_data.S.data(); T* VT_dat = all_data.VT.data(); - T* Q = nullptr; - T* B = nullptr; + T* Q = nullptr; + T* BT = nullptr; // Regular QB2 call - all_algs.QB.call(m, n, all_data.A.data(), k, block_sz, tol, Q, B, state); + all_algs.QB.call(m, n, all_data.A.data(), k, block_sz, tol, Q, BT, state); // Reassing pointers because Q, B have been resized T* Q_dat = Q; - T* B_dat = B; - T* B_cpy_dat = all_data.B_cpy.data(); + T* BT_dat = BT; + T* BT_cpy_dat = all_data.BT_cpy.data(); printf("Inner dimension of QB: %-25ld\n", k); @@ -137,14 +137,14 @@ class TestQB : public ::testing::Test // Generate a reference identity RandLAPACK::util::eye(k, k, Ident); // Buffer for testing B - blas::copy(k * n, B_dat, 1, B_cpy_dat, 1); + blas::copy(k * n, BT_dat, 1, BT_cpy_dat, 1); // A_hat = Q * B - blas::gemm(Layout::ColMajor, Op::NoTrans, Op::Trans, m, n, k, 1.0, Q_dat, m, B_dat, n, 0.0, A_hat_dat, m); + blas::gemm(Layout::ColMajor, Op::NoTrans, Op::Trans, m, n, k, 1.0, Q_dat, m, BT_dat, n, 0.0, A_hat_dat, m); // TEST 1: A = A - Q * B = 0 - blas::gemm(Layout::ColMajor, Op::NoTrans, Op::Trans, m, n, k, -1.0, Q_dat, m, B_dat, n, 1.0, A_dat, m); + blas::gemm(Layout::ColMajor, Op::NoTrans, Op::Trans, m, n, 
k, -1.0, Q_dat, m, BT_dat, n, 1.0, A_dat, m); // TEST 2: B - Q'A = 0 - //blas::gemm(Layout::ColMajor, Op::Trans, Op::Trans, k, n, m, -1.0, Q_dat, m, A_cpy_2_dat, m, 1.0, B_cpy_dat, n); + //blas::gemm(Layout::ColMajor, Op::Trans, Op::Trans, k, n, m, -1.0, Q_dat, m, A_cpy_2_dat, m, 1.0, BT_cpy_dat, n); // TEST 3: Q'Q = I blas::syrk(Layout::ColMajor, Uplo::Upper, Op::Trans, k, m, 1.0, Q_dat, m, -1.0, Ident_dat, k); @@ -163,7 +163,7 @@ class TestQB : public ::testing::Test printf("FRO NORM OF A - QB: %e\n", norm_test_1); ASSERT_NEAR(norm_test_1, 0, test_tol); // Test 2 Output - //T norm_test_2 = lapack::lange(Norm::Fro, n, k, B_cpy_dat, n); + //T norm_test_2 = lapack::lange(Norm::Fro, n, k, BT_cpy_dat, n); //printf("FRO NORM OF B - Q'A: %e\n", norm_test_2); //ASSERT_NEAR(norm_test_2, 0, test_tol); // Test 3 Output @@ -175,7 +175,7 @@ class TestQB : public ::testing::Test printf("FRO NORM OF A_k - QB: %e\n", norm_test_4); ASSERT_NEAR(norm_test_4, 0, test_tol); free(Q); - free(B); + free(BT); } /// k = min(m, n) test for CholQRCP: @@ -197,25 +197,25 @@ class TestQB : public ::testing::Test T* A_dat = all_data.A.data(); T* Q_dat = all_data.Q.data(); - T* B_dat = all_data.B.data(); + T* BT_dat = all_data.BT.data(); T* A_hat_dat = all_data.A_hat.data(); T* Q = nullptr; - T* B = nullptr; + T* BT = nullptr; // Regular QB2 call - all_algs.QB.call(m, n, all_data.A.data(), k_est, block_sz, tol, Q, B, state); + all_algs.QB.call(m, n, all_data.A.data(), k_est, block_sz, tol, Q, BT, state); // Reassing pointers because Q, B have been resized - Q_dat = Q; - B_dat = B; + Q_dat = Q; + BT_dat = BT; printf("Inner dimension of QB: %ld\n", k_est); // A_hat = Q * B - blas::gemm(Layout::ColMajor, Op::NoTrans, Op::Trans, m, n, k_est, 1.0, Q_dat, m, B_dat, n, 0.0, A_hat_dat, m); + blas::gemm(Layout::ColMajor, Op::NoTrans, Op::Trans, m, n, k_est, 1.0, Q_dat, m, BT_dat, n, 0.0, A_hat_dat, m); // TEST 1: A = A - Q * B = 0 - blas::gemm(Layout::ColMajor, Op::NoTrans, Op::Trans, m, n, k_est, -1.0, 
Q_dat, m, B_dat, n, 1.0, A_dat, m); + blas::gemm(Layout::ColMajor, Op::NoTrans, Op::Trans, m, n, k_est, -1.0, Q_dat, m, BT_dat, n, 1.0, A_dat, m); T norm_test_1 = lapack::lange(Norm::Fro, m, n, A_dat, m); T test_tol = std::pow(std::numeric_limits::epsilon(), 0.75); @@ -231,7 +231,7 @@ class TestQB : public ::testing::Test EXPECT_TRUE(norm_test_1 <= (tol * norm_A)); } free(Q); - free(B); + free(BT); } }; @@ -363,26 +363,3 @@ TEST_F(TestQB, Polynomial_Decay_zero_tol2) delete all_data; delete all_algs; } - - -TEST_F(TestQB, random_test) -{ - /* - int64_t rows_1 = 2; - int64_t cols_1 = 3; - int64_t rows_2 = 2; - int64_t cols_2 = 2; - std::vector A = { 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5}; - std::vector B (0, 3 * 2); - double* A_dat = A.data(); - double* B_dat = B.data(); - blas::gemm(Layout::ColMajor, Op::Trans, Op::NoTrans, cols_1, rows_1, rows_2, 1.0, A_dat, rows_1, &A_dat[rows_1 * cols_1], rows_2, 0.0, B_dat, cols_1); - - char name[] = "A"; - char name1[] = "B"; - - RandBLAS::util::print_colmaj(rows_1, cols_1 + cols_2, A_dat, name); - RandBLAS::util::print_colmaj(cols_1, rows_1, B_dat, name1); - */ - -} \ No newline at end of file diff --git a/test/drivers/test_rsvd.cc b/test/drivers/test_rsvd.cc index 4dc28279..1ca69f6a 100644 --- a/test/drivers/test_rsvd.cc +++ b/test/drivers/test_rsvd.cc @@ -31,7 +31,7 @@ class TestRSVD : public ::testing::Test std::vector s1; std::vector S1; std::vector U1; - std::vector VT1; + std::vector V1; // For low-rank SVD std::vector s; std::vector S; @@ -50,7 +50,7 @@ class TestRSVD : public ::testing::Test s1(n, 0.0), S1(n * n, 0.0), U1(m * n, 0.0), - VT1(n * n, 0.0), + V1(n * n, 0.0), // For low-rank SVD s(n, 0.0), @@ -123,7 +123,7 @@ class TestRSVD : public ::testing::Test T* U1_dat = nullptr; T* s1_dat = nullptr; - T* VT1_dat = nullptr; + T* V1_dat = nullptr; T* S1_dat = all_data.S1.data(); T* U_dat = all_data.U.data(); @@ -132,16 +132,15 @@ class TestRSVD : public ::testing::Test T* VT_dat = all_data.VT.data(); // Regular 
QB2 call - all_algs.RSVD.call(m, n, all_data.A.data(), k, tol, U1_dat, s1_dat, VT1_dat, state); - - // Construnct A_approx_determ = U1 * S1 * VT1 + all_algs.RSVD.call(m, n, all_data.A.data(), k, tol, U1_dat, s1_dat, V1_dat, state); + // Construct A_approx_determ = U1 * S1 * V1^T // Turn vector into diagonal matrix RandLAPACK::util::diag(k, k, s1_dat, k, S1_dat); // U1 * S1 = A_approx_determ_duf blas::gemm(Layout::ColMajor, Op::NoTrans, Op::NoTrans, m, k, k, 1.0, U1_dat, m, S1_dat, k, 1.0, A_approx_determ_duf_dat, m); - // A_approx_determ_duf * VT1 = A_approx_determ - blas::gemm(Layout::ColMajor, Op::NoTrans, Op::NoTrans, m, n, k, 1.0, A_approx_determ_duf_dat, m, VT1_dat, k, 0.0, A_approx_determ_dat, m); + // A_approx_determ_duf * V1^T = A_approx_determ + blas::gemm(Layout::ColMajor, Op::NoTrans, Op::Trans, m, n, k, 1.0, A_approx_determ_duf_dat, m, V1_dat, n, 0.0, A_approx_determ_dat, m); //T norm_test_4 = lapack::lange(Norm::Fro, m, n, A_cpy_dat, m); //printf("FRO NORM OF A_k - QB: %e\n", norm_test_4); @@ -161,14 +160,14 @@ class TestRSVD : public ::testing::Test free(U1_dat); free(s1_dat); - free(VT1_dat); + free(V1_dat); } }; TEST_F(TestRSVD, SimpleTest) { - int64_t m = 100; - int64_t n = 100; + int64_t m = 10; + int64_t n = 10; int64_t k = 5; int64_t p = 10; int64_t passes_per_iteration = 1; @@ -181,14 +180,17 @@ TEST_F(TestRSVD, SimpleTest) bool cond_check = true; bool orth_check = true; - RSVDTestData all_data(m, n, k); - algorithm_objects all_algs(verbosity, cond_check, orth_check, p, passes_per_iteration, block_sz); + auto all_data = new RSVDTestData(m, n, k); + auto all_algs = new algorithm_objects(verbosity, cond_check, orth_check, p, passes_per_iteration, block_sz); RandLAPACK::gen::mat_gen_info m_info(m, n, RandLAPACK::gen::polynomial); m_info.cond_num = 2; m_info.rank = k; - RandLAPACK::gen::mat_gen(m_info, all_data.A.data(), state); + RandLAPACK::gen::mat_gen(m_info, (*all_data).A.data(), state); + + computational_helper(*all_data); + 
test_RSVD1_general(tol, *all_data, *all_algs, state); - computational_helper(all_data); - test_RSVD1_general(tol, all_data, all_algs, state); + delete all_data; + delete all_algs; }