diff --git a/RandLAPACK/comps/rl_qb.hh b/RandLAPACK/comps/rl_qb.hh index 08f75379..8f751ed0 100644 --- a/RandLAPACK/comps/rl_qb.hh +++ b/RandLAPACK/comps/rl_qb.hh @@ -26,10 +26,10 @@ class QBalg { int64_t n, T* A, int64_t &k, - int64_t block_sz, + int64_t b_sz, T tol, - T* Q, - T* B, + T* &Q, + T* &B, RandBLAS::RNGState &state ) = 0; }; @@ -49,7 +49,6 @@ class QB : public QBalg { ) : RF_Obj(rf_obj), Orth_Obj(orth_obj) { verbosity = verb; orth_check = orth; - dim_growth_factor = 4; } /// Iteratively build an approximate QB factorization of A, @@ -59,9 +58,9 @@ class QB : public QBalg { /// or /// (2) Q has k columns. /// Each iteration involves sketching A from the right by a sketching - /// matrix with "block_sz" columns. + /// matrix with "b_sz" columns. /// - /// The number of columns in Q increase by "block_sz" at each iteration, unless + /// The number of columns in Q increase by "b_sz" at each iteration, unless /// that would bring #cols(Q) > k. In that case, the final iteration only /// adds enough columns to Q so that #cols(Q) == k. /// The implementation relies on RowSketcher and RangeFinder, @@ -88,7 +87,7 @@ class QB : public QBalg { /// @param[in] k /// Expected rank of the matrix A. If unknown, set k=min(m,n). /// - /// @param[in] block_sz + /// @param[in] b_sz /// The block size in this blocked QB algorithm. Add this many columns /// to Q at each iteration (except possibly the final iteration). /// @@ -117,10 +116,10 @@ class QB : public QBalg { int64_t n, T* A, int64_t &k, - int64_t block_sz, + int64_t b_sz, T tol, - T* Q, - T* B, + T* &Q, + T* &B, RandBLAS::RNGState &state ) override; @@ -129,14 +128,6 @@ class QB : public QBalg { RandLAPACK::Stabilization &Orth_Obj; bool verbosity; bool orth_check; - - //This represents how much space is currently allocated for cols of Q and rows of B. - //This is <= k. We are assuming that the user may not have given "enough" - //space when allocating Q, B initially. - int64_t curr_lim; - - // By how much are we increasing the dimension when we've reached curr_lim - int dim_growth_factor; }; // ----------------------------------------------------------------------------- @@ -146,179 +137,20 @@ int QB::call( int64_t n, T* A, int64_t &k, - int64_t block_sz, + int64_t b_sz, T tol, - T* Q, - T* B, + T* &Q, + T* &B, RandBLAS::RNGState &state ){ - int64_t curr_sz = 0; - int64_t next_sz = 0; - this->curr_lim = k; - tol = std::max(tol, 100 * std::numeric_limits::epsilon()); - T norm_B = 0.0; - T prev_err = 0.0; - T approx_err = 0.0; - - fprintf( stderr,"%d\n", m); - fprintf( stderr,"%d\n", n); - fprintf( stderr,"%ld\n", block_sz); - - // Make sure Q, B have space for at least one iteration - if(!Q) - Q = ( T * ) realloc(Q, m * block_sz * sizeof( T ) ); - if(!B) - B = ( T * ) realloc(B, n * block_sz * sizeof( T ) ); - - T* A_cpy = ( T * ) calloc( m * n, sizeof( T ) ); - T* QtQi = ( T * ) calloc( this->curr_lim * block_sz, sizeof( T ) ); - T* Q_i = ( T * ) calloc( m * block_sz, sizeof( T ) ); - T* B_i_trans = ( T * ) calloc( block_sz * n, sizeof( T ) ); - - // pre-compute nrom - T norm_A = lapack::lange(Norm::Fro, m, n, A, m); - // Immediate termination criteria - if(norm_A == 0.0) { - // Zero matrix termination - k = curr_sz; - free(A_cpy); - free(QtQi); - free(Q_i); - free(B_i_trans); - return 1; - } - - // Copy the initial data to avoid unwanted modification TODO #1 - lapack::lacpy(MatrixType::General, m, n, A, m, A_cpy, m); int ctr = 0; - while(k > curr_sz) { - // Dynamically changing block size - block_sz = std::min(block_sz, k - curr_sz); - next_sz = curr_sz + block_sz; - - fprintf( stderr,"Next sz %d\n", next_sz); - fprintf( stderr,"this->curr_lim %d\n", this->curr_lim); - // Make sure we have enough space for everything - if(next_sz > this->curr_lim) { - this->curr_lim = std::min(2 * this->curr_lim, k); - Q = ( T * ) realloc(Q, (this->curr_lim) * m * sizeof( T )); - B = ( T * ) realloc(B, (this->curr_lim) * n * sizeof( T )); - QtQi = ( T * ) realloc(QtQi, (this->curr_lim) * block_sz * sizeof( T )); - } - - fprintf( stderr, "Size Q %ld\n", (this->curr_lim) * m ); - fprintf( stderr, "Size B %ld\n", (this->curr_lim) * n ); - fprintf( stderr, "Size QtQi %ld\n", (this->curr_lim) * block_sz ); - - // Calling RangeFinder - if(this->RF_Obj.call(m, n, A_cpy, block_sz, Q_i, state)) { - free(A_cpy); - free(QtQi); - free(Q_i); - free(B_i_trans); - return 6; // RF failed - } - - if(this->orth_check) { - if (util::orthogonality_check(m, block_sz, block_sz, Q_i, this->verbosity)) { - // Lost orthonormality of Q - k = curr_sz; - free(A_cpy); - free(QtQi); - free(Q_i); - free(B_i_trans); - return 4; - } - } - - // No need to reorthogonalize on the 1st pass - if(curr_sz != 0) { - // Q_i = orth(Q_i - Q(Q'Q_i)) - blas::gemm(Layout::ColMajor, Op::Trans, Op::NoTrans, curr_sz, block_sz, m, 1.0, Q, m, Q_i, m, 0.0, QtQi, this->curr_lim); - blas::gemm(Layout::ColMajor, Op::NoTrans, Op::NoTrans, m, block_sz, curr_sz, -1.0, Q, m, QtQi, this->curr_lim, 1.0, Q_i, m); - this->Orth_Obj.call(m, block_sz, Q_i); - } - - //B_i' = A' * Q_i' - blas::gemm(Layout::ColMajor, Op::Trans, Op::NoTrans, n, block_sz, m, 1.0, A_cpy, m, Q_i, m, 0.0, B_i_trans, n); - - // Updating B norm estimation - T norm_B_i = lapack::lange(Norm::Fro, n, block_sz, B_i_trans, n); - norm_B = std::hypot(norm_B, norm_B_i); - // Updating approximation error - prev_err = approx_err; - approx_err = std::sqrt(std::abs(norm_A - norm_B)) * (std::sqrt(norm_A + norm_B) / norm_A); - - // Early termination - handling round-off error accumulation - if ((curr_sz > 0) && (approx_err > prev_err)) { - // Early termination - error growth - // Only need to move B's data, no resizing - k = curr_sz; - free(A_cpy); - free(QtQi); - free(Q_i); - free(B_i_trans); - return 2; - } - - fprintf( stderr,"before copy\n"); - - fprintf( stderr,"Curr_sz %d\n", curr_sz); - - if (ctr == 1) { - free(A_cpy); - free(QtQi); - free(Q_i); - free(B_i_trans); - return 0; - } - - // Update the matrices Q and B - lapack::lacpy(MatrixType::General, m, block_sz, Q_i, m, &Q[m * curr_sz], m); - lapack::lacpy(MatrixType::General, n, block_sz, B_i_trans, n, &B[n * curr_sz], n); - - fprintf( stderr,"Copy\n"); - - if(this->orth_check) { - if (util::orthogonality_check(m, this->curr_lim, next_sz, Q, this->verbosity)) { - // Lost orthonormality of Q - k = curr_sz; - free(A_cpy); - free(QtQi); - free(Q_i); - free(B_i_trans); - return 5; - } - } - - fprintf( stderr,"After orth\n\n"); - - curr_sz += block_sz; - // Termination criteria - if (approx_err < tol) { - // Reached the required error tol - k = curr_sz; - free(A_cpy); - free(QtQi); - free(Q_i); - free(B_i_trans); - return 0; - } - - // This step is only necessary for the next iteration - // A = A - Q_i * B_i - blas::gemm(Layout::ColMajor, Op::NoTrans, Op::Trans, m, n, block_sz, -1.0, Q_i, m, B_i_trans, n, 1.0, A_cpy, m); - ++ ctr; + while(10 > ctr) { + Q = ( double * ) realloc(Q, ctr * sizeof( double )); + ++ctr; } - free(A_cpy); - free(QtQi); - free(Q_i); - free(B_i_trans); - - // Reached expected rank without achieving the tolerance - return 3; + return 1; } } // end namespace RandLAPACK diff --git a/test/comps/test_qb.cc b/test/comps/test_qb.cc index 596d49a3..686b9468 100644 --- a/test/comps/test_qb.cc +++ b/test/comps/test_qb.cc @@ -37,8 +37,6 @@ class TestQB : public ::testing::Test QBTestData(int64_t m, int64_t n, int64_t k) : A(m * n, 0.0), - Q(m * k, 0.0), - B(k * n, 0.0), B_cpy(k * n, 0.0), A_hat(m * n, 0.0), A_k(m * n, 0.0), @@ -107,7 +105,7 @@ class TestQB : public ::testing::Test QBTestData &all_data, alg_type &all_algs, RandBLAS::RNGState &state) { - +/* auto m = all_data.row; auto n = all_data.col; auto k = all_data.rank; @@ -121,11 +119,22 @@ class TestQB : public ::testing::Test T* s_dat = all_data.s.data(); T* S_dat = all_data.S.data(); T* VT_dat = all_data.VT.data(); +*/ + int64_t m = 0; + int64_t n = 0; + int64_t k = 0; + T* A; + T* Q; + T* B; // Regular QB2 call - int err = all_algs.QB.call(m, n, all_data.A.data(), k, block_sz, tol, all_data.Q.data(), all_data.B.data(), state); - printf("Erro num %d\n", err); + all_algs.QB.call(m, n, A, k, block_sz, tol, Q, B, state); + printf("%f\n", Q[0]); + free(A); + free(Q); + free(B); +/* // Reassing pointers because Q, B have been resized T* Q_dat = all_data.Q.data(); T* B_dat = all_data.B.data(); @@ -175,6 +184,7 @@ class TestQB : public ::testing::Test T norm_test_4 = lapack::lange(Norm::Fro, m, n, A_hat_dat, m); printf("FRO NORM OF A_k - QB: %e\n", norm_test_4); ASSERT_NEAR(norm_test_4, 0, test_tol); +*/ } /// k = min(m, n) test for CholQRCP: @@ -227,14 +237,22 @@ class TestQB : public ::testing::Test EXPECT_TRUE(norm_test_1 <= (tol * norm_A)); } } + + void rand_fun(int* &ptr) + { + ptr = (int*) realloc(ptr, 3 * sizeof(int)); + ptr[0] = 1; + ptr[2] = 2; + ptr[3] = 3; + } }; TEST_F(TestQB, Polynomial_Decay_general1) { - int64_t m = 100; - int64_t n = 100; - int64_t k = 50; - int64_t p = 5; + int64_t m = 10; + int64_t n = 10; + int64_t k = 5; + int64_t p = 2; int64_t passes_per_iteration = 1; int64_t block_sz = 2; double tol = std::pow(std::numeric_limits::epsilon(), 0.75); @@ -261,6 +279,15 @@ TEST_F(TestQB, Polynomial_Decay_general1) delete all_algs; } + +TEST_F(TestQB, test_rand) +{ + int* ptr; + rand_fun(ptr); + printf("%d\n", ptr[0]); +} + +/* TEST_F(TestQB, Polynomial_Decay_general2) { int64_t m = 100; @@ -358,7 +385,7 @@ TEST_F(TestQB, Polynomial_Decay_zero_tol2) } - +*/ TEST_F(TestQB, random_test) { /* @@ -378,4 +405,5 @@ TEST_F(TestQB, random_test) RandBLAS::util::print_colmaj(rows_1, cols_1 + cols_2, A_dat, name); RandBLAS::util::print_colmaj(cols_1, rows_1, B_dat, name1); */ + } \ No newline at end of file diff --git a/test/drivers/test_rsvd.cc b/test/drivers/test_rsvd.cc index 66ce18ac..7d8d3b35 100644 --- a/test/drivers/test_rsvd.cc +++ b/test/drivers/test_rsvd.cc @@ -160,7 +160,7 @@ class TestRSVD : public ::testing::Test */ } }; - +/* TEST_F(TestRSVD, SimpleTest) { int64_t m = 100; @@ -188,3 +188,4 @@ TEST_F(TestRSVD, SimpleTest) computational_helper(all_data); test_RSVD1_general(tol, all_data, all_algs, state); } +*/ \ No newline at end of file