Skip to content

Commit

Permalink
Minimal example that breaks QB.
Browse files Browse the repository at this point in the history
  • Loading branch information
TeachRaccooon committed Apr 26, 2024
1 parent c433fe4 commit 95c0ae3
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 195 deletions.
200 changes: 16 additions & 184 deletions RandLAPACK/comps/rl_qb.hh
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ class QBalg {
int64_t n,
T* A,
int64_t &k,
int64_t block_sz,
int64_t b_sz,
T tol,
T* Q,
T* B,
T* &Q,
T* &B,
RandBLAS::RNGState<RNG> &state
) = 0;
};
Expand All @@ -49,7 +49,6 @@ class QB : public QBalg<T, RNG> {
) : RF_Obj(rf_obj), Orth_Obj(orth_obj) {
verbosity = verb;
orth_check = orth;
dim_growth_factor = 4;
}

/// Iteratively build an approximate QB factorization of A,
Expand All @@ -59,9 +58,9 @@ class QB : public QBalg<T, RNG> {
/// or
/// (2) Q has k columns.
/// Each iteration involves sketching A from the right by a sketching
/// matrix with "block_sz" columns.
/// matrix with "b_sz" columns.
///
/// The number of columns in Q increase by "block_sz" at each iteration, unless
/// The number of columns in Q increase by "b_sz" at each iteration, unless
/// that would bring #cols(Q) > k. In that case, the final iteration only
/// adds enough columns to Q so that #cols(Q) == k.
/// The implementation relies on RowSketcher and RangeFinder,
Expand All @@ -88,7 +87,7 @@ class QB : public QBalg<T, RNG> {
/// @param[in] k
/// Expected rank of the matrix A. If unknown, set k=min(m,n).
///
/// @param[in] block_sz
/// @param[in] b_sz
/// The block size in this blocked QB algorithm. Add this many columns
/// to Q at each iteration (except possibly the final iteration).
///
Expand Down Expand Up @@ -117,10 +116,10 @@ class QB : public QBalg<T, RNG> {
int64_t n,
T* A,
int64_t &k,
int64_t block_sz,
int64_t b_sz,
T tol,
T* Q,
T* B,
T* &Q,
T* &B,
RandBLAS::RNGState<RNG> &state
) override;

Expand All @@ -129,14 +128,6 @@ class QB : public QBalg<T, RNG> {
RandLAPACK::Stabilization<T> &Orth_Obj;
bool verbosity;
bool orth_check;

//This represents how much space is currently allocated for cols of Q and rows of B.
//This is <= k. We are assuming that the user may not have given "enough"
//space when allocating Q, B initially.
int64_t curr_lim;

// By how much are we increasing the dimension when we've reached curr_lim
int dim_growth_factor;
};

// -----------------------------------------------------------------------------
Expand All @@ -146,179 +137,20 @@ int QB<T, RNG>::call(
int64_t n,
T* A,
int64_t &k,
int64_t block_sz,
int64_t b_sz,
T tol,
T* Q,
T* B,
T* &Q,
T* &B,
RandBLAS::RNGState<RNG> &state
){

int64_t curr_sz = 0;
int64_t next_sz = 0;
this->curr_lim = k;
tol = std::max(tol, 100 * std::numeric_limits<T>::epsilon());
T norm_B = 0.0;
T prev_err = 0.0;
T approx_err = 0.0;

fprintf( stderr,"%d\n", m);
fprintf( stderr,"%d\n", n);
fprintf( stderr,"%ld\n", block_sz);

// Make sure Q, B have space for at least one iteration
if(!Q)
Q = ( T * ) realloc(Q, m * block_sz * sizeof( T ) );
if(!B)
B = ( T * ) realloc(B, n * block_sz * sizeof( T ) );

T* A_cpy = ( T * ) calloc( m * n, sizeof( T ) );
T* QtQi = ( T * ) calloc( this->curr_lim * block_sz, sizeof( T ) );
T* Q_i = ( T * ) calloc( m * block_sz, sizeof( T ) );
T* B_i_trans = ( T * ) calloc( block_sz * n, sizeof( T ) );

// pre-compute nrom
T norm_A = lapack::lange(Norm::Fro, m, n, A, m);
// Immediate termination criteria
if(norm_A == 0.0) {
// Zero matrix termination
k = curr_sz;
free(A_cpy);
free(QtQi);
free(Q_i);
free(B_i_trans);
return 1;
}

// Copy the initial data to avoid unwanted modification TODO #1
lapack::lacpy(MatrixType::General, m, n, A, m, A_cpy, m);
int ctr = 0;
while(k > curr_sz) {
// Dynamically changing block size
block_sz = std::min(block_sz, k - curr_sz);
next_sz = curr_sz + block_sz;

fprintf( stderr,"Next sz %d\n", next_sz);
fprintf( stderr,"this->curr_lim %d\n", this->curr_lim);
// Make sure we have enough space for everything
if(next_sz > this->curr_lim) {
this->curr_lim = std::min(2 * this->curr_lim, k);
Q = ( T * ) realloc(Q, (this->curr_lim) * m * sizeof( T ));
B = ( T * ) realloc(B, (this->curr_lim) * n * sizeof( T ));
QtQi = ( T * ) realloc(QtQi, (this->curr_lim) * block_sz * sizeof( T ));
}

fprintf( stderr, "Size Q %ld\n", (this->curr_lim) * m );
fprintf( stderr, "Size B %ld\n", (this->curr_lim) * n );
fprintf( stderr, "Size QtQi %ld\n", (this->curr_lim) * block_sz );

// Calling RangeFinder
if(this->RF_Obj.call(m, n, A_cpy, block_sz, Q_i, state)) {
free(A_cpy);
free(QtQi);
free(Q_i);
free(B_i_trans);
return 6; // RF failed
}

if(this->orth_check) {
if (util::orthogonality_check(m, block_sz, block_sz, Q_i, this->verbosity)) {
// Lost orthonormality of Q
k = curr_sz;
free(A_cpy);
free(QtQi);
free(Q_i);
free(B_i_trans);
return 4;
}
}

// No need to reorthogonalize on the 1st pass
if(curr_sz != 0) {
// Q_i = orth(Q_i - Q(Q'Q_i))
blas::gemm(Layout::ColMajor, Op::Trans, Op::NoTrans, curr_sz, block_sz, m, 1.0, Q, m, Q_i, m, 0.0, QtQi, this->curr_lim);
blas::gemm(Layout::ColMajor, Op::NoTrans, Op::NoTrans, m, block_sz, curr_sz, -1.0, Q, m, QtQi, this->curr_lim, 1.0, Q_i, m);
this->Orth_Obj.call(m, block_sz, Q_i);
}

//B_i' = A' * Q_i'
blas::gemm(Layout::ColMajor, Op::Trans, Op::NoTrans, n, block_sz, m, 1.0, A_cpy, m, Q_i, m, 0.0, B_i_trans, n);

// Updating B norm estimation
T norm_B_i = lapack::lange(Norm::Fro, n, block_sz, B_i_trans, n);
norm_B = std::hypot(norm_B, norm_B_i);
// Updating approximation error
prev_err = approx_err;
approx_err = std::sqrt(std::abs(norm_A - norm_B)) * (std::sqrt(norm_A + norm_B) / norm_A);

// Early termination - handling round-off error accumulation
if ((curr_sz > 0) && (approx_err > prev_err)) {
// Early termination - error growth
// Only need to move B's data, no resizing
k = curr_sz;
free(A_cpy);
free(QtQi);
free(Q_i);
free(B_i_trans);
return 2;
}

fprintf( stderr,"before copy\n");

fprintf( stderr,"Curr_sz %d\n", curr_sz);

if (ctr == 1) {
free(A_cpy);
free(QtQi);
free(Q_i);
free(B_i_trans);
return 0;
}

// Update the matrices Q and B
lapack::lacpy(MatrixType::General, m, block_sz, Q_i, m, &Q[m * curr_sz], m);
lapack::lacpy(MatrixType::General, n, block_sz, B_i_trans, n, &B[n * curr_sz], n);

fprintf( stderr,"Copy\n");

if(this->orth_check) {
if (util::orthogonality_check(m, this->curr_lim, next_sz, Q, this->verbosity)) {
// Lost orthonormality of Q
k = curr_sz;
free(A_cpy);
free(QtQi);
free(Q_i);
free(B_i_trans);
return 5;
}
}

fprintf( stderr,"After orth\n\n");

curr_sz += block_sz;
// Termination criteria
if (approx_err < tol) {
// Reached the required error tol
k = curr_sz;
free(A_cpy);
free(QtQi);
free(Q_i);
free(B_i_trans);
return 0;
}

// This step is only necessary for the next iteration
// A = A - Q_i * B_i
blas::gemm(Layout::ColMajor, Op::NoTrans, Op::Trans, m, n, block_sz, -1.0, Q_i, m, B_i_trans, n, 1.0, A_cpy, m);
++ ctr;
while(10 > ctr) {
Q = ( double * ) realloc(Q, ctr * sizeof( double ));
++ctr;
}

free(A_cpy);
free(QtQi);
free(Q_i);
free(B_i_trans);

// Reached expected rank without achieving the tolerance
return 3;
return 1;
}

} // end namespace RandLAPACK
Expand Down
48 changes: 38 additions & 10 deletions test/comps/test_qb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,6 @@ class TestQB : public ::testing::Test

QBTestData(int64_t m, int64_t n, int64_t k) :
A(m * n, 0.0),
Q(m * k, 0.0),
B(k * n, 0.0),
B_cpy(k * n, 0.0),
A_hat(m * n, 0.0),
A_k(m * n, 0.0),
Expand Down Expand Up @@ -107,7 +105,7 @@ class TestQB : public ::testing::Test
QBTestData<T> &all_data,
alg_type &all_algs,
RandBLAS::RNGState<RNG> &state) {

/*
auto m = all_data.row;
auto n = all_data.col;
auto k = all_data.rank;
Expand All @@ -121,11 +119,22 @@ class TestQB : public ::testing::Test
T* s_dat = all_data.s.data();
T* S_dat = all_data.S.data();
T* VT_dat = all_data.VT.data();
*/
int64_t m = 0;
int64_t n = 0;
int64_t k = 0;
T* A;
T* Q;
T* B;

// Regular QB2 call
int err = all_algs.QB.call(m, n, all_data.A.data(), k, block_sz, tol, all_data.Q.data(), all_data.B.data(), state);
printf("Erro num %d\n", err);
all_algs.QB.call(m, n, A, k, block_sz, tol, Q, B, state);

printf("%f\n", Q[0]);
free(A);
free(Q);
free(B);
/*
// Reassing pointers because Q, B have been resized
T* Q_dat = all_data.Q.data();
T* B_dat = all_data.B.data();
Expand Down Expand Up @@ -175,6 +184,7 @@ class TestQB : public ::testing::Test
T norm_test_4 = lapack::lange(Norm::Fro, m, n, A_hat_dat, m);
printf("FRO NORM OF A_k - QB: %e\n", norm_test_4);
ASSERT_NEAR(norm_test_4, 0, test_tol);
*/
}

/// k = min(m, n) test for CholQRCP:
Expand Down Expand Up @@ -227,14 +237,22 @@ class TestQB : public ::testing::Test
EXPECT_TRUE(norm_test_1 <= (tol * norm_A));
}
}

void rand_fun(int* &ptr)
{
ptr = (int*) realloc(ptr, 3 * sizeof(int));
ptr[0] = 1;
ptr[2] = 2;
ptr[3] = 3;
}
};

TEST_F(TestQB, Polynomial_Decay_general1)
{
int64_t m = 100;
int64_t n = 100;
int64_t k = 50;
int64_t p = 5;
int64_t m = 10;
int64_t n = 10;
int64_t k = 5;
int64_t p = 2;
int64_t passes_per_iteration = 1;
int64_t block_sz = 2;
double tol = std::pow(std::numeric_limits<double>::epsilon(), 0.75);
Expand All @@ -261,6 +279,15 @@ TEST_F(TestQB, Polynomial_Decay_general1)
delete all_algs;
}


TEST_F(TestQB, test_rand)
{
int* ptr;
rand_fun(ptr);
printf("%d\n", ptr[0]);
}

/*
TEST_F(TestQB, Polynomial_Decay_general2)
{
int64_t m = 100;
Expand Down Expand Up @@ -358,7 +385,7 @@ TEST_F(TestQB, Polynomial_Decay_zero_tol2)
}

*/
TEST_F(TestQB, random_test)
{
/*
Expand All @@ -378,4 +405,5 @@ TEST_F(TestQB, random_test)
RandBLAS::util::print_colmaj(rows_1, cols_1 + cols_2, A_dat, name);
RandBLAS::util::print_colmaj(cols_1, rows_1, B_dat, name1);
*/

}
3 changes: 2 additions & 1 deletion test/drivers/test_rsvd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ class TestRSVD : public ::testing::Test
*/
}
};

/*
TEST_F(TestRSVD, SimpleTest)
{
int64_t m = 100;
Expand Down Expand Up @@ -188,3 +188,4 @@ TEST_F(TestRSVD, SimpleTest)
computational_helper(all_data);
test_RSVD1_general(tol, all_data, all_algs, state);
}
*/

0 comments on commit 95c0ae3

Please sign in to comment.