diff --git a/RandBLAS/trig_skops.hh b/RandBLAS/trig_skops.hh new file mode 100644 index 00000000..804fe89e --- /dev/null +++ b/RandBLAS/trig_skops.hh @@ -0,0 +1,440 @@ +#include "RandBLAS/base.hh" +#include "RandBLAS/exceptions.hh" +#include "RandBLAS/random_gen.hh" +#include + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#define MAX(a, b) (((a) < (b)) ? (b) : (a)) +#define MIN(a, b) (((a) < (b)) ? (a) : (b)) + +namespace RandBLAS { + + // ============================================================================= + /// WARNING: None of the following functions or overloads thereof are part of the + /// public API + /// + + // Generates a vector of Rademacher entries using the Random123 library + template + void generate_rademacher_vector_r123(sint_t* buff, uint32_t key_seed, uint32_t ctr_seed, int64_t n) { + RNG rng; + + typename RNG::ctr_type c; + typename RNG::key_type key = {{key_seed}}; + + // Sequential loop for generating Rademacher entries + for (int64_t i = 0; i < n; ++i) { + // Set the counter for each random number + c[0] = ctr_seed + i; // Ensure each counter is unique + + // Generate a 2x32-bit random number using the Philox generator + typename RNG::ctr_type r = rng(c, key); + + // Convert the random number into a float in [0, 1) using u01fixedpt + float rand_value = r123::u01fixedpt(r.v[0]); + + // Convert the float into a Rademacher entry (-1 or 1) + buff[i] = rand_value < 0.5 ? -1 : 1; + } + } + + template + RandBLAS::RNGState generate_rademacher_vector_r123(sint_t* buff, int64_t n, RandBLAS::RNGState seed_state) { + RNG rng; + auto [ctr, key] = seed_state; + + for (int64_t i = 0; i < n; ++i) { + typename RNG::ctr_type r = rng(ctr, key); + + float rand_value = r123::u01fixedpt(r.v[0]); + + buff[i] = rand_value < 0.5 ? -1 : 1; + + ctr.incr(); + } + + // Return the updated RNGState (with the incremented counter) + return RandBLAS::RNGState {ctr, key}; + } + + // Catch-all method for applying the diagonal Rademacher + // entries in-place to an input matrix, `A` + template + void apply_diagonal_rademacher( + bool left, + blas::Layout layout, + int64_t rows, + int64_t cols, + T* A, + sint_t* diag + ) { + //TODO: Investigate better schemes for performing the scaling + //TODO: Move to `RandBLAS/util.hh` + if(left && layout == blas::Layout::ColMajor) { + for(int64_t col=0; col < cols; col++) { + if(diag[col] > 0) + continue; + blas::scal(rows, diag[col], &A[col * rows], 1); + } + } + else if(left && layout == blas::Layout::RowMajor) { + for(int64_t col=0; col < cols; col++) { + if(diag[col] > 0) + continue; + blas::scal(rows, diag[col], &A[col], cols); + } + } + else if(!left && layout == blas::Layout::ColMajor) { + for(int64_t row = 0; row < rows; row++) { + if(diag[row] > 0) + continue; + blas::scal(cols, diag[row], &A[row], rows); + } + } + else { + for(int64_t row = 0; row < rows; row++) { + if(diag[row] > 0) + continue; + blas::scal(cols, diag[row], &A[row * cols], 1); + } + } + } + + template + void permuteRowsToTop( + blas::Layout layout, + int64_t rows, + int64_t cols, + sint_t* selected_rows, + int64_t d, // size of `selected_rows` + T* A + ) { + int64_t top = 0; // Keeps track of the topmost unselected row + + //TODO: discuss precise semantics of `selected_rows` in this function + if(layout == blas::Layout::ColMajor) { + for (int64_t i=0; i < d; i++) { + randblas_error_if_msg(selected_rows[i] == top, + "The list of provided indices should be unique"); + if (selected_rows[i] != top) { + // Use BLAS swap to swap the entire rows + // Swapping row 'selected' with row 'top' + blas::swap(cols, &A[top], rows, &A[selected_rows[i]], rows); + } + } + } + else { + // For `RowMajor` ordering + for (int64_t i=0; i < d; i++) { + randblas_error_if_msg(selected_rows[i] == top, + "The list of provided indices should be unique"); + if (selected_rows[i] != top) { + blas::swap(cols, &A[cols * selected_rows[i]], 1, &A[cols * top], 1); + } + } + } + } + + template + void permuteColsToLeft( + blas::Layout layout, + int64_t rows, + int64_t cols, + sint_t* selected_cols, + int64_t d, // size of `selectedRows` + T* A + ) { + int64_t left = 0; // Keeps track of the topmost unselected column + + if(layout == blas::Layout::ColMajor) { + for (int64_t i=0; i < d; i++) { + if (selected_cols[i] != left) { + // Use BLAS::swap to swap entire columns at once + // Swapping col 'selected' with col 'top' + blas::swap(rows, &A[rows * selected_cols[i]], 1, &A[rows * left], 1); + } + } + } + else { + // For `RowMajor` ordering + for (int64_t i=0; i < d; i++) { + if (selected_cols[i] != left) { + blas::swap(rows, &A[selected_cols[i]], cols, &A[left], cols); + } + } + } + } + + template + void fht_left_col_major(T *buf, int64_t log_n, int64_t num_rows, int64_t num_cols) { + int64_t n = 1 << log_n; + + // Apply FHT to each column independently + for (int64_t col = 0; col < num_cols; ++col) { + // Pointer to the beginning of the current column in the Column-Major order + T* col_buf = buf + col * num_rows; + + // Apply the original FHT on this column + for (int64_t i = 0; i < log_n; ++i) { + int64_t s1 = 1 << i; + int64_t s2 = s1 << 1; + for (int64_t j = 0; j < n; j += s2) { + for (int64_t k = 0; k < s1; ++k) { + // For implicitly padding the input we just have to make sure + // we replace all out-of-bounds accesses with zeros + bool b1 = j + k < num_rows; + bool b2 = j + k + s1 < num_rows; + T u = b1 ? col_buf[j + k] : 0; + T v = b2 ? col_buf[j + k + s1] : 0; + if(b1 && b2) { + col_buf[j + k] = u + v; + col_buf[j + k + s1] = u - v; + } + else if(!b2 && b1) { + col_buf[j + k] = u + v; + } + else if(!b2 && !b1) + continue; + } + } + } + } + } + + template + void fht_left_row_major(T *buf, int64_t log_n, int64_t num_rows, int64_t num_cols) { + int64_t n = 1 << log_n; + + // Apply FHT to each column independently + for (int64_t col = 0; col < num_cols; ++col) { + // Apply the original FHT on this column + for (int64_t i = 0; i < log_n; ++i) { + int64_t s1 = 1 << i; + int64_t s2 = s1 << 1; + for (int64_t j = 0; j < n; j += s2) { + for (int64_t k = 0; k < s1; ++k) { + // For implicitly padding the input we just have to make sure + // we replace all out-of-bounds accesses with zeros + bool b1 = j + k < num_rows; + bool b2 = j + k + s1 < num_rows; + T u = b1 ? buf[(j + k) * num_cols + col] : 0; + T v = b2 ? buf[(j + k + s1) * num_cols + col] : 0; + if(b1 && b2) { + buf[(j + k) * num_cols + col] = u + v; + buf[(j + k + s1) * num_cols + col] = u - v; + } + else if(!b2 && b1) { + buf[(j + k) * num_cols + col] = u + v; + } + else if(!b2 && !b1) + continue; + } + } + } + } + } + + template + void fht_right_row_major(T *buf, int64_t log_n, int64_t num_rows, int64_t num_cols) { + int64_t n = 1 << log_n; + + // Apply FHT to each row independently + for (int64_t row = 0; row < num_rows; ++row) { + // Pointer to the beginning of the current row in RowMajor order + T * row_buf = buf + row * num_cols; + + // Apply the original FHT on this row + for (int64_t i = 0; i < log_n; ++i) { + int64_t s1 = 1 << i; + int64_t s2 = s1 << 1; + for (int64_t j = 0; j < n; j += s2) { + for (int64_t k = 0; k < s1; ++k) { + // For implicitly padding the input we just have to make sure + // we replace all out-of-bounds accesses with zeros + bool b1 = j + k < num_cols; + bool b2 = j + k + s1 < num_cols; + T u = b1 ? row_buf[j + k] : 0; + T v = b2 ? row_buf[j + k + s1] : 0; + if(b1 && b2) { + row_buf[j + k] = u + v; + row_buf[j + k + s1] = u - v; + } + else if(!b2 && b1) { + row_buf[j + k] = u + v; + } + else if(!b2 && !b1) + continue; + } + } + } + } + } + + template + void fht_right_col_major(T *buf, int64_t log_n, int64_t num_rows, int64_t num_cols) { + int64_t n = 1 << log_n; + + // Apply FHT to each row independently + for (int64_t row= 0; row < num_rows; ++row) { + // Apply the original FHT on this column + for (int64_t i = 0; i < log_n; ++i) { + int64_t s1 = 1 << i; + int64_t s2 = s1 << 1; + for (int64_t j = 0; j < n; j += s2) { + for (int64_t k = 0; k < s1; ++k) { + // For implicitly padding the input we just have to make sure + // we replace all out-of-bounds accesses with zeros + bool b1 = j + k < num_cols; + bool b2 = j + k + s1 < num_cols; + T u = b1 ? buf[(j + k) * num_rows + row] : 0; + T v = b2 ? buf[(j + k + s1) * num_rows + row] : 0; + if(b1 && b2) { + buf[(j + k) * num_rows + row] = u + v; + buf[(j + k + s1) * num_rows + row] = u - v; + } + else if(!b2 && b1) { + buf[(j + k) * num_rows + row] = u + v; + } + else if(!b2 && !b1) + continue; + } + } + } + } + } + + template + void fht_dispatch( + bool left, + blas::Layout layout, + T* buff, + int64_t log_n, + int64_t num_rows, + int64_t num_cols + ) + { + if(left && layout == blas::Layout::ColMajor) + fht_left_col_major(buff, log_n, num_rows, num_cols); + else if(left && layout == blas::Layout::RowMajor) + fht_left_row_major(buff, log_n, num_rows, num_cols); + else if(!left && layout == blas::Layout::ColMajor) + fht_right_col_major(buff, log_n, num_rows, num_cols); + else + fht_right_row_major(buff, log_n, num_rows, num_cols); + } +} + + +namespace RandBLAS::trig { +/* + * These functions apply an in-place, SRHT-like transform to the input matrix + * i.e. A <- (\Pi H D)A OR A <- A(D H \Pi) (which is equivalent to A <- A(\Pi H D)^{-1}) + * layout: Layout of the input matrix (`ColMajor/RowMajor`) + * A: (m x n), input dimensions of `A` + * d: The number of rows/columns that will be permuted by the action of $\Pi$ + */ +template +inline void lmiget( + blas::Layout layout, + RandBLAS::RNGState random_state, + int64_t m, // `A` is `(m x n)` + int64_t n, + int64_t d, // `d` is the number of rows that have to be permuted by `\Pi` + T* A // data-matrix +) { + // Size of the Rademacher entries = |A_cols| + //TODO: Change `diag` to float/doubles (same data type as the matrix) + sint_t* diag = new sint_t[n]; + sint_t* selected_rows = new sint_t[d]; + + auto [ctr, key] = random_state; + + //Step 1: Scale with `D` + //Populating `diag` + generate_rademacher_vector_r123(diag, key[0], ctr[0], n); + apply_diagonal_rademacher(true, layout, m, n, A, diag); + + //Step 2: Apply the Hadamard transform + fht_dispatch(true, layout, A, std::log2(MAX(m, n)), m, n); + + //Step 3: Permute the rows + std::vector idxs_minor(d); // Placeholder + std::vector vals(d); // Placeholder + + // Populating `selected_rows` + //TODO: Do I return this at some point? + RandBLAS::RNGState next_state = RandBLAS::repeated_fisher_yates( + random_state, + d, // Number of samples (vec_nnz) + m, // Total number of elements (dim_major) + 1, // Single sample round (dim_minor) + selected_rows, // Holds the required output + idxs_minor.data(), // Placeholder + vals.data() // Placeholder + ); + + permuteRowsToTop(layout, m, n, selected_rows, d, A); + + free(diag); + free(selected_rows); +} + + +template +inline void rmiget( + blas::Layout layout, + RandBLAS::RNGState random_state, + int64_t m, // `A` is `(m x n)` + int64_t n, + int64_t d, // `d` is the number of cols that have to be permuted by `\Pi` + T* A // data-matrix +) +{ + // Size of the Rademacher entries = |A_cols| + //TODO: Change `diag` to float/doubles (same data type as the matrix) + sint_t* diag = new sint_t[m]; + sint_t* selected_cols = new sint_t[d]; + + auto [ctr, key] = random_state; + + //Step 1: Scale with `D` + //Populating `diag` + generate_rademacher_vector_r123(diag, key[0], ctr[0], n); + apply_diagonal_rademacher(false, layout, m, n, A, diag); + + //Step 2: Apply the Hadamard transform + fht_dispatch(false, layout, A, std::log2(MAX(m, n)), m, n); + + //Step 3: Permute the rows + std::vector idxs_minor(d); // Placeholder + std::vector vals(d); // Placeholder + + // Populating `selected_rows` + //TODO: Do I return this at some point? + RandBLAS::RNGState next_state = RandBLAS::repeated_fisher_yates( + random_state, + d, // Number of samples (vec_nnz) + m, // Total number of elements (dim_major) + 1, // Single sample round (dim_minor) + selected_cols, // Holds the required output + idxs_minor.data(), // Placeholder + vals.data() // Placeholder + ); + + permuteColsToLeft(layout, m, n, selected_cols, d, A); + + free(diag); + free(selected_cols); +} +} diff --git a/test/test_matmul_cores/test_trig.cc b/test/test_matmul_cores/test_trig.cc new file mode 100644 index 00000000..3283764a --- /dev/null +++ b/test/test_matmul_cores/test_trig.cc @@ -0,0 +1,556 @@ +// Copyright, 2024. See LICENSE for copyright holder information. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// (1) Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// (2) Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// (3) Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + +#include "RandBLAS.hh" +#include "RandBLAS/base.hh" +#include "RandBLAS/trig_skops.hh" +#include + +#include +#include +#include +#include +#include + +using RandBLAS::trig::lmiget; +using RandBLAS::trig::rmiget; +using RandBLAS::generate_rademacher_vector_r123; +using RandBLAS::apply_diagonal_rademacher; +using RandBLAS::permuteRowsToTop; +using RandBLAS::permuteColsToLeft; +using RandBLAS::fht_dispatch; +using Eigen::MatrixXd; + + +class TestLMIGET : public::testing::Test +{ + protected: + virtual void SetUp(){}; + + virtual void TearDown(){}; + + inline static std::vector keys {0, 42}; + + // Helper function for explicitly generating a Hadamard matrix + // (Note that ColMajor and RowMajor storage is identical for `H`) + static std::vector generate_hadamard(int64_t log_n) { + int64_t size = 1 << log_n; // size = 2^n + std::vector> H(size, std::vector(size, 1)); // Initialize H_1 + + // Sylvester's construction: recursively build the matrix + for (int n = 1; n <= log_n; ++n) { + double curr_size = 1 << n; // Current size of the matrix is 2^n + for (int i = 0; i < curr_size / 2; ++i) { + for (int j = 0; j < curr_size / 2; ++j) { + // Fill the bottom-left and bottom-right quadrants + H[i + curr_size / 2][j] = H[i][j]; // Copy the top-left quadrant to bottom-left + H[i][j + curr_size / 2] = H[i][j]; // Copy the top-left quadrant to top-right + H[i + curr_size / 2][j + curr_size / 2] = -H[i][j]; // Fill bottom-right with negative values + } + } + } + + // Flatten into a vector in ColMajor order + std::vector H_flat(size * size); + + for (int col = 0; col < size; ++col) { + for (int row = 0; row < size; ++row) { + H_flat[col * size + row] = H[row][col]; + } + } + + return H_flat; + } + + static std::vector generate_random_vector(int size, double lower_bound, double upper_bound) { + // Create a random device and seed the random number generator + std::random_device rd; + std::mt19937 gen(rd()); + + // Define the distribution range for the random doubles + std::uniform_real_distribution<> dist(lower_bound, upper_bound); + + // Create a vector of the specified size + std::vector random_vector(size); + + // Generate random doubles and fill the vector + for (int i = 0; i < size; ++i) { + random_vector[i] = dist(gen); + } + + return random_vector; + } + + enum class transforms {diag_scale, hadamard, permute}; + + // Tests to verify correctness of each of the transforms + template + static void correctness( + uint32_t seed, + transforms transform, + int64_t m, // Generated data matrix, `A` is of size `(m x n)` + int64_t n, + bool left, + blas::Layout layout, + double epsilon=1e-5 + ) { + // Grabbing a random matrix + std::vector A_vec = generate_random_vector(m * n, 0.0, 10.0); + Eigen::Matrix A_col = Eigen::Map>(A_vec.data(), m, n); + Eigen::Map> A_row(A_vec.data(), m, n); + + // Deep copy + MatrixXd B; + if(layout == blas::Layout::RowMajor) + B = A_row; + else + B = A_col; + + switch (transform) { + case transforms::permute: { + // Simply compares against Eigen::PermutationMatrix + Eigen::PermutationMatrix perm(5); + + std::vector V = left ? std::vector(m) : std::vector(n); + + int cnt = 0; + // int cnt = 0; + for(int i = 0; i < V.size(); i++) { + if(i == 0) + V[i] = V.size() - 1; + else if(i == V.size() - 1) + V[i] = 0; + else + V[i] = cnt; + cnt++; + } + + Eigen::VectorXi indices = Eigen::Map(V.data(), V.size()); + + // Set the indices in the permutation matrix + perm.indices() = indices; + + sint_t* v = new sint_t; + *v = V.size() - 1; + + if(left) { + if(layout == blas::Layout::RowMajor) + RandBLAS::permuteRowsToTop(layout, m, n, v, 1, A_row.data()); + else + RandBLAS::permuteRowsToTop(layout, m, n, v, 1, A_col.data()); + } + else { + if(layout == blas::Layout::RowMajor) + RandBLAS::permuteColsToLeft(layout, m, n, v, 1, A_row.data()); + else + RandBLAS::permuteColsToLeft(layout, m, n, v, 1, A_col.data()); + } + + // Or just do A.isApprox(B) + double norm_permute = 0.0; + if(left) { + if(layout == blas::Layout::RowMajor) + norm_permute = (A_row - perm * B).norm(); + else + norm_permute = (A_col - perm * B).norm(); + } + else { + if(layout == blas::Layout::RowMajor) + norm_permute = (A_row - B * perm).norm(); + else + norm_permute = (A_col - B * perm).norm(); + } + + // Or do A.isApprox(H * B) + randblas_require(norm_permute < epsilon); + + break; + } + case transforms::hadamard: { + // Here, simply check against explicit application of the Hadamard matrix + + int ld = (left) ? m : n; + if(layout == blas::Layout::ColMajor) + RandBLAS::fht_dispatch(left, layout, A_col.data(), std::log2(ld), m, n); + else + RandBLAS::fht_dispatch(left, layout, A_row.data(), std::log2(ld), m, n); + + std::vector H_vec = generate_hadamard(std::log2(ld)); + //TODO: Should have a check here to enforce that `m` and `n` are powers of 2 (since + // my `generate_hadamard` function does not take care to pad an input matrix) + MatrixXd H = Eigen::Map(H_vec.data(), int(std::pow(2, std::log2(ld))), int(std::pow(2, std::log2(ld)))); + + double norm_hadamard = 0.0; + if(left) { + if(layout == blas::Layout::RowMajor) + norm_hadamard = (A_row - H * B).norm(); + else + norm_hadamard = (A_col - H * B).norm(); + } + else { + if(layout == blas::Layout::RowMajor) + norm_hadamard = (A_row - B * H).norm(); + else + norm_hadamard = (A_col - B * H).norm(); + } + + randblas_require(norm_hadamard < epsilon); + + break; + } + case transforms::diag_scale: { + // Scales all rows/cols by -1 and checks if A == -A + std::vector buff = left ? std::vector(n, -1) : std::vector(m, -1); + + double norm_diag = 0.0; + if(layout == blas::Layout::RowMajor) { + RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_row.data(), buff.data()); + norm_diag = (A_row + B).norm(); + } + else { + RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_col.data(), buff.data()); + norm_diag = (A_col + B).norm(); + } + + randblas_require(norm_diag < epsilon); + + break; + } + } + } + + template + static void inverse_transform( + uint32_t seed, + int64_t m, // Generated data matrix, `A` is of size `(m x n)` + int64_t n, + int64_t d, // #rows/cols that will be permuted + bool left, + blas::Layout layout, + double epsilon=1e-5 + ) { + // Grabbing a random matrix + std::vector A_vec = generate_random_vector(m * n, 0.0, 10.0); + Eigen::Matrix A_col = Eigen::Map>(A_vec.data(), m, n); + Eigen::Map> A_row(A_vec.data(), m, n); + + // Deep copy + MatrixXd B; + if(layout == blas::Layout::RowMajor) + B = A_row; + else + B = A_col; + + //// Performing \Pi H D + // Step 1: setup the diagonal scaling + std::vector buff = left ? std::vector(n, -1) : std::vector(m, -1); + + if(layout == blas::Layout::RowMajor) { + RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_row.data(), buff.data()); + } + else { + RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_col.data(), buff.data()); + } + + // Step 2: apply the hadamard transform + int ld = (left) ? m : n; + if(layout == blas::Layout::ColMajor){ + RandBLAS::fht_dispatch(left, layout, A_col.data(), int(std::log2(ld)), m, n); + } + else { + RandBLAS::fht_dispatch(left, layout, A_row.data(), int(std::log2(ld)), m, n); + } + + // Step 3: Permuting + std::vector indices(d); + + std::iota(indices.begin(), indices.end(), 1); + if(left) { + if(layout == blas::Layout::RowMajor) + RandBLAS::permuteRowsToTop(layout, m, n, indices.data(), d, A_row.data()); + else + RandBLAS::permuteRowsToTop(layout, m, n, indices.data(), d, A_col.data()); + } + else { + if(layout == blas::Layout::RowMajor) + RandBLAS::permuteColsToLeft(layout, m, n, indices.data(), d, A_row.data()); + else + RandBLAS::permuteColsToLeft(layout, m, n, indices.data(), d, A_col.data()); + } + + //// Performing D H \Pi + + //Step 1: Un-permute + std::reverse(indices.begin(), indices.end()); + + if(left) { + if(layout == blas::Layout::RowMajor) + RandBLAS::permuteRowsToTop(layout, m, n, indices.data(), d, A_row.data()); + else + RandBLAS::permuteRowsToTop(layout, m, n, indices.data(), d, A_col.data()); + } + else { + if(layout == blas::Layout::RowMajor) + RandBLAS::permuteColsToLeft(layout, m, n, indices.data(), d, A_row.data()); + else + RandBLAS::permuteColsToLeft(layout, m, n, indices.data(), d, A_col.data()); + } + + // Step-2: Apply H^{-1} + if(layout == blas::Layout::ColMajor) { + RandBLAS::fht_dispatch(left, layout, A_col.data(), int(std::log2(ld)), m, n); + A_col = A_col * 1/std::pow(2, int(std::log2(ld))); + } + else { + RandBLAS::fht_dispatch(left, layout, A_row.data(), int(std::log2(ld)), m, n); + A_row = A_row * 1/std::pow(2, int(std::log2(ld))); + } + + //Step-3: Inverting `D` + if(layout == blas::Layout::RowMajor) { + RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_row.data(), buff.data()); + } + else { + RandBLAS::apply_diagonal_rademacher(left, layout, m, n, A_col.data(), buff.data()); + } + + double norm_inverse = 0.0; + + if(layout == blas::Layout::RowMajor) { + norm_inverse = (A_row - B).norm(); + } + else { + norm_inverse = (A_col - B).norm(); + } + + randblas_require(norm_inverse < epsilon); + + + } +}; + +//////////////////////////////////////////////////////////////////////// +// +// +// Checking correctness of each of the transforms +// +// +//////////////////////////////////////////////////////////////////////// + +TEST_F(TestLMIGET, test_diag_left_colmajor) { + for(uint32_t seed: keys) + correctness( + seed, + transforms::diag_scale, + 100, + 100, + true, + blas::Layout::ColMajor + ); +} + +TEST_F(TestLMIGET, test_diag_right_colmajor) { + for(uint32_t seed: keys) + correctness( + seed, + transforms::diag_scale, + 100, + 100, + false, + blas::Layout::ColMajor + ); +} + +TEST_F(TestLMIGET, test_diag_left_rowmajor) { + for(uint32_t seed: keys) + correctness( + seed, + transforms::diag_scale, + 100, + 100, + true, + blas::Layout::RowMajor + ); +} + +TEST_F(TestLMIGET, test_diag_right_rowmajor) { + for(uint32_t seed: keys) + correctness( + seed, + transforms::diag_scale, + 100, + 100, + false, + blas::Layout::RowMajor + ); +} + +TEST_F(TestLMIGET, test_permute_left_colmajor) { + for(uint32_t seed: keys) + correctness( + seed, + transforms::permute, + 100, + 100, + true, + blas::Layout::ColMajor + ); +} + +TEST_F(TestLMIGET, test_permute_right_colmajor) { + for(uint32_t seed: keys) + correctness( + seed, + transforms::permute, + 100, + 100, + false, + blas::Layout::ColMajor + ); +} + +TEST_F(TestLMIGET, test_permute_left_rowmajor) { + for(uint32_t seed: keys) + correctness( + seed, + transforms::permute, + 100, + 100, + true, + blas::Layout::RowMajor + ); +} + +TEST_F(TestLMIGET, test_permute_right_rowmajor) { + for(uint32_t seed: keys) + correctness( + seed, + transforms::permute, + 100, + 100, + false, + blas::Layout::RowMajor + ); +} + +TEST_F(TestLMIGET, test_hadamard_left_colmajor) { + for(uint32_t seed: keys) + correctness( + seed, + transforms::hadamard, + 128, + 100, + true, + blas::Layout::ColMajor + ); +} + +TEST_F(TestLMIGET, test_hadamard_right_colmajor) { + for(uint32_t seed: keys) + correctness( + seed, + transforms::hadamard, + 100, + 128, + false, + blas::Layout::ColMajor + ); +} + +TEST_F(TestLMIGET, test_hadamard_left_rowmajor) { + for(uint32_t seed: keys) + correctness( + seed, + transforms::hadamard, + 128, + 100, + true, + blas::Layout::RowMajor + ); +} + +TEST_F(TestLMIGET, test_hadamard_right_rowmajor) { + for(uint32_t seed: keys) + correctness( + seed, + transforms::hadamard, + 100, + 128, + false, + blas::Layout::RowMajor + ); +} + +TEST_F(TestLMIGET, test_inverse_left_colmajor) { + for(uint32_t seed: keys) + inverse_transform( + seed, + 128, + 128, + 25, + true, + blas::Layout::ColMajor + ); +} + +TEST_F(TestLMIGET, test_inverse_right_colmajor) { + for(uint32_t seed: keys) + inverse_transform( + seed, + 100, + 128, + 25, + false, + blas::Layout::ColMajor + ); +} + +TEST_F(TestLMIGET, test_inverse_left_rowmajor) { + for(uint32_t seed: keys) + inverse_transform( + seed, + 128, + 128, + 25, + true, + blas::Layout::RowMajor + ); +} + +TEST_F(TestLMIGET, test_inverse_right_rowmajor) { + for(uint32_t seed: keys) + inverse_transform( + seed, + 100, + 128, + 25, + false, + blas::Layout::RowMajor + ); +}