diff --git a/RandLAPACK/drivers/rl_cqrrp.hh b/RandLAPACK/drivers/rl_cqrrp.hh
index 998c0b63..564325e1 100644
--- a/RandLAPACK/drivers/rl_cqrrp.hh
+++ b/RandLAPACK/drivers/rl_cqrrp.hh
@@ -85,6 +85,9 @@ class CQRRP_blocked : public CQRRPalg<T, RNG> {
         /// @param[in] tau
         ///     Pointer to a vector of size n. On entry, is empty.
         ///
+        /// @param[in] state
+        ///     RNG state parameter, required for sketching operator generation.
+        ///
         /// @param[out] A
         ///     Overwritten by Implicit Q and explicit R factors.
         ///
diff --git a/RandLAPACK/drivers/rl_cqrrpt.hh b/RandLAPACK/drivers/rl_cqrrpt.hh
index 77406edd..70aa3743 100644
--- a/RandLAPACK/drivers/rl_cqrrpt.hh
+++ b/RandLAPACK/drivers/rl_cqrrpt.hh
@@ -92,6 +92,9 @@ class CQRRPT : public CQRRPTalg<T, RNG> {
         ///     Represents the upper-triangular R factor of QR factorization.
         ///     On entry, is empty and may not have any space allocated for it.
         ///
+        /// @param[in] state
+        ///     RNG state parameter, required for sketching operator generation.
+        ///
         /// @param[out] A
         ///     Overwritten by an m-by-k orthogonal Q factor.
         ///     Matrix is stored explicitly.
diff --git a/RandLAPACK/drivers/rl_nysbki.hh b/RandLAPACK/drivers/rl_nysbki.hh
deleted file mode 100644
index 439c4c02..00000000
--- a/RandLAPACK/drivers/rl_nysbki.hh
+++ /dev/null
@@ -1,117 +0,0 @@
-#ifndef randlapack_NysBKI_h
-#define randlapack_NysBKI_h
-
-#include "rl_util.hh"
-#include "rl_blaspp.hh"
-#include "rl_lapackpp.hh"
-#include "rl_hqrrp.hh"
-
-#include <RandBLAS.hh>
-#include <cstdint>
-#include <vector>
-#include <chrono>
-#include <numeric>
-#include <climits>
-
-using namespace std::chrono;
-
-namespace RandLAPACK {
-
-template <typename T, typename RNG>
-class NysBKIalg {
-    public:
-        virtual ~NysBKIalg() {}
-        virtual int call(
-            int64_t m,
-            T* A,
-            int64_t lda,
-            int64_t k,
-            T* V,
-            T* Lambda,
-            RandBLAS::RNGState<RNG> &state
-        ) = 0;
-};
-
-template <typename T, typename RNG>
-class NysBKI : public NysBKIalg<T, RNG> {
-    public:
-        NysBKI(
-            bool verb,
-            bool time_subroutines,
-            T ep
-        ) {
-            verbosity = verb;
-            timing = time_subroutines;
-            tol = ep;
-            max_krylov_iters = INT_MAX;
-        }
-        int call(
-            int64_t m,
-            T* A,
-            int64_t lda,
-            int64_t k,
-            T* V,
-            T* Lambda,
-            RandBLAS::RNGState<RNG> &state
-        ) override;
-    public:
-        bool verbosity;
-        bool timing;
-        T tol;
-        int num_krylov_iters;
-        int max_krylov_iters;
-        std::vector<long> times;
-        T norm_R_end;
-};
-
-// -----------------------------------------------------------------------------
-template <typename T, typename RNG>
-int NysBKI<T, RNG>::call(
-    int64_t m,
-    T* A,
-    int64_t lda,
-    int64_t k,
-    T* V,
-    T* Lambda,
-    RandBLAS::RNGState<RNG> &state
-){
-    int iter = 0;
-
-    T* X   = ( T * ) calloc( m * (m + k), sizeof( T ) );
-    T* X_i = X;
-    T* Y   = ( T * ) calloc( m * (m + k), sizeof( T ) );
-    T* Y_i = Y;
-
-    // tau space for QR
-    T* tau = ( T * ) calloc( k,           sizeof( T ) );
-
-
-    // Generate a dense Gaussian random matrx.
-    RandBLAS::DenseDist D(m, k);
-    state = RandBLAS::fill_dense(D, X_i, state).second;
-    // [X_i, ~] = qr(randn(m, m), 0)
-    lapack::geqrf(m, k, X_i, m, tau);
-    // Y_i = A * X_i
-    blas::gemm(Layout::ColMajor, Op::NoTrans, Op::NoTrans, m, k, m, 1.0, A, m, X_i, m, 0.0, Y_i, m);
-
-    while(iter < max_krylov_iters) {
-        // Advance X_i pointer
-        X_i = X_i + (m * k);
-        lapack::lacpy(MatrixType::Upper, m, k, X, m, X_i, m);
-
-        if (!iter) {
-            // X_i+1 = Y_i + tol * X_i;
-            blas::scal(m * k, this->tol, X_i, 1);	
-            blas::axpy(m * k, 1.0, Y_i, 1, X_i, 1);
-        } else {
-
-        }
-
-
-
-    }
-
-    return 0;
-}
-} // end namespace RandLAPACK
-#endif
\ No newline at end of file
diff --git a/RandLAPACK/drivers/rl_rbki.hh b/RandLAPACK/drivers/rl_rbki.hh
index 69261b1f..71ed4c4f 100644
--- a/RandLAPACK/drivers/rl_rbki.hh
+++ b/RandLAPACK/drivers/rl_rbki.hh
@@ -21,6 +21,21 @@ namespace RandLAPACK {
 template <typename T, typename RNG>
 class RBKIalg {
     public:
+
+        /// RBKI algorithm is a method for finding truncated SVD based on block Krylov iterations.
+        /// This algorithm is a version of Algorithm A.1 from https://arxiv.org/pdf/2306.12418.pdf
+        /// 
+        /// The main difference is in the fact that an economy SVD is performed only once at the very end
+        /// of the algorithm run and that the termination criterion is not based on singular vector residual evaluation.
+        /// Instead, the scheme terminates if:
+        ///     1. ||R||_F > sqrt(1 - eps^2) ||A||_F, which ensures that we've exhausted all vectors and doing more 
+        ///        iterations would bring no benefit or that ||A - hat(A)||_F < eps * ||A||_F.
+        ///     2. Stop if the bottom right entry of R or S is numerically close to zero (up to square root of machine eps).
+        /// 
+        /// The main cost of this algorithm comes from large GEMMs with the input matrix A.
+        ///
+        /// The algorithm optionally times all of its subcomponents through a user-defined 'timing' parameter.
+
         virtual ~RBKIalg() {}
         virtual int call(
             int64_t m,
@@ -48,6 +63,51 @@ class RBKI : public RBKIalg<T, RNG> {
             tol = ep;
             max_krylov_iters = INT_MAX;
         }
+
+        /// Computes a truncated SVD of the form:
+        ///     A ~= U * diag(Sigma) * VT,
+        /// using block Krylov iterations; U holds left singular vectors, VT right singular vectors, Sigma singular values.
+        /// The factors are written to the U, VT and Sigma buffers (their sizes are given in the @param[out] entries below).
+        ///
+        /// @param[in] m
+        ///     The number of rows in the matrix A.
+        ///
+        /// @param[in] n
+        ///     The number of columns in the matrix A.
+        ///
+        /// @param[in] A
+        ///     Pointer to the m-by-n matrix A, stored in a column-major format.
+        ///
+        /// @param[in] lda
+        ///     Leading dimension of A.
+        ///
+        /// @param[in] k
+        ///     Sampling dimension of a sketching operator, m >= (k * n) >= n.
+        ///
+        /// @param[in] U
+        ///     On entry, an empty matrix.
+        ///
+        /// @param[in] VT
+        ///     On entry, an empty matrix.
+        ///
+        /// @param[in] Sigma
+        ///     On entry, an empty matrix.
+        ///
+        /// @param[in] state
+        ///     RNG state parameter, required for sketching operator generation.
+        ///
+        /// @param[out] U
+        ///     Stores m by ((num_iters / 2) * k) orthonormal matrix of left singular vectors.
+        ///
+        /// @param[out] VT
+        ///     Stores ((num_iters / 2) * k) by n orthonormal matrix of right singular vectors.
+        ///
+        /// @param[out] Sigma
+        ///     Stores ((num_iters / 2) * k) singular values. 
+        ///
+        /// @return = 0: successful exit
+        ///
+
         int call(
             int64_t m,
             int64_t n,