From cb42636f10fc7d4523fc5d589062660bd1ff621c Mon Sep 17 00:00:00 2001 From: Giulio Ermanno Pibiri Date: Wed, 24 Jul 2024 15:28:59 +0200 Subject: [PATCH 1/2] minor --- include/info.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/include/info.cpp b/include/info.cpp index fa3069e..3a683f1 100644 --- a/include/info.cpp +++ b/include/info.cpp @@ -39,9 +39,12 @@ double bits_per_kmer_formula(uint64_t k, /* kmer length */ assert(k > 0); assert(k >= m); - const uint64_t N = n + M * (k - 1); // num. symbols in SPSS + const uint64_t N = n + M * (k - 1); // num. characters in SPSS + + /* summing (M-1) provides an upper bound */ + double num_minimizers = + (2.0 * n) / (k - m + 2) + (M - 1); // not distinct, hence num. of super-kmers - // double num_minimizers = (2.0 * n) / (k - m + 2); // not distinct, hence num. of super-kmers // std::cout << "num_minimizers = " << num_minimizers << std::endl; // std::cout << "minimizers: " << (3.0 * num_minimizers) / n << " [bits/kmer]" << std::endl; // std::cout << "pieces: " << (M * (2.0 + std::ceil(std::log2(static_cast<double>(N) / M)))) / n @@ -53,8 +56,11 @@ double bits_per_kmer_formula(uint64_t k, /* kmer length */ // << std::endl; // std::cout << "strings: " << (2.0 * N) / n << " [bits/kmer]" << std::endl; - double num_bits = 2 * n * (1.0 + (5.0 + std::ceil(std::log2(N))) / (k - m + 2)) + - M * (2 * k + std::ceil(std::log2(static_cast<double>(n) / M + k - 1))); + // double num_bits = 2 * n * (1.0 + (5.0 + std::ceil(std::log2(N))) / (k - m + 2)) + + // M * (2 * k + std::ceil(std::log2(static_cast<double>(n) / M + k - 1))); + + double num_bits = 2 * N + num_minimizers * (5.0 + std::ceil(std::log2(N))) + + M * (2.0 + std::ceil(std::log2(static_cast<double>(N) / M))); return num_bits / n; } From fdfa6869eb69dc3d19dc14a979aa8bdd011b6635 Mon Sep 17 00:00:00 2001 From: Giulio Ermanno Pibiri Date: Wed, 24 Jul 2024 15:43:44 +0200 Subject: [PATCH 2/2] minor --- include/info.cpp | 21 +++------------------ 1 file changed, 3 insertions(+), 
18 deletions(-) diff --git a/include/info.cpp b/include/info.cpp index 3a683f1..15f27b3 100644 --- a/include/info.cpp +++ b/include/info.cpp @@ -41,25 +41,10 @@ double bits_per_kmer_formula(uint64_t k, /* kmer length */ const uint64_t N = n + M * (k - 1); // num. characters in SPSS - /* summing (M-1) provides an upper bound */ - double num_minimizers = - (2.0 * n) / (k - m + 2) + (M - 1); // not distinct, hence num. of super-kmers + /* summing (M-1) provides an upper bound to the num. of super-kmers */ + double Z = (2.0 * n) / (k - m + 2) + (M - 1); - // std::cout << "num_minimizers = " << num_minimizers << std::endl; - // std::cout << "minimizers: " << (3.0 * num_minimizers) / n << " [bits/kmer]" << std::endl; - // std::cout << "pieces: " << (M * (2.0 + std::ceil(std::log2(static_cast<double>(N) / M)))) / n - // << " [bits/kmer]" << std::endl; - // std::cout << "num_super_kmers_before_bucket: " << (2.0 * num_minimizers) / n << " [bits/kmer] - // " - // << std::endl; - // std::cout << "offsets: " << (std::ceil(std::log2(N)) * num_minimizers) / n << " [bits/kmer]" - // << std::endl; - // std::cout << "strings: " << (2.0 * N) / n << " [bits/kmer]" << std::endl; - - // double num_bits = 2 * n * (1.0 + (5.0 + std::ceil(std::log2(N))) / (k - m + 2)) + - // M * (2 * k + std::ceil(std::log2(static_cast<double>(n) / M + k - 1))); - - double num_bits = 2 * N + num_minimizers * (5.0 + std::ceil(std::log2(N))) + + double num_bits = 2 * N + Z * (5.0 + std::ceil(std::log2(N))) + M * (2.0 + std::ceil(std::log2(static_cast<double>(N) / M))); return num_bits / n;