From 49966c8bf82615faa91e2750350c807bc4daef69 Mon Sep 17 00:00:00 2001 From: Harun Mustafa Date: Mon, 19 Feb 2024 20:04:56 +0100 Subject: [PATCH] allow for ignoring reverse complements when checking neighbours --- include/dictionary.cpp | 52 +++++++++++++++++++++--------------------- include/dictionary.hpp | 18 +++++++-------- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/include/dictionary.cpp b/include/dictionary.cpp index ce5f209..ffa7b8c 100644 --- a/include/dictionary.cpp +++ b/include/dictionary.cpp @@ -106,61 +106,61 @@ uint64_t dictionary::contig_size(uint64_t contig_id) const { return contig_length - m_k + 1; } -void dictionary::forward_neighbours(kmer_t suffix, neighbourhood& res) const { - res.forward_A = lookup_advanced_uint(suffix + (util::char_to_uint('A') << (2 * (m_k - 1)))); - res.forward_C = lookup_advanced_uint(suffix + (util::char_to_uint('C') << (2 * (m_k - 1)))); - res.forward_G = lookup_advanced_uint(suffix + (util::char_to_uint('G') << (2 * (m_k - 1)))); - res.forward_T = lookup_advanced_uint(suffix + (util::char_to_uint('T') << (2 * (m_k - 1)))); +void dictionary::forward_neighbours(kmer_t suffix, neighbourhood& res, bool check_reverse_complement) const { + res.forward_A = lookup_advanced_uint(suffix + (util::char_to_uint('A') << (2 * (m_k - 1))), check_reverse_complement); + res.forward_C = lookup_advanced_uint(suffix + (util::char_to_uint('C') << (2 * (m_k - 1))), check_reverse_complement); + res.forward_G = lookup_advanced_uint(suffix + (util::char_to_uint('G') << (2 * (m_k - 1))), check_reverse_complement); + res.forward_T = lookup_advanced_uint(suffix + (util::char_to_uint('T') << (2 * (m_k - 1))), check_reverse_complement); } -void dictionary::backward_neighbours(kmer_t prefix, neighbourhood& res) const { - res.backward_A = lookup_advanced_uint(prefix + util::char_to_uint('A')); - res.backward_C = lookup_advanced_uint(prefix + util::char_to_uint('C')); - res.backward_G = lookup_advanced_uint(prefix + util::char_to_uint('G')); - res.backward_T = lookup_advanced_uint(prefix + util::char_to_uint('T')); +void dictionary::backward_neighbours(kmer_t prefix, neighbourhood& res, bool check_reverse_complement) const { + res.backward_A = lookup_advanced_uint(prefix + util::char_to_uint('A'), check_reverse_complement); + res.backward_C = lookup_advanced_uint(prefix + util::char_to_uint('C'), check_reverse_complement); + res.backward_G = lookup_advanced_uint(prefix + util::char_to_uint('G'), check_reverse_complement); + res.backward_T = lookup_advanced_uint(prefix + util::char_to_uint('T'), check_reverse_complement); } -neighbourhood dictionary::kmer_forward_neighbours(char const* string_kmer) const { +neighbourhood dictionary::kmer_forward_neighbours(char const* string_kmer, bool check_reverse_complement) const { kmer_t uint_kmer = util::string_to_uint_kmer(string_kmer, m_k); - return kmer_forward_neighbours(uint_kmer); + return kmer_forward_neighbours(uint_kmer, check_reverse_complement); } -neighbourhood dictionary::kmer_forward_neighbours(kmer_t uint_kmer) const { +neighbourhood dictionary::kmer_forward_neighbours(kmer_t uint_kmer, bool check_reverse_complement) const { neighbourhood res; kmer_t suffix = uint_kmer >> 2; - forward_neighbours(suffix, res); + forward_neighbours(suffix, res, check_reverse_complement); return res; } -neighbourhood dictionary::kmer_backward_neighbours(char const* string_kmer) const { +neighbourhood dictionary::kmer_backward_neighbours(char const* string_kmer, bool check_reverse_complement) const { kmer_t uint_kmer = util::string_to_uint_kmer(string_kmer, m_k); - return kmer_backward_neighbours(uint_kmer); + return kmer_backward_neighbours(uint_kmer, check_reverse_complement); } -neighbourhood dictionary::kmer_backward_neighbours(kmer_t uint_kmer) const { +neighbourhood dictionary::kmer_backward_neighbours(kmer_t uint_kmer, bool check_reverse_complement) const { neighbourhood res; kmer_t prefix = (uint_kmer << 2) & ((kmer_t(1) << (2 * m_k)) - 1); - backward_neighbours(prefix, res); + backward_neighbours(prefix, res, check_reverse_complement); return res; } -neighbourhood dictionary::kmer_neighbours(char const* string_kmer) const { +neighbourhood dictionary::kmer_neighbours(char const* string_kmer, bool check_reverse_complement) const { kmer_t uint_kmer = util::string_to_uint_kmer(string_kmer, m_k); - return kmer_neighbours(uint_kmer); + return kmer_neighbours(uint_kmer, check_reverse_complement); } -neighbourhood dictionary::kmer_neighbours(kmer_t uint_kmer) const { +neighbourhood dictionary::kmer_neighbours(kmer_t uint_kmer, bool check_reverse_complement) const { neighbourhood res; kmer_t suffix = uint_kmer >> 2; - forward_neighbours(suffix, res); + forward_neighbours(suffix, res, check_reverse_complement); kmer_t prefix = (uint_kmer << 2) & ((kmer_t(1) << (2 * m_k)) - 1); - backward_neighbours(prefix, res); + backward_neighbours(prefix, res, check_reverse_complement); return res; } -neighbourhood dictionary::contig_neighbours(uint64_t contig_id) const { +neighbourhood dictionary::contig_neighbours(uint64_t contig_id, bool check_reverse_complement) const { assert(contig_id < num_contigs()); neighbourhood res; kmer_t suffix = m_buckets.contig_suffix(contig_id, m_k); - forward_neighbours(suffix, res); + forward_neighbours(suffix, res, check_reverse_complement); kmer_t prefix = m_buckets.contig_prefix(contig_id, m_k) << 2; - backward_neighbours(prefix, res); + backward_neighbours(prefix, res, check_reverse_complement); return res; } diff --git a/include/dictionary.hpp b/include/dictionary.hpp index 0bf0b6e..5302fd3 100644 --- a/include/dictionary.hpp +++ b/include/dictionary.hpp @@ -40,15 +40,15 @@ struct dictionary { uint64_t contig_size(uint64_t contig_id) const; /* Navigational queries. */ - neighbourhood kmer_forward_neighbours(char const* string_kmer) const; - neighbourhood kmer_forward_neighbours(kmer_t uint_kmer) const; - neighbourhood kmer_backward_neighbours(char const* string_kmer) const; - neighbourhood kmer_backward_neighbours(kmer_t uint_kmer) const; + neighbourhood kmer_forward_neighbours(char const* string_kmer, bool check_reverse_complement = true) const; + neighbourhood kmer_forward_neighbours(kmer_t uint_kmer, bool check_reverse_complement = true) const; + neighbourhood kmer_backward_neighbours(char const* string_kmer, bool check_reverse_complement = true) const; + neighbourhood kmer_backward_neighbours(kmer_t uint_kmer, bool check_reverse_complement = true) const; /* forward and backward */ - neighbourhood kmer_neighbours(char const* string_kmer) const; - neighbourhood kmer_neighbours(kmer_t uint_kmer) const; - neighbourhood contig_neighbours(uint64_t contig_id) const; + neighbourhood kmer_neighbours(char const* string_kmer, bool check_reverse_complement = true) const; + neighbourhood kmer_neighbours(kmer_t uint_kmer, bool check_reverse_complement = true) const; + neighbourhood contig_neighbours(uint64_t contig_id, bool check_reverse_complement = true) const; /* Return the weight of the kmer given its id. */ uint64_t weight(uint64_t kmer_id) const; @@ -118,8 +118,8 @@ struct dictionary { lookup_result lookup_uint_regular_parsing(kmer_t uint_kmer) const; lookup_result lookup_uint_canonical_parsing(kmer_t uint_kmer) const; - void forward_neighbours(kmer_t suffix, neighbourhood& res) const; - void backward_neighbours(kmer_t prefix, neighbourhood& res) const; + void forward_neighbours(kmer_t suffix, neighbourhood& res, bool check_reverse_complement) const; + void backward_neighbours(kmer_t prefix, neighbourhood& res, bool check_reverse_complement) const; }; } // namespace sshash