From f35e39958ded891464ba3be9ced77447c071fa26 Mon Sep 17 00:00:00 2001 From: OndrejSladky Date: Thu, 21 Mar 2024 14:24:54 +0100 Subject: [PATCH 01/10] Added support for k-mers up to 128. --- Makefile | 2 +- src/khash_utils.h | 15 +- src/kmers.h | 16 +- src/main.cpp | 12 +- src/parser.h | 2 + src/prophasm.h | 6 +- src/uint256_t.build | 17 + src/uint256_t.cpp | 756 +++++++++++++++++++++++++++++++++++ src/uint256_t.h | 11 + src/uint256_t.include | 639 +++++++++++++++++++++++++++++ src/uint256_t_config.include | 19 + 11 files changed, 1486 insertions(+), 9 deletions(-) create mode 100644 src/uint256_t.build create mode 100644 src/uint256_t.cpp create mode 100644 src/uint256_t.h create mode 100644 src/uint256_t.include create mode 100644 src/uint256_t_config.include diff --git a/Makefile b/Makefile index 2c8163f..c4680c9 100644 --- a/Makefile +++ b/Makefile @@ -26,7 +26,7 @@ quick-verify: $(PROG) $(SCRIPTS)/verify.py $(DATA)/spneumoniae.fa $(PROG): $(SRC)/main.cpp $(SRC)/$(wildcard *.cpp *.h *.hpp) src/version.h ./create-version.sh - $(CXX) $(CXXFLAGS) $(SRC)/main.cpp $(SRC)/kthread.c -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) $(SRC)/main.cpp $(SRC)/uint256_t.cpp $(SRC)/kthread.c -o $@ $(LDFLAGS) prophasmtest: $(TESTS)/unittest.cpp gtest-all.o $(SRC)/$(wildcard *.cpp *.h *.hpp) $(TESTS)/$(wildcard *.cpp *.h *.hpp) diff --git a/src/khash_utils.h b/src/khash_utils.h index eab13a0..3ac2d99 100644 --- a/src/khash_utils.h +++ b/src/khash_utils.h @@ -6,20 +6,29 @@ #include "kmers.h" #include "khash.h" +typedef unsigned char byte; #define kh_int128_hash_func(key) kh_int64_hash_func((khint64_t)((key)>>65^(key)^(key)<<21)) #define kh_int128_hash_equal(a, b) ((a) == (b)) +#define kh_int256_hash_func(key) kh_int128_hash_func((__uint128_t)((key)>>129^(key)^(key)<<35)) +#define kh_int256_hash_equal(a, b) ((a) == (b)) #define KHASH_MAP_INIT_INT128(name, khval_t) \ KHASH_INIT(name, __uint128_t, khval_t, 1, kh_int128_hash_func, kh_int128_hash_equal) #define KHASH_SET_INIT_INT128(name) \ - 
KHASH_INIT(name, __uint128_t, char, 0, kh_int128_hash_func, kh_int128_hash_equal) + KHASH_INIT(name, __uint128_t, byte, 0, kh_int128_hash_func, kh_int128_hash_equal) -typedef unsigned char byte; +#define KHASH_MAP_INIT_INT256(name, khval_t) \ + KHASH_INIT(name, uint256_t, khval_t, 1, kh_int256_hash_func, kh_int256_hash_equal) + +#define KHASH_SET_INIT_INT256(name) \ + KHASH_INIT(name, uint256_t, byte, 0, kh_int256_hash_func, kh_int256_hash_equal) +KHASH_MAP_INIT_INT256(S256M, byte) KHASH_MAP_INIT_INT128(S128M, byte) KHASH_MAP_INIT_INT64(S64M, byte) +KHASH_SET_INIT_INT256(S256S) KHASH_SET_INIT_INT128(S128S) KHASH_SET_INIT_INT64(S64S) @@ -86,6 +95,8 @@ INIT_KHASH_UTILS(64, 64S) INIT_KHASH_UTILS(64, 64M) INIT_KHASH_UTILS(128, 128S) INIT_KHASH_UTILS(128, 128M) +INIT_KHASH_UTILS(256, 256S) +INIT_KHASH_UTILS(256, 256M) /// Return the next k-mer in the k-mer set and update the index. template diff --git a/src/kmers.h b/src/kmers.h index 996d115..182b1a6 100644 --- a/src/kmers.h +++ b/src/kmers.h @@ -4,8 +4,11 @@ #include #include +#include "uint256_t.h" + typedef uint64_t kmer64_t; typedef __uint128_t kmer128_t; +typedef uint256_t kmer256_t; /// Convert the given basic nucleotide to int so it can be used for indexing in AC. /// If non-existing nucleotide is given, return -1. @@ -71,9 +74,17 @@ inline kmer128_t word_reverse_complement(kmer128_t w) { w = ( w >> 64 ) | ( w << 64); return ((U)-1) - w; } +/// Compute the reverse complement of a 256-bit word: each 128-bit half is reverse-complemented and the two halves are swapped. +/// Copyright: Jellyfish GPL-3.0 +inline kmer256_t word_reverse_complement(kmer256_t w) { + kmer128_t new_upper = word_reverse_complement(w.lower()); + kmer128_t new_lower = word_reverse_complement(w.upper()); + return kmer256_t(new_upper, new_lower); +} constexpr int KMER_SIZE_64 = 64; constexpr int KMER_SIZE_128 = 128; +constexpr int KMER_SIZE_256 = 256; #define INIT_KMERS(type) \ \ /* Get the mask to mask k-mers. 
*/ \ @@ -89,6 +100,7 @@ inline kmer##type##_t ReverseComplement(kmer##type##_t kMer, int k) { INIT_KMERS(64) INIT_KMERS(128) +INIT_KMERS(256) /// Return the lexicographically smaller of the k-mer and its reverse complement. template @@ -102,7 +114,7 @@ const char letters[4] {'A', 'C', 'G', 'T'}; /// Return the index-th nucleotide from the encoded k-mer. template inline char NucleotideAtIndex(kmer_t encoded, int k, int index) { - return letters[(encoded >> ((k - index - kmer_t(1)) << kmer_t(1))) & kmer_t(3)]; + return letters[(unsigned long)(encoded >> ((k - index - kmer_t(1)) << kmer_t(1))) & kmer_t(3)]; } /// Convert the encoded KMer representation to string. @@ -111,7 +123,7 @@ std::string NumberToKMer(kmer_t encoded, int length) { std::string ret(length, 'N'); for (int i = 0; i < length; ++i) { // The last two bits correspond to one nucleotide. - ret[length - i -1] = letters[encoded & 3]; + ret[length - i -1] = letters[(unsigned long)encoded & 3]; // Move to the next letter. encoded >>= 2; } diff --git a/src/main.cpp b/src/main.cpp index 634d6f3..5afcc7c 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -9,7 +9,7 @@ #include "khash_utils.h" -constexpr int MAX_K = 64; +constexpr int MAX_K = 128; int Help() { @@ -200,6 +200,8 @@ INIT_RUN(64, 64S) INIT_RUN(64, 64M) INIT_RUN(128, 128S) INIT_RUN(128, 128M) +INIT_RUN(256, 256S) +INIT_RUN(256, 256M) int main(int argc, char **argv) { int32_t k = -1; @@ -329,11 +331,17 @@ int main(int argc, char **argv) { } else { return run64M(k, intersectionPath, inPaths, outPaths, statsPath, fstats, computeIntersection, computeOutput, verbose, complements, threads, setCount); } - } else { + } else if (k <= 64) { if (MINIMUM_ABUNDANCE == (byte)1) { return run128S(k, intersectionPath, inPaths, outPaths, statsPath, fstats, computeIntersection, computeOutput, verbose, complements, threads, setCount); } else { return run128M(k, intersectionPath, inPaths, outPaths, statsPath, fstats, computeIntersection, computeOutput, verbose, complements, 
threads, setCount); } + } else { + if (MINIMUM_ABUNDANCE == (byte)1) { + return run256S(k, intersectionPath, inPaths, outPaths, statsPath, fstats, computeIntersection, computeOutput, verbose, complements, threads, setCount); + } else { + return run256M(k, intersectionPath, inPaths, outPaths, statsPath, fstats, computeIntersection, computeOutput, verbose, complements, threads, setCount); + } } } diff --git a/src/parser.h b/src/parser.h index a756085..cb1c4ea 100644 --- a/src/parser.h +++ b/src/parser.h @@ -82,3 +82,5 @@ INIT_PARSER(64, 64S) INIT_PARSER(64, 64M) INIT_PARSER(128, 128S) INIT_PARSER(128, 128M) +INIT_PARSER(256, 256S) +INIT_PARSER(256, 256M) diff --git a/src/prophasm.h b/src/prophasm.h index 2d7ac43..d099cfb 100644 --- a/src/prophasm.h +++ b/src/prophasm.h @@ -60,7 +60,7 @@ void NextSimplitig(KHT *kMers, kmer_t begin, std::ostream& of, int k, bool comp } else { // Extend the simplitig to the right. eraseKMer(kMers, next, k, complements); - simplitig.emplace_back(letters[ext]); + simplitig.emplace_back(letters[(unsigned int)ext]); last = next; } } else { @@ -71,7 +71,7 @@ void NextSimplitig(KHT *kMers, kmer_t begin, std::ostream& of, int k, bool comp } else { // Extend the simplitig to the left. 
eraseKMer(kMers, next, k, complements); - simplitig.emplace_front(letters[ext]); + simplitig.emplace_front(letters[(unsigned int)ext]); first = next; } } @@ -121,3 +121,5 @@ INIT_PROPHASM(64, 64S) INIT_PROPHASM(64, 64M) INIT_PROPHASM(128, 128S) INIT_PROPHASM(128, 128M) +INIT_PROPHASM(256, 256S) +INIT_PROPHASM(256, 256M) diff --git a/src/uint256_t.build b/src/uint256_t.build new file mode 100644 index 0000000..114299a --- /dev/null +++ b/src/uint256_t.build @@ -0,0 +1,17 @@ +// IMPLEMENTATION BUILD HEADER + +// We need uint128_t symbols as plain "extern", neither import nor export +// because we're linking the 128 and 256 object files into a single library +// So we can only have one export for symbol in any translation unit +#define UINT256_T_EXTERN +typedef __uint128_t uint128_t; +#undef UINT256_T_EXTERN + +#ifndef _UINT256_T_BUILD + #define _UINT256_T_BUILD + #include "uint256_t_config.include" + const uint128_t uint128_0(0); + const uint128_t uint128_1(1); + #define UINT256_T_EXTERN _UINT256_T_EXPORT +#endif +#include "uint256_t.include" diff --git a/src/uint256_t.cpp b/src/uint256_t.cpp new file mode 100644 index 0000000..0cc4858 --- /dev/null +++ b/src/uint256_t.cpp @@ -0,0 +1,756 @@ +#define __LITTLE_ENDIAN__ +#include "uint256_t.build" +#include +#include + +const uint128_t uint128_64(64); +const uint128_t uint128_128(128); +const uint128_t uint128_256(256); +const uint256_t uint256_0(0); +const uint256_t uint256_1(1); +const uint256_t uint256_max(uint128_t(-1), uint128_t(-1)); + +/* +uint256_t::uint256_t(const std::string & s, uint8_t base) { + init_from_base(s.c_str(), base); +} + +uint256_t::uint256_t(const char * s, uint8_t base) { + init_from_base(s, base); +} +*/ + +uint256_t::uint256_t(const bool & b) + : uint256_t((uint8_t) b) +{} +/* +void uint256_t::init_from_base(const char * s, uint8_t base) { + *this = 0; + + uint256_t power(1); + uint8_t digit; + int pos = strlen(s) - 1; + while(pos >= 0) { + digit = 0; + if('0' <= s[pos] && s[pos] <= '9') { + 
digit = s[pos] - '0'; + } else if('a' <= s[pos] && s[pos] <= 'z') { + digit = s[pos] - 'a' + 10; + } + *this += digit * power; + pos--; + power *= base; + } +} +*/ + +uint256_t & uint256_t::operator=(const bool & rhs) { + UPPER = 0; + LOWER = rhs; + return *this; +} + +uint256_t::operator bool() const{ + return (bool) (UPPER | LOWER); +} + +uint256_t::operator uint8_t() const{ + return (uint8_t) LOWER; +} + +uint256_t::operator uint16_t() const{ + return (uint16_t) LOWER; +} + +uint256_t::operator uint32_t() const{ + return (uint32_t) LOWER; +} + +uint256_t::operator uint64_t() const{ + return (uint64_t) LOWER; +} + +uint256_t::operator uint128_t() const{ + return LOWER; +} + +uint256_t uint256_t::operator&(const uint128_t & rhs) const{ + return uint256_t(uint128_0, LOWER & rhs); +} + +uint256_t uint256_t::operator&(const uint256_t & rhs) const{ + return uint256_t(UPPER & rhs.UPPER, LOWER & rhs.LOWER); +} + +uint256_t & uint256_t::operator&=(const uint128_t & rhs){ + UPPER = uint128_0; + LOWER &= rhs; + return *this; +} + +uint256_t & uint256_t::operator&=(const uint256_t & rhs){ + UPPER &= rhs.UPPER; + LOWER &= rhs.LOWER; + return *this; +} + +uint256_t uint256_t::operator|(const uint128_t & rhs) const{ + return uint256_t(UPPER , LOWER | rhs); +} + +uint256_t uint256_t::operator|(const uint256_t & rhs) const{ + return uint256_t(UPPER | rhs.UPPER, LOWER | rhs.LOWER); +} + +uint256_t & uint256_t::operator|=(const uint128_t & rhs){ + LOWER |= rhs; + return *this; +} + +uint256_t & uint256_t::operator|=(const uint256_t & rhs){ + UPPER |= rhs.UPPER; + LOWER |= rhs.LOWER; + return *this; +} + +uint256_t uint256_t::operator^(const uint128_t & rhs) const{ + return uint256_t(UPPER, LOWER ^ rhs); +} + +uint256_t uint256_t::operator^(const uint256_t & rhs) const{ + return uint256_t(UPPER ^ rhs.UPPER, LOWER ^ rhs.LOWER); +} + +uint256_t & uint256_t::operator^=(const uint128_t & rhs){ + LOWER ^= rhs; + return *this; +} + +uint256_t & uint256_t::operator^=(const uint256_t & 
rhs){ + UPPER ^= rhs.UPPER; + LOWER ^= rhs.LOWER; + return *this; +} + +uint256_t uint256_t::operator~() const{ + return uint256_t(~UPPER, ~LOWER); +} + +uint256_t uint256_t::operator<<(const uint128_t & rhs) const{ + return *this << uint256_t(rhs); +} + +uint256_t uint256_t::operator<<(const uint256_t & rhs) const{ + const uint128_t shift = rhs.LOWER; + if (((bool) rhs.UPPER) || (shift >= uint128_256)){ + return uint256_0; + } + else if (shift == uint128_128){ + return uint256_t(LOWER, uint128_0); + } + else if (shift == uint128_0){ + return *this; + } + else if (shift < uint128_128){ + return uint256_t((UPPER << shift) + (LOWER >> (uint128_128 - shift)), LOWER << shift); + } + else if ((uint128_256 > shift) && (shift > uint128_128)){ + return uint256_t(LOWER << (shift - uint128_128), uint128_0); + } + else{ + return uint256_0; + } +} + +uint256_t & uint256_t::operator<<=(const uint128_t & shift){ + return *this <<= uint256_t(shift); +} + +uint256_t & uint256_t::operator<<=(const uint256_t & shift){ + *this = *this << shift; + return *this; +} + +uint256_t uint256_t::operator>>(const uint128_t & rhs) const{ + return *this >> uint256_t(rhs); +} + +uint256_t uint256_t::operator>>(const uint256_t & rhs) const{ + const uint128_t shift = rhs.LOWER; + if (((bool) rhs.UPPER) | (shift >= uint128_256)){ + return uint256_0; + } + else if (shift == uint128_128){ + return uint256_t(UPPER); + } + else if (shift == uint128_0){ + return *this; + } + else if (shift < uint128_128){ + return uint256_t(UPPER >> shift, (UPPER << (uint128_128 - shift)) + (LOWER >> shift)); + } + else if ((uint128_256 > shift) && (shift > uint128_128)){ + return uint256_t(UPPER >> (shift - uint128_128)); + } + else{ + return uint256_0; + } +} + +uint256_t & uint256_t::operator>>=(const uint128_t & shift){ + return *this >>= uint256_t(shift); +} + +uint256_t & uint256_t::operator>>=(const uint256_t & shift){ + *this = *this >> shift; + return *this; +} + +bool uint256_t::operator!() const{ + return ! 
(bool) *this; +} + +bool uint256_t::operator&&(const uint128_t & rhs) const{ + return (*this && uint256_t(rhs)); +} + +bool uint256_t::operator&&(const uint256_t & rhs) const{ + return ((bool) *this && (bool) rhs); +} + +bool uint256_t::operator||(const uint128_t & rhs) const{ + return (*this || uint256_t(rhs)); +} + +bool uint256_t::operator||(const uint256_t & rhs) const{ + return ((bool) *this || (bool) rhs); +} + +bool uint256_t::operator==(const uint128_t & rhs) const{ + return (*this == uint256_t(rhs)); +} + +bool uint256_t::operator==(const uint256_t & rhs) const{ + return ((UPPER == rhs.UPPER) && (LOWER == rhs.LOWER)); +} + +bool uint256_t::operator!=(const uint128_t & rhs) const{ + return (*this != uint256_t(rhs)); +} + +bool uint256_t::operator!=(const uint256_t & rhs) const{ + return ((UPPER != rhs.UPPER) | (LOWER != rhs.LOWER)); +} + +bool uint256_t::operator>(const uint128_t & rhs) const{ + return (*this > uint256_t(rhs)); +} + +bool uint256_t::operator>(const uint256_t & rhs) const{ + if (UPPER == rhs.UPPER){ + return (LOWER > rhs.LOWER); + } + if (UPPER > rhs.UPPER){ + return true; + } + return false; +} + +bool uint256_t::operator<(const uint128_t & rhs) const{ + return (*this < uint256_t(rhs)); +} + +bool uint256_t::operator<(const uint256_t & rhs) const{ + if (UPPER == rhs.UPPER){ + return (LOWER < rhs.LOWER); + } + if (UPPER < rhs.UPPER){ + return true; + } + return false; +} + +bool uint256_t::operator>=(const uint128_t & rhs) const{ + return (*this >= uint256_t(rhs)); +} + +bool uint256_t::operator>=(const uint256_t & rhs) const{ + return ((*this > rhs) | (*this == rhs)); +} + +bool uint256_t::operator<=(const uint128_t & rhs) const{ + return (*this <= uint256_t(rhs)); +} + +bool uint256_t::operator<=(const uint256_t & rhs) const{ + return ((*this < rhs) | (*this == rhs)); +} + +uint256_t uint256_t::operator+(const uint128_t & rhs) const{ + return *this + uint256_t(rhs); +} + +uint256_t uint256_t::operator+(const uint256_t & rhs) const{ + 
return uint256_t(UPPER + rhs.UPPER + (((LOWER + rhs.LOWER) < LOWER)?uint128_1:uint128_0), LOWER + rhs.LOWER); +} + +uint256_t & uint256_t::operator+=(const uint128_t & rhs){ + return *this += uint256_t(rhs); +} + +uint256_t & uint256_t::operator+=(const uint256_t & rhs){ + UPPER = rhs.UPPER + UPPER + ((LOWER + rhs.LOWER) < LOWER); + LOWER = LOWER + rhs.LOWER; + return *this; +} + +uint256_t uint256_t::operator-(const uint128_t & rhs) const{ + return *this - uint256_t(rhs); +} + +uint256_t uint256_t::operator-(const uint256_t & rhs) const{ + return uint256_t(UPPER - rhs.UPPER - ((LOWER - rhs.LOWER) > LOWER), LOWER - rhs.LOWER); +} + +uint256_t & uint256_t::operator-=(const uint128_t & rhs){ + return *this -= uint256_t(rhs); +} + +uint256_t & uint256_t::operator-=(const uint256_t & rhs){ + *this = *this - rhs; + return *this; +} + +/* +uint256_t uint256_t::operator*(const uint128_t & rhs) const{ + return *this * uint256_t(rhs); +} + */ + +/* +uint256_t uint256_t::operator*(const uint256_t & rhs) const{ + // split values into 4 64-bit parts + uint128_t top[4] = {UPPER.upper(), UPPER.lower(), LOWER.upper(), LOWER.lower()}; + uint128_t bottom[4] = {rhs.upper().upper(), rhs.upper().lower(), rhs.lower().upper(), rhs.lower().lower()}; + uint128_t products[4][4]; + + // multiply each component of the values + for(int y = 3; y > -1; y--){ + for(int x = 3; x > -1; x--){ + products[3 - y][x] = top[x] * bottom[y]; + } + } + + // first row + uint128_t fourth64 = uint128_t(products[0][3].lower()); + uint128_t third64 = uint128_t(products[0][2].lower()) + uint128_t(products[0][3].upper()); + uint128_t second64 = uint128_t(products[0][1].lower()) + uint128_t(products[0][2].upper()); + uint128_t first64 = uint128_t(products[0][0].lower()) + uint128_t(products[0][1].upper()); + + // second row + third64 += uint128_t(products[1][3].lower()); + second64 += uint128_t(products[1][2].lower()) + uint128_t(products[1][3].upper()); + first64 += uint128_t(products[1][1].lower()) + 
uint128_t(products[1][2].upper()); + + // third row + second64 += uint128_t(products[2][3].lower()); + first64 += uint128_t(products[2][2].lower()) + uint128_t(products[2][3].upper()); + + // fourth row + first64 += uint128_t(products[3][3].lower()); + + // combines the values, taking care of carry over + return uint256_t(first64 << uint128_64, uint128_0) + + uint256_t(third64.upper(), third64 << uint128_64) + + uint256_t(second64, uint128_0) + + uint256_t(fourth64); +} + */ + +/* +uint256_t & uint256_t::operator*=(const uint128_t & rhs){ + return *this *= uint256_t(rhs); +} + +uint256_t & uint256_t::operator*=(const uint256_t & rhs){ + *this = *this * rhs; + return *this; +} + */ + +/* +std::pair uint256_t::divmod(const uint256_t & lhs, const uint256_t & rhs) const{ + // Save some calculations ///////////////////// + if (rhs == uint256_0){ + throw std::domain_error("Error: division or modulus by 0"); + } + else if (rhs == uint256_1){ + return std::pair (lhs, uint256_0); + } + else if (lhs == rhs){ + return std::pair (uint256_1, uint256_0); + } + else if ((lhs == uint256_0) || (lhs < rhs)){ + return std::pair (uint256_0, lhs); + } + + std::pair qr(uint256_0, lhs); + uint256_t copyd = rhs << (lhs.bits() - rhs.bits()); + uint256_t adder = uint256_1 << (lhs.bits() - rhs.bits()); + if (copyd > qr.second){ + copyd >>= uint256_1; + adder >>= uint256_1; + } + while (qr.second >= rhs){ + if (qr.second >= copyd){ + qr.second -= copyd; + qr.first |= adder; + } + copyd >>= uint256_1; + adder >>= uint256_1; + } + return qr; +} + + +uint256_t uint256_t::operator/(const uint128_t & rhs) const{ + return *this / uint256_t(rhs); +} + +uint256_t uint256_t::operator/(const uint256_t & rhs) const{ + return divmod(*this, rhs).first; +} + +uint256_t & uint256_t::operator/=(const uint128_t & rhs){ + return *this /= uint256_t(rhs); +} + +uint256_t & uint256_t::operator/=(const uint256_t & rhs){ + *this = *this / rhs; + return *this; +} + +uint256_t uint256_t::operator%(const uint128_t & 
rhs) const{ + return *this % uint256_t(rhs); +} + +uint256_t uint256_t::operator%(const uint256_t & rhs) const{ + return *this - (rhs * (*this / rhs)); +} + +uint256_t & uint256_t::operator%=(const uint128_t & rhs){ + return *this %= uint256_t(rhs); +} + +uint256_t & uint256_t::operator%=(const uint256_t & rhs){ + *this = *this % rhs; + return *this; +} + */ + +uint256_t & uint256_t::operator++(){ + *this += uint256_1; + return *this; +} + +uint256_t uint256_t::operator++(int){ + uint256_t temp(*this); + ++*this; + return temp; +} + +uint256_t & uint256_t::operator--(){ + *this -= uint256_1; + return *this; +} + +uint256_t uint256_t::operator--(int){ + uint256_t temp(*this); + --*this; + return temp; +} + +uint256_t uint256_t::operator+() const{ + return *this; +} + +uint256_t uint256_t::operator-() const{ + return ~*this + uint256_1; +} + +const uint128_t & uint256_t::upper() const { + return UPPER; +} + +const uint128_t & uint256_t::lower() const { + return LOWER; +} + +/* +std::vector uint256_t::export_bits() const { + std::vector ret; + ret.reserve(32); + UPPER.export_bits(ret); + LOWER.export_bits(ret); + return ret; +} + +std::vector uint256_t::export_bits_truncate() const { + std::vector ret = export_bits(); + + //prune the zeroes + int i = 0; + while (ret[i] == 0 && i < 64) i++; + ret.erase(ret.begin(), ret.begin() + i); + + return ret; +} + +uint16_t uint256_t::bits() const{ + uint16_t out = 0; + if (UPPER){ + out = 128; + uint128_t up = UPPER; + while (up){ + up >>= uint128_1; + out++; + } + } + else{ + uint128_t low = LOWER; + while (low){ + low >>= uint128_1; + out++; + } + } + return out; +} + */ + +/* +std::string uint256_t::str(uint8_t base, const unsigned int & len) const{ + if ((base < 2) || (base > 36)){ + throw std::invalid_argument("Base must be in the range 2-36"); + } + std::string out = ""; + if (!(*this)){ + out = "0"; + } + else{ + std::pair qr(*this, uint256_0); + do{ + qr = divmod(qr.first, base); + out = 
"0123456789abcdefghijklmnopqrstuvwxyz"[(uint8_t) qr.second] + out; + } while (qr.first); + } + if (out.size() < len){ + out = std::string(len - out.size(), '0') + out; + } + return out; +} +*/ + +uint256_t operator&(const uint128_t & lhs, const uint256_t & rhs){ + return rhs & lhs; +} + +uint128_t & operator&=(uint128_t & lhs, const uint256_t & rhs){ + lhs = (rhs & lhs).lower(); + return lhs; +} + +uint256_t operator|(const uint128_t & lhs, const uint256_t & rhs){ + return rhs | lhs; +} + +uint128_t & operator|=(uint128_t & lhs, const uint256_t & rhs){ + lhs = (rhs | lhs).lower(); + return lhs; +} + +uint256_t operator^(const uint128_t & lhs, const uint256_t & rhs){ + return rhs ^ lhs; +} + +uint128_t & operator^=(uint128_t & lhs, const uint256_t & rhs){ + lhs = (rhs ^ lhs).lower(); + return lhs; +} + +uint256_t operator<<(const bool & lhs, const uint256_t & rhs){ + return uint256_t(lhs) << rhs; +} + +uint256_t operator<<(const uint8_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) << rhs; +} + +uint256_t operator<<(const uint16_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) << rhs; +} + +uint256_t operator<<(const uint32_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) << rhs; +} + +uint256_t operator<<(const uint64_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) << rhs; +} + +uint256_t operator<<(const uint128_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) << rhs; +} + +uint256_t operator<<(const int8_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) << rhs; +} + +uint256_t operator<<(const int16_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) << rhs; +} + +uint256_t operator<<(const int32_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) << rhs; +} + +uint256_t operator<<(const int64_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) << rhs; +} + +uint128_t & operator<<=(uint128_t & lhs, const uint256_t & rhs){ + lhs = (uint256_t(lhs) << rhs).lower(); + return lhs; +} + +uint256_t 
operator>>(const bool & lhs, const uint256_t & rhs){ + return uint256_t(lhs) >> rhs; +} + +uint256_t operator>>(const uint8_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) >> rhs; +} + +uint256_t operator>>(const uint16_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) >> rhs; +} + +uint256_t operator>>(const uint32_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) >> rhs; +} + +uint256_t operator>>(const uint64_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) >> rhs; +} + +uint256_t operator>>(const uint128_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) >> rhs; +} + +uint256_t operator>>(const int8_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) >> rhs; +} + +uint256_t operator>>(const int16_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) >> rhs; +} + +uint256_t operator>>(const int32_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) >> rhs; +} + +uint256_t operator>>(const int64_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) >> rhs; +} + +uint128_t & operator>>=(uint128_t & lhs, const uint256_t & rhs){ + lhs = (uint256_t(lhs) >> rhs).lower(); + return lhs; +} + +// Comparison Operators +bool operator==(const uint128_t & lhs, const uint256_t & rhs){ + return rhs == lhs; +} + +bool operator!=(const uint128_t & lhs, const uint256_t & rhs){ + return rhs != lhs; +} + +bool operator>(const uint128_t & lhs, const uint256_t & rhs){ + return rhs < lhs; +} + +bool operator<(const uint128_t & lhs, const uint256_t & rhs){ + return rhs > lhs; +} + +bool operator>=(const uint128_t & lhs, const uint256_t & rhs){ + return rhs <= lhs; +} + +bool operator<=(const uint128_t & lhs, const uint256_t & rhs){ + return rhs >= lhs; +} + +// Arithmetic Operators +uint256_t operator+(const uint128_t & lhs, const uint256_t & rhs){ + return rhs + lhs; +} + +uint128_t & operator+=(uint128_t & lhs, const uint256_t & rhs){ + lhs = (rhs + lhs).lower(); + return lhs; +} + +uint256_t operator-(const uint128_t & lhs, const 
uint256_t & rhs){ + return -(rhs - lhs); +} + +uint128_t & operator-=(uint128_t & lhs, const uint256_t & rhs){ + lhs = (-(rhs - lhs)).lower(); + return lhs; +} + +/* +uint256_t operator*(const uint128_t & lhs, const uint256_t & rhs){ + return rhs * lhs; +} + +uint128_t & operator*=(uint128_t & lhs, const uint256_t & rhs){ + lhs = (rhs * lhs).lower(); + return lhs; +} + +uint256_t operator/(const uint128_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) / rhs; +} + +uint128_t & operator/=(uint128_t & lhs, const uint256_t & rhs){ + lhs = (uint256_t(lhs) / rhs).lower(); + return lhs; +} + +uint256_t operator%(const uint128_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) % rhs; +} + +uint128_t & operator%=(uint128_t & lhs, const uint256_t & rhs){ + lhs = (uint256_t(lhs) % rhs).lower(); + return lhs; +} + */ + +/* +std::ostream & operator<<(std::ostream & stream, const uint256_t & rhs){ + if (stream.flags() & stream.oct){ + stream << rhs.str(8); + } + else if (stream.flags() & stream.dec){ + stream << rhs.str(10); + } + else if (stream.flags() & stream.hex){ + stream << rhs.str(16); + } + return stream; +} + */ diff --git a/src/uint256_t.h b/src/uint256_t.h new file mode 100644 index 0000000..1e9f552 --- /dev/null +++ b/src/uint256_t.h @@ -0,0 +1,11 @@ +// PUBLIC IMPORT HEADER +#ifndef _UINT256_H_ +#define _UINT256_H_ +#include "uint256_t_config.include" +#define UINT256_T_EXTERN _UINT256_T_IMPORT +typedef __uint128_t uint128_t; +const uint128_t uint128_0(0); +const uint128_t uint128_1(1); +#define __LITTLE_ENDIAN__ +#include "uint256_t.include" +#endif diff --git a/src/uint256_t.include b/src/uint256_t.include new file mode 100644 index 0000000..5dfa739 --- /dev/null +++ b/src/uint256_t.include @@ -0,0 +1,639 @@ +/* +uint256_t.h +An unsigned 256 bit integer library for C++ + +Copyright (c) 2013 - 2017 Jason Lee @ calccrypto at gmail.com + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated 
documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +With much help from Auston Sterling + +Thanks to François Dessenne for convincing me +to do a general rewrite of this class. 
+*/ + +#ifndef __UINT256_T__ +#define __UINT256_T__ + +#include +#include +#include +#include +#include +#include + +class UINT256_T_EXTERN uint256_t; + +// Give uint256_t type traits +namespace std { // This is probably not a good idea + template <> struct is_arithmetic : std::true_type {}; + template <> struct is_integral : std::true_type {}; + template <> struct is_unsigned : std::true_type {}; +} + +class uint256_t{ + private: +#ifdef __BIG_ENDIAN__ + uint128_t UPPER, LOWER; +#endif +#ifdef __LITTLE_ENDIAN__ + uint128_t LOWER, UPPER; +#endif + + public: + // Constructors + uint256_t() = default; + uint256_t(const uint256_t & rhs) = default; + uint256_t(uint256_t && rhs) = default; + uint256_t(const std::string & s); + uint256_t(const char *val); + uint256_t(const std::string & s, uint8_t base); + uint256_t(const char *val, uint8_t base); + uint256_t(const bool & b); + + template ::value, T>::type > + uint256_t(const T & rhs) +#ifdef __BIG_ENDIAN__ + : UPPER(uint128_0), LOWER(rhs) +#endif +#ifdef __LITTLE_ENDIAN__ + : LOWER(rhs), UPPER(uint128_0) +#endif + { + if (std::is_signed::value) { + if (rhs < 0) { + UPPER = uint128_t(-1); + } + } + } + + template ::value && std::is_integral::value, void>::type> + uint256_t(const S & upper_rhs, const T & lower_rhs) +#ifdef __BIG_ENDIAN__ + : UPPER(upper_rhs), LOWER(lower_rhs) +#endif +#ifdef __LITTLE_ENDIAN__ + : LOWER(lower_rhs), UPPER(upper_rhs) +#endif + {} + + uint256_t(const uint128_t & upper_rhs, const uint128_t & lower_rhs) +#ifdef __BIG_ENDIAN__ + : UPPER(upper_rhs), LOWER(lower_rhs) +#endif +#ifdef __LITTLE_ENDIAN__ + : LOWER(lower_rhs), UPPER(upper_rhs) +#endif + {} + uint256_t(const uint128_t & lower_rhs) +#ifdef __BIG_ENDIAN__ + : UPPER(uint128_0), LOWER(lower_rhs) +#endif +#ifdef __LITTLE_ENDIAN__ + : LOWER(lower_rhs), UPPER(uint128_0) +#endif + {} + + template ::value && + std::is_integral::value && + std::is_integral::value && + std::is_integral::value, void>::type> + uint256_t(const R & upper_lhs, const S 
& lower_lhs, const T & upper_rhs, const U & lower_rhs) +#ifdef __BIG_ENDIAN__ + : UPPER(upper_lhs, lower_lhs), LOWER(upper_rhs, lower_rhs) +#endif +#ifdef __LITTLE_ENDIAN__ + : LOWER(upper_rhs, lower_rhs), UPPER(upper_lhs, lower_lhs) +#endif + {} + + /* + // RHS input args only + std::vector export_bits() const; + std::vector export_bits_truncate() const; + */ + // Assignment Operator + uint256_t & operator=(const uint256_t & rhs) = default; + uint256_t & operator=(uint256_t && rhs) = default; + + template ::value, T>::type> + uint256_t & operator=(const T & rhs){ + UPPER = uint128_0; + + if (std::is_signed::value) { + if (rhs < 0) { + UPPER = uint128_t(-1); + } + } + + LOWER = rhs; + return *this; + } + + uint256_t & operator=(const bool & rhs); + + // Typecast Operators + operator bool () const; + operator uint8_t () const; + operator uint16_t () const; + operator uint32_t () const; + operator uint64_t () const; + operator uint128_t () const; + + // Bitwise Operators + uint256_t operator&(const uint128_t & rhs) const; + uint256_t operator&(const uint256_t & rhs) const; + + template ::value, T>::type > + uint256_t operator&(const T & rhs) const{ + return uint256_t(uint128_0, LOWER & (uint128_t) rhs); + } + + uint256_t & operator&=(const uint128_t & rhs); + uint256_t & operator&=(const uint256_t & rhs); + + template ::value, T>::type > + uint256_t & operator&=(const T & rhs){ + UPPER = uint128_0; + LOWER &= rhs; + return *this; + } + + uint256_t operator|(const uint128_t & rhs) const; + uint256_t operator|(const uint256_t & rhs) const; + + template ::value, T>::type > + uint256_t operator|(const T & rhs) const{ + return uint256_t(UPPER, LOWER | uint128_t(rhs)); + } + + uint256_t & operator|=(const uint128_t & rhs); + uint256_t & operator|=(const uint256_t & rhs); + + template ::value, T>::type > + uint256_t & operator|=(const T & rhs){ + LOWER |= (uint128_t) rhs; + return *this; + } + + uint256_t operator^(const uint128_t & rhs) const; + uint256_t operator^(const 
uint256_t & rhs) const; + + template ::value, T>::type > + uint256_t operator^(const T & rhs) const{ + return uint256_t(UPPER, LOWER ^ (uint128_t) rhs); + } + + uint256_t & operator^=(const uint128_t & rhs); + uint256_t & operator^=(const uint256_t & rhs); + + template ::value, T>::type > + uint256_t & operator^=(const T & rhs){ + LOWER ^= (uint128_t) rhs; + return *this; + } + + uint256_t operator~() const; + + // Bit Shift Operators + uint256_t operator<<(const uint128_t & shift) const; + uint256_t operator<<(const uint256_t & shift) const; + + template ::value, T>::type > + uint256_t operator<<(const T & rhs) const{ + return *this << uint256_t(rhs); + } + + uint256_t & operator<<=(const uint128_t & shift); + uint256_t & operator<<=(const uint256_t & shift); + + template ::value, T>::type > + uint256_t & operator<<=(const T & rhs){ + *this = *this << uint256_t(rhs); + return *this; + } + + uint256_t operator>>(const uint128_t & shift) const; + uint256_t operator>>(const uint256_t & shift) const; + + template ::value, T>::type > + uint256_t operator>>(const T & rhs) const{ + return *this >> uint256_t(rhs); + } + + uint256_t & operator>>=(const uint128_t & shift); + uint256_t & operator>>=(const uint256_t & shift); + + template ::value, T>::type > + uint256_t & operator>>=(const T & rhs){ + *this = *this >> uint256_t(rhs); + return *this; + } + + // Logical Operators + bool operator!() const; + + bool operator&&(const uint128_t & rhs) const; + bool operator&&(const uint256_t & rhs) const; + + template ::value, T>::type > + bool operator&&(const T & rhs) const{ + return ((bool) *this && rhs); + } + + bool operator||(const uint128_t & rhs) const; + bool operator||(const uint256_t & rhs) const; + + template ::value, T>::type > + bool operator||(const T & rhs) const{ + return ((bool) *this || rhs); + } + + // Comparison Operators + bool operator==(const uint128_t & rhs) const; + bool operator==(const uint256_t & rhs) const; + + template ::value, T>::type > + bool 
operator==(const T & rhs) const{ + return (!UPPER && (LOWER == uint128_t(rhs))); + } + + bool operator!=(const uint128_t & rhs) const; + bool operator!=(const uint256_t & rhs) const; + + template ::value, T>::type > + bool operator!=(const T & rhs) const{ + return ((bool) UPPER | (LOWER != uint128_t(rhs))); + } + + bool operator>(const uint128_t & rhs) const; + bool operator>(const uint256_t & rhs) const; + + template ::value, T>::type > + bool operator>(const T & rhs) const{ + return ((bool) UPPER | (LOWER > uint128_t(rhs))); + } + + bool operator<(const uint128_t & rhs) const; + bool operator<(const uint256_t & rhs) const; + + template ::value, T>::type > + bool operator<(const T & rhs) const{ + return (!UPPER)?(LOWER < uint128_t(rhs)):false; + } + + bool operator>=(const uint128_t & rhs) const; + bool operator>=(const uint256_t & rhs) const; + + template ::value, T>::type > + bool operator>=(const T & rhs) const{ + return ((*this > rhs) | (*this == rhs)); + } + + bool operator<=(const uint128_t & rhs) const; + bool operator<=(const uint256_t & rhs) const; + + template ::value, T>::type > + bool operator<=(const T & rhs) const{ + return ((*this < rhs) | (*this == rhs)); + } + + // Arithmetic Operators + uint256_t operator+(const uint128_t & rhs) const; + uint256_t operator+(const uint256_t & rhs) const; + + template ::value, T>::type > + uint256_t operator+(const T & rhs) const{ + return uint256_t(UPPER + ((LOWER + (uint128_t) rhs) < LOWER), LOWER + (uint128_t) rhs); + } + + uint256_t & operator+=(const uint128_t & rhs); + uint256_t & operator+=(const uint256_t & rhs); + + template ::value, T>::type > + uint256_t & operator+=(const T & rhs){ + return *this += uint256_t(rhs); + } + + uint256_t operator-(const uint128_t & rhs) const; + uint256_t operator-(const uint256_t & rhs) const; + + template ::value, T>::type > + uint256_t operator-(const T & rhs) const{ + return uint256_t(UPPER - ((LOWER - rhs) > LOWER), LOWER - rhs); + } + + uint256_t & operator-=(const 
uint128_t & rhs); + uint256_t & operator-=(const uint256_t & rhs); + + template ::value, T>::type > + uint256_t & operator-=(const T & rhs){ + return *this = *this - uint256_t(rhs); + } + + /* + uint256_t operator*(const uint128_t & rhs) const; + uint256_t operator*(const uint256_t & rhs) const; + + template ::value, T>::type > + uint256_t operator*(const T & rhs) const{ + return *this * uint256_t(rhs); + } + uint256_t & operator*=(const uint128_t & rhs); + uint256_t & operator*=(const uint256_t & rhs); + + template ::value, T>::type > + uint256_t & operator*=(const T & rhs){ + return *this = *this * uint256_t(rhs); + } +*/ + + private: + std::pair divmod(const uint256_t & lhs, const uint256_t & rhs) const; + void init(const char * s); + void init_from_base(const char * s, uint8_t base); + + public: + uint256_t operator/(const uint128_t & rhs) const; + uint256_t operator/(const uint256_t & rhs) const; + + template ::value, T>::type > + uint256_t operator/(const T & rhs) const{ + return *this / uint256_t(rhs); + } + + uint256_t & operator/=(const uint128_t & rhs); + uint256_t & operator/=(const uint256_t & rhs); + + template ::value, T>::type > + uint256_t & operator/=(const T & rhs){ + return *this = *this / uint256_t(rhs); + } + + uint256_t operator%(const uint128_t & rhs) const; + uint256_t operator%(const uint256_t & rhs) const; + + template ::value, T>::type > + uint256_t operator%(const T & rhs) const{ + return *this % uint256_t(rhs); + } + + uint256_t & operator%=(const uint128_t & rhs); + uint256_t & operator%=(const uint256_t & rhs); + + template ::value, T>::type > + uint256_t & operator%=(const T & rhs){ + return *this = *this % uint256_t(rhs); + } + + // Increment Operators + uint256_t & operator++(); + uint256_t operator++(int); + + // Decrement Operators + uint256_t & operator--(); + uint256_t operator--(int); + + // Nothing done since promotion doesn't work here + uint256_t operator+() const; + + // two's complement + uint256_t operator-() const; + + 
// Get private values + const uint128_t & upper() const; + const uint128_t & lower() const; + + // Get bitsize of value + uint16_t bits() const; + + // Get string representation of value + std::string str(uint8_t base = 10, const unsigned int & len = 0) const; +}; + +// useful values +UINT256_T_EXTERN extern const uint128_t uint128_64; +UINT256_T_EXTERN extern const uint128_t uint128_128; +UINT256_T_EXTERN extern const uint128_t uint128_256; +UINT256_T_EXTERN extern const uint256_t uint256_0; +UINT256_T_EXTERN extern const uint256_t uint256_1; +UINT256_T_EXTERN extern const uint256_t uint256_max; + +// Bitwise Operators +UINT256_T_EXTERN uint256_t operator&(const uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > + uint256_t operator&(const T & lhs, const uint256_t & rhs){ + return rhs & lhs; +} + +UINT256_T_EXTERN uint128_t & operator&=(uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +T & operator&=(T & lhs, const uint256_t & rhs){ + return lhs = static_cast (rhs & lhs); +} + +UINT256_T_EXTERN uint256_t operator|(const uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +uint256_t operator|(const T & lhs, const uint256_t & rhs){ + return rhs | lhs; +} + +UINT256_T_EXTERN uint128_t & operator|=(uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +T & operator|=(T & lhs, const uint256_t & rhs){ + return lhs = static_cast (rhs | lhs); +} + +UINT256_T_EXTERN uint256_t operator^(const uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +uint256_t operator^(const T & lhs, const uint256_t & rhs){ + return rhs ^ lhs; +} + +uint128_t & operator^=(uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +T & operator^=(T & lhs, const uint256_t & rhs){ + return lhs = static_cast (rhs ^ lhs); +} + +// Bitshift operators +UINT256_T_EXTERN uint256_t operator<<(const bool & lhs, const uint256_t & rhs); +UINT256_T_EXTERN uint256_t operator<<(const uint8_t 
& lhs, const uint256_t & rhs); +UINT256_T_EXTERN uint256_t operator<<(const uint16_t & lhs, const uint256_t & rhs); +UINT256_T_EXTERN uint256_t operator<<(const uint32_t & lhs, const uint256_t & rhs); +UINT256_T_EXTERN uint256_t operator<<(const uint64_t & lhs, const uint256_t & rhs); +UINT256_T_EXTERN uint256_t operator<<(const uint128_t & lhs, const uint256_t & rhs); +UINT256_T_EXTERN uint256_t operator<<(const int8_t & lhs, const uint256_t & rhs); +UINT256_T_EXTERN uint256_t operator<<(const int16_t & lhs, const uint256_t & rhs); +UINT256_T_EXTERN uint256_t operator<<(const int32_t & lhs, const uint256_t & rhs); +UINT256_T_EXTERN uint256_t operator<<(const int64_t & lhs, const uint256_t & rhs); + +UINT256_T_EXTERN uint128_t & operator<<=(uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +T & operator<<=(T & lhs, const uint256_t & rhs){ + lhs = static_cast (uint256_t(lhs) << rhs); + return lhs; +} + +UINT256_T_EXTERN uint256_t operator>>(const bool & lhs, const uint256_t & rhs); +UINT256_T_EXTERN uint256_t operator>>(const uint8_t & lhs, const uint256_t & rhs); +UINT256_T_EXTERN uint256_t operator>>(const uint16_t & lhs, const uint256_t & rhs); +UINT256_T_EXTERN uint256_t operator>>(const uint32_t & lhs, const uint256_t & rhs); +UINT256_T_EXTERN uint256_t operator>>(const uint64_t & lhs, const uint256_t & rhs); +UINT256_T_EXTERN uint256_t operator>>(const uint128_t & lhs, const uint256_t & rhs); +UINT256_T_EXTERN uint256_t operator>>(const int8_t & lhs, const uint256_t & rhs); +UINT256_T_EXTERN uint256_t operator>>(const int16_t & lhs, const uint256_t & rhs); +UINT256_T_EXTERN uint256_t operator>>(const int32_t & lhs, const uint256_t & rhs); +UINT256_T_EXTERN uint256_t operator>>(const int64_t & lhs, const uint256_t & rhs); + +UINT256_T_EXTERN uint128_t & operator>>=(uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +T & operator>>=(T & lhs, const uint256_t & rhs){ + return lhs = static_cast (uint256_t(lhs) >> rhs); 
+} + +// Comparison Operators +UINT256_T_EXTERN bool operator==(const uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +bool operator==(const T & lhs, const uint256_t & rhs){ + return (!rhs.upper() && ((uint64_t) lhs == rhs.lower())); +} + +UINT256_T_EXTERN bool operator!=(const uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +bool operator!=(const T & lhs, const uint256_t & rhs){ + return (rhs.upper() | ((uint64_t) lhs != rhs.lower())); +} + +UINT256_T_EXTERN bool operator>(const uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +bool operator>(const T & lhs, const uint256_t & rhs){ + return rhs.upper()?false:((uint128_t) lhs > rhs.lower()); +} + +UINT256_T_EXTERN bool operator<(const uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +bool operator<(const T & lhs, const uint256_t & rhs){ + return rhs.upper()?true:((uint128_t) lhs < rhs.lower()); +} + +UINT256_T_EXTERN bool operator>=(const uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +bool operator>=(const T & lhs, const uint256_t & rhs){ + return rhs.upper()?false:((uint128_t) lhs >= rhs.lower()); +} + +UINT256_T_EXTERN bool operator<=(const uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +bool operator<=(const T & lhs, const uint256_t & rhs){ + return rhs.upper()?true:((uint128_t) lhs <= rhs.lower()); +} + +// Arithmetic Operators +UINT256_T_EXTERN uint256_t operator+(const uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +uint256_t operator+(const T & lhs, const uint256_t & rhs){ + return rhs + lhs; +} + +UINT256_T_EXTERN uint128_t & operator+=(uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +T & operator+=(T & lhs, const uint256_t & rhs){ + lhs = static_cast (rhs + lhs); + return lhs; +} + +UINT256_T_EXTERN uint256_t operator-(const uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +uint256_t 
operator-(const T & lhs, const uint256_t & rhs){ + return -(rhs - lhs); +} + +UINT256_T_EXTERN uint128_t & operator-=(uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +T & operator-=(T & lhs, const uint256_t & rhs){ + return lhs = static_cast (-(rhs - lhs)); +} + +/* +UINT256_T_EXTERN uint256_t operator*(const uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +uint256_t operator*(const T & lhs, const uint256_t & rhs){ + return rhs * lhs; +} + +UINT256_T_EXTERN uint128_t & operator*=(uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +T & operator*=(T & lhs, const uint256_t & rhs){ + return lhs = static_cast (rhs * lhs); +} + */ + +UINT256_T_EXTERN uint256_t operator/(const uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +uint256_t operator/(const T & lhs, const uint256_t & rhs){ + return uint256_t(lhs) / rhs; +} + +UINT256_T_EXTERN uint128_t & operator/=(uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +T & operator/=(T & lhs, const uint256_t & rhs){ + return lhs = static_cast (uint256_t(lhs) / rhs); +} + +UINT256_T_EXTERN uint256_t operator%(const uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +uint256_t operator%(const T & lhs, const uint256_t & rhs){ + return uint256_t(lhs) % rhs; +} + +UINT256_T_EXTERN uint128_t & operator%=(uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +T & operator%=(T & lhs, const uint256_t & rhs){ + return lhs = static_cast (uint256_t(lhs) % rhs); +} + +// IO Operator +UINT256_T_EXTERN std::ostream & operator<<(std::ostream & stream, const uint256_t & rhs); +#endif diff --git a/src/uint256_t_config.include b/src/uint256_t_config.include new file mode 100644 index 0000000..e83db51 --- /dev/null +++ b/src/uint256_t_config.include @@ -0,0 +1,19 @@ +#ifndef _UINT256_T_CONFIG_ + #define _UINT256_T_CONFIG_ + #if defined(_MSC_VER) + #if defined(_DLL) + #define _UINT256_T_EXPORT 
__declspec(dllexport) + #define _UINT256_T_IMPORT __declspec(dllimport) + #else + #define _UINT256_T_EXPORT + #define _UINT256_T_IMPORT + #endif + #else + // All modules on Unix are compiled with -fvisibility=hidden + // All API symbols get visibility default + // whether or not we're static linking or dynamic linking (with -fPIC) + #define _UINT256_T_EXPORT __attribute__((visibility("default"))) + #define _UINT256_T_IMPORT __attribute__((visibility("default"))) + #endif +#endif + From 23638a8ff40d76c64fc60d1c04dbf8df619284bb Mon Sep 17 00:00:00 2001 From: OndrejSladky Date: Thu, 21 Mar 2024 14:49:39 +0100 Subject: [PATCH 02/10] Fixed unittests on linux. --- Makefile | 5 +++-- src/kmers.h | 2 +- src/{ => uint256_t}/uint256_t.build | 0 src/{ => uint256_t}/uint256_t.cpp | 0 src/{ => uint256_t}/uint256_t.h | 0 src/{ => uint256_t}/uint256_t.include | 0 src/{ => uint256_t}/uint256_t_config.include | 0 7 files changed, 4 insertions(+), 3 deletions(-) rename src/{ => uint256_t}/uint256_t.build (100%) rename src/{ => uint256_t}/uint256_t.cpp (100%) rename src/{ => uint256_t}/uint256_t.h (100%) rename src/{ => uint256_t}/uint256_t.include (100%) rename src/{ => uint256_t}/uint256_t_config.include (100%) diff --git a/Makefile b/Makefile index c4680c9..8e9b52b 100644 --- a/Makefile +++ b/Makefile @@ -4,6 +4,7 @@ CXX= g++ CXXFLAGS= -g -Wall -Wno-unused-function -std=c++17 -O2 LDFLAGS= -lz -lpthread SRC= src +UINT256= $(SRC)/uint256_t SCRIPTS= scripts DATA= data TESTS= tests @@ -26,11 +27,11 @@ quick-verify: $(PROG) $(SCRIPTS)/verify.py $(DATA)/spneumoniae.fa $(PROG): $(SRC)/main.cpp $(SRC)/$(wildcard *.cpp *.h *.hpp) src/version.h ./create-version.sh - $(CXX) $(CXXFLAGS) $(SRC)/main.cpp $(SRC)/uint256_t.cpp $(SRC)/kthread.c -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) $(SRC)/main.cpp $(UINT256)/uint256_t.cpp $(SRC)/kthread.c -o $@ $(LDFLAGS) prophasmtest: $(TESTS)/unittest.cpp gtest-all.o $(SRC)/$(wildcard *.cpp *.h *.hpp) $(TESTS)/$(wildcard *.cpp *.h *.hpp) - $(CXX) $(CXXFLAGS) 
-isystem $(GTEST)/include -I $(GTEST)/include $(TESTS)/unittest.cpp gtest-all.o -pthread -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -isystem $(GTEST)/include -I $(GTEST)/include $(TESTS)/unittest.cpp $(UINT256)/uint256_t.cpp gtest-all.o -pthread -o $@ $(LDFLAGS) gtest-all.o: $(GTEST)/src/gtest-all.cc $(wildcard *.cpp *.h *.hpp) $(CXX) $(CXXFLAGS) -isystem $(GTEST)/include -I $(GTEST)/include -I $(GTEST) -DGTEST_CREATE_SHARED_LIBRARY=1 -c -pthread $(GTEST)/src/gtest-all.cc -o $@ diff --git a/src/kmers.h b/src/kmers.h index 182b1a6..27040ea 100644 --- a/src/kmers.h +++ b/src/kmers.h @@ -4,7 +4,7 @@ #include #include -#include "uint256_t.h" +#include "uint256_t/uint256_t.h" typedef uint64_t kmer64_t; typedef __uint128_t kmer128_t; diff --git a/src/uint256_t.build b/src/uint256_t/uint256_t.build similarity index 100% rename from src/uint256_t.build rename to src/uint256_t/uint256_t.build diff --git a/src/uint256_t.cpp b/src/uint256_t/uint256_t.cpp similarity index 100% rename from src/uint256_t.cpp rename to src/uint256_t/uint256_t.cpp diff --git a/src/uint256_t.h b/src/uint256_t/uint256_t.h similarity index 100% rename from src/uint256_t.h rename to src/uint256_t/uint256_t.h diff --git a/src/uint256_t.include b/src/uint256_t/uint256_t.include similarity index 100% rename from src/uint256_t.include rename to src/uint256_t/uint256_t.include diff --git a/src/uint256_t_config.include b/src/uint256_t/uint256_t_config.include similarity index 100% rename from src/uint256_t_config.include rename to src/uint256_t/uint256_t_config.include From 4c095bbc460facf2de6ff8317dde0c26404fb993 Mon Sep 17 00:00:00 2001 From: OndrejSladky Date: Thu, 21 Mar 2024 15:02:42 +0100 Subject: [PATCH 03/10] Minor cleanups. 
--- Makefile | 2 +- src/kmers.h | 4 +- src/prophasm.h | 4 +- src/uint256_t/uint256_t.cpp | 265 -------------------------------- src/uint256_t/uint256_t.include | 110 +------------ 5 files changed, 9 insertions(+), 376 deletions(-) diff --git a/Makefile b/Makefile index 8e9b52b..1331ab6 100644 --- a/Makefile +++ b/Makefile @@ -25,7 +25,7 @@ verify: $(PROG) $(SCRIPTS)/verify.py $(DATA)/spneumoniae.fa quick-verify: $(PROG) $(SCRIPTS)/verify.py $(DATA)/spneumoniae.fa python $(SCRIPTS)/verify.py $(DATA)/spneumoniae.fa --quick --interpath $(DATA)/spyogenes.fa -$(PROG): $(SRC)/main.cpp $(SRC)/$(wildcard *.cpp *.h *.hpp) src/version.h +$(PROG): $(SRC)/main.cpp $(SRC)/$(wildcard *.cpp *.h *.hpp) src/version.h $(wildcard $(UINT256)/*.cpp $(UINT256)/*.h $(UINT256)/*.include) ./create-version.sh $(CXX) $(CXXFLAGS) $(SRC)/main.cpp $(UINT256)/uint256_t.cpp $(SRC)/kthread.c -o $@ $(LDFLAGS) diff --git a/src/kmers.h b/src/kmers.h index 27040ea..7bf2422 100644 --- a/src/kmers.h +++ b/src/kmers.h @@ -114,7 +114,7 @@ const char letters[4] {'A', 'C', 'G', 'T'}; /// Return the index-th nucleotide from the encoded k-mer. template inline char NucleotideAtIndex(kmer_t encoded, int k, int index) { - return letters[(unsigned long)(encoded >> ((k - index - kmer_t(1)) << kmer_t(1))) & kmer_t(3)]; + return letters[(uint32_t)(encoded >> ((k - index - kmer_t(1)) << kmer_t(1))) & kmer_t(3)]; } /// Convert the encoded KMer representation to string. @@ -123,7 +123,7 @@ std::string NumberToKMer(kmer_t encoded, int length) { std::string ret(length, 'N'); for (int i = 0; i < length; ++i) { // The last two bits correspond to one nucleotide. - ret[length - i -1] = letters[(unsigned long)encoded & 3]; + ret[length - i -1] = letters[(uint32_t)(encoded & 3)]; // Move to the next letter. 
encoded >>= 2; } diff --git a/src/prophasm.h b/src/prophasm.h index d099cfb..6220942 100644 --- a/src/prophasm.h +++ b/src/prophasm.h @@ -60,7 +60,7 @@ void NextSimplitig(KHT *kMers, kmer_t begin, std::ostream& of, int k, bool comp } else { // Extend the simplitig to the right. eraseKMer(kMers, next, k, complements); - simplitig.emplace_back(letters[(unsigned int)ext]); + simplitig.emplace_back(letters[(uint32_t)ext]); last = next; } } else { @@ -71,7 +71,7 @@ void NextSimplitig(KHT *kMers, kmer_t begin, std::ostream& of, int k, bool comp } else { // Extend the simplitig to the left. eraseKMer(kMers, next, k, complements); - simplitig.emplace_front(letters[(unsigned int)ext]); + simplitig.emplace_front(letters[(uint32_t)ext]); first = next; } } diff --git a/src/uint256_t/uint256_t.cpp b/src/uint256_t/uint256_t.cpp index 0cc4858..82bcc00 100644 --- a/src/uint256_t/uint256_t.cpp +++ b/src/uint256_t/uint256_t.cpp @@ -10,39 +10,9 @@ const uint256_t uint256_0(0); const uint256_t uint256_1(1); const uint256_t uint256_max(uint128_t(-1), uint128_t(-1)); -/* -uint256_t::uint256_t(const std::string & s, uint8_t base) { - init_from_base(s.c_str(), base); -} - -uint256_t::uint256_t(const char * s, uint8_t base) { - init_from_base(s, base); -} -*/ - uint256_t::uint256_t(const bool & b) : uint256_t((uint8_t) b) {} -/* -void uint256_t::init_from_base(const char * s, uint8_t base) { - *this = 0; - - uint256_t power(1); - uint8_t digit; - int pos = strlen(s) - 1; - while(pos >= 0) { - digit = 0; - if('0' <= s[pos] && s[pos] <= '9') { - digit = s[pos] - '0'; - } else if('a' <= s[pos] && s[pos] <= 'z') { - digit = s[pos] - 'a' + 10; - } - *this += digit * power; - pos--; - power *= base; - } -} -*/ uint256_t & uint256_t::operator=(const bool & rhs) { UPPER = 0; @@ -321,133 +291,6 @@ uint256_t & uint256_t::operator-=(const uint256_t & rhs){ return *this; } -/* -uint256_t uint256_t::operator*(const uint128_t & rhs) const{ - return *this * uint256_t(rhs); -} - */ - -/* -uint256_t 
uint256_t::operator*(const uint256_t & rhs) const{ - // split values into 4 64-bit parts - uint128_t top[4] = {UPPER.upper(), UPPER.lower(), LOWER.upper(), LOWER.lower()}; - uint128_t bottom[4] = {rhs.upper().upper(), rhs.upper().lower(), rhs.lower().upper(), rhs.lower().lower()}; - uint128_t products[4][4]; - - // multiply each component of the values - for(int y = 3; y > -1; y--){ - for(int x = 3; x > -1; x--){ - products[3 - y][x] = top[x] * bottom[y]; - } - } - - // first row - uint128_t fourth64 = uint128_t(products[0][3].lower()); - uint128_t third64 = uint128_t(products[0][2].lower()) + uint128_t(products[0][3].upper()); - uint128_t second64 = uint128_t(products[0][1].lower()) + uint128_t(products[0][2].upper()); - uint128_t first64 = uint128_t(products[0][0].lower()) + uint128_t(products[0][1].upper()); - - // second row - third64 += uint128_t(products[1][3].lower()); - second64 += uint128_t(products[1][2].lower()) + uint128_t(products[1][3].upper()); - first64 += uint128_t(products[1][1].lower()) + uint128_t(products[1][2].upper()); - - // third row - second64 += uint128_t(products[2][3].lower()); - first64 += uint128_t(products[2][2].lower()) + uint128_t(products[2][3].upper()); - - // fourth row - first64 += uint128_t(products[3][3].lower()); - - // combines the values, taking care of carry over - return uint256_t(first64 << uint128_64, uint128_0) + - uint256_t(third64.upper(), third64 << uint128_64) + - uint256_t(second64, uint128_0) + - uint256_t(fourth64); -} - */ - -/* -uint256_t & uint256_t::operator*=(const uint128_t & rhs){ - return *this *= uint256_t(rhs); -} - -uint256_t & uint256_t::operator*=(const uint256_t & rhs){ - *this = *this * rhs; - return *this; -} - */ - -/* -std::pair uint256_t::divmod(const uint256_t & lhs, const uint256_t & rhs) const{ - // Save some calculations ///////////////////// - if (rhs == uint256_0){ - throw std::domain_error("Error: division or modulus by 0"); - } - else if (rhs == uint256_1){ - return std::pair (lhs, 
uint256_0); - } - else if (lhs == rhs){ - return std::pair (uint256_1, uint256_0); - } - else if ((lhs == uint256_0) || (lhs < rhs)){ - return std::pair (uint256_0, lhs); - } - - std::pair qr(uint256_0, lhs); - uint256_t copyd = rhs << (lhs.bits() - rhs.bits()); - uint256_t adder = uint256_1 << (lhs.bits() - rhs.bits()); - if (copyd > qr.second){ - copyd >>= uint256_1; - adder >>= uint256_1; - } - while (qr.second >= rhs){ - if (qr.second >= copyd){ - qr.second -= copyd; - qr.first |= adder; - } - copyd >>= uint256_1; - adder >>= uint256_1; - } - return qr; -} - - -uint256_t uint256_t::operator/(const uint128_t & rhs) const{ - return *this / uint256_t(rhs); -} - -uint256_t uint256_t::operator/(const uint256_t & rhs) const{ - return divmod(*this, rhs).first; -} - -uint256_t & uint256_t::operator/=(const uint128_t & rhs){ - return *this /= uint256_t(rhs); -} - -uint256_t & uint256_t::operator/=(const uint256_t & rhs){ - *this = *this / rhs; - return *this; -} - -uint256_t uint256_t::operator%(const uint128_t & rhs) const{ - return *this % uint256_t(rhs); -} - -uint256_t uint256_t::operator%(const uint256_t & rhs) const{ - return *this - (rhs * (*this / rhs)); -} - -uint256_t & uint256_t::operator%=(const uint128_t & rhs){ - return *this %= uint256_t(rhs); -} - -uint256_t & uint256_t::operator%=(const uint256_t & rhs){ - *this = *this % rhs; - return *this; -} - */ - uint256_t & uint256_t::operator++(){ *this += uint256_1; return *this; @@ -486,70 +329,6 @@ const uint128_t & uint256_t::lower() const { return LOWER; } -/* -std::vector uint256_t::export_bits() const { - std::vector ret; - ret.reserve(32); - UPPER.export_bits(ret); - LOWER.export_bits(ret); - return ret; -} - -std::vector uint256_t::export_bits_truncate() const { - std::vector ret = export_bits(); - - //prune the zeroes - int i = 0; - while (ret[i] == 0 && i < 64) i++; - ret.erase(ret.begin(), ret.begin() + i); - - return ret; -} - -uint16_t uint256_t::bits() const{ - uint16_t out = 0; - if (UPPER){ - 
out = 128; - uint128_t up = UPPER; - while (up){ - up >>= uint128_1; - out++; - } - } - else{ - uint128_t low = LOWER; - while (low){ - low >>= uint128_1; - out++; - } - } - return out; -} - */ - -/* -std::string uint256_t::str(uint8_t base, const unsigned int & len) const{ - if ((base < 2) || (base > 36)){ - throw std::invalid_argument("Base must be in the range 2-36"); - } - std::string out = ""; - if (!(*this)){ - out = "0"; - } - else{ - std::pair qr(*this, uint256_0); - do{ - qr = divmod(qr.first, base); - out = "0123456789abcdefghijklmnopqrstuvwxyz"[(uint8_t) qr.second] + out; - } while (qr.first); - } - if (out.size() < len){ - out = std::string(len - out.size(), '0') + out; - } - return out; -} -*/ - uint256_t operator&(const uint128_t & lhs, const uint256_t & rhs){ return rhs & lhs; } @@ -710,47 +489,3 @@ uint128_t & operator-=(uint128_t & lhs, const uint256_t & rhs){ lhs = (-(rhs - lhs)).lower(); return lhs; } - -/* -uint256_t operator*(const uint128_t & lhs, const uint256_t & rhs){ - return rhs * lhs; -} - -uint128_t & operator*=(uint128_t & lhs, const uint256_t & rhs){ - lhs = (rhs * lhs).lower(); - return lhs; -} - -uint256_t operator/(const uint128_t & lhs, const uint256_t & rhs){ - return uint256_t(lhs) / rhs; -} - -uint128_t & operator/=(uint128_t & lhs, const uint256_t & rhs){ - lhs = (uint256_t(lhs) / rhs).lower(); - return lhs; -} - -uint256_t operator%(const uint128_t & lhs, const uint256_t & rhs){ - return uint256_t(lhs) % rhs; -} - -uint128_t & operator%=(uint128_t & lhs, const uint256_t & rhs){ - lhs = (uint256_t(lhs) % rhs).lower(); - return lhs; -} - */ - -/* -std::ostream & operator<<(std::ostream & stream, const uint256_t & rhs){ - if (stream.flags() & stream.oct){ - stream << rhs.str(8); - } - else if (stream.flags() & stream.dec){ - stream << rhs.str(10); - } - else if (stream.flags() & stream.hex){ - stream << rhs.str(16); - } - return stream; -} - */ diff --git a/src/uint256_t/uint256_t.include b/src/uint256_t/uint256_t.include index 
5dfa739..1e719a6 100644 --- a/src/uint256_t/uint256_t.include +++ b/src/uint256_t/uint256_t.include @@ -26,6 +26,10 @@ With much help from Auston Sterling Thanks to François Dessenne for convincing me to do a general rewrite of this class. + + +Ondřej Sladký: removed parts of this file as they were not needed and were +not compatible with __uint128_t. */ #ifndef __UINT256_T__ @@ -124,11 +128,6 @@ class uint256_t{ #endif {} - /* - // RHS input args only - std::vector export_bits() const; - std::vector export_bits_truncate() const; - */ // Assignment Operator uint256_t & operator=(const uint256_t & rhs) = default; uint256_t & operator=(uint256_t && rhs) = default; @@ -348,61 +347,6 @@ class uint256_t{ return *this = *this - uint256_t(rhs); } - /* - uint256_t operator*(const uint128_t & rhs) const; - uint256_t operator*(const uint256_t & rhs) const; - - template ::value, T>::type > - uint256_t operator*(const T & rhs) const{ - return *this * uint256_t(rhs); - } - uint256_t & operator*=(const uint128_t & rhs); - uint256_t & operator*=(const uint256_t & rhs); - - template ::value, T>::type > - uint256_t & operator*=(const T & rhs){ - return *this = *this * uint256_t(rhs); - } -*/ - - private: - std::pair divmod(const uint256_t & lhs, const uint256_t & rhs) const; - void init(const char * s); - void init_from_base(const char * s, uint8_t base); - - public: - uint256_t operator/(const uint128_t & rhs) const; - uint256_t operator/(const uint256_t & rhs) const; - - template ::value, T>::type > - uint256_t operator/(const T & rhs) const{ - return *this / uint256_t(rhs); - } - - uint256_t & operator/=(const uint128_t & rhs); - uint256_t & operator/=(const uint256_t & rhs); - - template ::value, T>::type > - uint256_t & operator/=(const T & rhs){ - return *this = *this / uint256_t(rhs); - } - - uint256_t operator%(const uint128_t & rhs) const; - uint256_t operator%(const uint256_t & rhs) const; - - template ::value, T>::type > - uint256_t operator%(const T & rhs) const{ - 
return *this % uint256_t(rhs); - } - - uint256_t & operator%=(const uint128_t & rhs); - uint256_t & operator%=(const uint256_t & rhs); - - template ::value, T>::type > - uint256_t & operator%=(const T & rhs){ - return *this = *this % uint256_t(rhs); - } - // Increment Operators uint256_t & operator++(); uint256_t operator++(int); @@ -590,50 +534,4 @@ T & operator-=(T & lhs, const uint256_t & rhs){ return lhs = static_cast (-(rhs - lhs)); } -/* -UINT256_T_EXTERN uint256_t operator*(const uint128_t & lhs, const uint256_t & rhs); - -template ::value, T>::type > -uint256_t operator*(const T & lhs, const uint256_t & rhs){ - return rhs * lhs; -} - -UINT256_T_EXTERN uint128_t & operator*=(uint128_t & lhs, const uint256_t & rhs); - -template ::value, T>::type > -T & operator*=(T & lhs, const uint256_t & rhs){ - return lhs = static_cast (rhs * lhs); -} - */ - -UINT256_T_EXTERN uint256_t operator/(const uint128_t & lhs, const uint256_t & rhs); - -template ::value, T>::type > -uint256_t operator/(const T & lhs, const uint256_t & rhs){ - return uint256_t(lhs) / rhs; -} - -UINT256_T_EXTERN uint128_t & operator/=(uint128_t & lhs, const uint256_t & rhs); - -template ::value, T>::type > -T & operator/=(T & lhs, const uint256_t & rhs){ - return lhs = static_cast (uint256_t(lhs) / rhs); -} - -UINT256_T_EXTERN uint256_t operator%(const uint128_t & lhs, const uint256_t & rhs); - -template ::value, T>::type > -uint256_t operator%(const T & lhs, const uint256_t & rhs){ - return uint256_t(lhs) % rhs; -} - -UINT256_T_EXTERN uint128_t & operator%=(uint128_t & lhs, const uint256_t & rhs); - -template ::value, T>::type > -T & operator%=(T & lhs, const uint256_t & rhs){ - return lhs = static_cast (uint256_t(lhs) % rhs); -} - -// IO Operator -UINT256_T_EXTERN std::ostream & operator<<(std::ostream & stream, const uint256_t & rhs); #endif From fd051558eb518eb64cfd413cb931042f03081da6 Mon Sep 17 00:00:00 2001 From: OndrejSladky Date: Thu, 21 Mar 2024 15:08:59 +0100 Subject: [PATCH 04/10] Removed 
duplicit definition of __LITTLE_ENDIAN__ --- src/uint256_t/uint256_t.cpp | 4 +++- src/uint256_t/uint256_t.h | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/uint256_t/uint256_t.cpp b/src/uint256_t/uint256_t.cpp index 82bcc00..de9546e 100644 --- a/src/uint256_t/uint256_t.cpp +++ b/src/uint256_t/uint256_t.cpp @@ -1,4 +1,6 @@ -#define __LITTLE_ENDIAN__ +#ifndef __LITTLE_ENDIAN__ + #define __LITTLE_ENDIAN__ 1 +#endif #include "uint256_t.build" #include #include diff --git a/src/uint256_t/uint256_t.h b/src/uint256_t/uint256_t.h index 1e9f552..e611db2 100644 --- a/src/uint256_t/uint256_t.h +++ b/src/uint256_t/uint256_t.h @@ -6,6 +6,8 @@ typedef __uint128_t uint128_t; const uint128_t uint128_0(0); const uint128_t uint128_1(1); -#define __LITTLE_ENDIAN__ +#ifndef __LITTLE_ENDIAN__ +#define __LITTLE_ENDIAN__ 1 +#endif #include "uint256_t.include" #endif From a6d8618917b1cfdf3b13149fb67c3bdfb224772d Mon Sep 17 00:00:00 2001 From: OndrejSladky Date: Thu, 21 Mar 2024 15:14:11 +0100 Subject: [PATCH 05/10] Verifying for k up to 128. 
--- scripts/verify.py | 4 ++-- src/uint256_t/uint256_t.cpp | 2 ++ src/uint256_t/uint256_t.h | 2 ++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/scripts/verify.py b/scripts/verify.py index e96abbd..2ac1669 100755 --- a/scripts/verify.py +++ b/scripts/verify.py @@ -102,14 +102,14 @@ def main(): print("Testing ProphAsm2 outputs valid intersection on files " + args.path + " and " + args.interpath) for complements in [True]: for m in range(1, 3): - for k in range(2, 65, 23 if args.quick else 1): + for k in range(2, 129, 23 if args.quick else 1): success &= verify_intersection(args.path, args.interpath, k, complements, m) print("") print("Testing ProphAsm2 outputs valid simplitigs on file " + args.path) for complements in [True, False]: for m in range(1, 4): - for k in range(2, 65, 11 if args.quick else 1): + for k in range(2, 129, 11 if args.quick else 1): success &= verify_instance(args.path, k, complements, m) print("") diff --git a/src/uint256_t/uint256_t.cpp b/src/uint256_t/uint256_t.cpp index de9546e..1f3ddae 100644 --- a/src/uint256_t/uint256_t.cpp +++ b/src/uint256_t/uint256_t.cpp @@ -1,6 +1,8 @@ #ifndef __LITTLE_ENDIAN__ +#ifndef __BIG_ENDIAN__ #define __LITTLE_ENDIAN__ 1 #endif +#endif #include "uint256_t.build" #include #include diff --git a/src/uint256_t/uint256_t.h b/src/uint256_t/uint256_t.h index e611db2..7135ce6 100644 --- a/src/uint256_t/uint256_t.h +++ b/src/uint256_t/uint256_t.h @@ -7,7 +7,9 @@ typedef __uint128_t uint128_t; const uint128_t uint128_0(0); const uint128_t uint128_1(1); #ifndef __LITTLE_ENDIAN__ +#ifndef __BIG_ENDIAN__ #define __LITTLE_ENDIAN__ 1 #endif +#endif #include "uint256_t.include" #endif From 0df8dd873a98e88933e87e1d8e4e72d1c94e853b Mon Sep 17 00:00:00 2001 From: OndrejSladky Date: Thu, 21 Mar 2024 15:20:56 +0100 Subject: [PATCH 06/10] 1s compilation speedup --- src/khash_utils.h | 10 ---------- src/main.cpp | 14 ++------------ src/parser.h | 2 -- src/prophasm.h | 2 -- 4 files changed, 2 insertions(+), 26 
deletions(-) diff --git a/src/khash_utils.h b/src/khash_utils.h index 3ac2d99..7d64e21 100644 --- a/src/khash_utils.h +++ b/src/khash_utils.h @@ -16,20 +16,12 @@ typedef unsigned char byte; #define KHASH_MAP_INIT_INT128(name, khval_t) \ KHASH_INIT(name, __uint128_t, khval_t, 1, kh_int128_hash_func, kh_int128_hash_equal) -#define KHASH_SET_INIT_INT128(name) \ - KHASH_INIT(name, __uint128_t, byte, 0, kh_int128_hash_func, kh_int256_hash_equal) - #define KHASH_MAP_INIT_INT256(name, khval_t) \ KHASH_INIT(name, uint256_t, khval_t, 1, kh_int256_hash_func, kh_int128_hash_equal) -#define KHASH_SET_INIT_INT256(name) \ - KHASH_INIT(name, uint256_t, byte, 0, kh_int256_hash_func, kh_int256_hash_equal) - KHASH_MAP_INIT_INT256(S256M, byte) KHASH_MAP_INIT_INT128(S128M, byte) KHASH_MAP_INIT_INT64(S64M, byte) -KHASH_SET_INIT_INT256(S256S) -KHASH_SET_INIT_INT128(S128S) KHASH_SET_INIT_INT64(S64S) byte MINIMUM_ABUNDANCE = 1; @@ -93,9 +85,7 @@ void DifferenceInPlaceThread##variant(void *arg, long i, int _) { INIT_KHASH_UTILS(64, 64S) INIT_KHASH_UTILS(64, 64M) -INIT_KHASH_UTILS(128, 128S) INIT_KHASH_UTILS(128, 128M) -INIT_KHASH_UTILS(256, 256S) INIT_KHASH_UTILS(256, 256M) /// Return the next k-mer in the k-mer set and update the index. 
diff --git a/src/main.cpp b/src/main.cpp index 5afcc7c..d98842f 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -198,9 +198,7 @@ int run##version(int32_t k, INIT_RUN(64, 64S) INIT_RUN(64, 64M) -INIT_RUN(128, 128S) INIT_RUN(128, 128M) -INIT_RUN(256, 256S) INIT_RUN(256, 256M) int main(int argc, char **argv) { @@ -332,16 +330,8 @@ int main(int argc, char **argv) { return run64M(k, intersectionPath, inPaths, outPaths, statsPath, fstats, computeIntersection, computeOutput, verbose, complements, threads, setCount); } } else if (k <= 64) { - if (MINIMUM_ABUNDANCE == (byte)1) { - return run128S(k, intersectionPath, inPaths, outPaths, statsPath, fstats, computeIntersection, computeOutput, verbose, complements, threads, setCount); - } else { - return run128M(k, intersectionPath, inPaths, outPaths, statsPath, fstats, computeIntersection, computeOutput, verbose, complements, threads, setCount); - } + return run128M(k, intersectionPath, inPaths, outPaths, statsPath, fstats, computeIntersection, computeOutput, verbose, complements, threads, setCount); } else { - if (MINIMUM_ABUNDANCE == (byte)1) { - return run256S(k, intersectionPath, inPaths, outPaths, statsPath, fstats, computeIntersection, computeOutput, verbose, complements, threads, setCount); - } else { - return run256M(k, intersectionPath, inPaths, outPaths, statsPath, fstats, computeIntersection, computeOutput, verbose, complements, threads, setCount); - } + return run256M(k, intersectionPath, inPaths, outPaths, statsPath, fstats, computeIntersection, computeOutput, verbose, complements, threads, setCount); } } diff --git a/src/parser.h b/src/parser.h index cb1c4ea..1f4c23e 100644 --- a/src/parser.h +++ b/src/parser.h @@ -80,7 +80,5 @@ void ReadKMersThread##variant(void *arg, long i, int _) { INIT_PARSER(64, 64S) INIT_PARSER(64, 64M) -INIT_PARSER(128, 128S) INIT_PARSER(128, 128M) -INIT_PARSER(256, 256S) INIT_PARSER(256, 256M) diff --git a/src/prophasm.h b/src/prophasm.h index 6220942..173ea71 100644 --- a/src/prophasm.h 
+++ b/src/prophasm.h @@ -119,7 +119,5 @@ void ComputeSimplitigsThread##variant(void *arg, long i, int _) { INIT_PROPHASM(64, 64S) INIT_PROPHASM(64, 64M) -INIT_PROPHASM(128, 128S) INIT_PROPHASM(128, 128M) -INIT_PROPHASM(256, 256S) INIT_PROPHASM(256, 256M) From c30149301abb81672c23a1aeae8658d40bd023c8 Mon Sep 17 00:00:00 2001 From: OndrejSladky Date: Thu, 21 Mar 2024 15:34:28 +0100 Subject: [PATCH 07/10] Another 1s compilation speedup. --- Makefile | 4 +- src/uint256_t/uint256_t.cpp | 495 ----------------------------------- src/uint256_t/uint256_t.h | 502 +++++++++++++++++++++++++++++++++++- 3 files changed, 493 insertions(+), 508 deletions(-) delete mode 100644 src/uint256_t/uint256_t.cpp diff --git a/Makefile b/Makefile index 1331ab6..da2cdcb 100644 --- a/Makefile +++ b/Makefile @@ -27,11 +27,11 @@ quick-verify: $(PROG) $(SCRIPTS)/verify.py $(DATA)/spneumoniae.fa $(PROG): $(SRC)/main.cpp $(SRC)/$(wildcard *.cpp *.h *.hpp) src/version.h $(wildcard $(UINT256)/*.cpp $(UINT256)/*.h $(UINT256)/*.include) ./create-version.sh - $(CXX) $(CXXFLAGS) $(SRC)/main.cpp $(UINT256)/uint256_t.cpp $(SRC)/kthread.c -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) $(SRC)/main.cpp $(SRC)/kthread.c -o $@ $(LDFLAGS) prophasmtest: $(TESTS)/unittest.cpp gtest-all.o $(SRC)/$(wildcard *.cpp *.h *.hpp) $(TESTS)/$(wildcard *.cpp *.h *.hpp) - $(CXX) $(CXXFLAGS) -isystem $(GTEST)/include -I $(GTEST)/include $(TESTS)/unittest.cpp $(UINT256)/uint256_t.cpp gtest-all.o -pthread -o $@ $(LDFLAGS) + $(CXX) $(CXXFLAGS) -isystem $(GTEST)/include -I $(GTEST)/include $(TESTS)/unittest.cpp gtest-all.o -pthread -o $@ $(LDFLAGS) gtest-all.o: $(GTEST)/src/gtest-all.cc $(wildcard *.cpp *.h *.hpp) $(CXX) $(CXXFLAGS) -isystem $(GTEST)/include -I $(GTEST)/include -I $(GTEST) -DGTEST_CREATE_SHARED_LIBRARY=1 -c -pthread $(GTEST)/src/gtest-all.cc -o $@ diff --git a/src/uint256_t/uint256_t.cpp b/src/uint256_t/uint256_t.cpp deleted file mode 100644 index 1f3ddae..0000000 --- a/src/uint256_t/uint256_t.cpp +++ /dev/null @@ -1,495 
+0,0 @@ -#ifndef __LITTLE_ENDIAN__ -#ifndef __BIG_ENDIAN__ - #define __LITTLE_ENDIAN__ 1 -#endif -#endif -#include "uint256_t.build" -#include -#include - -const uint128_t uint128_64(64); -const uint128_t uint128_128(128); -const uint128_t uint128_256(256); -const uint256_t uint256_0(0); -const uint256_t uint256_1(1); -const uint256_t uint256_max(uint128_t(-1), uint128_t(-1)); - -uint256_t::uint256_t(const bool & b) - : uint256_t((uint8_t) b) -{} - -uint256_t & uint256_t::operator=(const bool & rhs) { - UPPER = 0; - LOWER = rhs; - return *this; -} - -uint256_t::operator bool() const{ - return (bool) (UPPER | LOWER); -} - -uint256_t::operator uint8_t() const{ - return (uint8_t) LOWER; -} - -uint256_t::operator uint16_t() const{ - return (uint16_t) LOWER; -} - -uint256_t::operator uint32_t() const{ - return (uint32_t) LOWER; -} - -uint256_t::operator uint64_t() const{ - return (uint64_t) LOWER; -} - -uint256_t::operator uint128_t() const{ - return LOWER; -} - -uint256_t uint256_t::operator&(const uint128_t & rhs) const{ - return uint256_t(uint128_0, LOWER & rhs); -} - -uint256_t uint256_t::operator&(const uint256_t & rhs) const{ - return uint256_t(UPPER & rhs.UPPER, LOWER & rhs.LOWER); -} - -uint256_t & uint256_t::operator&=(const uint128_t & rhs){ - UPPER = uint128_0; - LOWER &= rhs; - return *this; -} - -uint256_t & uint256_t::operator&=(const uint256_t & rhs){ - UPPER &= rhs.UPPER; - LOWER &= rhs.LOWER; - return *this; -} - -uint256_t uint256_t::operator|(const uint128_t & rhs) const{ - return uint256_t(UPPER , LOWER | rhs); -} - -uint256_t uint256_t::operator|(const uint256_t & rhs) const{ - return uint256_t(UPPER | rhs.UPPER, LOWER | rhs.LOWER); -} - -uint256_t & uint256_t::operator|=(const uint128_t & rhs){ - LOWER |= rhs; - return *this; -} - -uint256_t & uint256_t::operator|=(const uint256_t & rhs){ - UPPER |= rhs.UPPER; - LOWER |= rhs.LOWER; - return *this; -} - -uint256_t uint256_t::operator^(const uint128_t & rhs) const{ - return uint256_t(UPPER, LOWER ^ 
rhs); -} - -uint256_t uint256_t::operator^(const uint256_t & rhs) const{ - return uint256_t(UPPER ^ rhs.UPPER, LOWER ^ rhs.LOWER); -} - -uint256_t & uint256_t::operator^=(const uint128_t & rhs){ - LOWER ^= rhs; - return *this; -} - -uint256_t & uint256_t::operator^=(const uint256_t & rhs){ - UPPER ^= rhs.UPPER; - LOWER ^= rhs.LOWER; - return *this; -} - -uint256_t uint256_t::operator~() const{ - return uint256_t(~UPPER, ~LOWER); -} - -uint256_t uint256_t::operator<<(const uint128_t & rhs) const{ - return *this << uint256_t(rhs); -} - -uint256_t uint256_t::operator<<(const uint256_t & rhs) const{ - const uint128_t shift = rhs.LOWER; - if (((bool) rhs.UPPER) || (shift >= uint128_256)){ - return uint256_0; - } - else if (shift == uint128_128){ - return uint256_t(LOWER, uint128_0); - } - else if (shift == uint128_0){ - return *this; - } - else if (shift < uint128_128){ - return uint256_t((UPPER << shift) + (LOWER >> (uint128_128 - shift)), LOWER << shift); - } - else if ((uint128_256 > shift) && (shift > uint128_128)){ - return uint256_t(LOWER << (shift - uint128_128), uint128_0); - } - else{ - return uint256_0; - } -} - -uint256_t & uint256_t::operator<<=(const uint128_t & shift){ - return *this <<= uint256_t(shift); -} - -uint256_t & uint256_t::operator<<=(const uint256_t & shift){ - *this = *this << shift; - return *this; -} - -uint256_t uint256_t::operator>>(const uint128_t & rhs) const{ - return *this >> uint256_t(rhs); -} - -uint256_t uint256_t::operator>>(const uint256_t & rhs) const{ - const uint128_t shift = rhs.LOWER; - if (((bool) rhs.UPPER) | (shift >= uint128_256)){ - return uint256_0; - } - else if (shift == uint128_128){ - return uint256_t(UPPER); - } - else if (shift == uint128_0){ - return *this; - } - else if (shift < uint128_128){ - return uint256_t(UPPER >> shift, (UPPER << (uint128_128 - shift)) + (LOWER >> shift)); - } - else if ((uint128_256 > shift) && (shift > uint128_128)){ - return uint256_t(UPPER >> (shift - uint128_128)); - } - else{ - 
return uint256_0; - } -} - -uint256_t & uint256_t::operator>>=(const uint128_t & shift){ - return *this >>= uint256_t(shift); -} - -uint256_t & uint256_t::operator>>=(const uint256_t & shift){ - *this = *this >> shift; - return *this; -} - -bool uint256_t::operator!() const{ - return ! (bool) *this; -} - -bool uint256_t::operator&&(const uint128_t & rhs) const{ - return (*this && uint256_t(rhs)); -} - -bool uint256_t::operator&&(const uint256_t & rhs) const{ - return ((bool) *this && (bool) rhs); -} - -bool uint256_t::operator||(const uint128_t & rhs) const{ - return (*this || uint256_t(rhs)); -} - -bool uint256_t::operator||(const uint256_t & rhs) const{ - return ((bool) *this || (bool) rhs); -} - -bool uint256_t::operator==(const uint128_t & rhs) const{ - return (*this == uint256_t(rhs)); -} - -bool uint256_t::operator==(const uint256_t & rhs) const{ - return ((UPPER == rhs.UPPER) && (LOWER == rhs.LOWER)); -} - -bool uint256_t::operator!=(const uint128_t & rhs) const{ - return (*this != uint256_t(rhs)); -} - -bool uint256_t::operator!=(const uint256_t & rhs) const{ - return ((UPPER != rhs.UPPER) | (LOWER != rhs.LOWER)); -} - -bool uint256_t::operator>(const uint128_t & rhs) const{ - return (*this > uint256_t(rhs)); -} - -bool uint256_t::operator>(const uint256_t & rhs) const{ - if (UPPER == rhs.UPPER){ - return (LOWER > rhs.LOWER); - } - if (UPPER > rhs.UPPER){ - return true; - } - return false; -} - -bool uint256_t::operator<(const uint128_t & rhs) const{ - return (*this < uint256_t(rhs)); -} - -bool uint256_t::operator<(const uint256_t & rhs) const{ - if (UPPER == rhs.UPPER){ - return (LOWER < rhs.LOWER); - } - if (UPPER < rhs.UPPER){ - return true; - } - return false; -} - -bool uint256_t::operator>=(const uint128_t & rhs) const{ - return (*this >= uint256_t(rhs)); -} - -bool uint256_t::operator>=(const uint256_t & rhs) const{ - return ((*this > rhs) | (*this == rhs)); -} - -bool uint256_t::operator<=(const uint128_t & rhs) const{ - return (*this <= 
uint256_t(rhs)); -} - -bool uint256_t::operator<=(const uint256_t & rhs) const{ - return ((*this < rhs) | (*this == rhs)); -} - -uint256_t uint256_t::operator+(const uint128_t & rhs) const{ - return *this + uint256_t(rhs); -} - -uint256_t uint256_t::operator+(const uint256_t & rhs) const{ - return uint256_t(UPPER + rhs.UPPER + (((LOWER + rhs.LOWER) < LOWER)?uint128_1:uint128_0), LOWER + rhs.LOWER); -} - -uint256_t & uint256_t::operator+=(const uint128_t & rhs){ - return *this += uint256_t(rhs); -} - -uint256_t & uint256_t::operator+=(const uint256_t & rhs){ - UPPER = rhs.UPPER + UPPER + ((LOWER + rhs.LOWER) < LOWER); - LOWER = LOWER + rhs.LOWER; - return *this; -} - -uint256_t uint256_t::operator-(const uint128_t & rhs) const{ - return *this - uint256_t(rhs); -} - -uint256_t uint256_t::operator-(const uint256_t & rhs) const{ - return uint256_t(UPPER - rhs.UPPER - ((LOWER - rhs.LOWER) > LOWER), LOWER - rhs.LOWER); -} - -uint256_t & uint256_t::operator-=(const uint128_t & rhs){ - return *this -= uint256_t(rhs); -} - -uint256_t & uint256_t::operator-=(const uint256_t & rhs){ - *this = *this - rhs; - return *this; -} - -uint256_t & uint256_t::operator++(){ - *this += uint256_1; - return *this; -} - -uint256_t uint256_t::operator++(int){ - uint256_t temp(*this); - ++*this; - return temp; -} - -uint256_t & uint256_t::operator--(){ - *this -= uint256_1; - return *this; -} - -uint256_t uint256_t::operator--(int){ - uint256_t temp(*this); - --*this; - return temp; -} - -uint256_t uint256_t::operator+() const{ - return *this; -} - -uint256_t uint256_t::operator-() const{ - return ~*this + uint256_1; -} - -const uint128_t & uint256_t::upper() const { - return UPPER; -} - -const uint128_t & uint256_t::lower() const { - return LOWER; -} - -uint256_t operator&(const uint128_t & lhs, const uint256_t & rhs){ - return rhs & lhs; -} - -uint128_t & operator&=(uint128_t & lhs, const uint256_t & rhs){ - lhs = (rhs & lhs).lower(); - return lhs; -} - -uint256_t operator|(const uint128_t 
& lhs, const uint256_t & rhs){ - return rhs | lhs; -} - -uint128_t & operator|=(uint128_t & lhs, const uint256_t & rhs){ - lhs = (rhs | lhs).lower(); - return lhs; -} - -uint256_t operator^(const uint128_t & lhs, const uint256_t & rhs){ - return rhs ^ lhs; -} - -uint128_t & operator^=(uint128_t & lhs, const uint256_t & rhs){ - lhs = (rhs ^ lhs).lower(); - return lhs; -} - -uint256_t operator<<(const bool & lhs, const uint256_t & rhs){ - return uint256_t(lhs) << rhs; -} - -uint256_t operator<<(const uint8_t & lhs, const uint256_t & rhs){ - return uint256_t(lhs) << rhs; -} - -uint256_t operator<<(const uint16_t & lhs, const uint256_t & rhs){ - return uint256_t(lhs) << rhs; -} - -uint256_t operator<<(const uint32_t & lhs, const uint256_t & rhs){ - return uint256_t(lhs) << rhs; -} - -uint256_t operator<<(const uint64_t & lhs, const uint256_t & rhs){ - return uint256_t(lhs) << rhs; -} - -uint256_t operator<<(const uint128_t & lhs, const uint256_t & rhs){ - return uint256_t(lhs) << rhs; -} - -uint256_t operator<<(const int8_t & lhs, const uint256_t & rhs){ - return uint256_t(lhs) << rhs; -} - -uint256_t operator<<(const int16_t & lhs, const uint256_t & rhs){ - return uint256_t(lhs) << rhs; -} - -uint256_t operator<<(const int32_t & lhs, const uint256_t & rhs){ - return uint256_t(lhs) << rhs; -} - -uint256_t operator<<(const int64_t & lhs, const uint256_t & rhs){ - return uint256_t(lhs) << rhs; -} - -uint128_t & operator<<=(uint128_t & lhs, const uint256_t & rhs){ - lhs = (uint256_t(lhs) << rhs).lower(); - return lhs; -} - -uint256_t operator>>(const bool & lhs, const uint256_t & rhs){ - return uint256_t(lhs) >> rhs; -} - -uint256_t operator>>(const uint8_t & lhs, const uint256_t & rhs){ - return uint256_t(lhs) >> rhs; -} - -uint256_t operator>>(const uint16_t & lhs, const uint256_t & rhs){ - return uint256_t(lhs) >> rhs; -} - -uint256_t operator>>(const uint32_t & lhs, const uint256_t & rhs){ - return uint256_t(lhs) >> rhs; -} - -uint256_t operator>>(const uint64_t & 
lhs, const uint256_t & rhs){ - return uint256_t(lhs) >> rhs; -} - -uint256_t operator>>(const uint128_t & lhs, const uint256_t & rhs){ - return uint256_t(lhs) >> rhs; -} - -uint256_t operator>>(const int8_t & lhs, const uint256_t & rhs){ - return uint256_t(lhs) >> rhs; -} - -uint256_t operator>>(const int16_t & lhs, const uint256_t & rhs){ - return uint256_t(lhs) >> rhs; -} - -uint256_t operator>>(const int32_t & lhs, const uint256_t & rhs){ - return uint256_t(lhs) >> rhs; -} - -uint256_t operator>>(const int64_t & lhs, const uint256_t & rhs){ - return uint256_t(lhs) >> rhs; -} - -uint128_t & operator>>=(uint128_t & lhs, const uint256_t & rhs){ - lhs = (uint256_t(lhs) >> rhs).lower(); - return lhs; -} - -// Comparison Operators -bool operator==(const uint128_t & lhs, const uint256_t & rhs){ - return rhs == lhs; -} - -bool operator!=(const uint128_t & lhs, const uint256_t & rhs){ - return rhs != lhs; -} - -bool operator>(const uint128_t & lhs, const uint256_t & rhs){ - return rhs < lhs; -} - -bool operator<(const uint128_t & lhs, const uint256_t & rhs){ - return rhs > lhs; -} - -bool operator>=(const uint128_t & lhs, const uint256_t & rhs){ - return rhs <= lhs; -} - -bool operator<=(const uint128_t & lhs, const uint256_t & rhs){ - return rhs >= lhs; -} - -// Arithmetic Operators -uint256_t operator+(const uint128_t & lhs, const uint256_t & rhs){ - return rhs + lhs; -} - -uint128_t & operator+=(uint128_t & lhs, const uint256_t & rhs){ - lhs = (rhs + lhs).lower(); - return lhs; -} - -uint256_t operator-(const uint128_t & lhs, const uint256_t & rhs){ - return -(rhs - lhs); -} - -uint128_t & operator-=(uint128_t & lhs, const uint256_t & rhs){ - lhs = (-(rhs - lhs)).lower(); - return lhs; -} diff --git a/src/uint256_t/uint256_t.h b/src/uint256_t/uint256_t.h index 7135ce6..1f3ddae 100644 --- a/src/uint256_t/uint256_t.h +++ b/src/uint256_t/uint256_t.h @@ -1,15 +1,495 @@ -// PUBLIC IMPORT HEADER -#ifndef _UINT256_H_ -#define _UINT256_H_ -#include "uint256_t_config.include" 
-#define UINT256_T_EXTERN _UINT256_T_IMPORT -typedef __uint128_t uint128_t; -const uint128_t uint128_0(0); -const uint128_t uint128_1(1); #ifndef __LITTLE_ENDIAN__ #ifndef __BIG_ENDIAN__ -#define __LITTLE_ENDIAN__ 1 + #define __LITTLE_ENDIAN__ 1 #endif #endif -#include "uint256_t.include" -#endif +#include "uint256_t.build" +#include +#include + +const uint128_t uint128_64(64); +const uint128_t uint128_128(128); +const uint128_t uint128_256(256); +const uint256_t uint256_0(0); +const uint256_t uint256_1(1); +const uint256_t uint256_max(uint128_t(-1), uint128_t(-1)); + +uint256_t::uint256_t(const bool & b) + : uint256_t((uint8_t) b) +{} + +uint256_t & uint256_t::operator=(const bool & rhs) { + UPPER = 0; + LOWER = rhs; + return *this; +} + +uint256_t::operator bool() const{ + return (bool) (UPPER | LOWER); +} + +uint256_t::operator uint8_t() const{ + return (uint8_t) LOWER; +} + +uint256_t::operator uint16_t() const{ + return (uint16_t) LOWER; +} + +uint256_t::operator uint32_t() const{ + return (uint32_t) LOWER; +} + +uint256_t::operator uint64_t() const{ + return (uint64_t) LOWER; +} + +uint256_t::operator uint128_t() const{ + return LOWER; +} + +uint256_t uint256_t::operator&(const uint128_t & rhs) const{ + return uint256_t(uint128_0, LOWER & rhs); +} + +uint256_t uint256_t::operator&(const uint256_t & rhs) const{ + return uint256_t(UPPER & rhs.UPPER, LOWER & rhs.LOWER); +} + +uint256_t & uint256_t::operator&=(const uint128_t & rhs){ + UPPER = uint128_0; + LOWER &= rhs; + return *this; +} + +uint256_t & uint256_t::operator&=(const uint256_t & rhs){ + UPPER &= rhs.UPPER; + LOWER &= rhs.LOWER; + return *this; +} + +uint256_t uint256_t::operator|(const uint128_t & rhs) const{ + return uint256_t(UPPER , LOWER | rhs); +} + +uint256_t uint256_t::operator|(const uint256_t & rhs) const{ + return uint256_t(UPPER | rhs.UPPER, LOWER | rhs.LOWER); +} + +uint256_t & uint256_t::operator|=(const uint128_t & rhs){ + LOWER |= rhs; + return *this; +} + +uint256_t & 
uint256_t::operator|=(const uint256_t & rhs){ + UPPER |= rhs.UPPER; + LOWER |= rhs.LOWER; + return *this; +} + +uint256_t uint256_t::operator^(const uint128_t & rhs) const{ + return uint256_t(UPPER, LOWER ^ rhs); +} + +uint256_t uint256_t::operator^(const uint256_t & rhs) const{ + return uint256_t(UPPER ^ rhs.UPPER, LOWER ^ rhs.LOWER); +} + +uint256_t & uint256_t::operator^=(const uint128_t & rhs){ + LOWER ^= rhs; + return *this; +} + +uint256_t & uint256_t::operator^=(const uint256_t & rhs){ + UPPER ^= rhs.UPPER; + LOWER ^= rhs.LOWER; + return *this; +} + +uint256_t uint256_t::operator~() const{ + return uint256_t(~UPPER, ~LOWER); +} + +uint256_t uint256_t::operator<<(const uint128_t & rhs) const{ + return *this << uint256_t(rhs); +} + +uint256_t uint256_t::operator<<(const uint256_t & rhs) const{ + const uint128_t shift = rhs.LOWER; + if (((bool) rhs.UPPER) || (shift >= uint128_256)){ + return uint256_0; + } + else if (shift == uint128_128){ + return uint256_t(LOWER, uint128_0); + } + else if (shift == uint128_0){ + return *this; + } + else if (shift < uint128_128){ + return uint256_t((UPPER << shift) + (LOWER >> (uint128_128 - shift)), LOWER << shift); + } + else if ((uint128_256 > shift) && (shift > uint128_128)){ + return uint256_t(LOWER << (shift - uint128_128), uint128_0); + } + else{ + return uint256_0; + } +} + +uint256_t & uint256_t::operator<<=(const uint128_t & shift){ + return *this <<= uint256_t(shift); +} + +uint256_t & uint256_t::operator<<=(const uint256_t & shift){ + *this = *this << shift; + return *this; +} + +uint256_t uint256_t::operator>>(const uint128_t & rhs) const{ + return *this >> uint256_t(rhs); +} + +uint256_t uint256_t::operator>>(const uint256_t & rhs) const{ + const uint128_t shift = rhs.LOWER; + if (((bool) rhs.UPPER) | (shift >= uint128_256)){ + return uint256_0; + } + else if (shift == uint128_128){ + return uint256_t(UPPER); + } + else if (shift == uint128_0){ + return *this; + } + else if (shift < uint128_128){ + return 
uint256_t(UPPER >> shift, (UPPER << (uint128_128 - shift)) + (LOWER >> shift)); + } + else if ((uint128_256 > shift) && (shift > uint128_128)){ + return uint256_t(UPPER >> (shift - uint128_128)); + } + else{ + return uint256_0; + } +} + +uint256_t & uint256_t::operator>>=(const uint128_t & shift){ + return *this >>= uint256_t(shift); +} + +uint256_t & uint256_t::operator>>=(const uint256_t & shift){ + *this = *this >> shift; + return *this; +} + +bool uint256_t::operator!() const{ + return ! (bool) *this; +} + +bool uint256_t::operator&&(const uint128_t & rhs) const{ + return (*this && uint256_t(rhs)); +} + +bool uint256_t::operator&&(const uint256_t & rhs) const{ + return ((bool) *this && (bool) rhs); +} + +bool uint256_t::operator||(const uint128_t & rhs) const{ + return (*this || uint256_t(rhs)); +} + +bool uint256_t::operator||(const uint256_t & rhs) const{ + return ((bool) *this || (bool) rhs); +} + +bool uint256_t::operator==(const uint128_t & rhs) const{ + return (*this == uint256_t(rhs)); +} + +bool uint256_t::operator==(const uint256_t & rhs) const{ + return ((UPPER == rhs.UPPER) && (LOWER == rhs.LOWER)); +} + +bool uint256_t::operator!=(const uint128_t & rhs) const{ + return (*this != uint256_t(rhs)); +} + +bool uint256_t::operator!=(const uint256_t & rhs) const{ + return ((UPPER != rhs.UPPER) | (LOWER != rhs.LOWER)); +} + +bool uint256_t::operator>(const uint128_t & rhs) const{ + return (*this > uint256_t(rhs)); +} + +bool uint256_t::operator>(const uint256_t & rhs) const{ + if (UPPER == rhs.UPPER){ + return (LOWER > rhs.LOWER); + } + if (UPPER > rhs.UPPER){ + return true; + } + return false; +} + +bool uint256_t::operator<(const uint128_t & rhs) const{ + return (*this < uint256_t(rhs)); +} + +bool uint256_t::operator<(const uint256_t & rhs) const{ + if (UPPER == rhs.UPPER){ + return (LOWER < rhs.LOWER); + } + if (UPPER < rhs.UPPER){ + return true; + } + return false; +} + +bool uint256_t::operator>=(const uint128_t & rhs) const{ + return (*this >= 
uint256_t(rhs)); +} + +bool uint256_t::operator>=(const uint256_t & rhs) const{ + return ((*this > rhs) | (*this == rhs)); +} + +bool uint256_t::operator<=(const uint128_t & rhs) const{ + return (*this <= uint256_t(rhs)); +} + +bool uint256_t::operator<=(const uint256_t & rhs) const{ + return ((*this < rhs) | (*this == rhs)); +} + +uint256_t uint256_t::operator+(const uint128_t & rhs) const{ + return *this + uint256_t(rhs); +} + +uint256_t uint256_t::operator+(const uint256_t & rhs) const{ + return uint256_t(UPPER + rhs.UPPER + (((LOWER + rhs.LOWER) < LOWER)?uint128_1:uint128_0), LOWER + rhs.LOWER); +} + +uint256_t & uint256_t::operator+=(const uint128_t & rhs){ + return *this += uint256_t(rhs); +} + +uint256_t & uint256_t::operator+=(const uint256_t & rhs){ + UPPER = rhs.UPPER + UPPER + ((LOWER + rhs.LOWER) < LOWER); + LOWER = LOWER + rhs.LOWER; + return *this; +} + +uint256_t uint256_t::operator-(const uint128_t & rhs) const{ + return *this - uint256_t(rhs); +} + +uint256_t uint256_t::operator-(const uint256_t & rhs) const{ + return uint256_t(UPPER - rhs.UPPER - ((LOWER - rhs.LOWER) > LOWER), LOWER - rhs.LOWER); +} + +uint256_t & uint256_t::operator-=(const uint128_t & rhs){ + return *this -= uint256_t(rhs); +} + +uint256_t & uint256_t::operator-=(const uint256_t & rhs){ + *this = *this - rhs; + return *this; +} + +uint256_t & uint256_t::operator++(){ + *this += uint256_1; + return *this; +} + +uint256_t uint256_t::operator++(int){ + uint256_t temp(*this); + ++*this; + return temp; +} + +uint256_t & uint256_t::operator--(){ + *this -= uint256_1; + return *this; +} + +uint256_t uint256_t::operator--(int){ + uint256_t temp(*this); + --*this; + return temp; +} + +uint256_t uint256_t::operator+() const{ + return *this; +} + +uint256_t uint256_t::operator-() const{ + return ~*this + uint256_1; +} + +const uint128_t & uint256_t::upper() const { + return UPPER; +} + +const uint128_t & uint256_t::lower() const { + return LOWER; +} + +uint256_t operator&(const uint128_t & 
lhs, const uint256_t & rhs){ + return rhs & lhs; +} + +uint128_t & operator&=(uint128_t & lhs, const uint256_t & rhs){ + lhs = (rhs & lhs).lower(); + return lhs; +} + +uint256_t operator|(const uint128_t & lhs, const uint256_t & rhs){ + return rhs | lhs; +} + +uint128_t & operator|=(uint128_t & lhs, const uint256_t & rhs){ + lhs = (rhs | lhs).lower(); + return lhs; +} + +uint256_t operator^(const uint128_t & lhs, const uint256_t & rhs){ + return rhs ^ lhs; +} + +uint128_t & operator^=(uint128_t & lhs, const uint256_t & rhs){ + lhs = (rhs ^ lhs).lower(); + return lhs; +} + +uint256_t operator<<(const bool & lhs, const uint256_t & rhs){ + return uint256_t(lhs) << rhs; +} + +uint256_t operator<<(const uint8_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) << rhs; +} + +uint256_t operator<<(const uint16_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) << rhs; +} + +uint256_t operator<<(const uint32_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) << rhs; +} + +uint256_t operator<<(const uint64_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) << rhs; +} + +uint256_t operator<<(const uint128_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) << rhs; +} + +uint256_t operator<<(const int8_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) << rhs; +} + +uint256_t operator<<(const int16_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) << rhs; +} + +uint256_t operator<<(const int32_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) << rhs; +} + +uint256_t operator<<(const int64_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) << rhs; +} + +uint128_t & operator<<=(uint128_t & lhs, const uint256_t & rhs){ + lhs = (uint256_t(lhs) << rhs).lower(); + return lhs; +} + +uint256_t operator>>(const bool & lhs, const uint256_t & rhs){ + return uint256_t(lhs) >> rhs; +} + +uint256_t operator>>(const uint8_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) >> rhs; +} + +uint256_t operator>>(const uint16_t & lhs, 
const uint256_t & rhs){ + return uint256_t(lhs) >> rhs; +} + +uint256_t operator>>(const uint32_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) >> rhs; +} + +uint256_t operator>>(const uint64_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) >> rhs; +} + +uint256_t operator>>(const uint128_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) >> rhs; +} + +uint256_t operator>>(const int8_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) >> rhs; +} + +uint256_t operator>>(const int16_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) >> rhs; +} + +uint256_t operator>>(const int32_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) >> rhs; +} + +uint256_t operator>>(const int64_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) >> rhs; +} + +uint128_t & operator>>=(uint128_t & lhs, const uint256_t & rhs){ + lhs = (uint256_t(lhs) >> rhs).lower(); + return lhs; +} + +// Comparison Operators +bool operator==(const uint128_t & lhs, const uint256_t & rhs){ + return rhs == lhs; +} + +bool operator!=(const uint128_t & lhs, const uint256_t & rhs){ + return rhs != lhs; +} + +bool operator>(const uint128_t & lhs, const uint256_t & rhs){ + return rhs < lhs; +} + +bool operator<(const uint128_t & lhs, const uint256_t & rhs){ + return rhs > lhs; +} + +bool operator>=(const uint128_t & lhs, const uint256_t & rhs){ + return rhs <= lhs; +} + +bool operator<=(const uint128_t & lhs, const uint256_t & rhs){ + return rhs >= lhs; +} + +// Arithmetic Operators +uint256_t operator+(const uint128_t & lhs, const uint256_t & rhs){ + return rhs + lhs; +} + +uint128_t & operator+=(uint128_t & lhs, const uint256_t & rhs){ + lhs = (rhs + lhs).lower(); + return lhs; +} + +uint256_t operator-(const uint128_t & lhs, const uint256_t & rhs){ + return -(rhs - lhs); +} + +uint128_t & operator-=(uint128_t & lhs, const uint256_t & rhs){ + lhs = (-(rhs - lhs)).lower(); + return lhs; +} From 81c710bbd35fdfbbfc5a75ea06b305177ea2c8ca Mon Sep 17 00:00:00 2001 From: 
OndrejSladky Date: Thu, 21 Mar 2024 16:07:48 +0100 Subject: [PATCH 08/10] Unittests for large reverse complements. --- src/kthread.c | 93 +++--------------------------------------- src/kthread.h | 1 - tests/kmers_unittest.h | 44 ++++++++++++++++++++ 3 files changed, 49 insertions(+), 89 deletions(-) diff --git a/src/kthread.c b/src/kthread.c index ffdf940..6994242 100644 --- a/src/kthread.c +++ b/src/kthread.c @@ -1,3 +1,7 @@ +/* The MIT License + + Copyright (c) 2008, 2009, 2011 by Attractive Chaos + */ #include #include #include @@ -69,91 +73,4 @@ void kt_for(int n_threads, void (*func)(void*,long,int), void *data, long n) long j; for (j = 0; j < n; ++j) func(data, j, 0); } -} - -/***************** - * kt_pipeline() * - *****************/ - -struct ktp_t; - -typedef struct { - struct ktp_t *pl; - int64_t index; - int step; - void *data; -} ktp_worker_t; - -typedef struct ktp_t { - void *shared; - void *(*func)(void*, int, void*); - int64_t index; - int n_workers, n_steps; - ktp_worker_t *workers; - pthread_mutex_t mutex; - pthread_cond_t cv; -} ktp_t; - -static void *ktp_worker(void *data) -{ - ktp_worker_t *w = (ktp_worker_t*)data; - ktp_t *p = w->pl; - while (w->step < p->n_steps) { - // test whether we can kick off the job with this worker - pthread_mutex_lock(&p->mutex); - for (;;) { - int i; - // test whether another worker is doing the same step - for (i = 0; i < p->n_workers; ++i) { - if (w == &p->workers[i]) continue; // ignore itself - if (p->workers[i].step <= w->step && p->workers[i].index < w->index) - break; - } - if (i == p->n_workers) break; // no workers with smaller indices are doing w->step or the previous steps - pthread_cond_wait(&p->cv, &p->mutex); - } - pthread_mutex_unlock(&p->mutex); - - // working on w->step - w->data = p->func(p->shared, w->step, w->step? w->data : 0); // for the first step, input is NULL - - // update step and let other workers know - pthread_mutex_lock(&p->mutex); - w->step = w->step == p->n_steps - 1 || w->data? 
(w->step + 1) % p->n_steps : p->n_steps; - if (w->step == 0) w->index = p->index++; - pthread_cond_broadcast(&p->cv); - pthread_mutex_unlock(&p->mutex); - } - pthread_exit(0); -} - -void kt_pipeline(int n_threads, void *(*func)(void*, int, void*), void *shared_data, int n_steps) -{ - ktp_t aux; - pthread_t *tid; - int i; - - if (n_threads < 1) n_threads = 1; - aux.n_workers = n_threads; - aux.n_steps = n_steps; - aux.func = func; - aux.shared = shared_data; - aux.index = 0; - pthread_mutex_init(&aux.mutex, 0); - pthread_cond_init(&aux.cv, 0); - - aux.workers = (ktp_worker_t*)calloc(n_threads, sizeof(ktp_worker_t)); - for (i = 0; i < n_threads; ++i) { - ktp_worker_t *w = &aux.workers[i]; - w->step = 0; w->pl = &aux; w->data = 0; - w->index = aux.index++; - } - - tid = (pthread_t*)calloc(n_threads, sizeof(pthread_t)); - for (i = 0; i < n_threads; ++i) pthread_create(&tid[i], 0, ktp_worker, &aux.workers[i]); - for (i = 0; i < n_threads; ++i) pthread_join(tid[i], 0); - free(tid); free(aux.workers); - - pthread_mutex_destroy(&aux.mutex); - pthread_cond_destroy(&aux.cv); -} +} \ No newline at end of file diff --git a/src/kthread.h b/src/kthread.h index c3cd165..1f5b480 100644 --- a/src/kthread.h +++ b/src/kthread.h @@ -6,7 +6,6 @@ extern "C" { #endif void kt_for(int n_threads, void (*func)(void*,long,int), void *data, long n); -void kt_pipeline(int n_threads, void *(*func)(void*, int, void*), void *shared_data, int n_steps); #ifdef __cplusplus } diff --git a/tests/kmers_unittest.h b/tests/kmers_unittest.h index b71d776..30a0efe 100644 --- a/tests/kmers_unittest.h +++ b/tests/kmers_unittest.h @@ -106,6 +106,50 @@ namespace { } } + TEST(KMers, ReverseComplement128) { /* Table-driven check that ReverseComplement(input, k) returns wantResult for kmer128_t words. */ + struct TestCase { + kmer128_t input; + int k; + kmer128_t wantResult; + }; + std::vector<TestCase> tests = { + {0b1001LL, 2, 0b1001LL}, + {0b101111LL, 3, 0b000001LL}, + {0b11LL, 1, 0b00LL}, + {0b11111111'01111111'11111111'11111111'11111111'11111111'11111111'11111110LL, 32, 
0b010000'00000000'00000000'00000000'00000000'00000000'00000000'1000000000LL }, + {(kmer128_t (0b11111111'01111111'11111111'11111111'11111111'11111111'11111111'11111110LL) << 64) + kmer128_t (0b10111111'01111111'11111111'11111111'11111111'11111111'11111111'11111110LL) , + 64, (kmer128_t (0b010000'00000000'00000000'00000000'00000000'00000000'00000000'1000000001LL)<<64 ) + kmer128_t (0b010000'00000000'00000000'00000000'00000000'00000000'00000000'1000000000LL) }, + }; + + for (auto t: tests) { + kmer128_t gotResult = ReverseComplement(t.input, t.k); + + EXPECT_EQ(t.wantResult, gotResult); + } + } + + TEST(KMers, ReverseComplement256) { /* Table-driven check that ReverseComplement(input, k) returns wantResult for kmer256_t words. NOTE(review): the largest k covered is 64 and no input or expected value uses more than 128 bits. */ + struct TestCase { + kmer256_t input; + int k; + kmer256_t wantResult; + }; + std::vector<TestCase> tests = { + {0b1001LL, 2, 0b1001LL}, + {0b101111LL, 3, 0b000001LL}, + {0b11LL, 1, 0b00LL}, + {0b11111111'01111111'11111111'11111111'11111111'11111111'11111111'11111110LL, 32, 0b010000'00000000'00000000'00000000'00000000'00000000'00000000'1000000000LL }, + {(kmer128_t (0b11111111'01111111'11111111'11111111'11111111'11111111'11111111'11111110LL) << 64) + kmer128_t (0b10111111'01111111'11111111'11111111'11111111'11111111'11111111'11111110LL) , + 64, (kmer128_t (0b010000'00000000'00000000'00000000'00000000'00000000'00000000'1000000001LL)<<64 ) + kmer128_t (0b010000'00000000'00000000'00000000'00000000'00000000'00000000'1000000000LL) }, + }; + + for (auto t: tests) { + kmer256_t gotResult = ReverseComplement(t.input, t.k); + + EXPECT_EQ(t.wantResult, gotResult); + } + } + TEST(KMers, CanonicalKMer) { struct TestCase { kmer_t input; From afa99154018b339282ced8a7537a9c78d9a7c15b Mon Sep 17 00:00:00 2001 From: OndrejSladky Date: Thu, 25 Apr 2024 22:15:50 +0200 Subject: [PATCH 09/10] Fixed error in reverse complements for extra large k-mers. 
--- src/kmers.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/kmers.h b/src/kmers.h index 7bf2422..209c8d7 100644 --- a/src/kmers.h +++ b/src/kmers.h @@ -79,7 +79,7 @@ inline kmer128_t word_reverse_complement(kmer128_t w) { inline kmer256_t word_reverse_complement(kmer256_t w) { kmer128_t low = word_reverse_complement(w.lower()); kmer128_t high = word_reverse_complement(w.upper()); - return kmer256_t(high, low); + return kmer256_t(low, high); /* Halves are swapped on purpose: the reverse-complemented lower half is passed as the first (upper) argument and the reverse-complemented upper half as the second (lower) one. */ } constexpr int KMER_SIZE_64 = 64; From 26c030ed8e6a81003dba1dc57310029c8e8e8a88 Mon Sep 17 00:00:00 2001 From: OndrejSladky Date: Fri, 26 Apr 2024 15:10:43 +0200 Subject: [PATCH 10/10] README: sizes up to 128. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 73fdd41..08b0b25 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ ProphAsm2 is a versatile tool for computing simplitigs/SPSS from *k-mer sets* and for *k-mer set operations*. The new features compared to the original [ProphAsm](https://github.com/prophyle/prophasm) include a largely speed and memory optimization, parallelization, -support for k-mer sizes up to 64 and support for minimum abundances. +support for k-mer sizes up to 128 and support for minimum abundances. Various types of sequencing datasets can be used as the input for ProphAsm, including genomes, pan-genomes, metagenomes or sequencing reads.