From 54fea1f61f4ece7ddabb026603d385e194024563 Mon Sep 17 00:00:00 2001 From: hiraksarkar Date: Sat, 9 May 2020 14:12:03 -0400 Subject: [PATCH] rolling back to old pufferfish b/c of the optimizations that made the previous unusable for longer k-mers --- CMakeLists.txt | 21 +- .../twopaco/common/junctionapi/junctionapi.h | 8 - external/twopaco/common/streamfastaparser.h | 4 - .../twopaco/graphconstructor/CMakeLists.txt | 3 + .../graphconstructor/bifurcationstorage.h | 5 +- .../twopaco/graphconstructor/constructor.cpp | 4 +- external/twopaco/graphdump/CMakeLists.txt | 4 +- external/twopaco/graphdump/binaryWriter.h | 133 -- .../compact_vector/compact_iterator.hpp | 892 --------- .../compact_vector/compact_vector.hpp | 484 ----- .../compact_vector/const_iterator_traits.hpp | 21 - .../twopaco/graphdump/compact_vector/mio.hpp | 1754 ----------------- .../parallel_iterator_traits.hpp | 32 - .../prefetch_iterator_traits.hpp | 28 - external/twopaco/graphdump/graphdump.cpp | 1486 ++++++-------- external/twopaco/graphdump/pufferize.h | 154 -- src/PuffIndexer.cpp | 14 +- 17 files changed, 670 insertions(+), 4377 deletions(-) delete mode 100644 external/twopaco/graphdump/binaryWriter.h delete mode 100644 external/twopaco/graphdump/compact_vector/compact_iterator.hpp delete mode 100644 external/twopaco/graphdump/compact_vector/compact_vector.hpp delete mode 100644 external/twopaco/graphdump/compact_vector/const_iterator_traits.hpp delete mode 100644 external/twopaco/graphdump/compact_vector/mio.hpp delete mode 100644 external/twopaco/graphdump/compact_vector/parallel_iterator_traits.hpp delete mode 100644 external/twopaco/graphdump/compact_vector/prefetch_iterator_traits.hpp delete mode 100644 external/twopaco/graphdump/pufferize.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 7e06ba5..fb38dd3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -81,10 +81,16 @@ set(GAT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) ### ## Try and find TBB first -find_package(TBB 2018.0 COMPONENTS tbb tbbmalloc tbbmalloc_proxy) +find_package(TBB 2019.0 COMPONENTS tbb tbbmalloc tbbmalloc_proxy) + +## NOTE: we actually require at least 2019 U4 or greater +## since we are using tbb::global_control. However, they +## seem not to have tagged minor version numbers in their +## source. Check before release if we can bump to the 2020 +## version (requires having tbb 2020 for OSX). if (${TBB_FOUND}) - if (${TBB_VERSION} VERSION_GREATER_EQUAL 2018.0) + if (${TBB_VERSION} VERSION_GREATER_EQUAL 2019.0) message("FOUND SUITABLE TBB VERSION : ${TBB_VERSION}") set(TBB_TARGET_EXISTED TRUE) else() @@ -112,7 +118,8 @@ endif() message("Build system will fetch and build Intel Threading Building Blocks") message("==================================================================") # These are useful for the custom install step we'll do later -set(TBB_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/oneTBB-2019_U8) +set(TBB_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/oneTBB-2020.2) +#set(TBB_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/oneTBB-2019_U8) #set(TBB_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/tbb-2019_U8) set(TBB_INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install) @@ -126,10 +133,10 @@ set(TBB_CXXFLAGS "${TBB_CXXFLAGS} ${CXXSTDFLAG}") externalproject_add(libtbb DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external - DOWNLOAD_COMMAND curl -k -L https://github.com/intel/tbb/archive/2019_U8.tar.gz -o tbb-2019_U8.tgz && - ${SHASUM} 6b540118cbc79f9cbc06a35033c18156c21b84ab7b6cf56d773b168ad2b68566 tbb-2019_U8.tgz && - tar -xzvf tbb-2019_U8.tgz - SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/oneTBB-2019_U8 + DOWNLOAD_COMMAND curl -k -L https://github.com/oneapi-src/oneTBB/archive/v2020.2.tar.gz -o tbb-2020_U2.tgz && + ${SHASUM} 4804320e1e6cbe3a5421997b52199e3c1a3829b2ecb6489641da4b8e32faf500 tbb-2020_U2.tgz && + tar -xzvf tbb-2020_U2.tgz + SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/oneTBB-2020.2 INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install PATCH_COMMAND "${TBB_PATCH_STEP}" CONFIGURE_COMMAND "" diff --git a/external/twopaco/common/junctionapi/junctionapi.h b/external/twopaco/common/junctionapi/junctionapi.h index 271a521..185d426 100644 --- a/external/twopaco/common/junctionapi/junctionapi.h +++ b/external/twopaco/common/junctionapi/junctionapi.h @@ -49,14 +49,6 @@ namespace TwoPaCo } } - void RestoreReader() { - /*if (!in_) { - throw std::runtime_error("No input file exists"); - }*/ - in_.clear(); - in_.seekg(0, in_.beg); - nowChr_=0; - } void RestoreVector(std::vector & mark, size_t chr) { JunctionPosition pos; diff --git a/external/twopaco/common/streamfastaparser.h b/external/twopaco/common/streamfastaparser.h index 07cd470..9f647df 100644 --- a/external/twopaco/common/streamfastaparser.h +++ b/external/twopaco/common/streamfastaparser.h @@ -148,10 +148,6 @@ namespace TwoPaCo } } - void reset() { - parser_.reset(new TwoPaCo::StreamFastaParser(fileName_[0])); - } - bool NextChr(std::string & buf) { buf.clear(); diff --git a/external/twopaco/graphconstructor/CMakeLists.txt b/external/twopaco/graphconstructor/CMakeLists.txt index e7be19f..b3fd463 100644 --- a/external/twopaco/graphconstructor/CMakeLists.txt +++ b/external/twopaco/graphconstructor/CMakeLists.txt @@ -2,6 +2,9 @@ project(twopaco CXX) cmake_minimum_required(VERSION 3.9) set(CMAKE_PROJECT_NAME twopaco) +#if(CMAKE_COMPILER_IS_GNUCXX) +# list(APPEND "CMAKE_CXX_FLAGS" "-std=c++0x") +#endif() include_directories(${twopaco_SOURCE_DIR} ${TBB_INCLUDE_DIRS} "../common") link_directories(${TBB_LIB_DIR}) diff --git a/external/twopaco/graphconstructor/bifurcationstorage.h b/external/twopaco/graphconstructor/bifurcationstorage.h index 1a7166b..b9e8fd7 100644 --- a/external/twopaco/graphconstructor/bifurcationstorage.h +++ b/external/twopaco/graphconstructor/bifurcationstorage.h @@ -1,7 +1,6 @@ #ifndef _BIFURCATION_STORAGE_H_ #define _BIFURCATION_STORAGE_H_ -#include #include "common.h" #include "compressedstring.h" @@ -35,7 +34,7 @@ namespace TwoPaCo } size_t hashFunctionNumber = 3; - bitsPower = std::max(static_cast(bitsPower), size_t(24)); + bitsPower = max(bitsPower, size_t(24)); bifurcationFilter_.assign(uint64_t(1) << bitsPower, false); hashFunction_.resize(hashFunctionNumber); for (HashFunctionPtr & ptr : hashFunction_) @@ -162,4 +161,4 @@ namespace TwoPaCo }; } -#endif +#endif \ No newline at end of file diff --git a/external/twopaco/graphconstructor/constructor.cpp b/external/twopaco/graphconstructor/constructor.cpp index c35dc75..17a1355 100644 --- a/external/twopaco/graphconstructor/constructor.cpp +++ b/external/twopaco/graphconstructor/constructor.cpp @@ -50,7 +50,7 @@ class OddConstraint : public TCLAP::Constraint } }; -int buildGraphMain(std::vector& args) //}int argc, char * argv[]) +int buildGraphMain(std::vector& args)//main(int argc, char * argv[]) { OddConstraint constraint; try @@ -124,7 +124,7 @@ int buildGraphMain(std::vector& args) //}int argc, char * argv[]) "file name", cmd); - cmd.parse(args);//argc, argv); + cmd.parse(args);//(argc, argv); using TwoPaCo::Range; if (runTests.getValue()) { diff --git a/external/twopaco/graphdump/CMakeLists.txt b/external/twopaco/graphdump/CMakeLists.txt index 3c6fd24..a029132 100644 --- a/external/twopaco/graphdump/CMakeLists.txt +++ b/external/twopaco/graphdump/CMakeLists.txt @@ -2,6 +2,9 @@ project(graphdump CXX) cmake_minimum_required(VERSION 3.9) set(CMAKE_PROJECT_NAME graphdump) +#if(CMAKE_COMPILER_IS_GNUCXX) +# list(APPEND "CMAKE_CXX_FLAGS" "-std=c++0x") +#endif() include_directories(${twopaco_SOURCE_DIR} ${TBB_INCLUDE_DIRS} "../common" "./" ) link_directories(${TBB_LIB_DIR}) @@ -14,7 +17,6 @@ install(TARGETS graphdump DESTINATION lib EXPORT graphdump-targets) install(EXPORT graphdump-targets DESTINATION lib/graphdump) - set(CPACK_PACKAGE_VERSION_MAJOR "0") set(CPACK_PACKAGE_VERSION_MINOR "9") set(CPACK_PACKAGE_VERSION_PATCH "3") diff --git a/external/twopaco/graphdump/binaryWriter.h b/external/twopaco/graphdump/binaryWriter.h deleted file mode 100644 index e916d05..0000000 --- a/external/twopaco/graphdump/binaryWriter.h +++ /dev/null @@ -1,133 +0,0 @@ -// -// Created by Fatemeh Almodaresi on 2019-06-26. -// - -#ifndef ROOT_BINARYWRITER_H -#define ROOT_BINARYWRITER_H - -#include "compact_vector/compact_vector.hpp" - -enum EntryType { - S,P,C,L -}; - -class BinaryWriter { -private: - std::ostream* out; - compact::vector seqVec_; - compact::vector rankVec_; - -public: - BinaryWriter(): out(&std::cout) {} - //BinaryWriter(std::ostream &outin): out(outin) {} - - void setOStream(std::ostream* o) { out = o; } - void setCapacity(uint64_t c) { - seqVec_.reserve(c); - rankVec_.reserve(c); - } - - void flushSegments(std::string &prefix) { - std::ofstream seqOut(prefix+"/seq.bin"); - seqVec_.serialize(seqOut); - std::ofstream rankOut(prefix+"/rank.bin"); - uint64_t ones=0; - for (uint64_t idx=0; idx < rankVec_.size(); idx++) { - if (rankVec_[idx]) { - ones++; - } - } - std::cerr << "# of ones in rank vector: " << ones << "\n"; - rankVec_.serialize(rankOut); - } - - BinaryWriter& operator<<(const EntryType &inval) { - char* inCharPtr = const_cast(reinterpret_cast(&inval)); - out->write(inCharPtr, sizeof(EntryType)); - return *this; - } - - BinaryWriter& operator<<(const bool &inval) { - char* inCharPtr = const_cast(reinterpret_cast(&inval)); - out->write(inCharPtr, sizeof(inval)); - return *this; - } - BinaryWriter& operator<<(const uint8_t &inval) { - char* inCharPtr = const_cast(reinterpret_cast(&inval)); - out->write(inCharPtr, sizeof(inval)); - return *this; - } - BinaryWriter& operator<<(const uint16_t &inval) { - char* inCharPtr = const_cast(reinterpret_cast(&inval)); - out->write(inCharPtr, sizeof(inval)); - return *this; - } - BinaryWriter& operator<<(const uint32_t &inval) { - char* inCharPtr = const_cast(reinterpret_cast(&inval)); - out->write(inCharPtr, sizeof(inval)); - return *this; - } - BinaryWriter& operator<<(const uint64_t &inval) { - char* inCharPtr = const_cast(reinterpret_cast(&inval)); - out->write(inCharPtr, sizeof(inval)); - return *this; - } - BinaryWriter& operator<<(const int64_t &inval) { - char* inCharPtr = const_cast(reinterpret_cast(&inval)); - out->write(inCharPtr, sizeof(inval)); - return *this; - } - - - BinaryWriter &operator<<(const double &inval) { - char *inCharPtr = const_cast(reinterpret_cast(&inval)); - out->write(inCharPtr, sizeof(inval)); - return *this; - } - BinaryWriter& operator<<(const std::string &inval) { - if (inval.size() >= 0x100) { - std::cerr << "ERROR!! DOESN'T SUPPORT STRING LENGTH LONGER THAN 255. String length: " - << inval.size() << "\n"; - std::exit(1); - } - auto tmp = static_cast(inval.size()); - out->write(reinterpret_cast(&tmp), sizeof(tmp)); - char* inCharPtr = const_cast(inval.c_str()); - out->write(inCharPtr, inval.size()); - return *this; - } - - BinaryWriter& operator<<(const compact::vector &inval) { - uint64_t w_size = inval.size(); - out->write(reinterpret_cast(&w_size), sizeof(w_size)); - uint64_t* m_mem = inval.get_words(); - out->write(reinterpret_cast(m_mem), inval.bytes()); - return *this; - } - - void addSeq(const std::string &inval) { -// std::cerr << seqVec_.capacity() << " " << seqVec_.size() << " "; - - for (size_t i = 0; i < inval.size(); ++i) { - uint16_t c = 0; - switch (inval[i]) { - case 'C': - c = 1; - break; - case 'G': - c = 2; - break; - case 'T': - c = 3; - break; - default: - c = 0; - } - seqVec_.push_back(c); - rankVec_.push_back(0); - } - rankVec_[rankVec_.size()-1] = 1; - } - -}; -#endif //ROOT_BINARYWRITER_H diff --git a/external/twopaco/graphdump/compact_vector/compact_iterator.hpp b/external/twopaco/graphdump/compact_vector/compact_iterator.hpp deleted file mode 100644 index 45fd66c..0000000 --- a/external/twopaco/graphdump/compact_vector/compact_iterator.hpp +++ /dev/null @@ -1,892 +0,0 @@ -#ifndef __COMPACT_ITERATOR_H__ -#define __COMPACT_ITERATOR_H__ - -#include -#include -#include -#include -#include - -#include "const_iterator_traits.hpp" -#include "parallel_iterator_traits.hpp" -#include "prefetch_iterator_traits.hpp" - - -namespace compact { -// Number of bits in type t -template -struct bitsof { - static constexpr size_t val = sizeof(T) * CHAR_BIT; -}; - -// Compact iterator definition. A 'iterator p' would -// behave identically to an 'int*', except that the underlying storage -// is bit-packed. The actual number of bits used by each element is -// specified at construction. -// -// * IDX is the type of the integral type, i.e., the type of the -// integer pointed to, as seen from the outside. It behaves like a -// pointer to IDX. -// -// * BITS is the number of bits used for each word. If BITS==0, the -// * class is specialized to use a number of bits defined at runtime -// * instead of compile time. -// -// * W is the word type used internally. We must have sizeof(IDX) <= -// sizeof(W). -// -// * TS is true for Thread Safe operations. It is only concerned with -// basic thread safety: if p1 != p2, than manipulating *p1 and *p2 -// in 2 different thread is safe. -// -// * UB is the number of Used Bits in each word. We must have UB <= -// bitsof::val. Normally UB == bitsof::val, but for some applications, -// saving a few bits in each word can be useful (for example to -// provide the compare and swap operation CAS). -template::val> -class const_iterator; -template::val> -class iterator; - -namespace iterator_imp { -constexpr bool divides(unsigned a, unsigned b) { return b % a == 0; } - -template -struct mask_store { }; // Store bits within a word masked - -// Getter / setter for BITS > 0 (number of bits known at compile time) -template -struct gs { - static IDX get(const W* p, unsigned o) { - static constexpr size_t Wbits = bitsof::val; - IDX res; - - if(UB == Wbits) { - constexpr W mask = ~(W)0 >> (Wbits - BITS); - res = (*p >> o) & mask; - } else { - const W mask = ~(W)0 >> (Wbits - BITS + (o > UB - BITS)); - res = (*p >> o) & mask; - } - - if(!divides(BITS, UB) && o + BITS > UB) { - const unsigned over = o + BITS - UB; - const uint64_t mask = ~(W)0 >> (Wbits - over); - res |= (*(p + 1) & mask) << (BITS - over); - } - if(std::is_signed::value && res & ((IDX)1 << (BITS - 1))) - res |= ~static_cast::type>(0) << BITS; - - return res; - } - static inline IDX get(const W* p, unsigned b, unsigned o) { return get(p, o); } - - template - static void set(IDX x, W* p, unsigned o) { - static constexpr size_t Wbits = bitsof::val; - static constexpr W ubmask = ~(W)0 >> (Wbits - UB); - const W y = x; - W mask = ((~(W)0 >> (Wbits - BITS)) << o) & ubmask; - mask_store::store(p, mask, y << o); - if(!divides(BITS, UB) && o + BITS > UB) { - unsigned over = o + BITS - UB; - mask = ~(W)0 >> (Wbits - over); - mask_store::store(p + 1, mask, y >> (BITS - over)); - } - } - template - static inline IDX set(IDX x, W* p, unsigned b, unsigned o) { return set(x, p, o); } - - // Do a CAS at position p, offset o and number of bits b. Expect value - // exp and set value x. It takes care of the tricky case when the - // value pointed by (p, o, b) straddles 2 words. Then it require 2 CAS - // and it is technically not lock free anymore: if the current thread - // dies after setting the MSB to 1 during the first CAS, then the - // location is "dead" and other threads maybe prevented from making - // progress. - static bool cas(const IDX x, const IDX exp, W* p, unsigned o) { - static_assert(UB < bitsof::val, "The CAS operation is valid for a cas_vector (used bits less than bits in word)"); - static constexpr size_t Wbits = bitsof::val; - static constexpr W ubmask = ~(W)0 >> (Wbits - UB); - static_assert(UB < Wbits, "Used bits must strictly less than bit size of W for CAS."); - if(divides(BITS, UB) || o + BITS <= UB) { - const W mask = (~(W)0 >> (Wbits - BITS)) << o & ubmask; - return mask_store::cas(p, mask, (W)x << o, (W)exp << o); - } - - // o + BITS > UB. Needs to do a CAS with MSB set to 1, expecting MSB at - // 0. If failure, then return failure. If success, cas rest of value - // in next word, then set MSB back to 0. - static constexpr W msb = (W)1 << (Wbits - 1); - W mask = (~(W)0 >> (Wbits - BITS)) << o; - if(!mask_store::cas(p, mask, msb | ((W)x << o), ~msb & ((W)exp << o))) - return false; - const unsigned over = o + BITS - UB; - mask = ~(W)0 >> (Wbits - over); - const bool res = mask_store::cas(p + 1, mask, (W)x >> (BITS - over), (W)exp >> (BITS - over)); - mask_store::store(p, msb, 0); - return res; - } - static inline bool cas(const IDX x, const IDX exp, W* p, unsigned b, unsigned o) { - return cas(x, exp, p, o); - } - - // Fetch a value at position p, offset o and number of bits b. This is - // used when multiple thread may update the same position (p,o,b) with - // cas operations. In the case where the value straddles two words, - // then a CAS operation set the MSB to 1 (to prevent any other thread - // from changing the value), then reads the second words, finally sets - // the MSB back to 0 with a CAS operation. - // - // Result returned in res. Returns true if fetch is successfull - // (either value contained within a word, or CAS operations - // succeeded). Otherwise, it returns false. - static bool fetch(IDX& res, W* p, unsigned o) { - static_assert(UB < bitsof::val, "The fetch operation is valid for cas_vector (used bits less than bits in word"); - if(divides(BITS, UB) || o + BITS <= UB) { - res = get(p, BITS, o); - return true; - } - - // o + BITS > UB. Needs to do a CAS with MSB set to 1, expecting MSB at - // 0. If failure, then return failure. If success, read entire value - // then set MSB back to 0. - static constexpr size_t Wbits = bitsof::val; - static constexpr W ubmask = ~(W)0 >> (Wbits - UB); - static constexpr W msb = (W)1 << (Wbits - 1); - const W mask = (~(W)0 >> (Wbits - BITS)) << o; - W x = (*p & mask); - if(x & msb) return false; // MSB already set to 1 - if(!mask_store::cas(p, mask, msb | x, x)) - return false; - const unsigned over = o + BITS - UB; - const W nmask = ~(W)0 >> (Wbits - over); - res = x | ((*(p + 1) & nmask) << (BITS - over)); - if(std::is_signed::value && res & ((IDX)1 << (BITS - 1))) - res |= ~(IDX)0 << BITS; - mask_storecas(p, mask, x, x | msb); - return true; - } -}; - -// gs implementation for number of bits known at runtime (BITS == 0). -// -// XXX: too much code duplication with non-specialized version of gs. Improve! -template -struct gs { - static IDX get(const W* p, unsigned b, unsigned o) { - static constexpr size_t Wbits = bitsof::val; - static constexpr W ubmask = ~(W)0 >> (Wbits - UB); - W mask = ((~(W)0 >> (Wbits - b)) << o) & ubmask; - IDX res = (*p & mask) >> o; - if(o + b > UB) { - const unsigned over = o + b - UB; - mask = ~(W)0 >> (Wbits - over); - res |= (*(p + 1) & mask) << (b - over); - } - if(std::is_signed::value && res & ((IDX)1 << (b - 1))) - res |= ~(IDX)0 << b; - - return res; - } - - template - static void set(IDX x, W* p, unsigned b, unsigned o) { - static constexpr size_t Wbits = bitsof::val; - static constexpr W ubmask = ~(W)0 >> (Wbits - UB); - const W y = x; - W mask = ((~(W)0 >> (Wbits - b)) << o) & ubmask; - mask_store::store(p, mask, y << o); - if(o + b > UB) { - unsigned over = o + b - UB; - mask = ~(W)0 >> (Wbits - over); - mask_store::store(p + 1, mask, y >> (b - over)); - } - } - - // Do a CAS at position p, offset o and number of bits b. Expect value - // exp and set value x. It takes care of the tricky case when the - // value pointed by (p, o, b) straddles 2 words. Then it require 2 CAS - // and it is technically not lock free anymore: if the current thread - // dies after setting the MSB to 1 during the first CAS, then the - // location is "dead" and other threads maybe prevented from making - // progress. - static bool cas(const IDX x, const IDX exp, W* p, unsigned b, unsigned o) { - static_assert(UB < bitsof::val, "The CAS operation is valid for a cas_vector (used bits less than bits in word)"); - static constexpr size_t Wbits = bitsof::val; - static constexpr W ubmask = ~(W)0 >> (Wbits - UB); - static_assert(UB < Wbits, "Used bits must strictly less than bit size of W for CAS."); - if(o + b <= UB) { - const W mask = (~(W)0 >> (Wbits - b)) << o & ubmask; - return mask_store::cas(p, mask, (W)x << o, (W)exp << o); - } - - // o + b > UB. Needs to do a CAS with MSB set to 1, expecting MSB at - // 0. If failure, then return failure. If success, cas rest of value - // in next word, then set MSB back to 0. - static constexpr W msb = (W)1 << (Wbits - 1); - W mask = (~(W)0 >> (Wbits - b)) << o; - if(!mask_store::cas(p, mask, msb | ((W)x << o), ~msb & ((W)exp << o))) - return false; - const unsigned over = o + b - UB; - mask = ~(W)0 >> (Wbits - over); - const bool res = mask_store::cas(p + 1, mask, (W)x >> (b - over), (W)exp >> (b - over)); - mask_store::store(p, msb, 0); - return res; - } - - // Fetch a value at position p, offset o and number of bits b. This is - // used when multiple thread may update the same position (p,o,b) with - // cas operations. In the case where the value straddles two words, - // then a CAS operation set the MSB to 1 (to prevent any other thread - // from changing the value), then reads the second words, finally sets - // the MSB back to 0 with a CAS operation. - // - // Result returned in res. Returns true if fetch is successfull - // (either value contained within a word, or CAS operations - // succeeded). Otherwise, it returns false. - static bool fetch(IDX& res, W* p, unsigned b, unsigned o) { - static_assert(UB < bitsof::val, "The fetch operation is valid for cas_vector (used bits less than bits in word"); - if(o + b <= UB) { - res = get(p, b, o); - return true; - } - - // o + b > UB. Needs to do a CAS with MSB set to 1, expecting MSB at - // 0. If failure, then return failure. If success, read entire value - // then set MSB back to 0. - static constexpr size_t Wbits = bitsof::val; - static constexpr W ubmask = ~(W)0 >> (Wbits - UB); - static constexpr W msb = (W)1 << (Wbits - 1); - const W mask = (~(W)0 >> (Wbits - b)) << o; - W x = (*p & mask); - if(x & msb) return false; // MSB already set to 1 - if(!mask_store::cas(p, mask, msb | x, x)) - return false; - const unsigned over = o + b - UB; - const W nmask = ~(W)0 >> (Wbits - over); - res = x | ((*(p + 1) & nmask) << (b - over)); - if(std::is_signed::value && res & ((IDX)1 << (b - 1))) - res |= ~(IDX)0 << b; - mask_storecas(p, mask, x, x | msb); - return true; - } -}; - -// Mask store, depending on the thread safety guarantee -template -struct mask_store { - static inline void store(W* p, W mask, W val) { - *p = (*p & ~mask) | (val & mask); - } -}; - -template -struct mask_store { - static void store(W* p, W mask, W val) { - W cval = *p, oval; - do { - W nval = (cval & ~mask) | (val & mask); - oval = cval; - cval = __sync_val_compare_and_swap(p, oval, nval); - } while(cval != oval); - } - - // Do a CAS at p and val, only in the bits covered by mask. It - // retries while bits outside of mask change but those inside the - // mask are equal to the expected value exp. - static bool cas(W* p, W mask, W val, W exp) { - W cval = *p, oval; - val &= mask; - exp &= mask; - if(val == exp) - return (cval & mask) == exp; - while((cval & mask) == exp) { - W nval = (cval & ~mask) | val; - oval = cval; - cval = __sync_val_compare_and_swap(p, oval, nval); - if(cval == oval) - return true; - } - return false; - } -}; - - -// Base class for the iterators -template -class common { -public: - std::ostream& print(std::ostream& os) const { - const Derived& self = *static_cast(this); - return os << '<' << (void*)self.ptr << '+' << self.offset << ',' << self.bits << '>'; - } - -protected: - static constexpr unsigned Wbits = bitsof::val; - // UB is the number of bits actually used in a word. - static_assert(UB <= Wbits, - "Number of used bits must be less than number of bits in a word"); - static_assert(sizeof(IDX) <= sizeof(W), - "The size of integral type IDX must be less than the word type W"); - -public: - typedef typename std::iterator::difference_type difference_type; - static constexpr unsigned used_bits = UB; - - Derived& operator=(const Derived& rhs) { - Derived& self = *static_cast(this); - self.ptr = rhs.ptr; - self.offset = rhs.offset; - self.bits(rhs.bits()); - return self; - } - - Derived& operator=(std::nullptr_t p) { - Derived& self = *static_cast(this); - self.ptr = nullptr; - self.offset = 0; - } - - IDX operator*() const { - const Derived& self = *static_cast(this); - return gs::get(self.m_ptr, self.bits(), self.m_offset); - } - - bool operator==(const Derived& rhs) const { - const Derived& self = *static_cast(this); - return self.m_ptr == rhs.m_ptr && self.m_offset == rhs.m_offset; - } - bool operator!=(const Derived& rhs) const { - return !(*this == rhs); - } - - bool operator==(std::nullptr_t p) { - const Derived& self = *static_cast(this); - return self.m_ptr == nullptr && self.m_offset == 0; - } - bool operator!=(std::nullptr_t p) { - return !(*this == nullptr); - } - - bool operator<(const Derived& rhs) const { - const Derived& self = *static_cast(this); - return self.m_ptr < rhs.m_ptr || (self.m_ptr == rhs.m_ptr && self.m_offset < rhs.m_offset); - } - bool operator>(const Derived& rhs) const { - const Derived& self = *static_cast(this); - return self.m_ptr > rhs.m_ptr || (self.m_ptr == rhs.m_ptr && self.m_offset > rhs.m_offset); - } - bool operator>=(const Derived& rhs) const { - return !(*this < rhs); - } - bool operator<=(const Derived& rhs) const { - return !(*this > rhs); - } - - Derived& operator++() { - Derived& self = *static_cast(this); - self.m_offset += self.bits(); - if(self.m_offset >= UB) { - ++self.m_ptr; - self.m_offset -= UB; - } - return self; - } - Derived operator++(int) { - Derived res(*static_cast(this)); - ++*this; - return res; - } - - Derived& operator--() { - Derived& self = *static_cast(this); - if(self.bits() > self.m_offset) { - --self.m_ptr; - self.m_offset += UB; - } - self.m_offset -= self.bits(); - return self; - } - Derived operator--(int) { - Derived res(*static_cast(this)); - --*this; - return res; - } - - Derived& operator+=(difference_type n) { - Derived& self = *static_cast(this); - if(n < 0) { - self -= -n; - return self; - } - - const size_t nbbits = self.bits() * n; - self.m_ptr += nbbits / UB; - self.m_offset += nbbits % UB; - if(self.m_offset >= UB) { - ++self.m_ptr; - self.m_offset -= UB; - } - return self; - } - - Derived operator+(difference_type n) const { - Derived res(*static_cast(this)); - return res += n; - } - - Derived& operator-=(difference_type n) { - Derived& self = *static_cast(this); - if(n < 0) { - self += -n; - return self; - } - - const size_t nbbits = self.bits() * n; - self.m_ptr -= nbbits / UB; - const unsigned ooffset = nbbits % UB; - if(ooffset > self.m_offset) { - --self.m_ptr; - self.m_offset += UB; - } - self.m_offset -= ooffset; - return self; - } - - Derived operator-(difference_type n) const { - Derived res(*static_cast(this)); - return res -= n; - } - - template - difference_type operator-(const common& rhs_) const { - const Derived& self = *static_cast(this); - const DD& rhs = *static_cast(&rhs_); - ptrdiff_t wdiff = (self.m_ptr - rhs.m_ptr) * UB; - if(self.m_offset < rhs.m_offset) - wdiff += (ptrdiff_t)((UB + self.m_offset) - rhs.m_offset) - (ptrdiff_t)UB; - else - wdiff += self.m_offset - rhs.m_offset; - return wdiff / self.bits(); - } - - IDX operator[](const difference_type n) const { - const Derived& self = *static_cast(this); - return *(self + n); - } - - // Extra methods which are not part of an iterator interface - - const W* get_ptr() const { - const Derived& self = *static_cast(this); - return self.ptr; - } - unsigned get_offset() const { - const Derived& self = *static_cast(this); - return self.offset; - } - unsigned get_bits() const { - const Derived& self = *static_cast(this); - return self.bits(); - } - - // Get some number of bits - W get_bits(unsigned bits) const { - const Derived& self = *static_cast(this); - return gs::get(self.ptr, bits, self.offset); - } - - W get_bits(unsigned bits, unsigned offset) const { - const Derived& self = *static_cast(this); - return gs::get(self.ptr, bits, offset); - } - - template - void set_bits(W x, unsigned bits) { - Derived& self = *static_cast(this); - gs::set(x, self.ptr, bits, self.offset); - } -}; - -template -struct swap_word_mask { - static constexpr W value = swap_word_mask::value << (4 * I) | swap_word_mask::value; -}; -template -struct swap_word_mask { - static constexpr W value = 0x55; -}; - -template -inline W swap_word(W w) { - return ((w & swap_word_mask::value) << 1) | ((w & (swap_word_mask::value << 1)) >> 1); -} -template -inline bool compare_swap_words(W w1, W w2) { - w1 = swap_word(w1); - w2 = swap_word(w2); - W bmask = w1 ^ w2; - bmask &= -bmask; - return (w1 & bmask) == 0; -} - -// Precompute (expensive) division by number of bits. The static -// arrays contain X/k (word_idx) and k*(X/k) -// (word_bits) for k in [0, X]. -// -// ** This code is kind of sick! - -//helper template, just converts its variadic arguments to array initializer list -template struct size_t_ary {static const size_t value[sizeof...(values)];}; -template const size_t size_t_ary::value[] = {values...}; - -template -struct word_idx : word_idx {}; -template -struct word_idx : size_t_ary<(size_t)0, values...> {}; - -template -struct word_bits : word_bits {}; -template -struct word_bits : size_t_ary<(size_t)0, values...> {}; - -template -bool lexicographical_compare_n(Iterator first1, const size_t len1, - Iterator first2, const size_t len2) { - static constexpr unsigned UB = Iterator::used_bits; - - const auto bits = first1.get_bits(); - auto left = std::min(len1, len2) * bits; - const decltype(len1) Widx = word_idx::value[bits]; - const decltype(len1) Wbits = word_bits::value[bits]; - - for( ; left > Wbits; left -= Wbits, first1 += Widx, first2 += Widx) { - auto w1 = first1.get_bits(Wbits); - auto w2 = first2.get_bits(Wbits); - if(w1 != w2) return compare_swap_words(w1, w2); - } - if(left > 0) { - auto w1 = first1.get_bits(left); - auto w2 = first2.get_bits(left); - if(w1 != w2) return compare_swap_words(w1, w2); - } - - return len1 < len2; -} - -template -bool operator==(std::nullptr_t lfs, const common& rhs) { - return rhs == nullptr; -} - -template -D operator+(typename common::difference_type lhs, const common& rhs) { - return rhs + lhs; -} - -template -std::ostream& operator<<(std::ostream& os, const common& rhs) { - return rhs.print(os); -} - -template::val> -class lhs_setter_common { -protected: - W* ptr; - unsigned offset; - -public: - typedef compact::iterator iterator; - lhs_setter_common(W* p, unsigned o) : ptr(p), offset(o) { } - operator IDX() const { - const Derived& self = *static_cast(this); - return gs::get(ptr, self.bits(), offset); - } - iterator operator&() { - Derived& self = *static_cast(this); - return iterator(ptr, self.bits(), offset); - } - inline bool cas(const IDX x, const IDX exp) { - Derived& self = *static_cast(this); - return gs::cas(x, exp, ptr, self.bits(), offset); - } -}; - -template -class lhs_setter; - -template -class lhs_setter - : public lhs_setter_common, IDX, 0, W, TS, UB> -{ - typedef lhs_setter_common, IDX, 0, W, TS, UB> super; - unsigned m_bits; // number of bits in an integral type - -public: - lhs_setter(W* p, int b, int o) : super(p, o), m_bits(b) { } - lhs_setter& operator=(const IDX x) { - gs::template set(x, super::ptr, m_bits, super::offset); - return *this; - } - lhs_setter& operator=(const lhs_setter& rhs) { - gs::template set((IDX)rhs, super::ptr, m_bits, super::offset); - return *this; - } - - unsigned bits() const { return m_bits; } -}; - -template -class lhs_setter - : public lhs_setter_common, IDX, BITS, W, TS, UB> -{ - typedef lhs_setter_common, IDX, BITS, W, TS, UB> super; - -public: - lhs_setter(W* p, int o) : super(p, o) { } - lhs_setter(W* p, unsigned bits, int o) : super(p, o) { } - lhs_setter& operator=(const IDX x) { - gs::template set(x, super::ptr, super::offset); - return *this; - } - lhs_setter& operator=(const lhs_setter& rhs) { - gs::template set((IDX)rhs, super::ptr, super::offset); - return *this; - } - - constexpr unsigned bits() const { return BITS; } -}; - -template -void swap(lhs_setter x, lhs_setter y) { - I t = x; - x = (I)y; - y = t; -} - -} // namespace iterator_imp - -// Specialization with BITS=0 (dynamic/runtime number of bits used) -template -class iterator : - public std::iterator, - public iterator_imp::common, IDX, 0, W, UB> -{ - W* m_ptr; - unsigned m_bits; // number of bits in an integral type - unsigned m_offset; - - friend class iterator; - friend class const_iterator; - friend class iterator_imp::common, IDX, 0, W, UB>; - friend class iterator_imp::common, IDX, 0, W, UB>; - - typedef std::iterator super; -public: - typedef typename super::value_type value_type; - typedef typename super::difference_type difference_type; - typedef IDX idx_type; - typedef W word_type; - typedef iterator_imp::lhs_setter lhs_setter_type; - - iterator() = default; - iterator(W* p, unsigned b, unsigned o) - : m_ptr(p), m_bits(b), m_offset(o) { } - template - iterator(const iterator& rhs) - : m_ptr(rhs.m_ptr), m_bits(rhs.m_bits), m_offset(rhs.m_offset) { } - iterator(std::nullptr_t) - : m_ptr(nullptr), m_bits(0), m_offset(0) { } - - lhs_setter_type operator*() { return lhs_setter_type(m_ptr, m_bits, m_offset); } - lhs_setter_type operator[](const difference_type n) const { - return *(*this + n); - } - - // CAS val. Does NOT return existing value at pointer. Return true - // if successful. - inline bool cas(const IDX x, const IDX exp) { - return iterator_imp::gs::cas(x, exp, m_ptr, m_bits, m_offset); - } - - unsigned bits() const { return m_bits; } -protected: - void bits(unsigned b) { m_bits = b; } -}; - -template -class const_iterator : - public std::iterator, - public iterator_imp::common, IDX, 0, W, UB> -{ - const W* m_ptr; - unsigned m_bits; // number of bits in an integral type - unsigned m_offset; - - friend class iterator; - friend class iterator_imp::common, IDX, 0, W, UB>; - friend class iterator_imp::common, IDX, 0, W, UB>; - friend class iterator_imp::common, IDX, 0, W, UB>; - - typedef std::iterator super; -public: - typedef typename super::value_type value_type; - typedef typename super::difference_type difference_type; - typedef IDX idx_type; - typedef W word_type; - - - const_iterator() = default; - const_iterator(const W* p, unsigned b, unsigned o) - : m_ptr(p), m_bits(b), m_offset(o) { } - const_iterator(const const_iterator& rhs) - : m_ptr(rhs.m_ptr), m_bits(rhs.m_bits), m_offset(rhs.m_offset) { } - template - const_iterator(const iterator& rhs) - : m_ptr(rhs.m_ptr), m_bits(rhs.m_bits), m_offset(rhs.m_offset) { } - const_iterator(std::nullptr_t) - : m_ptr(nullptr), m_bits(0), m_offset(0) { } - - unsigned bits() const { return m_bits; } - void bits(unsigned b) { m_bits = b; } -}; - - -// No specialization. Static number of bits used. -template -class iterator : - public std::iterator, - public iterator_imp::common, IDX, BITS, W, UB> -{ - W* m_ptr; - unsigned m_offset; - - friend class iterator; - friend class const_iterator; - friend class iterator_imp::common, IDX, BITS, W, UB>; - friend class iterator_imp::common, IDX, BITS, W, UB>; - - typedef std::iterator super; -public: - typedef typename super::value_type value_type; - typedef typename super::difference_type difference_type; - typedef IDX idx_type; - typedef W word_type; - typedef iterator_imp::lhs_setter lhs_setter_type; - - iterator() = default; - iterator(W* p, unsigned o) - : m_ptr(p), m_offset(o) { } - iterator(W* p, unsigned b, unsigned o) - : m_ptr(p), m_offset(o) { } // XXX Should we assert that BITS == b? - template - iterator(const iterator& rhs) - : m_ptr(rhs.m_ptr), m_offset(rhs.m_offset) { } - iterator(std::nullptr_t) - : m_ptr(nullptr), m_offset(0) { } - - lhs_setter_type operator*() { return lhs_setter_type(m_ptr, m_offset); } - lhs_setter_type operator[](const difference_type n) const { - return *(*this + n); - } - - // CAS val. Does NOT return existing value at pointer. Return true - // if successful. - inline bool cas(const IDX x, const IDX exp) { - return iterator_imp::gs::cas(x, exp, m_ptr, m_offset); - } - - constexpr unsigned bits() const { return BITS; } -protected: - void bits(unsigned b) { } // NOOP -}; - -template -class const_iterator : - public std::iterator, - public iterator_imp::common, IDX, BITS, W, UB> -{ - const W* m_ptr; - unsigned m_offset; - - friend class iterator; - friend class iterator_imp::common, IDX, BITS, W, UB>; - friend class iterator_imp::common, IDX, BITS, W, UB>; - friend class iterator_imp::common, IDX, BITS, W, UB>; - - typedef std::iterator super; -public: - typedef typename super::value_type value_type; - typedef typename super::difference_type difference_type; - typedef IDX idx_type; - typedef W word_type; - - - const_iterator() = default; - const_iterator(const W* p, unsigned o) - : m_ptr(p), m_offset(o) { } - const_iterator(const W* p, unsigned b, unsigned o) - : m_ptr(p), m_offset(o) { } - const_iterator(const const_iterator& rhs) - : m_ptr(rhs.m_ptr), m_offset(rhs.m_offset) { } - template - const_iterator(const iterator& rhs) - : m_ptr(rhs.m_ptr), m_offset(rhs.m_offset) { } - const_iterator(std::nullptr_t) - : m_ptr(nullptr), m_offset(0) { } - - constexpr unsigned bits() const { return BITS; } -protected: - void bits(unsigned b) { } // NOOP -}; - -template -struct const_iterator_traits> { - typedef const_iterator type; -}; -template -struct const_iterator_traits> { - typedef const_iterator type; -}; - -template -struct parallel_iterator_traits> { - typedef iterator type; - - // Does a cas on iterator x. Weak though: val is NOT updated to the - // current value. - static inline bool cas(type& x, I& expected, const I& val) { - return x.cas(val, expected); - } -}; - -template -struct parallel_iterator_traits> { - typedef const_iterator type; -}; - -template -struct prefetch_iterator_traits > { - template - static void read(const iterator& p) { prefetch_iterator_traits::template read(p.get_ptr()); } - template - static void write(const iterator& p) { prefetch_iterator_traits::template write(p.get_ptr()); } - -}; - -template -struct prefetch_iterator_traits > { - template - static void read(const const_iterator& p) { prefetch_iterator_traits::template read(p.get_ptr()); } - template - static void write(const const_iterator& p) { prefetch_iterator_traits::template write(p.get_ptr()); } - -}; - -} // namespace compact - -#endif /* __COMPACT_ITERATOR_H__ */ diff --git a/external/twopaco/graphdump/compact_vector/compact_vector.hpp b/external/twopaco/graphdump/compact_vector/compact_vector.hpp deleted file mode 100644 index 1604a49..0000000 --- a/external/twopaco/graphdump/compact_vector/compact_vector.hpp +++ /dev/null @@ -1,484 +0,0 @@ -#ifndef __COMPACT_VECTOR_H__ -#define __COMPACT_VECTOR_H__ - -#include -#include -#include -#include -#include // for std::error_code -#include "mio.hpp" -#include "compact_iterator.hpp" -#include -#include - -namespace compact { - -inline uint64_t get_bits_per_element(const std::string& fname) { - // load the vector by reading from file - std::ifstream ifile(fname, std::ios::binary); - uint64_t static_flag{0}; - ifile.read(reinterpret_cast(&static_flag), sizeof(static_flag)); - uint64_t bits_per_element; - ifile.read(reinterpret_cast(&bits_per_element), sizeof(bits_per_element)); - ifile.close(); - return bits_per_element; -} - -namespace vector_imp { -inline int clz(unsigned int x) { return __builtin_clz(x); } -inline int clz(unsigned long x) { return __builtin_clzl(x); } -inline int clz(unsigned long long x) { return __builtin_clzll(x); } - -// XXX TODO Missing copy and move constructors -template -class vector { - Allocator m_allocator; - size_t m_size; // Size in number of elements - size_t m_capacity; // Capacity in number of elements - W* m_mem; - mio::mmap_source ro_mmap; - -public: - // Number of bits required for indices/values in the range [0, s). - static unsigned required_bits(size_t s) { - unsigned res = bitsof::val - 1 - clz(s); - res += (s > ((size_t)1 << res)) + (std::is_signed::value ? 1 : 0); - return res; - } - - static size_t elements_to_words(size_t size, unsigned bits) { - size_t total_bits = size * bits; - return total_bits / UB + (total_bits % UB != 0); - } - - typedef compact::iterator iterator; - typedef compact::const_iterator const_iterator; - typedef compact::iterator mt_iterator; // Multi thread safe version - typedef std::reverse_iterator reverse_iterator; - typedef std::reverse_iterator const_reverse_iterator; - - vector(size_t s, size_t mem, Allocator allocator = Allocator()) - : m_allocator(allocator) - , m_size(s) - , m_capacity(s) - , m_mem(m_allocator.allocate(mem)) - { - static_assert(UB <= bitsof::val, "used_bits must be less or equal to the number of bits in the word_type"); - static_assert(BITS <= UB, "number of bits larger than usable bits"); - } - explicit vector(Allocator allocator = Allocator()) - : vector(0, 0, allocator) - { } - ~vector() { - if (!ro_mmap.is_mapped()) { - m_allocator.deallocate(m_mem, elements_to_words(m_capacity, bits())); - } - } - - const_iterator begin() const { return const_iterator(m_mem, bits(), 0); } - iterator begin() { return iterator(m_mem, bits(), 0); } - const_iterator end() const { return begin() + m_size; } - iterator end() { return begin() + m_size; } - const_iterator cbegin() const { return begin(); } - const_iterator cend() const { return end(); } - const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); } - reverse_iterator rbegin() { return reverse_iterator(end()); } - const_reverse_iterator rend() const { return const_reverse_iterator(begin()); } - reverse_iterator rend() { return reverse_iterator(begin()); } - const_reverse_iterator crbegin() const { return const_reverse_iterator(end()); } - const_reverse_iterator crend() const { return const_reverse_iterator(begin()); } - - // Multi thread safe iterator - mt_iterator mt_begin() { return mt_iterator(m_mem, bits(), 0); } - mt_iterator mt_end() { return begin() + m_size; } - - IDX operator[](size_t i) const { - return BITS - ? *const_iterator(m_mem + (i * BITS) / UB, BITS, (i * BITS) % UB) - : *const_iterator(m_mem + (i * bits()) / UB, bits(), (i * bits()) % UB); - // return cbegin()[i]; - } - typename iterator::lhs_setter_type operator[](size_t i) { - return BITS - ? typename iterator::lhs_setter_type(m_mem + (i * BITS) / UB, BITS, (i * BITS) % UB) - : typename iterator::lhs_setter_type(m_mem + (i * bits()) / UB, bits(), (i * bits()) % UB); - // return begin()[i]; - } - IDX front() const { return *cbegin(); } - typename iterator::lhs_setter_type front() { return *begin(); } - IDX back() const { return *(cbegin() + (m_size - 1)); } - typename iterator::lhs_setter_type back() { return *(begin() + (m_size - 1)); } - - size_t size() const { return m_size; } - bool empty() const { return m_size == 0; } - size_t capacity() const { return m_capacity; } - uint64_t * get_words() const { return m_mem; } - - void push_back(IDX x) { - if(m_size == m_capacity) - enlarge(); - *end() = x; - ++m_size; - } - - void pop_back() { --m_size; } - void clear() { m_size = 0; } - void emplace_back(IDX x) { push_back(x); } - - W* get() { return m_mem; } - const W* get() const { return m_mem; } - size_t bytes() const { return sizeof(W) * elements_to_words(m_size, bits()); } - inline unsigned bits() const { return static_cast(this)->bits(); } - static constexpr unsigned static_bits() { return BITS; } - static constexpr unsigned used_bits() { return UB; } - static constexpr bool thread_safe() { return TS; } - - void serialize(std::ofstream& of) { - uint64_t static_flag = (static_bits() == bits()) ? 1 : 0; - of.write(reinterpret_cast(&static_flag), sizeof(static_flag)); - if (static_flag != 0) { - uint64_t bits_per_element = static_bits(); - of.write(reinterpret_cast(&bits_per_element), sizeof(bits_per_element)); - } else { - uint64_t bits_per_element = bits(); - of.write(reinterpret_cast(&bits_per_element), sizeof(bits_per_element)); - } - uint64_t w_size = m_size; - std::cerr << "size: " << w_size << "\n"; - of.write(reinterpret_cast(&w_size), sizeof(w_size)); - uint64_t w_capacity = m_capacity; - of.write(reinterpret_cast(&w_capacity), sizeof(w_capacity)); - of.write(reinterpret_cast(m_mem), bytes()); - //std::cerr << "wrote " << bytes() << " bytes of data at the end\n"; - } - - void deserialize(const std::string& fname, bool mmap) { - std::error_code error; - if (mmap) { - // load the vector *read only* by mmap - ro_mmap.map(fname, error); - if (error) { std::cerr << "error = " << error << "\n"; } - const char* data = ro_mmap.data(); - data += sizeof(uint64_t); - uint64_t bits_per_element; - std::memcpy(reinterpret_cast(&bits_per_element), reinterpret_cast(const_cast(data)), sizeof(bits_per_element)); - //std::cerr<< "bits / element = " << bits_per_element << "\n"; - data += sizeof(W); - uint64_t w_size{0}; - std::memcpy(reinterpret_cast(&w_size), reinterpret_cast(const_cast(data)), sizeof(w_size)); - m_size = w_size; - std::cerr<< "size = " << m_size << "\n"; - data += sizeof(w_size); - uint64_t w_capacity{0}; - std::memcpy(reinterpret_cast(&w_capacity), reinterpret_cast(const_cast(data)), sizeof(w_capacity)); - m_capacity = w_capacity; - //std::cerr<< "capacity = " << m_capacity << "\n"; - data += sizeof(w_capacity); - m_allocator.deallocate(m_mem, elements_to_words(m_capacity, bits())); - m_mem = reinterpret_cast(const_cast(data)); - } else { - // load the vector by reading from file - std::ifstream ifile(fname, std::ios::binary); - uint64_t static_flag{0}; - ifile.read(reinterpret_cast(&static_flag), sizeof(static_flag)); - - uint64_t bits_per_element; - ifile.read(reinterpret_cast(&bits_per_element), sizeof(bits_per_element)); - - //std::cerr<< "bits / element = " << bits_per_element << "\n"; - - uint64_t w_size{0}; - ifile.read(reinterpret_cast(&w_size), sizeof(w_size)); - m_size = w_size; - std::cerr<< "size = " << m_size << "\n"; - - uint64_t w_capacity{0}; - ifile.read(reinterpret_cast(&w_capacity), sizeof(w_capacity)); - m_capacity = w_capacity; - //std::cerr<< "capacity = " << m_capacity << "\n"; - - m_allocator.deallocate(m_mem, elements_to_words(m_capacity, bits())); - m_mem = m_allocator.allocate(elements_to_words(m_capacity, bits())); - if(m_mem == nullptr) throw std::bad_alloc(); - ifile.read(reinterpret_cast(m_mem), sizeof(W) * elements_to_words(m_size, bits())); - } - - } - void touch_all_pages(uint64_t bits_per_element) { - uint64_t sum = 0; - std::cerr<<"number of elements:"<size() << "\n"; - std::cerr<<"page size:"<size(); i+=elements_per_page ) { - sum += (*this)[i]; - } - std::cerr< -class vector_dyn - : public vector_imp::vector, IDX, 0, W, Allocator, UB, TS> -{ - typedef vector_imp::vector, IDX, 0, W, Allocator, UB, TS> super; - unsigned m_bits; // Number of bits in an element - -public: - typedef typename super::iterator iterator; - typedef typename super::const_iterator const_iterator; - typedef IDX value_type; - typedef Allocator allocator_type; - typedef typename iterator::lhs_setter_type reference; - typedef const reference const_reference; - typedef iterator pointer; - typedef const_iterator const_pointer; - typedef std::reverse_iterator reverse_iterator; - typedef std::reverse_iterator const_reverse_iterator; - typedef ptrdiff_t difference_type; - typedef size_t size_type; - typedef W word_type; - - vector_dyn(unsigned b, size_t s, Allocator allocator = Allocator()) - : super(s, super::elements_to_words(s, b), allocator) - , m_bits(b) - { } - vector_dyn(unsigned b, Allocator allocator = Allocator()) - : super(allocator) - , m_bits(b) - { } - - inline unsigned bits() const { return m_bits; } - void set_m_bits(size_t m) { m_bits = m; } -}; - -} // namespace vector_imp - -template> -class vector - : public vector_imp::vector, IDX, BITS, W, Allocator, bitsof::val, false> -{ - typedef vector_imp::vector, IDX, BITS, W, Allocator, bitsof::val, false> super; - -public: - typedef typename super::iterator iterator; - typedef typename super::const_iterator const_iterator; - typedef IDX value_type; - typedef Allocator allocator_type; - typedef typename iterator::lhs_setter_type reference; - typedef const reference const_reference; - typedef iterator pointer; - typedef const_iterator const_pointer; - typedef std::reverse_iterator reverse_iterator; - typedef std::reverse_iterator const_reverse_iterator; - typedef ptrdiff_t difference_type; - typedef size_t size_type; - typedef W word_type; - - vector(size_t s, Allocator allocator = Allocator()) - : super(s, super::elements_to_words(s, BITS), allocator) - { } - vector(Allocator allocator = Allocator()) - : super(allocator) - { } - - static constexpr unsigned bits() { return BITS; } -}; - -template -class vector - : public vector_imp::vector_dyn::val, false> -{ - typedef vector_imp::vector_dyn::val, false> super; - -public: - typedef typename super::iterator iterator; - typedef typename super::const_iterator const_iterator; - typedef IDX value_type; - typedef Allocator allocator_type; - typedef typename iterator::lhs_setter_type reference; - typedef const reference const_reference; - typedef iterator pointer; - typedef const_iterator const_pointer; - typedef std::reverse_iterator reverse_iterator; - typedef std::reverse_iterator const_reverse_iterator; - typedef ptrdiff_t difference_type; - typedef size_t size_type; - typedef W word_type; - - vector(unsigned b, size_t s, Allocator allocator = Allocator()) - : super(b, s, allocator) - { - if(b > bitsof::val) - throw std::out_of_range("Number of bits larger than usable bits"); - } - vector(unsigned b, Allocator allocator = Allocator()) - : super(b, allocator) - { } -}; - -template> -class ts_vector - : public vector_imp::vector, IDX, BITS, W, Allocator, bitsof::val, true> -{ - typedef vector_imp::vector, IDX, BITS, W, Allocator, bitsof::val, true> super; - -public: - typedef typename super::iterator iterator; - typedef typename super::const_iterator const_iterator; - typedef IDX value_type; - typedef Allocator allocator_type; - typedef typename iterator::lhs_setter_type reference; - typedef const reference const_reference; - typedef iterator pointer; - typedef const_iterator const_pointer; - typedef std::reverse_iterator reverse_iterator; - typedef std::reverse_iterator const_reverse_iterator; - typedef ptrdiff_t difference_type; - typedef size_t size_type; - typedef W word_type; - - ts_vector(size_t s, Allocator allocator = Allocator()) - : super(s, super::elements_to_words(s, BITS), allocator) - { } - ts_vector(Allocator allocator = Allocator()) - : super(allocator) - { } - - static constexpr unsigned bits() { return BITS; } -}; - - -template -class ts_vector - : public vector_imp::vector_dyn::val, true> -{ - typedef vector_imp::vector_dyn::val, true> super; -public: - typedef typename super::iterator iterator; - typedef typename super::const_iterator const_iterator; - typedef IDX value_type; - typedef Allocator allocator_type; - typedef typename iterator::lhs_setter_type reference; - typedef const reference const_reference; - typedef iterator pointer; - typedef const_iterator const_pointer; - typedef std::reverse_iterator reverse_iterator; - typedef std::reverse_iterator const_reverse_iterator; - typedef ptrdiff_t difference_type; - typedef size_t size_type; - typedef W word_type; - - ts_vector(unsigned b, size_t s, Allocator allocator = Allocator()) - : super(b, s, allocator) - { - if(b > bitsof::val) - throw std::out_of_range("Number of bits larger than usable bits"); - } - ts_vector(unsigned b, Allocator allocator = Allocator()) - : super(b, allocator) - { } -}; - -template> -class cas_vector - : public vector_imp::vector, IDX, BITS, W, Allocator, bitsof::val-1, true> -{ - typedef vector_imp::vector, IDX, BITS, W, Allocator, bitsof::val-1, true> super; - -public: - typedef typename super::iterator iterator; - typedef typename super::const_iterator const_iterator; - typedef IDX value_type; - typedef Allocator allocator_type; - typedef typename iterator::lhs_setter_type reference; - typedef const reference const_reference; - typedef iterator pointer; - typedef const_iterator const_pointer; - typedef std::reverse_iterator reverse_iterator; - typedef std::reverse_iterator const_reverse_iterator; - typedef ptrdiff_t difference_type; - typedef size_t size_type; - typedef W word_type; - - cas_vector(size_t s, Allocator allocator = Allocator()) - : super(s, super::elements_to_words(s, BITS), allocator) - { } - cas_vector(Allocator allocator = Allocator()) - : super(allocator) - { } - - static constexpr unsigned bits() { return BITS; } -}; - -template -class cas_vector - : public vector_imp::vector_dyn::val - 1, true> -{ - typedef vector_imp::vector_dyn::val - 1, true> super; -public: - typedef typename super::iterator iterator; - typedef typename super::const_iterator const_iterator; - typedef IDX value_type; - typedef Allocator allocator_type; - typedef typename iterator::lhs_setter_type reference; - typedef const reference const_reference; - typedef iterator pointer; - typedef const_iterator const_pointer; - typedef std::reverse_iterator reverse_iterator; - typedef std::reverse_iterator const_reverse_iterator; - typedef ptrdiff_t difference_type; - typedef size_t size_type; - typedef W word_type; - - cas_vector(unsigned b, size_t s, Allocator allocator = Allocator()) - : super(b, s, allocator) - { - if(b > bitsof::val - 1) - throw std::out_of_range("Number of bits larger than usable bits"); - } - cas_vector(unsigned b, Allocator allocator = Allocator()) - : super(b, allocator) - { } -}; - -} // namespace compact - -#endif /* __COMPACT_VECTOR_H__ */ diff --git a/external/twopaco/graphdump/compact_vector/const_iterator_traits.hpp b/external/twopaco/graphdump/compact_vector/const_iterator_traits.hpp deleted file mode 100644 index 69ed1b6..0000000 --- a/external/twopaco/graphdump/compact_vector/const_iterator_traits.hpp +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef __CONST_ITERATOR_TRAITS_H__ -#define __CONST_ITERATOR_TRAITS_H__ - -#include - - -namespace compact { -template struct const_iterator_traits { }; - -template -struct const_iterator_traits { - typedef typename std::add_const::type* type; -}; - -template -struct const_iterator_traits { - typedef const T* type; -}; -} // namespace compact - -#endif /* __CONST_ITERATOR_TRAITS_H__ */ diff --git a/external/twopaco/graphdump/compact_vector/mio.hpp b/external/twopaco/graphdump/compact_vector/mio.hpp deleted file mode 100644 index 15ae30f..0000000 --- a/external/twopaco/graphdump/compact_vector/mio.hpp +++ /dev/null @@ -1,1754 +0,0 @@ -/* Copyright 2017 https://github.com/mandreyel - * - * Permission is hereby granted, free of charge, to any person obtaining a copy of this - * software and associated documentation files (the "Software"), to deal in the Software - * without restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be included in all copies - * or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, - * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A - * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF - * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE - * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef MIO_MMAP_HEADER -#define MIO_MMAP_HEADER - -// #include "mio/page.hpp" -/* Copyright 2017 https://github.com/mandreyel - * - * Permission is hereby granted, free of charge, to any person obtaining a copy of this - * software and associated documentation files (the "Software"), to deal in the Software - * without restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be included in all copies - * or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, - * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A - * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF - * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE - * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef MIO_PAGE_HEADER -#define MIO_PAGE_HEADER - -#ifdef _WIN32 -# include -#else -# include -#endif - -namespace mio { - -/** - * This is used by `basic_mmap` to determine whether to create a read-only or - * a read-write memory mapping. - */ -enum class access_mode -{ - read, - write -}; - -/** - * Determines the operating system's page allocation granularity. - * - * On the first call to this function, it invokes the operating system specific syscall - * to determine the page size, caches the value, and returns it. Any subsequent call to - * this function serves the cached value, so no further syscalls are made. - */ -inline size_t page_size() -{ - static const size_t page_size = [] - { -#ifdef _WIN32 - SYSTEM_INFO SystemInfo; - GetSystemInfo(&SystemInfo); - return SystemInfo.dwAllocationGranularity; -#else - return sysconf(_SC_PAGE_SIZE); -#endif - }(); - return page_size; -} - -/** - * Alligns `offset` to the operating's system page size such that it subtracts the - * difference until the nearest page boundary before `offset`, or does nothing if - * `offset` is already page aligned. - */ -inline size_t make_offset_page_aligned(size_t offset) noexcept -{ - const size_t page_size_ = page_size(); - // Use integer division to round down to the nearest page alignment. - return offset / page_size_ * page_size_; -} - -} // namespace mio - -#endif // MIO_PAGE_HEADER - - -#include -#include -#include -#include - -#ifdef _WIN32 -# ifndef WIN32_LEAN_AND_MEAN -# define WIN32_LEAN_AND_MEAN -# endif // WIN32_LEAN_AND_MEAN -# include -#else // ifdef _WIN32 -# define INVALID_HANDLE_VALUE -1 -#endif // ifdef _WIN32 - -namespace mio { - -// This value may be provided as the `length` parameter to the constructor or -// `map`, in which case a memory mapping of the entire file is created. -enum { map_entire_file = 0 }; - -#ifdef _WIN32 -using file_handle_type = HANDLE; -#else -using file_handle_type = int; -#endif - -// This value represents an invalid file handle type. This can be used to -// determine whether `basic_mmap::file_handle` is valid, for example. -const static file_handle_type invalid_handle = INVALID_HANDLE_VALUE; - -template -struct basic_mmap -{ - using value_type = ByteT; - using size_type = size_t; - using reference = value_type&; - using const_reference = const value_type&; - using pointer = value_type*; - using const_pointer = const value_type*; - using difference_type = std::ptrdiff_t; - using iterator = pointer; - using const_iterator = const_pointer; - using reverse_iterator = std::reverse_iterator; - using const_reverse_iterator = std::reverse_iterator; - using iterator_category = std::random_access_iterator_tag; - using handle_type = file_handle_type; - - static_assert(sizeof(ByteT) == sizeof(char), "ByteT must be the same size as char."); - -private: - // Points to the first requested byte, and not to the actual start of the mapping. - pointer data_ = nullptr; - - // Length--in bytes--requested by user (which may not be the length of the - // full mapping) and the length of the full mapping. - size_type length_ = 0; - size_type mapped_length_ = 0; - - // Letting user map a file using both an existing file handle and a path - // introcudes some complexity (see `is_handle_internal_`). - // On POSIX, we only need a file handle to create a mapping, while on - // Windows systems the file handle is necessary to retrieve a file mapping - // handle, but any subsequent operations on the mapped region must be done - // through the latter. - handle_type file_handle_ = INVALID_HANDLE_VALUE; -#ifdef _WIN32 - handle_type file_mapping_handle_ = INVALID_HANDLE_VALUE; -#endif - - // Letting user map a file using both an existing file handle and a path - // introcudes some complexity in that we must not close the file handle if - // user provided it, but we must close it if we obtained it using the - // provided path. For this reason, this flag is used to determine when to - // close `file_handle_`. - bool is_handle_internal_; - -public: - /** - * The default constructed mmap object is in a non-mapped state, that is, - * any operation that attempts to access nonexistent underlying data will - * result in undefined behaviour/segmentation faults. - */ - basic_mmap() = default; - -#ifdef __cpp_exceptions - /** - * The same as invoking the `map` function, except any error that may occur - * while establishing the mapping is wrapped in a `std::system_error` and is - * thrown. - */ - template - basic_mmap(const String& path, const size_type offset = 0, const size_type length = map_entire_file) - { - std::error_code error; - map(path, offset, length, error); - if(error) { throw std::system_error(error); } - } - - /** - * The same as invoking the `map` function, except any error that may occur - * while establishing the mapping is wrapped in a `std::system_error` and is - * thrown. - */ - basic_mmap(const handle_type handle, const size_type offset = 0, const size_type length = map_entire_file) - { - std::error_code error; - map(handle, offset, length, error); - if(error) { throw std::system_error(error); } - } -#endif // __cpp_exceptions - - /** - * `basic_mmap` has single-ownership semantics, so transferring ownership - * may only be accomplished by moving the object. - */ - basic_mmap(const basic_mmap&) = delete; - basic_mmap(basic_mmap&&); - basic_mmap& operator=(const basic_mmap&) = delete; - basic_mmap& operator=(basic_mmap&&); - - /** - * If this is a read-write mapping, the destructor invokes sync. Regardless - * of the access mode, unmap is invoked as a final step. - */ - ~basic_mmap(); - - /** - * On UNIX systems 'file_handle' and 'mapping_handle' are the same. On Windows, - * however, a mapped region of a file gets its own handle, which is returned by - * 'mapping_handle'. - */ - handle_type file_handle() const noexcept { return file_handle_; } - handle_type mapping_handle() const noexcept; - - /** Returns whether a valid memory mapping has been created. */ - bool is_open() const noexcept { return file_handle_ != invalid_handle; } - - /** - * Returns true if no mapping was established, that is, conceptually the - * same as though the length that was mapped was 0. This function is - * provided so that this class has Container semantics. - */ - bool empty() const noexcept { return length() == 0; } - - /** Returns true if a mapping was established. */ - bool is_mapped() const noexcept; - - /** - * `size` and `length` both return the logical length, i.e. the number of bytes - * user requested to be mapped, while `mapped_length` returns the actual number of - * bytes that were mapped which is a multiple of the underlying operating system's - * page allocation granularity. - */ - size_type size() const noexcept { return length(); } - size_type length() const noexcept { return length_; } - size_type mapped_length() const noexcept { return mapped_length_; } - - /** - * Returns the offset, relative to the file's start, at which the mapping was - * requested to be created. - */ - size_type offset() const noexcept { return mapped_length_ - length_; } - - /** - * Returns a pointer to the first requested byte, or `nullptr` if no memory mapping - * exists. - */ - template< - access_mode A = AccessMode, - typename = typename std::enable_if::type - > pointer data() noexcept { return data_; } - const_pointer data() const noexcept { return data_; } - - /** - * Returns an iterator to the first requested byte, if a valid memory mapping - * exists, otherwise this function call is undefined behaviour. - */ - template< - access_mode A = AccessMode, - typename = typename std::enable_if::type - > iterator begin() noexcept { return data(); } - const_iterator begin() const noexcept { return data(); } - const_iterator cbegin() const noexcept { return data(); } - - /** - * Returns an iterator one past the last requested byte, if a valid memory mapping - * exists, otherwise this function call is undefined behaviour. - */ - template< - access_mode A = AccessMode, - typename = typename std::enable_if::type - > iterator end() noexcept { return data() + length(); } - const_iterator end() const noexcept { return data() + length(); } - const_iterator cend() const noexcept { return data() + length(); } - - /** - * Returns a reverse iterator to the last memory mapped byte, if a valid - * memory mapping exists, otherwise this function call is undefined - * behaviour. - */ - template< - access_mode A = AccessMode, - typename = typename std::enable_if::type - > reverse_iterator rbegin() noexcept { return reverse_iterator(end()); } - const_reverse_iterator rbegin() const noexcept - { return const_reverse_iterator(end()); } - const_reverse_iterator crbegin() const noexcept - { return const_reverse_iterator(end()); } - - /** - * Returns a reverse iterator past the first mapped byte, if a valid memory - * mapping exists, otherwise this function call is undefined behaviour. - */ - template< - access_mode A = AccessMode, - typename = typename std::enable_if::type - > reverse_iterator rend() noexcept { return reverse_iterator(begin()); } - const_reverse_iterator rend() const noexcept - { return const_reverse_iterator(begin()); } - const_reverse_iterator crend() const noexcept - { return const_reverse_iterator(begin()); } - - /** - * Returns a reference to the `i`th byte from the first requested byte (as returned - * by `data`). If this is invoked when no valid memory mapping has been created - * prior to this call, undefined behaviour ensues. - */ - reference operator[](const size_type i) noexcept { return data_[i]; } - const_reference operator[](const size_type i) const noexcept { return data_[i]; } - - /** - * Establishes a memory mapping with AccessMode. If the mapping is unsuccesful, the - * reason is reported via `error` and the object remains in a state as if this - * function hadn't been called. - * - * `path`, which must be a path to an existing file, is used to retrieve a file - * handle (which is closed when the object destructs or `unmap` is called), which is - * then used to memory map the requested region. Upon failure, `error` is set to - * indicate the reason and the object remains in an unmapped state. - * - * `offset` is the number of bytes, relative to the start of the file, where the - * mapping should begin. When specifying it, there is no need to worry about - * providing a value that is aligned with the operating system's page allocation - * granularity. This is adjusted by the implementation such that the first requested - * byte (as returned by `data` or `begin`), so long as `offset` is valid, will be at - * `offset` from the start of the file. - * - * `length` is the number of bytes to map. It may be `map_entire_file`, in which - * case a mapping of the entire file is created. - */ - template - void map(const String& path, const size_type offset, - const size_type length, std::error_code& error); - - /** - * Establishes a memory mapping with AccessMode. If the mapping is unsuccesful, the - * reason is reported via `error` and the object remains in a state as if this - * function hadn't been called. - * - * `path`, which must be a path to an existing file, is used to retrieve a file - * handle (which is closed when the object destructs or `unmap` is called), which is - * then used to memory map the requested region. Upon failure, `error` is set to - * indicate the reason and the object remains in an unmapped state. - * - * The entire file is mapped. - */ - template - void map(const String& path, std::error_code& error) - { - map(path, 0, map_entire_file, error); - } - - /** - * Establishes a memory mapping with AccessMode. If the mapping is - * unsuccesful, the reason is reported via `error` and the object remains in - * a state as if this function hadn't been called. - * - * `handle`, which must be a valid file handle, which is used to memory map the - * requested region. Upon failure, `error` is set to indicate the reason and the - * object remains in an unmapped state. - * - * `offset` is the number of bytes, relative to the start of the file, where the - * mapping should begin. When specifying it, there is no need to worry about - * providing a value that is aligned with the operating system's page allocation - * granularity. This is adjusted by the implementation such that the first requested - * byte (as returned by `data` or `begin`), so long as `offset` is valid, will be at - * `offset` from the start of the file. - * - * `length` is the number of bytes to map. It may be `map_entire_file`, in which - * case a mapping of the entire file is created. - */ - void map(const handle_type handle, const size_type offset, - const size_type length, std::error_code& error); - - /** - * Establishes a memory mapping with AccessMode. If the mapping is - * unsuccesful, the reason is reported via `error` and the object remains in - * a state as if this function hadn't been called. - * - * `handle`, which must be a valid file handle, which is used to memory map the - * requested region. Upon failure, `error` is set to indicate the reason and the - * object remains in an unmapped state. - * - * The entire file is mapped. - */ - void map(const handle_type handle, std::error_code& error) - { - map(handle, 0, map_entire_file, error); - } - - /** - * If a valid memory mapping has been created prior to this call, this call - * instructs the kernel to unmap the memory region and disassociate this object - * from the file. - * - * The file handle associated with the file that is mapped is only closed if the - * mapping was created using a file path. If, on the other hand, an existing - * file handle was used to create the mapping, the file handle is not closed. - */ - void unmap(); - - void swap(basic_mmap& other); - - /** Flushes the memory mapped page to disk. Errors are reported via `error`. */ - template - typename std::enable_if::type - sync(std::error_code& error); - - /** - * All operators compare the address of the first byte and size of the two mapped - * regions. - */ - -private: - template< - access_mode A = AccessMode, - typename = typename std::enable_if::type - > pointer get_mapping_start() noexcept - { - return !data() ? nullptr : data() - offset(); - } - - const_pointer get_mapping_start() const noexcept - { - return !data() ? nullptr : data() - offset(); - } - - /** - * The destructor syncs changes to disk if `AccessMode` is `write`, but not - * if it's `read`, but since the destructor cannot be templated, we need to - * do SFINAE in a dedicated function, where one syncs and the other is a noop. - */ - template - typename std::enable_if::type - conditional_sync(); - template - typename std::enable_if::type conditional_sync(); -}; - -template -bool operator==(const basic_mmap& a, - const basic_mmap& b); - -template -bool operator!=(const basic_mmap& a, - const basic_mmap& b); - -template -bool operator<(const basic_mmap& a, - const basic_mmap& b); - -template -bool operator<=(const basic_mmap& a, - const basic_mmap& b); - -template -bool operator>(const basic_mmap& a, - const basic_mmap& b); - -template -bool operator>=(const basic_mmap& a, - const basic_mmap& b); - -/** - * This is the basis for all read-only mmap objects and should be preferred over - * directly using `basic_mmap`. - */ -template -using basic_mmap_source = basic_mmap; - -/** - * This is the basis for all read-write mmap objects and should be preferred over - * directly using `basic_mmap`. - */ -template -using basic_mmap_sink = basic_mmap; - -/** - * These aliases cover the most common use cases, both representing a raw byte stream - * (either with a char or an unsigned char/uint8_t). - */ -using mmap_source = basic_mmap_source; -using ummap_source = basic_mmap_source; - -using mmap_sink = basic_mmap_sink; -using ummap_sink = basic_mmap_sink; - -/** - * Convenience factory method that constructs a mapping for any `basic_mmap` or - * `basic_mmap` type. - */ -template< - typename MMap, - typename MappingToken -> MMap make_mmap(const MappingToken& token, - int64_t offset, int64_t length, std::error_code& error) -{ - MMap mmap; - mmap.map(token, offset, length, error); - return mmap; -} - -/** - * Convenience factory method. - * - * MappingToken may be a String (`std::string`, `std::string_view`, `const char*`, - * `std::filesystem::path`, `std::vector`, or similar), or a - * `mmap_source::handle_type`. - */ -template -mmap_source make_mmap_source(const MappingToken& token, mmap_source::size_type offset, - mmap_source::size_type length, std::error_code& error) -{ - return make_mmap(token, offset, length, error); -} - -template -mmap_source make_mmap_source(const MappingToken& token, std::error_code& error) -{ - return make_mmap_source(token, 0, map_entire_file, error); -} - -/** - * Convenience factory method. - * - * MappingToken may be a String (`std::string`, `std::string_view`, `const char*`, - * `std::filesystem::path`, `std::vector`, or similar), or a - * `mmap_sink::handle_type`. - */ -template -mmap_sink make_mmap_sink(const MappingToken& token, mmap_sink::size_type offset, - mmap_sink::size_type length, std::error_code& error) -{ - return make_mmap(token, offset, length, error); -} - -template -mmap_sink make_mmap_sink(const MappingToken& token, std::error_code& error) -{ - return make_mmap_sink(token, 0, map_entire_file, error); -} - -} // namespace mio - -// #include "detail/mmap.ipp" -/* Copyright 2017 https://github.com/mandreyel - * - * Permission is hereby granted, free of charge, to any person obtaining a copy of this - * software and associated documentation files (the "Software"), to deal in the Software - * without restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be included in all copies - * or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, - * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A - * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF - * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE - * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef MIO_BASIC_MMAP_IMPL -#define MIO_BASIC_MMAP_IMPL - -// #include "mio/mmap.hpp" - -// #include "mio/page.hpp" - -// #include "mio/detail/string_util.hpp" -/* Copyright 2017 https://github.com/mandreyel - * - * Permission is hereby granted, free of charge, to any person obtaining a copy of this - * software and associated documentation files (the "Software"), to deal in the Software - * without restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be included in all copies - * or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, - * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A - * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF - * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE - * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef MIO_STRING_UTIL_HEADER -#define MIO_STRING_UTIL_HEADER - -#include - -namespace mio { -namespace detail { - -template< - typename S, - typename C = typename std::decay::type, - typename = decltype(std::declval().data()), - typename = typename std::enable_if< - std::is_same::value -#ifdef _WIN32 - || std::is_same::value -#endif - >::type -> struct char_type_helper { - using type = typename C::value_type; -}; - -template -struct char_type { - using type = typename char_type_helper::type; -}; - -// TODO: can we avoid this brute force approach? -template<> -struct char_type { - using type = char; -}; - -template<> -struct char_type { - using type = char; -}; - -template -struct char_type { - using type = char; -}; - -template -struct char_type { - using type = char; -}; - -#ifdef _WIN32 -template<> -struct char_type { - using type = wchar_t; -}; - -template<> -struct char_type { - using type = wchar_t; -}; - -template -struct char_type { - using type = wchar_t; -}; - -template -struct char_type { - using type = wchar_t; -}; -#endif // _WIN32 - -template -struct is_c_str_helper -{ - static constexpr bool value = std::is_same< - CharT*, - // TODO: I'm so sorry for this... Can this be made cleaner? - typename std::add_pointer< - typename std::remove_cv< - typename std::remove_pointer< - typename std::decay< - S - >::type - >::type - >::type - >::type - >::value; -}; - -template -struct is_c_str -{ - static constexpr bool value = is_c_str_helper::value; -}; - -#ifdef _WIN32 -template -struct is_c_wstr -{ - static constexpr bool value = is_c_str_helper::value; -}; -#endif // _WIN32 - -template -struct is_c_str_or_c_wstr -{ - static constexpr bool value = is_c_str::value -#ifdef _WIN32 - || is_c_wstr::value -#endif - ; -}; - -template< - typename String, - typename = decltype(std::declval().data()), - typename = typename std::enable_if::value>::type -> const typename char_type::type* c_str(const String& path) -{ - return path.data(); -} - -template< - typename String, - typename = decltype(std::declval().empty()), - typename = typename std::enable_if::value>::type -> bool empty(const String& path) -{ - return path.empty(); -} - -template< - typename String, - typename = typename std::enable_if::value>::type -> const typename char_type::type* c_str(String path) -{ - return path; -} - -template< - typename String, - typename = typename std::enable_if::value>::type -> bool empty(String path) -{ - return !path || (*path == 0); -} - -} // namespace detail -} // namespace mio - -#endif // MIO_STRING_UTIL_HEADER - - -#include - -#ifndef _WIN32 -# include -# include -# include -# include -#endif - -namespace mio { -namespace detail { - -#ifdef _WIN32 -namespace win { - -/** Returns the 4 upper bytes of an 8-byte integer. */ -inline DWORD int64_high(int64_t n) noexcept -{ - return n >> 32; -} - -/** Returns the 4 lower bytes of an 8-byte integer. */ -inline DWORD int64_low(int64_t n) noexcept -{ - return n & 0xffffffff; -} - -template< - typename String, - typename = typename std::enable_if< - std::is_same::type, char>::value - >::type -> file_handle_type open_file_helper(const String& path, const access_mode mode) -{ - return ::CreateFileA(c_str(path), - mode == access_mode::read ? GENERIC_READ : GENERIC_READ | GENERIC_WRITE, - FILE_SHARE_READ | FILE_SHARE_WRITE, - 0, - OPEN_EXISTING, - FILE_ATTRIBUTE_NORMAL, - 0); -} - -template -typename std::enable_if< - std::is_same::type, wchar_t>::value, - file_handle_type ->::type open_file_helper(const String& path, const access_mode mode) -{ - return ::CreateFileW(c_str(path), - mode == access_mode::read ? GENERIC_READ : GENERIC_READ | GENERIC_WRITE, - FILE_SHARE_READ | FILE_SHARE_WRITE, - 0, - OPEN_EXISTING, - FILE_ATTRIBUTE_NORMAL, - 0); -} - -} // win -#endif // _WIN32 - -/** - * Returns the last platform specific system error (errno on POSIX and - * GetLastError on Win) as a `std::error_code`. - */ -inline std::error_code last_error() noexcept -{ - std::error_code error; -#ifdef _WIN32 - error.assign(GetLastError(), std::system_category()); -#else - error.assign(errno, std::system_category()); -#endif - return error; -} - -template -file_handle_type open_file(const String& path, const access_mode mode, - std::error_code& error) -{ - error.clear(); - if(detail::empty(path)) - { - error = std::make_error_code(std::errc::invalid_argument); - return invalid_handle; - } -#ifdef _WIN32 - const auto handle = win::open_file_helper(path, mode); -#else // POSIX - const auto handle = ::open(c_str(path), - mode == access_mode::read ? O_RDONLY : O_RDWR); -#endif - if(handle == invalid_handle) - { - error = detail::last_error(); - } - return handle; -} - -inline size_t query_file_size(file_handle_type handle, std::error_code& error) -{ - error.clear(); -#ifdef _WIN32 - LARGE_INTEGER file_size; - if(::GetFileSizeEx(handle, &file_size) == 0) - { - error = detail::last_error(); - return 0; - } - return static_cast(file_size.QuadPart); -#else // POSIX - struct stat sbuf; - if(::fstat(handle, &sbuf) == -1) - { - error = detail::last_error(); - return 0; - } - return sbuf.st_size; -#endif -} - -struct mmap_context -{ - char* data; - int64_t length; - int64_t mapped_length; -#ifdef _WIN32 - file_handle_type file_mapping_handle; -#endif -}; - -inline mmap_context memory_map(const file_handle_type file_handle, const int64_t offset, - const int64_t length, const access_mode mode, std::error_code& error) -{ - const int64_t aligned_offset = make_offset_page_aligned(offset); - const int64_t length_to_map = offset - aligned_offset + length; -#ifdef _WIN32 - const int64_t max_file_size = offset + length; - const auto file_mapping_handle = ::CreateFileMapping( - file_handle, - 0, - mode == access_mode::read ? PAGE_READONLY : PAGE_READWRITE, - win::int64_high(max_file_size), - win::int64_low(max_file_size), - 0); - if(file_mapping_handle == invalid_handle) - { - error = detail::last_error(); - return {}; - } - char* mapping_start = static_cast(::MapViewOfFile( - file_mapping_handle, - mode == access_mode::read ? FILE_MAP_READ : FILE_MAP_WRITE, - win::int64_high(aligned_offset), - win::int64_low(aligned_offset), - length_to_map)); - if(mapping_start == nullptr) - { - error = detail::last_error(); - return {}; - } -#else // POSIX - char* mapping_start = static_cast(::mmap( - 0, // Don't give hint as to where to map. - length_to_map, - mode == access_mode::read ? PROT_READ : PROT_WRITE, - MAP_SHARED, - file_handle, - aligned_offset)); - if(mapping_start == MAP_FAILED) - { - error = detail::last_error(); - return {}; - } -#endif - mmap_context ctx; - ctx.data = mapping_start + offset; - ctx.length = length; - ctx.mapped_length = length_to_map; -#ifdef _WIN32 - ctx.file_mapping_handle = file_mapping_handle; -#endif - return ctx; -} - -} // namespace detail - -// -- basic_mmap -- - -template -basic_mmap::~basic_mmap() -{ - conditional_sync(); - unmap(); -} - -template -basic_mmap::basic_mmap(basic_mmap&& other) - : data_(std::move(other.data_)) - , length_(std::move(other.length_)) - , mapped_length_(std::move(other.mapped_length_)) - , file_handle_(std::move(other.file_handle_)) -#ifdef _WIN32 - , file_mapping_handle_(std::move(other.file_mapping_handle_)) -#endif - , is_handle_internal_(std::move(other.is_handle_internal_)) -{ - other.data_ = nullptr; - other.length_ = other.mapped_length_ = 0; - other.file_handle_ = invalid_handle; -#ifdef _WIN32 - other.file_mapping_handle_ = invalid_handle; -#endif -} - -template -basic_mmap& -basic_mmap::operator=(basic_mmap&& other) -{ - if(this != &other) - { - // First the existing mapping needs to be removed. - unmap(); - data_ = std::move(other.data_); - length_ = std::move(other.length_); - mapped_length_ = std::move(other.mapped_length_); - file_handle_ = std::move(other.file_handle_); -#ifdef _WIN32 - file_mapping_handle_ = std::move(other.file_mapping_handle_); -#endif - is_handle_internal_ = std::move(other.is_handle_internal_); - - // The moved from basic_mmap's fields need to be reset, because - // otherwise other's destructor will unmap the same mapping that was - // just moved into this. - other.data_ = nullptr; - other.length_ = other.mapped_length_ = 0; - other.file_handle_ = invalid_handle; -#ifdef _WIN32 - other.file_mapping_handle_ = invalid_handle; -#endif - other.is_handle_internal_ = false; - } - return *this; -} - -template -typename basic_mmap::handle_type -basic_mmap::mapping_handle() const noexcept -{ -#ifdef _WIN32 - return file_mapping_handle_; -#else - return file_handle_; -#endif -} - -template -template -void basic_mmap::map(const String& path, const size_type offset, - const size_type length, std::error_code& error) -{ - error.clear(); - if(detail::empty(path)) - { - error = std::make_error_code(std::errc::invalid_argument); - return; - } - const auto handle = detail::open_file(path, AccessMode, error); - if(error) - { - return; - } - - map(handle, offset, length, error); - // This MUST be after the call to map, as that sets this to true. - if(!error) - { - is_handle_internal_ = true; - } -} - -template -void basic_mmap::map(const handle_type handle, - const size_type offset, const size_type length, std::error_code& error) -{ - error.clear(); - if(handle == invalid_handle) - { - error = std::make_error_code(std::errc::bad_file_descriptor); - return; - } - - const auto file_size = detail::query_file_size(handle, error); - if(error) - { - return; - } - - if(offset + length > file_size) - { - error = std::make_error_code(std::errc::invalid_argument); - return; - } - - const auto ctx = detail::memory_map(handle, offset, - length == map_entire_file ? (file_size - offset) : length, - AccessMode, error); - if(!error) - { - // We must unmap the previous mapping that may have existed prior to this call. - // Note that this must only be invoked after a new mapping has been created in - // order to provide the strong guarantee that, should the new mapping fail, the - // `map` function leaves this instance in a state as though the function had - // never been invoked. - unmap(); - file_handle_ = handle; - is_handle_internal_ = false; - data_ = reinterpret_cast(ctx.data); - length_ = ctx.length; - mapped_length_ = ctx.mapped_length; -#ifdef _WIN32 - file_mapping_handle_ = ctx.file_mapping_handle; -#endif - } -} - -template -template -typename std::enable_if::type -basic_mmap::sync(std::error_code& error) -{ - error.clear(); - if(!is_open()) - { - error = std::make_error_code(std::errc::bad_file_descriptor); - return; - } - - if(data()) - { -#ifdef _WIN32 - if(::FlushViewOfFile(get_mapping_start(), mapped_length_) == 0 - || ::FlushFileBuffers(file_handle_) == 0) -#else // POSIX - if(::msync(get_mapping_start(), mapped_length_, MS_SYNC) != 0) -#endif - { - error = detail::last_error(); - return; - } - } -#ifdef _WIN32 - if(::FlushFileBuffers(file_handle_) == 0) - { - error = detail::last_error(); - } -#endif -} - -template -void basic_mmap::unmap() -{ - if(!is_open()) { return; } - // TODO do we care about errors here? -#ifdef _WIN32 - if(is_mapped()) - { - ::UnmapViewOfFile(get_mapping_start()); - ::CloseHandle(file_mapping_handle_); - } -#else // POSIX - if(data_) { ::munmap(const_cast(get_mapping_start()), mapped_length_); } -#endif - - // If `file_handle_` was obtained by our opening it (when map is called with - // a path, rather than an existing file handle), we need to close it, - // otherwise it must not be closed as it may still be used outside this - // instance. - if(is_handle_internal_) - { -#ifdef _WIN32 - ::CloseHandle(file_handle_); -#else // POSIX - ::close(file_handle_); -#endif - } - - // Reset fields to their default values. - data_ = nullptr; - length_ = mapped_length_ = 0; - file_handle_ = invalid_handle; -#ifdef _WIN32 - file_mapping_handle_ = invalid_handle; -#endif -} - -template -bool basic_mmap::is_mapped() const noexcept -{ -#ifdef _WIN32 - return file_mapping_handle_ != invalid_handle; -#else // POSIX - return is_open(); -#endif -} - -template -void basic_mmap::swap(basic_mmap& other) -{ - if(this != &other) - { - using std::swap; - swap(data_, other.data_); - swap(file_handle_, other.file_handle_); -#ifdef _WIN32 - swap(file_mapping_handle_, other.file_mapping_handle_); -#endif - swap(length_, other.length_); - swap(mapped_length_, other.mapped_length_); - swap(is_handle_internal_, other.is_handle_internal_); - } -} - -template -template -typename std::enable_if::type -basic_mmap::conditional_sync() -{ - // This is invoked from the destructor, so not much we can do about - // failures here. - std::error_code ec; - sync(ec); -} - -template -template -typename std::enable_if::type -basic_mmap::conditional_sync() -{ - // noop -} - -template -bool operator==(const basic_mmap& a, - const basic_mmap& b) -{ - return a.data() == b.data() - && a.size() == b.size(); -} - -template -bool operator!=(const basic_mmap& a, - const basic_mmap& b) -{ - return !(a == b); -} - -template -bool operator<(const basic_mmap& a, - const basic_mmap& b) -{ - if(a.data() == b.data()) { return a.size() < b.size(); } - return a.data() < b.data(); -} - -template -bool operator<=(const basic_mmap& a, - const basic_mmap& b) -{ - return !(a > b); -} - -template -bool operator>(const basic_mmap& a, - const basic_mmap& b) -{ - if(a.data() == b.data()) { return a.size() > b.size(); } - return a.data() > b.data(); -} - -template -bool operator>=(const basic_mmap& a, - const basic_mmap& b) -{ - return !(a < b); -} - -} // namespace mio - -#endif // MIO_BASIC_MMAP_IMPL - - -#endif // MIO_MMAP_HEADER -/* Copyright 2017 https://github.com/mandreyel - * - * Permission is hereby granted, free of charge, to any person obtaining a copy of this - * software and associated documentation files (the "Software"), to deal in the Software - * without restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be included in all copies - * or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, - * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A - * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF - * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE - * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef MIO_PAGE_HEADER -#define MIO_PAGE_HEADER - -#ifdef _WIN32 -# include -#else -# include -#endif - -namespace mio { - -/** - * This is used by `basic_mmap` to determine whether to create a read-only or - * a read-write memory mapping. - */ -enum class access_mode -{ - read, - write -}; - -/** - * Determines the operating system's page allocation granularity. - * - * On the first call to this function, it invokes the operating system specific syscall - * to determine the page size, caches the value, and returns it. Any subsequent call to - * this function serves the cached value, so no further syscalls are made. - */ -inline size_t page_size() -{ - static const size_t page_size = [] - { -#ifdef _WIN32 - SYSTEM_INFO SystemInfo; - GetSystemInfo(&SystemInfo); - return SystemInfo.dwAllocationGranularity; -#else - return sysconf(_SC_PAGE_SIZE); -#endif - }(); - return page_size; -} - -/** - * Alligns `offset` to the operating's system page size such that it subtracts the - * difference until the nearest page boundary before `offset`, or does nothing if - * `offset` is already page aligned. - */ -inline size_t make_offset_page_aligned(size_t offset) noexcept -{ - const size_t page_size_ = page_size(); - // Use integer division to round down to the nearest page alignment. - return offset / page_size_ * page_size_; -} - -} // namespace mio - -#endif // MIO_PAGE_HEADER -/* Copyright 2017 https://github.com/mandreyel - * - * Permission is hereby granted, free of charge, to any person obtaining a copy of this - * software and associated documentation files (the "Software"), to deal in the Software - * without restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be included in all copies - * or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, - * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A - * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF - * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE - * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef MIO_SHARED_MMAP_HEADER -#define MIO_SHARED_MMAP_HEADER - -// #include "mio/mmap.hpp" - - -#include // std::error_code -#include // std::shared_ptr - -namespace mio { - -/** - * Exposes (nearly) the same interface as `basic_mmap`, but endowes it with - * `std::shared_ptr` semantics. - * - * This is not the default behaviour of `basic_mmap` to avoid allocating on the heap if - * shared semantics are not required. - */ -template< - access_mode AccessMode, - typename ByteT -> class basic_shared_mmap -{ - using impl_type = basic_mmap; - std::shared_ptr pimpl_; - -public: - using value_type = typename impl_type::value_type; - using size_type = typename impl_type::size_type; - using reference = typename impl_type::reference; - using const_reference = typename impl_type::const_reference; - using pointer = typename impl_type::pointer; - using const_pointer = typename impl_type::const_pointer; - using difference_type = typename impl_type::difference_type; - using iterator = typename impl_type::iterator; - using const_iterator = typename impl_type::const_iterator; - using reverse_iterator = typename impl_type::reverse_iterator; - using const_reverse_iterator = typename impl_type::const_reverse_iterator; - using iterator_category = typename impl_type::iterator_category; - using handle_type = typename impl_type::handle_type; - using mmap_type = impl_type; - - basic_shared_mmap() = default; - basic_shared_mmap(const basic_shared_mmap&) = default; - basic_shared_mmap& operator=(const basic_shared_mmap&) = default; - basic_shared_mmap(basic_shared_mmap&&) = default; - basic_shared_mmap& operator=(basic_shared_mmap&&) = default; - - /** Takes ownership of an existing mmap object. */ - basic_shared_mmap(mmap_type&& mmap) - : pimpl_(std::make_shared(std::move(mmap))) - {} - - /** Takes ownership of an existing mmap object. */ - basic_shared_mmap& operator=(mmap_type&& mmap) - { - pimpl_ = std::make_shared(std::move(mmap)); - return *this; - } - - /** Initializes this object with an already established shared mmap. */ - basic_shared_mmap(std::shared_ptr mmap) : pimpl_(std::move(mmap)) {} - - /** Initializes this object with an already established shared mmap. */ - basic_shared_mmap& operator=(std::shared_ptr mmap) - { - pimpl_ = std::move(mmap); - return *this; - } - -#ifdef __cpp_exceptions - /** - * The same as invoking the `map` function, except any error that may occur - * while establishing the mapping is wrapped in a `std::system_error` and is - * thrown. - */ - template - basic_shared_mmap(const String& path, const size_type offset = 0, const size_type length = map_entire_file) - { - std::error_code error; - map(path, offset, length, error); - if(error) { throw std::system_error(error); } - } - - /** - * The same as invoking the `map` function, except any error that may occur - * while establishing the mapping is wrapped in a `std::system_error` and is - * thrown. - */ - basic_shared_mmap(const handle_type handle, const size_type offset = 0, const size_type length = map_entire_file) - { - std::error_code error; - map(handle, offset, length, error); - if(error) { throw std::system_error(error); } - } -#endif // __cpp_exceptions - - /** - * If this is a read-write mapping and the last reference to the mapping, - * the destructor invokes sync. Regardless of the access mode, unmap is - * invoked as a final step. - */ - ~basic_shared_mmap() = default; - - /** Returns the underlying `std::shared_ptr` instance that holds the mmap. */ - std::shared_ptr get_shared_ptr() { return pimpl_; } - - /** - * On UNIX systems 'file_handle' and 'mapping_handle' are the same. On Windows, - * however, a mapped region of a file gets its own handle, which is returned by - * 'mapping_handle'. - */ - handle_type file_handle() const noexcept - { - return pimpl_ ? pimpl_->file_handle() : invalid_handle; - } - - handle_type mapping_handle() const noexcept - { - return pimpl_ ? pimpl_->mapping_handle() : invalid_handle; - } - - /** Returns whether a valid memory mapping has been created. */ - bool is_open() const noexcept { return pimpl_ && pimpl_->is_open(); } - - /** - * Returns true if no mapping was established, that is, conceptually the - * same as though the length that was mapped was 0. This function is - * provided so that this class has Container semantics. - */ - bool empty() const noexcept { return !pimpl_ || pimpl_->empty(); } - - /** - * `size` and `length` both return the logical length, i.e. the number of bytes - * user requested to be mapped, while `mapped_length` returns the actual number of - * bytes that were mapped which is a multiple of the underlying operating system's - * page allocation granularity. - */ - size_type size() const noexcept { return pimpl_ ? pimpl_->length() : 0; } - size_type length() const noexcept { return pimpl_ ? pimpl_->length() : 0; } - size_type mapped_length() const noexcept - { - return pimpl_ ? pimpl_->mapped_length() : 0; - } - - /** - * Returns the offset, relative to the file's start, at which the mapping was - * requested to be created. - */ - size_type offset() const noexcept { return pimpl_ ? pimpl_->offset() : 0; } - - /** - * Returns a pointer to the first requested byte, or `nullptr` if no memory mapping - * exists. - */ - template< - access_mode A = AccessMode, - typename = typename std::enable_if::type - > pointer data() noexcept { return pimpl_->data(); } - const_pointer data() const noexcept { return pimpl_ ? pimpl_->data() : nullptr; } - - /** - * Returns an iterator to the first requested byte, if a valid memory mapping - * exists, otherwise this function call is undefined behaviour. - */ - iterator begin() noexcept { return pimpl_->begin(); } - const_iterator begin() const noexcept { return pimpl_->begin(); } - const_iterator cbegin() const noexcept { return pimpl_->cbegin(); } - - /** - * Returns an iterator one past the last requested byte, if a valid memory mapping - * exists, otherwise this function call is undefined behaviour. - */ - template< - access_mode A = AccessMode, - typename = typename std::enable_if::type - > iterator end() noexcept { return pimpl_->end(); } - const_iterator end() const noexcept { return pimpl_->end(); } - const_iterator cend() const noexcept { return pimpl_->cend(); } - - /** - * Returns a reverse iterator to the last memory mapped byte, if a valid - * memory mapping exists, otherwise this function call is undefined - * behaviour. - */ - template< - access_mode A = AccessMode, - typename = typename std::enable_if::type - > reverse_iterator rbegin() noexcept { return pimpl_->rbegin(); } - const_reverse_iterator rbegin() const noexcept { return pimpl_->rbegin(); } - const_reverse_iterator crbegin() const noexcept { return pimpl_->crbegin(); } - - /** - * Returns a reverse iterator past the first mapped byte, if a valid memory - * mapping exists, otherwise this function call is undefined behaviour. - */ - template< - access_mode A = AccessMode, - typename = typename std::enable_if::type - > reverse_iterator rend() noexcept { return pimpl_->rend(); } - const_reverse_iterator rend() const noexcept { return pimpl_->rend(); } - const_reverse_iterator crend() const noexcept { return pimpl_->crend(); } - - /** - * Returns a reference to the `i`th byte from the first requested byte (as returned - * by `data`). If this is invoked when no valid memory mapping has been created - * prior to this call, undefined behaviour ensues. - */ - reference operator[](const size_type i) noexcept { return (*pimpl_)[i]; } - const_reference operator[](const size_type i) const noexcept { return (*pimpl_)[i]; } - - /** - * Establishes a memory mapping with AccessMode. If the mapping is unsuccesful, the - * reason is reported via `error` and the object remains in a state as if this - * function hadn't been called. - * - * `path`, which must be a path to an existing file, is used to retrieve a file - * handle (which is closed when the object destructs or `unmap` is called), which is - * then used to memory map the requested region. Upon failure, `error` is set to - * indicate the reason and the object remains in an unmapped state. - * - * `offset` is the number of bytes, relative to the start of the file, where the - * mapping should begin. When specifying it, there is no need to worry about - * providing a value that is aligned with the operating system's page allocation - * granularity. This is adjusted by the implementation such that the first requested - * byte (as returned by `data` or `begin`), so long as `offset` is valid, will be at - * `offset` from the start of the file. - * - * `length` is the number of bytes to map. It may be `map_entire_file`, in which - * case a mapping of the entire file is created. - */ - template - void map(const String& path, const size_type offset, - const size_type length, std::error_code& error) - { - map_impl(path, offset, length, error); - } - - /** - * Establishes a memory mapping with AccessMode. If the mapping is unsuccesful, the - * reason is reported via `error` and the object remains in a state as if this - * function hadn't been called. - * - * `path`, which must be a path to an existing file, is used to retrieve a file - * handle (which is closed when the object destructs or `unmap` is called), which is - * then used to memory map the requested region. Upon failure, `error` is set to - * indicate the reason and the object remains in an unmapped state. - * - * The entire file is mapped. - */ - template - void map(const String& path, std::error_code& error) - { - map_impl(path, 0, map_entire_file, error); - } - - /** - * Establishes a memory mapping with AccessMode. If the mapping is unsuccesful, the - * reason is reported via `error` and the object remains in a state as if this - * function hadn't been called. - * - * `handle`, which must be a valid file handle, which is used to memory map the - * requested region. Upon failure, `error` is set to indicate the reason and the - * object remains in an unmapped state. - * - * `offset` is the number of bytes, relative to the start of the file, where the - * mapping should begin. When specifying it, there is no need to worry about - * providing a value that is aligned with the operating system's page allocation - * granularity. This is adjusted by the implementation such that the first requested - * byte (as returned by `data` or `begin`), so long as `offset` is valid, will be at - * `offset` from the start of the file. - * - * `length` is the number of bytes to map. It may be `map_entire_file`, in which - * case a mapping of the entire file is created. - */ - void map(const handle_type handle, const size_type offset, - const size_type length, std::error_code& error) - { - map_impl(handle, offset, length, error); - } - - /** - * Establishes a memory mapping with AccessMode. If the mapping is unsuccesful, the - * reason is reported via `error` and the object remains in a state as if this - * function hadn't been called. - * - * `handle`, which must be a valid file handle, which is used to memory map the - * requested region. Upon failure, `error` is set to indicate the reason and the - * object remains in an unmapped state. - * - * The entire file is mapped. - */ - void map(const handle_type handle, std::error_code& error) - { - map_impl(handle, 0, map_entire_file, error); - } - - /** - * If a valid memory mapping has been created prior to this call, this call - * instructs the kernel to unmap the memory region and disassociate this object - * from the file. - * - * The file handle associated with the file that is mapped is only closed if the - * mapping was created using a file path. If, on the other hand, an existing - * file handle was used to create the mapping, the file handle is not closed. - */ - void unmap() { if(pimpl_) pimpl_->unmap(); } - - void swap(basic_shared_mmap& other) { pimpl_.swap(other.pimpl_); } - - /** Flushes the memory mapped page to disk. Errors are reported via `error`. */ - template< - access_mode A = AccessMode, - typename = typename std::enable_if::type - > void sync(std::error_code& error) { if(pimpl_) pimpl_->sync(error); } - - /** All operators compare the underlying `basic_mmap`'s addresses. */ - - friend bool operator==(const basic_shared_mmap& a, const basic_shared_mmap& b) - { - return a.pimpl_ == b.pimpl_; - } - - friend bool operator!=(const basic_shared_mmap& a, const basic_shared_mmap& b) - { - return !(a == b); - } - - friend bool operator<(const basic_shared_mmap& a, const basic_shared_mmap& b) - { - return a.pimpl_ < b.pimpl_; - } - - friend bool operator<=(const basic_shared_mmap& a, const basic_shared_mmap& b) - { - return a.pimpl_ <= b.pimpl_; - } - - friend bool operator>(const basic_shared_mmap& a, const basic_shared_mmap& b) - { - return a.pimpl_ > b.pimpl_; - } - - friend bool operator>=(const basic_shared_mmap& a, const basic_shared_mmap& b) - { - return a.pimpl_ >= b.pimpl_; - } - -private: - template - void map_impl(const MappingToken& token, const size_type offset, - const size_type length, std::error_code& error) - { - if(!pimpl_) - { - mmap_type mmap = make_mmap(token, offset, length, error); - if(error) { return; } - pimpl_ = std::make_shared(std::move(mmap)); - } - else - { - pimpl_->map(token, offset, length, error); - } - } -}; - -/** - * This is the basis for all read-only mmap objects and should be preferred over - * directly using basic_shared_mmap. - */ -template -using basic_shared_mmap_source = basic_shared_mmap; - -/** - * This is the basis for all read-write mmap objects and should be preferred over - * directly using basic_shared_mmap. - */ -template -using basic_shared_mmap_sink = basic_shared_mmap; - -/** - * These aliases cover the most common use cases, both representing a raw byte stream - * (either with a char or an unsigned char/uint8_t). - */ -using shared_mmap_source = basic_shared_mmap_source; -using shared_ummap_source = basic_shared_mmap_source; - -using shared_mmap_sink = basic_shared_mmap_sink; -using shared_ummap_sink = basic_shared_mmap_sink; - -} // namespace mio - -#endif // MIO_SHARED_MMAP_HEADER diff --git a/external/twopaco/graphdump/compact_vector/parallel_iterator_traits.hpp b/external/twopaco/graphdump/compact_vector/parallel_iterator_traits.hpp deleted file mode 100644 index 610ea2c..0000000 --- a/external/twopaco/graphdump/compact_vector/parallel_iterator_traits.hpp +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef __PARALLEL_POINTER_TRAITS_H__ -#define __PARALLEL_POINTER_TRAITS_H__ - -#include - -namespace compact { -// Traits for a parallel iterator. Very weak requirements that if two -// threads hold iterators to two different location, then the pointers -// can be read and stored. -// -// This holds for pointers. But it requires attention when dealing -// with compact iterators. - -template struct parallel_iterator_traits { }; - -template -struct parallel_iterator_traits { - typedef T* type; - static bool cas(type x, T& expected, const T& val) { - const T old = expected; - expected = __sync_val_compare_and_swap(x, expected, val); - return old == expected; - } -}; - -template -struct parallel_iterator_traits { - typedef const T* type; -}; -} // namespace compact - -#endif /* __PARALLEL_POINTER_TRAITS_H__ */ diff --git a/external/twopaco/graphdump/compact_vector/prefetch_iterator_traits.hpp b/external/twopaco/graphdump/compact_vector/prefetch_iterator_traits.hpp deleted file mode 100644 index 40b8e22..0000000 --- a/external/twopaco/graphdump/compact_vector/prefetch_iterator_traits.hpp +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef __PREFETCH_TRAITS_H__ -#define __PREFETCH_TRAITS_H__ - -#include - -namespace compact { -// Traits to prefetch an iterator - -template struct prefetch_iterator_traits { }; - -template -struct prefetch_iterator_traits { - template - static void read(T* ptr) { __builtin_prefetch((void*)ptr, 0, level); } - template - static void write(T* ptr) { __builtin_prefetch((void*)ptr, 1, level); } -}; - -template -struct prefetch_iterator_traits { - template - static void read(const T* ptr) { __builtin_prefetch((void*)ptr, 0, level); } - template - static void write(const T* ptr) { __builtin_prefetch((void*)ptr, 1, level); } -}; -} // namespace compact - -#endif /* __PREFETCH_TRAITS_H__ */ diff --git a/external/twopaco/graphdump/graphdump.cpp b/external/twopaco/graphdump/graphdump.cpp index 07da27d..7cd6a22 100644 --- a/external/twopaco/graphdump/graphdump.cpp +++ b/external/twopaco/graphdump/graphdump.cpp @@ -9,7 +9,6 @@ #include #include #include -#include #include #include @@ -18,24 +17,26 @@ #include #include -#include "pufferize.h" -#include "binaryWriter.h" -bool CompareJunctionsById(const TwoPaCo::JunctionPosition &a, const TwoPaCo::JunctionPosition &b) { - return a.GetId() < b.GetId(); +bool CompareJunctionsById(const TwoPaCo::JunctionPosition & a, const TwoPaCo::JunctionPosition & b) +{ + return a.GetId() < b.GetId(); } -bool CompareJunctionsByPos(const TwoPaCo::JunctionPosition &a, const TwoPaCo::JunctionPosition &b) { - return std::make_pair(a.GetChr(), a.GetPos()) < std::make_pair(b.GetChr(), b.GetPos()); +bool CompareJunctionsByPos(const TwoPaCo::JunctionPosition & a, const TwoPaCo::JunctionPosition & b) +{ + return std::make_pair(a.GetChr(), a.GetPos()) < std::make_pair(b.GetChr(), b.GetPos()); } -struct EqClass { - int64_t label; - std::vector position; +struct EqClass +{ + int64_t label; + std::vector position; }; -int64_t Abs(int64_t x) { - return x > 0 ? x : -x; +int64_t Abs(int64_t x) +{ + return x > 0 ? x : -x; } const int64_t ID_POWER = 35; @@ -43,894 +44,677 @@ int64_t reservedPath = int64_t(1) << (ID_POWER - 1); const int64_t MAX_JUNCTION_ID = int64_t(1) << (ID_POWER - 4); const int64_t MAX_SEGMENT_NUMBER = int64_t(1) << ID_POWER; -class Segment { +class Segment +{ public: - Segment() {} - - Segment(TwoPaCo::JunctionPosition begin, TwoPaCo::JunctionPosition end, char posEdgeCh, char negEdgeCh) { - bool uniquePath = false; - int64_t absBeginId = Abs(begin.GetId()); - int64_t absEndId = Abs(end.GetId()); - if (absBeginId >= MAX_JUNCTION_ID || absEndId >= MAX_JUNCTION_ID) { - throw std::runtime_error("A vertex id is too large, cannot generate GFA"); - } - - bool choseEndJunction = false; - if (absBeginId < absEndId || - (absBeginId == absEndId && (posEdgeCh < negEdgeCh || (posEdgeCh == negEdgeCh && begin.GetId() > 0)))) { - uniquePath = posEdgeCh == 'N'; - segmentId_ = TwoPaCo::DnaChar::MakeUpChar(posEdgeCh); - begin_ = begin; - end_ = end; - } else { - uniquePath = negEdgeCh == 'N'; - segmentId_ = TwoPaCo::DnaChar::MakeUpChar(negEdgeCh); - begin_ = TwoPaCo::JunctionPosition(begin.GetChr(), begin.GetPos(), -end.GetId()); - end_ = TwoPaCo::JunctionPosition(end.GetChr(), end.GetPos(), -begin.GetId()); - choseEndJunction = true; - } - - if (!uniquePath) { - if (begin_.GetId() < 0) { - segmentId_ |= 1 << 2; - segmentId_ |= Abs(begin_.GetId()) << 3; - } else { - segmentId_ |= begin_.GetId() << 3; - } - - //if (begin.GetId() != begin_.GetId()) { - if (choseEndJunction) { - segmentId_ = -segmentId_; - } - } else { - segmentId_ = reservedPath++; - } - } - - int64_t GetSegmentId() const { - return segmentId_; - } - - int64_t GetAbsSegmentId() const { - return Abs(segmentId_); - } + Segment() {} + Segment(TwoPaCo::JunctionPosition begin, TwoPaCo::JunctionPosition end, char posEdgeCh, char negEdgeCh) + { + bool uniquePath = false; + int64_t absBeginId = Abs(begin.GetId()); + int64_t absEndId = Abs(end.GetId()); + if (absBeginId >= MAX_JUNCTION_ID || absEndId >= MAX_JUNCTION_ID) + { + throw std::runtime_error("A vertex id is too large, cannot generate GFA"); + } + + if (absBeginId < absEndId || (absBeginId == absEndId && absBeginId > 0)) + { + uniquePath = posEdgeCh == 'N'; + segmentId_ = TwoPaCo::DnaChar::MakeUpChar(posEdgeCh); + begin_ = begin; + end_ = end; + } + else + { + uniquePath = negEdgeCh == 'N'; + segmentId_ = TwoPaCo::DnaChar::MakeUpChar(negEdgeCh); + begin_ = TwoPaCo::JunctionPosition(begin.GetChr(), begin.GetPos(), -end.GetId()); + end_ = TwoPaCo::JunctionPosition(end.GetChr(), end.GetPos(), -begin.GetId()); + } + + if (!uniquePath) + { + if (begin_.GetId() < 0) + { + segmentId_ |= 1 << 2; + segmentId_ |= Abs(begin_.GetId()) << 3; + } + else + { + segmentId_ |= begin_.GetId() << 3; + } + + if (begin.GetId() != begin_.GetId()) + { + segmentId_ = -segmentId_; + } + } + else + { + segmentId_ = reservedPath++; + } + } + + int64_t GetSegmentId() const + { + return segmentId_; + } + + int64_t GetAbsSegmentId() const + { + return Abs(segmentId_); + } private: - int64_t segmentId_; - TwoPaCo::JunctionPosition begin_; - TwoPaCo::JunctionPosition end_; + int64_t segmentId_; + TwoPaCo::JunctionPosition begin_; + TwoPaCo::JunctionPosition end_; }; -bool CompareJunctionClasses(const EqClass &a, const EqClass &b) { - return CompareJunctionsByPos(a.position[0], b.position[0]); +bool CompareJunctionClasses(const EqClass & a, const EqClass & b) +{ + return CompareJunctionsByPos(a.position[0], b.position[0]); } -void GenerateGroupOutupt(const std::string &inputFileName) { - TwoPaCo::JunctionPosition pos; - TwoPaCo::JunctionPositionReader reader(inputFileName.c_str()); - std::vector eqClass; - std::vector junction; - while (reader.NextJunctionPosition(pos)) { - junction.push_back(pos); - } - - std::sort(junction.begin(), junction.end(), CompareJunctionsById); - for (size_t i = 0; i < junction.size();) { - size_t j = i; - for (; j < junction.size() && junction[i].GetId() == junction[j].GetId(); j++); - std::sort(junction.begin() + i, junction.begin() + j, CompareJunctionsByPos); - eqClass.push_back(EqClass()); - eqClass.back().label = junction[i].GetId(); - for (size_t k = i; k < j; k++) { - eqClass.back().position.push_back(junction[k]); - } - - i = j; - } - - tbb::parallel_sort(eqClass.begin(), eqClass.end(), CompareJunctionClasses); - for (auto junctionClass : eqClass) { - for (auto j : junctionClass.position) { - std::cout << j.GetChr() << ' ' << j.GetPos() << "; "; - } - - std::cout << std::endl; - } +void GenerateGroupOutupt(const std::string & inputFileName) +{ + TwoPaCo::JunctionPosition pos; + TwoPaCo::JunctionPositionReader reader(inputFileName.c_str()); + std::vector eqClass; + std::vector junction; + while (reader.NextJunctionPosition(pos)) + { + junction.push_back(pos); + } + + std::sort(junction.begin(), junction.end(), CompareJunctionsById); + for (size_t i = 0; i < junction.size();) + { + size_t j = i; + for (; j < junction.size() && junction[i].GetId() == junction[j].GetId(); j++); + std::sort(junction.begin() + i, junction.begin() + j, CompareJunctionsByPos); + eqClass.push_back(EqClass()); + eqClass.back().label = junction[i].GetId(); + for (size_t k = i; k < j; k++) + { + eqClass.back().position.push_back(junction[k]); + } + + i = j; + } + + tbb::parallel_sort(eqClass.begin(), eqClass.end(), CompareJunctionClasses); + for (auto junctionClass : eqClass) + { + for (auto j : junctionClass.position) + { + std::cout << j.GetChr() << ' ' << j.GetPos() << "; "; + } + + std::cout << std::endl; + } } -void GenerateOrdinaryOutput(const std::string &inputFileName) { - TwoPaCo::JunctionPosition pos; - TwoPaCo::JunctionPositionReader reader(inputFileName.c_str()); - while (reader.NextJunctionPosition(pos)) { - std::cout << pos.GetChr() << ' ' << pos.GetPos() << ' ' << pos.GetId() << std::endl; - } +void GenerateOrdinaryOutput(const std::string & inputFileName) +{ + TwoPaCo::JunctionPosition pos; + TwoPaCo::JunctionPositionReader reader(inputFileName.c_str()); + while (reader.NextJunctionPosition(pos)) + { + std::cout << pos.GetChr() << ' ' << pos.GetPos() << ' ' << pos.GetId() << std::endl; + } } -char Sign(int64_t arg) { - return arg >= 0 ? '+' : '-'; +char Sign(int64_t arg) +{ + return arg >= 0 ? '+' : '-'; } -void ReadInputSequences(const std::vector &genomes, std::vector &chrSegmentId, - std::vector &chrSegmentLength, std::map &fileName, - bool noPrefix) { - size_t chrCount = 0; - chrSegmentId.clear(); - chrSegmentLength.clear(); - for (const std::string &chrFileName : genomes) { - TwoPaCo::StreamFastaParser parser(chrFileName); - while (parser.ReadRecord()) { - std::stringstream ssId; - if (noPrefix) { - ssId << parser.GetCurrentHeader(); - } else { - ssId << "s" << chrCount << "_" << parser.GetCurrentHeader(); - } - - chrSegmentId.push_back(ssId.str()); - fileName[ssId.str()] = chrFileName; - - - uint64_t size = 0; - for (char ch; parser.GetChar(ch); ++size); - chrSegmentLength.push_back(size); - } - } +void ReadInputSequences(const std::vector & genomes, std::vector & chrSegmentId, std::vector & chrSegmentLength, std::map & fileName, bool noPrefix) +{ + size_t chrCount = 0; + chrSegmentId.clear(); + chrSegmentLength.clear(); + for (const std::string & chrFileName : genomes) + { + TwoPaCo::StreamFastaParser parser(chrFileName); + while (parser.ReadRecord()) + { + std::stringstream ssId; + if (noPrefix) + { + ssId << parser.GetCurrentHeader(); + } + else + { + ssId << "s" << chrCount << "_" << parser.GetCurrentHeader(); + } + + chrSegmentId.push_back(ssId.str()); + fileName[ssId.str()] = chrFileName; + + + uint64_t size = 0; + for (char ch; parser.GetChar(ch); ++size); + chrSegmentLength.push_back(size); + } + } } -class Gfa1Generator { +class Gfa1Generator +{ public: - - void setCapacity(uint64_t c) { - //empty body -- forced by template - } - - void flushSegments(std::string &prefix) { - //empty body-- forced by template - } - - void Header(std::ostream &out) const { - out << "H\tVN:Z:1.0" << std::endl; - } - - void ListInputSequences(const std::vector &seq, std::map &fileName, - std::ostream &out) const { - for (const auto &it : seq) { - out << "S\t" - << it - << "\t*\tUR:Z:" - << fileName[it] - << std::endl; - } - } - - void Segment(int64_t segmentId, uint64_t segmentSize, const std::string &body, std::ostream &out) const { - out << "S\t" - << Abs(segmentId) << "\t" - << body << std::endl; - } - - void Occurrence(int64_t segmentId, uint64_t segmentSize, const std::string &chrSegmentId, uint64_t chrSegmentSize, - uint64_t begin, uint64_t end, uint64_t k, std::ostream &out) const { - out << "C\t" - << Abs(segmentId) << '\t' - << Sign(segmentId) << '\t' - << chrSegmentId << "\t+\t" - << end << std::endl; - } - - void Edge(int64_t prevSegmentId, uint64_t prevSegmentSize, int64_t segmentId, uint64_t segmentSize, uint64_t k, - std::ostream &out) const { - out << "L\t" - << Abs(prevSegmentId) << '\t' - << Sign(prevSegmentId) << '\t' - << Abs(segmentId) << '\t' - << Sign(segmentId) << '\t' - << k << 'M' << std::endl; - } - - void FlushPath(std::vector ¤tPath, const std::string &seqId, size_t k, std::ostream &out) const { - if (currentPath.size() > 0) { - out << "P\t" << seqId << '\t'; - for (auto it = currentPath.begin(); it != currentPath.end() - 1; ++it) { - out << Abs(*it) << Sign(*it) << ","; - } - - out << Abs(currentPath.back()) << Sign(currentPath.back()) << "\t*" << std::endl; - currentPath.clear(); - } - } -}; - -class Gfa1BinaryGenerator { - BinaryWriter bw; - //std::ofstream out; -public: - //Gfa1BinaryGenerator(): bw(std::cout) {} -// Gfa1BinaryGenerator(std::ostream &out): bw(out) {} - - void setOStream(std::ostream* o) { bw.setOStream(o); } - void setCapacity(uint64_t c) { - bw.setCapacity(c); - } - - void flushSegments(std::string & prefix) { - bw.flushSegments(prefix); - } - - void Header(std::ostream &out) { - /*std::string header="H\tVN:Z:1.0"; - bw << header;*/ - } - - void Segment(int64_t segmentId, uint64_t segmentSize, const std::string &body, std::ostream &out) { - bw.addSeq(body); - } - - void FlushPath(std::vector ¤tPath, const std::string &seqId, size_t k, std::ostream &out) { - if (currentPath.size() > 0) { -// bw << EntryType::P; - bw << seqId; - uint64_t pathLength = currentPath.size(); - bw << pathLength; - for (auto it = currentPath.begin(); it != currentPath.end(); ++it) { - int64_t segId = (*it); - bw << segId; // contains segmentId and its sign (could be negative) - } - currentPath.clear(); - } - } + void Header(std::ostream & out) const + { + out << "H\tVN:Z:1.0" << std::endl; + } + + void ListInputSequences(const std::vector & seq, std::map & fileName, std::ostream & out) const + { + for (const auto & it : seq) + { + out << "S\t" + << it + << "\t*\tUR:Z:" + << fileName[it] + << std::endl; + } + } + + void Segment(int64_t segmentId, uint64_t segmentSize, const std::string & body, std::ostream & out) const + { + out << "S\t" + << Abs(segmentId) << "\t" + << body << std::endl; + } + + void Occurrence(int64_t segmentId, uint64_t segmentSize, const std::string & chrSegmentId, uint64_t chrSegmentSize, uint64_t begin, uint64_t end, uint64_t k, std::ostream & out) const + { + out << "C\t" + << Abs(segmentId) << '\t' + << Sign(segmentId) << '\t' + << chrSegmentId << "\t+\t" + << end << std::endl; + } + + void Edge(int64_t prevSegmentId, uint64_t prevSegmentSize, int64_t segmentId, uint64_t segmentSize, uint64_t k, std::ostream & out) const + { + out << "L\t" + << Abs(prevSegmentId) << '\t' + << Sign(prevSegmentId) << '\t' + << Abs(segmentId) << '\t' + << Sign(segmentId) << '\t' + << k << 'M' << std::endl; + } + + void FlushPath(std::vector & currentPath, const std::string & seqId, size_t k, std::ostream & out) const + { + if (currentPath.size() > 0) + { + out << "P\t" << seqId << '\t'; + for (auto it = currentPath.begin(); it != currentPath.end() - 1; ++it) + { + out << Abs(*it) << Sign(*it) << ","; + } + + out << Abs(currentPath.back()) << Sign(currentPath.back()) << "\t*" << std::endl; + currentPath.clear(); + } + } }; -std::string Gfa2Position(size_t pos, size_t length) { - std::stringstream ss; - if (pos == length) { - ss << pos << "$"; - } else { - ss << pos; - } - - return ss.str(); +std::string Gfa2Position(size_t pos, size_t length) +{ + std::stringstream ss; + if (pos == length) + { + ss << pos << "$"; + } + else + { + ss << pos; + } + + return ss.str(); } -std::string Gfa2Segment(int64_t segment) { - std::stringstream ss; - ss << Abs(segment) << Sign(segment); - return ss.str(); +std::string Gfa2Segment(int64_t segment) +{ + std::stringstream ss; + ss << Abs(segment) << Sign(segment); + return ss.str(); } -class Gfa2Generator { +class Gfa2Generator +{ public: - - void setCapacity(uint64_t c) { - //empty body -- forced by template - } - - void flushSegments(std::string & dummy) { - //empty body-- forced by template - } - - void Header(std::ostream &out) const { - out << "H\tVN:Z:2.0" << std::endl; - } - - void ListInputSequences(const std::vector &seq, std::map &fileName, - std::ostream &out) const { - - } - - void Segment(int64_t segmentId, uint64_t segmentSize, const std::string &body, std::ostream &out) const { - out << "S\t" - << Abs(segmentId) << "\t" - << segmentSize << "\t" - << body << std::endl; - } - - void Occurrence(int64_t segmentId, uint64_t segmentSize, const std::string &chrSegmentId, uint64_t chrSegmentSize, - uint64_t begin, uint64_t end, uint64_t k, std::ostream &out) const { - std::cout << "F\t" - << Abs(segmentId) << '\t' - << chrSegmentId << Sign(segmentId) << '\t' - << "0\t" - << segmentSize << "$" << "\t" - << Gfa2Position(begin, chrSegmentSize) << "\t" - << Gfa2Position(end + k, chrSegmentSize) << "\t" - << k << "M" << std::endl; - } - - void Edge(int64_t prevSegmentId, uint64_t prevSegmentSize, int64_t segmentId, uint64_t segmentSize, uint64_t k, - std::ostream &out) const { - uint64_t prevSegmentStart; - uint64_t prevSegmentEnd; - uint64_t segmentStart; - uint64_t segmentEnd; - if (prevSegmentId > 0) { - prevSegmentStart = prevSegmentSize - k; - prevSegmentEnd = prevSegmentSize; - } else { - prevSegmentStart = 0; - prevSegmentEnd = k; - } - - if (segmentId > 0) { - segmentStart = 0; - segmentEnd = k; - } else { - segmentStart = segmentSize - k; - segmentEnd = segmentSize; - } - - out << "E\t" - << Gfa2Segment(prevSegmentId) - << '\t' << Gfa2Segment(segmentId) << '\t' - << Gfa2Position(prevSegmentStart, prevSegmentSize) << '\t' - << Gfa2Position(prevSegmentEnd, prevSegmentSize) << '\t' - << Gfa2Position(segmentStart, segmentSize) << '\t' - << Gfa2Position(segmentEnd, segmentSize) << '\t' - << k << 'M' << std::endl; - } - - void FlushPath(std::vector ¤tPath, const std::string &seqId, size_t k, std::ostream &out) const { - if (currentPath.size() > 0) { - out << "O\t" << seqId << "p" << '\t'; - for (auto it = currentPath.begin(); it != currentPath.end() - 1; ++it) { - out << Abs(*it) << Sign(*it) << " "; - } - - out << Abs(currentPath.back()) << Sign(currentPath.back()) << std::endl; - currentPath.clear(); - } - } + void Header(std::ostream & out) const + { + out << "H\tVN:Z:2.0" << std::endl; + } + + void ListInputSequences(const std::vector & seq, std::map & fileName, std::ostream & out) const + { + + } + + void Segment(int64_t segmentId, uint64_t segmentSize, const std::string & body, std::ostream & out) const + { + out << "S\t" + << Abs(segmentId) << "\t" + << segmentSize << "\t" + << body << std::endl; + } + + void Occurrence(int64_t segmentId, uint64_t segmentSize, const std::string & chrSegmentId, uint64_t chrSegmentSize, uint64_t begin, uint64_t end, uint64_t k, std::ostream & out) const + { + std::cout << "F\t" + << Abs(segmentId) << '\t' + << chrSegmentId << Sign(segmentId) << '\t' + << "0\t" + << segmentSize << "$" << "\t" + << Gfa2Position(begin, chrSegmentSize) << "\t" + << Gfa2Position(end + k, chrSegmentSize) << "\t" + << k << "M" << std::endl; + } + + void Edge(int64_t prevSegmentId, uint64_t prevSegmentSize, int64_t segmentId, uint64_t segmentSize, uint64_t k, std::ostream & out) const + { + uint64_t prevSegmentStart; + uint64_t prevSegmentEnd; + uint64_t segmentStart; + uint64_t segmentEnd; + if (prevSegmentId > 0) + { + prevSegmentStart = prevSegmentSize - k; + prevSegmentEnd = prevSegmentSize; + } + else + { + prevSegmentStart = 0; + prevSegmentEnd = k; + } + + if (segmentId > 0) + { + segmentStart = 0; + segmentEnd = k; + } + else + { + segmentStart = segmentSize - k; + segmentEnd = segmentSize; + } + + out << "E\t" + << Gfa2Segment(prevSegmentId) + << '\t' << Gfa2Segment(segmentId) << '\t' + << Gfa2Position(prevSegmentStart, prevSegmentSize) << '\t' + << Gfa2Position(prevSegmentEnd, prevSegmentSize) << '\t' + << Gfa2Position(segmentStart, segmentSize) << '\t' + << Gfa2Position(segmentEnd, segmentSize) << '\t' + << k << 'M' << std::endl; + } + + void FlushPath(std::vector & currentPath, const std::string & seqId, size_t k, std::ostream & out) const + { + if (currentPath.size() > 0) + { + out << "O\t" << seqId << "p" << '\t'; + for (auto it = currentPath.begin(); it != currentPath.end() - 1; ++it) + { + out << Abs(*it) << Sign(*it) << " "; + } + + out << Abs(currentPath.back()) << Sign(currentPath.back()) << std::endl; + currentPath.clear(); + } + } }; template -void GenerateGfaOutput(const std::string &inputFileName, const std::vector &genomes, size_t k, bool prefix, - G g) { - std::vector chrSegmentLength; - std::vector chrSegmentId; - std::map chrFileName; - - //std::cout << "H\tVN:Z:1.0" << std::endl; - g.Header(std::cout); - - ReadInputSequences(genomes, chrSegmentId, chrSegmentLength, chrFileName, !prefix); - g.ListInputSequences(chrSegmentId, chrFileName, std::cout); - - std::vector currentPath; - const int64_t NO_SEGMENT = 0; - std::string chr; - int64_t seqId = NO_SEGMENT; - int64_t prevSegmentId = NO_SEGMENT; - int64_t prevSegmentSize = -1; - TwoPaCo::JunctionPosition end; - TwoPaCo::JunctionPosition begin; - TwoPaCo::ChrReader chrReader(genomes); - TwoPaCo::JunctionPositionReader reader(inputFileName.c_str()); - std::vector seen(MAX_SEGMENT_NUMBER, 0); - int64_t previousId = 0; +void GenerateGfaOutput(const std::string & inputFileName, const std::vector & genomes, size_t k, bool prefix, const std::string & outFileName, G g) +{ + std::vector chrSegmentLength; + std::vector chrSegmentId; + std::map chrFileName; + + std::ofstream gfaStream(outFileName) ; + //std::cout << "H\tVN:Z:1.0" << std::endl; + g.Header(gfaStream); + + ReadInputSequences(genomes, chrSegmentId, chrSegmentLength, chrFileName, !prefix); + g.ListInputSequences(chrSegmentId, chrFileName, gfaStream); + + std::vector currentPath; + const int64_t NO_SEGMENT = 0; + std::string chr; + int64_t seqId = NO_SEGMENT; + int64_t prevSegmentId = NO_SEGMENT; + int64_t prevSegmentSize = -1; + TwoPaCo::JunctionPosition end; + TwoPaCo::JunctionPosition begin; + TwoPaCo::ChrReader chrReader(genomes); + TwoPaCo::JunctionPositionReader reader(inputFileName.c_str()); + std::vector seen(MAX_SEGMENT_NUMBER, 0); + int64_t previousId = 0; + #ifdef _DEBUG - std::map segmentBody; + std::map segmentBody; #endif - if (reader.NextJunctionPosition(begin)) { - chrReader.NextChr(chr); - while (reader.NextJunctionPosition(end)) { - if (begin.GetChr() == end.GetChr()) { - Segment nowSegment(begin, end, chr[begin.GetPos() + k], - TwoPaCo::DnaChar::ReverseChar(chr[end.GetPos() - 1])); - int64_t segmentId = nowSegment.GetSegmentId(); - currentPath.push_back(segmentId); - uint64_t segmentSize = end.GetPos() + k - begin.GetPos(); - if (!seen[Abs(segmentId)]) { - //std::cout << "S\t" << Abs(segmentId) << "\t"; - std::stringstream ss; - if (segmentId > 0) { - std::copy(chr.begin() + begin.GetPos(), chr.begin() + end.GetPos() + k, - std::ostream_iterator(ss)); - - } else { - std::string buf = TwoPaCo::DnaChar::ReverseCompliment( - std::string(chr.begin() + begin.GetPos(), chr.begin() + end.GetPos() + k)); - std::copy(buf.begin(), buf.end(), std::ostream_iterator(ss)); - } - - g.Segment(segmentId, segmentSize, ss.str(), std::cout); - seen[Abs(segmentId)] = true; - } + if (reader.NextJunctionPosition(begin)) + { + chrReader.NextChr(chr); + while (reader.NextJunctionPosition(end)) + { + if (begin.GetChr() == end.GetChr()) + { + Segment nowSegment(begin, end, chr[begin.GetPos() + k], TwoPaCo::DnaChar::ReverseChar(chr[end.GetPos() - 1])); + int64_t segmentId = nowSegment.GetSegmentId(); + currentPath.push_back(segmentId); + uint64_t segmentSize = end.GetPos() + k - begin.GetPos(); + if (!seen[Abs(segmentId)]) + { + //std::cout << "S\t" << Abs(segmentId) << "\t"; + std::stringstream ss; + if (segmentId > 0) + { + std::copy(chr.begin() + begin.GetPos(), chr.begin() + end.GetPos() + k, std::ostream_iterator(ss)); + + } + else + { + std::string buf = TwoPaCo::DnaChar::ReverseCompliment(std::string(chr.begin() + begin.GetPos(), chr.begin() + end.GetPos() + k)); + std::copy(buf.begin(), buf.end(), std::ostream_iterator(ss)); + } + + g.Segment(segmentId, segmentSize, ss.str(), gfaStream); + seen[Abs(segmentId)] = true; + } #ifdef _DEBUG - int64_t absSegmentId = Abs(segmentId); - std::string buf = segmentId > 0 ? std::string(chr.begin() + begin.GetPos(), chr.begin() + end.GetPos() + k) : - TwoPaCo::DnaChar::ReverseCompliment(std::string(chr.begin() + begin.GetPos(), chr.begin() + end.GetPos() + k)); - if (segmentBody.count(absSegmentId) == 0) - { - segmentBody[absSegmentId] = buf; - } - else - { - assert(segmentBody[absSegmentId] == buf); - } + int64_t absSegmentId = Abs(segmentId); + std::string buf = segmentId > 0 ? std::string(chr.begin() + begin.GetPos(), chr.begin() + end.GetPos() + k) : + TwoPaCo::DnaChar::ReverseCompliment(std::string(chr.begin() + begin.GetPos(), chr.begin() + end.GetPos() + k)); + if (segmentBody.count(absSegmentId) == 0) + { + segmentBody[absSegmentId] = buf; + } + else + { + assert(segmentBody[absSegmentId] == buf); + } #endif - g.Occurrence(segmentId, segmentSize, chrSegmentId[seqId], chrSegmentLength[seqId], begin.GetPos(), - end.GetPos(), k, std::cout); -// std::cout << "C\t" << Abs(segmentId) << '\t' << Sign(segmentId) << '\t' << chrSegmentId[seqId] << "\t+\t" << begin.GetPos() << std::endl; - - if (prevSegmentId != NO_SEGMENT) { -// std::cout << "L\t" << Abs(prevSegmentId) << '\t' << Sign(prevSegmentId) << '\t' << Abs(segmentId) << '\t' << Sign(segmentId) << '\t' << k << 'M' << std::endl; - g.Edge(prevSegmentId, prevSegmentSize, segmentId, segmentSize, k, std::cout); - } - - prevSegmentId = segmentId; - prevSegmentSize = segmentSize; - begin = end; - } else { - g.FlushPath(currentPath, chrSegmentId[seqId], k, std::cout); - chrReader.NextChr(chr); - prevSegmentId = 0; - begin = end; - - if (begin.GetChr() != ++seqId) { - throw std::runtime_error("The input is corrupted"); - } - } - } - } - - g.FlushPath(currentPath, chrSegmentId[seqId], k, std::cout); + g.Occurrence(segmentId, segmentSize, chrSegmentId[seqId], chrSegmentLength[seqId], begin.GetPos(), end.GetPos(), k, gfaStream); + //std::cout << "C\t" << Abs(segmentId) << '\t' << Sign(segmentId) << '\t' << chrSegmentId[seqId] << "\t+\t" << begin.GetPos() << std::endl; + + if (prevSegmentId != NO_SEGMENT) + { + //std::cout << "L\t" << Abs(prevSegmentId) << '\t' << Sign(prevSegmentId) << '\t' << Abs(segmentId) << '\t' << Sign(segmentId) << '\t' << k << 'M' << std::endl; + g.Edge(prevSegmentId, prevSegmentSize, segmentId, segmentSize, k, gfaStream); + } + + prevSegmentId = segmentId; + prevSegmentSize = segmentSize; + begin = end; + } + else + { + g.FlushPath(currentPath, chrSegmentId[seqId], k, gfaStream); + chrReader.NextChr(chr); + prevSegmentId = 0; + begin = end; + + if (begin.GetChr() != ++seqId) + { + throw std::runtime_error("The input is corrupted"); + } + } + } + } + + g.FlushPath(currentPath, chrSegmentId[seqId], k, gfaStream); } template -void OutFastaBody(It begin, It end) { - int64_t count = 0; - for (; begin != end; ++begin) { - std::cout << *begin; - if (++count % 80 == 0) { - std::cout << std::endl; - } - } - - if (count % 80 != 0) { - std::cout << std::endl; - } +void OutFastaBody(It begin, It end) +{ + int64_t count = 0; + for (; begin != end; ++begin) + { + std::cout << *begin; + if (++count % 80 == 0) + { + std::cout << std::endl; + } + } + + if (count % 80 != 0) + { + std::cout << std::endl; + } } -void GenerateFastaOutput(const std::string &inputFileName, const std::vector &genomes, size_t k) { - std::vector chrSegmentLength; - std::vector chrSegmentId; - std::map chrFileName; +void GenerateFastaOutput(const std::string & inputFileName, const std::vector & genomes, size_t k) +{ + std::vector chrSegmentLength; + std::vector chrSegmentId; + std::map chrFileName; - ReadInputSequences(genomes, chrSegmentId, chrSegmentLength, chrFileName, false); + ReadInputSequences(genomes, chrSegmentId, chrSegmentLength, chrFileName, false); - std::vector currentPath; - const int64_t NO_SEGMENT = 0; - std::string chr; - int64_t seqId = NO_SEGMENT; - int64_t prevSegmentId = NO_SEGMENT; - int64_t prevSegmentSize = -1; - TwoPaCo::JunctionPosition end; - TwoPaCo::JunctionPosition begin; - TwoPaCo::ChrReader chrReader(genomes); - TwoPaCo::JunctionPositionReader reader(inputFileName.c_str()); - std::vector seen(MAX_SEGMENT_NUMBER, 0); - int64_t previousId = 0; + std::vector currentPath; + const int64_t NO_SEGMENT = 0; + std::string chr; + int64_t seqId = NO_SEGMENT; + int64_t prevSegmentId = NO_SEGMENT; + int64_t prevSegmentSize = -1; + TwoPaCo::JunctionPosition end; + TwoPaCo::JunctionPosition begin; + TwoPaCo::ChrReader chrReader(genomes); + TwoPaCo::JunctionPositionReader reader(inputFileName.c_str()); + std::vector seen(MAX_SEGMENT_NUMBER, 0); + int64_t previousId = 0; #ifdef _DEBUG - std::map segmentBody; + std::map segmentBody; #endif - if (reader.NextJunctionPosition(begin)) { - chrReader.NextChr(chr); - while (reader.NextJunctionPosition(end)) { - if (begin.GetChr() == end.GetChr()) { - Segment nowSegment(begin, end, chr[begin.GetPos() + k], - TwoPaCo::DnaChar::ReverseChar(chr[end.GetPos() - 1])); - int64_t segmentId = nowSegment.GetSegmentId(); - currentPath.push_back(segmentId); - uint64_t segmentSize = end.GetPos() + k - begin.GetPos(); - if (!seen[Abs(segmentId)]) { - std::cout << ">" << Abs(segmentId) << std::endl; - if (segmentId > 0) { - OutFastaBody(chr.begin() + begin.GetPos(), chr.begin() + end.GetPos() + k); - } else { - std::string buf = TwoPaCo::DnaChar::ReverseCompliment( - std::string(chr.begin() + begin.GetPos(), chr.begin() + end.GetPos() + k)); - OutFastaBody(buf.begin(), buf.end()); - } - - - seen[Abs(segmentId)] = true; - } + if (reader.NextJunctionPosition(begin)) + { + chrReader.NextChr(chr); + while (reader.NextJunctionPosition(end)) + { + if (begin.GetChr() == end.GetChr()) + { + Segment nowSegment(begin, end, chr[begin.GetPos() + k], TwoPaCo::DnaChar::ReverseChar(chr[end.GetPos() - 1])); + int64_t segmentId = nowSegment.GetSegmentId(); + currentPath.push_back(segmentId); + uint64_t segmentSize = end.GetPos() + k - begin.GetPos(); + if (!seen[Abs(segmentId)]) + { + std::cout << ">" << Abs(segmentId) << std::endl; + if (segmentId > 0) + { + OutFastaBody(chr.begin() + begin.GetPos(), chr.begin() + end.GetPos() + k); + } + else + { + std::string buf = TwoPaCo::DnaChar::ReverseCompliment(std::string(chr.begin() + begin.GetPos(), chr.begin() + end.GetPos() + k)); + OutFastaBody(buf.begin(), buf.end()); + } + + + seen[Abs(segmentId)] = true; + } #ifdef _DEBUG - int64_t absSegmentId = Abs(segmentId); - std::string buf = segmentId > 0 ? std::string(chr.begin() + begin.GetPos(), chr.begin() + end.GetPos() + k) : - TwoPaCo::DnaChar::ReverseCompliment(std::string(chr.begin() + begin.GetPos(), chr.begin() + end.GetPos() + k)); - if (segmentBody.count(absSegmentId) == 0) - { - segmentBody[absSegmentId] = buf; - } - else - { - assert(segmentBody[absSegmentId] == buf); - } + int64_t absSegmentId = Abs(segmentId); + std::string buf = segmentId > 0 ? std::string(chr.begin() + begin.GetPos(), chr.begin() + end.GetPos() + k) : + TwoPaCo::DnaChar::ReverseCompliment(std::string(chr.begin() + begin.GetPos(), chr.begin() + end.GetPos() + k)); + if (segmentBody.count(absSegmentId) == 0) + { + segmentBody[absSegmentId] = buf; + } + else + { + assert(segmentBody[absSegmentId] == buf); + } #endif - prevSegmentId = segmentId; - prevSegmentSize = segmentSize; - begin = end; - } else { - chrReader.NextChr(chr); - prevSegmentId = 0; - begin = end; - - if (begin.GetChr() != ++seqId) { - throw std::runtime_error("The input is corrupted"); - } - } - } - } + prevSegmentId = segmentId; + prevSegmentSize = segmentSize; + begin = end; + } + else + { + chrReader.NextChr(chr); + prevSegmentId = 0; + begin = end; + + if (begin.GetChr() != ++seqId) + { + throw std::runtime_error("The input is corrupted"); + } + } + } + } } -void GenerateDotOutput(const std::string &inputFileName) { - TwoPaCo::JunctionPosition pos; - TwoPaCo::JunctionPosition prevPos; - TwoPaCo::JunctionPositionReader reader(inputFileName.c_str()); - std::cout << "digraph G\n{\n\trankdir = LR" << std::endl; - - while (reader.NextJunctionPosition(pos)) { - if (pos.GetChr() == prevPos.GetChr()) { - std::cout << '\t' << prevPos.GetId() << " -> " << pos.GetId() << - "[color=\"blue\", label=\"chr=" << prevPos.GetChr() << " pos=" << prevPos.GetPos() << "\"]" - << std::endl; - std::cout << '\t' << -pos.GetId() << " -> " << -prevPos.GetId() << - "[color=\"red\", label=\"chr=" << prevPos.GetChr() << " pos=" << prevPos.GetPos() << "\"]" - << std::endl; - } - - prevPos = pos; - } - - std::cout << "}" << std::endl; -} - -template -void GeneratePufferizedOutput(const std::string &inputFileName, const std::string& gfaFileName, const std::vector &genomes, size_t k, - bool prefix, std::string &prefixDir, G* g) { - std::vector chrSegmentLength; - std::vector chrSegmentId; - std::map chrFileName; - - std::cout << " ========= " << gfaFileName << "\n" ; - std::ofstream gfaStream(gfaFileName) ; - g->Header(gfaStream); - - ReadInputSequences(genomes, chrSegmentId, chrSegmentLength, chrFileName, !prefix); - - std::vector currentPath; - const int64_t NO_SEGMENT = 0; - std::string chr; - int64_t seqId = NO_SEGMENT; - TwoPaCo::JunctionPosition end; - TwoPaCo::JunctionPosition begin; - TwoPaCo::JunctionPosition curr; - TwoPaCo::JunctionPosition prev; - TwoPaCo::ChrReader chrReader(genomes); - TwoPaCo::JunctionPositionReader reader(inputFileName.c_str()); - -// std::cerr << "\n\nRound zero:\n"; - uint64_t maxJunction{0}; - while (reader.NextJunctionPosition(curr)) { -// std::cerr << "id: " << Abs(curr.GetId()) << "\n"; - if (Abs(curr.GetId()) > maxJunction) - maxJunction = (uint64_t)Abs(curr.GetId()); - } -// std::cerr << "Max Junction ID: " << maxJunction << "\n"; - - std::vector seen((maxJunction << 3 + 9)/*MAX_SEGMENT_NUMBER*/, 0); - std::vector kmerInfo(maxJunction+1/*MAX_JUNCTION_ID*/); - - // First round going over the junctions file -// std::cerr << "\n\nRound one:\n"; - reader.RestoreReader(); - uint64_t approximateContigLen=0; - if (reader.NextJunctionPosition(prev)) { - chrReader.NextChr(chr); - - // set the first junction kmer of the first sequence as seq. start (if fw) or seq. end (if rc) - if (prev.GetId() >= 0) { - kmerInfo[Abs(prev.GetId())].setStart(); - } else { - kmerInfo[Abs(prev.GetId())].setEnd(); - } - // for the first contig - kmerInfo[Abs(prev.GetId())].len = curr.GetPos(); - - while (reader.NextJunctionPosition(curr)) { - auto currAbsId = Abs(curr.GetId()); - auto prevAbsId = Abs(prev.GetId()); - if (prev.GetChr() != curr.GetChr()) { // If we are starting a new reference/path - kmerInfo[currAbsId].len = curr.GetPos(); - // set prev kmer as the end of a path if it is forward - if (prev.GetId() >= 0) { - kmerInfo[prevAbsId].setEnd(); - } else { - kmerInfo[prevAbsId].setStart(); - } - // set current kmer as the start of a path if it is forward - if (curr.GetId() >= 0) { - kmerInfo[currAbsId].setStart(); - } else { // set it as an end kmer if it is rc - kmerInfo[currAbsId].setEnd(); - } - chrReader.NextChr(chr); - } else { // If we are in the middle of a path - kmerInfo[currAbsId].len = curr.GetPos() - prev.GetPos(); - kmerInfo[prevAbsId].setSucceedingChar(prev.GetId() >= 0, chr[prev.GetPos() + k]); - kmerInfo[currAbsId].setPrecedingChar(curr.GetId() >= 0, chr[curr.GetPos() - 1]); - } - prev = curr; - } - // for the last contig - kmerInfo[Abs(curr.GetId())].len = curr.GetPos() - prev.GetPos(); - - // set the last junction kmer of the last sequence as seq. end (if fw) or seq. start (if rc) - if (prev.GetId() >= 0) { - kmerInfo[Abs(prev.GetId())].setEnd(); - } else { - kmerInfo[Abs(prev.GetId())].setStart(); - } - uint64_t cntr1{0}, cntr2{0}, cntr3{0}, cntr4{0}; - for (auto &kmerIn : kmerInfo) { - kmerIn.decideType(k, approximateContigLen, cntr1, cntr2, cntr3, cntr4); - } - std::cerr << "approximateContigTotalLength: " << approximateContigLen << "\ncounters:\n" << cntr1 << " " << cntr2 << " " << cntr3 << " " << cntr4 << "\n"; - } - // Having all the required information for each junction, - // Start the second round of going over the junctions file - - g->setCapacity(approximateContigLen); - uint64_t cntr{0}, contigCntr{1}, elementCntr{0}; - std::unordered_map contigMap; - auto addKmerIfComplex = [&] (int64_t absBegin) { - if (kmerInfo[absBegin].cropBoth()) { // If the start junction is complex, treat it as a segment - // the complex kmer ID (new segment ID) shouldn't interfere with the segment ID range - // so start from max_segment_number --> TODO potential segfault!! - int64_t kmerId = MAX_SEGMENT_NUMBER + absBegin; - if (begin.GetId() < 0) { - kmerId = -kmerId; - } - if (!kmerInfo[absBegin].seen()) { - cntr++; - std::stringstream ss; - if (begin.GetId() > 0) { - std::copy(chr.begin() + begin.GetPos(), chr.begin() + begin.GetPos() + k, - std::ostream_iterator(ss)); - } else { - std::string buf = - TwoPaCo::DnaChar::ReverseCompliment(std::string(chr.begin() + begin.GetPos(), - chr.begin() + begin.GetPos() + k)); - std::copy(buf.begin(), buf.end(), std::ostream_iterator(ss)); - } - contigMap[Abs(kmerId)] = contigCntr; -// std::cerr << "AddkmerIfComplex: " << contigCntr << " " << k << " " << ss.str() << "\n"; - g->Segment(contigCntr, k, ss.str(), gfaStream); - contigCntr++; - elementCntr+=ss.str().size(); - kmerInfo[absBegin].setSeen(); - } - - int64_t newId = contigMap[Abs(kmerId)]; - if (kmerId<0) - newId = -newId; - currentPath.push_back(newId); // Add complex node as a new segment to the path - } - }; - - -// std::cerr << "\n\nRound two:\n"; - reader.RestoreReader(); - chrReader.reset(); - if (reader.NextJunctionPosition(begin)) { - chrReader.NextChr(chr); - while (reader.NextJunctionPosition(end)) { - int64_t absBegin = Abs(begin.GetId()); - addKmerIfComplex(absBegin); // If the start junction is complex, treat it as a segment - - if (begin.GetChr() == end.GetChr()) { // store the segment - Segment nowSegment(begin, end, chr[begin.GetPos() + k], - TwoPaCo::DnaChar::ReverseChar(chr[end.GetPos() - 1])); - int64_t segmentId = nowSegment.GetSegmentId(); - int64_t absEnd = Abs(end.GetId()); - uint64_t beginPos{begin.GetPos()}, endPos{end.GetPos()}, extension{k}; - if (kmerInfo[absBegin].cropBoth() or - (kmerInfo[absBegin].cropStart() and begin.GetId() >= 0) or - (kmerInfo[absBegin].cropEnd() and begin.GetId() < 0)) { - beginPos++; // If need to crop the first nucleotide - } - if (kmerInfo[absEnd].cropBoth() or - (kmerInfo[absEnd].cropEnd() and end.GetId() >= 0) or - (kmerInfo[absEnd].cropStart() and end.GetId() < 0)) { - extension--; // If need to crop the last nucleotide - } - uint64_t segmentSize = endPos + extension - beginPos; - if (segmentSize >= k) { // write the middle segment only if its length is above the valid min segment length - // If the contig is palindrome - bool isPalindrome = false; - if (begin.GetId() == -end.GetId() and chr[begin.GetPos() + k] == TwoPaCo::DnaChar::ReverseChar(chr[end.GetPos() - 1])) { - isPalindrome = true; - if (segmentSize % 2 != 0) { - std::cerr << "This shouldn't happen. Problem handling palindromes!!\n"; - std::exit(1); - } - endPos = (beginPos + endPos)/2; - } - if (!seen[Abs(segmentId)]) { - std::stringstream ss; - if (segmentId > 0) { - std::copy(chr.begin() + beginPos, chr.begin() + endPos + extension, - std::ostream_iterator(ss)); - } else { - std::string buf = - TwoPaCo::DnaChar::ReverseCompliment(std::string(chr.begin() + beginPos, - chr.begin() + endPos + extension)); - std::copy(buf.begin(), buf.end(), std::ostream_iterator(ss)); - } - contigMap[Abs(segmentId)] = contigCntr; -// std::cerr << contigCntr << " " << segmentSize << " " << ss.str() << "\n"; - g->Segment(contigCntr, segmentSize, ss.str(), gfaStream); - contigCntr++; - elementCntr+=ss.str().size(); - seen[Abs(segmentId)] = true; - } - int64_t newId = contigMap[Abs(segmentId)]; - if (segmentId<0) - newId = -newId; - currentPath.push_back(newId); // Add segment to the path - if (isPalindrome) { - currentPath.push_back(-newId); - } - } - begin = end; - } else { - g->FlushPath(currentPath, chrSegmentId[seqId], k, gfaStream); - chrReader.NextChr(chr); - begin = end; - - if (begin.GetChr() != ++seqId) { - std::cerr << begin.GetChr() << " " << seqId << "\n"; - throw std::runtime_error("The input is corrupted"); - } - } - } - } - - // Need to take care of the very last junction - int64_t absBegin = Abs(begin.GetId()); - addKmerIfComplex(absBegin); - g->FlushPath(currentPath, chrSegmentId[seqId], k, gfaStream); - std::cerr << "contig count: " << contigCntr << " element count: " << elementCntr << " complex nodes: " << cntr << "\n"; - g->flushSegments(prefixDir); +void GenerateDotOutput(const std::string & inputFileName) +{ + TwoPaCo::JunctionPosition pos; + TwoPaCo::JunctionPosition prevPos; + TwoPaCo::JunctionPositionReader reader(inputFileName.c_str()); + std::cout << "digraph G\n{\n\trankdir = LR" << std::endl; + + while (reader.NextJunctionPosition(pos)) + { + if (pos.GetChr() == prevPos.GetChr()) + { + std::cout << '\t' << prevPos.GetId() << " -> " << pos.GetId() << + "[color=\"blue\", label=\"chr=" << prevPos.GetChr() << " pos=" << prevPos.GetPos() << "\"]" << std::endl; + std::cout << '\t' << -pos.GetId() << " -> " << -prevPos.GetId() << + "[color=\"red\", label=\"chr=" << prevPos.GetChr() << " pos=" << prevPos.GetPos() << "\"]" << std::endl; + } + + prevPos = pos; + } + + std::cout << "}" << std::endl; } -int dumpGraphMain(std::vector& args){//}int argc, char *argv[]) { - std::vector format; - format.push_back("seq"); - format.push_back("group"); - format.push_back("dot"); - format.push_back("gfa1"); - format.push_back("gfa2"); - format.push_back("fasta"); - format.push_back("pufferized"); - format.push_back("binPufferized"); - std::stringstream formatString; - std::copy(format.begin(), format.begin(), std::ostream_iterator(formatString, "|")); - try { - TCLAP::CmdLine cmd("This utility converts the binary output of TwoPaCo to another format", ' ', "0.9.3"); - TCLAP::SwitchArg prefix("", "prefix", - "Add a prefix to segments in GFA (in case if you have genomes with identical FASTA headers)", - cmd, false); - - TCLAP::UnlabeledValueArg inputFileName("infile", - "input file name", - true, - "", - "file name", - cmd); - - TCLAP::ValuesConstraint formatConstraint(format); - TCLAP::ValueArg outputFileFormat("f", - "format", - "Output format", - true, - format[0], - &formatConstraint, - cmd); - - TCLAP::ValueArg seqAndRankOutputDir("p", - "SeqRankDirPrefix", - "Sequence and rank output files directory prefix", - false, - "./", - "string", - cmd); - TCLAP::MultiArg seqFileName("s", - "seqfile", - "sequences file name", - false, - "", - cmd); - - TCLAP::ValueArg kvalue("k", - "kvalue", - "Value of k", - true, - 25, - "integer", - cmd); - - cmd.parse(args);//argc, argv); - if (outputFileFormat.getValue() == format[0]) { - GenerateOrdinaryOutput(inputFileName.getValue()); - } else if (outputFileFormat.getValue() == format[1]) { - GenerateGroupOutupt(inputFileName.getValue()); - } else if (outputFileFormat.getValue() == format[2]) { - GenerateDotOutput(inputFileName.getValue()); - } else if (outputFileFormat.getValue() == format[3]) { - if (!seqFileName.isSet()) { - throw TCLAP::ArgParseException("Required argument missing\n", "seqfilename"); - } - - GenerateGfaOutput(inputFileName.getValue(), seqFileName.getValue(), kvalue.getValue(), prefix.getValue(), - Gfa1Generator()); - } else if (outputFileFormat.getValue() == format[4]) { - if (!seqFileName.isSet()) { - throw TCLAP::ArgParseException("Required argument missing\n", "seqfilename"); - } - - GenerateGfaOutput(inputFileName.getValue(), seqFileName.getValue(), kvalue.getValue(), prefix.getValue(), - Gfa2Generator()); - } else if (outputFileFormat.getValue() == format[5]) { - if (!seqFileName.isSet()) { - throw TCLAP::ArgParseException("Required argument missing\n", "seqfilename"); - } - - GenerateFastaOutput(inputFileName.getValue(), seqFileName.getValue(), kvalue.getValue()); - } else if (outputFileFormat.getValue() == format[6]) { // pufferized - if (!seqFileName.isSet()) { - throw TCLAP::ArgParseException("Required argument missing\n", "seqfilename"); - } - std::cout << "In pufferized option\n"; - auto * g = new Gfa1Generator(); - std::string gfaFileName = seqAndRankOutputDir.getValue() + "/dbg.gfa" ; - GeneratePufferizedOutput(inputFileName.getValue(), gfaFileName, seqFileName.getValue(), kvalue.getValue(), - prefix.getValue(), seqAndRankOutputDir.getValue(), g); - delete g; - } else if (outputFileFormat.getValue() == format[7]) { // binPufferized - - std::cerr << "We don't produce binary file for minnow " << std::endl ; - return 1 ; - // if (!seqFileName.isSet()) { - // throw TCLAP::ArgParseException("Required argument missing\n", "seqfilename"); - // } - // if (!seqAndRankOutputDir.isSet()) { - // throw TCLAP::ArgParseException("Required argument missing\n", "SeqRankDirPrefix"); - // } - // auto * g = new Gfa1BinaryGenerator(); - // std::ofstream pfile(seqAndRankOutputDir.getValue()+"/path.bin", std::ofstream::binary); - // g->setOStream(&pfile); - // GeneratePufferizedOutput(inputFileName.getValue(),seqFileName.getValue(), kvalue.getValue(), - // prefix.getValue(), seqAndRankOutputDir.getValue(), g); - // pfile.close(); - //delete g; - } - } - catch (TCLAP::ArgException &e) { - std::cerr << "error: " << e.error() << " for arg " << e.argId() << std::endl; - return 1; - } - catch (std::runtime_error &e) { - std::cerr << "error: " << e.what() << std::endl; - return 1; - } - - return 0; +int dumpGraphMain(std::vector& args)//main(int argc, char * argv[]) +{ + std::vector format; + format.push_back("seq"); + format.push_back("group"); + format.push_back("dot"); + format.push_back("gfa1"); + format.push_back("gfa2"); + format.push_back("fasta"); + std::stringstream formatString; + std::copy(format.begin(), format.begin(), std::ostream_iterator(formatString, "|")); + try + { + TCLAP::CmdLine cmd("This utility converts the binary output of TwoPaCo to another format", ' ', "0.9.3"); + TCLAP::SwitchArg prefix("", "prefix", "Add a prefix to segments in GFA (in case if you have genomes with identical FASTA headers)", cmd, false); + //TCLAP::SwitchArg outfile("", "outfile", "output stream to hold the gfa file", cmd, false); + + TCLAP::UnlabeledValueArg inputFileName("infile", + "input file name", + true, + "", + "file name", + cmd); + + TCLAP::ValuesConstraint formatConstraint(format); + TCLAP::ValueArg outputFileFormat("f", + "format", + "Output format", + true, + format[0], + &formatConstraint, + cmd); + + TCLAP::MultiArg seqFileName("s", + "seqfile", + "sequences file name", + false, + "", + cmd); + + TCLAP::ValueArg kvalue("k", + "kvalue", + "Value of k", + true, + 25, + "integer", + cmd); + + TCLAP::ValueArg outFileName("o", + "outfile", + "output file", + true, + "dbg.gfa", + "file name", + cmd); + + cmd.parse(args);//(argc, argv); + if (outputFileFormat.getValue() == format[0]) + { + GenerateOrdinaryOutput(inputFileName.getValue()); + } + else if (outputFileFormat.getValue() == format[1]) + { + GenerateGroupOutupt(inputFileName.getValue()); + } + else if (outputFileFormat.getValue() == format[2]) + { + GenerateDotOutput(inputFileName.getValue()); + } + else if (outputFileFormat.getValue() == format[3]) + { + // gfa1 generator + if (!seqFileName.isSet()) + { + throw TCLAP::ArgParseException("Required argument missing\n", "seqfilename"); + } + + GenerateGfaOutput(inputFileName.getValue(), seqFileName.getValue(), kvalue.getValue(), prefix.getValue(), outFileName.getValue(), Gfa1Generator()); + } + else if (outputFileFormat.getValue() == format[4]) + { + if (!seqFileName.isSet()) + { + throw TCLAP::ArgParseException("Required argument missing\n", "seqfilename"); + } + + GenerateGfaOutput(inputFileName.getValue(), seqFileName.getValue(), kvalue.getValue(), prefix.getValue(), outFileName.getValue(), Gfa2Generator()); + } + else if (outputFileFormat.getValue() == format[5]) + { + if (!seqFileName.isSet()) + { + throw TCLAP::ArgParseException("Required argument missing\n", "seqfilename"); + } + + GenerateFastaOutput(inputFileName.getValue(), seqFileName.getValue(), kvalue.getValue()); + } + } + catch (TCLAP::ArgException &e) + { + std::cerr << "error: " << e.error() << " for arg " << e.argId() << std::endl; + return 1; + } + catch (std::runtime_error & e) + { + std::cerr << "error: " << e.what() << std::endl; + return 1; + } + + + return 0; } diff --git a/external/twopaco/graphdump/pufferize.h b/external/twopaco/graphdump/pufferize.h deleted file mode 100644 index d94c4b9..0000000 --- a/external/twopaco/graphdump/pufferize.h +++ /dev/null @@ -1,154 +0,0 @@ -// -// Created by Fatemeh Almodaresi on 8/3/18. -// - -#ifndef TWOPACO_PUFFERIZE_H -#define TWOPACO_PUFFERIZE_H - -#include -#include - -constexpr uint16_t ONE = 1; -constexpr uint16_t START = ONE << 8; -constexpr uint16_t END = ONE << 9; -constexpr uint16_t SEEN = ONE << 10; -constexpr uint16_t MERGE_LEFT = ONE << 11; -constexpr uint16_t MERGE_RIGHT = ONE << 12; -constexpr uint16_t COMPLEX = ONE << 13; - -struct KmerInfo { - //<1 bit: complex node (crop both start and end)> - //<1 bit: merge right (crop end)> - //<1 bit: merge left (crop start)> - //<1 bit: set if kmer has already been seen> - //<1 bit: set if a reference ends with the kmer in forward> - //<1 bit: set if a reference starts with the kmer in forward> - //<4 bits: characters that precede kmer in forward> - //<4 bits: characters that succeed kmer in forward> - uint16_t kinf{0}; - uint32_t len{0}; - - void setPrecedingChar(bool isFw, char c) { - if (isFw) { - auto idx = TwoPaCo::DnaChar::MakeUpChar(c); - kinf |= ONE << (4 + idx); - } else { - auto idx = TwoPaCo::DnaChar::MakeUpChar(TwoPaCo::DnaChar::ReverseChar(c)); - kinf |= ONE << (idx); - } - } - void setSucceedingChar(bool isFw, char c) { - if (isFw) { - auto idx = TwoPaCo::DnaChar::MakeUpChar(c); - kinf |= ONE << (idx); - } else { - auto idx = TwoPaCo::DnaChar::MakeUpChar(TwoPaCo::DnaChar::ReverseChar(c)); - kinf |= ONE << (4 + idx); - } - } - - inline void setStart() { - kinf |= START; - } - inline void setEnd() { - kinf |= END; - } - inline void setSeen() { - kinf |= SEEN; - } - - inline bool isStart() { - return (kinf & START) != 0; - } - - inline bool isEnd() { - return (kinf & END) != 0; - } - - inline bool seen() { - return (kinf & SEEN) != 0; - } - - inline void setCropBoth() { - kinf |= COMPLEX; - } - - inline bool cropBoth() { - return (kinf & COMPLEX) != 0; - } - - inline void setCropStart() { - kinf |= MERGE_LEFT; - } - - inline bool cropStart() { - return (kinf & MERGE_LEFT) != 0; - } - - inline void setCropEnd() { - kinf |= MERGE_RIGHT; - } - - inline bool cropEnd() { - return (kinf & MERGE_RIGHT) != 0; - } - - uint16_t countSucceeding() { - uint16_t cnt{0}; - for (uint16_t i = 0; i < 4; i++) { - cnt += (kinf >> i) & ONE; - } - //std::cerr << kinf << " " << cnt << "\n"; - return cnt; - } - - uint16_t countPreceding() { - uint16_t cnt{0}; - for (uint16_t i = 4; i < 8; i++) { - cnt += (kinf >> i) & ONE; - } - //std::cerr << kinf << " " << cnt << "\n"; - return cnt; - } - void decideType(uint64_t k, uint64_t &approximateContigLen, uint64_t &cntr1, uint64_t &cntr2, uint64_t &cntr3, uint64_t &cntr4) { - - if (!kinf) {// if kmer doesn't exist - return; - } - auto precedeCnt = countPreceding(); - auto succeedCnt = countSucceeding(); - if (precedeCnt > 1 and succeedCnt > 1) { - setCropBoth(); - cntr1++; - } else if (succeedCnt > 1) { - if (isStart()) { - setCropBoth(); - cntr2++; - } else { - setCropStart(); - } - - } else if (precedeCnt > 1) { - if (isEnd()) { - setCropBoth(); - cntr3++; - } else { - setCropEnd(); - } - - } else if (precedeCnt == 1 and succeedCnt == 1) { - if (isEnd() and isStart()) { - setCropBoth(); - cntr4++; - } else if (isStart()) { - setCropEnd(); - } else { - setCropStart(); - } - } // otherwise, we don't require to crop any nucleotides from any sides of a contig/segment - if (cropBoth()) approximateContigLen+=(len+k); - else approximateContigLen+=len; - } -}; - -#endif //TWOPACO_PUFFERIZE_H diff --git a/src/PuffIndexer.cpp b/src/PuffIndexer.cpp index 59daeb5..d7c171e 100644 --- a/src/PuffIndexer.cpp +++ b/src/PuffIndexer.cpp @@ -1,5 +1,6 @@ #include #include +#include #include "spdlog/spdlog.h" #include "spdlog/sinks/ostream_sink.h" @@ -140,10 +141,17 @@ int puffIndex(IndexOptions& indexOpts){ args.push_back("-s"); args.push_back(rfile); args.push_back("-f"); - args.push_back("pufferized"); + args.push_back("gfa1"); args.push_back(outdir+"/tmp_dbg.bin"); - args.push_back("-p"); - args.push_back(outdir); + // args.push_back("-p"); + args.push_back("-o"); + args.push_back(outdir+"/dbg.gfa"); + //args.push_back(outdir); + + for(auto a : args){ + std::cout << a << " "; + } + std::cout << "\n" ; dumpGraphMain(args); // cleanup what we no longer need