diff --git a/.gitignore b/.gitignore index b6ae1c93..5570830b 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ docs/sphinx_build docs/sphinx_build_man docs/_build Testing/ +.idea/ \ No newline at end of file diff --git a/.gitmodules b/.gitmodules index 80286b42..131bf282 100644 --- a/.gitmodules +++ b/.gitmodules @@ -45,7 +45,7 @@ url = https://github.com/jeizenga/structures.git [submodule "deps/libbf"] path = deps/libbf - url = https://github.com/mavam/libbf.git + url = https://github.com/subwaystation/libbf.git [submodule "deps/cpp-httplib"] path = deps/cpp-httplib url = https://github.com/yhirose/cpp-httplib.git diff --git a/deps/libbf b/deps/libbf index 5478275d..6ac3c11c 160000 --- a/deps/libbf +++ b/deps/libbf @@ -1 +1 @@ -Subproject commit 5478275d8a4e9a5cc163b44c34517c515bd898ec +Subproject commit 6ac3c11c73b629130cc63618669c1be53d428c80 diff --git a/deps/libhandlegraph b/deps/libhandlegraph index 900f3a8f..772d1637 160000 --- a/deps/libhandlegraph +++ b/deps/libhandlegraph @@ -1 +1 @@ -Subproject commit 900f3a8f6bf8615e8446bf63f70077d5cff5f7fa +Subproject commit 772d16374e6f46791378c3d9ba87445216b76a08 diff --git a/deps/pybind11 b/deps/pybind11 index 98f1bbb8..0e43fcc7 160000 --- a/deps/pybind11 +++ b/deps/pybind11 @@ -1 +1 @@ -Subproject commit 98f1bbb8004f654ba9e26717bdf5912fb899b05a +Subproject commit 0e43fcc75e6b7429e3511dfb44343ec05a0ab843 diff --git a/scripts/heaps_fit.R b/scripts/heaps_fit.R index 88c69dfe..6e2338ac 100644 --- a/scripts/heaps_fit.R +++ b/scripts/heaps_fit.R @@ -31,6 +31,6 @@ n <- max(x$nth.genome) print(z * (f(n) - f(n-1))) print(z * (f(2) - f(1))) #print(f(n) - f(n-1)) - -ggplot(x, aes(x=nth.genome, y=base.pairs/1e9)) + geom_point(alpha=I(1/10)) + stat_function(fun=function(x) (fit$par[1] * x^fit$par[2] + fit$par[3]) * m) + scale_y_continuous("observed pangenome size (Gbp)") + scale_x_continuous("Nth included genome (200 permutations)") +pdf(NULL) +ggplot(x, aes(x=nth.genome, y=base.pairs/1e9)) + geom_point(alpha=I(1/10)) + stat_function(fun=function(x) (fit$par[1] * x^fit$par[2] + fit$par[3]) * m) + scale_y_continuous("observed pangenome size (Gbp)") + scale_x_continuous(paste("Nth included genome (", max(x$permutation)+1 ," permutations) with gamma=", round(fit$par[2], digits=3), sep = "")) + expand_limits(x = 0, y = 0) ggsave(args[2], height=5, width=9) diff --git a/src/algorithms/progress.hpp b/src/algorithms/progress.hpp index 4137cf96..e0eb271f 100644 --- a/src/algorithms/progress.hpp +++ b/src/algorithms/progress.hpp @@ -15,86 +15,81 @@ namespace algorithms { namespace progress_meter { -class ProgressMeter { -public: - std::string banner; - std::atomic<uint64_t> total; - std::atomic<uint64_t> completed; - std::chrono::time_point<std::chrono::steady_clock> start_time; - std::thread logger; - ProgressMeter(uint64_t _total, const std::string& _banner) - : total(_total), banner(_banner) { - start_time = std::chrono::steady_clock::now(); - completed = 0; - - logger = std::thread( - [&]() { - bool has_ever_printed = false; - - while (completed < total) { - if (completed > 0) { + class ProgressMeter { + public: + std::string banner; + std::atomic<uint64_t> total; + std::atomic<uint64_t> completed; + std::chrono::time_point<std::chrono::steady_clock> start_time; + std::thread logger; + ProgressMeter(uint64_t _total, const std::string& _banner) + : total(_total), banner(_banner) { + start_time = std::chrono::steady_clock::now(); + completed = 0; + logger = std::thread( + [&](void) { do_print(); - has_ever_printed = true; - } - if (has_ever_printed && completed < total) { - std::this_thread::sleep_for(std::chrono::milliseconds(250)); - } else { - std::this_thread::sleep_for(std::chrono::nanoseconds(100)); - } - } - }); + auto last = 0; + while (completed < total) { + auto curr = completed - last; + if (curr > 0) { + do_print(); + last = completed; + } + std::this_thread::sleep_for(std::chrono::milliseconds(500)); + } + }); + }; + void do_print(void) { + auto curr = std::chrono::steady_clock::now(); + std::chrono::duration<double> elapsed_seconds = curr-start_time; + double rate = completed / elapsed_seconds.count(); + double seconds_to_completion = (completed > 0 ? (total - completed) / rate : 0); + std::cerr << "\r" << banner << " " + << std::defaultfloat + << std::setfill(' ') + << std::setw(5) + << std::fixed + << std::setprecision(2) + << 100.0 * ((double)completed / (double)total) << "%" + << " @ " + << std::setw(4) << std::scientific << rate << " bp/s " + << "elapsed: " << print_time(elapsed_seconds.count()) << " " + << "remain: " << print_time(seconds_to_completion); + } + void finish(void) { + completed.store(total); + logger.join(); + do_print(); + std::cerr << std::endl; + } + std::string print_time(const double& _seconds) { + int days = 0, hours = 0, minutes = 0, seconds = 0; + distribute_seconds(days, hours, minutes, seconds, _seconds); + std::stringstream buffer; + buffer << std::setfill('0') << std::setw(2) << days << ":" + << std::setfill('0') << std::setw(2) << hours << ":" + << std::setfill('0') << std::setw(2) << minutes << ":" + << std::setfill('0') << std::setw(2) << seconds; + return buffer.str(); + } + void distribute_seconds(int& days, int& hours, int& minutes, int& seconds, const double& input_seconds) { + const int cseconds_in_day = 86400; + const int cseconds_in_hour = 3600; + const int cseconds_in_minute = 60; + const int cseconds = 1; + days = std::floor(input_seconds / cseconds_in_day); + hours = std::floor(((int)input_seconds % cseconds_in_day) / cseconds_in_hour); + minutes = std::floor((((int)input_seconds % cseconds_in_day) % cseconds_in_hour) / cseconds_in_minute); + seconds = ((((int)input_seconds % cseconds_in_day) % cseconds_in_hour) % cseconds_in_minute) / cseconds; // + (input_seconds - std::floor(input_seconds)); + //std::cerr << input_seconds << " seconds is " << days << " days, " << hours << " hours, " << minutes << " minutes, and " << seconds << " seconds." << std::endl; + } + void increment(const uint64_t& incr) { + completed += incr; + } }; - void do_print() { - auto curr = std::chrono::steady_clock::now(); - std::chrono::duration<double> elapsed_seconds = curr-start_time; - double rate = completed / elapsed_seconds.count(); - double seconds_to_completion = (total - completed) / rate; - std::cerr << "\r" << banner << " " - << std::defaultfloat - << std::setfill(' ') - << std::setw(5) - << std::fixed - << std::setprecision(2) - << 100.0 * ((double)completed / (double)total) << "%" - << " @ " - << std::setw(4) << std::scientific << rate << "/s " - << "elapsed: " << print_time(elapsed_seconds.count()) << " " - << "remain: " << print_time(seconds_to_completion); - } - void finish() { - completed.store(total); - logger.join(); - do_print(); - std::cerr << std::endl; - } - std::string print_time(const double& _seconds) { - int days = 0, hours = 0, minutes = 0, seconds = 0; - distribute_seconds(days, hours, minutes, seconds, _seconds); - std::stringstream buffer; - buffer << std::setfill('0') << std::setw(2) << days << ":" - << std::setfill('0') << std::setw(2) << hours << ":" - << std::setfill('0') << std::setw(2) << minutes << ":" - << std::setfill('0') << std::setw(2) << seconds; - return buffer.str(); - } - void distribute_seconds(int& days, int& hours, int& minutes, int& seconds, const double& input_seconds) { - const int cseconds_in_day = 86400; - const int cseconds_in_hour = 3600; - const int cseconds_in_minute = 60; - const int cseconds = 1; - days = std::floor(input_seconds / cseconds_in_day); - hours = std::floor(((int)input_seconds % cseconds_in_day) / cseconds_in_hour); - minutes = std::floor((((int)input_seconds % cseconds_in_day) % cseconds_in_hour) / cseconds_in_minute); - seconds = ((((int)input_seconds % cseconds_in_day) % cseconds_in_hour) % cseconds_in_minute) / cseconds; // + (input_seconds - std::floor(input_seconds)); - //std::cerr << input_seconds << " seconds is " << days << " days, " << hours << " hours, " << minutes << " minutes, and " << seconds << " seconds." << std::endl; - } - void increment(const uint64_t& incr) { - completed += incr; - } -}; } } - } diff --git a/src/odgi.cpp b/src/odgi.cpp index b97f31da..0adc8c86 100644 --- a/src/odgi.cpp +++ b/src/odgi.cpp @@ -781,7 +781,7 @@ void graph_t::reassign_node_ids(const std::function<nid_t(const nid_t&)>& get_ne /// Reorder the graph's internal structure to match that given. /// Optionally compact the id space of the graph to match the ordering, from 1->|ordering|. -void graph_t::apply_ordering(const std::vector<handle_t>& order_in, bool compact_ids) { +bool graph_t::apply_ordering(const std::vector<handle_t>& order_in, bool compact_ids) { // get mapping from old to new id // if we're given an empty order, just compact the ids based on our ordering const std::vector<handle_t>* order; @@ -894,6 +894,8 @@ void graph_t::apply_ordering(const std::vector<handle_t>& order_in, bool compact } node_v = new_node_v; deleted_nodes.clear(); + + return true; } void graph_t::apply_path_ordering(const std::vector<path_handle_t>& order) { diff --git a/src/odgi.hpp b/src/odgi.hpp index 7f2d5eb1..adfa7f14 100644 --- a/src/odgi.hpp +++ b/src/odgi.hpp @@ -305,7 +305,7 @@ class graph_t : public MutablePathDeletableHandleGraph, public SerializableHandl /// Reorder the graph's internal structure to match that given. /// Optionally compact the id space of the graph to match the ordering, from 1->|ordering|. - void apply_ordering(const std::vector<handle_t>& order, bool compact_ids = false); + bool apply_ordering(const std::vector<handle_t>& order, bool compact_ids = false); /// Organize the graph for better performance and memory use void optimize(bool allow_id_reassignment = true); diff --git a/src/subcommand/heaps_main.cpp b/src/subcommand/heaps_main.cpp index 890dccef..0a340df1 100644 --- a/src/subcommand/heaps_main.cpp +++ b/src/subcommand/heaps_main.cpp @@ -128,6 +128,11 @@ int main_heaps(int argc, char **argv) { auto vals = split(path_name, '#'); path_groups_map[vals.front()].push_back(p); }); + } else { + // no groups + graph.for_each_path_handle([&](const path_handle_t& p) { + path_groups_map[graph.get_path_name(p)].push_back(p); + }); } path_groups.reserve(path_groups_map.size()); for (auto& g : path_groups_map) { diff --git a/src/subcommand/overlap_main.cpp b/src/subcommand/overlap_main.cpp index dc6c0a79..176b51da 100644 --- a/src/subcommand/overlap_main.cpp +++ b/src/subcommand/overlap_main.cpp @@ -125,39 +125,6 @@ namespace odgi { } } - auto get_graph_pos = [](const odgi::graph_t &graph, - const path_pos_t &pos) { - auto path_end = graph.path_end(pos.path); - uint64_t walked = 0; - for (step_handle_t s = graph.path_begin(pos.path); - s != path_end; s = graph.get_next_step(s)) { - handle_t h = graph.get_handle_of_step(s); - uint64_t node_length = graph.get_length(h); - if (walked + node_length > pos.offset) { - return make_pos_t(graph.get_id(h), graph.get_is_reverse(h), pos.offset - walked); - } - walked += node_length; - } - -#pragma omp critical (cout) - std::cerr << "[odgi::overlap] warning: position " << graph.get_path_name(pos.path) << ":" << pos.offset - << " outside of path" << std::endl; - return make_pos_t(0, false, 0); - }; - - auto get_offset_in_path = [](const odgi::graph_t &graph, - const path_handle_t &path, const step_handle_t &target) { - auto path_end = graph.path_end(path); - uint64_t walked = 0; - step_handle_t s = graph.path_begin(path); - for (; s != target; s = graph.get_next_step(s)) { - handle_t h = graph.get_handle_of_step(s); - walked += graph.get_length(h); - } - assert(s != path_end); - return walked; - }; - if (!path_ranges.empty()) { std::cout << "#path\tstart\tend\tpath.touched" << std::endl; diff --git a/src/subcommand/version_main.cpp b/src/subcommand/version_main.cpp index a3e95c85..be3b1d63 100644 --- a/src/subcommand/version_main.cpp +++ b/src/subcommand/version_main.cpp @@ -1,6 +1,7 @@ #include "subcommand.hpp" #include "args.hxx" #include "../version.hpp" +#include <cstdint> namespace odgi {