Skip to content

Commit

Permalink
[MISC] Minor changes that will ease the transition once we move to th…
Browse files Browse the repository at this point in the history
…e HIBF library (#291)

* [INFRA] Update chopper

* [MISC] Resolve circular include of hierarchical_build.hpp and initialise_max_bin_kmers.hpp

* [MISC] Remove doubling of prefixes

* [MISC] automatic linting

---------

Co-authored-by: seqan-actions[bot] <[email protected]>
  • Loading branch information
smehringer and seqan-actions authored Apr 5, 2023
1 parent 77ffeff commit 22453b4
Show file tree
Hide file tree
Showing 9 changed files with 42 additions and 140 deletions.
27 changes: 0 additions & 27 deletions include/raptor/build/hibf/bin_prefixes.hpp

This file was deleted.

31 changes: 0 additions & 31 deletions include/raptor/build/hibf/initialise_max_bin_kmers.hpp

This file was deleted.

4 changes: 2 additions & 2 deletions src/argument_parsing/build_parsing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@
#include <cereal/archives/json.hpp>

#include <chopper/configuration.hpp>
#include <chopper/prefixes.hpp>

#include <raptor/argument_parsing/build_parsing.hpp>
#include <raptor/argument_parsing/compute_bin_size.hpp>
#include <raptor/argument_parsing/init_shared_meta.hpp>
#include <raptor/argument_parsing/parse_bin_path.hpp>
#include <raptor/argument_parsing/shared.hpp>
#include <raptor/argument_parsing/validators.hpp>
#include <raptor/build/hibf/bin_prefixes.hpp>
#include <raptor/build/raptor_build.hpp>

namespace raptor
Expand Down Expand Up @@ -234,7 +234,7 @@ bool input_is_pack_file(std::filesystem::path const & path)
std::string line{};
while (std::getline(file, line) && line.starts_with("##")) // Skip parameter information
{}
return line.starts_with(raptor::hibf::pack_file_first_line_prefix);
return line.starts_with(chopper::prefix::first_header_line);
}

void build_parsing(sharg::parser & parser)
Expand Down
1 change: 0 additions & 1 deletion src/build/hibf/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ if (NOT TARGET raptor_build_hibf)
construct_ibf.cpp
create_ibfs_from_chopper_pack.cpp
hierarchical_build.cpp
initialise_max_bin_kmers.cpp
insert_into_ibf.cpp
loop_over_children.cpp
parse_chopper_pack_header.cpp
Expand Down
28 changes: 27 additions & 1 deletion src/build/hibf/hierarchical_build.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
#include <raptor/build/hibf/compute_kmers.hpp>
#include <raptor/build/hibf/construct_ibf.hpp>
#include <raptor/build/hibf/hierarchical_build.hpp>
#include <raptor/build/hibf/initialise_max_bin_kmers.hpp>
#include <raptor/build/hibf/insert_into_ibf.hpp>
#include <raptor/build/hibf/loop_over_children.hpp>
#include <raptor/build/hibf/update_user_bins.hpp>
Expand All @@ -38,6 +37,33 @@ size_t hierarchical_build(robin_hood::unordered_flat_set<size_t> & parent_kmers,
std::vector<int64_t> filename_indices(current_node_data.number_of_technical_bins, -1);
robin_hood::unordered_flat_set<size_t> kmers{};

auto initialise_max_bin_kmers = [](robin_hood::unordered_flat_set<size_t> & kmers,
std::vector<int64_t> & ibf_positions,
std::vector<int64_t> & filename_indices,
lemon::ListDigraph::Node const & node,
build_data<data_layout_mode> & data,
build_arguments const & arguments) -> size_t
{
auto & node_data = data.node_map[node];

if (node_data.favourite_child != lemon::INVALID) // max bin is a merged bin
{
// recursively initialize favourite child first
ibf_positions[node_data.max_bin_index] =
hierarchical_build(kmers, node_data.favourite_child, data, arguments, false);
return 1;
}
else // max bin is not a merged bin
{
// we assume that the max record is at the beginning of the list of remaining records.
auto const & record = node_data.remaining_records[0];
compute_kmers(kmers, arguments, record);
update_user_bins(data, filename_indices, record);

return record.number_of_bins.back();
}
};

// initialize lower level IBF
size_t const max_bin_tbs =
initialise_max_bin_kmers(kmers, ibf_positions, filename_indices, current_node, data, arguments);
Expand Down
66 changes: 0 additions & 66 deletions src/build/hibf/initialise_max_bin_kmers.cpp

This file was deleted.

18 changes: 9 additions & 9 deletions src/build/hibf/parse_chopper_pack_header.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

#include <chopper/prefixes.hpp>

#include <raptor/build/hibf/bin_prefixes.hpp>
#include <raptor/build/hibf/parse_chopper_pack_header.hpp>

namespace raptor::hibf
Expand Down Expand Up @@ -61,11 +60,11 @@ size_t parse_chopper_pack_header(lemon::ListDigraph & ibf_graph,
&& std::string_view{line}.substr(1, 1) == chopper::prefix::header_config)
; // skip config in header

assert(line[0] == '#'); // we are reading header lines
assert(line.substr(1, hibf_prefix.size()) == hibf_prefix); // first line should always be High level IBF
assert(line[0] == '#'); // we are reading header lines
assert(line.substr(1, chopper::prefix::high_level.size()) == chopper::prefix::high_level);

// parse High Level max bin index
assert(line.substr(hibf_prefix.size() + 2, 11) == "max_bin_id:");
assert(line.substr(chopper::prefix::high_level.size() + 2, 11) == "max_bin_id:");
std::string_view const hibf_max_bin_str{line.begin() + 27, line.end()};

auto high_level_node = ibf_graph.addNode(); // high-level node = root node
Expand All @@ -76,14 +75,15 @@ size_t parse_chopper_pack_header(lemon::ListDigraph & ibf_graph,
// first read and parse header records, in order to sort them before adding them to the graph
while (std::getline(chopper_pack_file, line) && line.substr(0, 6) != "#FILES")
{
assert(line.substr(1, merged_bin_prefix.size()) == merged_bin_prefix);
assert(line.substr(1, chopper::prefix::merged_bin.size()) == chopper::prefix::merged_bin);

// parse header line
std::string_view const indices_str{line.begin() + 1 /*#*/ + merged_bin_prefix.size() + 1 /*_*/,
std::find(line.begin() + merged_bin_prefix.size() + 2, line.end(), ' ')};
std::string_view const indices_str{
line.begin() + 1 /*#*/ + chopper::prefix::merged_bin.size() + 1 /*_*/,
std::find(line.begin() + chopper::prefix::merged_bin.size() + 2, line.end(), ' ')};

assert(line.substr(merged_bin_prefix.size() + indices_str.size() + 3, 11) == "max_bin_id:");
std::string_view const max_id_str{line.begin() + merged_bin_prefix.size() + indices_str.size() + 14,
assert(line.substr(chopper::prefix::merged_bin.size() + indices_str.size() + 3, 11) == "max_bin_id:");
std::string_view const max_id_str{line.begin() + chopper::prefix::merged_bin.size() + indices_str.size() + 14,
line.end()};

header_records.emplace_back(parse_bin_indices(indices_str), parse_first_bin(max_id_str));
Expand Down
5 changes: 3 additions & 2 deletions test/unit/cli/argument_parsing/options_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
// shipped with this file and also available at: https://github.com/seqan/raptor/blob/main/LICENSE.md
// --------------------------------------------------------------------------------------------------

#include <raptor/build/hibf/bin_prefixes.hpp>
#include <chopper/prefixes.hpp>

#include <raptor/test/cli_test.hpp>
#include <raptor/test/tmp_test_file.hpp>

Expand All @@ -27,7 +28,7 @@ std::filesystem::path const
dummy_sequence_file = test_files.create("dummy.fasta", ">ID\nACGTCACGATCGTACGATCGATCGATCG"),
tmp_bin_list_file = test_files.create("all_bins.txt", dummy_sequence_file.c_str()),
tmp_bin_list_empty = test_files.create("empty.txt"),
tmp_bin_list_corrupted = test_files.create("corrupted.txt", raptor::hibf::pack_file_first_line_prefix),
tmp_bin_list_corrupted = test_files.create("corrupted.txt", chopper::prefix::first_header_line),
empty_sequence_file = test_files.create("empty.fasta"),
tmp_empty_bin_file = test_files.create("empty_bin.txt", empty_sequence_file.c_str()),
header_file = test_files.create("bin1.header", "1111111111111111111\t19\t0\t1\n"),
Expand Down

1 comment on commit 22453b4

@vercel
Copy link

@vercel vercel bot commented on 22453b4 Apr 5, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Successfully deployed to the following URLs:

raptor – ./

seqan-raptor.vercel.app
raptor-git-main-seqan.vercel.app
raptor-seqan.vercel.app

Please sign in to comment.