Skip to content

Commit

Permalink
Merge pull request #293 from smehringer/minor_things
Browse files Browse the repository at this point in the history
[MISC] Reduce complexity of insert_into_ibf.
  • Loading branch information
eseiler authored Apr 25, 2023
2 parents f793d92 + 5b58660 commit 22d4edf
Show file tree
Hide file tree
Showing 8 changed files with 73 additions and 38 deletions.
7 changes: 2 additions & 5 deletions include/raptor/build/hibf/insert_into_ibf.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,11 @@ namespace raptor::hibf
{

// automatically does naive splitting if number_of_bins > 1
void insert_into_ibf(robin_hood::unordered_flat_set<size_t> & parent_kmers,
robin_hood::unordered_flat_set<size_t> const & kmers,
void insert_into_ibf(robin_hood::unordered_flat_set<size_t> const & kmers,
size_t const number_of_bins,
size_t const bin_index,
seqan3::interleaved_bloom_filter<> & ibf,
bool is_root,
timer<concurrent::yes> & fill_ibf_timer,
timer<concurrent::yes> & merge_kmers_timer);
timer<concurrent::yes> & fill_ibf_timer);

void insert_into_ibf(build_arguments const & arguments,
chopper_pack_record const & record,
Expand Down
26 changes: 26 additions & 0 deletions include/raptor/build/hibf/update_parent_kmers.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// --------------------------------------------------------------------------------------------------
// Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin
// Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik
// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
// shipped with this file and also available at: https://github.com/seqan/raptor/blob/main/LICENSE.md
// --------------------------------------------------------------------------------------------------

/*!\file
* \brief Provides raptor::hibf::update_parent_kmers.
* \author Svenja Mehringer <svenja.mehringer AT fu-berlin.de>
*/

#pragma once

#include <robin_hood.h>

#include <raptor/argument_parsing/timer.hpp>

namespace raptor::hibf
{

/*!\brief Inserts all elements of `kmers` into `parent_kmers` and records the time spent doing so.
 * \param[in,out] parent_kmers      The set that receives the elements of `kmers`.
 * \param[in]     kmers             The elements to insert; this set is not modified.
 * \param[in,out] merge_kmers_timer The timer to which the locally measured insertion time is added.
 * \details The function itself performs the insertion unconditionally; a caller that wants to skip
 *          it in some situation (e.g. for a root node) has to guard the call itself.
 */
void update_parent_kmers(robin_hood::unordered_flat_set<size_t> & parent_kmers,
robin_hood::unordered_flat_set<size_t> const & kmers,
timer<concurrent::yes> & merge_kmers_timer);

} // namespace raptor::hibf
1 change: 1 addition & 0 deletions src/build/hibf/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ if (NOT TARGET raptor_build_hibf)
parse_chopper_pack_header.cpp
parse_chopper_pack_line.cpp
read_chopper_pack_file.cpp
update_parent_kmers.cpp
update_user_bins.cpp
)

Expand Down
12 changes: 4 additions & 8 deletions src/build/hibf/construct_ibf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include <raptor/build/hibf/bin_size_in_bits.hpp>
#include <raptor/build/hibf/construct_ibf.hpp>
#include <raptor/build/hibf/insert_into_ibf.hpp>
#include <raptor/build/hibf/update_parent_kmers.hpp>

namespace raptor::hibf
{
Expand All @@ -41,14 +42,9 @@ seqan3::interleaved_bloom_filter<> construct_ibf(robin_hood::unordered_flat_set<
local_index_allocation_timer.stop();
arguments.index_allocation_timer += local_index_allocation_timer;

insert_into_ibf(parent_kmers,
kmers,
number_of_bins,
node_data.max_bin_index,
ibf,
is_root,
arguments.fill_ibf_timer,
arguments.merge_kmers_timer);
insert_into_ibf(kmers, number_of_bins, node_data.max_bin_index, ibf, arguments.fill_ibf_timer);
if (!is_root)
update_parent_kmers(parent_kmers, kmers, arguments.merge_kmers_timer);

return ibf;
}
Expand Down
10 changes: 5 additions & 5 deletions src/build/hibf/hierarchical_build.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <raptor/build/hibf/hierarchical_build.hpp>
#include <raptor/build/hibf/insert_into_ibf.hpp>
#include <raptor/build/hibf/loop_over_children.hpp>
#include <raptor/build/hibf/update_parent_kmers.hpp>
#include <raptor/build/hibf/update_user_bins.hpp>

namespace raptor::hibf
Expand Down Expand Up @@ -86,14 +87,13 @@ size_t hierarchical_build(robin_hood::unordered_flat_set<size_t> & parent_kmers,
else
{
compute_kmers(kmers, arguments, record);
insert_into_ibf(parent_kmers,
kmers,
insert_into_ibf(kmers,
record.number_of_bins.back(),
record.bin_indices.back(),
ibf,
is_root,
arguments.fill_ibf_timer,
arguments.merge_kmers_timer);
arguments.fill_ibf_timer);
if (!is_root)
update_parent_kmers(parent_kmers, kmers, arguments.merge_kmers_timer);
}

update_user_bins(data, filename_indices, record);
Expand Down
14 changes: 2 additions & 12 deletions src/build/hibf/insert_into_ibf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,11 @@ namespace raptor::hibf
{

// automatically does naive splitting if number_of_bins > 1
void insert_into_ibf(robin_hood::unordered_flat_set<size_t> & parent_kmers,
robin_hood::unordered_flat_set<size_t> const & kmers,
void insert_into_ibf(robin_hood::unordered_flat_set<size_t> const & kmers,
size_t const number_of_bins,
size_t const bin_index,
seqan3::interleaved_bloom_filter<> & ibf,
bool is_root,
timer<concurrent::yes> & fill_ibf_timer,
timer<concurrent::yes> & merge_kmers_timer)
timer<concurrent::yes> & fill_ibf_timer)
{
size_t const chunk_size = kmers.size() / number_of_bins + 1;
size_t chunk_number{};
Expand All @@ -44,13 +41,6 @@ void insert_into_ibf(robin_hood::unordered_flat_set<size_t> & parent_kmers,
}
local_fill_ibf_timer.stop();
fill_ibf_timer += local_fill_ibf_timer;

timer<concurrent::no> local_merge_kmers_timer{};
local_merge_kmers_timer.start();
if (!is_root)
parent_kmers.insert(kmers.begin(), kmers.end());
local_merge_kmers_timer.stop();
merge_kmers_timer += local_merge_kmers_timer;
}

void insert_into_ibf(build_arguments const & arguments,
Expand Down
12 changes: 4 additions & 8 deletions src/build/hibf/loop_over_children.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <raptor/build/hibf/hierarchical_build.hpp>
#include <raptor/build/hibf/insert_into_ibf.hpp>
#include <raptor/build/hibf/loop_over_children.hpp>
#include <raptor/build/hibf/update_parent_kmers.hpp>

namespace raptor::hibf
{
Expand Down Expand Up @@ -55,14 +56,9 @@ void loop_over_children(robin_hood::unordered_flat_set<size_t> & parent_kmers,
size_t const mutex_id{parent_bin_index / 64};
std::lock_guard<std::mutex> guard{local_ibf_mutex[mutex_id]};
ibf_positions[parent_bin_index] = ibf_pos;
insert_into_ibf(parent_kmers,
kmers,
1,
parent_bin_index,
ibf,
is_root,
arguments.fill_ibf_timer,
arguments.merge_kmers_timer);
insert_into_ibf(kmers, 1, parent_bin_index, ibf, arguments.fill_ibf_timer);
if (!is_root)
update_parent_kmers(parent_kmers, kmers, arguments.merge_kmers_timer);
}
}
};
Expand Down
29 changes: 29 additions & 0 deletions src/build/hibf/update_parent_kmers.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// --------------------------------------------------------------------------------------------------
// Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin
// Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik
// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
// shipped with this file and also available at: https://github.com/seqan/raptor/blob/main/LICENSE.md
// --------------------------------------------------------------------------------------------------

/*!\file
* \brief Implements raptor::hibf::update_parent_kmers.
* \author Svenja Mehringer <svenja.mehringer AT fu-berlin.de>
*/

#include <raptor/build/hibf/update_parent_kmers.hpp>

namespace raptor::hibf
{

/*!\brief Inserts all elements of `kmers` into `parent_kmers` and records the time spent doing so.
 * \param[in,out] parent_kmers      The set that receives the elements of `kmers`.
 * \param[in]     kmers             The elements to insert; this set is not modified.
 * \param[in,out] merge_kmers_timer The timer to which the locally measured insertion time is added.
 */
void update_parent_kmers(robin_hood::unordered_flat_set<size_t> & parent_kmers,
                         robin_hood::unordered_flat_set<size_t> const & kmers,
                         timer<concurrent::yes> & merge_kmers_timer)
{
    // Time the insertion with a function-local timer and add it to the caller's timer once at the end.
    timer<concurrent::no> insertion_stopwatch{};

    insertion_stopwatch.start();
    parent_kmers.insert(kmers.cbegin(), kmers.cend());
    insertion_stopwatch.stop();

    merge_kmers_timer += insertion_stopwatch;
}

} // namespace raptor::hibf

1 comment on commit 22d4edf

@vercel
Copy link

@vercel vercel bot commented on 22d4edf Apr 25, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Successfully deployed to the following URLs:

raptor – ./

seqan-raptor.vercel.app
raptor-git-main-seqan.vercel.app
raptor-seqan.vercel.app

Please sign in to comment.