diff --git a/.github/workflows/ci_coverage.yml b/.github/workflows/ci_coverage.yml index a68fa8d5..eeef7123 100644 --- a/.github/workflows/ci_coverage.yml +++ b/.github/workflows/ci_coverage.yml @@ -51,7 +51,7 @@ jobs: uses: seqan/actions/setup-toolchain@main with: compiler: ${{ matrix.compiler }} - ccache_size: 125M + ccache_size: 200M - name: Install CMake uses: seqan/actions/setup-cmake@main @@ -68,7 +68,7 @@ jobs: mkdir build cd build cmake .. -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ - -DCMAKE_CXX_FLAGS="-Wno-interference-size" \ + -DCMAKE_CXX_FLAGS="-Werror -Wno-interference-size" \ -DCHOPPER_NATIVE_BUILD=OFF make -j2 gtest_build @@ -93,8 +93,7 @@ jobs: ${{ github.workspace }}/build \ --filter ${{ github.workspace }}/include \ --filter ${{ github.workspace }}/src \ - --exclude ${{ github.workspace }}/src/measure_hyperloglog.cpp \ - --exclude ${{ github.workspace }}/src/display_layout \ + --exclude ${{ github.workspace }}/src/util \ --exclude-lines-by-pattern '^\s*$' \ --exclude-lines-by-pattern '^\s*};$' \ --exclude-lines-by-pattern '^.*GCOVR_EXCL_LINE.*$' \ diff --git a/.github/workflows/ci_linux.yml b/.github/workflows/ci_linux.yml index fb2e3264..8093bccf 100644 --- a/.github/workflows/ci_linux.yml +++ b/.github/workflows/ci_linux.yml @@ -31,17 +31,25 @@ jobs: fail-fast: true matrix: include: + - name: "clang17" + compiler: "clang-17" + build: unit + build_type: Release + - name: "gcc13" compiler: "gcc-13" build_type: Release + cxx_flags: -Wno-interference-size - name: "gcc12" compiler: "gcc-12" build_type: Release + cxx_flags: -Wno-interference-size - name: "gcc11" compiler: "gcc-11" build_type: Release + cxx_flags: -Wno-interference-size steps: - name: Checkout @@ -54,7 +62,11 @@ jobs: uses: seqan/actions/setup-toolchain@main with: compiler: ${{ matrix.compiler }} - ccache_size: 75M + ccache_size: 125M + + - name: Install OpenMP + if: contains(matrix.name, 'clang') + run: install libomp-17-dev - name: Install CMake uses: 
seqan/actions/setup-cmake@main @@ -66,7 +78,7 @@ jobs: mkdir build cd build cmake .. -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ - -DCMAKE_CXX_FLAGS="-Wno-interference-size" \ + -DCMAKE_CXX_FLAGS="-Werror ${{ matrix.cxx_flags }}" \ -DCHOPPER_NATIVE_BUILD=OFF make -j2 gtest_build diff --git a/.github/workflows/ci_macos.yml b/.github/workflows/ci_macos.yml index a490bed7..576872f1 100644 --- a/.github/workflows/ci_macos.yml +++ b/.github/workflows/ci_macos.yml @@ -54,7 +54,7 @@ jobs: uses: seqan/actions/setup-toolchain@main with: compiler: ${{ matrix.compiler }} - ccache_size: 75M + ccache_size: 125M - name: Install CMake uses: seqan/actions/setup-cmake@main @@ -66,7 +66,7 @@ jobs: mkdir build cd build cmake .. -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ - -DCMAKE_CXX_FLAGS="-Wno-interference-size" \ + -DCMAKE_CXX_FLAGS="-Werror -Wno-interference-size" \ -DCHOPPER_NATIVE_BUILD=OFF make -j3 gtest_build diff --git a/.github/workflows/ci_misc.yml b/.github/workflows/ci_misc.yml index ee6514d9..d8552789 100644 --- a/.github/workflows/ci_misc.yml +++ b/.github/workflows/ci_misc.yml @@ -64,7 +64,7 @@ jobs: mkdir build cd build cmake .. 
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ - -DCMAKE_CXX_FLAGS="-Wno-interference-size" \ + -DCMAKE_CXX_FLAGS="-Werror -Wno-interference-size" \ -DCHOPPER_HEADER_TEST_ONLY=ON \ -DCHOPPER_NATIVE_BUILD=OFF diff --git a/.gitmodules b/.gitmodules index d446a687..cdee4a2c 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,9 +1,6 @@ [submodule "lib/seqan3"] path = lib/seqan3 url = https://github.com/seqan/seqan3.git -[submodule "lib/robin-hood-hashing"] - path = lib/robin-hood-hashing - url = https://github.com/martinus/robin-hood-hashing [submodule "lib/simde"] path = lib/simde url = https://github.com/simd-everywhere/simde diff --git a/include/chopper/configuration.hpp b/include/chopper/configuration.hpp index 309cd91c..346f443f 100644 --- a/include/chopper/configuration.hpp +++ b/include/chopper/configuration.hpp @@ -7,15 +7,11 @@ #pragma once -#include +#include #include +#include -#include #include -#include -#include - -#include #include // IWYU pragma: keep #include @@ -70,48 +66,9 @@ struct configuration //!\brief The HIBF config which will be used to compute the layout within the HIBF lib. 
seqan::hibf::config hibf_config; - void read_from(std::istream & stream) - { - std::string line; - std::stringstream config_str; - - while (std::getline(stream, line) && line != chopper::prefix::meta_chopper_config_start) - ; - - assert(line == chopper::prefix::meta_chopper_config_start); - - while (std::getline(stream, line) && line != chopper::prefix::meta_chopper_config_end) - { - assert(line.size() >= 2); - assert(std::string_view{line}.substr(0, 1) == seqan::hibf::prefix::meta_header); - config_str << line.substr(1); // remove seqan::hibf::prefix::meta_header - } - - assert(line == chopper::prefix::meta_chopper_config_end); + void read_from(std::istream & stream); - cereal::JSONInputArchive iarchive(config_str); - iarchive(*this); - - hibf_config.read_from(stream); - } - - void write_to(std::ostream & stream) const - { - // write json file to temprorary string stream with cereal - std::stringstream config_stream{}; - cereal::JSONOutputArchive output(config_stream); // stream to cout - output(cereal::make_nvp("chopper_config", *this)); - - // write config - stream << chopper::prefix::meta_chopper_config_start << '\n'; - std::string line; - while (std::getline(config_stream, line, '\n')) - stream << seqan::hibf::prefix::meta_header << line << '\n'; - stream << seqan::hibf::prefix::meta_header << "}\n" // last closing bracket isn't written by loop above - << chopper::prefix::meta_chopper_config_end << '\n'; - - hibf_config.write_to(stream); - } + void write_to(std::ostream & stream) const; private: friend class cereal::access; diff --git a/include/chopper/input_functor.hpp b/include/chopper/input_functor.hpp index 8f1bc413..83c6a35f 100644 --- a/include/chopper/input_functor.hpp +++ b/include/chopper/input_functor.hpp @@ -7,15 +7,14 @@ #pragma once +#include +#include #include #include #include -#include -#include - -#include +#include namespace chopper { @@ -38,36 +37,7 @@ struct input_functor uint8_t kmer_size{21}; - void operator()(size_t const num, 
seqan::hibf::insert_iterator it) - { - assert(filenames.size() > num); - if (input_are_precomputed_files) - { - uint64_t hash{}; - char * const hash_data{reinterpret_cast(&hash)}; - std::streamsize const hash_bytes{sizeof(hash)}; - - std::ifstream infile{filenames[num], std::ios::binary}; - - while (infile.read(hash_data, hash_bytes)) - it = hash; - } - else - { - sequence_file_type fin{filenames[num]}; - - seqan3::shape shape = seqan3::ungapped{kmer_size}; - auto minimizer_view = seqan3::views::minimiser_hash(shape, - seqan3::window_size{kmer_size}, - seqan3::seed{adjust_seed(shape.count())}); - - for (auto && [seq] : fin) - { - for (auto hash_value : seq | minimizer_view) - it = hash_value; - } - } - } + void operator()(size_t const num, seqan::hibf::insert_iterator it); }; } // namespace chopper diff --git a/include/chopper/layout/determine_best_number_of_technical_bins.hpp b/include/chopper/layout/determine_best_number_of_technical_bins.hpp new file mode 100644 index 00000000..d085c331 --- /dev/null +++ b/include/chopper/layout/determine_best_number_of_technical_bins.hpp @@ -0,0 +1,22 @@ +// --------------------------------------------------------------------------------------------------- +// Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin +// Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik +// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +// shipped with this file and also available at: https://github.com/seqan/chopper/blob/main/LICENSE.md +// --------------------------------------------------------------------------------------------------- + +#include +#include + +#include + +#include +#include + +namespace chopper::layout +{ + +std::pair> +determine_best_number_of_technical_bins(chopper::configuration & config); + +} diff --git a/include/chopper/layout/execute.hpp b/include/chopper/layout/execute.hpp index 26ef8491..84642c8e 100644 --- 
a/include/chopper/layout/execute.hpp +++ b/include/chopper/layout/execute.hpp @@ -7,6 +7,9 @@ #pragma once +#include +#include + #include namespace chopper::layout diff --git a/include/chopper/layout/hibf_statistics.hpp b/include/chopper/layout/hibf_statistics.hpp index 5d32eea2..81aeca40 100644 --- a/include/chopper/layout/hibf_statistics.hpp +++ b/include/chopper/layout/hibf_statistics.hpp @@ -9,18 +9,16 @@ #include #include -#include -#include +#include +#include #include #include +#include +#include #include #include -#include -#include -#include -#include #include #include @@ -50,12 +48,12 @@ namespace chopper::layout class hibf_statistics { public: - hibf_statistics() = default; //!< Defaulted. - hibf_statistics(hibf_statistics const & b) = default; //!< Defaulted. - hibf_statistics & operator=(hibf_statistics const &) = default; //!< Defaulted. - hibf_statistics(hibf_statistics && b) = default; //!< Defaulted. - hibf_statistics & operator=(hibf_statistics &&) = default; //!< Defaulted. - ~hibf_statistics() = default; //!< Defaulted. + hibf_statistics() = delete; //!< Deleted. Holds reference members. + hibf_statistics(hibf_statistics const & b) = delete; //!< Deleted. Holds const member. + hibf_statistics & operator=(hibf_statistics const &) = delete; //!< Deleted. Holds const member. + hibf_statistics(hibf_statistics && b) = delete; //!< Deleted. Holds const member. + hibf_statistics & operator=(hibf_statistics &&) = delete; //!< Deleted. Holds const member. + ~hibf_statistics() = default; //!< Defaulted. /*!\brief Construct an empty HIBF with an empty top level IBF * \param[in] config_ User configuration for the HIBF. 
@@ -64,18 +62,10 @@ class hibf_statistics */ hibf_statistics(configuration const & config_, std::vector const & sketches_, - std::vector const & kmer_counts) : - config{config_}, - fp_correction{ - seqan::hibf::layout::compute_fpr_correction({.fpr = config_.hibf_config.maximum_false_positive_rate, - .hash_count = config_.hibf_config.number_of_hash_functions, - .t_max = config_.hibf_config.tmax})}, - sketches{sketches_}, - counts{kmer_counts}, - total_kmer_count{std::accumulate(kmer_counts.begin(), kmer_counts.end(), size_t{})} - {} - - struct bin; // forward declaration + std::vector const & kmer_counts); + + //!\brief Represents a (set) of user bins (see ibf_statistics::bin_kind). + class bin; //!\brief A representation of an IBF level that gathers information about bins in an IBF. struct level @@ -94,326 +84,20 @@ class hibf_statistics merged //!< Multiple user bins are merged into a single technical bin. }; - //!\brief Represents a (set) of user bins (see ibf_statistics::bin_kind). - class bin - { - public: - bin_kind kind; //!< Either a split or merged bin. - size_t cardinality; //!< The size/weight of the bin (either a kmer count or hll sketch estimation). - size_t num_contained_ubs; //!< [MERGED] How many UBs are merged within this TB. - size_t num_spanning_tbs; //!< [SPLIT] How many TBs are used for this sindle UB. - std::vector user_bin_indices; //!< The user bin indices of this bin. - size_t tb_index; // The (first) technical bin idx this bin is stored in. - level child_level; //!< [MERGED] The lower level ibf statistics. - size_t child_level_idx; //!< [MERGED] The lower level ibf statistics. - - bin() = default; //!< Defaulted. - bin(bin const & b) = default; //!< Defaulted. - bin & operator=(bin const &) = default; //!< Defaulted. - bin(bin && b) = default; //!< Defaulted. - bin & operator=(bin &&) = default; //!< Defaulted. - ~bin() = default; //!< Defaulted. 
- - bin(bin_kind const kind_, size_t const spanning_tbs, std::vector const & user_bin_indices_) : - kind{kind_}, - num_contained_ubs{user_bin_indices_.size()}, - num_spanning_tbs{spanning_tbs}, - user_bin_indices{user_bin_indices_} - { - assert((kind == bin_kind::split && num_contained_ubs == 1u) - || (kind == bin_kind::merged && num_spanning_tbs == 1u)); - } - }; - //!\brief Gather all statistics to have all members ready. - void finalize() - { - collect_bins(); - - compute_cardinalities(top_level_ibf); - - compute_total_query_cost(top_level_ibf); - - gather_statistics(top_level_ibf, 0); - - expected_HIBF_query_cost = total_query_cost / total_kmer_count; - } + void finalize(); //!\brief Prints a column names of the summary to the command line. - static void print_header_to(std::ostream & stream, bool const verbose = true) - { - // print column names explanation in header - stream << "## ### Notation ###\n" - << "## X-IBF = An IBF with X number of bins.\n" - << "## X-HIBF = An HIBF with tmax = X, e.g a maximum of X technical bins on each level.\n"; - - stream << "## ### Column Description ###\n" - "## tmax : The maximum number of technical bin on each level\n" - "## c_tmax : The technical extra cost of querying an tmax-IBF, compared to 64-IBF\n" - "## l_tmax : The estimated query cost for an tmax-HIBF, compared to an 64-HIBF\n" - "## m_tmax : The estimated memory consumption for an tmax-HIBF, compared to an 64-HIBF\n" - "## (l*m)_tmax : Computed by l_tmax * m_tmax\n" - "## size : The expected total size of an tmax-HIBF\n" - << ((verbose) ? 
"## uncorr_size : The expected size of an tmax-HIBF without FPR correction\n" : ""); - - // print column names - stream << "# tmax" << '\t' << "c_tmax" << '\t' << "l_tmax" << '\t' << "m_tmax" << '\t' << "(l*m)_tmax" << '\t' - << "size"; - - if (verbose) // uncorrected size and add level statistics - { - stream << '\t' << "uncorr_size" << '\t' << "level" << '\t' << "num_ibfs" << '\t' << "level_size" << '\t' - << "level_size_no_corr" << '\t' << "total_num_tbs" << '\t' << "avg_num_tbs" << '\t' - << "split_tb_percentage" << '\t' << "max_split_tb" << '\t' << "avg_split_tb" << '\t' << "max_factor" - << '\t' << "avg_factor"; - } - - stream << '\n'; - } + static void print_header_to(std::ostream & stream, bool const verbose = true); //!\brief Prints a tab-separated summary of the statistics of this HIBF to the command line. - void print_summary_to(size_t & t_max_64_memory, std::ostream & stream, bool const verbose = true) - { - if (summaries.empty()) - finalize(); - - if (t_max_64_memory == 0) - t_max_64_memory = total_hibf_size_in_byte(); - - double const relative_memory_size = total_hibf_size_in_byte() / static_cast(t_max_64_memory); - double const query_time_memory_usage_prod = expected_HIBF_query_cost * relative_memory_size; - - stream << std::fixed << std::setprecision(2); - - std::string level_str, num_ibfs_str, level_size_str, level_size_no_corr_str, total_num_tbs_str, avg_num_tbs_str, - split_tb_percentage_str, max_split_tb_str, avg_split_tb_str, max_factor_str, avg_factor_str; - - size_t total_size{}; - size_t total_size_no_corr{}; - - // go through each level and collect and output the statistics - auto to_string_with_precision = [](auto num) - { - std::stringstream ss; - ss << std::fixed << std::setprecision(2) << num; - return ss.str(); - }; - - for (auto const & [level, s] : summaries) - { - size_t const level_size = std::reduce(s.ibf_mem_size.begin(), s.ibf_mem_size.end()); - size_t const level_size_no_corr = std::reduce(s.ibf_mem_size_no_corr.begin(), 
s.ibf_mem_size_no_corr.end()); - - total_size += level_size; - total_size_no_corr += level_size_no_corr; - - size_t const total_num_tbs = std::reduce(s.num_tbs.begin(), s.num_tbs.end()); - - size_t const total_num_split_tbs = std::reduce(s.num_split_tbs.begin(), s.num_split_tbs.end()); - double const split_tb_percentage = 100.0 * static_cast(total_num_split_tbs) / total_num_tbs; - - size_t const max_split_bin_span = *std::max_element(s.max_split_tb_span.begin(), s.max_split_tb_span.end()); - -#if CHOPPER_WORKAROUND_GCC_BOGUS_MEMCPY -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wrestrict" -#endif // CHOPPER_WORKAROUND_GCC_BOGUS_MEMCPY - - level_str += ":" + to_string_with_precision(level); - num_ibfs_str += ":" + to_string_with_precision(s.num_ibfs); - level_size_str += ":" + to_formatted_BF_size(level_size); - level_size_no_corr_str += ":" + to_formatted_BF_size(level_size_no_corr); - total_num_tbs_str += ":" + to_string_with_precision(total_num_tbs); - avg_num_tbs_str += ":" + to_string_with_precision(total_num_tbs / s.num_ibfs); - split_tb_percentage_str += ":" + to_string_with_precision(split_tb_percentage); - -#if CHOPPER_WORKAROUND_GCC_BOGUS_MEMCPY -# pragma GCC diagnostic pop -#endif // CHOPPER_WORKAROUND_GCC_BOGUS_MEMCPY - - // if there are no split bins on this level, the following statistics don't make sense - if (max_split_bin_span != 0) - { - size_t const total_num_split_ubs = std::reduce(s.num_split_ubs.begin(), s.num_split_ubs.end()); - double const avg_split_bin = - static_cast(total_num_split_tbs) / static_cast(total_num_split_ubs); - size_t const total_split_tb_kmers = std::reduce(s.split_tb_kmers.begin(), s.split_tb_kmers.end()); - double const avg_factor = - static_cast(std::reduce(s.split_tb_corr_kmers.begin(), s.split_tb_corr_kmers.end())) - / static_cast(total_split_tb_kmers); - - max_split_tb_str += ":" + to_string_with_precision(max_split_bin_span); - avg_split_tb_str += ":" + to_string_with_precision(avg_split_bin); - 
max_factor_str += ":" + to_string_with_precision((fp_correction)[max_split_bin_span]); - avg_factor_str += ":" + to_string_with_precision(avg_factor); - } - else - { - max_split_tb_str += ":-"; - avg_split_tb_str += ":-"; - max_factor_str += ":-"; - avg_factor_str += ":-"; - } - } - - stream << std::fixed << std::setprecision(2); - - stream /* tmax */ << config.hibf_config.tmax - << '\t' - /* c_tmax */ - << chopper::layout::ibf_query_cost::interpolated( - config.hibf_config.tmax, - config.hibf_config.maximum_false_positive_rate) - << '\t' - /* l_tmax */ - << expected_HIBF_query_cost - << '\t' /*relative to a 64 bin IBF*/ - /* m_tmax */ - << relative_memory_size - << '\t' /*relative to the 64 T_Max HIBF*/ - /* (l*m)tmax */ - << query_time_memory_usage_prod - << '\t' - /* corr. size */ - << to_formatted_BF_size(total_size) << ((verbose) ? '\t' : '\n'); - - if (verbose) - { - // uncorrected FPR - stream /*uncorr. size */ << to_formatted_BF_size(total_size_no_corr) << '\t'; - - // per level statistics: - stream /* level */ << level_str - << '\t' - /* num_ibfs */ - << num_ibfs_str - << '\t' - /* level_size */ - << level_size_str - << '\t' - /* level_size_no_corr */ - << level_size_no_corr_str - << '\t' - /* total_num_tbs */ - << total_num_tbs_str - << '\t' - /* avg_num_tbs */ - << avg_num_tbs_str - << '\t' - /* split_tb_percentage */ - << split_tb_percentage_str - << '\t' - /* max_split_tb */ - << max_split_tb_str - << '\t' - /* avg_split_tb */ - << avg_split_tb_str - << '\t' - /* max_factor */ - << max_factor_str - << '\t' - /* avg_factor */ - << avg_factor_str << '\n'; - } - } + void print_summary_to(size_t & t_max_64_memory, std::ostream & stream, bool const verbose = true); //!\brief Return the total corrected size of the HIBF in bytes - size_t total_hibf_size_in_byte() - { - if (summaries.empty()) - finalize(); - - size_t total_size{}; - - // go through each level and collect the memory sizes - for (auto const & [level, summary] : summaries) - { - (void)level; - - 
total_size += std::reduce(summary.ibf_mem_size.begin(), summary.ibf_mem_size.end()); - } - - return compute_bin_size(total_size) / 8; - } + size_t total_hibf_size_in_byte(); //!\brief Round bytes to the appropriate unit and convert to string with unit. - [[nodiscard]] static std::string byte_size_to_formatted_str(size_t const bytes) - { - size_t iterations{}; - size_t integer{bytes}; - - while (integer >> 10u && iterations < 6u) - { - integer >>= 10u; - ++iterations; - } - - // While this is a bit more involved, we can avoid using floating point numbers. - auto first_decimal_position = [&]() - { - assert(iterations > 0u); - size_t decimal{bytes}; - decimal -= integer << (iterations * 10u); // Substract bytes represented by integer, e.g. -5GiB - decimal >>= (iterations - 1u) * 10u; // Shift to next smallest unit, e.g. 800MiB - decimal = decimal * 1000u / 1024u; // Account for using decimal system, i.e. 800MiB != 0.8GiB - size_t const diff{decimal - (decimal / 100u) * 100u}; // We want to round up to 1 decimal position - uint32_t const round_up{diff >= 50u}; - decimal += round_up * 100u - diff; - decimal /= 100u; - return decimal; - }; - - auto formatted_string = [&]() - { - static constexpr int8_t int_to_char_offset{'0'}; // int 0 as char: char{0 + 48} = '0' - size_t const decimal = iterations ? first_decimal_position() : 0u; - assert(decimal <= 10u); - - if (!iterations) // No decimals for Bytes - return std::to_string(integer); - else if (decimal < 10u) // No need to round integer part - return std::to_string(integer) + '.' + static_cast(decimal + int_to_char_offset); - else // Round integer part, e.g., 5.99 MiB should report 6.0 MiB - { - ++integer; - // Check whether rounding results in a change of unit, e.g. 
1023.99MiB to 1.0GiB - if (integer >> 10u) - { - ++iterations; - integer >>= 10u; - } - return std::to_string(integer) + ".0"; - } - }; - - std::string result{formatted_string()}; - switch (iterations) - { - case 0: - result += "Bytes"; - break; - case 1: - result += "KiB"; - break; - case 2: - result += "MiB"; - break; - case 3: - result += "GiB"; - break; - case 4: - result += "TiB"; - break; - case 5: - result += "PiB"; - break; - default: - result += "EiB"; - break; - } - - return result; - } + [[nodiscard]] static std::string byte_size_to_formatted_str(size_t const bytes); //!\brief The top level IBF of this HIBF, often starting point for recursions. level top_level_ibf; @@ -444,28 +128,7 @@ class hibf_statistics size_t const total_kmer_count{}; //!\brief Statistics for all IBFs on a certain level of the HIBF. - struct level_summary - { - size_t num_ibfs{}; - - std::vector num_tbs{}; - std::vector num_ubs{}; - - std::vector num_split_tbs{}; - std::vector num_merged_tbs{}; - - std::vector num_split_ubs{}; - std::vector num_merged_ubs{}; - - std::vector max_split_tb_span{}; - std::vector split_tb_corr_kmers{}; - std::vector split_tb_kmers{}; - - std::vector max_ubs_in_merged{}; - - std::vector ibf_mem_size{}; - std::vector ibf_mem_size_no_corr{}; - }; + struct level_summary; //!\brief The gathered summary of statistics for each level of this HIBF. 
std::map summaries; @@ -480,259 +143,79 @@ class hibf_statistics * ----------------------- * LN(1 - e^(LN(FPR) / HASHES) ) */ - size_t compute_bin_size(size_t const number_of_kmers_to_be_stored) const - { - return std::ceil( - -static_cast(number_of_kmers_to_be_stored * config.hibf_config.number_of_hash_functions) - / std::log(1 - - std::exp(std::log(config.hibf_config.maximum_false_positive_rate) - / config.hibf_config.number_of_hash_functions))); - } + size_t compute_bin_size(size_t const number_of_kmers_to_be_stored) const; /*!\brief Compute the Bloom Filter size from `number_of_kmers_to_be_stored` and * return it as a formatted string with the appropriate unit. * \param[in] number_of_kmers_to_be_stored */ - std::string to_formatted_BF_size(size_t const number_of_kmers_to_be_stored) const - { - size_t const size_in_bytes = compute_bin_size(number_of_kmers_to_be_stored) / 8; - return byte_size_to_formatted_str(size_in_bytes); - } + std::string to_formatted_BF_size(size_t const number_of_kmers_to_be_stored) const; - void collect_bins() - { - std::vector ibfs(hibf_layout.max_bins.size() + 1); // 0 = top_level - robin_hood::unordered_map, size_t> id_to_pos{}; - - // fill id_to_pos map - id_to_pos[std::vector{}] = 0; - for (size_t i = 0; i < hibf_layout.max_bins.size(); ++i) - id_to_pos[hibf_layout.max_bins[i].previous_TB_indices] = i + 1; - - for (auto const & user_bin_info : hibf_layout.user_bins) - { - std::vector prev{}; - - // add user bin index to previous merged bins - for (size_t i = 0; i < user_bin_info.previous_TB_indices.size(); ++i) - { - auto & ibf = ibfs[id_to_pos.at(prev)]; - auto const target_tb_index = user_bin_info.previous_TB_indices[i]; - - bool found_merged_bin{false}; - for (auto & previous_bins_to_check : ibf.bins) - { - if (previous_bins_to_check.tb_index == target_tb_index) - { - found_merged_bin = true; - previous_bins_to_check.user_bin_indices.push_back(user_bin_info.idx); - ++previous_bins_to_check.num_contained_ubs; - } - } - - if 
(!found_merged_bin) - { - ibf.bins.emplace_back(hibf_statistics::bin_kind::merged, 1, std::vector{user_bin_info.idx}); - ibf.bins.back().tb_index = target_tb_index; - auto next = prev; - next.push_back(target_tb_index); - ibf.bins.back().child_level_idx = id_to_pos.at(next); - } - prev.push_back(target_tb_index); - } - - // emplace a split bin at last since every user bin is on its lowest level single or split - auto & ibf = ibfs[id_to_pos.at(prev)]; - ibf.bins.emplace_back(hibf_statistics::bin_kind::split, - user_bin_info.number_of_technical_bins, - std::vector{user_bin_info.idx}); - ibf.bins.back().tb_index = user_bin_info.storage_TB_id; - } - - for (auto & ibf : ibfs) - for (auto & bin : ibf.bins) - if (bin.kind == hibf_statistics::bin_kind::merged) - bin.child_level = ibfs[bin.child_level_idx]; - - top_level_ibf = std::move(ibfs[0]); - } + void collect_bins(); - void compute_cardinalities(level & curr_level) - { - for (bin & current_bin : curr_level.bins) - { - if (current_bin.kind == bin_kind::merged) - { - if (config.hibf_config.disable_estimate_union) - { - size_t sum{}; - for (size_t i = 0; i < current_bin.user_bin_indices.size(); ++i) - sum += counts[current_bin.user_bin_indices[i]]; // TODO should be kmer_counts - current_bin.cardinality = sum; - } - else - { - assert(!current_bin.user_bin_indices.empty()); - seqan::hibf::sketch::hyperloglog hll = sketches[current_bin.user_bin_indices[0]]; - - for (size_t i = 1; i < current_bin.user_bin_indices.size(); ++i) - hll.merge(sketches[current_bin.user_bin_indices[i]]); - - current_bin.cardinality = hll.estimate(); - } - - compute_cardinalities(current_bin.child_level); - } - else if (current_bin.kind == bin_kind::split) // bin_kind::split - { - assert(current_bin.user_bin_indices.size() == 1); - current_bin.cardinality = counts[current_bin.user_bin_indices[0]]; - } - } - } + void compute_cardinalities(level & curr_level); //!\brief Computes the estimated query cost - void compute_total_query_cost(level & 
curr_level) - { - // Compute number of technical bins in current level (<= tmax) - size_t number_of_tbs{0}; - size_t level_kmer_count{0}; - size_t index{0}; - std::vector merged_bin_indices{}; - std::vector merged_bin_sketches{}; - - for (bin const & current_bin : curr_level.bins) - { - if (current_bin.kind == bin_kind::merged) - { - ++number_of_tbs; - merged_bin_indices.push_back(index); - - if (!config.hibf_config.disable_estimate_union) - { - // compute merged_bin_sketch - assert(!current_bin.user_bin_indices.empty()); - seqan::hibf::sketch::hyperloglog hll = sketches[current_bin.user_bin_indices[0]]; - - for (size_t i = 1; i < current_bin.user_bin_indices.size(); ++i) - hll.merge(sketches[current_bin.user_bin_indices[i]]); - - merged_bin_sketches.push_back(std::move(hll)); - } - } - else if (current_bin.kind == bin_kind::split) // bin_kind::split - { - number_of_tbs += current_bin.num_spanning_tbs; - level_kmer_count += current_bin.cardinality; - } - ++index; - } - assert(number_of_tbs <= config.hibf_config.tmax); - - // Add cost of querying the current IBF - // (how costly is querying number_of_tbs (e.g. 128 tbs) compared to 64 tbs given the current FPR) - curr_level.current_query_cost += - ibf_query_cost::interpolated(number_of_tbs, config.hibf_config.maximum_false_positive_rate); - - // Add costs of querying the HIBF for each kmer in this level. - total_query_cost += curr_level.current_query_cost * level_kmer_count; - - // update query cost of all merged bins - for (size_t i = 0; i < merged_bin_indices.size(); ++i) - { - auto & current_bin = curr_level.bins[merged_bin_indices[i]]; - - // Pass on cost of querying the current level - current_bin.child_level.current_query_cost = curr_level.current_query_cost; - - // If merged bins share kmers, we need to penalize this - // because querying a kmer will result in multi level look-ups. 
- if (!config.hibf_config.disable_estimate_union) - { - double const current_estimate = merged_bin_sketches[i].estimate(); - - for (size_t j = i + 1; j < merged_bin_indices.size(); ++j) - { - seqan::hibf::sketch::hyperloglog tmp = - merged_bin_sketches[i]; // copy needed, s.t. current is not modified - double union_estimate = tmp.merge_and_estimate(merged_bin_sketches[j]); - // Jaccard distance estimate - double distance = 2.0 - (current_estimate + merged_bin_sketches[j].estimate()) / union_estimate; - // Since the sizes are estimates, the distance might be slighlty above 1.0 or below 0.0 - // but we need to avoid nagetive numbers - distance = std::min(std::max(distance, 0.0), 1.0); - - current_bin.child_level.current_query_cost += (1.0 - distance); - } - } - } - - // call function recursively for each merged bin - for (size_t i : merged_bin_indices) - compute_total_query_cost(curr_level.bins[i].child_level); - } + void compute_total_query_cost(level & curr_level); /*!\brief Recursively gather all the statistics from the bins. * \param[in] curr_level The current IBF from which the statistics will be extracted. * \param[in] level_summary_index The index of `curr_level` in `summeries`. 
*/ - void gather_statistics(level const & curr_level, size_t const level_summary_index) - { - level_summary & summary = summaries[level_summary_index]; - summary.num_ibfs += 1; - - size_t max_cardinality{}, max_cardinality_no_corr{}, num_tbs{}, num_ubs{}, num_split_tbs{}, num_merged_tbs{}, - num_split_ubs{}, num_merged_ubs{}, max_split_tb_span{}, split_tb_kmers{}, max_ubs_in_merged{}, - split_tb_corr_kmers{}; - - for (bin const & current_bin : curr_level.bins) - { - size_t const cardinality_per_split_bin = - (current_bin.cardinality + current_bin.num_spanning_tbs - 1) / current_bin.num_spanning_tbs; // round up - size_t const corrected_cardinality = - std::ceil(cardinality_per_split_bin * (fp_correction)[current_bin.num_spanning_tbs]); - max_cardinality = std::max(max_cardinality, corrected_cardinality); - max_cardinality_no_corr = std::max(max_cardinality_no_corr, cardinality_per_split_bin); - - num_tbs += current_bin.num_spanning_tbs; - num_ubs += current_bin.num_contained_ubs; - - if (current_bin.kind == bin_kind::split) - { - num_split_tbs += current_bin.num_spanning_tbs; - num_split_ubs += 1; - split_tb_corr_kmers += corrected_cardinality * current_bin.num_spanning_tbs; - split_tb_kmers += cardinality_per_split_bin * current_bin.num_spanning_tbs; - max_split_tb_span = std::max(max_split_tb_span, current_bin.num_spanning_tbs); - } - else - { - num_merged_tbs += 1; - num_merged_ubs += current_bin.num_contained_ubs; - max_ubs_in_merged = std::max(max_ubs_in_merged, current_bin.num_contained_ubs); - - gather_statistics(current_bin.child_level, level_summary_index + 1); - } - } - - summary.num_tbs.push_back(num_tbs); - summary.num_ubs.push_back(num_ubs); - - summary.num_split_tbs.push_back(num_split_tbs); - summary.num_merged_tbs.push_back(num_merged_tbs); - - summary.num_split_ubs.push_back(num_split_ubs); - summary.num_merged_ubs.push_back(num_merged_ubs); - - summary.max_split_tb_span.push_back(max_split_tb_span); - 
summary.split_tb_corr_kmers.push_back(split_tb_corr_kmers); - summary.split_tb_kmers.push_back(split_tb_kmers); - - summary.max_ubs_in_merged.push_back(max_ubs_in_merged); - - summary.ibf_mem_size.push_back(max_cardinality * num_tbs); - summary.ibf_mem_size_no_corr.push_back(max_cardinality_no_corr * num_tbs); + void gather_statistics(level const & curr_level, size_t const level_summary_index); +}; + +class hibf_statistics::bin +{ +public: + bin_kind kind; //!< Either a split or merged bin. + size_t cardinality; //!< The size/weight of the bin (either a kmer count or hll sketch estimation). + size_t num_contained_ubs; //!< [MERGED] How many UBs are merged within this TB. + size_t num_spanning_tbs; //!< [SPLIT] How many TBs are used for this single UB. + std::vector user_bin_indices; //!< The user bin indices of this bin. + size_t tb_index; // The (first) technical bin idx this bin is stored in. + level child_level; //!< [MERGED] The lower level ibf statistics. + size_t child_level_idx; //!< [MERGED] The lower level ibf statistics. + + bin() = default; //!< Defaulted. + bin(bin const & b) = default; //!< Defaulted. + bin & operator=(bin const &) = default; //!< Defaulted. + bin(bin && b) = default; //!< Defaulted. + bin & operator=(bin &&) = default; //!< Defaulted. + ~bin() = default; //!< Defaulted.
+ + bin(bin_kind const kind_, size_t const spanning_tbs, std::vector const & user_bin_indices_) : + kind{kind_}, + num_contained_ubs{user_bin_indices_.size()}, + num_spanning_tbs{spanning_tbs}, + user_bin_indices{user_bin_indices_} + { + assert((kind == bin_kind::split && num_contained_ubs == 1u) + || (kind == bin_kind::merged && num_spanning_tbs == 1u)); } }; +struct hibf_statistics::level_summary +{ + size_t num_ibfs{}; + + std::vector num_tbs{}; + std::vector num_ubs{}; + + std::vector num_split_tbs{}; + std::vector num_merged_tbs{}; + + std::vector num_split_ubs{}; + std::vector num_merged_ubs{}; + + std::vector max_split_tb_span{}; + std::vector split_tb_corr_kmers{}; + std::vector split_tb_kmers{}; + + std::vector max_ubs_in_merged{}; + + std::vector ibf_mem_size{}; + std::vector ibf_mem_size_no_corr{}; +}; + } // namespace chopper::layout diff --git a/include/chopper/layout/ibf_query_cost.hpp b/include/chopper/layout/ibf_query_cost.hpp index 81d533ef..544a0b9b 100644 --- a/include/chopper/layout/ibf_query_cost.hpp +++ b/include/chopper/layout/ibf_query_cost.hpp @@ -10,8 +10,8 @@ #include #include #include +#include #include -#include namespace chopper::layout { @@ -28,45 +28,9 @@ class ibf_query_cost ibf_query_cost & operator=(ibf_query_cost &&) = default; ~ibf_query_cost() = default; - static double exact(size_t const t_max, double const fpr) - { - auto it = find_closest_fpr(fpr); - - if (contains(t_max)) - return it->second[position(t_max)]; - else - throw std::invalid_argument("No exact data available for this t_max."); - } + static double exact(size_t const t_max, double const fpr); - static double interpolated(size_t const t_max, double const fpr) - { - auto it = find_closest_fpr(fpr); - - if (t_max <= 64u) - { - return it->second[0]; - } - else if (t_max > maximum_t_max) - { - throw std::invalid_argument("No data available for a t_max this large."); - } - else if (contains(t_max)) - { - return it->second[position(t_max)]; - } - else - { - size_t const 
upper_bound{std::bit_ceil(t_max)}; - size_t const lower_bound{upper_bound >> 1}; - double const upper_value{it->second[position(upper_bound)]}; - double const lower_value{it->second[position(lower_bound)]}; - - double const interpolated_value{lower_value - + (upper_value - lower_value) * (t_max - lower_bound) / lower_bound}; - assert(interpolated_value <= upper_value); - return interpolated_value; - } - } + static double interpolated(size_t const t_max, double const fpr); private: /*!\brief The cost factor to penalize a search in an IBF with more then 64 bins. @@ -90,28 +54,7 @@ class ibf_query_cost {0.0625, {1.0000, 1.1011, 1.2670, 1.5964, 2.4030, 3.6996, 7.1772, 12.4852, 23.3882, 44.7427, 87.8259}}, {0.3125, {1.0000, 1.2818, 1.5493, 2.2546, 3.7804, 6.5428, 12.9410, 24.4539, 47.6262, 93.4733, 185.1019}}}; - static std::map>::const_iterator find_closest_fpr(double const fpr) - { - if (auto it = cost_factors.find(fpr); it != cost_factors.end()) // fpr is found exaclty in map - return it; - - // otherwise search for the closest one in the map - auto lower_it = cost_factors.lower_bound(fpr); - auto upper_it = cost_factors.upper_bound(fpr); - - assert(lower_it != cost_factors.end() || upper_it != cost_factors.end()); - - if (lower_it == cost_factors.end()) - return upper_it; - - if (upper_it == cost_factors.end()) - return lower_it; - - if (std::abs(lower_it->first - fpr) < std::abs(upper_it->first - fpr)) - return lower_it; - else - return upper_it; - } + static std::map>::const_iterator find_closest_fpr(double const fpr); static constexpr bool contains(size_t const value) { diff --git a/include/chopper/layout/input.hpp b/include/chopper/layout/input.hpp index ab64dd44..855856c1 100644 --- a/include/chopper/layout/input.hpp +++ b/include/chopper/layout/input.hpp @@ -7,70 +7,20 @@ #pragma once -#include -#include -#include - -#include +#include +#include +#include +#include #include -#include #include namespace chopper::layout { -inline std::vector> 
read_filenames_from(std::istream & stream) -{ - std::vector> filenames{}; - std::string line; - - while (std::getline(stream, line) && line != chopper::prefix::meta_chopper_user_bins_start) - ; - - assert(line == chopper::prefix::meta_chopper_user_bins_start); - -#ifndef NDEBUG - size_t counter{}; -#endif - while (std::getline(stream, line) && line != chopper::prefix::meta_chopper_user_bins_end) - { - assert(line.size() >= 2); - assert(std::string_view{line}.substr(0, 1) == seqan::hibf::prefix::meta_header); - - // @0 file1.fa file2.fa - auto const bin_idx_pos = line.find(' '); - assert(bin_idx_pos != std::string::npos); - -#ifndef NDEBUG - size_t bin_idx{}; - std::from_chars(line.data() + 1, line.data() + bin_idx_pos, bin_idx); - assert(bin_idx == counter++); -#endif - - filenames.emplace_back(); - std::string_view const filename_str{line.begin() + bin_idx_pos + 1, line.end()}; - for (auto const && filename : std::views::split(filename_str, ' ')) - { - auto common_view = std::views::common(filename); - filenames.back().emplace_back(common_view.begin(), common_view.end()); - } - } - - assert(line == chopper::prefix::meta_chopper_user_bins_end); - - return filenames; -} - -inline auto read_layout_file(std::istream & stream) -{ - std::vector> filenames = chopper::layout::read_filenames_from(stream); - chopper::configuration chopper_config; - chopper_config.read_from(stream); - seqan::hibf::layout::layout hibf_layout{}; - hibf_layout.read_from(stream); - return std::make_tuple(std::move(filenames), std::move(chopper_config), std::move(hibf_layout)); -} +std::vector> read_filenames_from(std::istream & stream); +std::tuple>, configuration, seqan::hibf::layout::layout> +read_layout_file(std::istream & stream); } // namespace chopper::layout diff --git a/include/chopper/layout/output.hpp b/include/chopper/layout/output.hpp index ea0ba9f3..a66aafd7 100644 --- a/include/chopper/layout/output.hpp +++ b/include/chopper/layout/output.hpp @@ -7,23 +7,13 @@ #pragma once 
-#include - -#include -#include - -#include +#include +#include +#include namespace chopper::layout { -inline void write_user_bins_to(std::vector const & filenames, std::ostream & stream) -{ - stream << chopper::prefix::meta_chopper_user_bins_start << '\n'; - size_t counter{}; - for (auto const & filename : filenames) - stream << seqan::hibf::prefix::meta_header << counter++ << ' ' << filename << '\n'; - stream << chopper::prefix::meta_chopper_user_bins_end << '\n'; -} +void write_user_bins_to(std::vector const & filenames, std::ostream & stream); } // namespace chopper::layout diff --git a/include/chopper/set_up_parser.hpp b/include/chopper/set_up_parser.hpp index 7fa97cf0..4f9adbe7 100644 --- a/include/chopper/set_up_parser.hpp +++ b/include/chopper/set_up_parser.hpp @@ -11,220 +11,9 @@ #include -inline void set_up_parser(sharg::parser & parser, chopper::configuration & config) +namespace chopper { - parser.info.version = "1.0.0"; - parser.info.author = "Svenja Mehringer"; - parser.info.email = "svenja.mehringer@fu-berlin.de"; - parser.info.short_description = "Compute an HIBF layout"; - parser.info.description.emplace_back("Computes an HIBF layout that tries to minimize the disk space consumption of " - "the resulting index. The space is estimated using a k-mer count per user " - "bin which represents the potential denisity in a technical bin in an " - "interleaved Bloom filter. 
You can pass the resulting layout to raptor " - "(https://github.com/seqan/raptor) to build the index and " - "conduct queries."); +void set_up_parser(sharg::parser & parser, configuration & config); - parser.info.synopsis.emplace_back( - " --input [--output ] [--threads ] [--kmer ] [--fpr ] [--hash ] " - "[--disable-estimate-union] [--disable-rearrangement]"); - - parser.add_subsection("Main options:"); - // ----------------------------------------------------------------------------------------------------------------- - parser.add_option( - config.data_file, - sharg::config{ - .short_id = '\0', - .long_id = "input", - .description = - "The input must be a file containing paths to sequence data you wish to estimate; one filepath " - "per line. If your file contains auxiliary information (e.g. species IDs), your file must be tab-" - "separated.", - .required = true}); - parser.add_list_item("", "Example file:"); - parser.add_list_item("", "```"); - parser.add_list_item("", "/absolute/path/to/file1.fasta"); - parser.add_list_item("", "/absolute/path/to/file2.fa.gz"); - parser.add_list_item("", "```"); - - parser.add_option( - config.k, - sharg::config{ - .short_id = '\0', - .long_id = "kmer", - .description = - "The k-mer size influences the size estimates of the input. " - "Choosing a k-mer size that is too small for " - "your data will result in files appearing more similar than they really are. Likewise, a large " - "k-mer size might miss out on certain similarities. For DNA sequences, a k-mer size between " - "[16,32] has proven to work well."}); - - parser.add_option( - config.hibf_config.tmax, - sharg::config{ - .short_id = '\0', - .long_id = "tmax", - .description = - "Limits the number of technical bins on each level of the HIBF. Choosing a good tmax is not " - "trivial. The smaller tmax, the more levels the layout needs to represent the data. This results " - "in a higher space consumption of the index. 
While querying each individual level is cheap, " - "querying many levels might also lead to an increased runtime. " - "A good tmax is usually the square root of the number of user bins/samples rounded to the next " - "multiple of 64. Note that your tmax will always be rounded to the next multiple of 64. " - "At the expense of a longer runtime, you can enable the statistic mode that determines the best " - "tmax for your data set. See the advanced option --determine-best-tmax", - .default_message = "≈sqrt(#samples)", - .advanced = true}); - - parser.add_option( - config.hibf_config.number_of_hash_functions, - sharg::config{.short_id = '\0', - .long_id = "hash", - .description = - "The number of hash functions to use when building the HIBF from the resulting layout. " - "This parameter is needed to correctly estimate the index size when computing the layout."}); - - parser.add_option( - config.hibf_config.maximum_false_positive_rate, - sharg::config{.short_id = '\0', - .long_id = "fpr", - .description = - "The false positive rate you aim for when building the HIBF from the resulting layout. " - "This parameter is needed to correctly estimate the index size when computing the layout."}); - - parser.add_option( - config.output_filename, - sharg::config{.short_id = '\0', .long_id = "output", .description = "A file name for the resulting layout."}); - - parser.add_option( - config.hibf_config.threads, - sharg::config{ - .short_id = '\0', - .long_id = "threads", - .description = - "The number of threads to use. 
Currently, only merging of sketches is parallelized, so if the flag " - "--disable-rearrangement is set, --threads will have no effect.", - .validator = - sharg::arithmetic_range_validator{static_cast(1), std::numeric_limits::max()}}); - - parser.add_subsection("HyperLogLog Sketches:"); - parser.add_line("To improve the layout, you can estimate the sequence similarities using HyperLogLog sketches."); - - parser.add_flag( - config.hibf_config.disable_estimate_union, - sharg::config{ - .short_id = '\0', - .long_id = "disable-estimate-union", - .description = - "The sketches are used to estimate the sequence similarity among a set of user bins. This will improve " - "the layout computation as merging user bins that do not increase technical bin sizes will be " - "preferred. This may use more RAM and can be disabled in RAM-critical environments. " - "Attention: Also disables rearrangement which depends on union estimations."}); - - parser.add_flag( - config.hibf_config.disable_rearrangement, - sharg::config{ - .short_id = '\0', - .long_id = "disable-rearrangement", - .description = - "As a preprocessing step, rearranging the order of the given user bins based on their sequence " - "similarity may lead to favourable small unions and thus a smaller index. " - "Depending on the number of input samples (user bins), this may be time-consuming and can thus be " - "disabled if a suboptimal layout is sufficient."}); - - parser.add_subsection("Parameter Tweaking:"); - // ----------------------------------------------------------------------------------------------------------------- - parser.add_option( - config.hibf_config.alpha, - sharg::config{ - .short_id = '\0', - .long_id = "alpha", - .description = - "The layout algorithm optimizes the space consumption of the resulting HIBF but currently has no " - "means of optimizing the runtime for querying such an HIBF. 
In general, the ratio of merged bins " - "and split bins influences the query time because a merged bin always triggers another search on " - "a lower level. To influence this ratio, alpha can be used. The higher alpha, the less merged " - "bins are chosen in the layout. This improves query times but leads to a bigger index.", - .advanced = true}); - - parser.add_option( - config.hibf_config.max_rearrangement_ratio, - sharg::config{ - .short_id = '\0', - .long_id = "max-rearrangement-ratio", - .description = - "When the flag --disable-rearrangement is \\fBnot\\fP set, this option can influence the rearrangement " - "algorithm. The algorithm only rearranges the order of user bins in fixed intervals. The higher " - "--max-rearrangement-ratio, the larger the intervals. This potentially improves the layout, but " - "increases the runtime of the layout algorithm.", - .advanced = true, - .validator = sharg::arithmetic_range_validator{0.0, 1.0}}); - - parser.add_option( - config.hibf_config.sketch_bits, - sharg::config{.short_id = '\0', - .long_id = "sketch-bits", - .description = - "The number of bits the HyperLogLog sketch should use to distribute the values into bins.", - .advanced = true, - .validator = sharg::arithmetic_range_validator{5, 32}}); - - parser.add_subsection("Special options"); - // ----------------------------------------------------------------------------------------------------------------- - parser.add_flag( - config.determine_best_tmax, - sharg::config{ - .short_id = '\0', - .long_id = "determine-best-tmax", - .description = - "When this flag is set, the program will compute multiple layouts for tmax in " - "[64 , 128, 256, ... , tmax] as well as tmax=sqrt(#samples). " - "The layout algorithm itself only optimizes the space consumption. When determining the best " - "layout, we additionally keep track of the average number of queries needed to traverse each " - "layout. 
This query cost is taken into account when determining the best tmax for your data. " - "Note that the option --tmax serves as upper bound. Once the layout quality starts dropping, the " - "computation is stopped. To run all layout computations, pass the flag --force-all-binnings.", - .advanced = true}); - - parser.add_flag( - config.force_all_binnings, - sharg::config{ - .short_id = '\0', - .long_id = "force-all-binnings", - .description = - "Forces all layouts up to --tmax to be computed, " - "regardless of the layout quality. If the flag --determine-best-tmax is not set, this flag is " - "ignored and has no effect.", - .advanced = true}); - - parser.add_flag( - config.output_verbose_statistics, - sharg::config{.short_id = '\0', - .long_id = "output-verbose-statistics", - .description = - "Enable verbose statistics to be " - "printed to std::cout. If the flag --determine-best-tmax is not set, this flag is ignored " - "and has no effect.", - .hidden = true}); - - parser.add_option( - config.sketch_directory, - sharg::config{ - .long_id = "output-sketches-to", - .description = - "If you supply a directory path with this option, the hyperloglog sketches of your input will be " - "stored in the respective path; one .hll file per input file.", - .default_message = "None", - .advanced = true}); - - parser.add_flag(config.debug, - sharg::config{.short_id = '\0', - .long_id = "debug", - .description = "Enables debug output in layout file.", - .hidden = true}); - - parser.add_section("References"); - parser.add_line("[1] Philippe Flajolet, Éric Fusy, Olivier Gandouet, Frédéric Meunier. HyperLogLog: the analysis " - "of a near-optimal cardinality estimation algorithm. AofA: Analysis of Algorithms, Jun 2007, Juan " - "les Pins, France. pp.137-156. 
hal-00406166v2, https://doi.org/10.46298/dmtcs.3545"); } diff --git a/include/chopper/sketch/check_filenames.hpp b/include/chopper/sketch/check_filenames.hpp index 8377d8db..c07e57c4 100644 --- a/include/chopper/sketch/check_filenames.hpp +++ b/include/chopper/sketch/check_filenames.hpp @@ -7,64 +7,15 @@ #pragma once -#include - -#include +#include +#include #include -#include namespace chopper::sketch { //!\brief Checks the `filenames` for consistent files, either precomputed or sequence files. -inline void check_filenames(std::vector const & filenames, configuration & config) -{ - assert(!filenames.empty()); - - auto case_insensitive_string_ends_with = [](std::string_view str, std::string_view suffix) - { - size_t const suffix_length{suffix.size()}; - size_t const str_length{str.size()}; - - if (suffix_length > str_length) - return false; // GCOVR_EXCL_LINE - - for (size_t j = 0, s_start = str_length - suffix_length; j < suffix_length; ++j) - if (std::tolower(str[s_start + j]) != std::tolower(suffix[j])) - return false; - - return true; - }; - - // If the first filename ends in .minimiser we expect all files to end in .minimiser - config.precomputed_files = case_insensitive_string_ends_with(filenames[0], ".minimiser"); - - for (auto const & filename : filenames) - { -#if CHOPPER_WORKAROUND_GCC_BOGUS_MEMCPY -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wrestrict" -#endif // CHOPPER_WORKAROUND_GCC_BOGUS_MEMCPY - if (!std::filesystem::exists(filename)) - throw std::invalid_argument{"File " + filename + " does not exist!"}; -#if CHOPPER_WORKAROUND_GCC_BOGUS_MEMCPY -# pragma GCC diagnostic pop -#endif // CHOPPER_WORKAROUND_GCC_BOGUS_MEMCPY - - if (config.precomputed_files && !case_insensitive_string_ends_with(filename, ".minimiser")) - { - throw std::invalid_argument{"You are providing precomputed files but the file " + filename - + " does not have the correct file extension (.minimiser)." 
- " Mixing non-/precomputed files is not allowed."}; - } - else if (!config.precomputed_files && case_insensitive_string_ends_with(filename, ".minimiser")) - { - throw std::invalid_argument{"You are providing sequence files but the file " + filename - + " was identified as a precomputed file (.minimiser)." - " Mixing non-/precomputed files is not allowed."}; - } - } -} +void check_filenames(std::vector const & filenames, configuration & config); } // namespace chopper::sketch diff --git a/include/chopper/sketch/output.hpp b/include/chopper/sketch/output.hpp index b403778d..556a0eb2 100644 --- a/include/chopper/sketch/output.hpp +++ b/include/chopper/sketch/output.hpp @@ -7,12 +7,12 @@ #pragma once -#include +#include +#include #include +#include #include -#include - #include #include @@ -20,27 +20,12 @@ namespace chopper::sketch { -inline void write_count_file_line(std::pair> const & cluster, - uint64_t const weight, - std::ofstream & fout) -{ - auto & [key, filepaths] = cluster; - - for (auto && arr : filepaths | seqan3::views::join_with(';')) - fout << arr; +void write_count_file_line(std::pair> const & cluster, + uint64_t const weight, + std::ofstream & fout); - fout << '\t' << weight << '\t' << key << '\n'; -} - -inline void write_sketch_file(std::string const & filename, - seqan::hibf::sketch::hyperloglog const & sketch, - configuration const & config) -{ - // For one file in the cluster, the file stem is used with the .hll ending - std::filesystem::path path = config.sketch_directory / std::filesystem::path(filename).stem(); - path += ".hll"; - std::ofstream hll_fout(path, std::ios::binary); - sketch.store(hll_fout); -} +void write_sketch_file(std::string const & filename, + seqan::hibf::sketch::hyperloglog const & sketch, + configuration const & config); } // namespace chopper::sketch diff --git a/include/chopper/sketch/read_data_file.hpp b/include/chopper/sketch/read_data_file.hpp index ef1e017c..a63f9f50 100644 --- 
a/include/chopper/sketch/read_data_file.hpp +++ b/include/chopper/sketch/read_data_file.hpp @@ -7,38 +7,14 @@ #pragma once -#include - -#include +#include +#include #include namespace chopper::sketch { -inline void read_data_file(configuration const & config, std::vector & filenames) -{ - std::ifstream fin{config.data_file.string()}; - - if (!fin.good() || !fin.is_open()) - throw std::runtime_error{"Could not open data file " + config.data_file.string() + " for reading."}; - - std::string line; - while (std::getline(fin, line)) - { - auto tab_pos = line.find('\t'); - - if (tab_pos == std::string::npos) - { - std::string const filename{line.begin(), line.end()}; - filenames.push_back(filename); - } - else - { - std::string const filename{line.begin(), line.begin() + tab_pos}; - filenames.push_back(filename); - } - } -} +void read_data_file(configuration const & config, std::vector & filenames); } // namespace chopper::sketch diff --git a/include/chopper/sketch/read_hll_files_into.hpp b/include/chopper/sketch/read_hll_files_into.hpp index f05ffca2..ec9825c3 100644 --- a/include/chopper/sketch/read_hll_files_into.hpp +++ b/include/chopper/sketch/read_hll_files_into.hpp @@ -7,9 +7,7 @@ #pragma once -#include #include -#include #include #include @@ -18,35 +16,8 @@ namespace chopper::sketch { -inline void read_hll_files_into(std::filesystem::path const & hll_dir, - std::vector const & target_filenames, - std::vector & target) -{ - assert(std::filesystem::exists(hll_dir) && !std::filesystem::is_empty(hll_dir)); // checked in chopper_layout - - target.reserve(target_filenames.size()); - - try - { - for (auto const & filename : target_filenames) - { - std::filesystem::path path = hll_dir / std::filesystem::path(filename).stem(); - path += ".hll"; - std::ifstream hll_fin(path, std::ios::binary); - - if (!hll_fin.good()) - throw std::runtime_error{"Could not open file " + path.string()}; - - // the sketch bits will be automatically read from the files - 
target.emplace_back().load(hll_fin); - } - } - catch (std::runtime_error const & err) - { - std::string const chopper_msg{"[CHOPPER LAYOUT ERROR] Something went wrong trying to read the HyperLogLog" - " sketches from files:\n"}; - throw std::runtime_error{chopper_msg + err.what()}; - } -} +void read_hll_files_into(std::filesystem::path const & hll_dir, + std::vector const & target_filenames, + std::vector & target); } // namespace chopper::sketch diff --git a/lib/robin-hood-hashing b/lib/robin-hood-hashing deleted file mode 160000 index 9145f963..00000000 --- a/lib/robin-hood-hashing +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 9145f963d80d6a02f0f96a47758050a89184a3ed diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a57acc4b..1913ce6a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -7,29 +7,26 @@ cmake_minimum_required (VERSION 3.18) -add_library (chopper_interface INTERFACE) -target_link_libraries (chopper_interface INTERFACE seqan3::seqan3) -target_link_libraries (chopper_interface INTERFACE sharg::sharg) -target_link_libraries (chopper_interface INTERFACE "seqan::hibf") -target_include_directories (chopper_interface INTERFACE ../include) -target_include_directories (chopper_interface INTERFACE ${CHOPPER_SUBMODULES_DIR}/robin-hood-hashing/src/include) -target_include_directories (chopper_interface INTERFACE ${CHOPPER_SUBMODULES_DIR}/simde/simde) -target_compile_options (chopper_interface INTERFACE "-pedantic" "-Wall" "-Wextra") - -add_library (chopper_layout_lib STATIC chopper_layout.cpp) -target_link_libraries (chopper_layout_lib "chopper_interface") +if (NOT TARGET chopper_shared) + add_library (chopper_shared STATIC configuration.cpp input_functor.cpp) + + target_link_libraries (chopper_shared PUBLIC seqan3::seqan3) + target_link_libraries (chopper_shared PUBLIC sharg::sharg) + target_link_libraries (chopper_shared PUBLIC seqan::hibf) + target_include_directories (chopper_shared PUBLIC ../include) + target_include_directories 
(chopper_shared PUBLIC ${CHOPPER_SUBMODULES_DIR}/simde/simde) + target_compile_options (chopper_shared PUBLIC "-pedantic" "-Wall" "-Wextra") +endif () add_library (chopper_lib INTERFACE) -target_link_libraries (chopper_lib INTERFACE "chopper_interface" "chopper_layout_lib") - -add_executable (chopper chopper.cpp) -target_link_libraries (chopper "chopper_lib") +target_link_libraries (chopper_lib INTERFACE chopper_layout chopper_sketch) -add_executable (measure_hyperloglog EXCLUDE_FROM_ALL measure_hyperloglog.cpp) -target_link_libraries (measure_hyperloglog "chopper_interface") -target_compile_options (measure_hyperloglog PRIVATE "-Werror") +add_executable (chopper chopper.cpp set_up_parser.cpp) +target_link_libraries (chopper PUBLIC chopper_lib) -add_subdirectory (display_layout) +add_subdirectory (layout) +add_subdirectory (sketch) +add_subdirectory (util) if (CHOPPER_INSTALL) install (TARGETS chopper RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}") diff --git a/src/chopper.cpp b/src/chopper.cpp index 34eb5ace..c8da4697 100644 --- a/src/chopper.cpp +++ b/src/chopper.cpp @@ -5,9 +5,14 @@ // shipped with this file and also available at: https://github.com/seqan/chopper/blob/main/LICENSE.md // --------------------------------------------------------------------------------------------------- -#include +#include +#include +#include +#include +#include +#include -#include +#include #include #include diff --git a/src/configuration.cpp b/src/configuration.cpp new file mode 100644 index 00000000..dde603ca --- /dev/null +++ b/src/configuration.cpp @@ -0,0 +1,64 @@ +// --------------------------------------------------------------------------------------------------- +// Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin +// Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik +// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +// shipped with this file and also available at: 
https://github.com/seqan/chopper/blob/main/LICENSE.md +// --------------------------------------------------------------------------------------------------- + +#include +#include +#include +#include + +#include + +#include +#include + +namespace chopper +{ + +void configuration::read_from(std::istream & stream) +{ + std::string line; + std::stringstream config_str; + + while (std::getline(stream, line) && line != chopper::prefix::meta_chopper_config_start) + ; + + assert(line == chopper::prefix::meta_chopper_config_start); + + while (std::getline(stream, line) && line != chopper::prefix::meta_chopper_config_end) + { + assert(line.size() >= 2); + assert(std::string_view{line}.substr(0, 1) == seqan::hibf::prefix::meta_header); + config_str << line.substr(1); // remove seqan::hibf::prefix::meta_header + } + + assert(line == chopper::prefix::meta_chopper_config_end); + + cereal::JSONInputArchive iarchive(config_str); + iarchive(*this); + + hibf_config.read_from(stream); +} + +void configuration::write_to(std::ostream & stream) const +{ + // write json file to temprorary string stream with cereal + std::stringstream config_stream{}; + cereal::JSONOutputArchive output(config_stream); // stream to cout + output(cereal::make_nvp("chopper_config", *this)); + + // write config + stream << chopper::prefix::meta_chopper_config_start << '\n'; + std::string line; + while (std::getline(config_stream, line, '\n')) + stream << seqan::hibf::prefix::meta_header << line << '\n'; + stream << seqan::hibf::prefix::meta_header << "}\n" // last closing bracket isn't written by loop above + << chopper::prefix::meta_chopper_config_end << '\n'; + + hibf_config.write_to(stream); +} + +} // namespace chopper diff --git a/src/input_functor.cpp b/src/input_functor.cpp new file mode 100644 index 00000000..c5fbce6e --- /dev/null +++ b/src/input_functor.cpp @@ -0,0 +1,57 @@ +// --------------------------------------------------------------------------------------------------- +// Copyright (c) 
2006-2023, Knut Reinert & Freie Universität Berlin +// Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik +// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +// shipped with this file and also available at: https://github.com/seqan/chopper/blob/main/LICENSE.md +// --------------------------------------------------------------------------------------------------- + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +namespace chopper +{ + +void input_functor::operator()(size_t const num, seqan::hibf::insert_iterator it) +{ + assert(filenames.size() > num); + if (input_are_precomputed_files) + { + uint64_t hash{}; + char * const hash_data{reinterpret_cast(&hash)}; + std::streamsize const hash_bytes{sizeof(hash)}; + + std::ifstream infile{filenames[num], std::ios::binary}; + + while (infile.read(hash_data, hash_bytes)) + it = hash; + } + else + { + sequence_file_type fin{filenames[num]}; + + seqan3::shape shape = seqan3::ungapped{kmer_size}; + auto minimizer_view = seqan3::views::minimiser_hash(shape, + seqan3::window_size{kmer_size}, + seqan3::seed{adjust_seed(shape.count())}); + + for (auto && [seq] : fin) + { + for (auto hash_value : seq | minimizer_view) + it = hash_value; + } + } +} + +} // namespace chopper diff --git a/src/layout/CMakeLists.txt b/src/layout/CMakeLists.txt new file mode 100644 index 00000000..3faee796 --- /dev/null +++ b/src/layout/CMakeLists.txt @@ -0,0 +1,9 @@ +cmake_minimum_required (VERSION 3.18) + +if (NOT TARGET chopper_layout) + add_library (chopper_layout STATIC determine_best_number_of_technical_bins.cpp execute.cpp hibf_statistics.cpp + ibf_query_cost.cpp input.cpp output.cpp + ) + + target_link_libraries (chopper_layout PUBLIC chopper_shared) +endif () diff --git a/src/chopper_layout.cpp b/src/layout/determine_best_number_of_technical_bins.cpp similarity index 54% rename from 
src/chopper_layout.cpp rename to src/layout/determine_best_number_of_technical_bins.cpp index 58de3d62..dbf80a8c 100644 --- a/src/chopper_layout.cpp +++ b/src/layout/determine_best_number_of_technical_bins.cpp @@ -5,21 +5,25 @@ // shipped with this file and also available at: https://github.com/seqan/chopper/blob/main/LICENSE.md // --------------------------------------------------------------------------------------------------- -#include +#include +#include +#include +#include +#include #include - -#include -#include +#include +#include +#include #include +#include #include -#include -#include -#include +#include -#include #include +#include #include +#include namespace chopper::layout { @@ -98,72 +102,4 @@ determine_best_number_of_technical_bins(chopper::configuration & config) return {best_layout, sketches}; } -int execute(chopper::configuration & config, std::vector const & filenames) -{ - assert(config.hibf_config.number_of_user_bins > 0); - - if (config.hibf_config.disable_estimate_union) - config.hibf_config.disable_rearrangement = true; - - if (config.hibf_config.tmax == 0) // no tmax was set by the user on the command line - { - // Set default as sqrt(#samples). Experiments showed that this is a reasonable default. - if (size_t number_samples = config.hibf_config.number_of_user_bins; - number_samples >= 1ULL << 32) // sqrt is bigger than uint16_t - throw std::invalid_argument{"Too many samples. Please set a tmax (see help via `-hh`)."}; // GCOVR_EXCL_LINE - else - config.hibf_config.tmax = - chopper::next_multiple_of_64(static_cast(std::ceil(std::sqrt(number_samples)))); - } - else if (config.hibf_config.tmax % 64 != 0) - { - config.hibf_config.tmax = chopper::next_multiple_of_64(config.hibf_config.tmax); - std::cerr << "[CHOPPER LAYOUT WARNING]: Your requested number of technical bins was not a multiple of 64. 
" - << "Due to the architecture of the HIBF, it will use up space equal to the next multiple of 64 " - << "anyway, so we increased your number of technical bins to " << config.hibf_config.tmax << ".\n"; - } - - seqan::hibf::layout::layout hibf_layout; - std::vector sketches; - - if (config.determine_best_tmax) - { - std::tie(hibf_layout, sketches) = determine_best_number_of_technical_bins(config); - } - else - { - std::vector kmer_counts; - - seqan::hibf::sketch::compute_sketches(config.hibf_config, kmer_counts, sketches); - hibf_layout = seqan::hibf::layout::compute_layout(config.hibf_config, kmer_counts, sketches); - - if (config.output_verbose_statistics) - { - size_t dummy{}; - chopper::layout::hibf_statistics global_stats{config, sketches, kmer_counts}; - global_stats.hibf_layout = hibf_layout; - global_stats.print_header_to(std::cout); - global_stats.print_summary_to(dummy, std::cout); - } - } - - if (!config.disable_sketch_output) - { - if (!std::filesystem::exists(config.sketch_directory)) - std::filesystem::create_directory(config.sketch_directory); - - assert(filenames.size() == sketches.size()); - for (size_t i = 0; i < filenames.size(); ++i) - sketch::write_sketch_file(filenames[i], sketches[i], config); - } - - // brief Write the output to the layout file. 
- std::ofstream fout{config.output_filename}; - chopper::layout::write_user_bins_to(filenames, fout); - config.write_to(fout); - hibf_layout.write_to(fout); - - return 0; -} - } // namespace chopper::layout diff --git a/src/layout/execute.cpp b/src/layout/execute.cpp new file mode 100644 index 00000000..48110377 --- /dev/null +++ b/src/layout/execute.cpp @@ -0,0 +1,104 @@ +// --------------------------------------------------------------------------------------------------- +// Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin +// Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik +// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +// shipped with this file and also available at: https://github.com/seqan/chopper/blob/main/LICENSE.md +// --------------------------------------------------------------------------------------------------- + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace chopper::layout +{ + +int execute(chopper::configuration & config, std::vector const & filenames) +{ + assert(config.hibf_config.number_of_user_bins > 0); + + if (config.hibf_config.disable_estimate_union) + config.hibf_config.disable_rearrangement = true; + + if (config.hibf_config.tmax == 0) // no tmax was set by the user on the command line + { + // Set default as sqrt(#samples). Experiments showed that this is a reasonable default. + if (size_t number_samples = config.hibf_config.number_of_user_bins; + number_samples >= 1ULL << 32) // sqrt is bigger than uint16_t + throw std::invalid_argument{"Too many samples. 
Please set a tmax (see help via `-hh`)."}; // GCOVR_EXCL_LINE + else + config.hibf_config.tmax = + chopper::next_multiple_of_64(static_cast(std::ceil(std::sqrt(number_samples)))); + } + else if (config.hibf_config.tmax % 64 != 0) + { + config.hibf_config.tmax = chopper::next_multiple_of_64(config.hibf_config.tmax); + std::cerr << "[CHOPPER LAYOUT WARNING]: Your requested number of technical bins was not a multiple of 64. " + << "Due to the architecture of the HIBF, it will use up space equal to the next multiple of 64 " + << "anyway, so we increased your number of technical bins to " << config.hibf_config.tmax << ".\n"; + } + + seqan::hibf::layout::layout hibf_layout; + std::vector sketches; + + if (config.determine_best_tmax) + { + std::tie(hibf_layout, sketches) = determine_best_number_of_technical_bins(config); + } + else + { + std::vector kmer_counts; + + seqan::hibf::sketch::compute_sketches(config.hibf_config, kmer_counts, sketches); + hibf_layout = seqan::hibf::layout::compute_layout(config.hibf_config, kmer_counts, sketches); + + if (config.output_verbose_statistics) + { + size_t dummy{}; + chopper::layout::hibf_statistics global_stats{config, sketches, kmer_counts}; + global_stats.hibf_layout = hibf_layout; + global_stats.print_header_to(std::cout); + global_stats.print_summary_to(dummy, std::cout); + } + } + + if (!config.disable_sketch_output) + { + if (!std::filesystem::exists(config.sketch_directory)) + std::filesystem::create_directory(config.sketch_directory); + + assert(filenames.size() == sketches.size()); + for (size_t i = 0; i < filenames.size(); ++i) + sketch::write_sketch_file(filenames[i], sketches[i], config); + } + + // brief Write the output to the layout file. 
+ std::ofstream fout{config.output_filename}; + chopper::layout::write_user_bins_to(filenames, fout); + config.write_to(fout); + hibf_layout.write_to(fout); + + return 0; +} + +} // namespace chopper::layout diff --git a/src/layout/hibf_statistics.cpp b/src/layout/hibf_statistics.cpp new file mode 100644 index 00000000..8b1655a3 --- /dev/null +++ b/src/layout/hibf_statistics.cpp @@ -0,0 +1,587 @@ +// --------------------------------------------------------------------------------------------------- +// Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin +// Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik +// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +// shipped with this file and also available at: https://github.com/seqan/chopper/blob/main/LICENSE.md +// --------------------------------------------------------------------------------------------------- + +// clang-format off +#include +// clang-format on + +#include +#include +#include +#include +#include +#if CHOPPER_WORKAROUND_GCC_BOGUS_MEMCPY +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wrestrict" +#endif // CHOPPER_WORKAROUND_GCC_BOGUS_MEMCPY +#include +#if CHOPPER_WORKAROUND_GCC_BOGUS_MEMCPY +# pragma GCC diagnostic pop +#endif // CHOPPER_WORKAROUND_GCC_BOGUS_MEMCPY +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +namespace chopper::layout +{ + +hibf_statistics::hibf_statistics(configuration const & config_, + std::vector const & sketches_, + std::vector const & kmer_counts) : + config{config_}, + fp_correction{ + seqan::hibf::layout::compute_fpr_correction({.fpr = config_.hibf_config.maximum_false_positive_rate, + .hash_count = config_.hibf_config.number_of_hash_functions, + .t_max = config_.hibf_config.tmax})}, + sketches{sketches_}, + counts{kmer_counts}, + total_kmer_count{std::accumulate(kmer_counts.begin(), 
kmer_counts.end(), size_t{})} +{} + +void hibf_statistics::finalize() +{ + collect_bins(); + + compute_cardinalities(top_level_ibf); + + compute_total_query_cost(top_level_ibf); + + gather_statistics(top_level_ibf, 0); + + expected_HIBF_query_cost = total_query_cost / total_kmer_count; +} + +//!\brief Prints a column names of the summary to the command line. +void hibf_statistics::print_header_to(std::ostream & stream, bool const verbose) +{ + // print column names explanation in header + stream << "## ### Notation ###\n" + << "## X-IBF = An IBF with X number of bins.\n" + << "## X-HIBF = An HIBF with tmax = X, e.g a maximum of X technical bins on each level.\n"; + + stream << "## ### Column Description ###\n" + "## tmax : The maximum number of technical bin on each level\n" + "## c_tmax : The technical extra cost of querying an tmax-IBF, compared to 64-IBF\n" + "## l_tmax : The estimated query cost for an tmax-HIBF, compared to an 64-HIBF\n" + "## m_tmax : The estimated memory consumption for an tmax-HIBF, compared to an 64-HIBF\n" + "## (l*m)_tmax : Computed by l_tmax * m_tmax\n" + "## size : The expected total size of an tmax-HIBF\n" + << ((verbose) ? 
"## uncorr_size : The expected size of an tmax-HIBF without FPR correction\n" : ""); + + // print column names + stream << "# tmax" << '\t' << "c_tmax" << '\t' << "l_tmax" << '\t' << "m_tmax" << '\t' << "(l*m)_tmax" << '\t' + << "size"; + + if (verbose) // uncorrected size and add level statistics + { + stream << '\t' << "uncorr_size" << '\t' << "level" << '\t' << "num_ibfs" << '\t' << "level_size" << '\t' + << "level_size_no_corr" << '\t' << "total_num_tbs" << '\t' << "avg_num_tbs" << '\t' + << "split_tb_percentage" << '\t' << "max_split_tb" << '\t' << "avg_split_tb" << '\t' << "max_factor" + << '\t' << "avg_factor"; + } + + stream << '\n'; +} + +void hibf_statistics::print_summary_to(size_t & t_max_64_memory, std::ostream & stream, bool const verbose) +{ + if (summaries.empty()) + finalize(); + + if (t_max_64_memory == 0) + t_max_64_memory = total_hibf_size_in_byte(); + + double const relative_memory_size = total_hibf_size_in_byte() / static_cast(t_max_64_memory); + double const query_time_memory_usage_prod = expected_HIBF_query_cost * relative_memory_size; + + stream << std::fixed << std::setprecision(2); + + std::string level_str, num_ibfs_str, level_size_str, level_size_no_corr_str, total_num_tbs_str, avg_num_tbs_str, + split_tb_percentage_str, max_split_tb_str, avg_split_tb_str, max_factor_str, avg_factor_str; + + size_t total_size{}; + size_t total_size_no_corr{}; + + // go through each level and collect and output the statistics + auto to_string_with_precision = [](auto num) + { + std::stringstream ss; + ss << std::fixed << std::setprecision(2) << num; + return ss.str(); + }; + + for (auto const & [level, s] : summaries) + { + size_t const level_size = std::reduce(s.ibf_mem_size.begin(), s.ibf_mem_size.end()); + size_t const level_size_no_corr = std::reduce(s.ibf_mem_size_no_corr.begin(), s.ibf_mem_size_no_corr.end()); + + total_size += level_size; + total_size_no_corr += level_size_no_corr; + + size_t const total_num_tbs = std::reduce(s.num_tbs.begin(), 
s.num_tbs.end()); + + size_t const total_num_split_tbs = std::reduce(s.num_split_tbs.begin(), s.num_split_tbs.end()); + double const split_tb_percentage = 100.0 * static_cast(total_num_split_tbs) / total_num_tbs; + + size_t const max_split_bin_span = *std::max_element(s.max_split_tb_span.begin(), s.max_split_tb_span.end()); + +#if CHOPPER_WORKAROUND_GCC_BOGUS_MEMCPY +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wrestrict" +#endif // CHOPPER_WORKAROUND_GCC_BOGUS_MEMCPY + + level_str += ":" + to_string_with_precision(level); + num_ibfs_str += ":" + to_string_with_precision(s.num_ibfs); + level_size_str += ":" + to_formatted_BF_size(level_size); + level_size_no_corr_str += ":" + to_formatted_BF_size(level_size_no_corr); + total_num_tbs_str += ":" + to_string_with_precision(total_num_tbs); + avg_num_tbs_str += ":" + to_string_with_precision(total_num_tbs / s.num_ibfs); + split_tb_percentage_str += ":" + to_string_with_precision(split_tb_percentage); + + // if there are no split bins on this level, the following statistics don't make sense + if (max_split_bin_span != 0) + { + size_t const total_num_split_ubs = std::reduce(s.num_split_ubs.begin(), s.num_split_ubs.end()); + double const avg_split_bin = + static_cast(total_num_split_tbs) / static_cast(total_num_split_ubs); + size_t const total_split_tb_kmers = std::reduce(s.split_tb_kmers.begin(), s.split_tb_kmers.end()); + double const avg_factor = + static_cast(std::reduce(s.split_tb_corr_kmers.begin(), s.split_tb_corr_kmers.end())) + / static_cast(total_split_tb_kmers); + + max_split_tb_str += ":" + to_string_with_precision(max_split_bin_span); + avg_split_tb_str += ":" + to_string_with_precision(avg_split_bin); + max_factor_str += ":" + to_string_with_precision((fp_correction)[max_split_bin_span]); + avg_factor_str += ":" + to_string_with_precision(avg_factor); + } + else + { + max_split_tb_str += ":-"; + avg_split_tb_str += ":-"; + max_factor_str += ":-"; + avg_factor_str += ":-"; + } +#if 
CHOPPER_WORKAROUND_GCC_BOGUS_MEMCPY +# pragma GCC diagnostic pop +#endif // CHOPPER_WORKAROUND_GCC_BOGUS_MEMCPY + } + + stream << std::fixed << std::setprecision(2); + + stream /* tmax */ << config.hibf_config.tmax + << '\t' + /* c_tmax */ + << chopper::layout::ibf_query_cost::interpolated( + config.hibf_config.tmax, + config.hibf_config.maximum_false_positive_rate) + << '\t' + /* l_tmax */ + << expected_HIBF_query_cost + << '\t' /*relative to a 64 bin IBF*/ + /* m_tmax */ + << relative_memory_size + << '\t' /*relative to the 64 T_Max HIBF*/ + /* (l*m)tmax */ + << query_time_memory_usage_prod + << '\t' + /* corr. size */ + << to_formatted_BF_size(total_size) << ((verbose) ? '\t' : '\n'); + + if (verbose) + { + // uncorrected FPR + stream /*uncorr. size */ << to_formatted_BF_size(total_size_no_corr) << '\t'; + + // per level statistics: + stream /* level */ << level_str + << '\t' + /* num_ibfs */ + << num_ibfs_str + << '\t' + /* level_size */ + << level_size_str + << '\t' + /* level_size_no_corr */ + << level_size_no_corr_str + << '\t' + /* total_num_tbs */ + << total_num_tbs_str + << '\t' + /* avg_num_tbs */ + << avg_num_tbs_str + << '\t' + /* split_tb_percentage */ + << split_tb_percentage_str + << '\t' + /* max_split_tb */ + << max_split_tb_str + << '\t' + /* avg_split_tb */ + << avg_split_tb_str + << '\t' + /* max_factor */ + << max_factor_str + << '\t' + /* avg_factor */ + << avg_factor_str << '\n'; + } +} + +//!\brief Return the total corrected size of the HIBF in bytes +size_t hibf_statistics::total_hibf_size_in_byte() +{ + if (summaries.empty()) + finalize(); + + size_t total_size{}; + + // go through each level and collect the memory sizes + for (auto const & [level, summary] : summaries) + { + (void)level; + + total_size += std::reduce(summary.ibf_mem_size.begin(), summary.ibf_mem_size.end()); + } + + return compute_bin_size(total_size) / 8; +} + +//!\brief Round bytes to the appropriate unit and convert to string with unit. 
+[[nodiscard]] std::string hibf_statistics::byte_size_to_formatted_str(size_t const bytes) +{ + size_t iterations{}; + size_t integer{bytes}; + + while (integer >> 10u && iterations < 6u) + { + integer >>= 10u; + ++iterations; + } + + // While this is a bit more involved, we can avoid using floating point numbers. + auto first_decimal_position = [&]() + { + assert(iterations > 0u); + size_t decimal{bytes}; + decimal -= integer << (iterations * 10u); // Substract bytes represented by integer, e.g. -5GiB + decimal >>= (iterations - 1u) * 10u; // Shift to next smallest unit, e.g. 800MiB + decimal = decimal * 1000u / 1024u; // Account for using decimal system, i.e. 800MiB != 0.8GiB + size_t const diff{decimal - (decimal / 100u) * 100u}; // We want to round up to 1 decimal position + uint32_t const round_up{diff >= 50u}; + decimal += round_up * 100u - diff; + decimal /= 100u; + return decimal; + }; + + auto formatted_string = [&]() + { + static constexpr int8_t int_to_char_offset{'0'}; // int 0 as char: char{0 + 48} = '0' + size_t const decimal = iterations ? first_decimal_position() : 0u; + assert(decimal <= 10u); + + if (!iterations) // No decimals for Bytes + return std::to_string(integer); + else if (decimal < 10u) // No need to round integer part + return std::to_string(integer) + '.' + static_cast(decimal + int_to_char_offset); + else // Round integer part, e.g., 5.99 MiB should report 6.0 MiB + { + ++integer; + // Check whether rounding results in a change of unit, e.g. 
1023.99MiB to 1.0GiB + if (integer >> 10u) + { + ++iterations; + integer >>= 10u; + } + return std::to_string(integer) + ".0"; + } + }; + + std::string result{formatted_string()}; + switch (iterations) + { + case 0: + result += "Bytes"; + break; + case 1: + result += "KiB"; + break; + case 2: + result += "MiB"; + break; + case 3: + result += "GiB"; + break; + case 4: + result += "TiB"; + break; + case 5: + result += "PiB"; + break; + default: + result += "EiB"; + break; + } + + return result; +} + +size_t hibf_statistics::compute_bin_size(size_t const number_of_kmers_to_be_stored) const +{ + return std::ceil(-static_cast(number_of_kmers_to_be_stored * config.hibf_config.number_of_hash_functions) + / std::log(1 + - std::exp(std::log(config.hibf_config.maximum_false_positive_rate) + / config.hibf_config.number_of_hash_functions))); +} + +std::string hibf_statistics::to_formatted_BF_size(size_t const number_of_kmers_to_be_stored) const +{ + size_t const size_in_bytes = compute_bin_size(number_of_kmers_to_be_stored) / 8; + return byte_size_to_formatted_str(size_in_bytes); +} + +void hibf_statistics::collect_bins() +{ + std::vector ibfs(hibf_layout.max_bins.size() + 1); // 0 = top_level + robin_hood::unordered_map, size_t> id_to_pos{}; + + // fill id_to_pos map + id_to_pos[std::vector{}] = 0; + for (size_t i = 0; i < hibf_layout.max_bins.size(); ++i) + id_to_pos[hibf_layout.max_bins[i].previous_TB_indices] = i + 1; + + for (auto const & user_bin_info : hibf_layout.user_bins) + { + std::vector prev{}; + + // add user bin index to previous merged bins + for (size_t i = 0; i < user_bin_info.previous_TB_indices.size(); ++i) + { + auto & ibf = ibfs[id_to_pos.at(prev)]; + auto const target_tb_index = user_bin_info.previous_TB_indices[i]; + + bool found_merged_bin{false}; + for (auto & previous_bins_to_check : ibf.bins) + { + if (previous_bins_to_check.tb_index == target_tb_index) + { + found_merged_bin = true; + 
previous_bins_to_check.user_bin_indices.push_back(user_bin_info.idx); + ++previous_bins_to_check.num_contained_ubs; + } + } + + if (!found_merged_bin) + { + ibf.bins.emplace_back(hibf_statistics::bin_kind::merged, 1, std::vector{user_bin_info.idx}); + ibf.bins.back().tb_index = target_tb_index; + auto next = prev; + next.push_back(target_tb_index); + ibf.bins.back().child_level_idx = id_to_pos.at(next); + } + prev.push_back(target_tb_index); + } + + // emplace a split bin at last since every user bin is on its lowest level single or split + auto & ibf = ibfs[id_to_pos.at(prev)]; + ibf.bins.emplace_back(hibf_statistics::bin_kind::split, + user_bin_info.number_of_technical_bins, + std::vector{user_bin_info.idx}); + ibf.bins.back().tb_index = user_bin_info.storage_TB_id; + } + + for (auto & ibf : ibfs) + for (auto & bin : ibf.bins) + if (bin.kind == hibf_statistics::bin_kind::merged) + bin.child_level = ibfs[bin.child_level_idx]; + + top_level_ibf = std::move(ibfs[0]); +} + +void hibf_statistics::compute_cardinalities(level & curr_level) +{ + for (bin & current_bin : curr_level.bins) + { + if (current_bin.kind == bin_kind::merged) + { + if (config.hibf_config.disable_estimate_union) + { + size_t sum{}; + for (size_t i = 0; i < current_bin.user_bin_indices.size(); ++i) + sum += counts[current_bin.user_bin_indices[i]]; // TODO should be kmer_counts + current_bin.cardinality = sum; + } + else + { + assert(!current_bin.user_bin_indices.empty()); + seqan::hibf::sketch::hyperloglog hll = sketches[current_bin.user_bin_indices[0]]; + + for (size_t i = 1; i < current_bin.user_bin_indices.size(); ++i) + hll.merge(sketches[current_bin.user_bin_indices[i]]); + + current_bin.cardinality = hll.estimate(); + } + + compute_cardinalities(current_bin.child_level); + } + else if (current_bin.kind == bin_kind::split) // bin_kind::split + { + assert(current_bin.user_bin_indices.size() == 1); + current_bin.cardinality = counts[current_bin.user_bin_indices[0]]; + } + } +} + +void 
hibf_statistics::compute_total_query_cost(level & curr_level) +{ + // Compute number of technical bins in current level (<= tmax) + size_t number_of_tbs{0}; + size_t level_kmer_count{0}; + size_t index{0}; + std::vector merged_bin_indices{}; + std::vector merged_bin_sketches{}; + + for (bin const & current_bin : curr_level.bins) + { + if (current_bin.kind == bin_kind::merged) + { + ++number_of_tbs; + merged_bin_indices.push_back(index); + + if (!config.hibf_config.disable_estimate_union) + { + // compute merged_bin_sketch + assert(!current_bin.user_bin_indices.empty()); + seqan::hibf::sketch::hyperloglog hll = sketches[current_bin.user_bin_indices[0]]; + + for (size_t i = 1; i < current_bin.user_bin_indices.size(); ++i) + hll.merge(sketches[current_bin.user_bin_indices[i]]); + + merged_bin_sketches.push_back(std::move(hll)); + } + } + else if (current_bin.kind == bin_kind::split) // bin_kind::split + { + number_of_tbs += current_bin.num_spanning_tbs; + level_kmer_count += current_bin.cardinality; + } + ++index; + } + assert(number_of_tbs <= config.hibf_config.tmax); + + // Add cost of querying the current IBF + // (how costly is querying number_of_tbs (e.g. 128 tbs) compared to 64 tbs given the current FPR) + curr_level.current_query_cost += + ibf_query_cost::interpolated(number_of_tbs, config.hibf_config.maximum_false_positive_rate); + + // Add costs of querying the HIBF for each kmer in this level. + total_query_cost += curr_level.current_query_cost * level_kmer_count; + + // update query cost of all merged bins + for (size_t i = 0; i < merged_bin_indices.size(); ++i) + { + auto & current_bin = curr_level.bins[merged_bin_indices[i]]; + + // Pass on cost of querying the current level + current_bin.child_level.current_query_cost = curr_level.current_query_cost; + + // If merged bins share kmers, we need to penalize this + // because querying a kmer will result in multi level look-ups. 
+ if (!config.hibf_config.disable_estimate_union) + { + double const current_estimate = merged_bin_sketches[i].estimate(); + + for (size_t j = i + 1; j < merged_bin_indices.size(); ++j) + { + seqan::hibf::sketch::hyperloglog tmp = + merged_bin_sketches[i]; // copy needed, s.t. current is not modified + double union_estimate = tmp.merge_and_estimate(merged_bin_sketches[j]); + // Jaccard distance estimate + double distance = 2.0 - (current_estimate + merged_bin_sketches[j].estimate()) / union_estimate; + // Since the sizes are estimates, the distance might be slighlty above 1.0 or below 0.0 + // but we need to avoid nagetive numbers + distance = std::min(std::max(distance, 0.0), 1.0); + + current_bin.child_level.current_query_cost += (1.0 - distance); + } + } + } + + // call function recursively for each merged bin + for (size_t i : merged_bin_indices) + compute_total_query_cost(curr_level.bins[i].child_level); +} + +void hibf_statistics::gather_statistics(level const & curr_level, size_t const level_summary_index) +{ + level_summary & summary = summaries[level_summary_index]; + summary.num_ibfs += 1; + + size_t max_cardinality{}, max_cardinality_no_corr{}, num_tbs{}, num_ubs{}, num_split_tbs{}, num_merged_tbs{}, + num_split_ubs{}, num_merged_ubs{}, max_split_tb_span{}, split_tb_kmers{}, max_ubs_in_merged{}, + split_tb_corr_kmers{}; + + for (bin const & current_bin : curr_level.bins) + { + size_t const cardinality_per_split_bin = + (current_bin.cardinality + current_bin.num_spanning_tbs - 1) / current_bin.num_spanning_tbs; // round up + size_t const corrected_cardinality = + std::ceil(cardinality_per_split_bin * (fp_correction)[current_bin.num_spanning_tbs]); + max_cardinality = std::max(max_cardinality, corrected_cardinality); + max_cardinality_no_corr = std::max(max_cardinality_no_corr, cardinality_per_split_bin); + + num_tbs += current_bin.num_spanning_tbs; + num_ubs += current_bin.num_contained_ubs; + + if (current_bin.kind == bin_kind::split) + { + num_split_tbs 
+= current_bin.num_spanning_tbs; + num_split_ubs += 1; + split_tb_corr_kmers += corrected_cardinality * current_bin.num_spanning_tbs; + split_tb_kmers += cardinality_per_split_bin * current_bin.num_spanning_tbs; + max_split_tb_span = std::max(max_split_tb_span, current_bin.num_spanning_tbs); + } + else + { + num_merged_tbs += 1; + num_merged_ubs += current_bin.num_contained_ubs; + max_ubs_in_merged = std::max(max_ubs_in_merged, current_bin.num_contained_ubs); + + gather_statistics(current_bin.child_level, level_summary_index + 1); + } + } + + summary.num_tbs.push_back(num_tbs); + summary.num_ubs.push_back(num_ubs); + + summary.num_split_tbs.push_back(num_split_tbs); + summary.num_merged_tbs.push_back(num_merged_tbs); + + summary.num_split_ubs.push_back(num_split_ubs); + summary.num_merged_ubs.push_back(num_merged_ubs); + + summary.max_split_tb_span.push_back(max_split_tb_span); + summary.split_tb_corr_kmers.push_back(split_tb_corr_kmers); + summary.split_tb_kmers.push_back(split_tb_kmers); + + summary.max_ubs_in_merged.push_back(max_ubs_in_merged); + + summary.ibf_mem_size.push_back(max_cardinality * num_tbs); + summary.ibf_mem_size_no_corr.push_back(max_cardinality_no_corr * num_tbs); +} + +} // namespace chopper::layout diff --git a/src/layout/ibf_query_cost.cpp b/src/layout/ibf_query_cost.cpp new file mode 100644 index 00000000..40f9de93 --- /dev/null +++ b/src/layout/ibf_query_cost.cpp @@ -0,0 +1,84 @@ +// --------------------------------------------------------------------------------------------------- +// Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin +// Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik +// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +// shipped with this file and also available at: https://github.com/seqan/chopper/blob/main/LICENSE.md +// --------------------------------------------------------------------------------------------------- + +#include 
+#include +#include +#include +#include +#include +#include + +#include + +namespace chopper::layout +{ + +double ibf_query_cost::exact(size_t const t_max, double const fpr) +{ + auto it = find_closest_fpr(fpr); + + if (contains(t_max)) + return it->second[position(t_max)]; + else + throw std::invalid_argument("No exact data available for this t_max."); +} + +double ibf_query_cost::interpolated(size_t const t_max, double const fpr) +{ + auto it = find_closest_fpr(fpr); + + if (t_max <= 64u) + { + return it->second[0]; + } + else if (t_max > maximum_t_max) + { + throw std::invalid_argument("No data available for a t_max this large."); + } + else if (contains(t_max)) + { + return it->second[position(t_max)]; + } + else + { + size_t const upper_bound{std::bit_ceil(t_max)}; + size_t const lower_bound{upper_bound >> 1}; + double const upper_value{it->second[position(upper_bound)]}; + double const lower_value{it->second[position(lower_bound)]}; + + double const interpolated_value{lower_value + + (upper_value - lower_value) * (t_max - lower_bound) / lower_bound}; + assert(interpolated_value <= upper_value); + return interpolated_value; + } +} + +std::map>::const_iterator ibf_query_cost::find_closest_fpr(double const fpr) +{ + if (auto it = cost_factors.find(fpr); it != cost_factors.end()) // fpr is found exaclty in map + return it; + + // otherwise search for the closest one in the map + auto lower_it = cost_factors.lower_bound(fpr); + auto upper_it = cost_factors.upper_bound(fpr); + + assert(lower_it != cost_factors.end() || upper_it != cost_factors.end()); + + if (lower_it == cost_factors.end()) + return upper_it; + + if (upper_it == cost_factors.end()) + return lower_it; + + if (std::abs(lower_it->first - fpr) < std::abs(upper_it->first - fpr)) + return lower_it; + else + return upper_it; +} + +} // namespace chopper::layout diff --git a/src/layout/input.cpp b/src/layout/input.cpp new file mode 100644 index 00000000..2b1a7459 --- /dev/null +++ b/src/layout/input.cpp @@ 
-0,0 +1,80 @@ +// --------------------------------------------------------------------------------------------------- +// Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin +// Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik +// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +// shipped with this file and also available at: https://github.com/seqan/chopper/blob/main/LICENSE.md +// --------------------------------------------------------------------------------------------------- + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +namespace chopper::layout +{ + +std::vector> read_filenames_from(std::istream & stream) +{ + std::vector> filenames{}; + std::string line; + + while (std::getline(stream, line) && line != chopper::prefix::meta_chopper_user_bins_start) + ; + + assert(line == chopper::prefix::meta_chopper_user_bins_start); + +#ifndef NDEBUG + size_t counter{}; +#endif + while (std::getline(stream, line) && line != chopper::prefix::meta_chopper_user_bins_end) + { + assert(line.size() >= 2); + assert(std::string_view{line}.substr(0, 1) == seqan::hibf::prefix::meta_header); + + // @0 file1.fa file2.fa + auto const bin_idx_pos = line.find(' '); + assert(bin_idx_pos != std::string::npos); + +#ifndef NDEBUG + size_t bin_idx{}; + std::from_chars(line.data() + 1, line.data() + bin_idx_pos, bin_idx); + assert(bin_idx == counter++); +#endif + + filenames.emplace_back(); + std::string_view const filename_str{line.begin() + bin_idx_pos + 1, line.end()}; + for (auto const && filename : std::views::split(filename_str, ' ')) + { + auto common_view = std::views::common(filename); + filenames.back().emplace_back(common_view.begin(), common_view.end()); + } + } + + assert(line == chopper::prefix::meta_chopper_user_bins_end); + + return filenames; +} + +std::tuple>, configuration, seqan::hibf::layout::layout> 
+read_layout_file(std::istream & stream) +{ + std::vector> filenames = chopper::layout::read_filenames_from(stream); + chopper::configuration chopper_config; + chopper_config.read_from(stream); + seqan::hibf::layout::layout hibf_layout{}; + hibf_layout.read_from(stream); + return std::make_tuple(std::move(filenames), std::move(chopper_config), std::move(hibf_layout)); +} + +} // namespace chopper::layout diff --git a/src/layout/output.cpp b/src/layout/output.cpp new file mode 100644 index 00000000..f0436bee --- /dev/null +++ b/src/layout/output.cpp @@ -0,0 +1,31 @@ +// --------------------------------------------------------------------------------------------------- +// Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin +// Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik +// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +// shipped with this file and also available at: https://github.com/seqan/chopper/blob/main/LICENSE.md +// --------------------------------------------------------------------------------------------------- + +#include +#include +#include +#include +#include + +#include +#include + +#include + +namespace chopper::layout +{ + +void write_user_bins_to(std::vector const & filenames, std::ostream & stream) +{ + stream << chopper::prefix::meta_chopper_user_bins_start << '\n'; + size_t counter{}; + for (auto const & filename : filenames) + stream << seqan::hibf::prefix::meta_header << counter++ << ' ' << filename << '\n'; + stream << chopper::prefix::meta_chopper_user_bins_end << '\n'; +} + +} // namespace chopper::layout diff --git a/src/set_up_parser.cpp b/src/set_up_parser.cpp new file mode 100644 index 00000000..da9147dd --- /dev/null +++ b/src/set_up_parser.cpp @@ -0,0 +1,237 @@ +// --------------------------------------------------------------------------------------------------- +// Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin +// Copyright (c) 
2016-2023, Knut Reinert & MPI für molekulare Genetik +// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +// shipped with this file and also available at: https://github.com/seqan/chopper/blob/main/LICENSE.md +// --------------------------------------------------------------------------------------------------- + +#include +#include +#include +#include +#include + +#include + +namespace chopper +{ + +void set_up_parser(sharg::parser & parser, configuration & config) +{ + parser.info.version = "1.0.0"; + parser.info.author = "Svenja Mehringer"; + parser.info.email = "svenja.mehringer@fu-berlin.de"; + parser.info.short_description = "Compute an HIBF layout"; + + parser.info.description.emplace_back("Computes an HIBF layout that tries to minimize the disk space consumption of " + "the resulting index. The space is estimated using a k-mer count per user " + "bin which represents the potential denisity in a technical bin in an " + "interleaved Bloom filter. You can pass the resulting layout to raptor " + "(https://github.com/seqan/raptor) to build the index and " + "conduct queries."); + + parser.info.synopsis.emplace_back( + " --input [--output ] [--threads ] [--kmer ] [--fpr ] [--hash ] " + "[--disable-estimate-union] [--disable-rearrangement]"); + + parser.add_subsection("Main options:"); + // ----------------------------------------------------------------------------------------------------------------- + parser.add_option( + config.data_file, + sharg::config{ + .short_id = '\0', + .long_id = "input", + .description = + "The input must be a file containing paths to sequence data you wish to estimate; one filepath " + "per line. If your file contains auxiliary information (e.g. 
species IDs), your file must be tab-" + "separated.", + .required = true}); + parser.add_list_item("", "Example file:"); + parser.add_list_item("", "```"); + parser.add_list_item("", "/absolute/path/to/file1.fasta"); + parser.add_list_item("", "/absolute/path/to/file2.fa.gz"); + parser.add_list_item("", "```"); + + parser.add_option( + config.k, + sharg::config{ + .short_id = '\0', + .long_id = "kmer", + .description = + "The k-mer size influences the size estimates of the input. " + "Choosing a k-mer size that is too small for " + "your data will result in files appearing more similar than they really are. Likewise, a large " + "k-mer size might miss out on certain similarities. For DNA sequences, a k-mer size between " + "[16,32] has proven to work well."}); + + parser.add_option( + config.hibf_config.tmax, + sharg::config{ + .short_id = '\0', + .long_id = "tmax", + .description = + "Limits the number of technical bins on each level of the HIBF. Choosing a good tmax is not " + "trivial. The smaller tmax, the more levels the layout needs to represent the data. This results " + "in a higher space consumption of the index. While querying each individual level is cheap, " + "querying many levels might also lead to an increased runtime. " + "A good tmax is usually the square root of the number of user bins/samples rounded to the next " + "multiple of 64. Note that your tmax will always be rounded to the next multiple of 64. " + "At the expense of a longer runtime, you can enable the statistic mode that determines the best " + "tmax for your data set. See the advanced option --determine-best-tmax", + .default_message = "≈sqrt(#samples)", + .advanced = true}); + + parser.add_option( + config.hibf_config.number_of_hash_functions, + sharg::config{.short_id = '\0', + .long_id = "hash", + .description = + "The number of hash functions to use when building the HIBF from the resulting layout. 
" + "This parameter is needed to correctly estimate the index size when computing the layout."}); + + parser.add_option( + config.hibf_config.maximum_false_positive_rate, + sharg::config{.short_id = '\0', + .long_id = "fpr", + .description = + "The false positive rate you aim for when building the HIBF from the resulting layout. " + "This parameter is needed to correctly estimate the index size when computing the layout."}); + + parser.add_option( + config.output_filename, + sharg::config{.short_id = '\0', .long_id = "output", .description = "A file name for the resulting layout."}); + + parser.add_option( + config.hibf_config.threads, + sharg::config{ + .short_id = '\0', + .long_id = "threads", + .description = + "The number of threads to use. Currently, only merging of sketches is parallelized, so if the flag " + "--disable-rearrangement is set, --threads will have no effect.", + .validator = + sharg::arithmetic_range_validator{static_cast(1), std::numeric_limits::max()}}); + + parser.add_subsection("HyperLogLog Sketches:"); + parser.add_line("To improve the layout, you can estimate the sequence similarities using HyperLogLog sketches."); + + parser.add_flag( + config.hibf_config.disable_estimate_union, + sharg::config{ + .short_id = '\0', + .long_id = "disable-estimate-union", + .description = + "The sketches are used to estimate the sequence similarity among a set of user bins. This will improve " + "the layout computation as merging user bins that do not increase technical bin sizes will be " + "preferred. This may use more RAM and can be disabled in RAM-critical environments. 
" + "Attention: Also disables rearrangement which depends on union estimations."}); + + parser.add_flag( + config.hibf_config.disable_rearrangement, + sharg::config{ + .short_id = '\0', + .long_id = "disable-rearrangement", + .description = + "As a preprocessing step, rearranging the order of the given user bins based on their sequence " + "similarity may lead to favourable small unions and thus a smaller index. " + "Depending on the number of input samples (user bins), this may be time-consuming and can thus be " + "disabled if a suboptimal layout is sufficient."}); + + parser.add_subsection("Parameter Tweaking:"); + // ----------------------------------------------------------------------------------------------------------------- + parser.add_option( + config.hibf_config.alpha, + sharg::config{ + .short_id = '\0', + .long_id = "alpha", + .description = + "The layout algorithm optimizes the space consumption of the resulting HIBF but currently has no " + "means of optimizing the runtime for querying such an HIBF. In general, the ratio of merged bins " + "and split bins influences the query time because a merged bin always triggers another search on " + "a lower level. To influence this ratio, alpha can be used. The higher alpha, the less merged " + "bins are chosen in the layout. This improves query times but leads to a bigger index.", + .advanced = true}); + + parser.add_option( + config.hibf_config.max_rearrangement_ratio, + sharg::config{ + .short_id = '\0', + .long_id = "max-rearrangement-ratio", + .description = + "When the flag --disable-rearrangement is \\fBnot\\fP set, this option can influence the rearrangement " + "algorithm. The algorithm only rearranges the order of user bins in fixed intervals. The higher " + "--max-rearrangement-ratio, the larger the intervals. 
This potentially improves the layout, but " + "increases the runtime of the layout algorithm.", + .advanced = true, + .validator = sharg::arithmetic_range_validator{0.0, 1.0}}); + + parser.add_option( + config.hibf_config.sketch_bits, + sharg::config{.short_id = '\0', + .long_id = "sketch-bits", + .description = + "The number of bits the HyperLogLog sketch should use to distribute the values into bins.", + .advanced = true, + .validator = sharg::arithmetic_range_validator{5, 32}}); + + parser.add_subsection("Special options"); + // ----------------------------------------------------------------------------------------------------------------- + parser.add_flag( + config.determine_best_tmax, + sharg::config{ + .short_id = '\0', + .long_id = "determine-best-tmax", + .description = + "When this flag is set, the program will compute multiple layouts for tmax in " + "[64 , 128, 256, ... , tmax] as well as tmax=sqrt(#samples). " + "The layout algorithm itself only optimizes the space consumption. When determining the best " + "layout, we additionally keep track of the average number of queries needed to traverse each " + "layout. This query cost is taken into account when determining the best tmax for your data. " + "Note that the option --tmax serves as upper bound. Once the layout quality starts dropping, the " + "computation is stopped. To run all layout computations, pass the flag --force-all-binnings.", + .advanced = true}); + + parser.add_flag( + config.force_all_binnings, + sharg::config{ + .short_id = '\0', + .long_id = "force-all-binnings", + .description = + "Forces all layouts up to --tmax to be computed, " + "regardless of the layout quality. 
If the flag --determine-best-tmax is not set, this flag is " + "ignored and has no effect.", + .advanced = true}); + + parser.add_flag( + config.output_verbose_statistics, + sharg::config{.short_id = '\0', + .long_id = "output-verbose-statistics", + .description = + "Enable verbose statistics to be " + "printed to std::cout. If the flag --determine-best-tmax is not set, this flag is ignored " + "and has no effect.", + .hidden = true}); + + parser.add_option( + config.sketch_directory, + sharg::config{ + .long_id = "output-sketches-to", + .description = + "If you supply a directory path with this option, the hyperloglog sketches of your input will be " + "stored in the respective path; one .hll file per input file.", + .default_message = "None", + .advanced = true}); + + parser.add_flag(config.debug, + sharg::config{.short_id = '\0', + .long_id = "debug", + .description = "Enables debug output in layout file.", + .hidden = true}); + + parser.add_section("References"); + parser.add_line("[1] Philippe Flajolet, Éric Fusy, Olivier Gandouet, Frédéric Meunier. HyperLogLog: the analysis " + "of a near-optimal cardinality estimation algorithm. AofA: Analysis of Algorithms, Jun 2007, Juan " + "les Pins, France. pp.137-156. 
hal-00406166v2, https://doi.org/10.46298/dmtcs.3545"); +} + +} // namespace chopper diff --git a/src/sketch/CMakeLists.txt b/src/sketch/CMakeLists.txt new file mode 100644 index 00000000..8b02e97b --- /dev/null +++ b/src/sketch/CMakeLists.txt @@ -0,0 +1,7 @@ +cmake_minimum_required (VERSION 3.18) + +if (NOT TARGET chopper_sketch) + add_library (chopper_sketch STATIC check_filenames.cpp output.cpp read_data_file.cpp read_hll_files_into.cpp) + + target_link_libraries (chopper_sketch PUBLIC chopper_shared) +endif () diff --git a/src/sketch/check_filenames.cpp b/src/sketch/check_filenames.cpp new file mode 100644 index 00000000..3b89eb48 --- /dev/null +++ b/src/sketch/check_filenames.cpp @@ -0,0 +1,73 @@ +// --------------------------------------------------------------------------------------------------- +// Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin +// Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik +// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +// shipped with this file and also available at: https://github.com/seqan/chopper/blob/main/LICENSE.md +// --------------------------------------------------------------------------------------------------- + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace chopper::sketch +{ + +//!\brief Checks the `filenames` for consistent files, either precomputed or sequence files. 
+void check_filenames(std::vector const & filenames, configuration & config) +{ + assert(!filenames.empty()); + + auto case_insensitive_string_ends_with = [](std::string_view str, std::string_view suffix) + { + size_t const suffix_length{suffix.size()}; + size_t const str_length{str.size()}; + + if (suffix_length > str_length) + return false; // GCOVR_EXCL_LINE + + for (size_t j = 0, s_start = str_length - suffix_length; j < suffix_length; ++j) + if (std::tolower(str[s_start + j]) != std::tolower(suffix[j])) + return false; + + return true; + }; + + // If the first filename ends in .minimiser we expect all files to end in .minimiser + config.precomputed_files = case_insensitive_string_ends_with(filenames[0], ".minimiser"); + + for (auto const & filename : filenames) + { +#if CHOPPER_WORKAROUND_GCC_BOGUS_MEMCPY +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wrestrict" +#endif // CHOPPER_WORKAROUND_GCC_BOGUS_MEMCPY + if (!std::filesystem::exists(filename)) + throw std::invalid_argument{"File " + filename + " does not exist!"}; +#if CHOPPER_WORKAROUND_GCC_BOGUS_MEMCPY +# pragma GCC diagnostic pop +#endif // CHOPPER_WORKAROUND_GCC_BOGUS_MEMCPY + + if (config.precomputed_files && !case_insensitive_string_ends_with(filename, ".minimiser")) + { + throw std::invalid_argument{"You are providing precomputed files but the file " + filename + + " does not have the correct file extension (.minimiser)." + " Mixing non-/precomputed files is not allowed."}; + } + else if (!config.precomputed_files && case_insensitive_string_ends_with(filename, ".minimiser")) + { + throw std::invalid_argument{"You are providing sequence files but the file " + filename + + " was identified as a precomputed file (.minimiser)." 
+ " Mixing non-/precomputed files is not allowed."}; + } + } +} + +} // namespace chopper::sketch diff --git a/src/sketch/output.cpp b/src/sketch/output.cpp new file mode 100644 index 00000000..6630ae79 --- /dev/null +++ b/src/sketch/output.cpp @@ -0,0 +1,48 @@ +// --------------------------------------------------------------------------------------------------- +// Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin +// Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik +// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +// shipped with this file and also available at: https://github.com/seqan/chopper/blob/main/LICENSE.md +// --------------------------------------------------------------------------------------------------- + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +namespace chopper::sketch +{ + +void write_count_file_line(std::pair> const & cluster, + uint64_t const weight, + std::ofstream & fout) +{ + auto & [key, filepaths] = cluster; + + for (auto && arr : filepaths | seqan::stl::views::join_with(';')) + fout << arr; + + fout << '\t' << weight << '\t' << key << '\n'; +} + +void write_sketch_file(std::string const & filename, + seqan::hibf::sketch::hyperloglog const & sketch, + configuration const & config) +{ + // For one file in the cluster, the file stem is used with the .hll ending + std::filesystem::path path = config.sketch_directory / std::filesystem::path(filename).stem(); + path += ".hll"; + std::ofstream hll_fout(path, std::ios::binary); + sketch.store(hll_fout); +} + +} // namespace chopper::sketch diff --git a/src/sketch/read_data_file.cpp b/src/sketch/read_data_file.cpp new file mode 100644 index 00000000..ad52e311 --- /dev/null +++ b/src/sketch/read_data_file.cpp @@ -0,0 +1,45 @@ +// --------------------------------------------------------------------------------------------------- +// 
Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin +// Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik +// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +// shipped with this file and also available at: https://github.com/seqan/chopper/blob/main/LICENSE.md +// --------------------------------------------------------------------------------------------------- + +#include +#include +#include +#include +#include + +#include +#include + +namespace chopper::sketch +{ + +void read_data_file(configuration const & config, std::vector & filenames) +{ + std::ifstream fin{config.data_file.string()}; + + if (!fin.good() || !fin.is_open()) + throw std::runtime_error{"Could not open data file " + config.data_file.string() + " for reading."}; + + std::string line; + while (std::getline(fin, line)) + { + auto tab_pos = line.find('\t'); + + if (tab_pos == std::string::npos) + { + std::string const filename{line.begin(), line.end()}; + filenames.push_back(filename); + } + else + { + std::string const filename{line.begin(), line.begin() + tab_pos}; + filenames.push_back(filename); + } + } +} + +} // namespace chopper::sketch diff --git a/src/sketch/read_hll_files_into.cpp b/src/sketch/read_hll_files_into.cpp new file mode 100644 index 00000000..e48bbab0 --- /dev/null +++ b/src/sketch/read_hll_files_into.cpp @@ -0,0 +1,52 @@ +// --------------------------------------------------------------------------------------------------- +// Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin +// Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik +// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +// shipped with this file and also available at: https://github.com/seqan/chopper/blob/main/LICENSE.md +// --------------------------------------------------------------------------------------------------- + +#include +#include +#include 
+#include +#include +#include +#include + +#include + +namespace chopper::sketch +{ + +void read_hll_files_into(std::filesystem::path const & hll_dir, + std::vector const & target_filenames, + std::vector & target) +{ + assert(std::filesystem::exists(hll_dir) && !std::filesystem::is_empty(hll_dir)); // checked in chopper_layout + + target.reserve(target_filenames.size()); + + try + { + for (auto const & filename : target_filenames) + { + std::filesystem::path path = hll_dir / std::filesystem::path(filename).stem(); + path += ".hll"; + std::ifstream hll_fin(path, std::ios::binary); + + if (!hll_fin.good()) + throw std::runtime_error{"Could not open file " + path.string()}; + + // the sketch bits will be automatically read from the files + target.emplace_back().load(hll_fin); + } + } + catch (std::runtime_error const & err) + { + std::string const chopper_msg{"[CHOPPER LAYOUT ERROR] Something went wrong trying to read the HyperLogLog" + " sketches from files:\n"}; + throw std::runtime_error{chopper_msg + err.what()}; + } +} + +} // namespace chopper::sketch diff --git a/src/util/CMakeLists.txt b/src/util/CMakeLists.txt new file mode 100644 index 00000000..e38d4a0d --- /dev/null +++ b/src/util/CMakeLists.txt @@ -0,0 +1,13 @@ +# --------------------------------------------------------------------------------------------------- +# Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin +# Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik +# This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +# shipped with this file and also available at: https://github.com/seqan/chopper/blob/main/LICENSE.md +# --------------------------------------------------------------------------------------------------- + +cmake_minimum_required (VERSION 3.18) + +add_executable (measure_hyperloglog EXCLUDE_FROM_ALL measure_hyperloglog.cpp) +target_link_libraries (measure_hyperloglog PUBLIC chopper_lib) + +add_subdirectory 
(display_layout) diff --git a/src/display_layout/CMakeLists.txt b/src/util/display_layout/CMakeLists.txt similarity index 92% rename from src/display_layout/CMakeLists.txt rename to src/util/display_layout/CMakeLists.txt index 22dbab5f..da0404ff 100644 --- a/src/display_layout/CMakeLists.txt +++ b/src/util/display_layout/CMakeLists.txt @@ -8,4 +8,4 @@ cmake_minimum_required (VERSION 3.18) add_executable (display_layout EXCLUDE_FROM_ALL display_layout.cpp general.cpp process_file.cpp sizes.cpp) -target_link_libraries (display_layout "chopper_interface") +target_link_libraries (display_layout PUBLIC chopper_lib) diff --git a/src/display_layout/display_layout.cpp b/src/util/display_layout/display_layout.cpp similarity index 84% rename from src/display_layout/display_layout.cpp rename to src/util/display_layout/display_layout.cpp index dfc210df..a6be5a15 100644 --- a/src/display_layout/display_layout.cpp +++ b/src/util/display_layout/display_layout.cpp @@ -5,29 +5,16 @@ // shipped with this file and also available at: https://github.com/seqan/chopper/blob/main/LICENSE.md // --------------------------------------------------------------------------------------------------- +#include +#include #include -#include +#include +#include +#include -#include - -#include #include #include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include - #include "shared.hpp" void init_shared_meta(sharg::parser & parser) diff --git a/src/display_layout/general.cpp b/src/util/display_layout/general.cpp similarity index 94% rename from src/display_layout/general.cpp rename to src/util/display_layout/general.cpp index f5b042aa..0a6095b4 100644 --- a/src/display_layout/general.cpp +++ b/src/util/display_layout/general.cpp @@ -5,17 +5,24 @@ // shipped with this file and also available at: https://github.com/seqan/chopper/blob/main/LICENSE.md // 
--------------------------------------------------------------------------------------------------- +#include +#include +#include +#include +#include +#include +#include #include - -#include - -#include -#include -#include +#include +#include +#include +#include +#include #include #include +#include #include #include "shared.hpp" @@ -57,7 +64,15 @@ int execute(config const & cfg) if (!layout_file.good() || !layout_file.is_open()) throw std::logic_error{"Could not open file " + cfg.input.string() + " for reading"}; +// https://godbolt.org/z/PeKnxzjn1 +#if defined(__clang__) + auto tuple = chopper::layout::read_layout_file(layout_file); + auto filenames = std::get<0>(tuple); + auto chopper_config = std::get<1>(tuple); + auto hibf_layout = std::get<2>(tuple); +#else auto [filenames, chopper_config, hibf_layout] = chopper::layout::read_layout_file(layout_file); +#endif auto const & hibf_config = chopper_config.hibf_config; std::ofstream output_stream{cfg.output}; diff --git a/src/display_layout/process_file.cpp b/src/util/display_layout/process_file.cpp similarity index 95% rename from src/display_layout/process_file.cpp rename to src/util/display_layout/process_file.cpp index 3d08361a..4e20cc40 100644 --- a/src/display_layout/process_file.cpp +++ b/src/util/display_layout/process_file.cpp @@ -5,6 +5,14 @@ // shipped with this file and also available at: https://github.com/seqan/chopper/blob/main/LICENSE.md // --------------------------------------------------------------------------------------------------- +#include +#include +#include +#include +#include +#include +#include + #include #include diff --git a/src/display_layout/shared.hpp b/src/util/display_layout/shared.hpp similarity index 100% rename from src/display_layout/shared.hpp rename to src/util/display_layout/shared.hpp diff --git a/src/display_layout/sizes.cpp b/src/util/display_layout/sizes.cpp similarity index 95% rename from src/display_layout/sizes.cpp rename to src/util/display_layout/sizes.cpp index 
cf877372..210fd2ad 100644 --- a/src/display_layout/sizes.cpp +++ b/src/util/display_layout/sizes.cpp @@ -5,22 +5,32 @@ // shipped with this file and also available at: https://github.com/seqan/chopper/blob/main/LICENSE.md // --------------------------------------------------------------------------------------------------- +#include #include #include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include -#include - -#include -#include -#include - -#include #include #include #include -#include +#include +#include #include #include #include @@ -298,7 +308,15 @@ void execute_general_stats(config const & cfg) if (!layout_file.good() || !layout_file.is_open()) throw std::logic_error{"Could not open file " + cfg.input.string() + " for reading"}; +// https://godbolt.org/z/PeKnxzjn1 +#if defined(__clang__) + auto tuple = chopper::layout::read_layout_file(layout_file); + auto filenames = std::get<0>(tuple); + auto chopper_config = std::get<1>(tuple); + auto hibf_layout = std::get<2>(tuple); +#else auto [filenames, chopper_config, hibf_layout] = chopper::layout::read_layout_file(layout_file); +#endif // Prepare configs chopper_config.hibf_config.threads = cfg.threads; diff --git a/src/measure_hyperloglog.cpp b/src/util/measure_hyperloglog.cpp similarity index 98% rename from src/measure_hyperloglog.cpp rename to src/util/measure_hyperloglog.cpp index 7da147ab..5de104a4 100644 --- a/src/measure_hyperloglog.cpp +++ b/src/util/measure_hyperloglog.cpp @@ -5,10 +5,14 @@ // shipped with this file and also available at: https://github.com/seqan/chopper/blob/main/LICENSE.md // --------------------------------------------------------------------------------------------------- +#include #include #include #include +#include +#include #include +#include #include #include diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 2bf1321f..0dffd461 100644 --- 
a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -43,7 +43,13 @@ macro (add_app_test test_filename test_alternative) # Create the test target. add_executable (${target} ${test_filename}) target_link_libraries (${target} "${PROJECT_NAME}_lib" seqan3::seqan3 gtest gtest_main) - target_compile_options (${target} PRIVATE "-Werror") + + # GCC12 and above: Disable warning about std::hardware_destructive_interference_size not being ABI-stable. + if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") + if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12) + target_compile_options (${target} PRIVATE "-Wno-interference-size") + endif () + endif () # Make seqan3::test available for both cli and api tests. target_include_directories (${target} PUBLIC "${SEQAN3_CLONE_DIR}/test/include") @@ -96,6 +102,4 @@ else () add_subdirectory (coverage) endif () -add_dependencies (cli_test measure_hyperloglog) - message (STATUS "${FontBold}You can run `make test` to build and run tests.${FontReset}") diff --git a/test/api/input_functor_test.cpp b/test/api/input_functor_test.cpp index b8e8061c..6e056f6f 100644 --- a/test/api/input_functor_test.cpp +++ b/test/api/input_functor_test.cpp @@ -9,8 +9,6 @@ #include -#include - TEST(execute_test, small_example) { // std::string input_filename = data("small.fa"); diff --git a/test/api/layout/execute_layout_test.cpp b/test/api/layout/execute_layout_test.cpp old mode 100755 new mode 100644 index 18d2b2f4..d163dc3a --- a/test/api/layout/execute_layout_test.cpp +++ b/test/api/layout/execute_layout_test.cpp @@ -7,8 +7,12 @@ #include +#include +#include #include -#include +#include +#include +#include #include #include diff --git a/test/api/layout/execute_with_estimation_test.cpp b/test/api/layout/execute_with_estimation_test.cpp index 145b2cf7..aedbf281 100644 --- a/test/api/layout/execute_with_estimation_test.cpp +++ b/test/api/layout/execute_with_estimation_test.cpp @@ -7,13 +7,19 @@ #include -#include -#include +#include +#include +#include +#include 
+#include +#include +#include #include #include #include +#include #include #include diff --git a/test/api/layout/hibf_statistics_test.cpp b/test/api/layout/hibf_statistics_test.cpp index d4e84ee6..a5e70312 100644 --- a/test/api/layout/hibf_statistics_test.cpp +++ b/test/api/layout/hibf_statistics_test.cpp @@ -7,8 +7,14 @@ #include +#include +#include +#include #include +#include #include +#include +#include #include #include diff --git a/test/api/layout/ibf_query_cost_test.cpp b/test/api/layout/ibf_query_cost_test.cpp index cdab3db9..ea8fab6f 100644 --- a/test/api/layout/ibf_query_cost_test.cpp +++ b/test/api/layout/ibf_query_cost_test.cpp @@ -7,9 +7,12 @@ #include -#include +#include +#include +#include +#include -#include "../api_test.hpp" +#include TEST(ibf_query_cost_test, exact) { diff --git a/test/api/layout/user_bin_io_test.cpp b/test/api/layout/user_bin_io_test.cpp index a2515931..daf04978 100644 --- a/test/api/layout/user_bin_io_test.cpp +++ b/test/api/layout/user_bin_io_test.cpp @@ -1,10 +1,8 @@ #include // for Test, TestInfo, EXPECT_EQ, Message, TEST, TestPartResult -#include // for size_t -#include // for operator<<, char_traits, basic_ostream, basic_stringstream, strings... -#include // for allocator, string -#include // for operator<< -#include // for vector +#include // for operator<<, char_traits, basic_ostream, basic_stringstream, strings... 
+#include // for allocator, string +#include // for vector #include #include diff --git a/test/api/sketch/check_filenames_test.cpp b/test/api/sketch/check_filenames_test.cpp index 37970150..af2df860 100644 --- a/test/api/sketch/check_filenames_test.cpp +++ b/test/api/sketch/check_filenames_test.cpp @@ -7,6 +7,11 @@ #include +#include +#include +#include +#include + #include #include "../api_test.hpp" diff --git a/test/api/sketch/read_data_file_test.cpp b/test/api/sketch/read_data_file_test.cpp index 3cf02b57..ffd92ad7 100644 --- a/test/api/sketch/read_data_file_test.cpp +++ b/test/api/sketch/read_data_file_test.cpp @@ -7,7 +7,9 @@ #include -#include +#include +#include +#include #include #include diff --git a/test/api/sketch/read_hll_files_into_test.cpp b/test/api/sketch/read_hll_files_into_test.cpp index 1147bb5f..74c73601 100644 --- a/test/api/sketch/read_hll_files_into_test.cpp +++ b/test/api/sketch/read_hll_files_into_test.cpp @@ -7,6 +7,14 @@ #include +#include +#include +#include +#include +#include +#include +#include + #include #include diff --git a/test/cli/cli_chopper_basic_test.cpp b/test/cli/cli_chopper_basic_test.cpp index 2cb59d13..52eb08b8 100644 --- a/test/cli/cli_chopper_basic_test.cpp +++ b/test/cli/cli_chopper_basic_test.cpp @@ -5,10 +5,11 @@ // shipped with this file and also available at: https://github.com/seqan/chopper/blob/main/LICENSE.md // --------------------------------------------------------------------------------------------------- +#include + +#include #include -#include // range comparisons #include // strings -#include // vectors #include diff --git a/test/cli/cli_chopper_pipeline_test.cpp b/test/cli/cli_chopper_pipeline_test.cpp index 34ef1c92..e44f5700 100644 --- a/test/cli/cli_chopper_pipeline_test.cpp +++ b/test/cli/cli_chopper_pipeline_test.cpp @@ -5,17 +5,13 @@ // shipped with this file and also available at: https://github.com/seqan/chopper/blob/main/LICENSE.md // 
--------------------------------------------------------------------------------------------------- +#include + +#include #include -#include // range comparisons #include // strings -#include // vectors - -#include -#include #include -#include -#include #include "../api/api_test.hpp" #include "cli_test.hpp" diff --git a/test/header/CMakeLists.txt b/test/header/CMakeLists.txt index 30d11efd..67b77ab2 100644 --- a/test/header/CMakeLists.txt +++ b/test/header/CMakeLists.txt @@ -11,7 +11,7 @@ include (seqan3_test_files) include (seqan3_test_component) add_library ("chopper_header_test_lib" INTERFACE) -target_link_libraries ("chopper_header_test_lib" INTERFACE "${PROJECT_NAME}_interface" gtest gtest_main) +target_link_libraries ("chopper_header_test_lib" INTERFACE "chopper_shared" gtest gtest_main) target_include_directories ("chopper_header_test_lib" INTERFACE "${SEQAN3_TEST_CLONE_DIR}/googletest/include/") # SeqAn3 script adds an include for , which we do not use in Chopper target_include_directories ("chopper_header_test_lib" INTERFACE ./dummy_include) @@ -66,7 +66,7 @@ foreach (header ${header_files}) target_include_directories (${header_target} PRIVATE $ ) - add_dependencies (${header_target} "${PROJECT_NAME}_interface" gtest gtest_main) + add_dependencies (${header_target} "chopper_shared" gtest gtest_main) else () target_link_libraries (${header_target} chopper_header_test_lib) endif ()