From 62343ec3c2adfe49b0eac772954c5e44934b3990 Mon Sep 17 00:00:00 2001 From: Svenja Mehringer Date: Tue, 7 Nov 2023 09:35:31 +0100 Subject: [PATCH 1/2] [FEATURE] Add --timing-output and Output layout timings. --- include/chopper/configuration.hpp | 3 +++ lib/hibf | 2 +- src/layout/execute.cpp | 29 ++++++++++++++++++++++++++++- src/set_up_parser.cpp | 6 ++++++ 4 files changed, 38 insertions(+), 2 deletions(-) diff --git a/include/chopper/configuration.hpp b/include/chopper/configuration.hpp index 82473d3e..3568053b 100644 --- a/include/chopper/configuration.hpp +++ b/include/chopper/configuration.hpp @@ -29,6 +29,9 @@ struct configuration //!\brief Internal parameter that triggers some verbose debug output. bool debug{false}; + + //!\brief If specified, layout timings are written to the specified file. + std::filesystem::path output_timings{}; //!\} /*!\name Configuration of size estimates (chopper::count) diff --git a/lib/hibf b/lib/hibf index cc744d96..f7c341c9 160000 --- a/lib/hibf +++ b/lib/hibf @@ -1 +1 @@ -Subproject commit cc744d96a764170610851509fca98f2c3d71830a +Subproject commit f7c341c9eaaea8dd920bf4039f0d32c258c6e811 diff --git a/src/layout/execute.cpp b/src/layout/execute.cpp index 48110377..7d90f796 100644 --- a/src/layout/execute.cpp +++ b/src/layout/execute.cpp @@ -61,6 +61,11 @@ int execute(chopper::configuration & config, std::vector const & fi seqan::hibf::layout::layout hibf_layout; std::vector sketches; + seqan::hibf::concurrent_timer compute_sketches_timer{}; + seqan::hibf::concurrent_timer union_estimation_timer{}; + seqan::hibf::concurrent_timer rearrangement_timer{}; + seqan::hibf::concurrent_timer dp_algorithm_timer{}; + if (config.determine_best_tmax) { std::tie(hibf_layout, sketches) = determine_best_number_of_technical_bins(config); @@ -69,8 +74,16 @@ int execute(chopper::configuration & config, std::vector const & fi { std::vector kmer_counts; + compute_sketches_timer.start(); seqan::hibf::sketch::compute_sketches(config.hibf_config, 
kmer_counts, sketches); - hibf_layout = seqan::hibf::layout::compute_layout(config.hibf_config, kmer_counts, sketches); + compute_sketches_timer.stop(); + dp_algorithm_timer.start(); + hibf_layout = seqan::hibf::layout::compute_layout(config.hibf_config, + kmer_counts, + sketches, + union_estimation_timer, + rearrangement_timer); + dp_algorithm_timer.stop(); if (config.output_verbose_statistics) { @@ -98,6 +111,20 @@ int execute(chopper::configuration & config, std::vector const & fi config.write_to(fout); hibf_layout.write_to(fout); + if (!config.output_timings.empty()) + { + std::ofstream output_stream{config.output_timings}; + output_stream << std::fixed << std::setprecision(2); + output_stream << "sketching_in_seconds\t" + << "layouting_in_seconds\t" + << "union_estimation_in_seconds\t" + << "rearrangement_in_seconds\n"; + output_stream << compute_sketches_timer.in_seconds() << '\t'; + output_stream << dp_algorithm_timer.in_seconds() << '\t'; + output_stream << union_estimation_timer.in_seconds() << '\t'; + output_stream << rearrangement_timer.in_seconds() << '\t'; + } + return 0; } diff --git a/src/set_up_parser.cpp b/src/set_up_parser.cpp index 5b64d8d1..5f18a3a9 100644 --- a/src/set_up_parser.cpp +++ b/src/set_up_parser.cpp @@ -78,6 +78,12 @@ void set_up_parser(sharg::parser & parser, configuration & config) "accuracy.", .default_message = "k-mer size", }); + parser.add_option(config.output_timings, + sharg::config{.short_id = '\0', + .long_id = "timing-output", + .description = "Write time and memory usage to specified file (TSV format). ", + .default_message = "", + .validator = sharg::output_file_validator{}}); parser.add_option( config.hibf_config.tmax, From d2d2b391e3119be75b758cc874f795a80448ca85 Mon Sep 17 00:00:00 2001 From: Svenja Mehringer Date: Wed, 8 Nov 2023 13:09:22 +0100 Subject: [PATCH 2/2] [TEST] Add CLI --timing-output test. 
--- test/cli/CMakeLists.txt | 3 ++ test/cli/cli_timing_output_test.cpp | 45 +++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 test/cli/cli_timing_output_test.cpp diff --git a/test/cli/CMakeLists.txt b/test/cli/CMakeLists.txt index 2e95608b..170910c7 100644 --- a/test/cli/CMakeLists.txt +++ b/test/cli/CMakeLists.txt @@ -14,3 +14,6 @@ target_use_datasources (cli_chopper_pipeline_test FILES seq1.fa) target_use_datasources (cli_chopper_pipeline_test FILES small.fa) target_use_datasources (cli_chopper_pipeline_test FILES small2.fa) target_use_datasources (cli_chopper_pipeline_test FILES small.split) + +add_cli_test (cli_timing_output_test.cpp) +target_use_datasources (cli_chopper_pipeline_test FILES small.fa) diff --git a/test/cli/cli_timing_output_test.cpp b/test/cli/cli_timing_output_test.cpp new file mode 100644 index 00000000..9ec62292 --- /dev/null +++ b/test/cli/cli_timing_output_test.cpp @@ -0,0 +1,45 @@ +// --------------------------------------------------------------------------------------------------- +// Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin +// Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik +// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +// shipped with this file and also available at: https://github.com/seqan/chopper/blob/main/LICENSE.md +// --------------------------------------------------------------------------------------------------- + +#include + +#include +#include +#include // strings + +#include + +#include "cli_test.hpp" + +TEST_F(cli_test, timing_output) +{ + std::string const seq_filename = data("small.fa"); + seqan3::test::tmp_directory tmp_dir{}; + std::filesystem::path const input_filename{tmp_dir.path() / "data.tsv"}; + std::filesystem::path const layout_filename{tmp_dir.path() / "output.layout"}; + std::filesystem::path const timing_filename{tmp_dir.path() / "output.timings"}; + + { + std::ofstream 
fout{input_filename}; + fout << seq_filename << '\n' << seq_filename << '\n' << seq_filename << '\n'; + } + + cli_test_result result = execute_app("chopper", + "--input", + input_filename.c_str(), + "--output", + layout_filename.c_str(), + "--timing-output", + timing_filename.c_str()); + + EXPECT_EQ(result.exit_code, 0); + EXPECT_EQ(result.out, std::string{}); + EXPECT_EQ(result.err, std::string{}); + + EXPECT_TRUE(std::filesystem::exists(timing_filename)); // file should have been written + // do not check the output content since it is not relevant what exactly the timings look like +}