From 0f191125f7ff0f1d79a87c1998514a9f7275618b Mon Sep 17 00:00:00 2001 From: Enrico Seiler Date: Thu, 26 Aug 2021 15:35:28 +0200 Subject: [PATCH] [INFRA] Reduce test cases --- CMakeLists.txt | 7 +- test/cli/raptor_parts_test.cpp | 168 +++++------------------------ test/util/collect_compile_stats.sh | 21 ++++ test/util/g++.sh | 11 ++ test/util/gcc.sh | 11 ++ test/util/parse.py | 43 ++++++++ 6 files changed, 115 insertions(+), 146 deletions(-) create mode 100755 test/util/collect_compile_stats.sh create mode 100755 test/util/g++.sh create mode 100755 test/util/gcc.sh create mode 100755 test/util/parse.py diff --git a/CMakeLists.txt b/CMakeLists.txt index 424528c2..91dc0428 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -68,8 +68,11 @@ set (SEQAN3_SUBMODULES_DIR "lib") find_package (SeqAn3 QUIET REQUIRED HINTS lib/seqan3/build_system) # Use ccache. -include ("${SEQAN3_CLONE_DIR}/test/cmake/seqan3_require_ccache.cmake") -seqan3_require_ccache () +set (USE_CCACHE ON CACHE BOOL "Use ccache if available.") +if (USE_CCACHE) + include ("${SEQAN3_CLONE_DIR}/test/cmake/seqan3_require_ccache.cmake") + seqan3_require_ccache () +endif () # Add the application. add_subdirectory (src) diff --git a/test/cli/raptor_parts_test.cpp b/test/cli/raptor_parts_test.cpp index c5ac1631..976155a6 100644 --- a/test/cli/raptor_parts_test.cpp +++ b/test/cli/raptor_parts_test.cpp @@ -7,12 +7,11 @@ #include "cli_test.hpp" -struct raptor_parts : public raptor_base, public testing::WithParamInterface> {}; +struct raptor_parts : public raptor_base, public testing::WithParamInterface> {}; TEST_P(raptor_parts, pipeline) { - auto const [number_of_repeated_bins, window_size, run_parallel_tmp, number_of_errors, parts] = GetParam(); - bool const run_parallel = run_parallel_tmp && number_of_repeated_bins >= 32; + auto const [number_of_repeated_bins, window_size, number_of_errors, parts, compressed] = GetParam(); if (window_size == 23 && number_of_errors == 0) GTEST_SKIP() << "Needs dynamic threshold correction"; @@ -39,8 +38,8 @@ TEST_P(raptor_parts, pipeline) "--kmer 19", "--window ", std::to_string(window_size), "--size 64k", - "--threads ", run_parallel ? "2" : "1", "--output raptor.index", + compressed ? "--compressed" : "--threads 1", "--parts ", std::to_string(parts), "raptor_cli_test.txt"); EXPECT_EQ(result1.out, std::string{}); @@ -81,17 +80,11 @@ TEST_P(raptor_parts, pipeline) EXPECT_EQ(expected, actual); } -TEST_P(raptor_parts, pipeline_compressed) +TEST_F(raptor_parts, pipeline_misc) { - auto const [number_of_repeated_bins, window_size, run_parallel_tmp, number_of_errors, parts] = GetParam(); - bool const run_parallel = run_parallel_tmp && number_of_repeated_bins >= 32; - - if (window_size == 23 && number_of_errors == 0) - GTEST_SKIP() << "Needs dynamic threshold correction"; - std::stringstream header{}; { - std::string const expanded_bins = repeat_bins(number_of_repeated_bins); + std::string const expanded_bins = repeat_bins(16); std::ofstream file{"raptor_cli_test.txt"}; auto split_bins = expanded_bins | std::views::split(' ') @@ -109,84 +102,10 @@ TEST_P(raptor_parts, pipeline_compressed) cli_test_result const result1 = execute_app("raptor", "build", "--kmer 19", - "--window ", std::to_string(window_size), + "--window 23", "--size 64k", - "--threads ", run_parallel ? "2" : "1", "--output raptor.index", - "--compressed", - "--parts ", std::to_string(parts), - "raptor_cli_test.txt"); - EXPECT_EQ(result1.out, std::string{}); - EXPECT_EQ(result1.err, std::string{}); - ASSERT_EQ(result1.exit_code, 0); - - cli_test_result const result2 = execute_app("raptor", "search", - "--output search.out", - "--error ", std::to_string(number_of_errors), - "--index ", "raptor.index", - "--query ", data("query.fq")); - EXPECT_EQ(result2.out, std::string{}); - EXPECT_EQ(result2.err, std::string{}); - ASSERT_EQ(result2.exit_code, 0); - - std::string const expected = [&] () - { - std::string result{header.str()}; - std::string line{}; - std::ifstream search_result{search_result_path(number_of_repeated_bins, - window_size, - number_of_errors)}; - while (std::getline(search_result, line) && line.substr(0, 6) != "query1") - {} - result += line; - result += '\n'; - while (std::getline(search_result, line)) - { - result += line; - result += '\n'; - } - - return result; - }(); - - std::string const actual = string_from_file("search.out"); - - EXPECT_EQ(expected, actual); -} - -TEST_P(raptor_parts, pipeline_threshold) -{ - auto const [number_of_repeated_bins, window_size, run_parallel_tmp, number_of_errors, parts] = GetParam(); - bool const run_parallel = run_parallel_tmp && number_of_repeated_bins >= 32; - - if (window_size == 23 && number_of_errors == 0) - GTEST_SKIP() << "Needs dynamic threshold correction"; - - std::stringstream header{}; - { - std::string const expanded_bins = repeat_bins(number_of_repeated_bins); - std::ofstream file{"raptor_cli_test.txt"}; - auto split_bins = expanded_bins - | std::views::split(' ') - | std::views::transform([](auto &&rng) { - return std::string_view(&*rng.begin(), std::ranges::distance(rng));}); - size_t usr_bin_id{0}; - for (auto && file_path : split_bins) - { - header << '#' << usr_bin_id++ << '\t' << file_path << '\n'; - file << file_path << '\n'; - } - header << "#QUERY_NAME\tUSER_BINS\n"; - file << '\n'; - } - - cli_test_result const result1 = execute_app("raptor", "build", - "--kmer 19", - "--window ", std::to_string(window_size), - "--size 64k", - "--threads ", run_parallel ? "2" : "1", - "--output raptor.index", - "--parts ", std::to_string(parts), + "--parts 4", "raptor_cli_test.txt"); EXPECT_EQ(result1.out, std::string{}); EXPECT_EQ(result1.err, std::string{}); @@ -206,7 +125,7 @@ TEST_P(raptor_parts, pipeline_threshold) std::string const bin_list = [&] () { std::string result; - for (size_t i = 0; i < std::max(1, number_of_repeated_bins * 4u); ++i) + for (size_t i = 0; i < std::max(1, 16 * 4u); ++i) { result += std::to_string(i); result += ','; @@ -221,62 +140,23 @@ TEST_P(raptor_parts, pipeline_threshold) std::string const actual = string_from_file("search.out"); EXPECT_EQ(expected, actual); -} - -TEST_P(raptor_parts, pipeline_empty) -{ - auto const [number_of_repeated_bins, window_size, run_parallel_tmp, number_of_errors, parts] = GetParam(); - bool const run_parallel = run_parallel_tmp && number_of_repeated_bins >= 32; - - if (window_size == 23 && number_of_errors == 0) - GTEST_SKIP() << "Needs dynamic threshold correction"; - - std::stringstream header{}; - { - std::string const expanded_bins = repeat_bins(number_of_repeated_bins); - std::ofstream file{"raptor_cli_test.txt"}; - auto split_bins = expanded_bins - | std::views::split(' ') - | std::views::transform([](auto &&rng) { - return std::string_view(&*rng.begin(), std::ranges::distance(rng));}); - size_t usr_bin_id{0}; - for (auto && file_path : split_bins) - { - header << '#' << usr_bin_id++ << '\t' << file_path << '\n'; - file << file_path << '\n'; - } - header << "#QUERY_NAME\tUSER_BINS\n"; - file << '\n'; - } - - cli_test_result const result1 = execute_app("raptor", "build", - "--kmer 19", - "--window ", std::to_string(window_size), - "--size 64k", - "--threads ", run_parallel ? "2" : "1", - "--output raptor.index", - "--parts ", std::to_string(parts), - "raptor_cli_test.txt"); - EXPECT_EQ(result1.out, std::string{}); - EXPECT_EQ(result1.err, std::string{}); - ASSERT_EQ(result1.exit_code, 0); - cli_test_result const result2 = execute_app("raptor", "search", - "--output search.out", - "--error ", std::to_string(number_of_errors), + cli_test_result const result3 = execute_app("raptor", "search", + "--output search2.out", + "--error 1", "--index ", "raptor.index", "--query ", data("query_empty.fq")); - EXPECT_EQ(result2.out, std::string{}); - EXPECT_EQ(result2.err, std::string{}); - ASSERT_EQ(result2.exit_code, 0); + EXPECT_EQ(result3.out, std::string{}); + EXPECT_EQ(result3.err, std::string{}); + ASSERT_EQ(result3.exit_code, 0); - std::string const expected = [&] () + std::string const expected2 = [&] () { std::string result{header.str()}; std::string line{}; - std::ifstream search_result{search_result_path(number_of_repeated_bins, - window_size, - number_of_errors, + std::ifstream search_result{search_result_path(16, + 23, + 1, false, true)}; while (std::getline(search_result, line) && line.substr(0, 6) != "query1") @@ -292,20 +172,20 @@ TEST_P(raptor_parts, pipeline_empty) return result; }(); - std::string const actual = string_from_file("search.out"); + std::string const actual2 = string_from_file("search2.out"); - EXPECT_EQ(expected, actual); + EXPECT_EQ(expected2, actual2); } INSTANTIATE_TEST_SUITE_P(parts_suite, raptor_parts, - testing::Combine(testing::Values(0, 16, 32), testing::Values(19, 23), testing::Values(true, false), testing::Values(0, 1), testing::Values(2, 4, 8)), + testing::Combine(testing::Values(32), testing::Values(19, 23), testing::Values(0, 1), testing::Values(2, 4, 8), testing::Values(true, false)), [] (testing::TestParamInfo const & info) { std::string name = std::to_string(std::max(1, std::get<0>(info.param) * 4)) + "_bins_" + std::to_string(std::get<1>(info.param)) + "_window_" + - (std::get<2>(info.param) ? "parallel" : "serial") + - std::to_string(std::get<3>(info.param)) + "_error" + - std::to_string(std::get<4>(info.param)) + "_parts"; + std::to_string(std::get<2>(info.param)) + "_error" + + std::to_string(std::get<3>(info.param)) + "_parts" + + (std::get<4>(info.param) ? "compressed" : "uncompressed"); return name; }); diff --git a/test/util/collect_compile_stats.sh b/test/util/collect_compile_stats.sh new file mode 100755 index 00000000..c0c78ca3 --- /dev/null +++ b/test/util/collect_compile_stats.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &> /dev/null && pwd)" + +reset_scripts() { + sed -i "s/DO_TIME=1/DO_TIME=0/" $SCRIPT_DIR/gcc.sh + sed -i "s/DO_TIME=1/DO_TIME=0/" $SCRIPT_DIR/g++.sh +} +trap reset_scripts EXIT + +set -ex + +cmake $SCRIPT_DIR/../.. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_COMPILER=$SCRIPT_DIR/g++.sh -DCMAKE_C_COMPILER=$SCRIPT_DIR/gcc.sh -DUSE_CCACHE=OFF + +sed -i "s/DO_TIME=0/DO_TIME=1/" $SCRIPT_DIR/gcc.sh +sed -i "s/DO_TIME=0/DO_TIME=1/" $SCRIPT_DIR/g++.sh + +make -k -j4 cli_test api_test + +find . -name "ram_usage.*" -exec cat {} + > complete.txt +$SCRIPT_DIR/parse.py complete.txt stats.csv diff --git a/test/util/g++.sh b/test/util/g++.sh new file mode 100755 index 00000000..fe752bd1 --- /dev/null +++ b/test/util/g++.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +GCC="/usr/bin/g++-11" +DO_TIME=0 + +if [[ DO_TIME -eq 0 ]]; then + exec "$GCC" "$@" +else + FILE=$(mktemp ram_usage.XXXXXXXX) + exec /usr/bin/time -v "$GCC" "$@" 2> $FILE +fi diff --git a/test/util/gcc.sh b/test/util/gcc.sh new file mode 100755 index 00000000..98d31022 --- /dev/null +++ b/test/util/gcc.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +GCC="/usr/bin/gcc-11" +DO_TIME=0 + +if [[ DO_TIME -eq 0 ]]; then + exec "$GCC" "$@" +else + FILE=$(mktemp ram_usage.XXXXXXXX) + exec /usr/bin/time -v "$GCC" "$@" 2> $FILE +fi diff --git a/test/util/parse.py b/test/util/parse.py new file mode 100755 index 00000000..77cd8886 --- /dev/null +++ b/test/util/parse.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python3 +# ----------------------------------------------------------------------------------------------------- +# Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin +# Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik +# This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License +# shipped with this file and also available at: https://github.com/seqan/raptor/blob/master/LICENSE.md +# ----------------------------------------------------------------------------------------------------- +# +# Usage ram_usage.py +# +# Computes a table with RAM-Usage from a file containing output of `time -v`. +import argparse +import os +import pandas + +parser = argparse.ArgumentParser(description='Parse time and memory consumption of compiling.', formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument('input', type=str, help='File containing all outputs of `time -v`.') +parser.add_argument('output', type=str, help='File to write output to. (CSV)') +arguments = parser.parse_args() + +file_names = [] +ram_usages = [] +run_times = [] + +with open(arguments.input, 'r') as input_file: + parsing_ram_usage = False + for line_number, line in enumerate(input_file): + if line_number % 23 == 0: + index_of_unit = line.rfind('-c') + if index_of_unit != - 1: + parsing_ram_usage = True + file_names.append(line[index_of_unit:][:-2].split('/')[-1]) + else: + parsing_ram_usage = False + if parsing_ram_usage and ((line_number - 9) % 23) == 0: + ram_usages.append(int(line.split(' ')[-1]) // 1024) + if parsing_ram_usage and ((line_number - 4) % 23) == 0: + run_times.append(line.strip().split(' ')[-1].lstrip('0:')) + +with open(arguments.output, 'w') as output_file: + df = pandas.DataFrame({'File' : file_names, 'RAM in MiB' : ram_usages, 'Time in s' : run_times}) + df = df.sort_values(by=['File'], ascending=True) + df.to_csv(output_file, index=False)