Skip to content

Commit

Permalink
Merge pull request #69 from eseiler/infra/faster_testing
Browse files Browse the repository at this point in the history
[INFRA] Reduce test cases
  • Loading branch information
eseiler authored Aug 26, 2021
2 parents 23a675f + 0f19112 commit f00d56f
Show file tree
Hide file tree
Showing 6 changed files with 115 additions and 146 deletions.
7 changes: 5 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,11 @@ set (SEQAN3_SUBMODULES_DIR "lib")
find_package (SeqAn3 QUIET REQUIRED HINTS lib/seqan3/build_system)

# Use ccache.
include ("${SEQAN3_CLONE_DIR}/test/cmake/seqan3_require_ccache.cmake")
seqan3_require_ccache ()
set (USE_CCACHE ON CACHE BOOL "Use ccache if available.")
if (USE_CCACHE)
include ("${SEQAN3_CLONE_DIR}/test/cmake/seqan3_require_ccache.cmake")
seqan3_require_ccache ()
endif ()

# Add the application.
add_subdirectory (src)
Expand Down
168 changes: 24 additions & 144 deletions test/cli/raptor_parts_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,11 @@

#include "cli_test.hpp"

struct raptor_parts : public raptor_base, public testing::WithParamInterface<std::tuple<size_t, size_t, bool, size_t, size_t>> {};
struct raptor_parts : public raptor_base, public testing::WithParamInterface<std::tuple<size_t, size_t, size_t, size_t, bool>> {};

TEST_P(raptor_parts, pipeline)
{
auto const [number_of_repeated_bins, window_size, run_parallel_tmp, number_of_errors, parts] = GetParam();
bool const run_parallel = run_parallel_tmp && number_of_repeated_bins >= 32;
auto const [number_of_repeated_bins, window_size, number_of_errors, parts, compressed] = GetParam();

if (window_size == 23 && number_of_errors == 0)
GTEST_SKIP() << "Needs dynamic threshold correction";
Expand All @@ -39,8 +38,8 @@ TEST_P(raptor_parts, pipeline)
"--kmer 19",
"--window ", std::to_string(window_size),
"--size 64k",
"--threads ", run_parallel ? "2" : "1",
"--output raptor.index",
compressed ? "--compressed" : "--threads 1",
"--parts ", std::to_string(parts),
"raptor_cli_test.txt");
EXPECT_EQ(result1.out, std::string{});
Expand Down Expand Up @@ -81,17 +80,11 @@ TEST_P(raptor_parts, pipeline)
EXPECT_EQ(expected, actual);
}

TEST_P(raptor_parts, pipeline_compressed)
TEST_F(raptor_parts, pipeline_misc)
{
auto const [number_of_repeated_bins, window_size, run_parallel_tmp, number_of_errors, parts] = GetParam();
bool const run_parallel = run_parallel_tmp && number_of_repeated_bins >= 32;

if (window_size == 23 && number_of_errors == 0)
GTEST_SKIP() << "Needs dynamic threshold correction";

std::stringstream header{};
{
std::string const expanded_bins = repeat_bins(number_of_repeated_bins);
std::string const expanded_bins = repeat_bins(16);
std::ofstream file{"raptor_cli_test.txt"};
auto split_bins = expanded_bins
| std::views::split(' ')
Expand All @@ -109,84 +102,10 @@ TEST_P(raptor_parts, pipeline_compressed)

cli_test_result const result1 = execute_app("raptor", "build",
"--kmer 19",
"--window ", std::to_string(window_size),
"--window 23",
"--size 64k",
"--threads ", run_parallel ? "2" : "1",
"--output raptor.index",
"--compressed",
"--parts ", std::to_string(parts),
"raptor_cli_test.txt");
EXPECT_EQ(result1.out, std::string{});
EXPECT_EQ(result1.err, std::string{});
ASSERT_EQ(result1.exit_code, 0);

cli_test_result const result2 = execute_app("raptor", "search",
"--output search.out",
"--error ", std::to_string(number_of_errors),
"--index ", "raptor.index",
"--query ", data("query.fq"));
EXPECT_EQ(result2.out, std::string{});
EXPECT_EQ(result2.err, std::string{});
ASSERT_EQ(result2.exit_code, 0);

std::string const expected = [&] ()
{
std::string result{header.str()};
std::string line{};
std::ifstream search_result{search_result_path(number_of_repeated_bins,
window_size,
number_of_errors)};
while (std::getline(search_result, line) && line.substr(0, 6) != "query1")
{}
result += line;
result += '\n';
while (std::getline(search_result, line))
{
result += line;
result += '\n';
}

return result;
}();

std::string const actual = string_from_file("search.out");

EXPECT_EQ(expected, actual);
}

TEST_P(raptor_parts, pipeline_threshold)
{
auto const [number_of_repeated_bins, window_size, run_parallel_tmp, number_of_errors, parts] = GetParam();
bool const run_parallel = run_parallel_tmp && number_of_repeated_bins >= 32;

if (window_size == 23 && number_of_errors == 0)
GTEST_SKIP() << "Needs dynamic threshold correction";

std::stringstream header{};
{
std::string const expanded_bins = repeat_bins(number_of_repeated_bins);
std::ofstream file{"raptor_cli_test.txt"};
auto split_bins = expanded_bins
| std::views::split(' ')
| std::views::transform([](auto &&rng) {
return std::string_view(&*rng.begin(), std::ranges::distance(rng));});
size_t usr_bin_id{0};
for (auto && file_path : split_bins)
{
header << '#' << usr_bin_id++ << '\t' << file_path << '\n';
file << file_path << '\n';
}
header << "#QUERY_NAME\tUSER_BINS\n";
file << '\n';
}

cli_test_result const result1 = execute_app("raptor", "build",
"--kmer 19",
"--window ", std::to_string(window_size),
"--size 64k",
"--threads ", run_parallel ? "2" : "1",
"--output raptor.index",
"--parts ", std::to_string(parts),
"--parts 4",
"raptor_cli_test.txt");
EXPECT_EQ(result1.out, std::string{});
EXPECT_EQ(result1.err, std::string{});
Expand All @@ -206,7 +125,7 @@ TEST_P(raptor_parts, pipeline_threshold)
std::string const bin_list = [&] ()
{
std::string result;
for (size_t i = 0; i < std::max<size_t>(1, number_of_repeated_bins * 4u); ++i)
for (size_t i = 0; i < std::max<size_t>(1, 16 * 4u); ++i)
{
result += std::to_string(i);
result += ',';
Expand All @@ -221,62 +140,23 @@ TEST_P(raptor_parts, pipeline_threshold)
std::string const actual = string_from_file("search.out");

EXPECT_EQ(expected, actual);
}

TEST_P(raptor_parts, pipeline_empty)
{
auto const [number_of_repeated_bins, window_size, run_parallel_tmp, number_of_errors, parts] = GetParam();
bool const run_parallel = run_parallel_tmp && number_of_repeated_bins >= 32;

if (window_size == 23 && number_of_errors == 0)
GTEST_SKIP() << "Needs dynamic threshold correction";

std::stringstream header{};
{
std::string const expanded_bins = repeat_bins(number_of_repeated_bins);
std::ofstream file{"raptor_cli_test.txt"};
auto split_bins = expanded_bins
| std::views::split(' ')
| std::views::transform([](auto &&rng) {
return std::string_view(&*rng.begin(), std::ranges::distance(rng));});
size_t usr_bin_id{0};
for (auto && file_path : split_bins)
{
header << '#' << usr_bin_id++ << '\t' << file_path << '\n';
file << file_path << '\n';
}
header << "#QUERY_NAME\tUSER_BINS\n";
file << '\n';
}

cli_test_result const result1 = execute_app("raptor", "build",
"--kmer 19",
"--window ", std::to_string(window_size),
"--size 64k",
"--threads ", run_parallel ? "2" : "1",
"--output raptor.index",
"--parts ", std::to_string(parts),
"raptor_cli_test.txt");
EXPECT_EQ(result1.out, std::string{});
EXPECT_EQ(result1.err, std::string{});
ASSERT_EQ(result1.exit_code, 0);

cli_test_result const result2 = execute_app("raptor", "search",
"--output search.out",
"--error ", std::to_string(number_of_errors),
cli_test_result const result3 = execute_app("raptor", "search",
"--output search2.out",
"--error 1",
"--index ", "raptor.index",
"--query ", data("query_empty.fq"));
EXPECT_EQ(result2.out, std::string{});
EXPECT_EQ(result2.err, std::string{});
ASSERT_EQ(result2.exit_code, 0);
EXPECT_EQ(result3.out, std::string{});
EXPECT_EQ(result3.err, std::string{});
ASSERT_EQ(result3.exit_code, 0);

std::string const expected = [&] ()
std::string const expected2 = [&] ()
{
std::string result{header.str()};
std::string line{};
std::ifstream search_result{search_result_path(number_of_repeated_bins,
window_size,
number_of_errors,
std::ifstream search_result{search_result_path(16,
23,
1,
false,
true)};
while (std::getline(search_result, line) && line.substr(0, 6) != "query1")
Expand All @@ -292,20 +172,20 @@ TEST_P(raptor_parts, pipeline_empty)
return result;
}();

std::string const actual = string_from_file("search.out");
std::string const actual2 = string_from_file("search2.out");

EXPECT_EQ(expected, actual);
EXPECT_EQ(expected2, actual2);
}

INSTANTIATE_TEST_SUITE_P(parts_suite,
raptor_parts,
testing::Combine(testing::Values(0, 16, 32), testing::Values(19, 23), testing::Values(true, false), testing::Values(0, 1), testing::Values(2, 4, 8)),
testing::Combine(testing::Values(32), testing::Values(19, 23), testing::Values(0, 1), testing::Values(2, 4, 8), testing::Values(true, false)),
[] (testing::TestParamInfo<raptor_parts::ParamType> const & info)
{
std::string name = std::to_string(std::max<int>(1, std::get<0>(info.param) * 4)) + "_bins_" +
std::to_string(std::get<1>(info.param)) + "_window_" +
(std::get<2>(info.param) ? "parallel" : "serial") +
std::to_string(std::get<3>(info.param)) + "_error" +
std::to_string(std::get<4>(info.param)) + "_parts";
std::to_string(std::get<2>(info.param)) + "_error" +
std::to_string(std::get<3>(info.param)) + "_parts" +
(std::get<4>(info.param) ? "compressed" : "uncompressed");
return name;
});
21 changes: 21 additions & 0 deletions test/util/collect_compile_stats.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/usr/bin/env bash
#
# Collects per-translation-unit compile statistics for the cli_test and api_test targets.
#
# The build is configured with the gcc.sh/g++.sh wrappers that live next to this script as
# compilers. After configuration the wrappers are switched into timing mode, the test targets
# are built, and every ram_usage.* file found below the current directory is concatenated into
# complete.txt and handed to parse.py, which writes stats.csv.
#
# Run this from an (empty) build directory: CMake files, complete.txt and stats.csv are all
# created in the current working directory.

# Absolute path of the directory containing this script; quoted everywhere it is used so
# that checkouts in paths containing whitespace keep working.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &> /dev/null && pwd)"

# Switches both compiler wrappers back to plain pass-through mode.
# Registered as EXIT trap so the wrappers are restored even if a later step fails.
reset_scripts() {
    sed -i "s/DO_TIME=1/DO_TIME=0/" "$SCRIPT_DIR/gcc.sh"
    sed -i "s/DO_TIME=1/DO_TIME=0/" "$SCRIPT_DIR/g++.sh"
}
trap reset_scripts EXIT

# From here on: echo every command and abort on the first failing one.
set -ex

# Configure while the wrappers are still in pass-through mode. ccache is disabled via USE_CCACHE.
cmake "$SCRIPT_DIR/../.." \
      -DCMAKE_BUILD_TYPE=Debug \
      -DCMAKE_CXX_COMPILER="$SCRIPT_DIR/g++.sh" \
      -DCMAKE_C_COMPILER="$SCRIPT_DIR/gcc.sh" \
      -DUSE_CCACHE=OFF

# Enable timing mode in both wrappers for the actual build.
sed -i "s/DO_TIME=0/DO_TIME=1/" "$SCRIPT_DIR/gcc.sh"
sed -i "s/DO_TIME=0/DO_TIME=1/" "$SCRIPT_DIR/g++.sh"

make -k -j4 cli_test api_test

# Merge all per-invocation reports into one file and convert it into a CSV table.
find . -name "ram_usage.*" -exec cat {} + > complete.txt
"$SCRIPT_DIR/parse.py" complete.txt stats.csv
11 changes: 11 additions & 0 deletions test/util/g++.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/usr/bin/env bash
#
# Wrapper around the C++ compiler that can optionally record `time -v` statistics.
#
# With the DO_TIME flag at 0 the wrapper simply replaces itself with the compiler.
# Otherwise each invocation writes its `time -v` report into a fresh ram_usage.XXXXXXXX
# file in the current working directory. collect_compile_stats.sh toggles the flag by
# rewriting the assignment below with sed, so keep that line in exactly this form.

GCC="/usr/bin/g++-11"
DO_TIME=0

if [[ "$DO_TIME" -eq 0 ]]; then
    exec "$GCC" "$@"
else
    FILE=$(mktemp ram_usage.XXXXXXXX)
    # Let `time` write its report via -o instead of redirecting stderr: the compiler's own
    # diagnostics stay visible in the build log and cannot end up inside the report file.
    exec /usr/bin/time -v -o "$FILE" "$GCC" "$@"
fi
11 changes: 11 additions & 0 deletions test/util/gcc.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/usr/bin/env bash

# Wrapper around the C compiler that can optionally record `time -v` statistics.
#
# With the DO_TIME flag at 0 the wrapper simply replaces itself with the compiler.
# Otherwise each invocation writes its `time -v` report into a fresh ram_usage.XXXXXXXX
# file in the current working directory. collect_compile_stats.sh toggles the flag by
# rewriting the assignment below with sed, so keep that line in exactly this form.

GCC="/usr/bin/gcc-11"
DO_TIME=0

if [[ "$DO_TIME" -eq 0 ]]; then
    exec "$GCC" "$@"
else
    FILE=$(mktemp ram_usage.XXXXXXXX)
    # Let `time` write its report via -o instead of redirecting stderr: the compiler's own
    # diagnostics stay visible in the build log and cannot end up inside the report file.
    exec /usr/bin/time -v -o "$FILE" "$GCC" "$@"
fi
43 changes: 43 additions & 0 deletions test/util/parse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/usr/bin/env python3
# -----------------------------------------------------------------------------------------------------
# Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin
# Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik
# This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
# shipped with this file and also available at: https://github.com/seqan/raptor/blob/master/LICENSE.md
# -----------------------------------------------------------------------------------------------------
#
# Usage: parse.py <input_file> <output_file>
#
# Computes a table with RAM-Usage from a file containing output of `time -v`.
import argparse
import os
import pandas

# The parser treats the input as a sequence of fixed-size records, one per timed command.
RECORD_LENGTH = 23
# Offset (within a record) of the line whose last field is the elapsed wall clock time.
ELAPSED_LINE = 4
# Offset (within a record) of the line whose last field is the memory usage; the value is
# divided by 1024 and reported as MiB, so it is presumably given in KiB.
MAX_RSS_LINE = 9


def elapsed_to_seconds(elapsed):
    """Convert an elapsed time such as '0:01.23', '1:05.00' or '1:02:03' into seconds.

    The colon-separated fields are interpreted base 60 from left to right, so both
    `m:ss` and `h:mm:ss` are handled. Returns a float.
    """
    seconds = 0.0
    for part in elapsed.strip().split(':'):
        seconds = seconds * 60 + float(part)
    return seconds


def parse_records(lines):
    """Extract file names, RAM usage (MiB) and run times (s) from `time -v` output.

    `lines` is any iterable of text lines (e.g. an open file). Only records whose first
    line contains '-c' (i.e. that look like the compilation of a translation unit) are
    evaluated; all other records are skipped completely.

    Returns a tuple `(file_names, ram_usages, run_times)` of equally ordered lists.
    """
    file_names = []
    ram_usages = []
    run_times = []

    parsing_ram_usage = False
    for line_number, line in enumerate(lines):
        offset = line_number % RECORD_LENGTH
        if offset == 0:
            # First line of a record: the timed command. Everything after the last '-c'
            # is taken as the source path; the trailing quote and newline are cut off.
            index_of_unit = line.rfind('-c')
            parsing_ram_usage = index_of_unit != -1
            if parsing_ram_usage:
                file_names.append(line[index_of_unit:][:-2].split('/')[-1])
        elif parsing_ram_usage and offset == MAX_RSS_LINE:
            ram_usages.append(int(line.split(' ')[-1]) // 1024)
        elif parsing_ram_usage and offset == ELAPSED_LINE:
            run_times.append(elapsed_to_seconds(line.strip().split(' ')[-1]))

    return file_names, ram_usages, run_times


def main():
    """Read the `time -v` dump given on the command line and write the CSV table."""
    parser = argparse.ArgumentParser(description='Parse time and memory consumption of compiling.', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('input', type=str, help='File containing all outputs of `time -v`.')
    parser.add_argument('output', type=str, help='File to write output to. (CSV)')
    arguments = parser.parse_args()

    with open(arguments.input, 'r') as input_file:
        file_names, ram_usages, run_times = parse_records(input_file)

    with open(arguments.output, 'w') as output_file:
        df = pandas.DataFrame({'File' : file_names, 'RAM in MiB' : ram_usages, 'Time in s' : run_times})
        df = df.sort_values(by=['File'], ascending=True)
        df.to_csv(output_file, index=False)


if __name__ == '__main__':
    main()

0 comments on commit f00d56f

Please sign in to comment.