Skip to content

Commit

Permalink
Merge pull request #70 from eseiler/infra/misc
Browse files Browse the repository at this point in the history
[INFRA] Default native build, update readme
  • Loading branch information
eseiler authored Aug 26, 2021
2 parents f00d56f + 24f4947 commit ef8957d
Show file tree
Hide file tree
Showing 5 changed files with 79 additions and 22 deletions.
27 changes: 20 additions & 7 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@ cmake_minimum_required (VERSION 3.8)
# Define the application name and version.
project (raptor VERSION 2.0.0)

# Messages
# Terminal escape sequences for emphasising selected status messages.
# Esc holds the character with ASCII code 27 (escape), which introduces each sequence.
string (ASCII 27 Esc)
# Escape sequence "[1m": switches the terminal to bold text.
set (FontBold "${Esc}[1m")
# Escape sequence "[m": resets the text attributes again.
set (FontReset "${Esc}[m")

# Fallback to these values if there is no git or no git repository
set (RAPTOR_COMMIT_DATE "2021-08-20--no-git"
CACHE STRING
Expand Down Expand Up @@ -52,24 +57,32 @@ if (NOT CMAKE_BUILD_TYPE)
FORCE)
endif ()

# Native build: enabled by default, the compiler is asked to optimize for the machine running the build.
# Configure with -DRAPTOR_NATIVE_BUILD=OFF to skip -march=native.
set (RAPTOR_NATIVE_BUILD ON CACHE BOOL "Optimize build for current architecture.")
if (NOT RAPTOR_NATIVE_BUILD)
    message (STATUS "${FontBold}Native build disabled. Detecting popcnt support.${FontReset}")
    # Without -march=native, only popcnt is requested, and only if the compiler accepts the flag.
    include (CheckCXXCompilerFlag)
    check_cxx_compiler_flag ("-mpopcnt" RAPTOR_HAS_POPCNT)
    if (RAPTOR_HAS_POPCNT)
        string (APPEND CMAKE_CXX_FLAGS " -mpopcnt")
    endif ()
else ()
    message (STATUS "${FontBold}Native build enabled. Built binaries will be optimized for this system.${FontReset}")
    string (APPEND CMAKE_CXX_FLAGS " -march=native")
endif ()

# Specify the directories where to store the built archives, libraries and executables
set (CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
set (CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
set (CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin")

# Messages
string (ASCII 27 Esc)
set (FontBold "${Esc}[1m")
set (FontReset "${Esc}[m")

# Dependency: SeqAn3.
set (SEQAN3_CEREAL ON CACHE BOOL "Require cereal to be present.")
set (SEQAN3_SUBMODULES_DIR "lib")
find_package (SeqAn3 QUIET REQUIRED HINTS lib/seqan3/build_system)

# Use ccache.
set (USE_CCACHE ON CACHE BOOL "Use ccache if available.")
if (USE_CCACHE)
set (RAPTOR_USE_CCACHE ON CACHE BOOL "Use ccache if available.")
if (RAPTOR_USE_CCACHE)
include ("${SEQAN3_CLONE_DIR}/test/cmake/seqan3_require_ccache.cmake")
seqan3_require_ccache ()
endif ()
Expand Down
36 changes: 29 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
# Raptor [![build status](https://github.com/seqan/raptor/workflows/Raptor%20CI/badge.svg?branch=master)](https://github.com/seqan/raptor/actions) [![codecov](https://codecov.io/gh/seqan/raptor/branch/master/graph/badge.svg?token=SJVMYRUKW2)](https://codecov.io/gh/seqan/raptor)
# Raptor [![build status][1]][2] [![codecov][3]][4] [![install with bioconda][5]][6]

[1]: https://github.com/seqan/raptor/workflows/Raptor%20CI/badge.svg?branch=master
[2]: https://github.com/seqan/raptor/actions
[3]: https://codecov.io/gh/seqan/raptor/branch/master/graph/badge.svg?token=SJVMYRUKW2
[4]: https://codecov.io/gh/seqan/raptor
[5]: https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat
[6]: https://bioconda.github.io/recipes/raptor/README.html

### A fast and space-efficient pre-filter for querying very large collections of nucleotide sequences

## Download and Installation
There may be performance benefits when compiling from source, especially when using `-march=native` as compiler
directive.
There may be performance benefits when compiling from source as the build can be optimized for the host system.

### Install with bioconda (Linux)
[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat)](http://bioconda.github.io/recipes/raptor/README.html)

```bash
conda install -c bioconda -c conda-forge raptor
Expand Down Expand Up @@ -62,16 +68,20 @@ make

The binary can be found in `bin`.

You may want to add the raptor executable yo your PATH:
You may want to add the Raptor executable to your PATH:
```
export PATH=$(pwd)/bin:$PATH
raptor --version
```

By default, Raptor will be built with host-specific optimizations (`-march=native`). This behavior can be disabled by
passing `-DRAPTOR_NATIVE_BUILD=OFF` to CMake.

</details>

## Example Data and Usage
A toy data set can be found [here](https://ftp.imp.fu-berlin.de/pub/seiler/raptor/).
A toy data set (124 MiB compressed, 983 MiB decompressed) can be found
[here](https://ftp.imp.fu-berlin.de/pub/seiler/raptor/).

```bash
wget https://ftp.imp.fu-berlin.de/pub/seiler/raptor/example_data.tar.gz
Expand Down Expand Up @@ -113,7 +123,7 @@ Afterwards, we can search for some reads:
raptor search --error 2 --index raptor.index --query example_data/64/reads/mini.fastq --output search.output
```

The output starts with a header section (lines starting with `\#`). The header maps a number to each input file.
The output starts with a header section (lines starting with `#`). The header maps a number to each input file.
After the header section, each line of the output consists of the read ID (in the toy example these are numbers) and
the corresponding bins in which they were found:
```text
Expand Down Expand Up @@ -169,6 +179,18 @@ The preprocessing applies the same cutoffs as used in Mantis
This means that only minimisers that occur more often than the cutoff specifies are included in the output.
If you wish to process all minimisers, you can use `--disable-cutoffs`.

### Partitioned indices
To reduce the overall memory consumption, the index can be divided into multiple (a power of two) parts.
This can be done by passing `--parts n` to `raptor build`, where `n` is the number of parts you want to create.
This will create `n` files, each representing one part of the index. The `--size` parameter describes the overall size
of the index. For example, `--size 8g --parts 4` will create four 2 GiB indices. This will reduce the memory consumption
of `raptor build` and `raptor search` by approximately 6 GiB, since there will only be one part in memory at any given
time. `raptor search` will automatically detect the parts, and does not need any special parameters.

### Upgrading the index (v1.1.0 to v2.0.0)
An old index can be upgraded by running `raptor upgrade` and providing some information about how the index was
constructed.

### SOCKS interface
We implement the core interface of [SOCKS](https://gitlab.ub.uni-bielefeld.de/gi/socks).
For a list of options, see the help pages:
Expand Down
18 changes: 14 additions & 4 deletions src/argument_parsing/build.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ void init_build_parser(seqan3::argument_parser & parser, build_arguments & argum
'\0',
"shape",
"The shape to use for k-mers. Mutually exclusive with --kmer.",
seqan3::option_spec::hidden, // Add help in kmer_size
seqan3::option_spec::advanced, // Add help in kmer_size
seqan3::regex_validator{"[01]+"});
parser.add_option(arguments.out_path,
'\0',
Expand Down Expand Up @@ -73,6 +73,11 @@ void init_build_parser(seqan3::argument_parser & parser, build_arguments & argum
"compute-minimiser",
"Computes minimisers using cutoffs from Mantis (Pandey et al.). Does not create the index.",
arguments.is_socks ? seqan3::option_spec::hidden : seqan3::option_spec::standard);
parser.add_flag(arguments.compute_minimiser,
'\0',
"compute-minimizer",
"Hidden flag, alias of --compute-minimiser.",
seqan3::option_spec::hidden);
parser.add_flag(arguments.disable_cutoffs,
'\0',
"disable-cutoffs",
Expand Down Expand Up @@ -119,8 +124,13 @@ void run_build(seqan3::argument_parser & parser, bool const is_socks)
arguments.window_size = arguments.shape.size();
}

std::filesystem::path output_directory = parser.is_option_set("compute-minimiser") ? arguments.out_path :
arguments.out_path.parent_path();
bool const is_compute_minimiser_set{parser.is_option_set("compute-minimiser") ||
parser.is_option_set("compute-minimizer")};

arguments.compute_minimiser = is_compute_minimiser_set;

std::filesystem::path output_directory = is_compute_minimiser_set ? arguments.out_path :
arguments.out_path.parent_path();
std::error_code ec{};
std::filesystem::create_directories(output_directory, ec);

Expand All @@ -132,7 +142,7 @@ void run_build(seqan3::argument_parser & parser, bool const is_socks)
ec.message())};
// LCOV_EXCL_END

if (!parser.is_option_set("compute-minimiser"))
if (!is_compute_minimiser_set)
{
seqan3::output_file_validator{}(arguments.out_path);

Expand Down
11 changes: 11 additions & 0 deletions test/cli/raptor_options_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,17 @@ TEST_F(raptor_build, directory_missing)
EXPECT_EQ(result.err, std::string{"[Error] Option --output is required but not set.\n"});
}

// The hidden --compute-minimizer spelling must be accepted as an option: the only failure
// reported for this call is the missing --output, and nothing is written to stdout.
TEST_F(raptor_build, alias)
{
    std::string const expected_stdout{};
    std::string const expected_stderr{"[Error] Option --output is required but not set.\n"};

    cli_test_result const result = execute_app("raptor",
                                               "build",
                                               "--size 8m",
                                               "--compute-minimizer",
                                               tmp_bin_list_file.file_path);

    EXPECT_NE(result.exit_code, 0);
    EXPECT_EQ(result.out, expected_stdout);
    EXPECT_EQ(result.err, expected_stderr);
}

TEST_F(raptor_build, size_missing)
{
cli_test_result const result = execute_app("raptor", "build",
Expand Down
9 changes: 5 additions & 4 deletions test/util/collect_compile_stats.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#!/usr/bin/env bash
set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &> /dev/null && pwd)"

Expand All @@ -8,14 +9,14 @@ reset_scripts() {
}
trap reset_scripts EXIT

set -ex

cmake $SCRIPT_DIR/../.. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_COMPILER=$SCRIPT_DIR/g++.sh -DCMAKE_C_COMPILER=$SCRIPT_DIR/gcc.sh -DUSE_CCACHE=OFF
cmake $SCRIPT_DIR/../.. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_COMPILER=$SCRIPT_DIR/g++.sh -DCMAKE_C_COMPILER=$SCRIPT_DIR/gcc.sh -DRAPTOR_USE_CCACHE=OFF -DRAPTOR_NATIVE_BUILD=ON

sed -i "s/DO_TIME=0/DO_TIME=1/" $SCRIPT_DIR/gcc.sh
sed -i "s/DO_TIME=0/DO_TIME=1/" $SCRIPT_DIR/g++.sh

make -k -j4 cli_test api_test
make -k -j6 cli_test api_test

find . -name "ram_usage.*" -exec cat {} + > complete.txt
$SCRIPT_DIR/parse.py complete.txt stats.csv

echo "Results can be found in $(pwd)/stats.csv"

0 comments on commit ef8957d

Please sign in to comment.