Skip to content

Commit

Permalink
Merge pull request #106 from Illumina/update-to-version-4
Browse files Browse the repository at this point in the history
Update to version 4
  • Loading branch information
egor-dolzhenko authored Oct 6, 2020
2 parents c4f88fc + e199e4d commit a656b27
Show file tree
Hide file tree
Showing 360 changed files with 37,808 additions and 17,472 deletions.
73 changes: 35 additions & 38 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,17 @@ include(ExternalProject)
# Download and unpack googletest at configure time
configure_file(cmake/google_test.cmake googletest-download/CMakeLists.txt)
execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" .
RESULT_VARIABLE result
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/googletest-download)
if (result)
message(FATAL_ERROR "CMake step for googletest failed: ${result}")
endif ()
RESULT_VARIABLE result
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/googletest-download )
if(result)
message(FATAL_ERROR "CMake step for googletest failed: ${result}")
endif()
execute_process(COMMAND ${CMAKE_COMMAND} --build .
RESULT_VARIABLE result
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/googletest-download)
if (result)
message(FATAL_ERROR "Build step for googletest failed: ${result}")
endif ()
RESULT_VARIABLE result
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/googletest-download )
if(result)
message(FATAL_ERROR "Build step for googletest failed: ${result}")
endif()

# Add googletest directly to our build. This defines
# the gtest and gtest_main targets.
Expand All @@ -32,30 +32,31 @@ add_subdirectory(${CMAKE_BINARY_DIR}/googletest-src


ExternalProject_Add(zlib
PREFIX ${CMAKE_BINARY_DIR}/thirdparty/zlib
GIT_REPOSITORY "https://github.com/madler/zlib.git"
GIT_TAG "v1.2.8"
UPDATE_COMMAND ""
BUILD_IN_SOURCE 1
CONFIGURE_COMMAND ${CMAKE_BINARY_DIR}/thirdparty/zlib/src/zlib/configure --prefix=${CMAKE_BINARY_DIR}/thirdparty/zlib --static
INSTALL_DIR ${CMAKE_BINARY_DIR}/thirdparty/zlib
LOG_DOWNLOAD 1
LOG_INSTALL 1
)
PREFIX ${CMAKE_BINARY_DIR}/thirdparty/zlib
GIT_REPOSITORY "https://github.com/madler/zlib.git"
GIT_TAG "v1.2.8"
UPDATE_COMMAND ""
BUILD_IN_SOURCE 1
CONFIGURE_COMMAND ${CMAKE_BINARY_DIR}/thirdparty/zlib/src/zlib/configure --prefix=${CMAKE_BINARY_DIR}/thirdparty/zlib --static
INSTALL_DIR ${CMAKE_BINARY_DIR}/thirdparty/zlib
LOG_DOWNLOAD 1
LOG_INSTALL 1
)

ExternalProject_Add(htslib
PREFIX ${CMAKE_BINARY_DIR}/thirdparty/htslib
PREFIX ${CMAKE_BINARY_DIR}/thirdparty/htslib
GIT_REPOSITORY "https://github.com/samtools/htslib.git"
GIT_TAG "1.3.1"
GIT_TAG "1.3.1"
UPDATE_COMMAND ""
BUILD_IN_SOURCE 1
BUILD_IN_SOURCE 1
CONFIGURE_COMMAND ""
BUILD_COMMAND make
INSTALL_COMMAND make install prefix=${CMAKE_BINARY_DIR}/thirdparty/htslib
LOG_DOWNLOAD 1
)

BUILD_COMMAND make
INSTALL_COMMAND make install prefix=${CMAKE_BINARY_DIR}/thirdparty/htslib
LOG_DOWNLOAD 1
)

include_directories(${CMAKE_BINARY_DIR}/thirdparty/zlib/include)
include_directories(${CMAKE_SOURCE_DIR}/thirdparty/spdlog/include)
set(zlib_static ${CMAKE_BINARY_DIR}/thirdparty/zlib/lib/libz.a)
set(htslib_static ${CMAKE_BINARY_DIR}/thirdparty/htslib/lib/libhts.a)

Expand All @@ -67,7 +68,7 @@ find_package(Boost 1.4 REQUIRED COMPONENTS program_options filesystem regex date

include_directories(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(SYSTEM ${Boost_INCLUDE_DIR})

include_directories(${CMAKE_BINARY_DIR}/thirdparty/htslib/include)

add_subdirectory(thirdparty/graph-tools-master)

Expand All @@ -78,23 +79,19 @@ add_subdirectory(genotyping)
add_subdirectory(reads)
add_subdirectory(classification)
add_subdirectory(region_spec)
add_subdirectory(region_analysis)
add_subdirectory(sample_analysis)
add_subdirectory(input)
add_subdirectory(output)
add_subdirectory(alignment)
add_subdirectory(stats)
add_subdirectory(filtering)
add_subdirectory(graph_components)
add_subdirectory(workflow)


add_executable(ExpansionHunter src/Version.hh src/ExpansionHunter.cpp)
file(GLOB SOURCES "src/*.cpp")
add_executable(ExpansionHunter ${SOURCES})
target_compile_features(ExpansionHunter PRIVATE cxx_range_for)
target_link_libraries(ExpansionHunter graphtools common workflow genotyping region_spec sample_analysis input output
alignment filtering stats classification graph_components pthread ${Boost_LIBRARIES})
install(TARGETS ExpansionHunter DESTINATION bin)
target_link_libraries(ExpansionHunter graphtools common genotyping region_analysis region_spec sample_analysis input output alignment filtering stats pthread ${Boost_LIBRARIES})
install (TARGETS ExpansionHunter DESTINATION bin)

add_dependencies(htslib zlib)
add_dependencies(common htslib)
target_include_directories(common PUBLIC ${CMAKE_BINARY_DIR}/thirdparty/zlib/include)
target_include_directories(common PUBLIC ${CMAKE_BINARY_DIR}/thirdparty/htslib/include)
35 changes: 1 addition & 34 deletions COPYRIGHT.txt
Original file line number Diff line number Diff line change
Expand Up @@ -258,37 +258,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

******************************************************************

The Netlib repository

Copyright © 1996 California Institute of Technology, Pasadena, California.
ALL RIGHTS RESERVED. Based on Government Sponsored Research NAS7-03001.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met: Redistributions
of source code must retain this copyright notice, this list of conditions and
the following disclaimer. Redistributions in binary form must reproduce the
above copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the distribution.
Neither the name of the California Institute of Technology (Caltech) nor the
names of its contributors may be used to endorse or promote products derived
from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.

******************************************************************

MurmurHash3 was written by Austin Appleby, and is placed in the public
domain. The author hereby disclaims copyright to this source code.
THE SOFTWARE.
11 changes: 5 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,10 @@ contained in the [docs folder](docs/01_Introduction.md).

The method is described in the following papers:

- Dolzhenko and others, [Detection of long repeat expansions from PCR-free
whole-genome sequence data](http://genome.cshlp.org/content/27/11/1895), Genome
Research 2017
- Egor Dolzhenko, Joke van Vugt, Richard Shaw, Mitch Bekritsky, and others,
[Detection of long repeat expansions from PCR-free whole-genome sequence data](http://genome.cshlp.org/content/27/11/1895),
Genome Research 2017

- Dolzhenko and others,
[ExpansionHunter: A sequence-graph based tool to analyze variation in short
tandem repeat regions](https://academic.oup.com/bioinformatics/article/doi/10.1093/bioinformatics/btz431/5499079),
- Egor Dolzhenko, Viraj Deshpande, Felix Schlesinger, Peter Krusche, Roman Petrovski, and others,
[ExpansionHunter: A sequence-graph based tool to analyze variation in short tandem repeat regions](https://academic.oup.com/bioinformatics/article/doi/10.1093/bioinformatics/btz431/5499079),
Bioinformatics 2019
32 changes: 19 additions & 13 deletions alignment/AlignmentFilters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@

#include "alignment/AlignmentFilters.hh"

#include <algorithm>
#include <list>
#include <vector>

Expand All @@ -44,26 +43,26 @@ using std::vector;
namespace ehunter
{

bool checkIfComesFromGraphLocus(
const list<GraphAlignment>& readAlignments, const list<GraphAlignment>& mateAlignments,
bool checkIfLocallyPlacedReadPair(
boost::optional<GraphAlignment> readAlignment, boost::optional<GraphAlignment> mateAlignment,
int kMinNonRepeatAlignmentScore)
{
int maxReadScore = 0;
for (const auto& alignment : readAlignments)
int nonRepeatAlignmentScore = 0;

if (readAlignment)
{
maxReadScore = std::max(maxReadScore, scoreAlignmentToNonloopNodes(alignment));
nonRepeatAlignmentScore += scoreAlignmentToNonloopNodes(*readAlignment);
}

int maxMateScore = 0;
for (const auto& alignment : mateAlignments)
if (mateAlignment)
{
maxMateScore = std::max(maxMateScore, scoreAlignmentToNonloopNodes(alignment));
nonRepeatAlignmentScore += scoreAlignmentToNonloopNodes(*mateAlignment);
}

return maxReadScore + maxMateScore >= kMinNonRepeatAlignmentScore;
return nonRepeatAlignmentScore >= kMinNonRepeatAlignmentScore;
}

bool checkIfUpstreamAlignmentIsGood(NodeId nodeId, const GraphAlignment& alignment)
bool checkIfUpstreamAlignmentIsGood(NodeId nodeId, GraphAlignment alignment)
{
const list<int> repeatNodeIndexes = alignment.getIndexesOfNode(nodeId);

Expand All @@ -86,7 +85,7 @@ bool checkIfUpstreamAlignmentIsGood(NodeId nodeId, const GraphAlignment& alignme
return score >= kScoreCutoff;
}

bool checkIfDownstreamAlignmentIsGood(NodeId nodeId, const GraphAlignment& alignment)
bool checkIfDownstreamAlignmentIsGood(NodeId nodeId, GraphAlignment alignment)
{
const list<int> repeatNodeIndexes = alignment.getIndexesOfNode(nodeId);

Expand Down Expand Up @@ -123,7 +122,14 @@ bool checkIfPassesAlignmentFilters(const GraphAlignment& alignment)
const int percentQueryMatches = (100 * alignment.numMatches()) / clippedQueryLength;
const int percentReferenceMatches = (100 * alignment.numMatches()) / referenceLength;

return percentQueryMatches >= 80 && percentReferenceMatches >= 80;
if (percentQueryMatches >= 80 && percentReferenceMatches >= 80)
{
return true;
}
else
{
return false;
}
}

}
26 changes: 14 additions & 12 deletions alignment/AlignmentFilters.hh
Original file line number Diff line number Diff line change
Expand Up @@ -21,33 +21,35 @@

#pragma once

#include <list>
#include <string>

#include <boost/optional.hpp>

#include "graphalign/GraphAlignment.hh"

namespace ehunter
{

/**
* Checks if a read pair originated in the locus defined by the graph
* Checks if a read pair is likely to have originated in the alignment region
*
* Verifies that there is a pair of read/mate alignments with a sufficiently high combined score to non-repeat nodes
* The check is performed by verifying that the alignment score to non-repeat nodes (combined for both mates) is
* sufficiently high.
*
* @param readAlignments: Alignments of a read
* @param mateAlignments: Alignments of read's mate
* @param kMinNonRepeatAlignmentScore: Positive score threshold
* @return true if read/mate alignment with above properties exists
* @param readAlignment: Alignment of a read
* @param mateAlignment: Alignment of read's mate
* @param kMinNonRepeatAlignmentScore: Score threshold
* @return true if the combined alignment score to non-repeat nodes is greater than or equal to the threshold
*/
bool checkIfComesFromGraphLocus(
const std::list<graphtools::GraphAlignment>& readAlignments,
const std::list<graphtools::GraphAlignment>& mateAlignments, int kMinNonRepeatAlignmentScore);
bool checkIfLocallyPlacedReadPair(
boost::optional<graphtools::GraphAlignment> readAlignment,
boost::optional<graphtools::GraphAlignment> mateAlignment, int kMinNonRepeatAlignmentScore);

// Checks if alignment upstream of a given node is high quality
bool checkIfUpstreamAlignmentIsGood(graphtools::NodeId nodeId, const graphtools::GraphAlignment& alignment);
bool checkIfUpstreamAlignmentIsGood(graphtools::NodeId nodeId, graphtools::GraphAlignment alignment);

// Checks if alignment downstream of a given node is high quality
bool checkIfDownstreamAlignmentIsGood(graphtools::NodeId nodeId, const graphtools::GraphAlignment& alignment);
bool checkIfDownstreamAlignmentIsGood(graphtools::NodeId nodeId, graphtools::GraphAlignment alignment);

bool checkIfPassesAlignmentFilters(const graphtools::GraphAlignment& alignment);

Expand Down
Loading

0 comments on commit a656b27

Please sign in to comment.