From 8b7f7428ca394b97714789a1027d07e9534771ef Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Tue, 11 May 2021 14:29:49 +0100 Subject: [PATCH 01/29] Bump version 0.9.0 --- CHANGELOG.md | 8 +++++++- CMakeLists.txt | 2 +- README.md | 6 +++--- example/run_pandora.sh | 4 ++-- scripts/portable_binary_builder/README.md | 4 ++-- 5 files changed, 15 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1b943850..977dba72 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,11 @@ project adheres to ## [Unreleased] +## [0.9.0] + +### Changed +- Version bump from `0.9.0-rc2` to `0.9.0`. + ## [0.9.0-rc2] ### Changed @@ -92,7 +97,8 @@ from this point will have their changes meticulously documented here. - k-mer coverage underflow bug in `LocalPRG` [[#183][183]] -[Unreleased]: https://github.com/rmcolq/pandora/compare/0.9.0-rc2...HEAD +[Unreleased]: https://github.com/rmcolq/pandora/compare/0.9.0...HEAD +[0.9.0]: https://github.com/rmcolq/pandora/releases/tag/0.9.0 [0.9.0-rc2]: https://github.com/rmcolq/pandora/releases/tag/0.9.0-rc2 [0.9.0-rc1]: https://github.com/rmcolq/pandora/releases/tag/0.9.0-rc1 [0.8.0]: https://github.com/rmcolq/pandora/releases/tag/0.8.0 diff --git a/CMakeLists.txt b/CMakeLists.txt index e2922e07..ba25ed1f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,7 +12,7 @@ HunterGate( # project configuration set(PROJECT_NAME_STR pandora) project(${PROJECT_NAME_STR} VERSION "0.9.0" LANGUAGES C CXX) -set(ADDITIONAL_VERSION_LABELS "-rc2") +set(ADDITIONAL_VERSION_LABELS "") configure_file( include/version.h.in ${CMAKE_BINARY_DIR}/include/version.h ) # add or not feature to print the stack trace diff --git a/README.md b/README.md index f6766be5..cfc06a28 100644 --- a/README.md +++ b/README.md @@ -78,13 +78,13 @@ In this binary, all libraries are linked statically. 
* **Download**: ``` - wget https://github.com/rmcolq/pandora/releases/download/0.9.0-rc2/pandora-linux-precompiled-v0.9.0-rc2 + wget https://github.com/rmcolq/pandora/releases/download/0.9.0/pandora-linux-precompiled-v0.9.0 ``` * **Running**: ``` -chmod +x pandora-linux-precompiled-v0.9.0-rc2 -./pandora-linux-precompiled-v0.9.0-rc2 -h +chmod +x pandora-linux-precompiled-v0.9.0 +./pandora-linux-precompiled-v0.9.0 -h ``` * **Notes**: diff --git a/example/run_pandora.sh b/example/run_pandora.sh index 17ae6348..cf2858c2 100755 --- a/example/run_pandora.sh +++ b/example/run_pandora.sh @@ -2,8 +2,8 @@ set -eu # configs -pandora_URL="https://github.com/rmcolq/pandora/releases/download/0.9.0-rc2/pandora-linux-precompiled-v0.9.0-rc2" -pandora_executable="./pandora-linux-precompiled-v0.9.0-rc2" +pandora_URL="https://github.com/rmcolq/pandora/releases/download/0.9.0/pandora-linux-precompiled-v0.9.0" +pandora_executable="./pandora-linux-precompiled-v0.9.0" make_prg_URL="https://github.com/leoisl/make_prg/releases/download/v0.2.0_prototype/make_prg_0.2.0_prototype" make_prg_executable="./make_prg_0.2.0_prototype" diff --git a/scripts/portable_binary_builder/README.md b/scripts/portable_binary_builder/README.md index cc0d2b05..22a7b867 100644 --- a/scripts/portable_binary_builder/README.md +++ b/scripts/portable_binary_builder/README.md @@ -2,9 +2,9 @@ ``` cd /scripts/portable_binary_builder -sudo ./build_portable_binary.sh +./build_portable_binary.sh ``` The portable binary will be in `/pandora-linux-precompiled` -You can remove `sudo` if it is not needed to run `docker`. +You can add `sudo` if it is needed to run `docker`. 
From 733ceadf1be5c0a81f6222727820100a71b99cc4 Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Tue, 11 May 2021 14:56:28 +0100 Subject: [PATCH 02/29] Updating example/run_pandora.sh with new make_prg version --- example/run_pandora.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/example/run_pandora.sh b/example/run_pandora.sh index cf2858c2..b765c577 100755 --- a/example/run_pandora.sh +++ b/example/run_pandora.sh @@ -4,8 +4,8 @@ set -eu # configs pandora_URL="https://github.com/rmcolq/pandora/releases/download/0.9.0/pandora-linux-precompiled-v0.9.0" pandora_executable="./pandora-linux-precompiled-v0.9.0" -make_prg_URL="https://github.com/leoisl/make_prg/releases/download/v0.2.0_prototype/make_prg_0.2.0_prototype" -make_prg_executable="./make_prg_0.2.0_prototype" +make_prg_URL="https://github.com/leoisl/make_prg/releases/download/v0.2.0/make_prg_0.2.0" +make_prg_executable="./make_prg_0.2.0" function download_tool { From 8010b634a7239e13b1787c6f8d162c64dbc6e2d8 Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Wed, 12 May 2021 15:18:44 +0100 Subject: [PATCH 03/29] Adding infrastructure for conda build --- .gitignore | 2 ++ CMakeLists.txt | 19 +++++++++---- example/run_pandora_conda.sh | 39 ++++++++++++++++++++++++++ scripts/create_archives.sh | 54 ++++++++++++++++++++++++++++++++++++ test/CMakeLists.txt | 5 ++-- 5 files changed, 112 insertions(+), 7 deletions(-) create mode 100755 example/run_pandora_conda.sh create mode 100644 scripts/create_archives.sh diff --git a/.gitignore b/.gitignore index fede8a6e..18bf813d 100644 --- a/.gitignore +++ b/.gitignore @@ -99,3 +99,5 @@ pandora #portable binary build dir build_portable_executable pandora-linux-precompiled + +/archives/ \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index ba25ed1f..53efa89c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,7 @@ cmake_minimum_required(VERSION 3.12) # required by hunter ZLIB installation # include hunter 
+option(HUNTER_STATUS_DEBUG "Hunter debug" OFF) # comment if does not want hunter debug on set(HUNTER_ROOT ${CMAKE_BINARY_DIR}/hunter) include("cmake/HunterGate.cmake") HunterGate( @@ -55,8 +56,13 @@ else () set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp") endif() -# static C and C++ flags -set(STATIC_C_CXX "-static-libgcc -static-libstdc++") +# always link rt +set(RT_LIBRARY "rt") + +if(NOT BIOCONDA) + # static C and C++ flags + set(STATIC_C_CXX "-static-libgcc -static-libstdc++") +endif() ######################################################################################################################## # EXTERNAL LIBS INSTALLATION @@ -92,10 +98,10 @@ set(Gtest_LIBRARIES GTest::gtest GTest::gmock_main) ######################################################################################################################## # INSTALL BOOST +set(Boost_USE_STATIC_LIBS ON) hunter_add_package(Boost COMPONENTS filesystem iostreams log serialization system thread) find_package(Boost CONFIG REQUIRED filesystem iostreams log serialization system thread) -set(BOOST_LIBRARIES Boost::filesystem Boost::iostreams Boost::log Boost::serialization Boost::system Boost::thread) -set(Boost_USE_STATIC_LIBS ON) +set(Boost_LIBRARIES Boost::filesystem Boost::iostreams Boost::log Boost::serialization Boost::system Boost::thread) ######################################################################################################################## ######################################################################################################################## # END EXTERNAL LIBS INSTALLATION @@ -148,13 +154,16 @@ add_dependencies(${PROJECT_NAME} gatb) target_link_libraries(${PROJECT_NAME} ${GATB_LIBS} - ${BOOST_LIBRARIES} + ${Boost_LIBRARIES} ${ZLIB_LIBRARY} ${CMAKE_DL_LIBS} ${STATIC_C_CXX} ${BACKWARD_LIBRARIES} ${SEQAN_LIBRARIES} + ${RT_LIBRARY} ) enable_testing() add_subdirectory(test) + +install(TARGETS ${PROJECT_NAME} RUNTIME DESTINATION bin) diff --git 
a/example/run_pandora_conda.sh b/example/run_pandora_conda.sh new file mode 100755 index 00000000..b765c577 --- /dev/null +++ b/example/run_pandora_conda.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +set -eu + +# configs +pandora_URL="https://github.com/rmcolq/pandora/releases/download/0.9.0/pandora-linux-precompiled-v0.9.0" +pandora_executable="./pandora-linux-precompiled-v0.9.0" +make_prg_URL="https://github.com/leoisl/make_prg/releases/download/v0.2.0/make_prg_0.2.0" +make_prg_executable="./make_prg_0.2.0" + + +function download_tool { + URL=$1 + executable=$2 + wget "${URL}" -O "${executable}" + chmod +x "${executable}" +} + +download_tool "${pandora_URL}" "${pandora_executable}" +download_tool "${make_prg_URL}" "${make_prg_executable}" + +echo "Running pandora without denovo..." +echo "Running ${make_prg_executable} from_msa" +"${make_prg_executable}" from_msa --input msas/ --output_prefix prgs/pangenome +echo "Running ${pandora_executable} index" +"${pandora_executable}" index prgs/pangenome.prg.fa +echo "Running ${pandora_executable} compare" +"${pandora_executable}" compare --genotype -o output_toy_example_no_denovo prgs/pangenome.prg.fa reads/read_index.tsv +echo "Running pandora without denovo - done!" + +echo "Running pandora with denovo..." +echo "Running ${pandora_executable} discover" +"${pandora_executable}" discover --outdir pandora_discover_out prgs/pangenome.prg.fa reads/read_index.tsv +echo "Running ${make_prg_executable} update" +"${make_prg_executable}" update --update_DS prgs/pangenome.update_DS --denovo_paths pandora_discover_out/denovo_paths.txt --output_prefix updated_prgs/pangenome_updated +echo "Running ${pandora_executable} index on updated PRGs" +"${pandora_executable}" index updated_prgs/pangenome_updated.prg.fa +echo "Running ${pandora_executable} compare" +"${pandora_executable}" compare --genotype -o output_toy_example_with_denovo updated_prgs/pangenome_updated.prg.fa reads/read_index.tsv +echo "Running pandora with denovo - done!" 
diff --git a/scripts/create_archives.sh b/scripts/create_archives.sh new file mode 100644 index 00000000..3991dea0 --- /dev/null +++ b/scripts/create_archives.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# run from project root: scripts/create_archives.sh +# based on https://github.com/nzanepro/git-archive-submodules/blob/master/bin/git-archive-submodules.sh +set -eu + +######################################################################################################################## +# argument parsing +if [ "$#" -ne 1 ]; then + echo "Illegal number of parameters." + echo "Usage: $0 " + echo "Example: $0 0.8.0" + exit 1 +fi +PANDORA_VERSION="$1" +######################################################################################################################## + +######################################################################################################################## +# configs +PANDORA_URL="https://github.com/rmcolq/pandora" +ARCHIVES_DIR="./archives" +######################################################################################################################## + +######################################################################################################################## +# main script +ARCHIVES_DIR=$(realpath "${ARCHIVES_DIR}") +if [ -d "${ARCHIVES_DIR}" ]; then + echo "Please remove ${ARCHIVES_DIR} before proceeding." + exit 1 +fi + +mkdir -p "${ARCHIVES_DIR}" +cd "${ARCHIVES_DIR}" + +TARPREFIX="pandora-${PANDORA_VERSION}" +echo "Cloning ${TARPREFIX}" +git clone --recursive --depth=1 --single-branch --branch "${PANDORA_VERSION}" "${PANDORA_URL}" "${PANDORA_VERSION}" + +echo "Creating tar archive..." 
+cd "${PANDORA_VERSION}" +git archive --prefix="${TARPREFIX}"/ -o "${ARCHIVES_DIR}/${TARPREFIX}.tar" "${PANDORA_VERSION}" +git submodule foreach --recursive \ + "git archive --prefix=${TARPREFIX}/\${displaypath}/ HEAD > ${ARCHIVES_DIR}/tmp.tar && \ + tar --concatenate --file=${ARCHIVES_DIR}/${TARPREFIX}.tar ${ARCHIVES_DIR}/tmp.tar" > /dev/null +rm "${ARCHIVES_DIR}/tmp.tar" + +echo "Compressing to tar.gz..." +gzip -9 "${ARCHIVES_DIR}/${TARPREFIX}.tar" + +echo "Compressing to zip..." +cd "${ARCHIVES_DIR}" && tar xzf "${TARPREFIX}.tar.gz" && \ +zip -r "${TARPREFIX}.zip" "${TARPREFIX}" > /dev/null + +echo "All done!" +######################################################################################################################## diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index e4dabd8c..dbaac2d7 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -16,11 +16,12 @@ target_include_directories(${PROJECT_NAME}_test PUBLIC target_link_libraries(${PROJECT_NAME}_test ${Gtest_LIBRARIES} ${GATB_LIBS} - ${BOOST_LIBRARIES} + ${Boost_LIBRARIES} ${ZLIB_LIBRARY} ${CMAKE_DL_LIBS} ${STATIC_C_CXX} ${BACKWARD_LIBRARIES} - ) + ${RT_LIBRARY} +) add_test(NAME ${PROJECT_NAME}_test COMMAND ${PROJECT_NAME}_test) From 01c0931a53e9750ad3cff378dab62598a5850978 Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Mon, 17 May 2021 23:14:09 +0100 Subject: [PATCH 04/29] Updating example with a self check --- .gitignore | 3 +- example/README.md | 20 +- .../toy_sample_1/kmer_covgs.txt | 1000 +++++++++++++++++ .../toy_sample_1/kmer_probs.txt | 200 ++++ .../toy_sample_1/pandora.consensus.fq.gz | Bin 0 -> 368 bytes .../toy_sample_1/pandora.pangraph.gfa | 3 + .../toy_sample_2/kmer_covgs.txt | 1000 +++++++++++++++++ .../toy_sample_2/kmer_probs.txt | 200 ++++ .../toy_sample_2/pandora.consensus.fq.gz | Bin 0 -> 370 bytes .../toy_sample_2/pandora.pangraph.gfa | 3 + .../toy_sample_1/kmer_covgs.txt | 1000 +++++++++++++++++ .../toy_sample_1/kmer_probs.txt | 200 ++++ 
.../toy_sample_1/pandora.consensus.fq.gz | Bin 0 -> 368 bytes .../toy_sample_1/pandora.pangraph.gfa | 3 + .../toy_sample_2/kmer_covgs.txt | 1000 +++++++++++++++++ .../toy_sample_2/kmer_probs.txt | 200 ++++ .../toy_sample_2/pandora.consensus.fq.gz | Bin 0 -> 371 bytes .../toy_sample_2/pandora.pangraph.gfa | 3 + .../pandora_discover_out/denovo_paths.txt | 34 + .../toy_sample_1/denovo_paths.txt | 16 + .../toy_sample_1/pandora.consensus.fq.gz | Bin 0 -> 368 bytes .../toy_sample_1/pandora.pangraph.gfa | 3 + .../candidate_regions_write_buffer.bin | 10 + .../toy_sample_2/denovo_paths.txt | 17 + .../toy_sample_2/pandora.consensus.fq.gz | Bin 0 -> 370 bytes .../toy_sample_2/pandora.pangraph.gfa | 3 + .../candidate_regions_write_buffer.bin | 12 + .../prgs/kmer_prgs/01/GC00006032.k15.w14.gfa | 97 ++ .../prgs/kmer_prgs/01/GC00010897.k15.w14.gfa | 146 +++ .../prgs/pangenome.prg.fa.k15.w14.idx | 113 ++ example/out_truth/prgs/pangenome.update_DS | Bin 0 -> 510 bytes .../ForkPoolWorker-1/GC00006032.fa.pickle | Bin 0 -> 48037 bytes .../ForkPoolWorker-1/GC00010897.fa.pickle | Bin 0 -> 19895 bytes .../kmer_prgs/01/GC00006032.k15.w14.gfa | 102 ++ .../kmer_prgs/01/GC00010897.k15.w14.gfa | 158 +++ .../pangenome_updated.prg.fa.k15.w14.idx | 120 ++ example/run_pandora.sh | 20 +- 37 files changed, 5672 insertions(+), 14 deletions(-) create mode 100644 example/out_truth/output_toy_example_no_denovo/toy_sample_1/kmer_covgs.txt create mode 100644 example/out_truth/output_toy_example_no_denovo/toy_sample_1/kmer_probs.txt create mode 100644 example/out_truth/output_toy_example_no_denovo/toy_sample_1/pandora.consensus.fq.gz create mode 100644 example/out_truth/output_toy_example_no_denovo/toy_sample_1/pandora.pangraph.gfa create mode 100644 example/out_truth/output_toy_example_no_denovo/toy_sample_2/kmer_covgs.txt create mode 100644 example/out_truth/output_toy_example_no_denovo/toy_sample_2/kmer_probs.txt create mode 100644 
example/out_truth/output_toy_example_no_denovo/toy_sample_2/pandora.consensus.fq.gz create mode 100644 example/out_truth/output_toy_example_no_denovo/toy_sample_2/pandora.pangraph.gfa create mode 100644 example/out_truth/output_toy_example_with_denovo/toy_sample_1/kmer_covgs.txt create mode 100644 example/out_truth/output_toy_example_with_denovo/toy_sample_1/kmer_probs.txt create mode 100644 example/out_truth/output_toy_example_with_denovo/toy_sample_1/pandora.consensus.fq.gz create mode 100644 example/out_truth/output_toy_example_with_denovo/toy_sample_1/pandora.pangraph.gfa create mode 100644 example/out_truth/output_toy_example_with_denovo/toy_sample_2/kmer_covgs.txt create mode 100644 example/out_truth/output_toy_example_with_denovo/toy_sample_2/kmer_probs.txt create mode 100644 example/out_truth/output_toy_example_with_denovo/toy_sample_2/pandora.consensus.fq.gz create mode 100644 example/out_truth/output_toy_example_with_denovo/toy_sample_2/pandora.pangraph.gfa create mode 100644 example/out_truth/pandora_discover_out/denovo_paths.txt create mode 100644 example/out_truth/pandora_discover_out/toy_sample_1/denovo_paths.txt create mode 100644 example/out_truth/pandora_discover_out/toy_sample_1/pandora.consensus.fq.gz create mode 100644 example/out_truth/pandora_discover_out/toy_sample_1/pandora.pangraph.gfa create mode 100644 example/out_truth/pandora_discover_out/toy_sample_1/temp_child_00/candidate_regions_write_buffer.bin create mode 100644 example/out_truth/pandora_discover_out/toy_sample_2/denovo_paths.txt create mode 100644 example/out_truth/pandora_discover_out/toy_sample_2/pandora.consensus.fq.gz create mode 100644 example/out_truth/pandora_discover_out/toy_sample_2/pandora.pangraph.gfa create mode 100644 example/out_truth/pandora_discover_out/toy_sample_2/temp_child_00/candidate_regions_write_buffer.bin create mode 100644 example/out_truth/prgs/kmer_prgs/01/GC00006032.k15.w14.gfa create mode 100644 
example/out_truth/prgs/kmer_prgs/01/GC00010897.k15.w14.gfa create mode 100644 example/out_truth/prgs/pangenome.prg.fa.k15.w14.idx create mode 100644 example/out_truth/prgs/pangenome.update_DS create mode 100644 example/out_truth/prgs/pangenome_prgs/ForkPoolWorker-1/GC00006032.fa.pickle create mode 100644 example/out_truth/prgs/pangenome_prgs/ForkPoolWorker-1/GC00010897.fa.pickle create mode 100644 example/out_truth/updated_prgs/kmer_prgs/01/GC00006032.k15.w14.gfa create mode 100644 example/out_truth/updated_prgs/kmer_prgs/01/GC00010897.k15.w14.gfa create mode 100644 example/out_truth/updated_prgs/pangenome_updated.prg.fa.k15.w14.idx diff --git a/.gitignore b/.gitignore index 18bf813d..2919e2b1 100644 --- a/.gitignore +++ b/.gitignore @@ -100,4 +100,5 @@ pandora build_portable_executable pandora-linux-precompiled -/archives/ \ No newline at end of file +/archives/ +/example/out/ diff --git a/example/README.md b/example/README.md index 67a13c77..6c23cd80 100644 --- a/example/README.md +++ b/example/README.md @@ -28,18 +28,26 @@ Here we present a walkthrough of running `pandora` on a toy example. We run: ### Quick look at the output -`prgs`: contains output of `make_prg from_msa` and `pandora index`. Main files: +The output is already present in dir `out_truth`. If all went fine, the last line of the execution of the script above should be: + +``` +Example run produced the expected result +``` + +and thus the dirs `out` and `out_truth` have the same contents. + +`out/prgs`: contains output of `make_prg from_msa` and `pandora index`. Main files: * `pangenome.prg.fa`: the PRG itself; * `pangenome.prg.fa.k15.w14.idx` and `kmer_prgs`: the PRG index; * `pangenome.update_DS`: update data structures that make the PRG updateable; -`pandora_discover_out`: contains the output of `pandora discover`. Main files: +`out/pandora_discover_out`: contains the output of `pandora discover`. 
Main files: * `denovo_paths.txt`: describes the denovo paths found in all samples; -`updated_prgs`: contains the output of `make_prg update` and `pandora index` (on the updated PRG). +`out/updated_prgs`: contains the output of `make_prg update` and `pandora index` (on the updated PRG). The files are similar to the ones in the `prgs` folder; -`output_toy_example_no_denovo` and `output_toy_example_with_denovo`: contains the output of +`out/output_toy_example_no_denovo` and `out/output_toy_example_with_denovo`: contains the output of `pandora compare` without denovo discovery and with denovo discovery, respectively. Main files: * `pandora_multisample.matrix`: see https://github.com/rmcolq/pandora/wiki/FAQ#q-where-can-i-find-gene-presenceabsence-information ; * `pandora_multisample.vcf_ref.fa`: see https://github.com/rmcolq/pandora/wiki/FAQ#q-what-are-the-sequences-in-pandora_multisamplevcf_reffa @@ -50,7 +58,7 @@ The files are similar to the ones in the `prgs` folder; **No denovo** -Taking a quick look at an excerpt of `output_toy_example_no_denovo/pandora_multisample_genotyped.vcf` +Taking a quick look at an excerpt of `out/output_toy_example_no_denovo/pandora_multisample_genotyped.vcf` (the VCF genotyped by `pandora` without denovo sequences): ``` @@ -64,7 +72,7 @@ We can see samples `toy_sample_1` and `toy_sample_2` genotype towards different **With denovo** -The VCF (`output_toy_example_with_denovo/pandora_multisample_genotyped.vcf`) has some new variants that were discovered and genotyped. For example: +The VCF (`out/output_toy_example_with_denovo/pandora_multisample_genotyped.vcf`) has some new variants that were discovered and genotyped. 
For example: ``` #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT toy_sample_1.100x.random.illumina toy_sample_2.100x.random.illumina diff --git a/example/out_truth/output_toy_example_no_denovo/toy_sample_1/kmer_covgs.txt b/example/out_truth/output_toy_example_no_denovo/toy_sample_1/kmer_covgs.txt new file mode 100644 index 00000000..8e3fd889 --- /dev/null +++ b/example/out_truth/output_toy_example_no_denovo/toy_sample_1/kmer_covgs.txt @@ -0,0 +1,1000 @@ +0 21 +1 0 +2 0 +3 0 +4 1 +5 1 +6 1 +7 0 +8 1 +9 0 +10 1 +11 0 +12 0 +13 2 +14 0 +15 1 +16 0 +17 0 +18 0 +19 1 +20 2 +21 2 +22 2 +23 2 +24 0 +25 0 +26 0 +27 2 +28 2 +29 1 +30 2 +31 0 +32 1 +33 0 +34 4 +35 1 +36 1 +37 2 +38 2 +39 1 +40 0 +41 0 +42 0 +43 0 +44 0 +45 1 +46 2 +47 0 +48 1 +49 1 +50 0 +51 1 +52 2 +53 0 +54 0 +55 2 +56 1 +57 0 +58 2 +59 1 +60 1 +61 0 +62 1 +63 1 +64 0 +65 0 +66 0 +67 4 +68 0 +69 0 +70 1 +71 0 +72 0 +73 1 +74 0 +75 2 +76 1 +77 1 +78 2 +79 3 +80 0 +81 3 +82 1 +83 0 +84 0 +85 0 +86 0 +87 1 +88 0 +89 0 +90 1 +91 0 +92 0 +93 0 +94 2 +95 0 +96 0 +97 0 +98 0 +99 1 +100 1 +101 0 +102 0 +103 1 +104 0 +105 0 +106 1 +107 0 +108 0 +109 0 +110 0 +111 0 +112 2 +113 0 +114 0 +115 0 +116 0 +117 1 +118 0 +119 0 +120 0 +121 0 +122 0 +123 1 +124 0 +125 0 +126 0 +127 0 +128 0 +129 0 +130 0 +131 0 +132 0 +133 0 +134 0 +135 0 +136 1 +137 0 +138 1 +139 1 +140 1 +141 0 +142 0 +143 0 +144 0 +145 0 +146 0 +147 0 +148 0 +149 0 +150 0 +151 0 +152 0 +153 1 +154 0 +155 2 +156 0 +157 0 +158 0 +159 0 +160 0 +161 0 +162 0 +163 0 +164 0 +165 0 +166 0 +167 0 +168 0 +169 0 +170 0 +171 0 +172 0 +173 0 +174 0 +175 0 +176 0 +177 0 +178 0 +179 0 +180 0 +181 0 +182 0 +183 0 +184 0 +185 0 +186 0 +187 0 +188 0 +189 0 +190 0 +191 0 +192 0 +193 0 +194 0 +195 0 +196 0 +197 0 +198 0 +199 0 +200 0 +201 0 +202 0 +203 0 +204 0 +205 0 +206 0 +207 0 +208 0 +209 0 +210 0 +211 0 +212 0 +213 0 +214 0 +215 0 +216 0 +217 0 +218 0 +219 0 +220 0 +221 0 +222 0 +223 0 +224 0 +225 0 +226 0 +227 0 +228 0 +229 0 +230 0 +231 0 +232 0 +233 0 +234 0 +235 0 
+236 0 +237 0 +238 0 +239 0 +240 0 +241 0 +242 0 +243 0 +244 0 +245 0 +246 0 +247 0 +248 0 +249 0 +250 0 +251 0 +252 0 +253 0 +254 0 +255 0 +256 0 +257 0 +258 0 +259 0 +260 0 +261 0 +262 0 +263 0 +264 0 +265 0 +266 0 +267 0 +268 0 +269 0 +270 0 +271 0 +272 0 +273 0 +274 0 +275 0 +276 0 +277 0 +278 0 +279 0 +280 0 +281 0 +282 0 +283 0 +284 0 +285 0 +286 0 +287 0 +288 0 +289 0 +290 0 +291 0 +292 0 +293 0 +294 0 +295 0 +296 0 +297 0 +298 0 +299 0 +300 0 +301 0 +302 0 +303 0 +304 0 +305 0 +306 0 +307 0 +308 0 +309 0 +310 0 +311 0 +312 0 +313 0 +314 0 +315 0 +316 0 +317 0 +318 0 +319 0 +320 0 +321 0 +322 0 +323 0 +324 0 +325 0 +326 0 +327 0 +328 0 +329 0 +330 0 +331 0 +332 0 +333 0 +334 0 +335 0 +336 0 +337 0 +338 0 +339 0 +340 0 +341 0 +342 0 +343 0 +344 0 +345 0 +346 0 +347 0 +348 0 +349 0 +350 0 +351 0 +352 0 +353 0 +354 0 +355 0 +356 0 +357 0 +358 0 +359 0 +360 0 +361 0 +362 0 +363 0 +364 0 +365 0 +366 0 +367 0 +368 0 +369 0 +370 0 +371 0 +372 0 +373 0 +374 0 +375 0 +376 0 +377 0 +378 0 +379 0 +380 0 +381 0 +382 0 +383 0 +384 0 +385 0 +386 0 +387 0 +388 0 +389 0 +390 0 +391 0 +392 0 +393 0 +394 0 +395 0 +396 0 +397 0 +398 0 +399 0 +400 0 +401 0 +402 0 +403 0 +404 0 +405 0 +406 0 +407 0 +408 0 +409 0 +410 0 +411 0 +412 0 +413 0 +414 0 +415 0 +416 0 +417 0 +418 0 +419 0 +420 0 +421 0 +422 0 +423 0 +424 0 +425 0 +426 0 +427 0 +428 0 +429 0 +430 0 +431 0 +432 0 +433 0 +434 0 +435 0 +436 0 +437 0 +438 0 +439 0 +440 0 +441 0 +442 0 +443 0 +444 0 +445 0 +446 0 +447 0 +448 0 +449 0 +450 0 +451 0 +452 0 +453 0 +454 0 +455 0 +456 0 +457 0 +458 0 +459 0 +460 0 +461 0 +462 0 +463 0 +464 0 +465 0 +466 0 +467 0 +468 0 +469 0 +470 0 +471 0 +472 0 +473 0 +474 0 +475 0 +476 0 +477 0 +478 0 +479 0 +480 0 +481 0 +482 0 +483 0 +484 0 +485 0 +486 0 +487 0 +488 0 +489 0 +490 0 +491 0 +492 0 +493 0 +494 0 +495 0 +496 0 +497 0 +498 0 +499 0 +500 0 +501 0 +502 0 +503 0 +504 0 +505 0 +506 0 +507 0 +508 0 +509 0 +510 0 +511 0 +512 0 +513 0 +514 0 +515 0 +516 0 +517 0 +518 0 +519 0 +520 0 +521 
0 +522 0 +523 0 +524 0 +525 0 +526 0 +527 0 +528 0 +529 0 +530 0 +531 0 +532 0 +533 0 +534 0 +535 0 +536 0 +537 0 +538 0 +539 0 +540 0 +541 0 +542 0 +543 0 +544 0 +545 0 +546 0 +547 0 +548 0 +549 0 +550 0 +551 0 +552 0 +553 0 +554 0 +555 0 +556 0 +557 0 +558 0 +559 0 +560 0 +561 0 +562 0 +563 0 +564 0 +565 0 +566 0 +567 0 +568 0 +569 0 +570 0 +571 0 +572 0 +573 0 +574 0 +575 0 +576 0 +577 0 +578 0 +579 0 +580 0 +581 0 +582 0 +583 0 +584 0 +585 0 +586 0 +587 0 +588 0 +589 0 +590 0 +591 0 +592 0 +593 0 +594 0 +595 0 +596 0 +597 0 +598 0 +599 0 +600 0 +601 0 +602 0 +603 0 +604 0 +605 0 +606 0 +607 0 +608 0 +609 0 +610 0 +611 0 +612 0 +613 0 +614 0 +615 0 +616 0 +617 0 +618 0 +619 0 +620 0 +621 0 +622 0 +623 0 +624 0 +625 0 +626 0 +627 0 +628 0 +629 0 +630 0 +631 0 +632 0 +633 0 +634 0 +635 0 +636 0 +637 0 +638 0 +639 0 +640 0 +641 0 +642 0 +643 0 +644 0 +645 0 +646 0 +647 0 +648 0 +649 0 +650 0 +651 0 +652 0 +653 0 +654 0 +655 0 +656 0 +657 0 +658 0 +659 0 +660 0 +661 0 +662 0 +663 0 +664 0 +665 0 +666 0 +667 0 +668 0 +669 0 +670 0 +671 0 +672 0 +673 0 +674 0 +675 0 +676 0 +677 0 +678 0 +679 0 +680 0 +681 0 +682 0 +683 0 +684 0 +685 0 +686 0 +687 0 +688 0 +689 0 +690 0 +691 0 +692 0 +693 0 +694 0 +695 0 +696 0 +697 0 +698 0 +699 0 +700 0 +701 0 +702 0 +703 0 +704 0 +705 0 +706 0 +707 0 +708 0 +709 0 +710 0 +711 0 +712 0 +713 0 +714 0 +715 0 +716 0 +717 0 +718 0 +719 0 +720 0 +721 0 +722 0 +723 0 +724 0 +725 0 +726 0 +727 0 +728 0 +729 0 +730 0 +731 0 +732 0 +733 0 +734 0 +735 0 +736 0 +737 0 +738 0 +739 0 +740 0 +741 0 +742 0 +743 0 +744 0 +745 0 +746 0 +747 0 +748 0 +749 0 +750 0 +751 0 +752 0 +753 0 +754 0 +755 0 +756 0 +757 0 +758 0 +759 0 +760 0 +761 0 +762 0 +763 0 +764 0 +765 0 +766 0 +767 0 +768 0 +769 0 +770 0 +771 0 +772 0 +773 0 +774 0 +775 0 +776 0 +777 0 +778 0 +779 0 +780 0 +781 0 +782 0 +783 0 +784 0 +785 0 +786 0 +787 0 +788 0 +789 0 +790 0 +791 0 +792 0 +793 0 +794 0 +795 0 +796 0 +797 0 +798 0 +799 0 +800 0 +801 0 +802 0 +803 0 +804 0 +805 0 +806 0 
+807 0 +808 0 +809 0 +810 0 +811 0 +812 0 +813 0 +814 0 +815 0 +816 0 +817 0 +818 0 +819 0 +820 0 +821 0 +822 0 +823 0 +824 0 +825 0 +826 0 +827 0 +828 0 +829 0 +830 0 +831 0 +832 0 +833 0 +834 0 +835 0 +836 0 +837 0 +838 0 +839 0 +840 0 +841 0 +842 0 +843 0 +844 0 +845 0 +846 0 +847 0 +848 0 +849 0 +850 0 +851 0 +852 0 +853 0 +854 0 +855 0 +856 0 +857 0 +858 0 +859 0 +860 0 +861 0 +862 0 +863 0 +864 0 +865 0 +866 0 +867 0 +868 0 +869 0 +870 0 +871 0 +872 0 +873 0 +874 0 +875 0 +876 0 +877 0 +878 0 +879 0 +880 0 +881 0 +882 0 +883 0 +884 0 +885 0 +886 0 +887 0 +888 0 +889 0 +890 0 +891 0 +892 0 +893 0 +894 0 +895 0 +896 0 +897 0 +898 0 +899 0 +900 0 +901 0 +902 0 +903 0 +904 0 +905 0 +906 0 +907 0 +908 0 +909 0 +910 0 +911 0 +912 0 +913 0 +914 0 +915 0 +916 0 +917 0 +918 0 +919 0 +920 0 +921 0 +922 0 +923 0 +924 0 +925 0 +926 0 +927 0 +928 0 +929 0 +930 0 +931 0 +932 0 +933 0 +934 0 +935 0 +936 0 +937 0 +938 0 +939 0 +940 0 +941 0 +942 0 +943 0 +944 0 +945 0 +946 0 +947 0 +948 0 +949 0 +950 0 +951 0 +952 0 +953 0 +954 0 +955 0 +956 0 +957 0 +958 0 +959 0 +960 0 +961 0 +962 0 +963 0 +964 0 +965 0 +966 0 +967 0 +968 0 +969 0 +970 0 +971 0 +972 0 +973 0 +974 0 +975 0 +976 0 +977 0 +978 0 +979 0 +980 0 +981 0 +982 0 +983 0 +984 0 +985 0 +986 0 +987 0 +988 0 +989 0 +990 0 +991 0 +992 0 +993 0 +994 0 +995 0 +996 0 +997 0 +998 0 +999 0 diff --git a/example/out_truth/output_toy_example_no_denovo/toy_sample_1/kmer_probs.txt b/example/out_truth/output_toy_example_no_denovo/toy_sample_1/kmer_probs.txt new file mode 100644 index 00000000..7ac306ce --- /dev/null +++ b/example/out_truth/output_toy_example_no_denovo/toy_sample_1/kmer_probs.txt @@ -0,0 +1,200 @@ +-200 0 +-199 0 +-198 0 +-197 0 +-196 0 +-195 0 +-194 0 +-193 0 +-192 0 +-191 0 +-190 0 +-189 0 +-188 0 +-187 0 +-186 0 +-185 0 +-184 0 +-183 0 +-182 0 +-181 0 +-180 0 +-179 0 +-178 0 +-177 0 +-176 0 +-175 0 +-174 0 +-173 0 +-172 0 +-171 0 +-170 0 +-169 0 +-168 0 +-167 0 +-166 0 +-165 0 +-164 0 +-163 0 +-162 0 +-161 0 
+-160 0 +-159 0 +-158 0 +-157 0 +-156 0 +-155 0 +-154 0 +-153 0 +-152 0 +-151 0 +-150 0 +-149 0 +-148 0 +-147 0 +-146 0 +-145 0 +-144 0 +-143 0 +-142 0 +-141 0 +-140 0 +-139 0 +-138 0 +-137 0 +-136 0 +-135 0 +-134 0 +-133 0 +-132 0 +-131 0 +-130 0 +-129 0 +-128 0 +-127 0 +-126 0 +-125 0 +-124 0 +-123 0 +-122 0 +-121 0 +-120 0 +-119 0 +-118 0 +-117 0 +-116 0 +-115 0 +-114 0 +-113 0 +-112 0 +-111 0 +-110 0 +-109 0 +-108 0 +-107 0 +-106 0 +-105 0 +-104 0 +-103 0 +-102 0 +-101 0 +-100 0 +-99 0 +-98 0 +-97 0 +-96 0 +-95 0 +-94 0 +-93 0 +-92 0 +-91 0 +-90 0 +-89 0 +-88 0 +-87 0 +-86 0 +-85 0 +-84 0 +-83 0 +-82 0 +-81 0 +-80 0 +-79 0 +-78 0 +-77 0 +-76 0 +-75 0 +-74 0 +-73 0 +-72 0 +-71 0 +-70 0 +-69 0 +-68 0 +-67 0 +-66 0 +-65 0 +-64 0 +-63 0 +-62 0 +-61 0 +-60 0 +-59 0 +-58 0 +-57 0 +-56 0 +-55 0 +-54 0 +-53 0 +-52 0 +-51 0 +-50 0 +-49 0 +-48 0 +-47 0 +-46 0 +-45 0 +-44 0 +-43 0 +-42 0 +-41 0 +-40 0 +-39 0 +-38 0 +-37 0 +-36 0 +-35 0 +-34 0 +-33 0 +-32 0 +-31 0 +-30 0 +-29 0 +-28 0 +-27 0 +-26 0 +-25 0 +-24 0 +-23 0 +-22 0 +-21 0 +-20 0 +-19 0 +-18 0 +-17 0 +-16 0 +-15 0 +-14 0 +-13 0 +-12 0 +-11 0 +-10 0 +-9 21 +-8 0 +-7 5 +-6 86 +-5 0 +-4 0 +-3 0 +-2 0 +-1 0 diff --git a/example/out_truth/output_toy_example_no_denovo/toy_sample_1/pandora.consensus.fq.gz b/example/out_truth/output_toy_example_no_denovo/toy_sample_1/pandora.consensus.fq.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d6a43cafc80ca61ef1d1c929aecc3a3590abd8a GIT binary patch literal 368 zcmV-$0gwJ4iwFP!00000|Ls#bP6JU4?dKGs10m2%0$B>AST5XB_yQ3`G$;rG9f#*F zQK&co0?$a}@$&3%zrU>~G7k6P@^j0X9 zG2AH?(M^!(gzIIBL5g`t-5h2zHIPfCqpTQ#lub7YJGnygqC10-YTsxI)6qlcCx{8A z%j2=62SXT9S0`>Bo)tX+msX3SS_Chz5ZW&~bQX z#!sQ*00_J*t)D+*KYMnIoKvpni_6*5%j4|s_+j_H`+VEKe(ay`_a`@J%k$-W!H}s6 zfeK1SR7$|7D)fYOD268qW28A6sZa4`JRLkh2K$I=SNb7BW}zCxFP#c17f)4Xvbb7Z zEjN=kt-S`<1kDv(OQI*Oh4$cO(I%n5WEMxzRg<*;mnd2`@0^^--MQ z`o&l(rkf$ziPnn)2$YA`o5M<$Msm4ypp`}tWwVP;^DxdXpj%6a-|+1WOlVN^uo!YR`FyWZOOP~ 
ztTExR59d%WlhSqBv5{Cb)t8)I%7dVDp~Irybkja&^>99%{B0SwBFl2T*~~waWj!zF zap0!8*8nGqb1keT%ai6p>DLnRtJP}i!6@%Akx}poC#iEimcDk*SQ~ zPN|4)fs z9!q*Kgb{Uh~o5x10hf#*@P{S)^cIdQTPHYh-g+I1ausp znekJoH~<1CQvCcG``MFycn_KS;d*m?dD`5cJ|5l=pKr_S$MXEJoZVh*FUM(`r=iOX zi!_LvrBeb=rtvZA!5Ew$aB<#IhhaL>rX&nxlHn^%Z>|)MO}T#nIS&}oeLe7x`T{sR(9vz$={Z4FS1qvJB%q zZg(9w&Al4d6r5{dEgm&#F0=+u3pW`IEH-%Cp(6t;4&gXe6E|u{A@-$Jo`)9b^y)}X zVEtq)6wytP=!ENy6(pD!r5A^pObz5xX^T<{ft2-XlKevg$&2m^gjD-RQ<$!9y1zgO z5&C&*EM9{ljHs&{CokV>M}y++o4!`6PRl482N9LxvV^9rDP)Tj;f1d%+<-NMeuICe R|5~|&bYJc5RAST5XB_yQ3`G$;rG9f#*F zQK&co0?$a}@$&3%zrU>~G7k6P@^j0X9 zG2AH?(M^!(gzIIBL5g`t-5h2zHIPfCqpTQ#lub7YJGnygqC10-YTsxI)6qlcCx{8A z%j2=62SXT9S0`>Bo)tX+msX3SS_Chz5ZW&~bQX z#!sQ*00_J*t)D+*KYMnIoKvpni_6*5%j4|s_+j_H`+VEKe(ay`_a`@J%k$-W!H}s6 zfeK1SR7$|7D)fYOD268qW28A6sZa4`JRLkh2K$I=SNb7BW}zCxFP#c17f)4Xvbb7Z zEjN=kt-S`<1kDv(OQI*Oh4$cO(I%n5WEMxzRg<*;mnd2`@0^^--MQ z`o&l(rkf$ziPnn)2$YA`o5M<$Msm4ypp`}tWwVDi}tBEd13_H7PO6d#@ea!76VMC&S?sZ7SrHiJw z=bm$VPwr=nYqk`$a%GB#&Z1Ov;gYEt$t#8*yqfZ6?0xB@0i9{jEe*k4BK17 z!H9HlY-y?(pkLS8GoQgB=_77qzy@X!gcC2BeFPuyBISJseM(WPop` zWNEZ8ffm+j7GPNW7t99+C!~iI%~sS1AEYVabkeD1S{jOMzMx8&GA!V1(j{#q7twGy d-)_{$8Mj(9GxurXtfn}5;v#PHBF^G0{RZbHp1J@4 literal 0 HcmV?d00001 diff --git a/example/out_truth/prgs/pangenome_prgs/ForkPoolWorker-1/GC00006032.fa.pickle b/example/out_truth/prgs/pangenome_prgs/ForkPoolWorker-1/GC00006032.fa.pickle new file mode 100644 index 0000000000000000000000000000000000000000..8fc00631ccfc2216c53cfdc1fb3f5abe41cb6582 GIT binary patch literal 48037 zcmeI52Y43M*0#Y$5k;|!y&?*RPE^zo0WpFIQL$hQ5J(~{n==7v}3Sx_zrwttK^&o7z;>P(8V(Fuw^y%1ijux1?9k z!UCh)QZc!yx@tn*q?*El}S5Kb8&O6xVu$l^sj9!Ediv8$5 zp}MeWbD`tN!eXNrBU3%8Zeqjanug}8360t0>gK}YqZdS;P~FsAUFbA=Vf@L})2kY4 znwskxCKi^cSkN|`T-Q)V^|;!qNi_`!5^ukhkVd*Xv zttyV2QCQ~xbu34*`$;wBlcqK`*JQ&R$JewkEX%=NE-x(KW%PXPxo&)+b46<#j_(cZ zX*H7yD^$!kdLas?RA-E%uwt#YTPZKB+@-d2MXTBsDq7dBINV0^w^=N#QXBTQTUAp{ zVQNjoxSFQIs>4UOV^{I06jmEIu&%LtnT}HXVN)kH*KvGD)f7z7VQXJl9oiasVa+Z! 
z!Btt!xW;UJQ(>(!g|)lb#*~jVv3=1ibYZJ?@nfxKDA<`deWypkR1kJ|;?sR+^7Y{wtqs#mSsXL(sU|074N3RrApl8oz? zwbZF!mV~D5_(RXtN%mK6eN%<%#FA4eX&PJGd2|U!V)zbKG*y}1AO>Nk

el{RsOcGjSIc)M{@kw=~>N$rb3@Fg}%nCZfIz1t`@GT z&@V6aAH597swp)YpXO#h`c;1~C=DM8{{gBR$}iwP*Cw-nj}}i6TQiwk&oY_+ zbmJB#bEGD7lqR#%Pv$mxVcX(lZs#X+bY9p#Oy&-GVaIbx+vIGN-8WRA}ZHDNL*iLkGV9XG{Kp%&Fq!q5%w08^ll){(&I=92$!zqKIVCR? z!enN7p=n;7OjcokTZyrZvSru**h;K<*2(N!Qqr$mS?LZXcFpY5uV0HlnNyj}sp(|? z3DIW7)fuAJP`q)cY}$`!SQ{|LK-3OymBV#sf5s2l{C|C@;(?PUFFT z8V|_}hlXi9EH51XzkM2Y_rodl?{`P;zE0;=-LJX3qa$n@*N^uxC4EYJm2}&(r<=%r zJd9{*B9CMuk7ObT{pZ`ZFqKDXDv#Dw9^|Tl^3Sh z$$z|Y3zK<~Ci7xV<|TeIFU<>=6({p@KbcqLg)75kUX>TF{&zBMGOtN~Dogs6_U_wl zo64d6?PjE3zm|V0uVpH){o7CFKi{;4iM&n|dA%m`20xKE=7pPz6M3_r$XoKltzjZ> z%L}*vJCQb#cNDLbEO<)#_vzcKTiFOsuy%ee?c1ltAIdwK$veZ9@;}?Qg{iztQ+cNdAIb|4|2vg7m5|4hnznw0nbq^h{ZQ(pWCF2YGG0aHI1vS;U!aTU3JyC#z}Qm!-ftYT-BrOpH9+DCTS*< zG*grGg(m4sKS^KZg|CZ~^o^gSZ}Y-;VUoVj3qQ=Oll1@j;o6V?%_MF1PbcXoCg~?8 z=_gIn&zhuP{3QLF7k(>F((hBp){I_~Pl~;wSmBh#+`^&``+vp=5c%1@kodEGTUp2_CZfzY=V-1jE3 z`H3t38r>1aeV?MPftSG3swXvN3y>O{PM(vtZ{M`-|Xlf>NqoI&2@U8+;=6fw-ZQr&{Ll7R$!6`d_H0+t`k-uK+L2iYtbH?q?}=p1 z*&<4|GDuT%@zQfD{=yaiP%K`?ikF>J@lUS!PyKK#!PGT0hOfhfX=%!q zW5Y4o^0v7@3Y~3_D-`!=$8^4fvK5g=W-DRcF*(-l|JY(@R<=!7DQ;RUKHC-Z3?>}y zRjqinITgR^ieD`juWrR_%&B;$E1p>_Uek)#np5$QuJ}hC6Mg5XdeS7m1{6NbYqQCi ztcz`^k7d1%MZG>2^%`2e=Dc40uGfUtt5fxwt$GpbH5D~>N~z+qG-*6v^vTvi8)vj2(QY~K(s$F$> z?P|4ry(jB#MeAG9T4S;jbNDVr*25A#Ew*n|WW7x3jpZ_iZB?1|q0za#wg>0j9-MP~ zbI#5Bs)W;JKehIXKkJX1XQdJbRJ3N3jmKmg5V`AQwxOjr!uqRaHo%mPu@SZjfx-Nj z5^D8Lf^1Wj04r1LU;}aUtXx8bZARo_gDgE58(>3B*&G{TLkSG#Uz*qIO?S41N`O_U zb+9dQ^K6)e2pdl1VOv>x1UA67Hf1C>!bTAo%)ivF)tlI?QYFB)QR`sa;^x_Q5+Y1f z;bGfbdIxNP?P$tQ*a#a#U@-sEu~u&ivz=7}tV*qe?Sh+U)e<6XERlzev-EgufYq2X z0qbGq*+c?^`Ilm~dXtsassvb_S_iAg&9hx4MA#%E51VZ125f*enlc3&M^+#(n163m ztM@HgrV?OHY8|W@H_xU@h_Goy9=4mMr(*+bcT@JjM%bPN2J`P0YV}4S+e;_0d}xj2Rj5e&kmIkVTTcU z*x{Bw0vljQnsO92!j2{|n14W9tEZ~jF)9IetXc;<4mZz^mk?nm5P8^%mOcp^U?-b$ z3O2${B`}zOgjlO5f7xj&0d~4t2Rj2d&(4$(VP_F}*x8mo2OD7LnsOdC!p9P*!7ma0UKa9nsO61!fqxon14)At9$$G7L@?IRjq^FhMQ-%ONg*L zh&=30OW%bJu)9sU2ODAc5*Y07_pI4{Dgk!CS_gXoH_slF5Md7ydDz32egqp}kDBrr 
zHo_h!FxcPky|O1%0_;h(4)zpoo;@ug!k!`WuxBm(95%q7H{}Jahm~h95*Y07_Ym1j zDgpMgS_gXtH_u*`5Mi$ou|lY|JBRFbt9YYW!4BTEiu!eE#$<0{yXXtO+1q$|_KwA! zE9Q3>>iYGF(`)s+R`;IOajkye99^qFu*8QJyKD7FrhJTz2l5jF8(VU%{!}G&?K8D* zY@g%i*-Qy>Y+n%h1No(;zruzC`L!wEU?c2X0)r*j>hDwn?0dBi_5*I7{U{;Aej@U) zpDq0hHo$&0T*1`V55w;kC!IEqB;wk~wNv(q| zftzPbN{Fzfh&*g*OD}^Buw_kI4jW<16BsPHe0Ej|uocug*owG$wvvPhTbaniRG z^;H6_M6HAMz|FIs5+bY@k%#rRbRTSh^);m*Hp2Q77%aJbmZ}8U25KE_L)<*uNJ4}S zAo8${Exidgz&16d3>#qs2@IB8KFd`CY%{eEHV8M*21|&rAw(Xwxuu6<18fUZDzFi@ zC4s?`%jYna02{8>!M4K9vk?*^Y-=J98)@lL*Z`|EWgBdSZA)OVGsVV_B zO|66NhMQ;8B}CZnM8zxG9#*kuv4S1!Wfk?UIkArB|J)MhDE0H>SG2BQfM!g#x0Ho& zxl-+epJ)4GXM0%E%H3oGtgc_qHr!7eu5IrAa&$sE}w^5;xLQd<@0b;j==f@S)Lt9UCt)M(WCDXF>yT4a0_;?^4t5$Y%VY@=b_P*#*3Ptwvx*h$ z;B2cX&RThP4oW&}=SuOjb{>A7osazovvz?tbhCD$Tx}d??IL`-<|poAqCC3Gz7sdk?vfC} zcN2N=J(j)~8^HIOazA#C;0H(;I9V7ysB*v$sdeCoar5jE2@(7#kq19!>Bq4F{Ddh_ zV&@2ciiClaMbgtM2mFj$2Ywbe&z_SI!Os(U@C%lH5gWiSnesArj^I~F7&uuly{dA+ zuc>w5*Kzag4G9taCXolfW$CxE0sM|B?_%c&evgEKlhecdDhK?5S_l3RH_twj5Wyc4 zdGIHe{uCR)pPBMGc8=hgBn+Ia+P+XZ;4jrW@K?Bb_O*lv{)WhdzqRyt*Z}_ClpnBj z1pi3Fz{v{kCzS*KS*-*Af}3Z*N{Ha!h&=drOaFll;6F|I3p+<}D}GPZz{zT^HLAe# zsdeD_ar0~e2@$*?kq0ki={DE^Zfi>R=ENfiz|IlelZ1hjm0&NG1MaQXf&1X*Sziee+>gkE`&+sc z8^9ZwvLSYk;EhNaI9U}AP&wd@)jIGdxOujzga|Gp^5B7%F2@G&W~L0n&JjGAgn^S4 z;t-Vs-dwE%55>*1EhI#61(64DY3X6u03L42R@ga$N02aZvdGz5<$y=3b>LCBc~&VQ zg0~?mE^@ZDitUOO>_ERx&f6VZn$Er$*q*Fg@GRn&sXIvV3!WYE^K2({Pbb!OtKVISc%qKgf@h&QFL=gU(Kst&!86_*EqH1yF~MTD;F)Mj zEtUfeqE&fTM_>mvS>)8Kgk!R+T6a(<;pW+73327vK;&n$(b7|}VKxh}XSt z!A97z1O`hMEyt+@*zsx|>;&9AJ5fS}okZkeCtLayY=E6=%4t{+E6YwNFj%rsIYTAD z&Q$AQXW??IAR)reA@Z zcDbdmzy{crrd)-Mu&W6SmMloFQ3tGMz z=Gns%BJ2?&4|~+ok6{DsaZ{eaM%a@C21^zjPpJgh(`p^;8QeU3Rzie5N91A8TlxiT zfW2tSOV|i|nZRJl0^=2x0DD!fgT02EXRk|$us4W2>`hC*g$=N`O?d|!Veb+cELl{% zrxIZAt97ssaP#a#2@&=Yk%xV3=})i$_NghKVI%Bw0)r(BiJ2+^_Jvvp`w}tH|Q=GiY2BJ5Wp5Bts1zheXJ4^#fc zM%Z5j21^zUt@vwQ1Z%CxJc+|6^j)s*ummf;h!mWB5-eHmax(#E$&>=#y?WpMMoj|_QXtdN*uL)xOsM*ga|vH$iq&s^oiI2JIR!ju@QC( 
zfx(g$$f+s;cA8oTI~_OA&X5pcXA*hXS(ZK<8(`;{axON)&Lc2bvI03@CBQCF>tGk+ z=GjFOBJ5%!54*(Dmtq6#GE*+cM%Wbu21`~TSE>ZqRcamVYTP`#MnZ&LOXOkKS^9cx zfZbrqjo1jgiNIjV3gl*$0J}x4gWZaoXSYd+u-l0|><&xci4Cy3Ot~8yVfPRiELnlv zs}f-MsdcdXar5i}2@&=nk%v8G>4&ia_J}EuVk7J^0)r(hkjGU5>qD6>;8w%N(8e-?qd%7Q6HQyQaK{pJ->Zl3)nA$Ib2BHzhBEd3`obn-7#TJiTQ&UGhSvlHuNvYeYwyQY)#t96}R z05{JTln^_)5Rva>8%wvvhEBFKr9F18JGn4Bu}&t-zz*6qom@n%>*S)idDc-v?BrrZ zzLSevx)U~ZatTwG#LjgmmtrT@$z-9mw02D=mr?6Fxh!s;Ehiy%a(N=($L-29fXNnwDM*8#=kRDP6F0-N|*> ziFGnr*mc#e>EyaI=T)U=|L)E%YZh^~h14xLS+>*$5a+sxu zV?!smGGzpIt~Esx-u9G|C^4kXzVkdVY@|~=<^jK`@K|<_gJ(2I^u9lvJ4V|29N&|MTJK4xitdq$Cdx~~VCktv_Co^1r z+d)F?WHXWPprx)JDKd;oNeHgq=v$I8{l;YQ#%{Ng*x zLuxOsM& zgb2Hw$iuF%^p)5EyULWSu@QC+fx(gm_O&VjcAZ)WyB?RXm`RAR8;LyZCQILp4X|5G zxfL5>w-FdDSt#GG5@2_zb+9{e`C6HT2)moe!|t*4z1RS|&y@SI5%vIq!IA~tL_r@|7_O5%wmLhrMO#x3K~Cjw$bAJ*+Z& zkHBEbLh^l;0Q*3#gMEn0*Tf`5*vCX3_KBrG#Rk}CrhJZ#u$cq~OBRG*s07%TY8~t= zT)rA6A;P{P^003${T(*IzBlCuY=r$tV6bFi_mfJ1{jAo(e!=DIU=kwiHzE)F-O_(x z1ME*z{=!CBD}K$~V95flHL76qsdcdVarp|Ega})Z$io)0bQ^4dwKb(3Hp1Ey7%W+6 zEvyn?9n?D5BDj3*OG1QoB=WGuEWJ23z&e?-1UAB!BrsUAU|LEgz?N3)V9Vh0RWAt< zwj7a%EpO@0*Z^C>lohcNwi1ECl7-L8Dgm~NS_fMdm#=q8h_Ka(JZue1uZaz?wMtW?t7XpJN3z&6O0<5cA2U{1HuXIU>ux>;i*4@(UV*{+jlpfePvYrG6OBN!%R06EG zS_kWc%h$LhL|8u}59@E~Qfz>2V9JKrII@ih43;b?2B-ws#%djG6I{N!B_YDfh>8n} zfmTsotY8P5S%rT`If%f$qa19dLoDuGv9Nzxqsw>tj&gIW8)|i2tG6&m*XjyOY-zE( zRu3~}I5v)LD*_u^a;+Yr61ujvS~s?lxZHL~h-0fH@&|GoOK*z}2XZ@8^cO}%*!Bbl zORm*Bs07%KY8`ARTy7sEMA*(m9#&=PU9bUGZOT||gpDIGSaPi%uM%K2Y8`9>F1G;^ zBCM9k!|E(uj}5S0O__v^u*n1lORm)oDgoB0*1@LWvYeL?VHuH!HCehD8(>pSnTCz9 z-3SbpT&t(61laCs9c&L=mgN#6Y%d}Y%PqY(Ho*2VWnXNB?MGm+-O^`Z1MEyw&ca66*#rhl&LQWh z1lYN19qc?@eiuVRgk3;XJcnFp6&DpN*ulkC(b9Y6OUQ=Xic4+l%djooR)7lcl`q$Z z?zZ9zx!O3~R$PhC5zxzpt8n?1XRPiiI<$DIy2k3RwK`5!*O{YJ)%BLR!D4r+y3v%I zu>KHMW;YYqAxzd*x2S|8aI0E(2yesXJ6aOrA-sdg&)1!nz6%@X>uyu-!A97<1O`ji zR`;m{*!^l9>;YW9qa`829wPFvhb{dGHozV=Iz}__F zEo_9nO<=HOZS{^yfW52M!QR8=J6aMV>;obX`_R%KVFT=AQ$E2)*rx;rOV(DOsRY>P 
zY8`AQF5l6T5Mf^udDvH${u&!#-B;A`K@AsVg)-`&??;7 zBj?vL(zlZfkqu{$Hnw$J>_0erw9|&}?9pDXHV$Wxh4JYW2SF#04n(|G!RiFE$ebTa zENVp^t%$S7V&>@VvA88VS?tapOPI1GmP6P{U!0!pFCfpBB5TJqS@|rjZ8#*$sCCD5 zSzO+pNElGjiA{LzxIB@cz|NLl0UIW8MN?M7woG4{tkILD&??#n`l@OjeKlNOi%5v{ zHHbWYO-rwZ4fM55>4I&Uz7AQVCu^gw+6MZ%Y8`z&T;6v`i1hA6p1!`NOR$07!<3%b zmg&978a-Jo_0~4f`>1vFzPP-|kPzwpi9Eg3(i>m{eM3_=!nRBwK-TEV>S<$b1AP;< zj=m`_Zzv>0`amL2FSqn&*gzj-%3y5E^dV%8o-C&}*EZ0Hs&(`&aC!A0A=0-b^7LVr z9*zz4txOq#ZJE9`S)(WGtC89U`Y5%IUWv;)1qqS9Es>{hXKDS*U4g#6DLY_Wrte7B z=*b0qCv5|Lj9N$E8JCv@5+Z#UB2TZj^jK`5k27UFwq<$^S)(Uc^apzmhN zbZpD?-N_m~x#sVoZJ_U|*3tLE<+;6tNZ*^t)AzCTzSuzD&y@YKEz=JmYxHChaGMMR!{v86A;2KuF@T!w9#emPmAC##k#v<>tt z)jIlBxZFQTi1cfSJpEcrUxy9!>rJ@<+cNz|vPMttxNg!m&~H}j=(pf<{~#gKZzC$M znQpg=JBk(T;7+S>uYp=vVcbQDTS|5E?|1H&;@|JwgP&*jVq02DEnd74yH6XsrPTd$ zwQ*QVJ%C?)zw;mw5A3m8N-a9)rPRY#^oSL)lzP-0Eu|i_#N!sbrPLFqJc;E{yZ1X! z5!jJUR!&c=gah-8T6bih#pUt6gt+v5p2*MZ3zmKn8|L*TQ(neK*ee7EOBPD6ssz|; zY8~u#Tpr&`h_E+_JnSt?zl{yBcT9N~8)5Ge7%W*Ay{{5rAE>XFCoG{Ci1XP zEd41qz&qx$t%Lo7%j0_q5%wFAhy8BpKd=GzrzwA7BditwUa-NEMNVr}!RAxzVDsbh z_+CPUElA{H3t74iHo)4N(heJ8?FkH)tZ5cj39t@o9c&R?9^Xrdu#Q9?wwR?C#|Bs@ zQ z59@B}^|1j~VoDEeg!LpaSh61Jr4nGh)jC)oTpr&`h_HS{9@gK|rPu)5z?2QK5w;P5 z!II_20F?mSSgnI?g3IH32@zIC{)KrH5eyY`7^~VIyn=fx(i+#nvhTHd3vFjl$&@KO{uhHbfq_t);ia2AKW{ zUVhO7>tW^D4g>~E))G6a1lUe$9c&CPzvm$#!m5ZoY!^#cV*_lgDdVtlWa9}8mMk4= zR03>*S_hkm%kOzeh_E^$539HIuGj#ZWXfc099aW_!IG6iqe_5HQR`p@Tz=0(LWDID z6;}$)Rx!0$!49Tbg@3=Z8-aVjGu=vex43gfNB_`Nm+$ob&K_2`r`2(--pd?at8+{2 zZLzyn?_ce;^OG^dZ=AAP+U=Fl>Y! 
zPGGR)T785{fE}sU!H&XZ?JXg~jv?}}V=a9gHo%TIEg{0LBJ!}SEqx6(z^*mrI&6epPhha*T782` zfZeFp!EVB3-7F!(ZXxopTP=MXHo$H-|ggs1Pu;hgGh)RGxs@B0C!_BkDB}CX0L>~5}rJuqE*wdywgN?9f z2@IB;L!MI!u;_Lo`* zYt>G$*4QF!J|YjB-_i?U18hN47Q#kY8v=tRYpb>@0oG2fgSE%ySF9z(k#!*QuthAr zC^o=4nz9%+!WJhmShBY2q!M6DsCBR1DA2wwx);VtJ1Q^K2an z5!RK+!`8L*de{K#W=eN#gso3tuw=PYq7q;|)H+yC+&t?gA;NkS70(`htfFtRf*tg; zik9B*^d}q69;LSR2H1aa_SjGxy0gbdaFP;Py))`$j87Cs|Y3UriO2d%{Axw?c1-ImBhx3jcH z6rkIivIBND=#Gd6O%^~qsTAlKwGO&7E|1eCMCdL=9$IbbvDg3|XUcf&Y|t7+gC+~1 z2`U9TQLTg4;_?(-LWI^6dFZZ|o`en1$)+@5XM;8(8Z=oDO;IV(f?5a7aCvwxAwrvp zJanq1r(pwhH&do#XM^sJXwYO~w1-N8?y1&6_rm3wxr7Mao5(}=vGl&!0Nu}&{jsw_ z4?r|%vOqder9cl->!34mc~mYTLJuMG&_gYK7&br;H{}TIY|tYS4Vqkdk5Vbnqt!a- zF}OS-mk^=H5qaqGmOcR+peLGg5_UG|$%qC`F2JX#6zHjH9rQF@9*j$f&@+fU^h`^i zg$>ZNO*scU8}wX6gC-Z^^Hd7-e6_vvr7yt-=%uDyhMf(1Iif+6 z3-T2z1$w1g2fYfH$KVno^co@$z1Gs#VFUDfQ*OY{2E7r{pvi^#CY1ucS*?TKg3Hry z2@!f5k%!)H={v9idZ#IOVP}KhjcCy10)3B4f!?duLGQ!mTQm|P^Z_CdebCYmVFUDG zQy#(227MIKpvi^$F_i*+T&;sXfy=jPBt+;_L>~IIrJunD=(DCghn)@jJfcC9dy5xT z3iL&_4*C)<->Q)ip|22m=&P1~4I7}ZoAL&BHt3s(22EB8Z>bdM+iD&39bDGy5+d|H zA`gAv(jQ<0^g~lV!p;W$7}21~jm;-21^TI42mK6}Wx9k2ok>(&ntWjuUluFa!BEF!0PQuS=Oq(KcLA1r8TM! z=zMD30i7S0hvO3BT5mxjKZ^@lx(znWVp~(%VI!cQu;ne?85>|Ln6e@^ z!d4*DfoTtb9(Bl58BmR=tlU?ryXz(!b40)r(Bk6tPP)?2NE^}*%gxP%DnN91AsEnSKY zunkPv5F23|5g064a12lhu#MF^*e19<9G4JbWken}(9-4D0Nc!zL0Atf&ju41ELms_ zQ3|E1+?1`b5jKLrV95ewYn1>Ssn)?p;qq`?LWFHY zVIyoj zfx(gmMU6^;O;GD#6LEPsE+N9|h&-&`(z{{-m4 zu%pyE*wMKBB}ft?>{ucXJI>O_V*~62Q%=N2*hvHiOBMcUy2@!TW zk%ygO=`*nbc9tn;V^!v&c0MkD36g{eyO5~3K)A>%E-qHEgG;Q! 
zzdgB>z`Z@W%t|k}xO2r4{*k0E-|5?vE3EEHtK(XIl{varUu}tNEOyuGYfZTh8^?A% zfsHM>R^Ol!x^|;lH@2H_SyoGkW4ndOAIMuReH%6$$lFc10~=v?5*RGGR^O!(V0Wu^ zuzPS>PD_Ze`-nX3eoH@q4X_7Ic?cU}4-*(HxmG`-5@3(2b+E^9Sw>5UuqTK->`6;M zg$=N$O?d_zVb2m6EV))crxIY#t97s!a9KV}h_IK4JnUslzk&_0S50{h8)2^#7%aI~ zzo8OfZ>n{$w{TfDONg*{h&=3FOTUK=u=h>*02^T+5*RGGR)3@tU>~b>uupJVE=!28 z&xkzib4$;}2G|#-e2I;)uLum5T&urU39xU}I@q_kERH2a*!M&p_JgH=#0J<;ru>YJ zuwMuamYlGDRSB@))H>MjxOw)6gb4eS$ix1!bgPAnu-5o2bg>aOKY_uL6V?JM0k)u8 z2U`d?&)P_cu(m`V*3Q!Hv0-Ejo6-RrVT%wLEIDBxcn8O5+bY+P8;aJ_kEZO3DaV4u;+3Gkiu40bPi>q2hcH>lt)&u< zz}jlvA?$+7!*L1m5OyW<^R=#}*TaVS>SjuJY=o^(V6bE%SE3SNJ=8i_Ph1|3ONg-E zL>|`1(tWW3*3Xpw*a#~nFj%sX+dw72HdO0i8{y{J00|McF_DLDV(Cq>0aj+pKx~AS z6BsO6$Ze()V1v{;*kIf|8zLdXHYf71p_bkP8(ADqVFU(C7IMQ?0&FX_4mJXp zU;UI2VIzq=Y?P%du>rP?DcfQrY&!yjB@4OHDgm~=S_j(!mtXyq5MetJdDs|B?~Dzw zDpPjBMp!k0!IFjCSd{=9r`Ey7nOdhnjOhxy|afZIS_lQbw_e9T%LJL7*NrcO?Y{-H<6#V zeJs5%HcZ=ortFWMm3aV~F_UG_fvSXgkXpx_fy*Or36XgSk!K!i>BF#rdAKP@U}t3> ziDt}X^>dUeVIHm4F^|FJiMNEvJdVgSkGJ#**uXr|l#{TtGEYV`X0ix6MU^m5RqL3i z;qt&+LS&vn+l1$rbShRlzJS6@}OHnWX>e= z%r7kcB{nd>GUaRRtjurFjG0^}zf~p7@6D%{H82`ot3#DnlY1`iG@@NvyEEE zY>Ug6R3t=Zdm=y1g)Q9y8<>ljvM6>|W=Aw*CifJJsS@VmY8|r^F5gv=5SdF7dFE1< zUK$&i%b2n(c2?$cXvR!#EtXd$%+6{Za|K+M=@KGyC8FX=WM!*ZrC7lZR<#QEvLv}~ zuSUYXD{1TBm8>qszbjb-KhM_0wzMp1Q@qbuOB=dn$=Y(Yaafjg!FS7&LF}V&NgM0)RQ0>B0+l^!d@-KKM~1)?4i88k{#)f+XdUH2+`PV#~)Mciir&>3My>NLRFJVB%v24N`s1K3<-1N0{KWzBi^f#pxJI@Y&13G7e zPZnbvs>=+1Beibu18{jiAR!KZ6CywOO)Xu94TB$ON;!6(9sFi=&IX^X3J0mn41Ta$ zH~1mAyswZD2S1d^4}J?vS75{7w=`uKcAg#la5`s$PnM@!smlz0gjzTFt#NsOA|Vcb z6p|)e6nC4r!F)2@oL@RYjAnLBq0udB9R|_t)=U*Ves{)?24Ud2S16< z+2E5qhso+PgKtpl2H%Lw`z{G_@C712_{`Ex*f98JQ>J3)*}+evb2j+o7G^hfnZZw2 z>ju9&F7MwY#KG@LD$eet)%Y@CV@XK2Sm& z{6R#1@G~rZFg6VS5K|7t&a;C*jLzBMlbgE3)nx{MgjzTFBXM~@DIpI2Xd*xOV=R3v zHVpnaQ;x^Zvx7f@&e`CTyUP>RWd?teS~vKUad}@VArAgjB0u=kEPXmQ4E_vL&cx2M zgFlPT+2E7g+_TkX27iuPH~4dLd4DS*4*q;1KllqQeIYgs{vuN@#?G^Yzl6@&;FJ69 zOVwotf0N11BORXFH-MD#nkAyh* 
zdx`wu@3Zv%*f979OnDGH&kp_}I%k7V9)&!tE;IN?)VjexikoMTNr;1goX8LU2}?hT z4TFEml&7)t?BJiFb2j+onas25GJ}6ktsDIFxOw)1ggE#YiHgr;Ub2dpixuqP6{~R1 zWaiBW8n4nr_i*S~|8VFvDgNQm>-hY&8Q7K{4jof`IP|7AbPtE#lBS zq2i<&**i+U&G%g^PN_I;M)t141Nn>PD$c05Y)1B;!iyDNQE}Cb?0tnx@DB`CTvKt~ zjO+u2qZHmy@#T!{LxuX+t17;(_;yD2k;1|Jg?AM{RQxm}`&gm=`pJr4hIME?Bl|>g z2Y!coSjX1G7DxD0!gL;%4O_DHu%!_`lb~N{9JXBRVXGs2E@5N7I5cdn*2B6W%#@(- z7Y$pt^{{RTUr1Pi=b*z%S`RBl_)@|YgsG!JwQ6V_UsF{#KKlx}V(HQR4ZW?Zvc|^d f>}!PDl}5AgR#i=P%{5gGQzvKNsH)@CvEBa<`(l|} literal 0 HcmV?d00001 diff --git a/example/out_truth/prgs/pangenome_prgs/ForkPoolWorker-1/GC00010897.fa.pickle b/example/out_truth/prgs/pangenome_prgs/ForkPoolWorker-1/GC00010897.fa.pickle new file mode 100644 index 0000000000000000000000000000000000000000..818594bf9f1923388f1792b5f3911cad5360514f GIT binary patch literal 19895 zcmeHP2Xs_bw@xS`0R#~XQUq)P4JM(hK|_S7V-O)~6yrEaCfo_hOwO4JMLa4h0edfq zz4zXG@4ffld#}IxzHgs%@63G<))QRIJlFI8m;3C!&%XQH`^>)k+;eBbft~AHVzcAX zRy}JZ{)^VNC7K)Jx?q}~HOa9mZ}>u$Z&atknu5H%yyjGWTRNJIwZv6tQB+nG7Z;Zl zkDoA3b*Wh$7A@&mv@y{fS6%B{V;QeGQCBc6l}^l?2BAQ$R#=k{qb*donl2c<6dbKy z(~X|vv1EfzsqQtMALF5mUaX}R&U?z`z)GuLHN9{_Hja^vF||fU z^`5T!)bznjtT{0&*%D7?qKzrt63eJHYq~<+7)xhjs&7pX@GY@<(PTWGNhD{fwIW^R zvX(?L3hR0=+8j^L%6O_@q+3l-7$eX*@pLqiY>3ZOYa6vrVI(h-YFF$2Z)-UN?wjKk z&28ySTu)6k#0%7VSls?btzTHv37!)TYCt4k(s9uMdrrJr4UBZESq%oQF^xRbAdlA# zHfn=HZ$Ko^8yLy=22GVray~^h#Pfa4ji%$OEuO58r`6D@HQnJVSQRy_eo`VevYe|_ zP}SC)Nnm|v#FZ$xYz1mVplxK-#)a|$M|HeDr5n;}lUlWDp7p#ia;gwl4L52; zp?p+~KyBt*)2={m4vQkAM#|?AZHVFX*rMh()JJ)BWkq>qd3AY3Wp!nFIrz%TiVEoP zzv^m;oUaxQNOdI*VFj}a=&QvTmsD4PgC7jI2zt!o|1uA6)dEls4|I!3%)uDen5zVQ zWjPtbi)Ths2@7hh6KLp690DD)yqH`Cuz4Lq6~=l%oD7CRLJvryfH)@z62`taF@q0r zD2YfsG{OHE45Okq21R#3q|BJ?RdxX+cxI0&@|dF!&*Xu-UCXq_2Q8iOm!RwH%+mf@BskzA^ zIld;U(b7*E>gMTqV_J==RbvG&mQ1EHF~X(QIHShbtP99!Yh2@2%;3I_<{C`!>)BO* zbUK=dPSj`O4cmbhs4Y?9TN$;rVEK)ttm?M>be-HwjqzBfP2;YdSgW>atMhozY-?0` zp|_dGyJM1372d!1#6SL?*iP<=87QTkeX2N`md&hea=PA&qinoHELxvxPDHC_Oe%?v 
zt%&9t7nP2WHpG*uIjLwvB3+-F6W8;j++j=9WUthl?2W>IDB~#}%C^#*;Ia0$H);pZ z)|-bC+tHO+#HgM8600(5>OZB#TysR5Vzc9a(yF9c*=ATQM>z9R%6s2|CB20sLh?Y z|JOJ7J^yl*;Q^p%R7vrQs%$2zY$mE~CaY{OR#~m9vb~Lp=2Y1}uF7IY)%jIcZ&bs- zRhI3H?2mX{!(*B}p^@Vw?Vo>ylxINq*jVxRJwnF+a+QrLE-e~0resA`)`%)=M3ptN z%4V_3JXd82qndK6Y__YiW}{mCDoYxb>X0hq$B3Qp--8MdR;+5-{a5**(kfN9>tEP0 z$CnnB;8e4M$1;T?Qz$aUBGW9gv@5cVQEfRzHm9vFUenJ%XW$7ibjX-1Pjd5&n(to% zPy4pj$uYhj=ahId-Ii7hI^g6yV&w~Rgnewog@5lO9B@bc>CJTnA0Pkzwzr?Wb6V|Rs}3m4zH4gKsg@{SgpKqqa46831W&L9JGhGq)PXn* zEHvsM`!XgozcsE7UO6AtxTZiI63Jhr7EM>}^3Dct&~!#Ej^xR?J)KFlswI&Yv`oSs zYSdwc-oakGw>XmWmPF2S`ooPn!q=bUEvZ^B-%T88)KPAtYEV9ACi^o-8+D97Q^}cI zF%z#@3#*e%`LF5x{L|{#T6J8Z#BjV(C-A~UBAphg6Q`?_5K9uVBpu|*MxDZW>d%J$ zY^Psp)TvHi)t`U^{ZN(}b(%jjg)>tm6uyFtH8!XWiZ6+}&!_ZN9tPjk+Uncc-`u9Y5}(yStscdyKj_>+U}1?tY^l@ZCLV)I)*0 zhs9m!Ir9;^d(^pm%&5n+?w)Y&o;2zy-`&$jJrlTlR@{XiD4wIc=bgJ3jCwKa?j`5$ zWusp4-MwnmYtCJzdR^RwwvIRG?oH?JEu-Gfx_bw=foC^}ca3__O~Cwp;GncVD3n*l z9~$)$>!h=*lXJp#^085$NS%CY>*O<|KKFzB!l*BUT)&dwLUr;rgZst>_pMRiWrO?P z<@$qBKl<)|GV15R-7n%UR42dE-G7|B-;DY_>+TQhPUoR!jm`&kPIV{IbO!ZZ>n;$) zdv4k2t~42E^lBXC4TR0-7(aZo{_2{Z`wM3WOQuMC$iRgCYXrtR5tkEp-Y;=1- za?$N6(=$Q+==Ks#E$FI8w>Peq=t382UtX`*XC>9RSwo z1E~n2TL>xJk`EG#gR>TJaEMr--K^qGyNKRQq?A3pojm)~VlYN80nP1CC9a1bN;CAQ z!!S0{RQl85V6c#Q^3+FwHTpt{B+Tr0UCgv2|>U@ zt>Y}30PJjz7VI3bMxRSX0CpZE2X?+pUjXWZT_~E1Kpj}6z8Hdlg*w3{Gy&M994**o zV2!?1NBmi8n*&iCRA^cF&0&D*t(5Sotr=#ET-^%6~~TFM|e&y#hfJ3swHBG(lXiakPoO z4%X;5s0b2!6OvoSw`BTlP=6KQ5zV`x0oZ#G1T0kf@6!ZeA8@o_AA&XdBPs&0k0Cj* zPh|R2P#^3w(R>aXfPDc$z(U3NB~1YK6-Nv9HCUs+p&|hL7Lo(|PNu&H^}&7+&5xh~ z*iR4yEL4m?(*$6@aI|2*f;IX-R0LqZL2_Wf%k&?hK3E=pt%D;7XaLp;f`EmJu`^8o z)`g=5>k1Yp5Gn$&ZjiFYxVl($&sxAifmpal&T`#@lg2)l_ax7j-V2P;Yk=mqbU2pB z^4>Hf;tkG+MvZb%S!rJa9c590rYI_|p@?^fQ=+_gyE$#lI zS)T^}llcG$vP_}YH;^XCau7$mOoPE1y#W=$G7W*0eKH>^7Q?a@aIm3Rcng}gX6M%|ac?QJebMPh^n8Yv8K z{w-vpSY*3GC88+>#R|>kyBPUW`)_|csz)IN`CLLRXEX!DvX0?sKbNs!jUGouS!6CQ z!GryHNbX*lAk$ld`uECKqS+d>gHtR+2$Eu`(M@DvNO2pEHpOkh8eL9Bkm4jrF2xF& 
z-VW4Hak6NpfOc?-l?dT~-d)=>Fr>HxN1Nh~V2zGY5u~^iB$r~9Oiu;%Q`}iJ(?B~o z#a$4Bq!`*krZX_4ID?~2u^Oz=yHXLPxEmyw;_fodpI!PX?jf2zK|46bnFv8r4DDNc zF)*Z9%h9H|H&~;iR0JvR1IeWrlj%B8KgD{{G=O$+igAP>DTa2-Mh1oyXK}PCdSH!C zP!Xiq1Sz{)&K8U2tOXpjhy`}bj%=w(hGZghdF)D&=N8lo#%Kkay9MD3K-r@;&9DWf zF*eauwxA3ce55>fwSmR)7L@x>!SeT?xneX=jIjUA7l!-KzA~{uWV`?DCmI8ag5VFb z`Rf3^KVw8G*Z$kBc}3&9$F5EW&SF1Q4n!NHJR0UjdLi$MJXY!}U9&|JJF zkOeQ)nGdBE@DAf>@eT)T^bu49ct=8Vct^?f(V#xwF`_vZG#BqU$buJo2sxfsz&nAX z#XAwK(I-(6;GGP~;hiGWOF@0SQ$@23)Zy_rJ^D1rf*0ypr_%~}XK=K5XM#2QEGh!L zvmrUWb7cBlP#^C+(VP#Oo8JYH1uxVmqK!Qm&x?ypg!Ie zqPY?@7w;;_f)~1*ucj67uHk6$t_5rKbyNg+*F(zQ%{Pd}jadsgxJfK{5W5+iJ&4^R zcDIT=AkxDZKL_gLJujLUKy&e4ge-WWd*LNo0q@-31{EqVmUu&T4e@~um{|7Ke z{|K7f_B*?_{}av7_J79ML{n+|zkorqJevIq76(*Nw*8(ftoGl;?su_6wf_M|j%IoI zlNy{?LH)(*B%02kZn30Mb%8946>3ynX@y)@<7gMF8(4g6Mn$k#-63U9X9Z%>BWnQ% zJ;lN;mH_tRyj!d_$aArJgTdEkpvzmVHED*$>Wi_7rm|RTfyo}z`av>!ZBVXPujOx( z>xj|1VuZz7PZ+j_{xY$?$hI{M5Y0ePtk!TI)A%E-Kl3*jdJry>B@OkD!E}Wc*?^;6 z(jj1t9!f=7WH>ItUOEhttM?6MdLvN(BivXtn}Du#P@Cc+2`bcN3h4?#4d-Zs8UYsP zT`Gd0HizVbDw64upngzWh^82HrGqNLMG{n~6P3~xf*Qrq1~nQi&ZJZXL5+pvf*L2& z<3atPCWvNB(3K8qD_kT&h3<&0=?X!WakN2A1dCH76+uwjLUKWs%k(5rKd1`PYzMm1 zK~2U*5>%*%PN6FVRmsr?wLMsz`=|(l+7XfqDk9T6f%-vJiDoM3N(Z$wE|Q=^jdmJc zA*fwA+MuR`#Yv5dAgF3c*+#poSnQUyfP>w|0=;A9Za@ApAsWnZcH=$Bv%%~M#^{-# zxecb!b>qEgh6YoMv5BVAVD<)sW#dD56fC~517&wvWBJ`BCPsB)gzi!=47*E%OvFXD z-K9}9vp})vcyER8K0FArn4zwbpb6HZiKAW2*3e^9ETScQl z127Fiz(U<1O%s4+I9jkausD5C5rEBw+pdtV}9FhY& zLZ*)d^}&u3&C#F^tWqBXLBK+H<*_sY*l`>!*zsU-%%CCwI}wrtJ4vQb2KB*C5zSK2 zAhS~;2w3QDTSgOroyO6Eoemaf3@QS!Ga+T~wzI_I?5qVGoFf)|O2;t2Te&oJD>@E~LRQ^T6u<|dKiAzMbm4B&dE&~k`yBvZf7OMO!Xo9$|<`K{U z>`@2;7OMQmXacauIa;tMz~Wp%MF92`BnS4iOg{tagFP#n=RgCn=OGAKs2E?M3BX?D zXu)0ri$et!0oW^$vc>qSSiF|CfP>e?0x$J@@OjqQZ#~|i37h$wa>ZMqxy>9d<&DbQ zG($6g2V)aWrJ27A1_|=f`aQ5XW`MGp_gP_8e;{@riXE!@BQWx4{jumj5xp(yPet2*N; z1lJYKdY~PeV1EQ435Htg`V0#R4&Z1L90(R)FH#XCI2e*ka08hh0_rC?R5ZgtJ2b%! 
z5r8BZYU3L*EF`!wN1Na#V2$3CiXg#4NG`$QGCcy+PjEBQY!2F?2^Jv$NiftIM=~rV zxCKX>U@=&uOQ;AEEQRC}93|7ELHz{Bh-NHkhbA}<0Z4+Oy>UFlLV^=G+61=*YxGuB z1PN{pDZ4k8iN(aM1srT67C07lTpQh%0hve}k67j8x#>&-V{`>*?xvG*yYhB4!=^JC zV-rnf)0qOsZaTSt#h@#pHhOzd{zhHzAPk=;ca({U$aZ7dNiM@W*0* z(?2Alrvh0%?9h&~GvmQ$HI1YFuy+A#^mHo9B1v3=9c2b2SIyNjy(_3+&AW+achHW> zjPExkv(Q$u2jf9zdvdgy%>--oUQ`5`)k1Qa?Jd($P(QPML=yw;n9S;cESZJ&kb1_0 z%o;e_%;I2;Zlof}Y!)P!nJ3c;P(QOK(aZ+zn9Q1iESZHij~2#*%#s{!W+|{nw^9*g zrXab@v`nW#{me3=X#?$;%;o@DG7Ie*a~TgZo5#^+HXp3f`%)2Pwg8gLY(JScpnhii zi{=2(j>+smAWLSU?P4M0L1qVWw3!_Y7C#N9BFJnJr0jOlE*6Wk7I3gcEU;a4Kyy5l zkr@2>9rlUC$a9}K91OnE1Z%>Dhb6>D)^eDO66Cj zp*WQao-nKC1+QPES*Ya^yyG|gYBORrW2WX QG}+do&!APWwz`r31(dXXH2?qr literal 0 HcmV?d00001 diff --git a/example/out_truth/updated_prgs/kmer_prgs/01/GC00006032.k15.w14.gfa b/example/out_truth/updated_prgs/kmer_prgs/01/GC00006032.k15.w14.gfa new file mode 100644 index 00000000..92203181 --- /dev/null +++ b/example/out_truth/updated_prgs/kmer_prgs/01/GC00006032.k15.w14.gfa @@ -0,0 +1,102 @@ +H VN:Z:1.0 bn:Z:--linear --singlearr +S 0 1{[0, 0)} FC:i:0 RC:i:0 +L 0 + 1 + 0M +S 1 1{[9, 24)} FC:i:0 RC:i:0 +L 1 + 2 + 0M +S 2 1{[10, 25)} FC:i:0 RC:i:0 +L 2 + 3 + 0M +S 3 1{[15, 30)} FC:i:0 RC:i:0 +L 3 + 4 + 0M +S 4 1{[25, 40)} FC:i:0 RC:i:0 +L 4 + 5 + 0M +S 5 1{[29, 44)} FC:i:0 RC:i:0 +L 5 + 6 + 0M +L 5 + 7 + 0M +S 6 3{[43, 48)[51, 52)[59, 68)} FC:i:0 RC:i:0 +L 6 + 8 + 0M +S 7 3{[43, 48)[55, 56)[59, 68)} FC:i:0 RC:i:0 +L 7 + 9 + 0M +S 8 1{[62, 77)} FC:i:0 RC:i:0 +L 8 + 10 + 0M +S 9 3{[45, 48)[55, 56)[59, 70)} FC:i:0 RC:i:0 +L 9 + 10 + 0M +S 10 1{[68, 83)} FC:i:0 RC:i:0 +L 10 + 11 + 0M +S 11 1{[71, 86)} FC:i:0 RC:i:0 +L 11 + 12 + 0M +S 12 1{[76, 91)} FC:i:0 RC:i:0 +L 12 + 13 + 0M +S 13 1{[77, 92)} FC:i:0 RC:i:0 +L 13 + 14 + 0M +S 14 1{[89, 104)} FC:i:0 RC:i:0 +L 14 + 15 + 0M +S 15 1{[92, 107)} FC:i:0 RC:i:0 +L 15 + 16 + 0M +S 16 1{[96, 111)} FC:i:0 RC:i:0 +L 16 + 17 + 0M +S 17 1{[103, 118)} FC:i:0 RC:i:0 +L 17 + 18 + 0M +S 18 1{[115, 130)} FC:i:0 RC:i:0 +L 18 + 19 
+ 0M +S 19 1{[125, 140)} FC:i:0 RC:i:0 +L 19 + 20 + 0M +S 20 1{[135, 150)} FC:i:0 RC:i:0 +L 20 + 21 + 0M +S 21 1{[139, 154)} FC:i:0 RC:i:0 +L 21 + 22 + 0M +L 21 + 23 + 0M +S 22 3{[143, 155)[158, 159)[166, 168)} FC:i:0 RC:i:0 +L 22 + 24 + 0M +L 22 + 25 + 0M +S 23 3{[147, 155)[162, 163)[166, 172)} FC:i:0 RC:i:0 +L 23 + 26 + 0M +S 24 3{[166, 179)[182, 183)[191, 192)} FC:i:0 RC:i:0 +L 24 + 27 + 0M +S 25 3{[148, 155)[158, 159)[166, 173)} FC:i:0 RC:i:0 +L 25 + 28 + 0M +S 26 3{[154, 155)[162, 163)[166, 179)} FC:i:0 RC:i:0 +L 26 + 27 + 0M +L 26 + 28 + 0M +S 27 3{[168, 179)[182, 183)[191, 194)} FC:i:0 RC:i:0 +L 27 + 29 + 0M +S 28 3{[171, 179)[187, 188)[191, 197)} FC:i:0 RC:i:0 +L 28 + 30 + 0M +S 29 3{[171, 179)[182, 183)[191, 197)} FC:i:0 RC:i:0 +L 29 + 31 + 0M +S 30 3{[176, 179)[187, 188)[191, 202)} FC:i:0 RC:i:0 +L 30 + 32 + 0M +S 31 2{[182, 183)[191, 205)} FC:i:0 RC:i:0 +L 31 + 32 + 0M +S 32 1{[197, 212)} FC:i:0 RC:i:0 +L 32 + 33 + 0M +S 33 1{[211, 226)} FC:i:0 RC:i:0 +L 33 + 34 + 0M +S 34 1{[225, 240)} FC:i:0 RC:i:0 +L 34 + 35 + 0M +S 35 1{[229, 244)} FC:i:0 RC:i:0 +L 35 + 36 + 0M +L 35 + 37 + 0M +S 36 3{[240, 248)[257, 258)[262, 268)} FC:i:0 RC:i:0 +L 36 + 38 + 0M +S 37 1{[233, 248)} FC:i:0 RC:i:0 +L 37 + 39 + 0M +S 38 3{[241, 248)[257, 258)[262, 269)} FC:i:0 RC:i:0 +L 38 + 40 + 0M +S 39 3{[240, 248)[252, 253)[262, 268)} FC:i:0 RC:i:0 +L 39 + 41 + 0M +S 40 3{[247, 248)[257, 258)[262, 275)} FC:i:0 RC:i:0 +L 40 + 42 + 0M +L 40 + 43 + 0M +S 41 1{[263, 278)} FC:i:0 RC:i:0 +L 41 + 44 + 0M +L 41 + 43 + 0M +S 42 3{[272, 280)[284, 285)[294, 300)} FC:i:0 RC:i:0 +L 42 + 46 + 0M +S 43 3{[272, 280)[289, 290)[294, 300)} FC:i:0 RC:i:0 +L 43 + 45 + 0M +L 43 + 46 + 0M +S 44 2{[266, 280)[284, 285)} FC:i:0 RC:i:0 +L 44 + 42 + 0M +S 45 5{[277, 280)[289, 290)[294, 303)[307, 308)[317, 318)} FC:i:0 RC:i:0 +L 45 + 46 + 0M +S 46 1{[325, 325)} FC:i:0 RC:i:0 diff --git a/example/out_truth/updated_prgs/kmer_prgs/01/GC00010897.k15.w14.gfa 
b/example/out_truth/updated_prgs/kmer_prgs/01/GC00010897.k15.w14.gfa new file mode 100644 index 00000000..5e19cfdb --- /dev/null +++ b/example/out_truth/updated_prgs/kmer_prgs/01/GC00010897.k15.w14.gfa @@ -0,0 +1,158 @@ +H VN:Z:1.0 bn:Z:--linear --singlearr +S 0 1{[0, 0)} FC:i:0 RC:i:0 +L 0 + 1 + 0M +S 1 1{[6, 21)} FC:i:0 RC:i:0 +L 1 + 2 + 0M +S 2 1{[13, 28)} FC:i:0 RC:i:0 +L 2 + 3 + 0M +S 3 1{[24, 39)} FC:i:0 RC:i:0 +L 3 + 4 + 0M +S 4 1{[25, 40)} FC:i:0 RC:i:0 +L 4 + 5 + 0M +L 4 + 6 + 0M +S 5 2{[29, 43)[46, 47)} FC:i:0 RC:i:0 +L 5 + 7 + 0M +S 6 3{[30, 43)[50, 51)[54, 55)} FC:i:0 RC:i:0 +L 6 + 8 + 0M +S 7 3{[42, 43)[46, 47)[54, 67)} FC:i:0 RC:i:0 +L 7 + 9 + 0M +S 8 3{[41, 43)[50, 51)[54, 66)} FC:i:0 RC:i:0 +L 8 + 10 + 0M +S 9 1{[59, 74)} FC:i:0 RC:i:0 +L 9 + 11 + 0M +S 10 1{[55, 70)} FC:i:0 RC:i:0 +L 10 + 9 + 0M +S 11 1{[61, 76)} FC:i:0 RC:i:0 +L 11 + 12 + 0M +S 12 1{[62, 77)} FC:i:0 RC:i:0 +L 12 + 13 + 0M +S 13 1{[67, 82)} FC:i:0 RC:i:0 +L 13 + 14 + 0M +S 14 1{[78, 93)} FC:i:0 RC:i:0 +L 14 + 15 + 0M +S 15 1{[85, 100)} FC:i:0 RC:i:0 +L 15 + 16 + 0M +S 16 1{[86, 101)} FC:i:0 RC:i:0 +L 16 + 17 + 0M +S 17 1{[88, 103)} FC:i:0 RC:i:0 +L 17 + 18 + 0M +S 18 1{[98, 113)} FC:i:0 RC:i:0 +L 18 + 19 + 0M +S 19 1{[99, 114)} FC:i:0 RC:i:0 +L 19 + 20 + 0M +L 19 + 21 + 0M +S 20 3{[113, 120)[123, 124)[131, 138)} FC:i:0 RC:i:0 +L 20 + 22 + 0M +S 21 3{[109, 120)[127, 128)[131, 134)} FC:i:0 RC:i:0 +L 21 + 23 + 0M +S 22 1{[137, 152)} FC:i:0 RC:i:0 +L 22 + 24 + 0M +S 23 3{[117, 120)[127, 128)[131, 142)} FC:i:0 RC:i:0 +L 23 + 24 + 0M +S 24 1{[140, 155)} FC:i:0 RC:i:0 +L 24 + 25 + 0M +S 25 1{[142, 157)} FC:i:0 RC:i:0 +L 25 + 26 + 0M +S 26 1{[153, 168)} FC:i:0 RC:i:0 +L 26 + 27 + 0M +S 27 1{[157, 172)} FC:i:0 RC:i:0 +L 27 + 28 + 0M +S 28 1{[160, 175)} FC:i:0 RC:i:0 +L 28 + 29 + 0M +L 28 + 30 + 0M +S 29 3{[169, 181)[184, 185)[193, 195)} FC:i:0 RC:i:0 +L 29 + 31 + 0M +S 30 3{[173, 181)[189, 190)[193, 199)} FC:i:0 RC:i:0 +L 30 + 32 + 0M +S 31 3{[172, 181)[184, 185)[193, 198)} FC:i:0 RC:i:0 +L 
31 + 32 + 0M +S 32 1{[195, 210)} FC:i:0 RC:i:0 +L 32 + 33 + 0M +S 33 1{[206, 221)} FC:i:0 RC:i:0 +L 33 + 34 + 0M +S 34 1{[220, 235)} FC:i:0 RC:i:0 +L 34 + 35 + 0M +S 35 1{[231, 246)} FC:i:0 RC:i:0 +L 35 + 36 + 0M +S 36 1{[235, 250)} FC:i:0 RC:i:0 +L 36 + 37 + 0M +S 37 1{[239, 254)} FC:i:0 RC:i:0 +L 37 + 38 + 0M +S 38 1{[250, 265)} FC:i:0 RC:i:0 +L 38 + 39 + 0M +S 39 1{[262, 277)} FC:i:0 RC:i:0 +L 39 + 40 + 0M +S 40 1{[272, 287)} FC:i:0 RC:i:0 +L 40 + 41 + 0M +S 41 1{[273, 288)} FC:i:0 RC:i:0 +L 41 + 42 + 0M +S 42 1{[274, 289)} FC:i:0 RC:i:0 +L 42 + 43 + 0M +S 43 1{[282, 297)} FC:i:0 RC:i:0 +L 43 + 44 + 0M +S 44 1{[284, 299)} FC:i:0 RC:i:0 +L 44 + 45 + 0M +S 45 1{[286, 301)} FC:i:0 RC:i:0 +L 45 + 46 + 0M +S 46 1{[292, 307)} FC:i:0 RC:i:0 +L 46 + 47 + 0M +L 46 + 48 + 0M +S 47 3{[303, 312)[324, 328)[332, 334)} FC:i:0 RC:i:0 +L 47 + 49 + 0M +S 48 3{[304, 312)[316, 320)[332, 335)} FC:i:0 RC:i:0 +L 48 + 50 + 0M +S 49 3{[304, 312)[324, 328)[332, 335)} FC:i:0 RC:i:0 +L 49 + 51 + 0M +S 50 2{[319, 320)[332, 346)} FC:i:0 RC:i:0 +L 50 + 52 + 0M +S 51 2{[325, 328)[332, 344)} FC:i:0 RC:i:0 +L 51 + 52 + 0M +S 52 1{[335, 350)} FC:i:0 RC:i:0 +L 52 + 53 + 0M +S 53 1{[349, 364)} FC:i:0 RC:i:0 +L 53 + 54 + 0M +S 54 1{[356, 371)} FC:i:0 RC:i:0 +L 54 + 55 + 0M +L 54 + 56 + 0M +S 55 3{[370, 373)[377, 378)[387, 398)} FC:i:0 RC:i:0 +L 55 + 57 + 0M +S 56 3{[360, 373)[382, 383)[387, 388)} FC:i:0 RC:i:0 +L 56 + 58 + 0M +S 57 1{[389, 404)} FC:i:0 RC:i:0 +L 57 + 59 + 0M +S 58 3{[366, 373)[382, 383)[387, 394)} FC:i:0 RC:i:0 +L 58 + 57 + 0M +S 59 1{[392, 407)} FC:i:0 RC:i:0 +L 59 + 60 + 0M +S 60 1{[404, 419)} FC:i:0 RC:i:0 +L 60 + 61 + 0M +S 61 1{[412, 427)} FC:i:0 RC:i:0 +L 61 + 62 + 0M +S 62 1{[420, 435)} FC:i:0 RC:i:0 +L 62 + 63 + 0M +S 63 1{[423, 438)} FC:i:0 RC:i:0 +L 63 + 64 + 0M +S 64 1{[428, 443)} FC:i:0 RC:i:0 +L 64 + 65 + 0M +S 65 1{[439, 454)} FC:i:0 RC:i:0 +L 65 + 66 + 0M +S 66 1{[446, 461)} FC:i:0 RC:i:0 +L 66 + 67 + 0M +S 67 1{[447, 462)} FC:i:0 RC:i:0 +L 67 + 68 + 0M +S 68 1{[453, 
468)} FC:i:0 RC:i:0 +L 68 + 69 + 0M +S 69 1{[465, 480)} FC:i:0 RC:i:0 +L 69 + 70 + 0M +L 69 + 71 + 0M +S 70 3{[468, 481)[490, 491)[495, 496)} FC:i:0 RC:i:0 +L 70 + 72 + 0M +S 71 3{[478, 481)[485, 486)[495, 506)} FC:i:0 RC:i:0 +L 71 + 73 + 0M +S 72 3{[476, 481)[490, 491)[495, 504)} FC:i:0 RC:i:0 +L 72 + 73 + 0M +S 73 1{[498, 513)} FC:i:0 RC:i:0 +L 73 + 74 + 0M +S 74 1{[502, 517)} FC:i:0 RC:i:0 +L 74 + 75 + 0M +S 75 1{[517, 517)} FC:i:0 RC:i:0 diff --git a/example/out_truth/updated_prgs/pangenome_updated.prg.fa.k15.w14.idx b/example/out_truth/updated_prgs/pangenome_updated.prg.fa.k15.w14.idx new file mode 100644 index 00000000..8f408f12 --- /dev/null +++ b/example/out_truth/updated_prgs/pangenome_updated.prg.fa.k15.w14.idx @@ -0,0 +1,120 @@ +119 +16318469 1 (1, 5{[277, 280)[289, 290)[294, 303)[307, 308)[317, 318)}, 45, 1) +95908974 1 (1, 2{[266, 280)[284, 285)}, 44, 0) +74944522 1 (1, 3{[272, 280)[289, 290)[294, 300)}, 43, 0) +25603268 1 (1, 3{[272, 280)[284, 285)[294, 300)}, 42, 1) +150075561 1 (1, 1{[263, 278)}, 41, 1) +92543452 1 (1, 3{[247, 248)[257, 258)[262, 275)}, 40, 1) +182535811 1 (1, 3{[240, 248)[252, 253)[262, 268)}, 39, 1) +22204066 1 (1, 3{[241, 248)[257, 258)[262, 269)}, 38, 0) +158006043 1 (1, 1{[233, 248)}, 37, 1) +136923223 1 (1, 3{[240, 248)[257, 258)[262, 268)}, 36, 1) +150896025 1 (1, 1{[229, 244)}, 35, 1) +50450125 1 (1, 1{[225, 240)}, 34, 1) +76253524 1 (1, 1{[211, 226)}, 33, 0) +61544635 1 (1, 1{[197, 212)}, 32, 0) +32095090 1 (1, 2{[182, 183)[191, 205)}, 31, 1) +84155375 1 (1, 3{[176, 179)[187, 188)[191, 202)}, 30, 0) +964725 1 (1, 3{[171, 179)[182, 183)[191, 197)}, 29, 1) +22900170 1 (1, 3{[171, 179)[187, 188)[191, 197)}, 28, 1) +58898711 1 (1, 3{[168, 179)[182, 183)[191, 194)}, 27, 1) +62158241 1 (1, 3{[154, 155)[162, 163)[166, 179)}, 26, 1) +182808637 1 (1, 3{[148, 155)[158, 159)[166, 173)}, 25, 1) +126788003 1 (1, 3{[166, 179)[182, 183)[191, 192)}, 24, 0) +70667501 1 (1, 3{[147, 155)[162, 163)[166, 172)}, 23, 0) +80230343 1 (1, 3{[143, 
155)[158, 159)[166, 168)}, 22, 0) +40017473 1 (1, 1{[139, 154)}, 21, 1) +10144007 1 (1, 1{[135, 150)}, 20, 0) +87464180 1 (1, 1{[125, 140)}, 19, 0) +73742478 1 (1, 1{[115, 130)}, 18, 1) +45109196 1 (1, 1{[103, 118)}, 17, 1) +62100866 1 (1, 1{[96, 111)}, 16, 1) +84500547 1 (1, 1{[92, 107)}, 15, 0) +117300766 1 (1, 1{[89, 104)}, 14, 1) +85322191 1 (1, 1{[77, 92)}, 13, 1) +21917314 1 (1, 1{[76, 91)}, 12, 1) +7102605 1 (1, 1{[71, 86)}, 11, 1) +44260529 1 (1, 1{[68, 83)}, 10, 0) +1715623 1 (1, 3{[45, 48)[55, 56)[59, 70)}, 9, 1) +118310049 1 (1, 1{[62, 77)}, 8, 1) +23944745 1 (1, 3{[43, 48)[55, 56)[59, 68)}, 7, 0) +12634334 1 (1, 3{[43, 48)[51, 52)[59, 68)}, 6, 0) +32446550 1 (1, 1{[29, 44)}, 5, 1) +85119856 1 (1, 1{[25, 40)}, 4, 1) +146396092 1 (1, 1{[15, 30)}, 3, 0) +117410367 1 (1, 1{[10, 25)}, 2, 0) +47412201 1 (1, 1{[9, 24)}, 1, 0) +454860 1 (0, 1{[502, 517)}, 74, 1) +10470925 1 (0, 1{[498, 513)}, 73, 0) +121176941 1 (0, 3{[476, 481)[490, 491)[495, 504)}, 72, 1) +16393675 1 (0, 3{[478, 481)[485, 486)[495, 506)}, 71, 1) +13727052 1 (0, 3{[468, 481)[490, 491)[495, 496)}, 70, 0) +24884945 1 (0, 1{[465, 480)}, 69, 1) +39358686 1 (0, 1{[453, 468)}, 68, 1) +35934452 1 (0, 1{[447, 462)}, 67, 0) +194061 1 (0, 1{[446, 461)}, 66, 0) +3212702 1 (0, 1{[439, 454)}, 65, 0) +36203250 1 (0, 1{[428, 443)}, 64, 0) +85606590 1 (0, 1{[423, 438)}, 63, 0) +109416972 1 (0, 1{[420, 435)}, 62, 0) +142092369 1 (0, 1{[412, 427)}, 61, 0) +27165023 1 (0, 1{[404, 419)}, 60, 1) +33357868 1 (0, 1{[392, 407)}, 59, 1) +1276048 1 (0, 1{[389, 404)}, 57, 1) +71473097 1 (0, 3{[360, 373)[382, 383)[387, 388)}, 56, 1) +8869878 1 (0, 3{[370, 373)[377, 378)[387, 398)}, 55, 1) +403679 1 (0, 1{[356, 371)}, 54, 1) +73010802 1 (0, 1{[349, 364)}, 53, 1) +21552618 1 (0, 1{[335, 350)}, 52, 1) +20504340 1 (0, 2{[325, 328)[332, 344)}, 51, 0) +140531031 1 (0, 2{[319, 320)[332, 346)}, 50, 0) +24327029 1 (0, 3{[304, 312)[324, 328)[332, 335)}, 49, 0) +10770263 1 (0, 3{[304, 312)[316, 320)[332, 335)}, 48, 1) +116049953 1 
(0, 3{[303, 312)[324, 328)[332, 334)}, 47, 1) +124991398 1 (0, 1{[292, 307)}, 46, 0) +46739838 1 (0, 1{[286, 301)}, 45, 1) +24932839 1 (0, 1{[284, 299)}, 44, 1) +5793861 1 (0, 1{[282, 297)}, 43, 0) +40417193 1 (0, 1{[274, 289)}, 42, 1) +47369520 1 (0, 1{[272, 287)}, 40, 1) +57951963 1 (0, 1{[262, 277)}, 39, 0) +82000267 1 (0, 3{[366, 373)[382, 383)[387, 394)}, 58, 0) +105221040 1 (0, 1{[250, 265)}, 38, 1) +18585087 1 (0, 1{[239, 254)}, 37, 0) +58759004 1 (0, 1{[235, 250)}, 36, 1) +46243831 1 (0, 1{[273, 288)}, 41, 1) +158709222 1 (0, 1{[231, 246)}, 35, 0) +70513877 1 (0, 1{[220, 235)}, 34, 1) +69201700 1 (0, 1{[206, 221)}, 33, 0) +13740578 1 (0, 1{[195, 210)}, 32, 1) +34839656 1 (0, 3{[172, 181)[184, 185)[193, 198)}, 31, 0) +38692934 1 (0, 3{[173, 181)[189, 190)[193, 199)}, 30, 0) +32296303 1 (0, 3{[169, 181)[184, 185)[193, 195)}, 29, 0) +14936588 1 (0, 1{[160, 175)}, 28, 1) +27686752 1 (0, 1{[157, 172)}, 27, 0) +96552299 1 (0, 1{[153, 168)}, 26, 0) +55824476 1 (0, 1{[142, 157)}, 25, 1) +37634093 1 (0, 1{[140, 155)}, 24, 0) +27254804 1 (0, 3{[117, 120)[127, 128)[131, 142)}, 23, 1) +39280491 1 (0, 1{[137, 152)}, 22, 1) +135073096 1 (0, 3{[109, 120)[127, 128)[131, 134)}, 21, 0) +91569063 1 (0, 1{[99, 114)}, 19, 0) +31238650 1 (0, 3{[113, 120)[123, 124)[131, 138)}, 20, 1) +84550898 1 (0, 1{[98, 113)}, 18, 0) +83969001 1 (0, 1{[88, 103)}, 17, 0) +62421088 1 (0, 1{[86, 101)}, 16, 1) +92281898 1 (0, 1{[85, 100)}, 15, 0) +138968336 1 (0, 1{[78, 93)}, 14, 0) +72337557 1 (0, 1{[67, 82)}, 13, 0) +68189800 1 (0, 1{[62, 77)}, 12, 1) +52616670 1 (0, 1{[61, 76)}, 11, 1) +117600578 1 (0, 1{[55, 70)}, 10, 0) +15850244 1 (0, 1{[59, 74)}, 9, 1) +182852002 1 (0, 3{[41, 43)[50, 51)[54, 66)}, 8, 1) +52061784 1 (0, 3{[42, 43)[46, 47)[54, 67)}, 7, 1) +42564047 1 (0, 3{[30, 43)[50, 51)[54, 55)}, 6, 1) +58311598 1 (0, 2{[29, 43)[46, 47)}, 5, 1) +31358832 1 (0, 1{[25, 40)}, 4, 0) +154624683 1 (0, 1{[24, 39)}, 3, 1) +173965435 1 (0, 1{[13, 28)}, 2, 1) +84235074 1 (0, 1{[6, 21)}, 1, 1) diff 
--git a/example/run_pandora.sh b/example/run_pandora.sh index b765c577..6ec0fa95 100755 --- a/example/run_pandora.sh +++ b/example/run_pandora.sh @@ -20,20 +20,26 @@ download_tool "${make_prg_URL}" "${make_prg_executable}" echo "Running pandora without denovo..." echo "Running ${make_prg_executable} from_msa" -"${make_prg_executable}" from_msa --input msas/ --output_prefix prgs/pangenome +"${make_prg_executable}" from_msa --threads 1 --input msas/ --output_prefix out/prgs/pangenome echo "Running ${pandora_executable} index" -"${pandora_executable}" index prgs/pangenome.prg.fa +"${pandora_executable}" index --threads 1 out/prgs/pangenome.prg.fa echo "Running ${pandora_executable} compare" -"${pandora_executable}" compare --genotype -o output_toy_example_no_denovo prgs/pangenome.prg.fa reads/read_index.tsv +"${pandora_executable}" compare --threads 1 --genotype -o out/output_toy_example_no_denovo out/prgs/pangenome.prg.fa reads/read_index.tsv echo "Running pandora without denovo - done!" echo "Running pandora with denovo..." 
echo "Running ${pandora_executable} discover" -"${pandora_executable}" discover --outdir pandora_discover_out prgs/pangenome.prg.fa reads/read_index.tsv +"${pandora_executable}" discover --threads 1 --outdir out/pandora_discover_out out/prgs/pangenome.prg.fa reads/read_index.tsv echo "Running ${make_prg_executable} update" -"${make_prg_executable}" update --update_DS prgs/pangenome.update_DS --denovo_paths pandora_discover_out/denovo_paths.txt --output_prefix updated_prgs/pangenome_updated +"${make_prg_executable}" update --threads 1 --update_DS out/prgs/pangenome.update_DS --denovo_paths out/pandora_discover_out/denovo_paths.txt --output_prefix out/updated_prgs/pangenome_updated echo "Running ${pandora_executable} index on updated PRGs" -"${pandora_executable}" index updated_prgs/pangenome_updated.prg.fa +"${pandora_executable}" index --threads 1 out/updated_prgs/pangenome_updated.prg.fa echo "Running ${pandora_executable} compare" -"${pandora_executable}" compare --genotype -o output_toy_example_with_denovo updated_prgs/pangenome_updated.prg.fa reads/read_index.tsv +"${pandora_executable}" compare --threads 1 --genotype -o out/output_toy_example_with_denovo out/updated_prgs/pangenome_updated.prg.fa reads/read_index.tsv echo "Running pandora with denovo - done!" 
+ +if diff -rq -I '##fileDate.*' out out_truth ; then + echo "Example run produced the expected result" +else + echo "ERROR: Example run DID NOT produce the expected result" +fi From 1672ed3f2ed982136b0fef883e6d9faa6214acc2 Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Fri, 21 May 2021 12:59:47 +0100 Subject: [PATCH 05/29] Updating example to run with pandora installed through conda --- example/README.md | 13 ++++++++++++ example/run_pandora.sh | 28 +++++++++++++++++++------- example/run_pandora_conda.sh | 39 ------------------------------------ 3 files changed, 34 insertions(+), 46 deletions(-) delete mode 100755 example/run_pandora_conda.sh diff --git a/example/README.md b/example/README.md index 6c23cd80..0c6781a5 100644 --- a/example/README.md +++ b/example/README.md @@ -81,6 +81,19 @@ GC00010897 44 . C T . . SVTYPE=SNP;GRAPHTYPE=SIMPLE GT:MEAN_FWD_COVG:MEAN_REV_CO GC00010897 422 . A T . . SVTYPE=SNP;GRAPHTYPE=SIMPLE GT:MEAN_FWD_COVG:MEAN_REV_COVG:MED_FWD_COVG:MED_REV_COVG:SUM_FWD_COVG:SUM_REV_COVG:GAPS:LIKELIHOOD:GT_CONF 1:0,8:0,5:0,8:0,5:0,16:0,11:1,0:-155.867,-20.2266:135.641 0:12,0:9,0:12,0:9,0:12,0:9,0:0,1:-9.39494,-182.709:173.314 ``` +## Extra + +### Running with conda + +If you install `pandora` via `conda`: `conda install -c bioconda pandora`, +you can run this sample example by activating the `conda` environment containing `pandora` and running: + +``` +./run_pandora.sh conda +``` + +The output should be the same as using the precompiled binary. 
+ [pandora_2020_paper]: https://www.biorxiv.org/content/10.1101/2020.11.12.380378v2 diff --git a/example/run_pandora.sh b/example/run_pandora.sh index 6ec0fa95..e7775291 100755 --- a/example/run_pandora.sh +++ b/example/run_pandora.sh @@ -1,12 +1,14 @@ #!/usr/bin/env bash set -eu -# configs -pandora_URL="https://github.com/rmcolq/pandora/releases/download/0.9.0/pandora-linux-precompiled-v0.9.0" -pandora_executable="./pandora-linux-precompiled-v0.9.0" -make_prg_URL="https://github.com/leoisl/make_prg/releases/download/v0.2.0/make_prg_0.2.0" -make_prg_executable="./make_prg_0.2.0" - +######################################################################################################################## +# argument parsing +if [[ "$#" -gt 1 || ( "$#" -eq 1 && "$1" != "conda" ) ]] ; then + echo "Illegal parameters." + echo "Usage: $0 or $0 conda" + exit 1 +fi +######################################################################################################################## function download_tool { URL=$1 @@ -15,7 +17,19 @@ function download_tool { chmod +x "${executable}" } -download_tool "${pandora_URL}" "${pandora_executable}" +# setup tools +if [ "$#" -eq 0 ] ; then + # not conda env + pandora_URL="https://github.com/rmcolq/pandora/releases/download/0.9.0/pandora-linux-precompiled-v0.9.0" + pandora_executable="./pandora-linux-precompiled-v0.9.0" + download_tool "${pandora_URL}" "${pandora_executable}" +else + # conda env + pandora_executable="pandora" +fi + +make_prg_URL="https://github.com/leoisl/make_prg/releases/download/v0.2.0/make_prg_0.2.0" +make_prg_executable="./make_prg_0.2.0" download_tool "${make_prg_URL}" "${make_prg_executable}" echo "Running pandora without denovo..." 
diff --git a/example/run_pandora_conda.sh b/example/run_pandora_conda.sh deleted file mode 100755 index b765c577..00000000 --- a/example/run_pandora_conda.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env bash -set -eu - -# configs -pandora_URL="https://github.com/rmcolq/pandora/releases/download/0.9.0/pandora-linux-precompiled-v0.9.0" -pandora_executable="./pandora-linux-precompiled-v0.9.0" -make_prg_URL="https://github.com/leoisl/make_prg/releases/download/v0.2.0/make_prg_0.2.0" -make_prg_executable="./make_prg_0.2.0" - - -function download_tool { - URL=$1 - executable=$2 - wget "${URL}" -O "${executable}" - chmod +x "${executable}" -} - -download_tool "${pandora_URL}" "${pandora_executable}" -download_tool "${make_prg_URL}" "${make_prg_executable}" - -echo "Running pandora without denovo..." -echo "Running ${make_prg_executable} from_msa" -"${make_prg_executable}" from_msa --input msas/ --output_prefix prgs/pangenome -echo "Running ${pandora_executable} index" -"${pandora_executable}" index prgs/pangenome.prg.fa -echo "Running ${pandora_executable} compare" -"${pandora_executable}" compare --genotype -o output_toy_example_no_denovo prgs/pangenome.prg.fa reads/read_index.tsv -echo "Running pandora without denovo - done!" - -echo "Running pandora with denovo..." 
-echo "Running ${pandora_executable} discover" -"${pandora_executable}" discover --outdir pandora_discover_out prgs/pangenome.prg.fa reads/read_index.tsv -echo "Running ${make_prg_executable} update" -"${make_prg_executable}" update --update_DS prgs/pangenome.update_DS --denovo_paths pandora_discover_out/denovo_paths.txt --output_prefix updated_prgs/pangenome_updated -echo "Running ${pandora_executable} index on updated PRGs" -"${pandora_executable}" index updated_prgs/pangenome_updated.prg.fa -echo "Running ${pandora_executable} compare" -"${pandora_executable}" compare --genotype -o output_toy_example_with_denovo updated_prgs/pangenome_updated.prg.fa reads/read_index.tsv -echo "Running pandora with denovo - done!" From 59ffe1affc453c47287e6a89182a06b0313d10b4 Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Fri, 21 May 2021 13:18:48 +0100 Subject: [PATCH 06/29] Preparing release 0.9.1 --- CHANGELOG.md | 13 ++++++++++++- CMakeLists.txt | 2 +- README.md | 14 +++++++++++--- example/run_pandora.sh | 4 ++-- 4 files changed, 26 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 977dba72..deb356ae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,16 @@ project adheres to ## [Unreleased] +## [0.9.1] + +### Added +- `pandora` is now installable through `conda`; +- A script to archive the `pandora` repository with git submodules; + +### Changed +- Improved the sample example so now we can assert that the output produced is the expected one; +- Changes to the build process that enables `pandora` to be compiled in the `conda` environment; + ## [0.9.0] ### Changed @@ -97,7 +107,8 @@ from this point will have their changes meticulously documented here. 
- k-mer coverage underflow bug in `LocalPRG` [[#183][183]] -[Unreleased]: https://github.com/rmcolq/pandora/compare/0.9.0...HEAD +[Unreleased]: https://github.com/rmcolq/pandora/compare/0.9.1...HEAD +[0.9.1]: https://github.com/rmcolq/pandora/releases/tag/0.9.1 [0.9.0]: https://github.com/rmcolq/pandora/releases/tag/0.9.0 [0.9.0-rc2]: https://github.com/rmcolq/pandora/releases/tag/0.9.0-rc2 [0.9.0-rc1]: https://github.com/rmcolq/pandora/releases/tag/0.9.0-rc1 diff --git a/CMakeLists.txt b/CMakeLists.txt index 53efa89c..47e29d1a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,7 +12,7 @@ HunterGate( # project configuration set(PROJECT_NAME_STR pandora) -project(${PROJECT_NAME_STR} VERSION "0.9.0" LANGUAGES C CXX) +project(${PROJECT_NAME_STR} VERSION "0.9.1" LANGUAGES C CXX) set(ADDITIONAL_VERSION_LABELS "") configure_file( include/version.h.in ${CMAKE_BINARY_DIR}/include/version.h ) diff --git a/README.md b/README.md index cfc06a28..95768d16 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ - [Hands-on toy example](#hands-on-toy-example) - [Installation](#installation) - [Precompiled portable binary](#no-installation-needed---precompiled-portable-binary) + - [Conda](#conda) - [Containers](#containers) - [Installation from source](#installation-from-source) - [Usage](#usage) @@ -78,18 +79,25 @@ In this binary, all libraries are linked statically. 
* **Download**: ``` - wget https://github.com/rmcolq/pandora/releases/download/0.9.0/pandora-linux-precompiled-v0.9.0 + wget https://github.com/rmcolq/pandora/releases/download/0.9.1/pandora-linux-precompiled-v0.9.1 ``` * **Running**: ``` -chmod +x pandora-linux-precompiled-v0.9.0 -./pandora-linux-precompiled-v0.9.0 -h +chmod +x pandora-linux-precompiled-v0.9.1 +./pandora-linux-precompiled-v0.9.1 -h ``` * **Notes**: * We provide precompiled binaries for Linux OS only; +### Conda + +To install `pandora` through `conda`, run: +``` +conda install -c bioconda pandora +``` + ### Containers [![Docker Repository on Quay](https://quay.io/repository/rmcolq/pandora/status "Docker Repository on Quay")](https://quay.io/repository/rmcolq/pandora) diff --git a/example/run_pandora.sh b/example/run_pandora.sh index e7775291..e2d1284e 100755 --- a/example/run_pandora.sh +++ b/example/run_pandora.sh @@ -20,8 +20,8 @@ function download_tool { # setup tools if [ "$#" -eq 0 ] ; then # not conda env - pandora_URL="https://github.com/rmcolq/pandora/releases/download/0.9.0/pandora-linux-precompiled-v0.9.0" - pandora_executable="./pandora-linux-precompiled-v0.9.0" + pandora_URL="https://github.com/rmcolq/pandora/releases/download/0.9.1/pandora-linux-precompiled-v0.9.1" + pandora_executable="./pandora-linux-precompiled-v0.9.1" download_tool "${pandora_URL}" "${pandora_executable}" else # conda env From 97ebbfd79b4ee791afedf4fb0dd6cad4adbaa870 Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Fri, 21 May 2021 13:31:25 +0100 Subject: [PATCH 07/29] Making create_archives.sh executable --- scripts/create_archives.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 scripts/create_archives.sh diff --git a/scripts/create_archives.sh b/scripts/create_archives.sh old mode 100644 new mode 100755 From c48e6dfb93d84da4dfa9822fa668ab615985cf10 Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Fri, 17 Sep 2021 09:08:19 +1000 Subject: [PATCH 08/29] update link to paper and 
add citation --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 95768d16..faa3c73f 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,8 @@ - [Installation from source](#installation-from-source) - [Usage](#usage) +> Colquhoun, R.M., Hall, M.B., Lima, L. *et al.* Pandora: nucleotide-resolution bacterial pan-genomics with reference graphs. *Genome Biol* **22,** 267 (2021). https://doi.org/10.1186/s13059-021-02473-1 + ## Introduction Pandora is a tool for bacterial genome analysis using a pangenome reference graph (PanRG). It allows gene presence/absence detection and genotyping of SNPs, indels and longer variants in one or a number of samples. Pandora works with Illumina or Nanopore data. For more details, see [our paper][pandora_2020_paper]. @@ -147,4 +149,4 @@ See [Usage](https://github.com/rmcolq/pandora/wiki/Usage). -[pandora_2020_paper]: https://www.biorxiv.org/content/10.1101/2020.11.12.380378v2 +[pandora_2020_paper]: https://doi.org/10.1186/s13059-021-02473-1 From cf702d4c70eaa90c81346ed114dd275864ccb9b6 Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Wed, 6 Oct 2021 17:10:01 +1000 Subject: [PATCH 09/29] change svtype to variant class "VC" --- include/vcf.h | 21 ++- include/vcfrecord.h | 24 +-- src/vcf.cpp | 64 ++++---- src/vcfrecord.cpp | 49 ++++-- test/localPRG_test.cpp | 36 ++-- test/vcf_test.cpp | 140 +++------------- test/vcfrecord_test.cpp | 352 +--------------------------------------- 7 files changed, 139 insertions(+), 547 deletions(-) diff --git a/include/vcf.h b/include/vcf.h index bac377ae..3d813c40 100644 --- a/include/vcf.h +++ b/include/vcf.h @@ -53,6 +53,11 @@ class VCF { virtual bool operator!=(const VCF& y) const; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // static constants for common VCF 
fields + static const std::string VARIANT_CLASS_ID; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // adders virtual void add_record(const std::string& chrom, uint32_t position, @@ -106,11 +111,11 @@ class VCF { // to_string methods virtual std::string header() const; virtual std::string to_string(bool genotyping_from_maximum_likelihood, - bool genotyping_from_coverage, bool output_dot_allele = false, - bool graph_is_simple = true, bool graph_is_nested = true, - bool graph_has_too_many_alts = true, bool sv_type_is_snp = true, - bool sv_type_is_indel = true, bool sv_type_is_ph_snps = true, - bool sv_type_is_complex = true); + bool genotyping_from_coverage, bool output_dot_allele = false, + bool graph_is_simple = true, bool graph_is_nested = true, + bool graph_has_too_many_alts = true, bool variant_class_is_snp = true, + bool variant_class_is_indel = true, bool variant_class_is_ph_snps = true, + bool variant_class_is_complex = true); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -128,9 +133,9 @@ class VCF { virtual void save(const fs::path& filepath, bool genotyping_from_maximum_likelihood, bool genotyping_from_coverage, bool output_dot_allele = false, bool graph_is_simple = true, bool graph_is_nested = true, - bool graph_has_too_many_alts = true, bool sv_type_is_snp = true, - bool sv_type_is_indel = true, bool sv_type_is_ph_snps = true, - bool sv_type_is_complex = true); + bool graph_has_too_many_alts = true, bool variant_class_is_snp = true, + bool variant_class_is_indel = true, bool variant_class_is_ph_snps = true, + bool variant_class_is_complex = true); // concatenate several VCF files 
that were previously written to disk as .vcfs into // a single VCF file diff --git a/include/vcfrecord.h b/include/vcfrecord.h index 2e1730a9..5bd3bb56 100644 --- a/include/vcfrecord.h +++ b/include/vcfrecord.h @@ -126,22 +126,10 @@ class VCFRecord { { return this->info.find("GRAPHTYPE=TOO_MANY_ALTS") != std::string::npos; } - virtual inline bool svtype_is_SNP() const - { - return this->info.find("SVTYPE=SNP") != std::string::npos; - } - virtual inline bool svtype_is_indel() const - { - return this->info.find("SVTYPE=INDEL") != std::string::npos; - } - virtual inline bool svtype_is_PH_SNPs() const - { - return this->info.find("SVTYPE=PH_SNPs") != std::string::npos; - } - virtual inline bool svtype_is_complex() const - { - return this->info.find("SVTYPE=COMPLEX") != std::string::npos; - } + virtual inline bool variant_class_is_snp() const; + virtual inline bool variant_class_is_indel() const; + virtual inline bool variant_class_is_phased_snps() const; + virtual inline bool variant_class_is_complex() const; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -216,7 +204,9 @@ class VCFRecord { std::string chrom; uint32_t pos; - std::string infer_SVTYPE() const; + std::string infer_variant_class() const; + + std::string variant_class_info_entry() const; virtual void correct_dot_alleles( char nucleotide, bool add_nucleotide_before_the_sequence); diff --git a/src/vcf.cpp b/src/vcf.cpp index 86206d32..1e7663f1 100644 --- a/src/vcf.cpp +++ b/src/vcf.cpp @@ -1,5 +1,7 @@ #include "vcf.h" +const std::string VCF::VARIANT_CLASS_ID{"VC"}; + void VCF::add_record_core(const VCFRecord& vr) { records.push_back(std::make_shared(vr)); @@ -128,7 +130,7 @@ void VCF::add_a_new_record_discovered_in_a_sample_and_genotype_it( } } else { add_record( - chrom, pos, ref, alt, "SVTYPE=COMPLEX", 
"GRAPHTYPE=TOO_MANY_ALTS"); + chrom, pos, ref, alt, VCF::VARIANT_CLASS_ID + "=COMPLEX", "GRAPHTYPE=TOO_MANY_ALTS"); records.back() ->sampleIndex_to_sampleInfo[sample_index] .set_gt_from_max_likelihood_path(1); @@ -437,18 +439,19 @@ std::vector VCF::get_all_records_overlapping_the_given_record( return overlapping_records; } -void VCF::save(const fs::path& filepath, bool genotyping_from_maximum_likelihood, - bool genotyping_from_coverage, bool output_dot_allele, bool graph_is_simple, - bool graph_is_nested, bool graph_has_too_many_alts, bool sv_type_is_snp, - bool sv_type_is_indel, bool sv_type_is_ph_snps, bool sv_type_is_complex) -{ +void VCF::save(const fs::path &filepath, bool genotyping_from_maximum_likelihood, + bool genotyping_from_coverage, bool output_dot_allele, bool graph_is_simple, + bool graph_is_nested, bool graph_has_too_many_alts, bool variant_class_is_snp, + bool variant_class_is_indel, bool variant_class_is_ph_snps, + bool variant_class_is_complex) { BOOST_LOG_TRIVIAL(debug) << "Saving VCF to " << filepath; fs::ofstream handle; handle.open(filepath); handle << this->to_string(genotyping_from_maximum_likelihood, - genotyping_from_coverage, output_dot_allele, graph_is_simple, graph_is_nested, - graph_has_too_many_alts, sv_type_is_snp, sv_type_is_indel, sv_type_is_ph_snps, - sv_type_is_complex); + genotyping_from_coverage, output_dot_allele, graph_is_simple, graph_is_nested, + graph_has_too_many_alts, variant_class_is_snp, variant_class_is_indel, + variant_class_is_ph_snps, + variant_class_is_complex); handle.close(); BOOST_LOG_TRIVIAL(debug) << "Finished saving " << this->records.size() << " entries to file"; @@ -482,7 +485,7 @@ std::string VCF::header() const header += "##ALT=\n"; header - += "##INFO=\n"; + += "##INFO=\n"; header += "##ALT=\n"; header += "##ALT=\n"; @@ -521,16 +524,15 @@ std::string VCF::header() const } std::string VCF::to_string(bool genotyping_from_maximum_likelihood, - bool genotyping_from_coverage, bool output_dot_allele, bool 
graph_is_simple, - bool graph_is_nested, bool graph_has_too_many_alts, bool sv_type_is_snp, - bool sv_type_is_indel, bool sv_type_is_ph_snps, bool sv_type_is_complex) -{ + bool genotyping_from_coverage, bool output_dot_allele, bool graph_is_simple, + bool graph_is_nested, bool graph_has_too_many_alts, bool variant_class_is_snp, + bool variant_class_is_indel, bool variant_class_is_ph_snps, bool variant_class_is_complex) { const bool only_one_flag_is_set - = ((int)(genotyping_from_maximum_likelihood) + (int)(genotyping_from_coverage)) - == 1; + = ((int) (genotyping_from_maximum_likelihood) + (int) (genotyping_from_coverage)) + == 1; if (!only_one_flag_is_set) { fatal_error( - "Error on stringifying VCF record: incompatible genotyping options"); + "Error on stringifying VCF record: incompatible genotyping options"); } std::stringstream out; @@ -540,28 +542,28 @@ std::string VCF::to_string(bool genotyping_from_maximum_likelihood, // TODO: remove this side effect or always keep the VCF sorted sort_records(); - for (const auto& record : this->records) { + for (const auto &record : this->records) { const bool record_has_dot_allele_and_should_be_output - = output_dot_allele and record->contains_dot_allele(); + = output_dot_allele and record->contains_dot_allele(); const bool graph_type_condition_is_satisfied - = (graph_is_simple and record->graph_type_is_simple()) - or (graph_is_nested and record->graph_type_is_nested()) - or (graph_has_too_many_alts and record->graph_type_has_too_many_alts()); - const bool sv_type_condition_is_satisfied - = (sv_type_is_snp and record->svtype_is_SNP()) - or (sv_type_is_indel and record->svtype_is_indel()) - or (sv_type_is_ph_snps and record->svtype_is_PH_SNPs()) - or (sv_type_is_complex and record->svtype_is_complex()); - const bool graph_and_sv_type_conditions_are_satisfied - = graph_type_condition_is_satisfied and sv_type_condition_is_satisfied; + = (graph_is_simple and record->graph_type_is_simple()) + or (graph_is_nested and 
record->graph_type_is_nested()) + or (graph_has_too_many_alts and record->graph_type_has_too_many_alts()); + const bool variant_class_condition_is_satisfied + = (variant_class_is_snp and record->variant_class_is_snp()) + or (variant_class_is_indel and record->variant_class_is_indel()) + or (variant_class_is_ph_snps and record->variant_class_is_phased_snps()) + or (variant_class_is_complex and record->variant_class_is_complex()); + const bool graph_and_variant_class_conditions_are_satisfied + = graph_type_condition_is_satisfied and variant_class_condition_is_satisfied; const bool record_should_be_output = record_has_dot_allele_and_should_be_output - or graph_and_sv_type_conditions_are_satisfied; + or graph_and_variant_class_conditions_are_satisfied; if (record_should_be_output) { out << record->to_string( - genotyping_from_maximum_likelihood, genotyping_from_coverage) + genotyping_from_maximum_likelihood, genotyping_from_coverage) << std::endl; } } diff --git a/src/vcfrecord.cpp b/src/vcfrecord.cpp index d416d6e0..d903a451 100644 --- a/src/vcfrecord.cpp +++ b/src/vcfrecord.cpp @@ -20,7 +20,7 @@ VCFRecord::VCFRecord(VCF const* parent_vcf, const std::string& chrom, uint32_t p add_new_alt(alt); if (this->info == ".") { - this->info = infer_SVTYPE(); + this->info = this->variant_class_info_entry(); } if (graph_type_info != "") { @@ -42,26 +42,34 @@ VCFRecord::VCFRecord(VCF const* parent_vcf) set_ref_and_clear_alts("."); } -std::string VCFRecord::infer_SVTYPE() const -{ +std::string VCFRecord::infer_variant_class() const { // TODO: How to handle cases where there are more than 2 options, not all of one // type if (ref == "." and (alts.empty() or alts[0] == ".")) return "."; else if (ref == "." 
or alts.empty() or alts[0] == ".") - return "SVTYPE=INDEL"; + return "INDEL"; else if (ref.length() == 1 and !alts.empty() and alts[0].length() == 1) - return "SVTYPE=SNP"; + return "SNP"; else if (!alts.empty() and alts[0].length() == ref.length()) - return "SVTYPE=PH_SNPs"; + return "PH_SNPs"; else if (!alts.empty() and ref.length() < alts[0].length() - and ref.compare(0, ref.length(), alts[0], 0, ref.length()) == 0) - return "SVTYPE=INDEL"; + and ref.compare(0, ref.length(), alts[0], 0, ref.length()) == 0) + return "INDEL"; else if (!alts.empty() and alts[0].length() < ref.length() - and alts[0].compare(0, alts[0].length(), ref, 0, alts[0].length()) == 0) - return "SVTYPE=INDEL"; + and alts[0].compare(0, alts[0].length(), ref, 0, alts[0].length()) == 0) + return "INDEL"; else - return "SVTYPE=COMPLEX"; + return "COMPLEX"; +} + +std::string VCFRecord::variant_class_info_entry() const { + auto vc{this->infer_variant_class()}; + if (vc == ".") { + return vc; + } else { + return VCF::VARIANT_CLASS_ID + "=" + vc; + } } std::string VCFRecord::get_format( @@ -336,4 +344,21 @@ void VCFRecord::reset_sample_infos_to_contain_the_given_number_of_samples( sampleIndex_to_sampleInfo.clear(); sampleIndex_to_sampleInfo.emplace_back_several_empty_sample_infos( number_of_samples, get_number_of_alleles(), parent_vcf->genotyping_options); -} \ No newline at end of file +} + +inline bool VCFRecord::variant_class_is_snp() const +{ + return this->info.find(VCF::VARIANT_CLASS_ID + "=SNP") != std::string::npos; +} +inline bool VCFRecord::variant_class_is_indel() const +{ + return this->info.find(VCF::VARIANT_CLASS_ID + "=INDEL") != std::string::npos; +} +inline bool VCFRecord::variant_class_is_phased_snps() const +{ + return this->info.find(VCF::VARIANT_CLASS_ID + "=PH_SNPs") != std::string::npos; +} +inline bool VCFRecord::variant_class_is_complex() const +{ + return this->info.find(VCF::VARIANT_CLASS_ID + "=COMPLEX") != std::string::npos; +} diff --git a/test/localPRG_test.cpp 
b/test/localPRG_test.cpp index 873d0ee8..c27cb4d8 100644 --- a/test/localPRG_test.cpp +++ b/test/localPRG_test.cpp @@ -1148,7 +1148,7 @@ TEST(LocalPRGTest, build_vcf) EXPECT_EQ((uint)1, vcf.get_records()[0]->get_pos()); EXPECT_EQ("GC", vcf.get_records()[0]->get_ref()); EXPECT_EQ("G", vcf.get_records()[0]->get_alts()[0]); - EXPECT_EQ("SVTYPE=INDEL;GRAPHTYPE=SIMPLE", vcf.get_records()[0]->info); + EXPECT_EQ(VCF::VARIANT_CLASS_ID + "=INDEL;GRAPHTYPE=SIMPLE", vcf.get_records()[0]->info); vcf = create_VCF_with_default_parameters(); vector lmp = { l2.prg.nodes[0], l2.prg.nodes[2], l2.prg.nodes[3] }; @@ -1159,7 +1159,7 @@ TEST(LocalPRGTest, build_vcf) EXPECT_EQ((uint)1, vcf.get_records()[0]->get_pos()); EXPECT_EQ("G", vcf.get_records()[0]->get_ref()); EXPECT_EQ("GC", vcf.get_records()[0]->get_alts()[0]); - EXPECT_EQ("SVTYPE=INDEL;GRAPHTYPE=SIMPLE", vcf.get_records()[0]->info); + EXPECT_EQ(VCF::VARIANT_CLASS_ID + "=INDEL;GRAPHTYPE=SIMPLE", vcf.get_records()[0]->info); vcf = create_VCF_with_default_parameters(); l3.build_vcf_from_reference_path(vcf, l3.prg.top_path()); @@ -1170,11 +1170,11 @@ TEST(LocalPRGTest, build_vcf) EXPECT_EQ((uint)1, vcf.get_records()[0]->get_pos()); EXPECT_EQ("GC", vcf.get_records()[0]->get_ref()); EXPECT_EQ("G", vcf.get_records()[0]->get_alts()[0]); - EXPECT_EQ("SVTYPE=INDEL;GRAPHTYPE=NESTED", vcf.get_records()[0]->info); + EXPECT_EQ(VCF::VARIANT_CLASS_ID + "=INDEL;GRAPHTYPE=NESTED", vcf.get_records()[0]->info); EXPECT_EQ((uint)2, vcf.get_records()[1]->get_pos()); EXPECT_EQ("C", vcf.get_records()[1]->get_ref()); EXPECT_EQ("T", vcf.get_records()[1]->get_alts()[0]); - EXPECT_EQ("SVTYPE=SNP;GRAPHTYPE=NESTED", vcf.get_records()[1]->info); + EXPECT_EQ(VCF::VARIANT_CLASS_ID + "=SNP;GRAPHTYPE=NESTED", vcf.get_records()[1]->info); vcf = create_VCF_with_default_parameters(); ; @@ -1187,11 +1187,11 @@ TEST(LocalPRGTest, build_vcf) EXPECT_EQ((uint)1, vcf.get_records()[0]->get_pos()); EXPECT_EQ("GT", vcf.get_records()[0]->get_ref()); EXPECT_EQ("G", 
vcf.get_records()[0]->get_alts()[0]); - EXPECT_EQ("SVTYPE=INDEL;GRAPHTYPE=NESTED", vcf.get_records()[0]->info); + EXPECT_EQ(VCF::VARIANT_CLASS_ID + "=INDEL;GRAPHTYPE=NESTED", vcf.get_records()[0]->info); EXPECT_EQ((uint)2, vcf.get_records()[1]->get_pos()); EXPECT_EQ("T", vcf.get_records()[1]->get_ref()); EXPECT_EQ("C", vcf.get_records()[1]->get_alts()[0]); - EXPECT_EQ("SVTYPE=SNP;GRAPHTYPE=NESTED", vcf.get_records()[1]->info); + EXPECT_EQ(VCF::VARIANT_CLASS_ID + "=SNP;GRAPHTYPE=NESTED", vcf.get_records()[1]->info); vcf = create_VCF_with_default_parameters(); ; @@ -1203,11 +1203,11 @@ TEST(LocalPRGTest, build_vcf) EXPECT_EQ((uint)1, vcf.get_records()[0]->get_pos()); EXPECT_EQ("G", vcf.get_records()[0]->get_ref()); EXPECT_EQ("GC", vcf.get_records()[0]->get_alts()[0]); - EXPECT_EQ("SVTYPE=INDEL;GRAPHTYPE=SIMPLE", vcf.get_records()[0]->info); + EXPECT_EQ(VCF::VARIANT_CLASS_ID + "=INDEL;GRAPHTYPE=SIMPLE", vcf.get_records()[0]->info); EXPECT_EQ((uint)1, vcf.get_records()[1]->get_pos()); EXPECT_EQ("G", vcf.get_records()[1]->get_ref()); EXPECT_EQ("GT", vcf.get_records()[1]->get_alts()[0]); - EXPECT_EQ("SVTYPE=INDEL;GRAPHTYPE=SIMPLE", vcf.get_records()[1]->info); + EXPECT_EQ(VCF::VARIANT_CLASS_ID + "=INDEL;GRAPHTYPE=SIMPLE", vcf.get_records()[1]->info); vcf = create_VCF_with_default_parameters(); ; @@ -1219,29 +1219,29 @@ TEST(LocalPRGTest, build_vcf) EXPECT_EQ((uint)119, vcf.get_records()[0]->get_pos()); EXPECT_EQ("T", vcf.get_records()[0]->get_ref()); EXPECT_EQ("C", vcf.get_records()[0]->get_alts()[0]); - EXPECT_EQ("SVTYPE=SNP;GRAPHTYPE=SIMPLE", vcf.get_records()[0]->info); + EXPECT_EQ(VCF::VARIANT_CLASS_ID + "=SNP;GRAPHTYPE=SIMPLE", vcf.get_records()[0]->info); EXPECT_EQ((uint)158, vcf.get_records()[1]->get_pos()); EXPECT_EQ("TTCACTGACTGATGACCGAGTGCTGAAAGAAGTCATGCGACTGGGGGCGTTG", vcf.get_records()[1]->get_ref()); EXPECT_EQ("CTCACTGACTGATGATCGGGTACTGAAAGAAGTTATGAGACTGGGGGCGTTA", vcf.get_records()[1]->get_alts()[0]); - EXPECT_EQ("SVTYPE=PH_SNPs;GRAPHTYPE=SIMPLE", 
vcf.get_records()[1]->info); + EXPECT_EQ(VCF::VARIANT_CLASS_ID + "=PH_SNPs;GRAPHTYPE=SIMPLE", vcf.get_records()[1]->info); EXPECT_EQ((uint)251, vcf.get_records()[2]->get_pos()); EXPECT_EQ("A", vcf.get_records()[2]->get_ref()); EXPECT_EQ("G", vcf.get_records()[2]->get_alts()[0]); - EXPECT_EQ("SVTYPE=SNP;GRAPHTYPE=SIMPLE", vcf.get_records()[2]->info); + EXPECT_EQ(VCF::VARIANT_CLASS_ID + "=SNP;GRAPHTYPE=SIMPLE", vcf.get_records()[2]->info); EXPECT_EQ((uint)272, vcf.get_records()[3]->get_pos()); EXPECT_EQ("A", vcf.get_records()[3]->get_ref()); EXPECT_EQ("C", vcf.get_records()[3]->get_alts()[0]); - EXPECT_EQ("SVTYPE=SNP;GRAPHTYPE=SIMPLE", vcf.get_records()[3]->info); + EXPECT_EQ(VCF::VARIANT_CLASS_ID + "=SNP;GRAPHTYPE=SIMPLE", vcf.get_records()[3]->info); EXPECT_EQ((uint)293, vcf.get_records()[4]->get_pos()); EXPECT_EQ("G", vcf.get_records()[4]->get_ref()); EXPECT_EQ("T", vcf.get_records()[4]->get_alts()[0]); - EXPECT_EQ("SVTYPE=SNP;GRAPHTYPE=SIMPLE", vcf.get_records()[4]->info); + EXPECT_EQ(VCF::VARIANT_CLASS_ID + "=SNP;GRAPHTYPE=SIMPLE", vcf.get_records()[4]->info); vcf = create_VCF_with_default_parameters(); ; @@ -1256,29 +1256,29 @@ TEST(LocalPRGTest, build_vcf) EXPECT_EQ((uint)119, vcf.get_records()[0]->get_pos()); EXPECT_EQ("C", vcf.get_records()[0]->get_ref()); EXPECT_EQ("T", vcf.get_records()[0]->get_alts()[0]); - EXPECT_EQ("SVTYPE=SNP;GRAPHTYPE=SIMPLE", vcf.get_records()[0]->info); + EXPECT_EQ(VCF::VARIANT_CLASS_ID + "=SNP;GRAPHTYPE=SIMPLE", vcf.get_records()[0]->info); EXPECT_EQ((uint)158, vcf.get_records()[1]->get_pos()); EXPECT_EQ("TTCACTGACTGATGACCGAGTGCTGAAAGAAGTCATGCGACTGGGGGCGTTG", vcf.get_records()[1]->get_ref()); EXPECT_EQ("CTCACTGACTGATGATCGGGTACTGAAAGAAGTTATGAGACTGGGGGCGTTA", vcf.get_records()[1]->get_alts()[0]); - EXPECT_EQ("SVTYPE=PH_SNPs;GRAPHTYPE=SIMPLE", vcf.get_records()[1]->info); + EXPECT_EQ(VCF::VARIANT_CLASS_ID + "=PH_SNPs;GRAPHTYPE=SIMPLE", vcf.get_records()[1]->info); EXPECT_EQ((uint)251, vcf.get_records()[2]->get_pos()); EXPECT_EQ("G", 
vcf.get_records()[2]->get_ref()); EXPECT_EQ("A", vcf.get_records()[2]->get_alts()[0]); - EXPECT_EQ("SVTYPE=SNP;GRAPHTYPE=SIMPLE", vcf.get_records()[2]->info); + EXPECT_EQ(VCF::VARIANT_CLASS_ID + "=SNP;GRAPHTYPE=SIMPLE", vcf.get_records()[2]->info); EXPECT_EQ((uint)272, vcf.get_records()[3]->get_pos()); EXPECT_EQ("A", vcf.get_records()[3]->get_ref()); EXPECT_EQ("C", vcf.get_records()[3]->get_alts()[0]); - EXPECT_EQ("SVTYPE=SNP;GRAPHTYPE=SIMPLE", vcf.get_records()[3]->info); + EXPECT_EQ(VCF::VARIANT_CLASS_ID + "=SNP;GRAPHTYPE=SIMPLE", vcf.get_records()[3]->info); EXPECT_EQ((uint)293, vcf.get_records()[4]->get_pos()); EXPECT_EQ("T", vcf.get_records()[4]->get_ref()); EXPECT_EQ("G", vcf.get_records()[4]->get_alts()[0]); - EXPECT_EQ("SVTYPE=SNP;GRAPHTYPE=SIMPLE", vcf.get_records()[4]->info); + EXPECT_EQ(VCF::VARIANT_CLASS_ID + "=SNP;GRAPHTYPE=SIMPLE", vcf.get_records()[4]->info); vcf = create_VCF_with_default_parameters(); ; diff --git a/test/vcf_test.cpp b/test/vcf_test.cpp index 0119ca5a..e5c800d4 100644 --- a/test/vcf_test.cpp +++ b/test/vcf_test.cpp @@ -1440,8 +1440,8 @@ TEST(VCFTest___merge_multi_allelic, vcf.add_samples({ "sample1", "sample2" }); // add two bi-allelic records to be merged - vcf.add_record("chrom1", 5, "A", "C", "SVTYPE=SNP", "GRAPHTYPE=SIMPLE"); - vcf.add_record("chrom1", 5, "A", "G", "SVTYPE=SNP", "GRAPHTYPE=SIMPLE"); + vcf.add_record("chrom1", 5, "A", "C", VCF::VARIANT_CLASS_ID + "=SNP", "GRAPHTYPE=SIMPLE"); + vcf.add_record("chrom1", 5, "A", "G", VCF::VARIANT_CLASS_ID + "=SNP", "GRAPHTYPE=SIMPLE"); // causing conflict and using genotype from the coverages to solve vcf.get_records()[0]->sampleIndex_to_sampleInfo[0].set_gt_from_max_likelihood_path( @@ -2147,7 +2147,7 @@ TEST_F(VCFTest___header___Fixture, header) "##ALT=\n" "##ALT=\n" - "##INFO=\n" + "##INFO=\n" "##ALT=\n" "##ALT=\n" @@ -2204,17 +2204,17 @@ class VCFTest___to_string___Fixture : public ::testing::Test { : vcf_with_all_records( std::make_shared(&default_genotyping_options)) , 
graph_type_is_simple_sv_is_snp(VCFRecord(vcf_with_all_records.get(), "0", 0, - "0", "0", "SVTYPE=SNP", "GRAPHTYPE=SIMPLE")) + "0", "0", VCF::VARIANT_CLASS_ID + "=SNP", "GRAPHTYPE=SIMPLE")) , graph_type_is_nested_sv_is_snp(VCFRecord(vcf_with_all_records.get(), "0", 1, - "0", "0", "SVTYPE=SNP", "GRAPHTYPE=NESTED")) + "0", "0", VCF::VARIANT_CLASS_ID + "=SNP", "GRAPHTYPE=NESTED")) , graph_type_has_too_many_alts_sv_is_snp(VCFRecord(vcf_with_all_records.get(), - "0", 2, "0", "0", "SVTYPE=SNP", "GRAPHTYPE=TOO_MANY_ALTS")) + "0", 2, "0", "0", VCF::VARIANT_CLASS_ID + "=SNP", "GRAPHTYPE=TOO_MANY_ALTS")) , graph_type_is_simple_sv_is_indel(VCFRecord(vcf_with_all_records.get(), "0", 3, - "0", "0", "SVTYPE=INDEL", "GRAPHTYPE=SIMPLE")) + "0", "0", VCF::VARIANT_CLASS_ID + "=INDEL", "GRAPHTYPE=SIMPLE")) , graph_type_is_simple_sv_is_ph_snps(VCFRecord(vcf_with_all_records.get(), "0", - 4, "0", "0", "SVTYPE=PH_SNPs", "GRAPHTYPE=SIMPLE")) + 4, "0", "0", VCF::VARIANT_CLASS_ID + "=PH_SNPs", "GRAPHTYPE=SIMPLE")) , graph_type_is_simple_sv_is_complex(VCFRecord(vcf_with_all_records.get(), "0", - 5, "0", "0", "SVTYPE=COMPLEX", "GRAPHTYPE=SIMPLE")) + 5, "0", "0", VCF::VARIANT_CLASS_ID + "=COMPLEX", "GRAPHTYPE=SIMPLE")) , record_with_dot_allele( VCFRecord(vcf_with_all_records.get(), "0", 6, ".", ".", ".", ".")) { @@ -2245,72 +2245,72 @@ class VCFTest___to_string___Fixture : public ::testing::Test { void TearDown() override { } }; -TEST_F(VCFTest___to_string___Fixture, graph_type_is_simple_sv_is_snp) +TEST_F(VCFTest___to_string___Fixture, graph_type_is_simple_vc_is_snp) { std::string actual = vcf_with_all_records->to_string( true, false, false, true, false, false, true, false, false, false); - std::string expected = "##Dummy_header;\n0\t1\t.\t0\t0\t.\t.\tSVTYPE=SNP;GRAPHTYPE=" + std::string expected = "##Dummy_header;\n0\t1\t.\t0\t0\t.\t.\tVC=SNP;GRAPHTYPE=" "SIMPLE\tGT:MEAN_FWD_COVG:MEAN_REV_COVG:MED_FWD_COVG:MED_" "REV_COVG:SUM_FWD_COVG:SUM_REV_COVG:GAPS\t\n"; EXPECT_EQ(actual, expected); } 
-TEST_F(VCFTest___to_string___Fixture, graph_type_is_nested_sv_is_snp) +TEST_F(VCFTest___to_string___Fixture, graph_type_is_nested_vc_is_snp) { std::string actual = vcf_with_all_records->to_string( true, false, false, false, true, false, true, false, false, false); - std::string expected = "##Dummy_header;\n0\t2\t.\t0\t0\t.\t.\tSVTYPE=SNP;GRAPHTYPE=" + std::string expected = "##Dummy_header;\n0\t2\t.\t0\t0\t.\t.\t" + VCF::VARIANT_CLASS_ID + "=SNP;GRAPHTYPE=" "NESTED\tGT:MEAN_FWD_COVG:MEAN_REV_COVG:MED_FWD_COVG:MED_" "REV_COVG:SUM_FWD_COVG:SUM_REV_COVG:GAPS\t\n"; EXPECT_EQ(actual, expected); } -TEST_F(VCFTest___to_string___Fixture, graph_type_has_too_many_alts_sv_is_snp) +TEST_F(VCFTest___to_string___Fixture, graph_type_has_too_many_alts_vc_is_snp) { std::string actual = vcf_with_all_records->to_string( true, false, false, false, false, true, true, false, false, false); - std::string expected = "##Dummy_header;\n0\t3\t.\t0\t0\t.\t.\tSVTYPE=SNP;GRAPHTYPE=" + std::string expected = "##Dummy_header;\n0\t3\t.\t0\t0\t.\t.\t" + VCF::VARIANT_CLASS_ID + "=SNP;GRAPHTYPE=" "TOO_MANY_ALTS\tGT:MEAN_FWD_COVG:MEAN_REV_COVG:MED_FWD_COVG:" "MED_REV_COVG:SUM_FWD_COVG:SUM_REV_COVG:GAPS\t\n"; EXPECT_EQ(actual, expected); } -TEST_F(VCFTest___to_string___Fixture, graph_type_is_simple_sv_is_indel) +TEST_F(VCFTest___to_string___Fixture, graph_type_is_simple_vc_is_indel) { std::string actual = vcf_with_all_records->to_string( true, false, false, true, false, false, false, true, false, false); - std::string expected = "##Dummy_header;\n0\t4\t.\t0\t0\t.\t.\tSVTYPE=INDEL;" + std::string expected = "##Dummy_header;\n0\t4\t.\t0\t0\t.\t.\t" + VCF::VARIANT_CLASS_ID + "=INDEL;" "GRAPHTYPE=SIMPLE\tGT:MEAN_FWD_COVG:MEAN_REV_COVG:MED_FWD_" "COVG:MED_REV_COVG:SUM_FWD_COVG:SUM_REV_COVG:GAPS\t\n"; EXPECT_EQ(actual, expected); } -TEST_F(VCFTest___to_string___Fixture, graph_type_is_simple_sv_is_ph_snps) +TEST_F(VCFTest___to_string___Fixture, graph_type_is_simple_vc_is_ph_snps) { std::string actual = 
vcf_with_all_records->to_string( true, false, false, true, false, false, false, false, true, false); - std::string expected = "##Dummy_header;\n0\t5\t.\t0\t0\t.\t.\tSVTYPE=PH_SNPs;" + std::string expected = "##Dummy_header;\n0\t5\t.\t0\t0\t.\t.\t" + VCF::VARIANT_CLASS_ID + "=PH_SNPs;" "GRAPHTYPE=SIMPLE\tGT:MEAN_FWD_COVG:MEAN_REV_COVG:MED_FWD_" "COVG:MED_REV_COVG:SUM_FWD_COVG:SUM_REV_COVG:GAPS\t\n"; EXPECT_EQ(actual, expected); } -TEST_F(VCFTest___to_string___Fixture, graph_type_is_simple_sv_is_complex) +TEST_F(VCFTest___to_string___Fixture, graph_type_is_simple_vc_is_complex) { std::string actual = vcf_with_all_records->to_string( true, false, false, true, false, false, false, false, false, true); - std::string expected = "##Dummy_header;\n0\t6\t.\t0\t0\t.\t.\tSVTYPE=COMPLEX;" + std::string expected = "##Dummy_header;\n0\t6\t.\t0\t0\t.\t.\t" + VCF::VARIANT_CLASS_ID + "=COMPLEX;" "GRAPHTYPE=SIMPLE\tGT:MEAN_FWD_COVG:MEAN_REV_COVG:MED_FWD_" "COVG:MED_REV_COVG:SUM_FWD_COVG:SUM_REV_COVG:GAPS\t\n"; @@ -2346,22 +2346,22 @@ TEST_F(VCFTest___to_string___Fixture, no_records_filtered_out) std::string expected = "##Dummy_header;\n"; expected - += "0\t1\t.\t0\t0\t.\t.\tSVTYPE=SNP;GRAPHTYPE=SIMPLE\tGT:MEAN_FWD_COVG:MEAN_" + += "0\t1\t.\t0\t0\t.\t.\t" + VCF::VARIANT_CLASS_ID + "=SNP;GRAPHTYPE=SIMPLE\tGT:MEAN_FWD_COVG:MEAN_" "REV_COVG:MED_FWD_COVG:MED_REV_COVG:SUM_FWD_COVG:SUM_REV_COVG:GAPS\t\n"; expected - += "0\t2\t.\t0\t0\t.\t.\tSVTYPE=SNP;GRAPHTYPE=NESTED\tGT:MEAN_FWD_COVG:MEAN_" + += "0\t2\t.\t0\t0\t.\t.\t" + VCF::VARIANT_CLASS_ID + "=SNP;GRAPHTYPE=NESTED\tGT:MEAN_FWD_COVG:MEAN_" "REV_COVG:MED_FWD_COVG:MED_REV_COVG:SUM_FWD_COVG:SUM_REV_COVG:GAPS\t\n"; expected - += "0\t3\t.\t0\t0\t.\t.\tSVTYPE=SNP;GRAPHTYPE=TOO_MANY_ALTS\tGT:MEAN_FWD_COVG:" + += "0\t3\t.\t0\t0\t.\t.\t" + VCF::VARIANT_CLASS_ID + "=SNP;GRAPHTYPE=TOO_MANY_ALTS\tGT:MEAN_FWD_COVG:" "MEAN_REV_COVG:MED_FWD_COVG:MED_REV_COVG:SUM_FWD_COVG:SUM_REV_COVG:GAPS\t\n"; expected - += 
"0\t4\t.\t0\t0\t.\t.\tSVTYPE=INDEL;GRAPHTYPE=SIMPLE\tGT:MEAN_FWD_COVG:MEAN_" + += "0\t4\t.\t0\t0\t.\t.\t" + VCF::VARIANT_CLASS_ID + "=INDEL;GRAPHTYPE=SIMPLE\tGT:MEAN_FWD_COVG:MEAN_" "REV_COVG:MED_FWD_COVG:MED_REV_COVG:SUM_FWD_COVG:SUM_REV_COVG:GAPS\t\n"; expected - += "0\t5\t.\t0\t0\t.\t.\tSVTYPE=PH_SNPs;GRAPHTYPE=SIMPLE\tGT:MEAN_FWD_COVG:" + += "0\t5\t.\t0\t0\t.\t.\t" + VCF::VARIANT_CLASS_ID + "=PH_SNPs;GRAPHTYPE=SIMPLE\tGT:MEAN_FWD_COVG:" "MEAN_REV_COVG:MED_FWD_COVG:MED_REV_COVG:SUM_FWD_COVG:SUM_REV_COVG:GAPS\t\n"; expected - += "0\t6\t.\t0\t0\t.\t.\tSVTYPE=COMPLEX;GRAPHTYPE=SIMPLE\tGT:MEAN_FWD_COVG:" + += "0\t6\t.\t0\t0\t.\t.\t" + VCF::VARIANT_CLASS_ID + "=COMPLEX;GRAPHTYPE=SIMPLE\tGT:MEAN_FWD_COVG:" "MEAN_REV_COVG:MED_FWD_COVG:MED_REV_COVG:SUM_FWD_COVG:SUM_REV_COVG:GAPS\t\n"; expected += "0\t7\t.\t.\t.\t.\t.\t.;.\tGT:MEAN_FWD_COVG:MEAN_REV_COVG:MED_FWD_COVG:" "MED_REV_COVG:SUM_FWD_COVG:SUM_REV_COVG:GAPS\t\n"; @@ -2369,92 +2369,6 @@ TEST_F(VCFTest___to_string___Fixture, no_records_filtered_out) EXPECT_EQ(actual, expected); } -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// OLD serialization TESTS -// WILL NOT BE READDED AS WE DO NOT NEED FULL SERIALIZATION OF VCFs (ONLY SAVE, WHICH -// JUST WRAPS VCF::to_string()), WHICH IS TESTED WITH A NEW TEST -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// class VCFTest___serialization___Fixture : public ::testing::Test { -// protected: -// VCF vcf_with_zero_records; -// VCF vcf_with_one_record; -// VCF vcf_with_three_records; -// void SetUp() override { -// { -// vcf_with_one_record.add_record("chrom1", 5, "A", "G", -// "GRAPHTYPE=SIMPLE;SVTYPE=SNP"); -// } -// -// { -// vcf_with_three_records.add_record("chrom1", 5, "A", "G", -// "GRAPHTYPE=SIMPLE;SVTYPE=SNP"); -// vcf_with_three_records.add_record("chrom1", 46, "T", "TA", -// "GRAPHTYPE=SIMPLE;SVTYPE=SNP"); VCFRecord 
vcf_record = VCFRecord("chrom1", -// 79, "C", "G", "GRAPHTYPE=SIMPLE;SVTYPE=SNP"); std::vector -// empty_sample_names = {}; vcf_with_three_records.add_record(vcf_record, -// empty_sample_names); -// } -// } -// -// void TearDown() override { -// } -//}; -// -// TEST_F(VCFTest___serialization___Fixture, -// save_vcf_with_zero_records___load_vcf___expect_equal_vcf) { -// vcf_with_zero_records.save("vcf_serialization_test_zero.vcf"); -// -// VCF actual; -// actual.load("vcf_serialization_test_zero.vcf"); -// -// VCF& expected = vcf_with_zero_records; -// EXPECT_EQ(actual, expected); -//} -// -// TEST_F(VCFTest___serialization___Fixture, -// save_vcf_with_one_record___load_vcf___expect_equal_vcf) { -// vcf_with_one_record.save("vcf_serialization_test_one.vcf"); -// -// VCF actual; -// actual.load("vcf_serialization_test_one.vcf"); -// -// VCF& expected = vcf_with_one_record; -// EXPECT_EQ(actual, expected); -//} -// -// -// TEST_F(VCFTest___serialization___Fixture, -// save_vcf_with_three_records___load_vcf___expect_equal_vcf) { -// vcf_with_three_records.save("vcf_serialization_test_three.vcf"); -// -// VCF actual; -// actual.load("vcf_serialization_test_three.vcf"); -// -// VCF& expected = vcf_with_three_records; -// EXPECT_EQ(actual, expected); -//} -// -// -// TEST(VCFTest, filter) { -// VCF vcf, vcf1, vcf2, vcf3, vcf4; -// vcf.add_record("chrom1", 5, "A", "G", "SVTYPE=SNP;GRAPHTYPE=SIMPLE"); -// vcf.add_record("chrom1", 46, "T", "TA", "SVTYPE=INDEL;GRAPHTYPE=NESTED"); -// vcf.add_record("chrom1", 79, "CTT", "GTA", "SVTYPE=PH_SNPs;GRAPHTYPE=SIMPLE"); -// vcf.add_record("chrom1", 79, "CTT", "ATA", "SVTYPE=PH_SNPs;GRAPHTYPE=NESTED"); -// vcf.save("vcf_filter_test.vcf", false, true, false, false, true, false, true, -// false); -// -// vcf1.add_record("chrom1", 5, "A", "G", "SVTYPE=SNP;GRAPHTYPE=SIMPLE"); -// vcf1.add_record("chrom1", 79, "CTT", "GTA", "SVTYPE=PH_SNPs;GRAPHTYPE=SIMPLE"); -// vcf2.load("vcf_filter_test.vcf"); -// EXPECT_EQ(vcf2, vcf1); -// -// 
vcf.save("vcf_filter_test.vcf", false, true, true, false, false, false, true, -// false); vcf3.add_record("chrom1", 79, "CTT", "GTA", -// "SVTYPE=PH_SNPs;GRAPHTYPE=SIMPLE"); vcf3.add_record("chrom1", 79, "CTT", "ATA", -// "SVTYPE=PH_SNPs;GRAPHTYPE=NESTED"); vcf4.load("vcf_filter_test.vcf"); -// EXPECT_EQ(vcf3, vcf4); -//} class VCFTest___save___Fixture : public ::testing::Test { protected: diff --git a/test/vcfrecord_test.cpp b/test/vcfrecord_test.cpp index db857b53..942632d9 100644 --- a/test/vcfrecord_test.cpp +++ b/test/vcfrecord_test.cpp @@ -41,7 +41,7 @@ TEST(VCFRecordTest, create_with_values) EXPECT_EQ("T", vr.get_alts()[0]); EXPECT_EQ(".", vr.qual); EXPECT_EQ(".", vr.filter); - EXPECT_EQ("SVTYPE=SNP", vr.info); + EXPECT_EQ(VCF::VARIANT_CLASS_ID + "=SNP", vr.info); EXPECT_EQ(1, vr.sampleIndex_to_sampleInfo.size()); EXPECT_EQ((uint)2, vr.sampleIndex_to_sampleInfo[0].get_number_of_alleles()); } @@ -60,7 +60,7 @@ TEST(VCFRecordTest, create_from_record) EXPECT_EQ("T", vr.get_alts()[0]); EXPECT_EQ(".", vr.qual); EXPECT_EQ(".", vr.filter); - EXPECT_EQ("SVTYPE=SNP", vr.info); + EXPECT_EQ(VCF::VARIANT_CLASS_ID + "=SNP", vr.info); EXPECT_EQ(1, vr.sampleIndex_to_sampleInfo.size()); EXPECT_EQ((uint)2, vr.sampleIndex_to_sampleInfo[0].get_number_of_alleles()); } @@ -79,7 +79,7 @@ TEST(VCFRecordTest, create_from_record_with_samples) EXPECT_EQ("T", vr.get_alts()[0]); EXPECT_EQ(".", vr.qual); EXPECT_EQ(".", vr.filter); - EXPECT_EQ("SVTYPE=SNP", vr.info); + EXPECT_EQ(VCF::VARIANT_CLASS_ID + "=SNP", vr.info); EXPECT_EQ(1, vr.sampleIndex_to_sampleInfo.size()); EXPECT_EQ((uint)2, vr.sampleIndex_to_sampleInfo[0].get_number_of_alleles()); } @@ -738,348 +738,4 @@ TEST_F(VCFRecordTest___can_biallelic_record_be_merged_into_this______Fixture, bool actual = vcf_record_ref_A.can_biallelic_record_be_merged_into_this( vcf_record_ref_A_same_alt); EXPECT_FALSE(actual); -} - 
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// PREVIOUS TESTS FROM VCF_RECORD FOLLOW -// COMMENTED OUT == NO NEED ANYMORE AND I PUT THE REASON WHY IT IS NOT NEEDED -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// format-related methods -// REASON WHY THESE ARE COMMENTED OUT: NO NEED ANYMORE, FORMATS ARE NOT VARIABLE -// ANYMORE, WHICH RENDERS THE CODE SIMPLER -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// TEST(VCFRecordTest, add_formats_none) -//{ -// VCFRecord vr(&vcf, "chrom1", 3, "A", "T"); -// vector new_formats = {}; -// vr.add_formats(new_formats); -// vector expected_formats = { "GT" }; -// EXPECT_ITERABLE_EQ(vector, expected_formats, vr.format); -//} -// -// TEST(VCFRecordTest, add_formats_some) -//{ -// VCFRecord vr(&vcf, "chrom1", 3, "A", "T"); -// vector new_formats = { "hi", "there" }; -// vr.add_formats(new_formats); -// vector expected_formats = { "GT", "hi", "there" }; -// EXPECT_ITERABLE_EQ(vector, expected_formats, vr.format); -//} -// -// TEST(VCFRecordTest, add_formats_some_repeat) -//{ -// VCFRecord vr(&vcf, "chrom1", 3, "A", "T"); -// vector new_formats = { "hi", "there" }; -// vr.add_formats(new_formats); -// 
vr.add_formats(new_formats); -// vector expected_formats = { "GT", "hi", "there" }; -// EXPECT_ITERABLE_EQ(vector, expected_formats, vr.format); -//} -// -// TEST(VCFRecordTest, add_formats_some_overlapping) -//{ -// VCFRecord vr(&vcf, "chrom1", 3, "A", "T"); -// vector new_formats = { "hi", "there" }; -// vr.add_formats(new_formats); -// new_formats = { "hi", "again" }; -// vr.add_formats(new_formats); -// vector expected_formats = { "GT", "hi", "there", "again" }; -// EXPECT_ITERABLE_EQ(vector, expected_formats, vr.format); -//} -// -// TEST(VCFRecordTest, add_format_death_no_samples) -//{ -// VCFRecord vr(&vcf, "chrom1", 3, "A", "T"); -// uint16_t v = 20; -// EXPECT_DEATH(vr.set_format(0, "hello", v), ""); -// float w = 20.0; -// EXPECT_DEATH(vr.set_format(0, "hello", w), ""); -//} -// -// TEST(VCFRecordTest, add_format_cap_too_big) -//{ -// VCFRecord vr(&vcf, "chrom1", 3, "A", "T"); -// uint32_t v = 60000000; -// unordered_map> m; -// vr.samples.push_back(m); -// vr.set_format(0, "hello", v); -// EXPECT_EQ(vr.get_format_u(0, "hello")[0], std::numeric_limits::max() - -// 1); -//} -// -// TEST(VCFRecordTest, add_format_new_uint) -//{ -// VCFRecord vr(&vcf, "chrom1", 3, "A", "T"); -// unordered_map> m; -// vr.samples.push_back(m); -// uint16_t v = 20; -// vr.set_format(0, "hello", v); -// EXPECT_EQ(vr.samples.size(), 1); -// EXPECT_TRUE(vr.samples[0].find("hello") != vr.samples[0].end()); -// std::vector exp_v = { v }; -// EXPECT_ITERABLE_EQ(std::vector, vr.samples[0]["hello"], exp_v); -// std::vector exp_f = { "GT", "hello" }; -// EXPECT_ITERABLE_EQ(std::vector, vr.format, exp_f); -//} -// -// TEST(VCFRecordTest, add_format_old_uint_overwritten) -//{ -// VCFRecord vr(&vcf, "chrom1", 3, "A", "T"); -// unordered_map> m; -// m["hello"] = { 10 }; -// vr.samples.push_back(m); -// uint16_t v = 20; -// vr.set_format(0, "hello", v); -// EXPECT_EQ(vr.samples.size(), 1); -// EXPECT_TRUE(vr.samples[0].find("hello") != vr.samples[0].end()); -// std::vector exp_v = { v }; -// 
EXPECT_ITERABLE_EQ(std::vector, vr.samples[0]["hello"], exp_v); -// std::vector exp_f = { "GT", "hello" }; -// EXPECT_ITERABLE_EQ(std::vector, vr.format, exp_f); -//} -// -// TEST(VCFRecordTest, add_format_new_float) -//{ -// VCFRecord vr(&vcf, "chrom1", 3, "A", "T"); -// unordered_map> m; -// vr.samples.push_back(m); -// float v = 20.0; -// vr.set_format(0, "hello", v); -// EXPECT_EQ(vr.regt_samples.size(), 1); -// EXPECT_TRUE(vr.regt_samples[0].find("hello") != vr.regt_samples[0].end()); -// std::vector exp_v = { v }; -// EXPECT_ITERABLE_EQ(std::vector, vr.regt_samples[0]["hello"], exp_v); -// std::vector exp_f = { "GT", "hello" }; -// EXPECT_ITERABLE_EQ(std::vector, vr.format, exp_f); -//} -// -// TEST(VCFRecordTest, add_format_old_float_overwritten) -//{ -// VCFRecord vr(&vcf, "chrom1", 3, "A", "T"); -// unordered_map> m; -// m["hello"] = {}; -// m["hello"].push_back(10.0); -// vr.regt_samples.push_back(m); -// float v = 20.0; -// vr.set_format(0, "hello", v); -// EXPECT_EQ(vr.regt_samples.size(), 1); -// EXPECT_TRUE(vr.regt_samples[0].find("hello") != vr.regt_samples[0].end()); -// std::vector exp_v = { v }; -// EXPECT_ITERABLE_EQ(std::vector, vr.regt_samples[0]["hello"], exp_v); -// std::vector exp_f = { "GT", "hello" }; -// EXPECT_ITERABLE_EQ(std::vector, vr.format, exp_f); -//} -// -// TEST(VCFRecordTest, append_format_old_uint) -//{ -// VCFRecord vr(&vcf, "chrom1", 3, "A", "T"); -// unordered_map> m; -// vr.samples.push_back(m); -// uint16_t v = 10; -// vr.set_format(0, "hello", v); -// v = 20; -// vr.append_format(0, "hello", v); -// EXPECT_EQ(vr.samples.size(), 1); -// EXPECT_TRUE(vr.samples[0].find("hello") != vr.samples[0].end()); -// std::vector exp_v = { 10, 20 }; -// EXPECT_ITERABLE_EQ(std::vector, vr.samples[0]["hello"], exp_v); -// std::vector exp_f = { "GT", "hello" }; -// EXPECT_ITERABLE_EQ(std::vector, vr.format, exp_f); -//} -// -// TEST(VCFRecordTest, append_format_old_float) -//{ -// VCFRecord vr(&vcf, "chrom1", 3, "A", "T"); -// 
unordered_map> m; -// vr.regt_samples.push_back(m); -// float v = 10.0; -// vr.set_format(0, "hello", v); -// v = 20.0; -// vr.append_format(0, "hello", v); -// EXPECT_EQ(vr.regt_samples.size(), 1); -// EXPECT_TRUE(vr.regt_samples[0].find("hello") != vr.regt_samples[0].end()); -// std::vector exp_v = { 10.0, 20.0 }; -// EXPECT_ITERABLE_EQ(std::vector, vr.regt_samples[0]["hello"], exp_v); -// std::vector exp_f = { "GT", "hello" }; -// EXPECT_ITERABLE_EQ(std::vector, vr.format, exp_f); -//} -// -// TEST(VCFRecordTest, get_format_float) -//{ -// VCFRecord vr(&vcf, "chrom1", 3, "A", "T"); -// unordered_map> m; -// m.reserve(3); -// vr.regt_samples.push_back(m); -// float v = 10.0; -// vr.set_format(0, "hello", v); -// -// auto res = vr.get_format_f(1, "hello"); -// EXPECT_EQ(res.size(), 0); -// EXPECT_TRUE(res.empty()); -// -// res = vr.get_format_f(0, "help"); -// EXPECT_EQ(res.size(), 0); -// EXPECT_TRUE(res.empty()); -// -// res = vr.get_format_f(0, "hello"); -// EXPECT_EQ(res.size(), 1); -// EXPECT_FALSE(res.empty()); -//} -// -// TEST(VCFRecordTest, get_format_uint) -//{ -// VCFRecord vr(&vcf, "chrom1", 3, "A", "T"); -// unordered_map> m; -// m.reserve(3); -// vr.samples.push_back(m); -// uint16_t v = 10; -// vr.set_format(0, "hello", v); -// -// auto res = vr.get_format_u(1, "hello"); -// EXPECT_EQ(res.size(), 0); -// EXPECT_TRUE(res.empty()); -// -// res = vr.get_format_u(0, "help"); -// EXPECT_EQ(res.size(), 0); -// EXPECT_TRUE(res.empty()); -// -// res = vr.get_format_u(0, "hello"); -// EXPECT_EQ(res.size(), 1); -// EXPECT_FALSE(res.empty()); -//} -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// ostream methods -// ALL ostream TESTS WERE REPLACED BY to_string TESTS - MUCH OF THE COMPLEXITY HERE WAS -// MOVED TO OTHER METHODS/CONCEPTS e.g. 
VCFRecord::alts_to_string(), -// VCFRecord::get_format(), SampleIndexToSampleInfo::to_string(), -// SampleInfo::to_string(), etc... -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// TEST(VCFRecordTest, ostream) { -// VCFRecord vr(&vcf, "chrom1", 3, "A", "T"); -// vector v = {"chrom1", "4", ".", "A", "T", ".", ".", "SVTYPE=SNP", "GT"}; -// stringstream out; -// out << vr; -// string rr; -// for (const auto &s : v) { -// out >> rr; -// EXPECT_EQ(s, rr); -// } -//} -// -// TEST(VCFRecordTest, ostream_with_sample_not_all_info_in_formats) { -// VCFRecord vr(&vcf, "chrom1", 3, "A", "T"); -// SampleInfo sample_info; -// sample_info["GT"] = {1}; -// sample_info["pringle"] = {2}; -// vr.sampleIndex_to_sampleInfo.push_back(sample_info); -// vector v = {"chrom1", "4", ".", "A", "T", ".", ".", "SVTYPE=SNP", "GT"}; -// stringstream out; -// out << vr; -// string rr; -// for (const auto &s : v) { -// out >> rr; -// EXPECT_EQ(s, rr); -// } -// uint16_t u = 1; -// uint ru; -// out >> ru; -// EXPECT_EQ(u, ru); -//} -// -// TEST(VCFRecordTest, ostream_with_sample_including_all_formats) { -// VCFRecord vr(&vcf, "chrom1", 3, "A", "T"); -// SampleInfo sample_info; -// sample_info["GT"] = {0}; -// sample_info["pringle"] = {2}; -// vr.sampleIndex_to_sampleInfo.push_back(sample_info); -// vr.add_formats({"pringle"}); -// vector v = {"chrom1", "4", ".", "A", "T", ".", ".", "SVTYPE=SNP", -// "GT:pringle"}; vector vu = {0, 2}; stringstream out; out << vr; string -// rr; for (const auto &s : v) { -// out >> rr; -// EXPECT_EQ(s, rr); -// } -// uint ru; -// for (const auto &s : vu) { -// out >> ru; -// EXPECT_EQ(s, ru); -// out.ignore(1, ':'); -// } -// -//} -// -// TEST(VCFRecordTest, ostream_with_sample_more_formats_than_info) { -// VCFRecord vr(&vcf, "chrom1", 3, "A", "T"); -// SampleInfo sample_info; -// sample_info["GT"] = {0}; -// vr.sampleIndex_to_sampleInfo.push_back(sample_info); -// 
vr.add_formats({"pringle"}); -// vector v = {"chrom1", "4", ".", "A", "T", ".", ".", "SVTYPE=SNP", -// "GT:pringle"}; vector vu = {0}; stringstream out; out << vr; string rr; -// for (const auto &s : v) { -// out >> rr; -// EXPECT_EQ(s, rr); -// } -// uint ru; -// uint16_t u = 0; -// out >> ru; -// EXPECT_EQ(u, ru); -// out >> rr; -// EXPECT_EQ(":.", rr); -//} -// -// TEST(VCFRecordTest, ostream_with_sample_more_formats_than_info_regt) { -// VCFRecord vr(&vcf, "chrom1", 3, "A", "T"); -// SampleInfo sample_info_int; -// sample_info_int["GT"] = {0}; -// vr.sampleIndex_to_sampleInfo.push_back(sample_info_int); -// SampleInfo sample_info_float; -// sample_info_float["pringle"] = {0.1}; -// vr.sampleIndex_to_sampleInfo.push_back(sample_info_float); -// vr.add_formats({"pringle"}); -// vector v = {"chrom1", "4", ".", "A", "T", ".", ".", "SVTYPE=SNP", -// "GT:pringle"}; stringstream out; out << vr; string rr; for (const auto &s : v) { -// out >> rr; -// EXPECT_EQ(s, rr); -// } -// uint ru; -// uint16_t u = 0; -// out >> ru; -// EXPECT_EQ(u, ru); -// out.ignore(1, ':'); -// float rf = 0.0, f = 0.1; -// out >> rf; -// EXPECT_EQ(f, rf); -//} -// -// TEST(VCFRecordTest, ostream_with_zero_pos) { -// VCFRecord vr(&vcf, "chrom1", 0, "A", "T"); -// SampleInfo sample_info_int; -// sample_info_int["GT"] = {0}; -// vr.sampleIndex_to_sampleInfo.push_back(sample_info_int); -// SampleInfo sample_info_float; -// sample_info_float["pringle"] = {0.1}; -// vr.sampleIndex_to_sampleInfo.push_back(sample_info_float); -// vr.add_formats({"pringle"}); -// vector v = {"chrom1", "1", ".", "A", "T", ".", ".", "SVTYPE=SNP", -// "GT:pringle"}; stringstream out; out << vr; string rr; for (const auto &s : v) { -// out >> rr; -// EXPECT_EQ(s, rr); -// } -// uint ru; -// uint16_t u = 0; -// out >> ru; -// EXPECT_EQ(u, ru); -// out.ignore(1, ':'); -// float rf = 0.0, f = 0.1; -// out >> rf; -// EXPECT_EQ(f, rf); -//} \ No newline at end of file +} \ No newline at end of file From 
444e94cbcc342a496c03f7bb55105ef723367dc8 Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Wed, 6 Oct 2021 17:13:35 +1000 Subject: [PATCH 10/29] update example SVTYPE change --- example/README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/example/README.md b/example/README.md index 0c6781a5..4e649bd6 100644 --- a/example/README.md +++ b/example/README.md @@ -63,9 +63,9 @@ Taking a quick look at an excerpt of `out/output_toy_example_no_denovo/pandora_m ``` #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT toy_sample_1 toy_sample_2 -GC00006032 146 . T C . . SVTYPE=SNP;GRAPHTYPE=SIMPLE GT:MEAN_FWD_COVG:MEAN_REV_COVG:MED_FWD_COVG:MED_REV_COVG:SUM_FWD_COVG:SUM_REV_COVG:GAPS:LIKELIHOOD:GT_CONF 1:0,41:0,52:0,41:0,52:0,83:0,105:1,0:-526.281,-18.7786:507.502 0:15,0:15,0:15,0:15,0:31,0:31,0:0,1:-3.53065,-214.155:210.624 -GC00006032 160 . A C . . SVTYPE=SNP;GRAPHTYPE=SIMPLE GT:MEAN_FWD_COVG:MEAN_REV_COVG:MED_FWD_COVG:MED_REV_COVG:SUM_FWD_COVG:SUM_REV_COVG:GAPS:LIKELIHOOD:GT_CONF 1:0,26:0,40:0,33:0,50:0,106:0,160:1,0.25:-401.941,-17.9221:384.019 0:19,0:12,0:19,0:12,0:38,0:24,0:0,1:-3.32705,-218.76:215.433 -GC00006032 218 . T C . . SVTYPE=SNP;GRAPHTYPE=SIMPLE GT:MEAN_FWD_COVG:MEAN_REV_COVG:MED_FWD_COVG:MED_REV_COVG:SUM_FWD_COVG:SUM_REV_COVG:GAPS:LIKELIHOOD:GT_CONF 1:3,11:4,14:0,11:0,14:12,23:16,28:0.75,0:-182.162,-41.9443:140.217 0:11,0:5,0:13,0:6,0:44,0:21,0:0.25,1:-19.9705,-149.683:129.712 +GC00006032 146 . T C . . VC=SNP;GRAPHTYPE=SIMPLE GT:MEAN_FWD_COVG:MEAN_REV_COVG:MED_FWD_COVG:MED_REV_COVG:SUM_FWD_COVG:SUM_REV_COVG:GAPS:LIKELIHOOD:GT_CONF 1:0,41:0,52:0,41:0,52:0,83:0,105:1,0:-526.281,-18.7786:507.502 0:15,0:15,0:15,0:15,0:31,0:31,0:0,1:-3.53065,-214.155:210.624 +GC00006032 160 . A C . . 
VC=SNP;GRAPHTYPE=SIMPLE GT:MEAN_FWD_COVG:MEAN_REV_COVG:MED_FWD_COVG:MED_REV_COVG:SUM_FWD_COVG:SUM_REV_COVG:GAPS:LIKELIHOOD:GT_CONF 1:0,26:0,40:0,33:0,50:0,106:0,160:1,0.25:-401.941,-17.9221:384.019 0:19,0:12,0:19,0:12,0:38,0:24,0:0,1:-3.32705,-218.76:215.433 +GC00006032 218 . T C . . VC=SNP;GRAPHTYPE=SIMPLE GT:MEAN_FWD_COVG:MEAN_REV_COVG:MED_FWD_COVG:MED_REV_COVG:SUM_FWD_COVG:SUM_REV_COVG:GAPS:LIKELIHOOD:GT_CONF 1:3,11:4,14:0,11:0,14:12,23:16,28:0.75,0:-182.162,-41.9443:140.217 0:11,0:5,0:13,0:6,0:44,0:21,0:0.25,1:-19.9705,-149.683:129.712 ``` We can see samples `toy_sample_1` and `toy_sample_2` genotype towards different alleles. @@ -76,9 +76,9 @@ The VCF (`out/output_toy_example_with_denovo/pandora_multisample_genotyped.vcf`) ``` #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT toy_sample_1.100x.random.illumina toy_sample_2.100x.random.illumina -GC00006032 49 . A G . . SVTYPE=SNP;GRAPHTYPE=SIMPLE GT:MEAN_FWD_COVG:MEAN_REV_COVG:MED_FWD_COVG:MED_REV_COVG:SUM_FWD_COVG:SUM_REV_COVG:GAPS:LIKELIHOOD:GT_CONF 0:44,0:59,0:44,0:59,0:44,0:59,0:0,1:-26.8805,-570.333:543.452 1:0,48:0,50:0,48:0,50:0,97:0,100:1,0:-537.307,-28.9415:508.365 -GC00010897 44 . C T . . SVTYPE=SNP;GRAPHTYPE=SIMPLE GT:MEAN_FWD_COVG:MEAN_REV_COVG:MED_FWD_COVG:MED_REV_COVG:SUM_FWD_COVG:SUM_REV_COVG:GAPS:LIKELIHOOD:GT_CONF 1:0,11:0,16:0,11:0,16:0,23:0,32:1,0:-220.34,-8.03511:212.304 0:22,0:18,0:22,0:18,0:44,0:37,0:0,1:-2.87264,-270.207:267.334 -GC00010897 422 . A T . . SVTYPE=SNP;GRAPHTYPE=SIMPLE GT:MEAN_FWD_COVG:MEAN_REV_COVG:MED_FWD_COVG:MED_REV_COVG:SUM_FWD_COVG:SUM_REV_COVG:GAPS:LIKELIHOOD:GT_CONF 1:0,8:0,5:0,8:0,5:0,16:0,11:1,0:-155.867,-20.2266:135.641 0:12,0:9,0:12,0:9,0:12,0:9,0:0,1:-9.39494,-182.709:173.314 +GC00006032 49 . A G . . VC=SNP;GRAPHTYPE=SIMPLE GT:MEAN_FWD_COVG:MEAN_REV_COVG:MED_FWD_COVG:MED_REV_COVG:SUM_FWD_COVG:SUM_REV_COVG:GAPS:LIKELIHOOD:GT_CONF 0:44,0:59,0:44,0:59,0:44,0:59,0:0,1:-26.8805,-570.333:543.452 1:0,48:0,50:0,48:0,50:0,97:0,100:1,0:-537.307,-28.9415:508.365 +GC00010897 44 . 
C T . . VC=SNP;GRAPHTYPE=SIMPLE GT:MEAN_FWD_COVG:MEAN_REV_COVG:MED_FWD_COVG:MED_REV_COVG:SUM_FWD_COVG:SUM_REV_COVG:GAPS:LIKELIHOOD:GT_CONF 1:0,11:0,16:0,11:0,16:0,23:0,32:1,0:-220.34,-8.03511:212.304 0:22,0:18,0:22,0:18,0:44,0:37,0:0,1:-2.87264,-270.207:267.334 +GC00010897 422 . A T . . VC=SNP;GRAPHTYPE=SIMPLE GT:MEAN_FWD_COVG:MEAN_REV_COVG:MED_FWD_COVG:MED_REV_COVG:SUM_FWD_COVG:SUM_REV_COVG:GAPS:LIKELIHOOD:GT_CONF 1:0,8:0,5:0,8:0,5:0,16:0,11:1,0:-155.867,-20.2266:135.641 0:12,0:9,0:12,0:9,0:12,0:9,0:0,1:-9.39494,-182.709:173.314 ``` ## Extra From 26b86eedc3f25ad21f18129233520a4aed6ac3f0 Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Wed, 6 Oct 2021 17:15:26 +1000 Subject: [PATCH 11/29] update changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index deb356ae..9793b4c6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,9 @@ project adheres to ## [Unreleased] +### Changed +- The VCF INFO field `SVTYPE` has now been changed to `VC` [[#249][249]] + ## [0.9.1] ### Added @@ -120,6 +123,7 @@ from this point will have their changes meticulously documented here. 
[223]: https://github.com/rmcolq/pandora/pull/223 [224]: https://github.com/rmcolq/pandora/pull/224 [234]: https://github.com/rmcolq/pandora/pull/234 +[249]: https://github.com/rmcolq/pandora/issues/249 [265]: https://github.com/rmcolq/pandora/pull/265 From d57a75e88c1f76cf91ec57840a9cb2c05a0ebba9 Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Mon, 11 Oct 2021 11:17:29 +1000 Subject: [PATCH 12/29] replace svtype in test vcf --- test/test_cases/localPRG_test.nested_varsite.vcf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test_cases/localPRG_test.nested_varsite.vcf b/test/test_cases/localPRG_test.nested_varsite.vcf index 2cef571b..19816c8f 100644 --- a/test/test_cases/localPRG_test.nested_varsite.vcf +++ b/test/test_cases/localPRG_test.nested_varsite.vcf @@ -4,11 +4,11 @@ ##ALT= ##ALT= ##ALT= -##INFO= +##INFO= ##ALT= ##ALT= ##ALT= ##INFO= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample -nested varsite 1 . GC G . . SVTYPE=INDEL;GRAPHTYPE=COMPLEX . -nested varsite 1 . GC GT . . SVTYPE=PH_SNPs;GRAPHTYPE=COMPLEX 1 +nested varsite 1 . GC G . . VC=INDEL;GRAPHTYPE=COMPLEX . +nested varsite 1 . GC GT . . VC=PH_SNPs;GRAPHTYPE=COMPLEX 1 From 4beb13c447acace0d2a9a2620ef7ce6a373f35b6 Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Wed, 20 Oct 2021 11:50:47 +1000 Subject: [PATCH 13/29] handle malformatted tsv and no trailing empty line --- CHANGELOG.md | 123 ++++++++++-------- src/utils.cpp | 32 ++--- test/test_cases/malformatted_read_index.tsv | 3 + ...sample_read_index_no_empty_line_at_end.tsv | 3 + test/utils_test.cpp | 22 +++- 5 files changed, 110 insertions(+), 73 deletions(-) create mode 100644 test/test_cases/malformatted_read_index.tsv create mode 100644 test/test_cases/sample_read_index_no_empty_line_at_end.tsv diff --git a/CHANGELOG.md b/CHANGELOG.md index 9793b4c6..3d60bb15 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,96 +2,107 @@ All notable changes to this project will be documented in this file. 
-The format is based on -[Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this -project adheres to -[Semantic Versioning](https://semver.org/spec/v2.0.0.html). +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and +this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] ### Changed + - The VCF INFO field `SVTYPE` has now been changed to `VC` [[#249][249]] +### Fixed + +- More robust TSV file parsing. Empty line no longer required at end [[#213][213]] + ## [0.9.1] ### Added + - `pandora` is now installable through `conda`; - A script to archive the `pandora` repository with git submodules; ### Changed -- Improved the sample example so now we can assert that the output produced is the expected one; -- Changes to the build process that enables `pandora` to be compiled in the `conda` environment; + +- Improved the sample example so now we can assert that the output produced is the + expected one; +- Changes to the build process that enables `pandora` to be compiled in the `conda` + environment; ## [0.9.0] ### Changed + - Version bump from `0.9.0-rc2` to `0.9.0`. ## [0.9.0-rc2] ### Changed -- `pandora discover` now processes one sample at a time, but runs with several threads on the heavy tasks, i.e. when -mapping reads, finding candidate regions, and finding denovo variants. The result is that it now takes a lot less RAM to -run on multiple samples. + +- `pandora discover` now processes one sample at a time, but runs with several threads + on the heavy tasks, i.e. when mapping reads, finding candidate regions, and finding + denovo variants. The result is that it now takes a lot less RAM to run on multiple + samples. ## [0.9.0-rc1] ### Changed -- `pandora discover` now receives read index files describing samples and reads, and discover denovo sequences in these samples. 
- To improve performance on discovering denovo sequences on several samples, `pandora discover` is now multithreaded, but - the performance is still the same as the previous version, i.e. each sample is processed in a single-threaded way; -- `pandora discover` output changed to a proprietary format. See [example](example) for the new output; -- `pandora` can now communicate with a [`make_prg` prototype](https://github.com/leoisl/make_prg) that is able to update PRGs -without needing to realign and remake the PRG. This provides major performance upgrades to running the full `pandora` pipeline -with denovo discovery enabled, and there is no need anymore to use a `snakemake` pipeline -(see [this example](example/run_pandora.sh) to how to run the full pipeline); -- We now use [musl libc](https://musl.libc.org/) instead of [Holy Build Box](https://github.com/phusion/holy-build-box) -to build a precompiled portable binary, removing the dependency on `OpenMP 4.0+` or `GCC 4.9+`, and `GLIBC`; + +- `pandora discover` now receives read index files describing samples and reads, and + discover denovo sequences in these samples. To improve performance on discovering + denovo sequences on several samples, `pandora discover` is now multithreaded, but the + performance is still the same as the previous version, i.e. each sample is processed + in a single-threaded way; +- `pandora discover` output changed to a proprietary format. See [example](example) for + the new output; +- `pandora` can now communicate with a + [`make_prg` prototype](https://github.com/leoisl/make_prg) that is able to update PRGs + without needing to realign and remake the PRG. 
This provides major performance + upgrades to running the full `pandora` pipeline with denovo discovery enabled, and + there is no need anymore to use a `snakemake` pipeline (see + [this example](example/run_pandora.sh) to how to run the full pipeline); +- We now use [musl libc](https://musl.libc.org/) instead of + [Holy Build Box](https://github.com/phusion/holy-build-box) to build a precompiled + portable binary, removing the dependency on `OpenMP 4.0+` or `GCC 4.9+`, and `GLIBC`; ## [0.8.0] ### Added -- We now provide a script to build a portable precompiled binary as - another option to run `pandora` easily. The portable binary is now - provided with the release; -- `pandora` can now provide a meaningful stack trace in case of errors, - to facilitate debugging (need to pass flag `-DPRINT_STACKTRACE` to - `CMake`). Due to this, we now add debug symbols (`-g` flag) to every - `pandora` build type, but this - [does not impact performance](https://stackoverflow.com/a/39223245). - The precompiled binary has this enabled. +- We now provide a script to build a portable precompiled binary as another option to + run `pandora` easily. The portable binary is now provided with the release; +- `pandora` can now provide a meaningful stack trace in case of errors, to facilitate + debugging (need to pass flag `-DPRINT_STACKTRACE` to `CMake`). Due to this, we now add + debug symbols (`-g` flag) to every `pandora` build type, but this + [does not impact performance](https://stackoverflow.com/a/39223245). The precompiled + binary has this enabled. ### Changed -- We now use the [Hunter](https://github.com/cpp-pm/hunter) package - manager, removing the requirement of having `ZLIB` and `Boost` - system-wide installations; -- `GATB` is now a git submodule instead of an external project - downloaded and compiled during compilation time. 
This means that when - git cloning `pandora`, `cgranges` and `GATB` are also - downloaded/cloned, and when preparing the build (running `cmake`), - `Hunter` downloads and installs `Boost`, `GTest` and `ZLIB`. Thus we - still need internet connection to prepare the build (running `cmake`) - but not for compiling (running `make`). +- We now use the [Hunter](https://github.com/cpp-pm/hunter) package manager, removing + the requirement of having `ZLIB` and `Boost` system-wide installations; +- `GATB` is now a git submodule instead of an external project downloaded and compiled + during compilation time. This means that when git cloning `pandora`, `cgranges` and + `GATB` are also downloaded/cloned, and when preparing the build (running `cmake`), + `Hunter` downloads and installs `Boost`, `GTest` and `ZLIB`. Thus we still need + internet connection to prepare the build (running `cmake`) but not for compiling + (running `make`). - We now use a GATB fork that accepts a `ZLIB` custom installation; -- Refactored all thirdparty libraries (`cgranges`, `GATB`, `backward`, - `CLI11`, `inthash`) into their own directory `thirdparty`. +- Refactored all thirdparty libraries (`cgranges`, `GATB`, `backward`, `CLI11`, + `inthash`) into their own directory `thirdparty`. ### Fixed -- Refactored asserts into exceptions, and now `pandora` can be compiled - correctly in the `Release` mode. The build process is thus able to - create a more optimized binary, resulting in improved performance. +- Refactored asserts into exceptions, and now `pandora` can be compiled correctly in the + `Release` mode. The build process is thus able to create a more optimized binary, + resulting in improved performance. - Don't assume Nanopore reads are longer than loci [[#265][265]] - - ## [v0.7.0] -There is a significant amount of changes to the project between version -0.6 and this release. Only major things are listed here. 
Future releases -from this point will have their changes meticulously documented here. +There is a significant amount of changes to the project between version 0.6 and this +release. Only major things are listed here. Future releases from this point will have +their changes meticulously documented here. ### Added @@ -101,8 +112,7 @@ from this point will have their changes meticulously documented here. ### Changed - FASTA/Q files are now parsed with `klib` [[#223][223]] -- command-line interface is now overhauled with many breaking changes - [[#224][224]] +- command-line interface is now overhauled with many breaking changes [[#224][224]] - global genotyping has been made default [[#220][220]] - Various improvements to VCF-related functions @@ -110,20 +120,19 @@ from this point will have their changes meticulously documented here. - k-mer coverage underflow bug in `LocalPRG` [[#183][183]] -[Unreleased]: https://github.com/rmcolq/pandora/compare/0.9.1...HEAD -[0.9.1]: https://github.com/rmcolq/pandora/releases/tag/0.9.1 +[0.8.0]: https://github.com/rmcolq/pandora/releases/tag/0.8.0 [0.9.0]: https://github.com/rmcolq/pandora/releases/tag/0.9.0 -[0.9.0-rc2]: https://github.com/rmcolq/pandora/releases/tag/0.9.0-rc2 [0.9.0-rc1]: https://github.com/rmcolq/pandora/releases/tag/0.9.0-rc1 -[0.8.0]: https://github.com/rmcolq/pandora/releases/tag/0.8.0 -[v0.7.0]: https://github.com/rmcolq/pandora/releases/tag/v0.7.0 - +[0.9.0-rc2]: https://github.com/rmcolq/pandora/releases/tag/0.9.0-rc2 +[0.9.1]: https://github.com/rmcolq/pandora/releases/tag/0.9.1 [183]: https://github.com/rmcolq/pandora/issues/183 +[213]: https://github.com/rmcolq/pandora/issues/213 [220]: https://github.com/rmcolq/pandora/pull/220 [223]: https://github.com/rmcolq/pandora/pull/223 [224]: https://github.com/rmcolq/pandora/pull/224 [234]: https://github.com/rmcolq/pandora/pull/234 [249]: https://github.com/rmcolq/pandora/issues/249 [265]: https://github.com/rmcolq/pandora/pull/265 - +[Unreleased]: 
https://github.com/rmcolq/pandora/compare/0.9.1...HEAD +[v0.7.0]: https://github.com/rmcolq/pandora/releases/tag/v0.7.0 diff --git a/src/utils.cpp b/src/utils.cpp index cb20b0dd..55ef63db 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -594,31 +594,33 @@ std::string transform_cli_gsize(std::string str) std::string make_absolute(std::string str) { return fs::absolute(str).string(); } std::vector> load_read_index( - const fs::path& read_index_fpath) -{ + const fs::path &read_index_fpath) { std::map samples; - std::string name, reads_path, line; + std::string name, line; fs::ifstream instream(read_index_fpath); - if (instream.is_open()) { - while (getline(instream, line).good()) { - std::istringstream linestream(line); - if (std::getline(linestream, name, '\t')) { - linestream >> reads_path; - if (samples.find(name) != samples.end()) { - BOOST_LOG_TRIVIAL(warning) + if (instream.fail()) { + fatal_error("Unable to open read index file ", read_index_fpath); + } + while (getline(instream, line)) { + std::istringstream linestream(line); + if (std::getline(linestream, name, '\t')) { + if (samples.find(name) != samples.end()) { + BOOST_LOG_TRIVIAL(warning) << "Warning: non-unique sample ids given! 
Only the last " "of these will be kept"; - } - samples[name] = reads_path; } + std::string reads_path; + linestream >> reads_path; + if (reads_path.empty()) { + fatal_error("Malformatted read index file entry for ", name); + } + samples[name] = reads_path; } - } else { - fatal_error("Unable to open read index file ", read_index_fpath); } BOOST_LOG_TRIVIAL(info) << "Finished loading " << samples.size() << " samples from read index"; return std::vector>( - samples.begin(), samples.end()); + samples.begin(), samples.end()); } std::string remove_spaces_from_string(const std::string& str) diff --git a/test/test_cases/malformatted_read_index.tsv b/test/test_cases/malformatted_read_index.tsv new file mode 100644 index 00000000..67024d9a --- /dev/null +++ b/test/test_cases/malformatted_read_index.tsv @@ -0,0 +1,3 @@ +sample_1 reads_1.fastq +sample_2 reads_2.fastq +sample_3 \ No newline at end of file diff --git a/test/test_cases/sample_read_index_no_empty_line_at_end.tsv b/test/test_cases/sample_read_index_no_empty_line_at_end.tsv new file mode 100644 index 00000000..fec5acc4 --- /dev/null +++ b/test/test_cases/sample_read_index_no_empty_line_at_end.tsv @@ -0,0 +1,3 @@ +sample_1 reads_1.fastq +sample_2 reads_2.fastq +sample_3 reads_3.fastq \ No newline at end of file diff --git a/test/utils_test.cpp b/test/utils_test.cpp index ff882280..f5138228 100644 --- a/test/utils_test.cpp +++ b/test/utils_test.cpp @@ -1187,7 +1187,7 @@ TEST(remove_spaces_from_string, simple_test___only_spaces) TEST(load_read_index, read_index_does_not_exist___expects_FatalRuntimeError) { - ASSERT_EXCEPTION(load_read_index(fs::path("inexistent_read_index.tsv")), + ASSERT_EXCEPTION(load_read_index(fs::path("nonexistent_read_index.tsv")), FatalRuntimeError, "Unable to open read index file"); } @@ -1204,6 +1204,19 @@ TEST(load_read_index, read_index_has_three_samples) EXPECT_EQ(actual, expected); } +TEST(load_read_index, read_index_has_three_samples_and_no_empty_line_at_end) +{ + std::vector> actual + = 
load_read_index(fs::path("../../test/test_cases/sample_read_index_no_empty_line_at_end.tsv")); + std::vector> expected { { + std::make_pair("sample_1", "reads_1.fastq"), + std::make_pair("sample_2", "reads_2.fastq"), + std::make_pair("sample_3", "reads_3.fastq"), + } }; + + EXPECT_EQ(actual, expected); +} + TEST(load_read_index, read_index_has_three_samples_and_two_are_repeated) { std::vector> actual = load_read_index( @@ -1216,3 +1229,10 @@ TEST(load_read_index, read_index_has_three_samples_and_two_are_repeated) EXPECT_EQ(actual, expected); } + +TEST(load_read_index, read_index_has_missing_column) +{ + ASSERT_EXCEPTION(load_read_index(fs::path("../../test/test_cases/malformatted_read_index.tsv")), + FatalRuntimeError, "Malformatted read index file entry for sample_3"); +} + From 99d93262755721400f609ccce5554bca9da29ea7 Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Wed, 7 Sep 2022 15:43:22 +1000 Subject: [PATCH 14/29] add failing test for ambiguous bases --- test/seq_test.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/test/seq_test.cpp b/test/seq_test.cpp index 7332bf2c..6b0964bc 100644 --- a/test/seq_test.cpp +++ b/test/seq_test.cpp @@ -94,3 +94,22 @@ TEST(SeqTest, sketchIncludesEveryLetter) EXPECT_EQ((pos_inc.find(i) != pos_inc.end()), true); } } + +TEST(SeqTest, sketchSkipsAmbiguousBaseAtStart) +{ + const string seq = "NGCTAATGTGTT"; + const auto w{1}; + const auto k{3}; + Seq s1(0, "0", seq, w, k); + + set pos_exclude{0}; + set pos_include{}; + for (auto it = s1.sketch.begin(); it != s1.sketch.end(); ++it) { + for (uint32_t j = (*it).pos_of_kmer_in_read.start; + j < (*it).pos_of_kmer_in_read.get_end(); ++j) { + EXPECT_TRUE(pos_exclude.find(j) == pos_exclude.end()) << (*it); + pos_include.insert(j); + } + } + EXPECT_EQ(pos_include.size(), seq.length()-1); +} From 8e39ff1ec86ee8b18404869a218b577948f864df Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Mon, 12 Sep 2022 14:55:25 +1000 Subject: [PATCH 15/29] add function to split 
string on ambiguous bases --- include/seq.h | 4 +- include/utils.h | 3 + src/seq.cpp | 89 ++++++++++++++++------------- src/utils.cpp | 43 +++++++++++--- test/seq_test.cpp | 6 +- test/utils_test.cpp | 136 +++++++++++++++++++++++++++++++++++++++++--- 6 files changed, 222 insertions(+), 59 deletions(-) diff --git a/include/seq.h b/include/seq.h index 90b98779..efaf3046 100644 --- a/include/seq.h +++ b/include/seq.h @@ -11,7 +11,7 @@ class Seq { public: uint32_t id; std::string name; - std::string seq; + std::vector seq; std::set sketch; Seq(uint32_t, const std::string&, const std::string&, uint32_t, uint32_t); @@ -32,6 +32,8 @@ class Seq { void minimizer_sketch(const uint32_t w, const uint32_t k); + uint64_t length() const; + friend std::ostream& operator<<(std::ostream& out, const Seq& data); }; diff --git a/include/utils.h b/include/utils.h index 07fae26e..13cacc2f 100644 --- a/include/utils.h +++ b/include/utils.h @@ -14,6 +14,7 @@ #include #include #include "fatal_error.h" +#include "inthash.h" namespace fs = boost::filesystem; @@ -130,4 +131,6 @@ std::vector> load_read_index( std::string remove_spaces_from_string(const std::string& str); +std::vector split_ambiguous(const std::string& s, uint8_t delim = 4); + #endif diff --git a/src/seq.cpp b/src/seq.cpp index 7e7cc555..911aef1a 100644 --- a/src/seq.cpp +++ b/src/seq.cpp @@ -14,8 +14,8 @@ using std::vector; Seq::Seq(uint32_t i, const std::string& n, const std::string& p, uint32_t w, uint32_t k) : id(i) , name(n) - , seq(p) { + seq = split_ambiguous(p); minimizer_sketch(w, k); } @@ -26,7 +26,7 @@ void Seq::initialize( { id = i; name = n; - seq = p; + seq = split_ambiguous(p); sketch.clear(); minimizer_sketch(w, k); } @@ -97,45 +97,47 @@ void Seq::add_new_smallest_minimizer(vector& window, uint64_t& smalle void Seq::minimizer_sketch(const uint32_t w, const uint32_t k) { - const bool sequence_too_short_to_sketch = seq.length() + 1 < w + k; - if (sequence_too_short_to_sketch) - return; - - // initializations - 
uint64_t shift1 = 2 * (k - 1), mask = (1ULL << 2 * k) - 1, - smallest = std::numeric_limits::max(), kmer[2] = { 0, 0 }, - kh[2] = { 0, 0 }; - uint32_t buff = 0; - vector window; // will store all k-mers as Minimizer in the window - window.reserve(w); - - for (const char letter : seq) { - const bool added = add_letter_to_get_next_kmer(letter, shift1, mask, buff, kmer, - kh); // add the next base and remove the first one to get the next kmer - if (not added) + for (auto &s : seq) { + const bool sequence_too_short_to_sketch = s.length() + 1 < w + k; + if (sequence_too_short_to_sketch) return; - if (buff >= k) { - window.push_back( - Minimizer(std::min(kh[0], kh[1]), buff - k, buff, (kh[0] <= kh[1]))); - } - - if (window.size() == w) { - minimize_window(window, - smallest); // finds the minimizer in the window, add the minimizer to - // the sketch set and erase everything until the minimizer - } else if (buff >= w + k and window.back().canonical_kmer_hash <= smallest) { - add_new_smallest_minimizer(window, - smallest); // add the last element of the window (a Minimizer) to the - // sketch, update the smallest and clear the window - } - - const bool window_has_shortened = window.size() < w; - if (!window_has_shortened) { - fatal_error( - "Error when sketching sequence: a minimizer should have been added " - "and windows should have size < ", - w, " (is ", window.size(), ")"); + // initializations + uint64_t shift1 = 2 * (k - 1), mask = (1ULL << 2 * k) - 1, + smallest = std::numeric_limits::max(), kmer[2] = { 0, 0 }, + kh[2] = { 0, 0 }; + uint32_t buff = 0; + vector window; // will store all k-mers as Minimizer in the window + window.reserve(w); + + for (const char letter : s) { + const bool added = add_letter_to_get_next_kmer(letter, shift1, mask, buff, + kmer, + kh); // add the next base and remove the first one to get the next kmer + if (not added) + return; + + if (buff >= k) { + window.push_back(Minimizer( + std::min(kh[0], kh[1]), buff - k, buff, (kh[0] <= 
kh[1]))); + } + + if (window.size() == w) { + minimize_window(window, + smallest); // finds the minimizer in the window, add the minimizer to the sketch set and erase everything until the minimizer + } else if (buff >= w + k + and window.back().canonical_kmer_hash <= smallest) { + add_new_smallest_minimizer(window, + smallest); // add the last element of the window (a Minimizer) to the sketch, update the smallest and clear the window + } + + const bool window_has_shortened = window.size() < w; + if (!window_has_shortened) { + fatal_error( + "Error when sketching sequence: a minimizer should have been added " + "and windows should have size < ", + w, " (is ", window.size(), ")"); + } } } } @@ -145,3 +147,12 @@ std::ostream& operator<<(std::ostream& out, Seq const& data) out << data.name; return out; } + +uint64_t Seq::length() const +{ + uint64_t l{0}; + for (auto &s: seq) { + l += s.length(); + } + return l; +} diff --git a/src/utils.cpp b/src/utils.cpp index 55ef63db..9324b9e1 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -1,4 +1,3 @@ -#include #include #include #include @@ -466,7 +465,7 @@ uint32_t pangraph_from_read_file(const std::string& filepath, coverageExceeded = true; } else { // no other thread still signalized exceeding max coverage - covg += sequence.seq.length(); + covg += sequence.length(); if (covg / genome_size > max_covg) { // oops, we are the first one to see max_covg being // exceeded, print and exit! 
@@ -484,8 +483,8 @@ uint32_t pangraph_from_read_file(const std::string& filepath, continue; } - const auto expected_number_kmers_in_read_sketch { sequence.seq.length() - * 2 / (w + 1) }; + const auto expected_number_kmers_in_read_sketch { sequence.length() * 2 + / (w + 1) }; // get the minizer hits auto minimizer_hits = std::make_shared(MinimizerHits()); @@ -594,7 +593,8 @@ std::string transform_cli_gsize(std::string str) std::string make_absolute(std::string str) { return fs::absolute(str).string(); } std::vector> load_read_index( - const fs::path &read_index_fpath) { + const fs::path& read_index_fpath) +{ std::map samples; std::string name, line; fs::ifstream instream(read_index_fpath); @@ -606,8 +606,8 @@ std::vector> load_read_index( if (std::getline(linestream, name, '\t')) { if (samples.find(name) != samples.end()) { BOOST_LOG_TRIVIAL(warning) - << "Warning: non-unique sample ids given! Only the last " - "of these will be kept"; + << "Warning: non-unique sample ids given! Only the last " + "of these will be kept"; } std::string reads_path; linestream >> reads_path; @@ -620,7 +620,7 @@ std::vector> load_read_index( BOOST_LOG_TRIVIAL(info) << "Finished loading " << samples.size() << " samples from read index"; return std::vector>( - samples.begin(), samples.end()); + samples.begin(), samples.end()); } std::string remove_spaces_from_string(const std::string& str) @@ -633,4 +633,29 @@ std::string remove_spaces_from_string(const std::string& str) } } return to_return; -} \ No newline at end of file +} + +std::vector split_ambiguous(const std::string& s, uint8_t delim) +{ + std::vector elems; + auto start { 0 }; + auto i { 0 }; + auto l { 0 }; + for (auto& ch : s) { + uint32_t c = nt4((uint8_t)ch); + if (c == delim) { + if (l > 0) { + elems.emplace_back(s.substr(start, l)); + } + start = i + 1; + l = 0; + } else { + ++l; + } + ++i; + } + if (l > 0) { + elems.emplace_back(s.substr(start, l)); + } + return elems; +} diff --git a/test/seq_test.cpp b/test/seq_test.cpp 
index 6b0964bc..218880f2 100644 --- a/test/seq_test.cpp +++ b/test/seq_test.cpp @@ -12,7 +12,8 @@ TEST(SeqTest, create) Seq s1(0, "0", "AGCTAATGCGTT", 11, 3); EXPECT_EQ((uint)0, s1.id); EXPECT_EQ("0", s1.name); - EXPECT_EQ("AGCTAATGCGTT", s1.seq); + const std::vector expected_seq{"AGCTAATGCGTT"}; + EXPECT_EQ(expected_seq, s1.seq); } TEST(SeqTest, initialize) @@ -21,7 +22,8 @@ TEST(SeqTest, initialize) s1.initialize(1, "new", "AGCTAATGCATA", 9, 3); EXPECT_EQ((uint)1, s1.id); EXPECT_EQ("new", s1.name); - EXPECT_EQ("AGCTAATGCATA", s1.seq); + const std::vector expected_seq{"AGCTAATGCATA"}; + EXPECT_EQ(expected_seq, s1.seq); } TEST(SeqTest, sketchShortReads) diff --git a/test/utils_test.cpp b/test/utils_test.cpp index f5138228..1ff46582 100644 --- a/test/utils_test.cpp +++ b/test/utils_test.cpp @@ -1206,13 +1206,13 @@ TEST(load_read_index, read_index_has_three_samples) TEST(load_read_index, read_index_has_three_samples_and_no_empty_line_at_end) { - std::vector> actual - = load_read_index(fs::path("../../test/test_cases/sample_read_index_no_empty_line_at_end.tsv")); + std::vector> actual = load_read_index( + fs::path("../../test/test_cases/sample_read_index_no_empty_line_at_end.tsv")); std::vector> expected { { - std::make_pair("sample_1", "reads_1.fastq"), - std::make_pair("sample_2", "reads_2.fastq"), - std::make_pair("sample_3", "reads_3.fastq"), - } }; + std::make_pair("sample_1", "reads_1.fastq"), + std::make_pair("sample_2", "reads_2.fastq"), + std::make_pair("sample_3", "reads_3.fastq"), + } }; EXPECT_EQ(actual, expected); } @@ -1232,7 +1232,127 @@ TEST(load_read_index, read_index_has_three_samples_and_two_are_repeated) TEST(load_read_index, read_index_has_missing_column) { - ASSERT_EXCEPTION(load_read_index(fs::path("../../test/test_cases/malformatted_read_index.tsv")), - FatalRuntimeError, "Malformatted read index file entry for sample_3"); + ASSERT_EXCEPTION( + load_read_index(fs::path("../../test/test_cases/malformatted_read_index.tsv")), + FatalRuntimeError, 
"Malformatted read index file entry for sample_3"); +} + +TEST(splitAmbiguous, noAmbiguous) +{ + const std::string s("ACGT"); + + const auto actual(split_ambiguous(s)); + const std::vector expected { s }; + + EXPECT_EQ(actual, expected); +} + +TEST(splitAmbiguous, emptySequence) +{ + const std::string s(""); + + const auto actual(split_ambiguous(s)); + const std::vector expected; + + EXPECT_EQ(actual, expected); } +TEST(splitAmbiguous, allAmbiguous) +{ + const std::string s("NXDW"); + + const auto actual(split_ambiguous(s)); + const std::vector expected; + + EXPECT_EQ(actual, expected); +} + +TEST(splitAmbiguous, firstLetterIsAmbiguous) +{ + const std::string s("NACGT"); + + const auto actual(split_ambiguous(s)); + const std::vector expected { "ACGT" }; + + EXPECT_EQ(actual, expected); +} + +TEST(splitAmbiguous, firstTwoLettersAreAmbiguous) +{ + const std::string s("NWACGT"); + + const auto actual(split_ambiguous(s)); + const std::vector expected { "ACGT" }; + + EXPECT_EQ(actual, expected); +} + +TEST(splitAmbiguous, lastLetterIsAmbiguous) +{ + const std::string s("ACGTN"); + + const auto actual(split_ambiguous(s)); + const std::vector expected { "ACGT" }; + + EXPECT_EQ(actual, expected); +} + +TEST(splitAmbiguous, lastTwoLettersAreAmbiguous) +{ + const std::string s("ACGTNW"); + + const auto actual(split_ambiguous(s)); + const std::vector expected { "ACGT" }; + + EXPECT_EQ(actual, expected); +} + +TEST(splitAmbiguous, ambiguousBaseInMiddle) +{ + const std::string s("ACNGT"); + + const auto actual(split_ambiguous(s)); + const std::vector expected { "AC", "GT" }; + + EXPECT_EQ(actual, expected); +} + +TEST(splitAmbiguous, ambiguousBaseOffCentre) +{ + const std::string s("AWCGT"); + + const auto actual(split_ambiguous(s)); + const std::vector expected { "A", "CGT" }; + + EXPECT_EQ(actual, expected); +} + +TEST(splitAmbiguous, twoAmbiguousInMiddle) +{ + const std::string s("AWXCGT"); + + const auto actual(split_ambiguous(s)); + const std::vector expected { "A", "CGT" 
}; + + EXPECT_EQ(actual, expected); +} + +TEST(splitAmbiguous, twoAmbiguousSpacedOut) +{ + const std::string s("AWCNGT"); + + const auto actual(split_ambiguous(s)); + const std::vector expected { "A", "C", "GT" }; + + EXPECT_EQ(actual, expected); +} + +TEST(splitAmbiguous, twoAmbiguousSpacedOutRuns) +{ + const std::string s("AWXCNXGT"); + + const auto actual(split_ambiguous(s)); + const std::vector expected { "A", "C", "GT" }; + + EXPECT_EQ(actual, expected); +} From dbba0d628d161749ca680819013578674e447ac8 Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Mon, 12 Sep 2022 14:59:23 +1000 Subject: [PATCH 16/29] add tests for length function --- test/seq_test.cpp | 41 ++++++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/test/seq_test.cpp b/test/seq_test.cpp index 218880f2..7502163c 100644 --- a/test/seq_test.cpp +++ b/test/seq_test.cpp @@ -1,8 +1,7 @@ #include "gtest/gtest.h" #include "seq.h" -#include "minimizer.h" #include "interval.h" -#include +#include #include using namespace std; @@ -12,7 +11,7 @@ TEST(SeqTest, create) Seq s1(0, "0", "AGCTAATGCGTT", 11, 3); EXPECT_EQ((uint)0, s1.id); EXPECT_EQ("0", s1.name); - const std::vector expected_seq{"AGCTAATGCGTT"}; + const std::vector expected_seq { "AGCTAATGCGTT" }; EXPECT_EQ(expected_seq, s1.seq); } @@ -22,7 +21,7 @@ TEST(SeqTest, initialize) s1.initialize(1, "new", "AGCTAATGCATA", 9, 3); EXPECT_EQ((uint)1, s1.id); EXPECT_EQ("new", s1.name); - const std::vector expected_seq{"AGCTAATGCATA"}; + const std::vector expected_seq { "AGCTAATGCATA" }; EXPECT_EQ(expected_seq, s1.seq); } @@ -97,15 +96,39 @@ TEST(SeqTest, sketchIncludesEveryLetter) } } +TEST(SeqTest, lengthNoAmbiguous) +{ + const std::string s { "AGCTAATGCGTT" }; + const Seq seq(0, "0", s, 3, 3); + + EXPECT_EQ(seq.length(), s.length()); +} + +TEST(SeqTest, lengthOneAmbiguous) +{ + const std::string s { "AGCTAATGNGTT" }; + const Seq seq(0, "0", s, 3, 3); + + EXPECT_EQ(seq.length(), s.length() - 1); +} + 
+TEST(SeqTest, lengthTwoAmbiguous) +{ + const std::string s { "AWGCTAATGNGTT" }; + const Seq seq(0, "0", s, 3, 3); + + EXPECT_EQ(seq.length(), s.length() - 2); +} + TEST(SeqTest, sketchSkipsAmbiguousBaseAtStart) { const string seq = "NGCTAATGTGTT"; - const auto w{1}; - const auto k{3}; + const auto w { 1 }; + const auto k { 3 }; Seq s1(0, "0", seq, w, k); - set pos_exclude{0}; - set pos_include{}; + set pos_exclude { 0 }; + set pos_include {}; for (auto it = s1.sketch.begin(); it != s1.sketch.end(); ++it) { for (uint32_t j = (*it).pos_of_kmer_in_read.start; j < (*it).pos_of_kmer_in_read.get_end(); ++j) { @@ -113,5 +136,5 @@ TEST(SeqTest, sketchSkipsAmbiguousBaseAtStart) pos_include.insert(j); } } - EXPECT_EQ(pos_include.size(), seq.length()-1); + EXPECT_EQ(pos_include.size(), seq.length() - 1); } From b78ce78703f3bdf4433bc820a2574f271fce16eb Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Mon, 12 Sep 2022 15:09:04 +1000 Subject: [PATCH 17/29] rearrange test --- src/seq.cpp | 18 +++++++++--------- test/seq_test.cpp | 37 +++++++++++++++++++------------------ 2 files changed, 28 insertions(+), 27 deletions(-) diff --git a/src/seq.cpp b/src/seq.cpp index 911aef1a..21635c31 100644 --- a/src/seq.cpp +++ b/src/seq.cpp @@ -97,18 +97,18 @@ void Seq::add_new_smallest_minimizer(vector& window, uint64_t& smalle void Seq::minimizer_sketch(const uint32_t w, const uint32_t k) { + // initializations + uint64_t shift1 = 2 * (k - 1), mask = (1ULL << 2 * k) - 1, + smallest = std::numeric_limits::max(), kmer[2] = { 0, 0 }, + kh[2] = { 0, 0 }; + uint32_t buff = 0; + vector window; // will store all k-mers as Minimizer in the window + window.reserve(w); + for (auto &s : seq) { const bool sequence_too_short_to_sketch = s.length() + 1 < w + k; if (sequence_too_short_to_sketch) - return; - - // initializations - uint64_t shift1 = 2 * (k - 1), mask = (1ULL << 2 * k) - 1, - smallest = std::numeric_limits::max(), kmer[2] = { 0, 0 }, - kh[2] = { 0, 0 }; - uint32_t buff = 0; - vector window; 
// will store all k-mers as Minimizer in the window - window.reserve(w); + continue; for (const char letter : s) { const bool added = add_letter_to_get_next_kmer(letter, shift1, mask, buff, diff --git a/test/seq_test.cpp b/test/seq_test.cpp index 7502163c..647959ed 100644 --- a/test/seq_test.cpp +++ b/test/seq_test.cpp @@ -96,6 +96,25 @@ TEST(SeqTest, sketchIncludesEveryLetter) } } +TEST(SeqTest, sketchSkipsAmbiguousBaseAtStart) +{ + const string seq = "NGCTAATGTGTT"; + const auto w { 1 }; + const auto k { 3 }; + Seq s1(0, "0", seq, w, k); + + set pos_exclude { 0 }; + set pos_include {}; + for (auto it = s1.sketch.begin(); it != s1.sketch.end(); ++it) { + for (uint32_t j = (*it).pos_of_kmer_in_read.start; + j < (*it).pos_of_kmer_in_read.get_end(); ++j) { + EXPECT_TRUE(pos_exclude.find(j) == pos_exclude.end()) << (*it); + pos_include.insert(j); + } + } + EXPECT_EQ(pos_include.size(), seq.length() - 1); +} + TEST(SeqTest, lengthNoAmbiguous) { const std::string s { "AGCTAATGCGTT" }; @@ -120,21 +139,3 @@ TEST(SeqTest, lengthTwoAmbiguous) EXPECT_EQ(seq.length(), s.length() - 2); } -TEST(SeqTest, sketchSkipsAmbiguousBaseAtStart) -{ - const string seq = "NGCTAATGTGTT"; - const auto w { 1 }; - const auto k { 3 }; - Seq s1(0, "0", seq, w, k); - - set pos_exclude { 0 }; - set pos_include {}; - for (auto it = s1.sketch.begin(); it != s1.sketch.end(); ++it) { - for (uint32_t j = (*it).pos_of_kmer_in_read.start; - j < (*it).pos_of_kmer_in_read.get_end(); ++j) { - EXPECT_TRUE(pos_exclude.find(j) == pos_exclude.end()) << (*it); - pos_include.insert(j); - } - } - EXPECT_EQ(pos_include.size(), seq.length() - 1); -} From db2b135a6d6e6f62955fbf3b8bc6a64c6fae9996 Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Mon, 12 Sep 2022 20:40:49 +0100 Subject: [PATCH 18/29] Recording substring offsets to retain the position of a minimizer on the reads --- include/seq.h | 10 ++++-- include/utils.h | 3 +- src/seq.cpp | 83 +++++++++++++++++++++++++-------------------- src/utils.cpp | 13 ++++--- 
test/utils_test.cpp | 63 +++++++++++++++++++++++++++------- 5 files changed, 115 insertions(+), 57 deletions(-) diff --git a/include/seq.h b/include/seq.h index efaf3046..d9fa18a4 100644 --- a/include/seq.h +++ b/include/seq.h @@ -12,6 +12,7 @@ class Seq { uint32_t id; std::string name; std::vector seq; + std::vector offsets; std::set sketch; Seq(uint32_t, const std::string&, const std::string&, uint32_t, uint32_t); @@ -30,11 +31,16 @@ class Seq { void add_new_smallest_minimizer(std::vector&, uint64_t&); - void minimizer_sketch(const uint32_t w, const uint32_t k); - uint64_t length() const; friend std::ostream& operator<<(std::ostream& out, const Seq& data); + + void minimizer_sketch(const uint32_t w, const uint32_t k); + +private: + void minimizer_sketch(const std::string &s, const size_t seq_offset, + const uint32_t w, const uint32_t k); + }; #endif diff --git a/include/utils.h b/include/utils.h index 13cacc2f..25692839 100644 --- a/include/utils.h +++ b/include/utils.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include "minihits.h" #include "pangenome/ns.cpp" @@ -131,6 +132,6 @@ std::vector> load_read_index( std::string remove_spaces_from_string(const std::string& str); -std::vector split_ambiguous(const std::string& s, uint8_t delim = 4); +std::pair, std::vector> split_ambiguous(const std::string& s, uint8_t delim = 4); #endif diff --git a/src/seq.cpp b/src/seq.cpp index 21635c31..ab131750 100644 --- a/src/seq.cpp +++ b/src/seq.cpp @@ -15,7 +15,9 @@ Seq::Seq(uint32_t i, const std::string& n, const std::string& p, uint32_t w, uin : id(i) , name(n) { - seq = split_ambiguous(p); + auto seqs_and_offsets = split_ambiguous(p); + seq = seqs_and_offsets.first; + offsets = seqs_and_offsets.second; minimizer_sketch(w, k); } @@ -26,7 +28,9 @@ void Seq::initialize( { id = i; name = n; - seq = split_ambiguous(p); + auto seqs_and_offsets = split_ambiguous(p); + seq = seqs_and_offsets.first; + offsets = seqs_and_offsets.second; sketch.clear(); 
minimizer_sketch(w, k); } @@ -95,7 +99,14 @@ void Seq::add_new_smallest_minimizer(vector& window, uint64_t& smalle window.clear(); } -void Seq::minimizer_sketch(const uint32_t w, const uint32_t k) +void Seq::minimizer_sketch(const uint32_t w, const uint32_t k) { + for (size_t i = 0; i < seq.size(); ++i) { + minimizer_sketch(seq[i], offsets[i], w, k); + } +} + +void Seq::minimizer_sketch(const std::string &s, const size_t seq_offset, + const uint32_t w, const uint32_t k) { // initializations uint64_t shift1 = 2 * (k - 1), mask = (1ULL << 2 * k) - 1, @@ -105,39 +116,37 @@ void Seq::minimizer_sketch(const uint32_t w, const uint32_t k) vector window; // will store all k-mers as Minimizer in the window window.reserve(w); - for (auto &s : seq) { - const bool sequence_too_short_to_sketch = s.length() + 1 < w + k; - if (sequence_too_short_to_sketch) - continue; - - for (const char letter : s) { - const bool added = add_letter_to_get_next_kmer(letter, shift1, mask, buff, - kmer, - kh); // add the next base and remove the first one to get the next kmer - if (not added) - return; - - if (buff >= k) { - window.push_back(Minimizer( - std::min(kh[0], kh[1]), buff - k, buff, (kh[0] <= kh[1]))); - } - - if (window.size() == w) { - minimize_window(window, - smallest); // finds the minimizer in the window, add the minimizer to the sketch set and erase everything until the minimizer - } else if (buff >= w + k - and window.back().canonical_kmer_hash <= smallest) { - add_new_smallest_minimizer(window, - smallest); // add the last element of the window (a Minimizer) to the sketch, update the smallest and clear the window - } - - const bool window_has_shortened = window.size() < w; - if (!window_has_shortened) { - fatal_error( - "Error when sketching sequence: a minimizer should have been added " - "and windows should have size < ", - w, " (is ", window.size(), ")"); - } + const bool sequence_too_short_to_sketch = s.length() + 1 < w + k; + if (sequence_too_short_to_sketch) + return; + + 
for (const char letter : s) { + const bool added = add_letter_to_get_next_kmer(letter, shift1, mask, buff, + kmer, + kh); // add the next base and remove the first one to get the next kmer + if (not added) + return; + + if (buff >= k) { + window.push_back(Minimizer( + std::min(kh[0], kh[1]), buff - k + seq_offset, buff + seq_offset, (kh[0] <= kh[1]))); + } + + if (window.size() == w) { + minimize_window(window, + smallest); // finds the minimizer in the window, add the minimizer to the sketch set and erase everything until the minimizer + } else if (buff >= w + k + and window.back().canonical_kmer_hash <= smallest) { + add_new_smallest_minimizer(window, + smallest); // add the last element of the window (a Minimizer) to the sketch, update the smallest and clear the window + } + + const bool window_has_shortened = window.size() < w; + if (!window_has_shortened) { + fatal_error( + "Error when sketching sequence: a minimizer should have been added " + "and windows should have size < ", + w, " (is ", window.size(), ")"); } } } @@ -151,7 +160,7 @@ std::ostream& operator<<(std::ostream& out, Seq const& data) uint64_t Seq::length() const { uint64_t l{0}; - for (auto &s: seq) { + for (const auto &s: seq) { l += s.length(); } return l; diff --git a/src/utils.cpp b/src/utils.cpp index 9324b9e1..1a92b571 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -635,9 +635,10 @@ std::string remove_spaces_from_string(const std::string& str) return to_return; } -std::vector split_ambiguous(const std::string& s, uint8_t delim) +std::pair, std::vector> split_ambiguous(const std::string& s, uint8_t delim) { - std::vector elems; + std::vector substrs; + std::vector offsets; auto start { 0 }; auto i { 0 }; auto l { 0 }; @@ -645,7 +646,8 @@ std::vector split_ambiguous(const std::string& s, uint8_t delim) uint32_t c = nt4((uint8_t)ch); if (c == delim) { if (l > 0) { - elems.emplace_back(s.substr(start, l)); + substrs.emplace_back(s.substr(start, l)); + offsets.emplace_back(start); } start = i 
+ 1; l = 0; @@ -655,7 +657,8 @@ std::vector split_ambiguous(const std::string& s, uint8_t delim) ++i; } if (l > 0) { - elems.emplace_back(s.substr(start, l)); + substrs.emplace_back(s.substr(start, l)); + offsets.emplace_back(start); } - return elems; + return std::make_pair(substrs, offsets); } diff --git a/test/utils_test.cpp b/test/utils_test.cpp index 1ff46582..cf7f7063 100644 --- a/test/utils_test.cpp +++ b/test/utils_test.cpp @@ -1242,7 +1242,10 @@ TEST(splitAmbiguous, noAmbiguous) const std::string s("ACGT"); const auto actual(split_ambiguous(s)); - const std::vector expected { s }; + + const std::vector expected_substrs { s }; + const std::vector expected_offsets { 0 }; + const auto expected = make_pair(expected_substrs, expected_offsets); EXPECT_EQ(actual, expected); } @@ -1252,7 +1255,11 @@ TEST(splitAmbiguous, emptySequence) const std::string s(""); const auto actual(split_ambiguous(s)); - const std::vector expected; + + const std::vector expected_substrs; + const std::vector expected_offsets; + const auto expected = make_pair(expected_substrs, expected_offsets); + EXPECT_EQ(actual, expected); } @@ -1262,7 +1269,11 @@ TEST(splitAmbiguous, allAmbiguous) const std::string s("NXDW"); const auto actual(split_ambiguous(s)); - const std::vector expected; + + const std::vector expected_substrs; + const std::vector expected_offsets; + const auto expected = make_pair(expected_substrs, expected_offsets); + EXPECT_EQ(actual, expected); } @@ -1272,7 +1283,10 @@ TEST(splitAmbiguous, firstLetterIsAmbiguous) const std::string s("NACGT"); const auto actual(split_ambiguous(s)); - const std::vector expected { "ACGT" }; + + const std::vector expected_substrs { "ACGT" }; + const std::vector expected_offsets { 1 }; + const auto expected = make_pair(expected_substrs, expected_offsets); EXPECT_EQ(actual, expected); } @@ -1282,7 +1296,10 @@ TEST(splitAmbiguous, firstTwoLettersAreAmbiguous) const std::string s("NWACGT"); const auto actual(split_ambiguous(s)); - const std::vector 
expected { "ACGT" }; + + const std::vector expected_substrs { "ACGT" }; + const std::vector expected_offsets { 2 }; + const auto expected = make_pair(expected_substrs, expected_offsets); EXPECT_EQ(actual, expected); } @@ -1292,7 +1309,10 @@ TEST(splitAmbiguous, lastLetterIsAmbiguous) const std::string s("ACGTN"); const auto actual(split_ambiguous(s)); - const std::vector expected { "ACGT" }; + + const std::vector expected_substrs { "ACGT" }; + const std::vector expected_offsets { 0 }; + const auto expected = make_pair(expected_substrs, expected_offsets); EXPECT_EQ(actual, expected); } @@ -1302,7 +1322,10 @@ TEST(splitAmbiguous, lastTwoLettersAreAmbiguous) const std::string s("ACGTNW"); const auto actual(split_ambiguous(s)); - const std::vector expected { "ACGT" }; + + const std::vector expected_substrs { "ACGT" }; + const std::vector expected_offsets { 0 }; + const auto expected = make_pair(expected_substrs, expected_offsets); EXPECT_EQ(actual, expected); } @@ -1312,7 +1335,10 @@ TEST(splitAmbiguous, ambiguousBaseInMiddle) const std::string s("ACNGT"); const auto actual(split_ambiguous(s)); - const std::vector expected { "AC", "GT" }; + + const std::vector expected_substrs { "AC", "GT" }; + const std::vector expected_offsets { 0, 3 }; + const auto expected = make_pair(expected_substrs, expected_offsets); EXPECT_EQ(actual, expected); } @@ -1322,7 +1348,10 @@ TEST(splitAmbiguous, ambiguousBaseOffCentre) const std::string s("AWCGT"); const auto actual(split_ambiguous(s)); - const std::vector expected { "A", "CGT" }; + + const std::vector expected_substrs { "A", "CGT" }; + const std::vector expected_offsets { 0, 2 }; + const auto expected = make_pair(expected_substrs, expected_offsets); EXPECT_EQ(actual, expected); } @@ -1332,7 +1361,10 @@ TEST(splitAmbiguous, twoAmbiguousInMiddle) const std::string s("AWXCGT"); const auto actual(split_ambiguous(s)); - const std::vector expected { "A", "CGT" }; + + const std::vector expected_substrs { "A", "CGT" }; + const std::vector 
expected_offsets { 0, 3 }; + const auto expected = make_pair(expected_substrs, expected_offsets); EXPECT_EQ(actual, expected); } @@ -1342,7 +1374,10 @@ TEST(splitAmbiguous, twoAmbiguousSpacedOut) const std::string s("AWCNGT"); const auto actual(split_ambiguous(s)); - const std::vector expected { "A", "C", "GT" }; + + const std::vector expected_substrs { "A", "C", "GT" }; + const std::vector expected_offsets { 0, 2, 4 }; + const auto expected = make_pair(expected_substrs, expected_offsets); EXPECT_EQ(actual, expected); } @@ -1352,7 +1387,11 @@ TEST(splitAmbiguous, twoAmbiguousSpacedOutRuns) const std::string s("AWXCNXGT"); const auto actual(split_ambiguous(s)); - const std::vector expected { "A", "C", "GT" }; + + const std::vector expected_substrs { "A", "C", "GT" }; + const std::vector expected_offsets { 0, 3, 6 }; + const auto expected = make_pair(expected_substrs, expected_offsets); + EXPECT_EQ(actual, expected); } From 3a71ff6b7fd86c948e0ed4f783bcb71f347c6451 Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Tue, 13 Sep 2022 10:17:53 +1000 Subject: [PATCH 19/29] add more ambiguous sketch tests --- test/seq_test.cpp | 75 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 74 insertions(+), 1 deletion(-) diff --git a/test/seq_test.cpp b/test/seq_test.cpp index 647959ed..bda7d2fa 100644 --- a/test/seq_test.cpp +++ b/test/seq_test.cpp @@ -115,6 +115,80 @@ TEST(SeqTest, sketchSkipsAmbiguousBaseAtStart) EXPECT_EQ(pos_include.size(), seq.length() - 1); } +TEST(SeqTest, sketchSkipsAmbiguousBaseAtEnd) +{ + const string seq = "GCTAATGTGTTN"; + const auto w { 1 }; + const auto k { 3 }; + Seq s1(0, "0", seq, w, k); + + set pos_exclude { 11 }; + set pos_include {}; + for (auto it = s1.sketch.begin(); it != s1.sketch.end(); ++it) { + for (uint32_t j = (*it).pos_of_kmer_in_read.start; + j < (*it).pos_of_kmer_in_read.get_end(); ++j) { + EXPECT_TRUE(pos_exclude.find(j) == pos_exclude.end()) << (*it); + pos_include.insert(j); + } + } + EXPECT_EQ(pos_include.size(), 
seq.length() - 1); +} + +TEST(SeqTest, sketchSkipsAmbiguousBaseInMiddle) +{ + const string seq = "GCTAATNGTGTT"; + const auto w { 1 }; + const auto k { 3 }; + Seq s1(0, "0", seq, w, k); + + set pos_exclude { 6 }; + set pos_include {}; + for (auto it = s1.sketch.begin(); it != s1.sketch.end(); ++it) { + for (uint32_t j = (*it).pos_of_kmer_in_read.start; + j < (*it).pos_of_kmer_in_read.get_end(); ++j) { + EXPECT_TRUE(pos_exclude.find(j) == pos_exclude.end()) << (*it); + pos_include.insert(j); + } + } + EXPECT_EQ(pos_include.size(), seq.length() - 1); +} + +TEST(SeqTest, sketchSkipsAmbiguousBaseTwoInMiddle) +{ + const string seq = "GCTANATNGTGTT"; + const auto w { 1 }; + const auto k { 3 }; + Seq s1(0, "0", seq, w, k); + + set pos_exclude { 4, 5, 6, 7 }; + set pos_include {}; + for (auto it = s1.sketch.begin(); it != s1.sketch.end(); ++it) { + for (uint32_t j = (*it).pos_of_kmer_in_read.start; + j < (*it).pos_of_kmer_in_read.get_end(); ++j) { + EXPECT_TRUE(pos_exclude.find(j) == pos_exclude.end()) << (*it); + pos_include.insert(j); + } + } + EXPECT_EQ(pos_include.size(), seq.length() - 4); +} + +TEST(SeqTest, sketchSkipsAmbiguousNoStretchesOfK) +{ + const string seq = "GCNTANATNGTWGTNT"; + const auto w { 1 }; + const auto k { 3 }; + Seq s1(0, "0", seq, w, k); + + set pos_include {}; + for (auto it = s1.sketch.begin(); it != s1.sketch.end(); ++it) { + for (uint32_t j = (*it).pos_of_kmer_in_read.start; + j < (*it).pos_of_kmer_in_read.get_end(); ++j) { + pos_include.insert(j); + } + } + EXPECT_TRUE(pos_include.empty()); +} + TEST(SeqTest, lengthNoAmbiguous) { const std::string s { "AGCTAATGCGTT" }; @@ -138,4 +212,3 @@ TEST(SeqTest, lengthTwoAmbiguous) EXPECT_EQ(seq.length(), s.length() - 2); } - From 3c5738efc8e9310b7cb3c2821b02687d4a1b75b8 Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Tue, 13 Sep 2022 11:38:03 +1000 Subject: [PATCH 20/29] change ambiguous log message to warn --- src/seq.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/seq.cpp b/src/seq.cpp index ab131750..df5c4124 100644 --- a/src/seq.cpp +++ b/src/seq.cpp @@ -47,7 +47,7 @@ bool Seq::add_letter_to_get_next_kmer(const char& letter, const uint64_t& shift1 buff++; return true; } else { - BOOST_LOG_TRIVIAL(debug) + BOOST_LOG_TRIVIAL(warning) << now() << "bad letter - found a non AGCT base in read so skipping read " << name; sketch.clear(); From af1751e55efa996e51d1ee169abaa4a3f8f0663c Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Tue, 13 Sep 2022 11:39:54 +1000 Subject: [PATCH 21/29] update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 222eec25..86cedfd4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ### Fixed - More robust TSV file parsing. Empty line no longer required at end [[#213][213]] +- Handle ambiguous bases properly instead of skipping to next readonce we reach one [[#294][294]] ## [0.9.1] @@ -134,5 +135,6 @@ their changes meticulously documented here. 
[234]: https://github.com/rmcolq/pandora/pull/234 [249]: https://github.com/rmcolq/pandora/issues/249 [265]: https://github.com/rmcolq/pandora/pull/265 +[294]: https://github.com/rmcolq/pandora/issues/294 [v0.7.0]: https://github.com/rmcolq/pandora/releases/tag/v0.7.0 From f3662695568edc9d251363dae25e9cbfecb1f9a0 Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Tue, 13 Sep 2022 14:24:12 +1000 Subject: [PATCH 22/29] add a windowed test for ambiguous --- test/seq_test.cpp | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/test/seq_test.cpp b/test/seq_test.cpp index bda7d2fa..d87a7d3e 100644 --- a/test/seq_test.cpp +++ b/test/seq_test.cpp @@ -3,6 +3,7 @@ #include "interval.h" #include #include +#include using namespace std; @@ -169,7 +170,7 @@ TEST(SeqTest, sketchSkipsAmbiguousBaseTwoInMiddle) pos_include.insert(j); } } - EXPECT_EQ(pos_include.size(), seq.length() - 4); + EXPECT_EQ(pos_include.size(), seq.length() - pos_exclude.size()); } TEST(SeqTest, sketchSkipsAmbiguousNoStretchesOfK) @@ -189,6 +190,25 @@ TEST(SeqTest, sketchSkipsAmbiguousNoStretchesOfK) EXPECT_TRUE(pos_include.empty()); } +TEST(SeqTest, sketchSkipsAmbiguousWindowDropsAmbiguous) +{ + const string seq = "AACTCGCGCGCGCCGAGCTGACCGACATNNNNCGGCTCACCGGCGAGATCAACACCCTGGCCCAGC"; + const auto w { 14 }; + const auto k { 15 }; + Seq s1(0, "0", seq, w, k); + + std::set pos_exclude{28, 29, 30, 31}; + set pos_include {}; + for (auto it = s1.sketch.begin(); it != s1.sketch.end(); ++it) { + for (uint32_t j = (*it).pos_of_kmer_in_read.start; + j < (*it).pos_of_kmer_in_read.get_end(); ++j) { + EXPECT_TRUE(pos_exclude.find(j) == pos_exclude.end()) << (*it).pos_of_kmer_in_read; + pos_include.insert(j); + } + } + EXPECT_EQ(s1.sketch.size(), 5); // is 8 total, but there are some duplicates that the set removes +} + TEST(SeqTest, lengthNoAmbiguous) { const std::string s { "AGCTAATGCGTT" }; From 334974df0378a1aa18f8f545f372b8659f9e64d2 Mon Sep 17 00:00:00 2001 From: Leandro Ishi 
Date: Thu, 15 Sep 2022 15:14:13 +0100 Subject: [PATCH 23/29] Fixing small typo in Changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 86cedfd4..a20e6102 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ### Fixed - More robust TSV file parsing. Empty line no longer required at end [[#213][213]] -- Handle ambiguous bases properly instead of skipping to next readonce we reach one [[#294][294]] +- Handle ambiguous bases properly instead of skipping to next read once we reach one [[#294][294]] ## [0.9.1] From 07048fbfaf7c79e4752dd09502729b750cf46972 Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Fri, 16 Sep 2022 11:31:34 +0100 Subject: [PATCH 24/29] Refactoring utils::split_ambiguous() using functional functions --- src/utils.cpp | 79 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 55 insertions(+), 24 deletions(-) diff --git a/src/utils.cpp b/src/utils.cpp index 1a92b571..0c3570b2 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include "utils.h" #include "seq.h" @@ -635,30 +636,60 @@ std::string remove_spaces_from_string(const std::string& str) return to_return; } +template +std::list> split_on_predicate(std::vector &original, UnaryPredicate predicate) { + std::list> split; + boost::split(split, original, predicate, boost::algorithm::token_compress_on); + auto remove_index = std::remove_if(split.begin(), split.end(), + [](const std::vector &hashed_substr) { return hashed_substr.empty(); } ); + split.erase(remove_index, split.end()); + return split; +} + std::pair, std::vector> split_ambiguous(const std::string& s, uint8_t delim) { - std::vector substrs; - std::vector offsets; - auto start { 0 }; - auto i { 0 }; - auto l { 0 }; - for (auto& ch : s) { - uint32_t c = nt4((uint8_t)ch); - if (c == delim) { - if (l > 0) { - 
substrs.emplace_back(s.substr(start, l)); - offsets.emplace_back(start); - } - start = i + 1; - l = 0; - } else { - ++l; - } - ++i; - } - if (l > 0) { - substrs.emplace_back(s.substr(start, l)); - offsets.emplace_back(start); - } - return std::make_pair(substrs, offsets); + // encode the input string + std::vector hashed_string(s.size()); + std::transform(s.begin(), s.end(), hashed_string.begin(), nt4); + + // get the runs of valid and ambiguous bases + std::list> valid_substrs = split_on_predicate(hashed_string, + [delim](uint8_t c){ return c == delim; }); + std::list> ambiguous_substrs = split_on_predicate(hashed_string, + [delim](uint8_t c){ return c != delim; }); + + // edge case: no valid bases found + const bool no_valid_bases = valid_substrs.empty(); + if (no_valid_bases) { + return std::make_pair(std::vector(), std::vector()); + } + + // decode valid_substrs + std::vector decoded_valid_substrs(valid_substrs.size()); + auto decoded_valid_substrs_it = decoded_valid_substrs.begin(); + for (auto valid_substrs_it = valid_substrs.begin(); valid_substrs_it != valid_substrs.end(); ++valid_substrs_it, ++decoded_valid_substrs_it) { + decoded_valid_substrs_it->append("*", valid_substrs_it->size()); + std::transform(valid_substrs_it->begin(), valid_substrs_it->end(), decoded_valid_substrs_it->begin(), + [](const uint32_t coded_base) { return "ACGT"[coded_base]; }); + } + + // compute offsets + const bool first_run_is_ambiguous = hashed_string[0] == delim; + size_t first_offset = 0; + if (first_run_is_ambiguous) { + // edge case: sequence starts with ambiguous bases + first_offset = ambiguous_substrs.front().size(); + ambiguous_substrs.pop_front(); + } + std::vector offsets{first_offset}; + + auto valid_substrs_it = valid_substrs.begin(); + size_t previous_substr_len = valid_substrs_it->size(); + for (++valid_substrs_it; valid_substrs_it != valid_substrs.end(); ++valid_substrs_it) { + offsets.push_back(offsets.back() + previous_substr_len + 
ambiguous_substrs.front().size()); + ambiguous_substrs.pop_front(); + previous_substr_len = valid_substrs_it->size(); + } + + return std::make_pair(decoded_valid_substrs, offsets); } From 78e3ae2578cbd17dc44397de46bbcdc4ad0cbffa Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Tue, 20 Sep 2022 11:21:54 +0100 Subject: [PATCH 25/29] Revert "Refactoring utils::split_ambiguous() using functional functions" --- src/utils.cpp | 79 ++++++++++++++++----------------------------------- 1 file changed, 24 insertions(+), 55 deletions(-) diff --git a/src/utils.cpp b/src/utils.cpp index 0c3570b2..1a92b571 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -8,7 +8,6 @@ #include #include #include -#include #include "utils.h" #include "seq.h" @@ -636,60 +635,30 @@ std::string remove_spaces_from_string(const std::string& str) return to_return; } -template -std::list> split_on_predicate(std::vector &original, UnaryPredicate predicate) { - std::list> split; - boost::split(split, original, predicate, boost::algorithm::token_compress_on); - auto remove_index = std::remove_if(split.begin(), split.end(), - [](const std::vector &hashed_substr) { return hashed_substr.empty(); } ); - split.erase(remove_index, split.end()); - return split; -} - std::pair, std::vector> split_ambiguous(const std::string& s, uint8_t delim) { - // encode the input string - std::vector hashed_string(s.size()); - std::transform(s.begin(), s.end(), hashed_string.begin(), nt4); - - // get the runs of valid and ambiguous bases - std::list> valid_substrs = split_on_predicate(hashed_string, - [delim](uint8_t c){ return c == delim; }); - std::list> ambiguous_substrs = split_on_predicate(hashed_string, - [delim](uint8_t c){ return c != delim; }); - - // edge case: no valid bases found - const bool no_valid_bases = valid_substrs.empty(); - if (no_valid_bases) { - return std::make_pair(std::vector(), std::vector()); - } - - // decode valid_substrs - std::vector decoded_valid_substrs(valid_substrs.size()); - auto 
decoded_valid_substrs_it = decoded_valid_substrs.begin(); - for (auto valid_substrs_it = valid_substrs.begin(); valid_substrs_it != valid_substrs.end(); ++valid_substrs_it, ++decoded_valid_substrs_it) { - decoded_valid_substrs_it->append("*", valid_substrs_it->size()); - std::transform(valid_substrs_it->begin(), valid_substrs_it->end(), decoded_valid_substrs_it->begin(), - [](const uint32_t coded_base) { return "ACGT"[coded_base]; }); - } - - // compute offsets - const bool first_run_is_ambiguous = hashed_string[0] == delim; - size_t first_offset = 0; - if (first_run_is_ambiguous) { - // edge case: sequence starts with ambiguous bases - first_offset = ambiguous_substrs.front().size(); - ambiguous_substrs.pop_front(); - } - std::vector offsets{first_offset}; - - auto valid_substrs_it = valid_substrs.begin(); - size_t previous_substr_len = valid_substrs_it->size(); - for (++valid_substrs_it; valid_substrs_it != valid_substrs.end(); ++valid_substrs_it) { - offsets.push_back(offsets.back() + previous_substr_len + ambiguous_substrs.front().size()); - ambiguous_substrs.pop_front(); - previous_substr_len = valid_substrs_it->size(); - } - - return std::make_pair(decoded_valid_substrs, offsets); + std::vector substrs; + std::vector offsets; + auto start { 0 }; + auto i { 0 }; + auto l { 0 }; + for (auto& ch : s) { + uint32_t c = nt4((uint8_t)ch); + if (c == delim) { + if (l > 0) { + substrs.emplace_back(s.substr(start, l)); + offsets.emplace_back(start); + } + start = i + 1; + l = 0; + } else { + ++l; + } + ++i; + } + if (l > 0) { + substrs.emplace_back(s.substr(start, l)); + offsets.emplace_back(start); + } + return std::make_pair(substrs, offsets); } From 2e5f583fa1bd0b356e907c16cce1ebbb82eb735e Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Tue, 20 Sep 2022 13:07:59 +0100 Subject: [PATCH 26/29] Refactoring split_ambiguous() --- include/utils.h | 2 +- src/utils.cpp | 29 +++++++++++++++-------------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git 
a/include/utils.h b/include/utils.h index 25692839..0198e0a2 100644 --- a/include/utils.h +++ b/include/utils.h @@ -132,6 +132,6 @@ std::vector> load_read_index( std::string remove_spaces_from_string(const std::string& str); -std::pair, std::vector> split_ambiguous(const std::string& s, uint8_t delim = 4); +std::pair, std::vector> split_ambiguous(const std::string& input_string, uint8_t delim = 4); #endif diff --git a/src/utils.cpp b/src/utils.cpp index 1a92b571..eef67910 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -635,29 +635,30 @@ std::string remove_spaces_from_string(const std::string& str) return to_return; } -std::pair, std::vector> split_ambiguous(const std::string& s, uint8_t delim) +std::pair, std::vector> split_ambiguous(const std::string& input_string, uint8_t delim) { std::vector substrs; std::vector offsets; auto start { 0 }; - auto i { 0 }; - auto l { 0 }; - for (auto& ch : s) { - uint32_t c = nt4((uint8_t)ch); - if (c == delim) { - if (l > 0) { - substrs.emplace_back(s.substr(start, l)); + auto current_index { 0 }; + auto valid_substring_length { 0 }; + for (const auto& base : input_string) { + const uint32_t coded_base = nt4(base); + const bool is_ambiguous = coded_base == delim; + if (is_ambiguous) { + if (valid_substring_length > 0) { + substrs.emplace_back(input_string.substr(start, valid_substring_length)); offsets.emplace_back(start); } - start = i + 1; - l = 0; + start = current_index + 1; + valid_substring_length = 0; } else { - ++l; + ++valid_substring_length; } - ++i; + ++current_index; } - if (l > 0) { - substrs.emplace_back(s.substr(start, l)); + if (valid_substring_length > 0) { + substrs.emplace_back(input_string.substr(start, valid_substring_length)); offsets.emplace_back(start); } return std::make_pair(substrs, offsets); From a5eb55dec4ad911a992f4bff36a30686df7c8d6e Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Tue, 20 Sep 2022 13:36:54 +0100 Subject: [PATCH 27/29] Small refactoring of class Seq --- include/seq.h | 7 +++++-- 
src/seq.cpp | 10 +++++----- test/seq_test.cpp | 4 ++-- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/include/seq.h b/include/seq.h index d9fa18a4..f04d660e 100644 --- a/include/seq.h +++ b/include/seq.h @@ -11,10 +11,13 @@ class Seq { public: uint32_t id; std::string name; - std::vector seq; - std::vector offsets; std::set sketch; + // the original sequence is split into several valid subsequences (composed of ACGT only) + std::vector subseqs; // these are the subsequences themselves + std::vector offsets; // these are the subsequences offsets on the original string + + Seq(uint32_t, const std::string&, const std::string&, uint32_t, uint32_t); ~Seq(); diff --git a/src/seq.cpp b/src/seq.cpp index df5c4124..53d7e4b1 100644 --- a/src/seq.cpp +++ b/src/seq.cpp @@ -16,7 +16,7 @@ Seq::Seq(uint32_t i, const std::string& n, const std::string& p, uint32_t w, uin , name(n) { auto seqs_and_offsets = split_ambiguous(p); - seq = seqs_and_offsets.first; + subseqs = seqs_and_offsets.first; offsets = seqs_and_offsets.second; minimizer_sketch(w, k); } @@ -29,7 +29,7 @@ void Seq::initialize( id = i; name = n; auto seqs_and_offsets = split_ambiguous(p); - seq = seqs_and_offsets.first; + subseqs = seqs_and_offsets.first; offsets = seqs_and_offsets.second; sketch.clear(); minimizer_sketch(w, k); @@ -100,8 +100,8 @@ void Seq::add_new_smallest_minimizer(vector& window, uint64_t& smalle } void Seq::minimizer_sketch(const uint32_t w, const uint32_t k) { - for (size_t i = 0; i < seq.size(); ++i) { - minimizer_sketch(seq[i], offsets[i], w, k); + for (size_t i = 0; i < subseqs.size(); ++i) { + minimizer_sketch(subseqs[i], offsets[i], w, k); } } @@ -160,7 +160,7 @@ std::ostream& operator<<(std::ostream& out, Seq const& data) uint64_t Seq::length() const { uint64_t l{0}; - for (const auto &s: seq) { + for (const auto &s: subseqs) { l += s.length(); } return l; diff --git a/test/seq_test.cpp b/test/seq_test.cpp index d87a7d3e..b45d746b 100644 --- a/test/seq_test.cpp +++ 
b/test/seq_test.cpp @@ -13,7 +13,7 @@ TEST(SeqTest, create) EXPECT_EQ((uint)0, s1.id); EXPECT_EQ("0", s1.name); const std::vector expected_seq { "AGCTAATGCGTT" }; - EXPECT_EQ(expected_seq, s1.seq); + EXPECT_EQ(expected_seq, s1.subseqs); } TEST(SeqTest, initialize) @@ -23,7 +23,7 @@ TEST(SeqTest, initialize) EXPECT_EQ((uint)1, s1.id); EXPECT_EQ("new", s1.name); const std::vector expected_seq { "AGCTAATGCATA" }; - EXPECT_EQ(expected_seq, s1.seq); + EXPECT_EQ(expected_seq, s1.subseqs); } TEST(SeqTest, sketchShortReads) From f14cbcec0f8961b00341225ba50793bb3c4a5b9d Mon Sep 17 00:00:00 2001 From: Leandro Ishi Date: Tue, 20 Sep 2022 13:46:10 +0100 Subject: [PATCH 28/29] Now erroring out if we find an ambiguous base in Seq::add_letter_to_get_next_kmer() --- include/seq.h | 2 +- src/seq.cpp | 32 +++++++++++++------------------- 2 files changed, 14 insertions(+), 20 deletions(-) diff --git a/include/seq.h b/include/seq.h index f04d660e..93274344 100644 --- a/include/seq.h +++ b/include/seq.h @@ -25,7 +25,7 @@ class Seq { void initialize( uint32_t, const std::string&, const std::string&, uint32_t, uint32_t); - bool add_letter_to_get_next_kmer(const char&, const uint64_t&, const uint64_t&, + void add_letter_to_get_next_kmer(const char&, const uint64_t&, const uint64_t&, uint32_t&, uint64_t (&)[2], uint64_t (&)[2]); void add_minimizing_kmers_to_sketch(const std::vector&, const uint64_t&); diff --git a/src/seq.cpp b/src/seq.cpp index 53d7e4b1..f9d6a8b4 100644 --- a/src/seq.cpp +++ b/src/seq.cpp @@ -35,24 +35,21 @@ void Seq::initialize( minimizer_sketch(w, k); } -bool Seq::add_letter_to_get_next_kmer(const char& letter, const uint64_t& shift1, +void Seq::add_letter_to_get_next_kmer(const char& letter, const uint64_t& shift1, const uint64_t& mask, uint32_t& buff, uint64_t (&kmer)[2], uint64_t (&kh)[2]) { uint32_t c = nt4((uint8_t)letter); - if (c < 4) { // not an ambiguous base - kmer[0] = (kmer[0] << 2 | c) & mask; // forward k-mer - kmer[1] = (kmer[1] >> 2) | (3ULL ^ c) << 
shift1; // reverse k-mer - kh[0] = hash64(kmer[0], mask); - kh[1] = hash64(kmer[1], mask); - buff++; - return true; - } else { - BOOST_LOG_TRIVIAL(warning) - << now() << "bad letter - found a non AGCT base in read so skipping read " - << name; - sketch.clear(); - return false; + + const bool is_an_ambiguous_base = c >= 4; + if (is_an_ambiguous_base) { + fatal_error("Found an ambiguous base in Seq::add_letter_to_get_next_kmer()"); } + + kmer[0] = (kmer[0] << 2 | c) & mask; // forward k-mer + kmer[1] = (kmer[1] >> 2) | (3ULL ^ c) << shift1; // reverse k-mer + kh[0] = hash64(kmer[0], mask); + kh[1] = hash64(kmer[1], mask); + buff++; } uint64_t find_smallest_kmer_value( @@ -121,11 +118,8 @@ void Seq::minimizer_sketch(const std::string &s, const size_t seq_offset, return; for (const char letter : s) { - const bool added = add_letter_to_get_next_kmer(letter, shift1, mask, buff, - kmer, - kh); // add the next base and remove the first one to get the next kmer - if (not added) - return; + // add the next base and remove the first one to get the next kmer + add_letter_to_get_next_kmer(letter, shift1, mask, buff,kmer,kh); if (buff >= k) { window.push_back(Minimizer( From 66209a2d5a86c1995c43231b38f6063cb579ea04 Mon Sep 17 00:00:00 2001 From: Michael Hall Date: Wed, 21 Sep 2022 09:07:50 +1000 Subject: [PATCH 29/29] bump version to 0.9.2 --- CHANGELOG.md | 3 +++ CMakeLists.txt | 2 +- README.md | 6 +++--- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a20e6102..c31ed06e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ## [Unreleased] +## [0.9.2] + ### Changed - The VCF INFO field `SVTYPE` has now been changed to `VC` [[#249][249]] @@ -122,6 +124,7 @@ their changes meticulously documented here. 
- k-mer coverage underflow bug in `LocalPRG` [[#183][183]] [Unreleased]: https://github.com/rmcolq/pandora/compare/0.9.1...HEAD +[0.9.2]: https://github.com/rmcolq/pandora/compare/0.9.1...0.9.2 [0.9.1]: https://github.com/rmcolq/pandora/releases/tag/0.9.1 [0.9.0]: https://github.com/rmcolq/pandora/releases/tag/0.9.0 [0.9.0-rc2]: https://github.com/rmcolq/pandora/releases/tag/0.9.0-rc2 diff --git a/CMakeLists.txt b/CMakeLists.txt index 47e29d1a..8d064e1d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,7 +12,7 @@ HunterGate( # project configuration set(PROJECT_NAME_STR pandora) -project(${PROJECT_NAME_STR} VERSION "0.9.1" LANGUAGES C CXX) +project(${PROJECT_NAME_STR} VERSION "0.9.2" LANGUAGES C CXX) set(ADDITIONAL_VERSION_LABELS "") configure_file( include/version.h.in ${CMAKE_BINARY_DIR}/include/version.h ) diff --git a/README.md b/README.md index faa3c73f..786e8172 100644 --- a/README.md +++ b/README.md @@ -81,13 +81,13 @@ In this binary, all libraries are linked statically. * **Download**: ``` - wget https://github.com/rmcolq/pandora/releases/download/0.9.1/pandora-linux-precompiled-v0.9.1 + wget https://github.com/rmcolq/pandora/releases/download/0.9.2/pandora-linux-precompiled-v0.9.2 ``` * **Running**: ``` -chmod +x pandora-linux-precompiled-v0.9.1 -./pandora-linux-precompiled-v0.9.1 -h +chmod +x pandora-linux-precompiled-v0.9.2 +./pandora-linux-precompiled-v0.9.2 -h ``` * **Notes**: