From 9f1f3fbb6d7b0b22e6fe9f1a28b3150fa1a09358 Mon Sep 17 00:00:00 2001 From: edolzhenko Date: Thu, 25 Jan 2018 14:00:52 -0800 Subject: [PATCH 1/5] release-2.5.4: Bump version number --- .gitignore | 1 + include/version.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 5e35646..fd3b9ca 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ cmake-build-debug test-example build +.vscode diff --git a/include/version.h b/include/version.h index 3c34d6b..d750fe8 100644 --- a/include/version.h +++ b/include/version.h @@ -24,4 +24,4 @@ #include -const std::string kProgramVersion = "Expansion Hunter v2.5.3"; +const std::string kProgramVersion = "Expansion Hunter v2.5.4"; From 6cab5fd7a1c71bb05edbffbdd66be2a603aae60a Mon Sep 17 00:00:00 2001 From: edolzhenko Date: Thu, 25 Jan 2018 14:03:13 -0800 Subject: [PATCH 2/5] release-2.5.4: Add Travis CI config --- .travis.yml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..8ce581f --- /dev/null +++ b/.travis.yml @@ -0,0 +1,19 @@ +language: cpp + +addons: + apt: + sources: + - boost-latest + - ubuntu-toolchain-r-test + packages: + - g++-4.9 + - libboost1.55-all-dev + +script: + - export CC="gcc-4.9" + - export CXX="g++-4.9" + - mkdir build + - cd build + - cmake .. + - make -j4 + - make test \ No newline at end of file From 686a4ee183a8b52739fb5a3b3392a6c5302d4f13 Mon Sep 17 00:00:00 2001 From: edolzhenko Date: Thu, 25 Jan 2018 14:12:34 -0800 Subject: [PATCH 3/5] release-2.5.4: Exit normally on help/version request --- .clang-format | 1 + common/parameters.cc | 8 ++++---- common/parameters.h | 16 ++++++++++------ src/expansion_hunter.cc | 30 +++++++++++++++--------------- 4 files changed, 30 insertions(+), 25 deletions(-) create mode 100644 .clang-format diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..f6cb8ad --- /dev/null +++ b/.clang-format @@ -0,0 +1 @@ +BasedOnStyle: Google diff --git a/common/parameters.cc b/common/parameters.cc index 0634a86..8e580ed 100644 --- a/common/parameters.cc +++ b/common/parameters.cc @@ -36,12 +36,12 @@ #include #include -using boost::lexical_cast; -using boost::format; using boost::assign::list_of; -using std::string; +using boost::format; +using boost::lexical_cast; using std::cerr; using std::endl; +using std::string; using std::vector; namespace po = boost::program_options; @@ -119,7 +119,7 @@ bool Parameters::Load(int argc, char **argv) { if (argc == 1) { std::cerr << usage << std::endl; - return false; + throw std::invalid_argument(""); } po::variables_map arg_map; diff --git a/common/parameters.h b/common/parameters.h index ac4f4b6..3484693 100644 --- a/common/parameters.h +++ b/common/parameters.h @@ -31,25 +31,29 @@ #include "genotyping/short_repeat_genotyper.h" class Outputs { -public: + public: Outputs(const std::string vcf_path, const std::string json_path, const std::string log_path); std::ostream &vcf() { return vcf_; } std::ostream &json() { return json_; } std::ostream &log() { return log_; } -private: + private: std::ofstream vcf_; std::ofstream json_; std::ofstream log_; }; class Parameters { -public: + public: const double kSmallestPossibleDepth = 5.0; Parameters() - : region_extension_len_(1000), min_wp_(0.90), min_baseq_(20), - min_anchor_mapq_(60), skip_unaligned_(false), depth_(0.0), + : region_extension_len_(1000), + min_wp_(0.90), + min_baseq_(20), + min_anchor_mapq_(60), + skip_unaligned_(false), + depth_(0.0), sex_(Sex::kFemale) {} bool Load(int argc, char **argv); std::string bam_path() const { return bam_path_; } @@ -73,7 +77,7 @@ class Parameters { bool depth_is_set() const { return depth_ >= kSmallestPossibleDepth; } Sex sex() const { return sex_; } -private: + private: std::string bam_path_; std::string genome_path_; // Maximum distance from a region to search for relevant reads. diff --git a/src/expansion_hunter.cc b/src/expansion_hunter.cc index 3a528d2..eaa16dc 100644 --- a/src/expansion_hunter.cc +++ b/src/expansion_hunter.cc @@ -51,14 +51,14 @@ #include "purity/purity.h" #include "rep_align/rep_align.h" -using std::unordered_set; -using std::map; -using std::vector; -using std::string; +using std::array; using std::cerr; using std::endl; +using std::map; using std::pair; -using std::array; +using std::string; +using std::unordered_set; +using std::vector; // Returns the length of the first read in a BAM file. size_t CalcReadLen(const string &bam_path) { @@ -385,14 +385,12 @@ void EstimateRepeatSizes(const Parameters ¶meters, haplotype_candidates.push_back( RepeatAllele(read_group.size, read_group.num_supporting_reads, ReadType::kSpanning)); - } else if (read_group.read_type == ReadType::kInrepeat) { flanking_size_counts[num_units_in_read] += read_group.num_supporting_reads; haplotype_candidates.push_back( RepeatAllele(num_units_in_read, read_group.num_supporting_reads, ReadType::kInrepeat)); - } else if (read_group.read_type == ReadType::kFlanking) { haplotype_candidates.push_back( RepeatAllele(read_group.size, read_group.num_supporting_reads, @@ -400,7 +398,6 @@ void EstimateRepeatSizes(const Parameters ¶meters, for (const auto &align : read_group.rep_aligns) { flanking_size_counts[align.size] += 1; } - } else { throw std::logic_error("Do not know how to deal with " + kReadTypeToString.at(read_group.read_type) + @@ -427,9 +424,10 @@ void EstimateRepeatSizes(const Parameters ¶meters, cerr << "]" << endl; if (haplotype_candidates.empty()) { - cerr << TimeStamp() - << ",\t[Skipping this region because no informative reads were found]" - << endl; + cerr + << TimeStamp() + << ",\t[Skipping this region because no informative reads were found]" + << endl; continue; } @@ -468,12 +466,13 @@ int main(int argc, char *argv[]) { cerr << kProgramVersion << endl; if (!parameters.Load(argc, argv)) { - return 1; + return 0; } - cerr << TimeStamp() << ",[Starting Logging for " << parameters.sample_name() << "]" << endl; + cerr << TimeStamp() << ",[Starting Logging for " << parameters.sample_name() + << "]" << endl; - Outputs outputs(parameters.vcf_path(), parameters.json_path(), + Outputs outputs(parameters.vcf_path(), parameters.json_path(), parameters.log_path()); map repeat_specs; @@ -498,7 +497,8 @@ int main(int argc, char *argv[]) { parameters.set_depth(depth); } - cerr << TimeStamp() << ",[Read length: " << parameters.read_len() << "]" << endl; + cerr << TimeStamp() << ",[Read length: " << parameters.read_len() << "]" + << endl; cerr << TimeStamp() << ",[Depth: " << parameters.depth() << "]" << endl; if (parameters.depth() < parameters.kSmallestPossibleDepth) { From 660edd2a345688ca4d8105f212c12869216a8d1c Mon Sep 17 00:00:00 2001 From: edolzhenko Date: Thu, 25 Jan 2018 14:23:50 -0800 Subject: [PATCH 4/5] release-2.5.4: Make small changes to VCF format --- src/vcf_output.cc | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/src/vcf_output.cc b/src/vcf_output.cc index 4821184..be14929 100644 --- a/src/vcf_output.cc +++ b/src/vcf_output.cc @@ -31,18 +31,17 @@ #include -using std::vector; -using std::ostream; -using std::string; -using std::map; using std::cerr; using std::endl; +using std::map; +using std::ostream; +using std::string; +using std::vector; void WriteVcf(const Parameters ¶meters, const std::map &repeat_specs, const std::vector &sample_findings, std::ostream &out) { - std::stringstream vcf_header, vcf_body; // clang-format off vcf_header << @@ -55,11 +54,11 @@ void WriteVcf(const Parameters ¶meters, "##INFO=\n" "##FORMAT=\n" "##FORMAT=\n" - "##FORMAT=\n" - "##FORMAT=\n" - "##FORMAT=\n" - "##FORMAT=\n" - "##FORMAT=\n"; + "##FORMAT=\n" + "##FORMAT=\n" + "##FORMAT=\n" + "##FORMAT=\n" + "##FORMAT=\n"; // clang-format on std::set alt_sizes; @@ -81,7 +80,6 @@ void WriteVcf(const Parameters ¶meters, sample_ad_fl, sample_ad_ir; for (const RepeatAllele allele : region_findings.genotype) { - const int allele_len = allele.size_ * unit_len; const string source = kReadTypeToString.at(allele.type_); @@ -141,17 +139,17 @@ void WriteVcf(const Parameters ¶meters, } } const Region ®ion = repeat_spec.target_region; - const string info = "SVTYPE=STR;END=" + std::to_string(region.end()) + - ";REF=" + std::to_string(reference_size) + ";RL=" + - std::to_string(reference_size * unit_len) + ";RU=" + - unit_encoding + ";REPID=" + region_id; + const string info = "END=" + std::to_string(region.end()) + + ";REF=" + std::to_string(reference_size) + + ";RL=" + std::to_string(reference_size * unit_len) + + ";RU=" + unit_encoding + ";REPID=" + region_id; if (alt.empty()) { alt = "."; } vcf_body << region.chrom() << "\t" << region.start() - 1 << "\t.\t" << ref_field << "\t" << alt << "\t.\tPASS\t" << info - << "\tGT:SO:CN:CI:AD_SP:AD_FL:AD_IR\t" << sample_gt << ":" + << "\tGT:SO:REPCN:REPCI:ADSP:ADFL:ADIR\t" << sample_gt << ":" << sample_so << ":" << sample_cn << ":" << sample_ci << ":" << sample_ad_sp << ":" << sample_ad_fl << ":" << sample_ad_ir << endl; From 745b2948a6f6119c5d6d15434e1ea0794336f5c4 Mon Sep 17 00:00:00 2001 From: edolzhenko Date: Thu, 1 Feb 2018 13:35:10 -0800 Subject: [PATCH 5/5] release-2.5.4: Enable read rescue in CRAMs --- src/expansion_hunter.cc | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/expansion_hunter.cc b/src/expansion_hunter.cc index eaa16dc..169ddbe 100644 --- a/src/expansion_hunter.cc +++ b/src/expansion_hunter.cc @@ -189,14 +189,10 @@ void CacheAligns(BamFile *bam_file, const RepeatSpec &repeat_spec, &align_pairs); } - // Filling-in missing mates by jumping around the BAM. + // Filling-in missing mates by jumping around the BAM/CRAM file. cerr << TimeStamp() << ",\t[Filling in mates]" << endl; - if ((*bam_file).format() == BamFile::kBamFile) { - FillinMates(*bam_file, align_pairs, repeat_spec.units_shifts, 0.9, - ontarget_frag_names); - } else { - cerr << TimeStamp() << ",\t[Skipping filling in mates]" << endl; - } + FillinMates(*bam_file, align_pairs, repeat_spec.units_shifts, 0.9, + ontarget_frag_names); cerr << TimeStamp() << ",\t[Done filling in mates]" << endl; }