From 09d8b40b4ba6bc51a3f4d9af15a72dce9f1ba19c Mon Sep 17 00:00:00 2001 From: Joe Zhu Date: Fri, 18 May 2018 08:25:00 +0100 Subject: [PATCH] bump version to 0.5.0 --- .DEploid | 2 +- DESCRIPTION | 9 +- NAMESPACE | 2 + src/DEploid/dEploid.cpp | 12 +- src/DEploid/dEploidIO.cpp | 164 +++++-- src/DEploid/dEploidIO.hpp | 63 ++- src/DEploid/exceptions.hpp | 10 +- src/DEploid/export/dEploidIOExport.cpp | 162 ++++++- .../export/dEploidIOExportPosteriorProb.cpp | 2 +- src/DEploid/export/writeMcmcRelated.cpp | 11 +- src/DEploid/global.h | 4 +- src/DEploid/ibd.cpp | 427 +++++++++++++++++- src/DEploid/ibd.hpp | 95 +++- src/DEploid/mcmc.cpp | 319 +++---------- src/DEploid/mcmc.hpp | 43 +- src/DEploid/panel.cpp | 24 + src/DEploid/panel.hpp | 23 + src/DEploid/updateHap.cpp | 2 +- src/DEploid/updateHap.hpp | 2 - src/DEploid/variantIndex.hpp | 1 + src/dEploidr.cpp | 4 +- tests/testthat/test-DEploid_tools.R | 12 +- 22 files changed, 999 insertions(+), 394 deletions(-) diff --git a/.DEploid b/.DEploid index ca779cb..561c2ad 160000 --- a/.DEploid +++ b/.DEploid @@ -1 +1 @@ -Subproject commit ca779cb3547cb4ebb394199b172ec13eb9707771 +Subproject commit 561c2ad4e9cbd5fa1d6c2d66469f8d0db59d6f8e diff --git a/DESCRIPTION b/DESCRIPTION index 5e430bb..fae13dc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: DEploid Type: Package Title: Deconvolute Mixed Genomes with Unknown Proportions -Version: 0.4.4 +Version: 0.5.0 Authors@R: c( person("Joe", "Zhu", role=c("aut", "cre", "cph"), email="joe.zhu@well.ox.ac.uk"), person('Jacob', 'Almagro-Garcia', role=c('aut', 'cph')), @@ -31,13 +31,14 @@ Imports: Rcpp (>= 0.11.2), scales (>= 0.4.0), plotly (>= 4.7.1), - magrittr (>= 1.5) + magrittr (>= 1.5), + rmarkdown(>= 1.6), + htmlwidgets (>= 1.0) Suggests: knitr, - rmarkdown, testthat (>= 0.9.0) SystemRequirements: C++11 VignetteBuilder: knitr LinkingTo: Rcpp RoxygenNote: 6.0.1 -Date: 2017-08-09 +Date: 2018-05-18 diff --git a/NAMESPACE b/NAMESPACE index 25f013c..37c022f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,6 +5,8 @@ importFrom("utils", "read.table") importFrom(Rcpp,evalCpp) importFrom("magrittr", "%>%") importFrom("plotly", "plot_ly", "layout", "add_trace") +importFrom("rmarkdown", "pandoc_available") +importFrom("htmlwidgets", "saveWidget") useDynLib(DEploid, .registration = TRUE) export(dEploid) export(plotProportions) diff --git a/src/DEploid/dEploid.cpp b/src/DEploid/dEploid.cpp index b2ba17a..f8023bb 100644 --- a/src/DEploid/dEploid.cpp +++ b/src/DEploid/dEploid.cpp @@ -25,7 +25,6 @@ #include // std::cout #include "mcmc.hpp" -#include "panel.hpp" #include "dEploidIO.hpp" using namespace std; @@ -46,10 +45,13 @@ int main( int argc, char *argv[] ){ return EXIT_SUCCESS; } - if ( dEploidIO.doPainting() ){ + if ( dEploidIO.doComputeLLK() ){ + dEploidIO.computeLLKfromInitialHap(); + } else if ( dEploidIO.doLsPainting() ){ dEploidIO.chromPainting(); - } else{ - + } else if ( dEploidIO.doIbdPainting() ){ + dEploidIO.paintIBD(); + }else{ if (dEploidIO.useIBD()){ // ibd McmcSample * ibdMcmcSample = new McmcSample(); MersenneTwister ibdRg(dEploidIO.randomSeed()); @@ -66,6 +68,8 @@ int main( int argc, char *argv[] ){ false); // use IBD mcmcMachinery.runMcmcChain(true, // show progress false); // use IBD + + dEploidIO.paintIBD(); delete mcmcSample; } // Finishing, write log diff --git a/src/DEploid/dEploidIO.cpp b/src/DEploid/dEploidIO.cpp index f97fa17..08821c3 100644 --- a/src/DEploid/dEploidIO.cpp +++ b/src/DEploid/dEploidIO.cpp @@ -22,13 +22,14 @@ * along with this program. If not, see . */ -#include "dEploidIO.hpp" -#include "utility.hpp" // normailize by sum -#include // assert -#include // std::setw #include #include -#include "updateHap.hpp" +#include // assert +#include // std::setw +#include "utility.hpp" // normailize by sum +#include "updateHap.hpp" // chromPainting +#include "dEploidIO.hpp" +#include "ibd.hpp" DEploidIO::DEploidIO(){ this->init(); @@ -89,11 +90,12 @@ void DEploidIO::core() { void DEploidIO::init() { this->setDoExportRecombProb(false); - this->setRandomSeedWasSet(false); + this->setrandomSeedWasGiven(false); this->setCompressVcf(false); this->setInitialPropWasGiven(false); this->setInitialHapWasGiven(false); this->initialProp.clear(); + this->setPleaseCheckInitialP(true); this->setExcludeSites( false ); this->excludedMarkers = NULL; this->panel = NULL; @@ -112,7 +114,8 @@ void DEploidIO::init() { this->setDoUpdatePair( true ); this->setDoUpdateSingle( true ); this->setDoExportPostProb( false ); - this->setDoPainting( false ); + this->setDoLsPainting( false ); + this->setDoIbdPainting( false ); this->setUseIBD( false ); this->setDoExportSwitchMissCopy ( true ); this->setDoAllowInbreeding( false ); @@ -126,10 +129,11 @@ void DEploidIO::init() { this->setScalingFactor(100.0); this->setParameterG(20.0); this->setParameterSigma(5.0); - + this->setIBDSigma(20.0); this->setUseVcf(false); this->vcfReaderPtr_ = NULL; this->setDoExportVcf(false); + this->setDoComputeLLK(false); #ifdef COMPILEDATE compileTime_ = COMPILEDATE; @@ -169,6 +173,12 @@ void DEploidIO::reInit() { void DEploidIO::finalize(){ + if ( this->doIbdPainting() | this->doComputeLLK() ){ + if (!initialPropWasGiven()){ + throw InitialPropUngiven(""); + } + } + if ( this->useIBD() && this->kStrain() == 1){ throw InvalidK(); } @@ -177,7 +187,7 @@ void DEploidIO::finalize(){ throw VcfOutUnSpecified(""); } - if ( !this->randomSeedWasSet_ ){ + if ( !this->randomSeedWasGiven_ ){ this->set_seed( (unsigned)(time(0)) ); } @@ -260,17 +270,18 @@ void DEploidIO::removeFilesWithSameName(){ strIbdExportProp = this->prefix_ + ".ibd.prop"; strIbdExportLLK = this->prefix_ + ".ibd.llk"; strIbdExportHap = this->prefix_ + ".ibd.hap"; + strIbdExportProbs = this->prefix_ + ".ibd.probs"; strExportVcf = this->prefix_ + ".vcf"; if ( compressVcf() ){ strExportVcf += ".gz"; } - strExportLog = this->prefix_ + ((this->doPainting()) ? ".painting":"") + ".log"; + strExportLog = this->prefix_ + ((this->doLsPainting()) ? ".painting":"") + ".log"; strExportRecombProb = this->prefix_ + ".recomb"; strExportExtra = this->prefix_ + ".extra"; - if ( this->doPainting() == false ){ + if ( this->doLsPainting() == false ){ if (this->useIBD()){ remove(strIbdExportProp.c_str()); remove(strIbdExportLLK.c_str()); @@ -281,9 +292,10 @@ void DEploidIO::removeFilesWithSameName(){ remove(strExportVcf.c_str()); remove(strExportProp.c_str()); remove(strExportExtra.c_str()); + remove(strIbdExportProbs.c_str()); } - if (this->doPainting() || this->doExportPostProb() ){ + if (this->doLsPainting() || this->doExportPostProb() ){ if (this->useIBD()){ strIbdExportSingleFwdProbPrefix = this->prefix_ + ".ibd.single"; for ( size_t i = 0; i < this->kStrain_ ; i++ ){ @@ -385,6 +397,8 @@ void DEploidIO::parse (){ this->setParameterG(readNextInput()); } else if ( *argv_i == "-sigma" ) { this->setParameterSigma(readNextInput()); + } else if ( *argv_i == "-ibdSigma" ) { + this->setIBDSigma(readNextInput()); } else if ( *argv_i == "-recomb" ) { this->constRecombProb_ = readNextInput(); this->useConstRecomb_ = true; @@ -422,10 +436,16 @@ void DEploidIO::parse (){ throw ( FlagsConflict((*argv_i) , "-initialHap") ); } this->readNextStringto ( this->initialHapFileName_ ) ; - this->setDoPainting( true ); + this->setDoLsPainting( true ); this->readInitialHaps(); } else if ( *argv_i == "-ibd" ){ this->setUseIBD(true); + } else if ( *argv_i == "-computeLLK" ){ + this->setDoComputeLLK( true ); + } else if ( *argv_i == "-ibdPainting" ){ + this->setDoIbdPainting( true ); + } else if ( *argv_i == "-skipCheckingInitialP" ){ + this->setPleaseCheckInitialP(false); } else if ( *argv_i == "-initialP" ){ this->readInitialProportions(); this->setInitialPropWasGiven( true ); @@ -444,7 +464,7 @@ void DEploidIO::parse (){ this->setKstrain(this->initialProp.size()); } else if ( *argv_i == "-initialHap" ){ - if ( this->doPainting() == true ){ + if ( this->doLsPainting() == true ){ throw ( FlagsConflict((*argv_i) , "-painting") ); } this->readNextStringto ( this->initialHapFileName_ ) ; @@ -452,7 +472,7 @@ void DEploidIO::parse (){ this->readInitialHaps(); } else if ( *argv_i == "-seed"){ this->set_seed( readNextInput() ); - this->setRandomSeedWasSet( true ); + this->setrandomSeedWasGiven( true ); } else if ( *argv_i == "-z" ){ this->setCompressVcf(true); } else if ( *argv_i == "-h" || *argv_i == "-help"){ @@ -474,9 +494,9 @@ void DEploidIO::checkInput(){ throw FileNameMissing ( "Alt count" );} if ( this->plafFileName_.size() == 0 ){ throw FileNameMissing ( "PLAF" );} - if ( usePanel() && this->panelFileName_.size() == 0 ){ + if ( usePanel() && this->panelFileName_.size() == 0 && !this->doIbdPainting() && !this->doComputeLLK() ){ throw FileNameMissing ( "Reference panel" );} - if ( this->initialPropWasGiven() && ( abs(sumOfVec(initialProp) - 1.0) > 0.00001 )){ + if ( this->initialPropWasGiven() && ( abs(sumOfVec(initialProp) - 1.0) > 0.00001 ) && this->pleaseCheckInitialP() ){ throw SumOfPropNotOne ( to_string(sumOfVec(initialProp)) );} if ( this->initialPropWasGiven() ){ if ( this->kStrainWasManuallySet() == true ){ @@ -561,6 +581,7 @@ void DEploidIO::printHelp(std::ostream& out){ out << "./dEploid -vcf data/testData/PG0390-C.test.vcf -exclude data/testData/labStrains.test.exclude.txt -plaf data/testData/labStrains.test.PLAF.txt -o PG0390-CPanelExclude -panel data/testData/labStrains.test.panel.txt" << endl; out << "./dEploid -vcf data/testData/PG0390-C.test.vcf -exclude data/testData/labStrains.test.exclude.txt -plaf data/testData/labStrains.test.PLAF.txt -o PG0390-CPanelExclude -panel data/testData/labStrains.test.panel.txt -painting PG0390-CPanelExclude.hap" << endl; out << "./dEploid -vcf data/testData/PG0390-C.test.vcf -plaf data/testData/labStrains.test.PLAF.txt -o PG0390-CNopanel -noPanel -k 2 -ibd -nSample 250 -rate 8 -burn 0.67" < DEploidIO::computeExpectedWsafFromInitialHap(){ + // Make this a separate function + // calculate expected wsaf + vector expectedWsaf (this->initialHap.size(), 0.0); + for ( size_t i = 0; i < this->initialHap.size(); i++ ){ + assert( kStrain_ == this->initialHap[i].size() ); + for ( size_t k = 0; k < this->kStrain_; k++){ + expectedWsaf[i] += this->initialHap[i][k] * finalProp[k]; + } + assert ( expectedWsaf[i] >= 0 ); + //assert ( expectedWsaf[i] <= 1.0 ); + } + return expectedWsaf; +} + + +void DEploidIO::computeLLKfromInitialHap(){ + for ( auto const& value: this->initialProp ){ + this->finalProp.push_back(value); + } + + vector expectedWsaf = computeExpectedWsafFromInitialHap(); + if (expectedWsaf.size() != this->refCount_.size()){ + throw LociNumberUnequal("Hap length differs from data!"); + } + vector llk = calcLLKs ( this->refCount_, this->altCount_, expectedWsaf, 0, expectedWsaf.size(), this->scalingFactor()); + this->llkFromInitialHap_ = sumOfVec(llk); +} + + void DEploidIO::chromPainting(){ dout << "Painting haplotypes in" << this->initialHapFileName_ <initialProp ){ - this->filnalProp.push_back(value); + this->finalProp.push_back(value); } // Painting posterior probabilities @@ -623,17 +674,7 @@ void DEploidIO::chromPainting(){ //vector < vector > hap = decovolutedStrainsToBeRead.content_; - // Make this a separate function - // calculate expected wsaf - vector expectedWsaf (this->nLoci_, 0.0); - for ( size_t i = 0; i < this->initialHap.size(); i++ ){ - assert( kStrain_ == this->initialHap[i].size() ); - for ( size_t k = 0; k < this->kStrain_; k++){ - expectedWsaf[i] += this->initialHap[i][k] * filnalProp[k]; - } - assert ( expectedWsaf[i] >= 0 ); - //assert ( expectedWsaf[i] <= 1.0 ); - } + vector expectedWsaf = computeExpectedWsafFromInitialHap(); MersenneTwister tmpRg(this->randomSeed()); @@ -655,7 +696,7 @@ void DEploidIO::chromPainting(){ this->altCount_, this->plaf_, expectedWsaf, - this->filnalProp, this->initialHap, &tmpRg, + this->finalProp, this->initialHap, &tmpRg, start, length, this->panel, this->missCopyProb_, this->scalingFactor(), tmpk); @@ -663,7 +704,7 @@ void DEploidIO::chromPainting(){ if ( this->doAllowInbreeding() == true ){ updatingSingle.setPanelSize(this->panel->inbreedingPanelSize()); } - updatingSingle.painting( refCount_, altCount_, expectedWsaf, this->filnalProp, this->initialHap); + updatingSingle.painting( refCount_, altCount_, expectedWsaf, this->finalProp, this->initialHap); //this->writeLastSingleFwdProb( updatingSingle.fwdProbs_, chromi, tmpk, false ); // false as not using ibd this->writeLastSingleFwdProb( updatingSingle.fwdBwdProbs_, chromi, tmpk, false ); // false as not using ibd } @@ -675,6 +716,9 @@ void DEploidIO::readPanel(){ if ( this->usePanel() == false ){ return; } + if ( this->doIbdPainting() | this->doComputeLLK() ){ + return; + } panel = new Panel(); panel->readFromFile(this->panelFileName_.c_str()); @@ -687,3 +731,61 @@ void DEploidIO::readPanel(){ } +DEploidIO::DEploidIO(const DEploidIO ¤tDEploidIO){ + // This is not working! to be improved + //cout << this->refCount_.size() << endl; + this->refCount_ = currentDEploidIO.refCount_; + //cout << this->refCount_.size() << endl; +} + + +void DEploidIO::getIBDprobsIntegrated(vector < vector > &prob){ + if (prob.size() != this->nLoci()){ + throw InvalidInput("Invalid probabilities! Check size!"); + } + + assert(this->ibdProbsIntegrated.size() == 0); + + for (size_t i = 0; i < prob[0].size(); i++){ + this->ibdProbsIntegrated.push_back(0.0); + } + + for ( size_t siteIndex = 0; siteIndex < this->nLoci(); siteIndex++ ){ + for (size_t i = 0; i < prob[siteIndex].size(); i++){ + this->ibdProbsIntegrated[i] += prob[siteIndex][i]; + } + } + normalizeBySum(this->ibdProbsIntegrated); +} + + +void DEploidIO::computeEffectiveKstrain(vector proportion){ + double tmpSumSq = 0.0; + for (double p : proportion){ + tmpSumSq += p * p; + } + this->effectiveKstrain_ = 1.0 / tmpSumSq; +} + + +void DEploidIO::computeInferredKstrain(vector proportion){ + this->inferredKstrain_ = 0; + for (double p : proportion){ + if ( p > 0.01 ){ + this->inferredKstrain_ += 1; + } + } +} + + +void DEploidIO::computeAdjustedEffectiveKstrain(){ + this->adjustedEffectiveKstrain_ = this->effectiveKstrain_; + if ( (this->inferredKstrain_ == 2) & (ibdProbsIntegrated.size() == 2)){ + if ( this->ibdProbsIntegrated[1] > 0.95 ){ + this->adjustedEffectiveKstrain_ = 1; + } + } +} + + + diff --git a/src/DEploid/dEploidIO.hpp b/src/DEploid/dEploidIO.hpp index e1d61c6..0679f32 100644 --- a/src/DEploid/dEploidIO.hpp +++ b/src/DEploid/dEploidIO.hpp @@ -23,12 +23,12 @@ * */ -#include /* strtol, strtod */ -#include -#include // std::invalid_argument #include -#include // std::cout -#include // std::stringstream +#include +#include // strtol, strtod +#include // std::invalid_argument +#include // std::cout +#include // std::stringstream #include "global.h" #include "exceptions.hpp" #include "panel.hpp" @@ -48,11 +48,14 @@ class DEploidIO{ #ifdef UNITTEST friend class TestIO; friend class TestMcmcMachinery; + friend class TestIBDpath; #endif friend class McmcMachinery; friend class RMcmcSample; + friend class IBDpath; public: DEploidIO(); + DEploidIO(const DEploidIO ¤tDEploidIO); DEploidIO(const std::string &arg); DEploidIO(int argc, char *argv[]); ~DEploidIO (); @@ -63,18 +66,26 @@ class DEploidIO{ bool version() const { return version_; } // Painting related void chromPainting (); - bool doPainting() const { return this->doPainting_; } + bool doLsPainting() const { return this->doLsPainting_; } + bool doIbdPainting() const { return this->doIbdPainting_; } + bool doComputeLLK() const { return this->doComputeLLK_; } + void computeLLKfromInitialHap(); bool useIBD() const { return this->useIBD_;} + void paintIBD(); + double ibdLLK_; + void getIBDprobsIntegrated(vector < vector > &prob); // Log void wrapUp(); - bool randomSeedWasSet() const {return this->randomSeedWasSet_; } + bool randomSeedWasSet() const {return this->randomSeedWasGiven_; } friend std::ostream& operator<< (std::ostream& stream, const DEploidIO& dEploidIO); + size_t randomSeed() const { return randomSeed_;} private: void core(); + double llkFromInitialHap_; // Read in input string plafFileName_; @@ -85,11 +96,12 @@ class DEploidIO{ string initialHapFileName_; string prefix_; size_t randomSeed_; - bool randomSeedWasSet_; - void setRandomSeedWasSet(const bool random){ this->randomSeedWasSet_ = random; } + bool randomSeedWasGiven_; + void setrandomSeedWasGiven(const bool random){ this->randomSeedWasGiven_ = random; } bool initialPropWasGiven_; + bool pleaseCheckInitialP_; bool initialHapWasGiven_; bool kStrainWasManuallySet_; bool kStrainWasSetByHap_; @@ -108,11 +120,12 @@ class DEploidIO{ bool doExportPostProb_; bool doExportSwitchMissCopy_; bool doAllowInbreeding_; - bool doPainting_; + bool doLsPainting_; + bool doIbdPainting_; bool useIBD_; vector initialProp; - vector filnalProp; + vector finalProp; vector < vector > initialHap; vector chrom_; vector < size_t > indexOfChromStarts_; @@ -151,6 +164,9 @@ class DEploidIO{ void setDoExportRecombProb( const bool exportRecombProb ){ this->doExportRecombProb_ = exportRecombProb; } bool doExportRecombProb() const { return this->doExportRecombProb_; } + bool doComputeLLK_; + void setDoComputeLLK( const bool setTo ){ this->doComputeLLK_ = setTo; } + // Parameters double missCopyProb_; double averageCentimorganDistance_;// = 15000.0, @@ -190,6 +206,7 @@ class DEploidIO{ string strIbdExportProp; string strIbdExportLLK; string strIbdExportHap; + string strIbdExportProbs; string strExportSingleFwdProbPrefix; string strExportPairFwdProb; @@ -218,6 +235,7 @@ class DEploidIO{ void set_seed(const size_t seed){ this->randomSeed_ = seed; } void removeFilesWithSameName(); + vector computeExpectedWsafFromInitialHap(); template @@ -259,17 +277,27 @@ class DEploidIO{ void setDoAllowInbreeding ( const bool setTo ) { this->doAllowInbreeding_ = setTo; } bool doAllowInbreeding() const { return this->doAllowInbreeding_; } - void setDoPainting ( const bool setTo ){ this->doPainting_ = setTo; } + void setDoLsPainting ( const bool setTo ){ this->doLsPainting_ = setTo; } + void setDoIbdPainting ( const bool setTo ){ this->doIbdPainting_ = setTo; } void setUseIBD( const bool setTo){ this->useIBD_ = setTo; } bool initialPropWasGiven() const { return initialPropWasGiven_; } void setInitialPropWasGiven(const bool setTo){this->initialPropWasGiven_ = setTo; } + bool pleaseCheckInitialP() const { return pleaseCheckInitialP_; } + void setPleaseCheckInitialP(const bool setTo){this->pleaseCheckInitialP_ = setTo; } + bool initialHapWasGiven() const { return initialHapWasGiven_; } void setInitialHapWasGiven(const bool setTo){ this->initialHapWasGiven_ = setTo; } + bool randomSeedWasGiven() const {return this->randomSeedWasGiven_; } + // log and export resutls void writeRecombProb ( Panel * panel ); + void writeIBDpostProb(vector < vector > & reshapedProbs, vector header); + vector ibdProbsHeader; + vector ibdProbsIntegrated; + void writeLLK (McmcSample * mcmcSample, bool useIBD = false); void writeProp (McmcSample * mcmcSample, bool useIBD = false); void writeHap (McmcSample * mcmcSample, bool useIBD = false); @@ -310,7 +338,11 @@ class DEploidIO{ double parameterSigma_; void setParameterSigma ( const double setTo ) { this->parameterSigma_ = setTo; } double parameterSigma() const { return this->parameterSigma_; } + double ibdSigma_; + void setIBDSigma ( const double setTo ){ this->ibdSigma_ = setTo; } + double ibdSigma() const {return this->ibdSigma_;} + void setNLoci ( const size_t setTo ){ this->nLoci_ = setTo;} size_t nLoci() const { return this->nLoci_; } void setKstrain ( const size_t setTo ){ this->kStrain_ = setTo;} size_t kStrain() const { return this->kStrain_;} @@ -335,6 +367,13 @@ class DEploidIO{ bool forbidCopyFromSame() const { return this->forbidCopyFromSame_; } void setForbidCopyFromSame(const bool forbid){ this->forbidCopyFromSame_ = forbid; } + + double effectiveKstrain_ ; + void computeEffectiveKstrain(vector proportion); + int inferredKstrain_; + void computeInferredKstrain(vector proportion); + double adjustedEffectiveKstrain_; + void computeAdjustedEffectiveKstrain(); }; #endif diff --git a/src/DEploid/exceptions.hpp b/src/DEploid/exceptions.hpp index 802bc88..54086b7 100644 --- a/src/DEploid/exceptions.hpp +++ b/src/DEploid/exceptions.hpp @@ -34,7 +34,6 @@ using namespace std; struct ShouldNotBeCalled : std::exception{ - explicit ShouldNotBeCalled(){ } virtual ~ShouldNotBeCalled() throw() {} virtual const char* what () const noexcept { @@ -70,6 +69,15 @@ struct InvalidInput : std::exception { }; +struct OutOfVectorSize : std::exception{ + explicit OutOfVectorSize(){ } + virtual ~OutOfVectorSize() throw() {} + virtual const char* what () const noexcept { + return string("Out of vector size!").c_str(); + } +}; + + struct InvalidK : public InvalidInput{ InvalidK( ):InvalidInput( ){ this->reason = "k must be at least 2, when using the flag -ibd."; diff --git a/src/DEploid/export/dEploidIOExport.cpp b/src/DEploid/export/dEploidIOExport.cpp index 1cd5436..e6e3528 100644 --- a/src/DEploid/export/dEploidIOExport.cpp +++ b/src/DEploid/export/dEploidIOExport.cpp @@ -81,14 +81,16 @@ void DEploidIO::writeLog ( ostream * writeTo ){ (*writeTo) << "dEploid version: " << dEploidGitVersion_ << endl; (*writeTo) << "\n"; (*writeTo) << "Input data: \n"; - (*writeTo) << setw(12) << "Panel: " << panelFileName_ << "\n"; + if (panelFileName_.size() > 0){ + (*writeTo) << setw(12) << "Panel: " << panelFileName_ << "\n"; + } (*writeTo) << setw(12) << "PLAF: " << plafFileName_ << "\n"; if ( useVcf() ) (*writeTo) << setw(12) << "VCF: " << vcfFileName_ << "\n"; if ( refFileName_.size()>0) (*writeTo) << setw(12) << "REF count: " << refFileName_ << "\n"; if ( altFileName_.size()>0) (*writeTo) << setw(12) << "ALT count: " << altFileName_ << "\n"; if ( excludeSites() ){ (*writeTo) << setw(12) << "Exclude: " << excludeFileName_ << "\n"; } (*writeTo) << "\n"; - if ( this->doPainting() == false ) { + if ( (this->doLsPainting() == false) & (this->doIbdPainting() == false) ) { (*writeTo) << "MCMC parameters: "<< "\n"; (*writeTo) << setw(19) << " MCMC burn: " << mcmcBurn_ << "\n"; (*writeTo) << setw(19) << " MCMC sample: " << nMcmcSample_ << "\n"; @@ -107,7 +109,11 @@ void DEploidIO::writeLog ( ostream * writeTo ){ (*writeTo) << setw(20) << " Miss copy prob: " << this->missCopyProb_ << "\n"; (*writeTo) << setw(20) << " Avrg Cent Morgan: " << this->averageCentimorganDistance_ << "\n"; (*writeTo) << setw(20) << " G: " << this->parameterG() << "\n"; + if (this->useIBD()){ + (*writeTo) << setw(20) << " IBD sigma: " << this->ibdSigma() << "\n"; + } else { (*writeTo) << setw(20) << " sigma: " << this->parameterSigma() << "\n"; + } (*writeTo) << setw(20) << " ScalingFactor: " << this->scalingFactor() << "\n"; if ( this->initialPropWasGiven() ){ (*writeTo) << setw(20) << " Initial prob: " ; @@ -117,11 +123,11 @@ void DEploidIO::writeLog ( ostream * writeTo ){ } } (*writeTo) << "\n"; - if ( this->doPainting() == false ) { + if ( (this->doLsPainting() == false) & (this->doIbdPainting() == false) & (this->doComputeLLK() == false) ) { (*writeTo) << "MCMC diagnostic:"<< "\n"; (*writeTo) << setw(19) << " Accept_ratio: " << acceptRatio_ << "\n"; (*writeTo) << setw(19) << " Max_llks: " << maxLLKs_ << "\n"; - (*writeTo) << setw(19) << " Mean_theta_llks: " << meanThetallks_ << "\n"; + (*writeTo) << setw(19) << " Final_theta_llks: " << meanThetallks_ << "\n"; (*writeTo) << setw(19) << " Mean_llks: " << meanllks_ << "\n"; (*writeTo) << setw(19) << " Stdv_llks: " << stdvllks_ << "\n"; (*writeTo) << setw(19) << " DIC_by_Dtheta: " << dicByTheta_ << "\n"; @@ -132,36 +138,61 @@ void DEploidIO::writeLog ( ostream * writeTo ){ (*writeTo) << setw(14) << "Start at: " << startingTime_ ; (*writeTo) << setw(14) << "End at: " << endTime_ ; (*writeTo) << "\n"; - (*writeTo) << "Output saved to:\n"; - if ( this->doPainting() ){ - for ( size_t i = 0; i < kStrain(); i++ ){ - (*writeTo) << "Posterior probability of strain " << i << ": "<< strExportSingleFwdProbPrefix << i <doComputeLLK() ){ + (*writeTo) << "Input likelihood: " << llkFromInitialHap_; + (*writeTo) << "\n"; } else { - (*writeTo) << setw(14) << "Likelihood: " << strExportLLK << "\n"; - (*writeTo) << setw(14) << "Proportions: " << strExportProp << "\n"; - (*writeTo) << setw(14) << "Haplotypes: " << strExportHap << "\n"; - if ( doExportVcf() ) { (*writeTo) << setw(14) << "Vcf: " << strExportVcf << "\n"; } - if (this->useIBD()){ - (*writeTo) << " IBD method output saved to:\n"; - (*writeTo) << setw(14) << "Likelihood: " << strIbdExportProp << "\n"; - (*writeTo) << setw(14) << "Proportions: " << strIbdExportProp << "\n"; - (*writeTo) << setw(14) << "Haplotypes: " << strIbdExportHap << "\n"; + (*writeTo) << "Output saved to:\n"; + if ( this->doLsPainting() ){ + for ( size_t i = 0; i < kStrain(); i++ ){ + (*writeTo) << "Posterior probability of strain " << i << ": "<< strExportSingleFwdProbPrefix << i <doIbdPainting()){ + if (this->ibdProbsIntegrated.size()>1){ + (*writeTo) << setw(14) << "IBD probs: " << strIbdExportProbs << "\n\n"; + (*writeTo) << " IBD probabilities:\n"; + for ( size_t stateI = 0; stateI < this->ibdProbsHeader.size(); stateI++ ){ + (*writeTo) << setw(14) << this->ibdProbsHeader[stateI] << ": " << this->ibdProbsIntegrated[stateI] << "\n"; + } + } + } else { + (*writeTo) << setw(14) << "Likelihood: " << strExportLLK << "\n"; + (*writeTo) << setw(14) << "Proportions: " << strExportProp << "\n"; + (*writeTo) << setw(14) << "Haplotypes: " << strExportHap << "\n"; + if ( doExportVcf() ) { (*writeTo) << setw(14) << "Vcf: " << strExportVcf << "\n"; } + if (this->useIBD()){ + (*writeTo) << " IBD method output saved to:\n"; + (*writeTo) << setw(14) << "Likelihood: " << strIbdExportLLK << "\n"; + (*writeTo) << setw(14) << "Proportions: " << strIbdExportProp << "\n"; + (*writeTo) << setw(14) << "Haplotypes: " << strIbdExportHap << "\n"; + } + if (this->ibdProbsIntegrated.size()>1){ + (*writeTo) << setw(14) << "IBD probs: " << strIbdExportProbs << "\n\n"; + (*writeTo) << " IBD probabilities:\n"; + for ( size_t stateI = 0; stateI < this->ibdProbsHeader.size(); stateI++ ){ + (*writeTo) << setw(14) << this->ibdProbsHeader[stateI] << ": " << this->ibdProbsIntegrated[stateI] << "\n"; + } + } } + (*writeTo) << "\n"; + (*writeTo) << " IBD best path llk: " << ibdLLK_ << "\n\n"; + + this->computeEffectiveKstrain(this->finalProp); + (*writeTo) << " Effective_K: " << this->effectiveKstrain_ <<"\n"; + this->computeInferredKstrain(this->finalProp); + (*writeTo) << " Inferred_K: " << this->inferredKstrain_ <<"\n"; + this->computeAdjustedEffectiveKstrain(); + (*writeTo) << "Adjusted_effective_K: " << this->adjustedEffectiveKstrain_ <<"\n"; } (*writeTo) << "\n"; (*writeTo) << "Proportions:\n"; - for ( size_t ii = 0; ii < this->filnalProp.size(); ii++){ - (*writeTo) << setw(10) << this->filnalProp[ii]; - (*writeTo) << ((ii < (this->filnalProp.size()-1)) ? "\t" : "\n") ; + for ( size_t ii = 0; ii < this->finalProp.size(); ii++){ + (*writeTo) << setw(10) << this->finalProp[ii]; + (*writeTo) << ((ii < (this->finalProp.size()-1)) ? "\t" : "\n") ; } - } - - - void DEploidIO::writeEventCount(){ ofstreamExportTmp.open( strExportExtra.c_str(), ios::out | ios::app | ios::binary ); @@ -223,3 +254,84 @@ void DEploidIO::writeEventCount(){ ofstreamExportTmp.close(); } + +void DEploidIO::writeIBDpostProb(vector < vector > & reshapedProbs, vector header){ + ostream * writeTo; + #ifdef UNITTEST + writeTo = &std::cout; + #endif + + #ifndef UNITTEST + ofstreamExportTmp.open( strIbdExportProbs.c_str(), ios::out | ios::app | ios::binary ); + writeTo = &ofstreamExportTmp; + #endif + + (*writeTo) << "CHROM" << "\t" << "POS" << "\t"; + for (string tmp : header){ + (*writeTo) << tmp << ((tmp!=header[header.size()-1])?"\t":"\n"); + } + + size_t siteIndex = 0; + for ( size_t chromIndex = 0; chromIndex < position_.size(); chromIndex++){ + for ( size_t posI = 0; posI < position_[chromIndex].size(); posI++){ + (*writeTo) << chrom_[chromIndex] << "\t" << (int)position_[chromIndex][posI] << "\t"; + for (size_t ij = 0; ij < reshapedProbs[siteIndex].size(); ij++){ + (*writeTo) << reshapedProbs[siteIndex][ij] << "\t"; + } + (*writeTo) << endl; + siteIndex++; + } + } + assert(siteIndex == nLoci()); + + #ifndef UNITTEST + ofstreamExportTmp.close(); + #endif +} + + +void DEploidIO::paintIBD(){ + vector goodProp; + vector goodStrainIdx; + + if ( this->doIbdPainting() ){ + this->finalProp = this->initialProp; + } + + for ( size_t i = 0; i < this->finalProp.size(); i++){ + if (this->finalProp[i] > 0.01){ + goodProp.push_back(this->finalProp[i]); + goodStrainIdx.push_back(i); + } + } + + if (goodProp.size() == 1){ + return; + } + + DEploidIO tmpDEploidIO; // (*this); + tmpDEploidIO.setKstrain(goodProp.size()); + tmpDEploidIO.setInitialPropWasGiven(true); + tmpDEploidIO.initialProp = goodProp; + tmpDEploidIO.finalProp = goodProp; + tmpDEploidIO.refCount_ = this->refCount_; + tmpDEploidIO.altCount_ = this->altCount_; + tmpDEploidIO.plaf_ = this->plaf_; + tmpDEploidIO.nLoci_= this->nLoci(); + tmpDEploidIO.position_ = this->position_; + tmpDEploidIO.chrom_ = this->chrom_; + //tmpDEploidIO.useConstRecomb_ = true; + //tmpDEploidIO.constRecombProb_ = 0.000001; + + //tmpDEploidIO.writeLog (&std::cout); + + MersenneTwister tmpRg(this->randomSeed()); + IBDpath tmpIBDpath; + tmpIBDpath.init(tmpDEploidIO, &tmpRg); + tmpIBDpath.buildPathProbabilityForPainting(goodProp); + this->ibdLLK_ = tmpIBDpath.bestPath(goodProp); + this->ibdProbsHeader = tmpIBDpath.getIBDprobsHeader(); + this->getIBDprobsIntegrated(tmpIBDpath.fwdbwd); + this->writeIBDpostProb(tmpIBDpath.fwdbwd, this->ibdProbsHeader); +} + diff --git a/src/DEploid/export/dEploidIOExportPosteriorProb.cpp b/src/DEploid/export/dEploidIOExportPosteriorProb.cpp index 2d7f618..39872c7 100644 --- a/src/DEploid/export/dEploidIOExportPosteriorProb.cpp +++ b/src/DEploid/export/dEploidIOExportPosteriorProb.cpp @@ -83,7 +83,7 @@ void DEploidIO::writeLastSingleFwdProb( vector < vector >& probabilitie ofstreamExportFwdProb.open( strExportFwdProb.c_str(), ios::out | ios::app | ios::binary ); if ( chromIndex == 0 ){ // Print header - ofstreamExportFwdProb << "CHROM" << "\t" << "POS" << "\t";; + ofstreamExportFwdProb << "CHROM" << "\t" << "POS" << "\t"; for ( size_t ii = 0; ii < probabilities[0].size(); ii++){ if (this->doAllowInbreeding() == true){ if ( ii <= (panelSize - this->kStrain()) ){ diff --git a/src/DEploid/export/writeMcmcRelated.cpp b/src/DEploid/export/writeMcmcRelated.cpp index 45def2f..afdd5b7 100644 --- a/src/DEploid/export/writeMcmcRelated.cpp +++ b/src/DEploid/export/writeMcmcRelated.cpp @@ -49,8 +49,8 @@ void DEploidIO::writeMcmcRelated (McmcSample * mcmcSample, bool useIBD){ //this->writeEventCount( ); } else { - this->IBDpathChangeAt = mcmcSample->IBDpathChangeAt; - this->finalIBDpathChangeAt = mcmcSample->currentIBDpathChangeAt; + //this->IBDpathChangeAt = mcmcSample->IBDpathChangeAt; + //this->finalIBDpathChangeAt = mcmcSample->currentIBDpathChangeAt; } } @@ -135,6 +135,13 @@ void DEploidIO::writeVcf( McmcSample * mcmcSample ){ } else { (*writeTo) << "##fileformat=VCFv4.2" << endl; } + // DEploid call + (*writeTo) << "##DEploid call: dEploid "; + for ( string s : argv_ ){ + (*writeTo) << s << " "; + } + (*writeTo) << endl; + // Include proportions for ( size_t ii = 0; ii < kStrain_; ii++){ (*writeTo) << "##Proportion of strain " diff --git a/src/DEploid/global.h b/src/DEploid/global.h index 162d75b..6525f8f 100644 --- a/src/DEploid/global.h +++ b/src/DEploid/global.h @@ -1,10 +1,10 @@ #define dEploid_src_macros -#pragma GCC diagnostic ignored "-Wunused-result" +//#pragma GCC diagnostic ignored "-Wunused-result" #ifndef NDEBUG #define dout std::cout << " " #else -#pragma GCC diagnostic ignored "-Wunused-value" +//#pragma GCC diagnostic ignored "-Wunused-value" #define dout 0 && std::cout #endif diff --git a/src/DEploid/ibd.cpp b/src/DEploid/ibd.cpp index 1622e3c..907ab92 100644 --- a/src/DEploid/ibd.cpp +++ b/src/DEploid/ibd.cpp @@ -26,9 +26,9 @@ #include #include #include -#include "ibd.hpp" #include #include +#include "ibd.hpp" IBDconfiguration::IBDconfiguration(){} @@ -166,6 +166,21 @@ void IBDconfiguration::findEffectiveK(){ } +vector IBDconfiguration::getIBDconfigureHeader(){ + vector ret; + for (size_t i = 0; i < this->states.size(); i++){ + string tmp; + for (size_t j = 0; j < this->states[i].size(); j++){ + stringstream tmp_ss; + tmp_ss << this->states[i][j]; + tmp += tmp_ss.str() + ((j < (this->states[i].size()-1)) ? "-":""); + } + ret.push_back(tmp); + } + return ret; +} + + Hprior::Hprior(){} @@ -177,7 +192,6 @@ void Hprior::buildHprior(size_t kStrain, vector &plaf){ this->setKstrain(kStrain); this->setnLoci(this->plaf_.size()); vector < vector > hSetBase = enumerateBinaryMatrixOfK(this->kStrain()); - size_t stateI = 0; for ( vector state : ibdConfig.states ) { set stateUnique (state.begin(), state.end()); @@ -191,6 +205,7 @@ void Hprior::buildHprior(size_t kStrain, vector &plaf){ vector < vector > hSetBaseTmpUnique = unique(hSetBaseTmp); // uu size_t sizeOfhSetBaseTmpUnique = hSetBaseTmpUnique.size(); + stateIdxFreq.push_back(sizeOfhSetBaseTmpUnique); //h.prior.i<-array(0, c(size.h.set.i, n.loci)); for ( size_t i = 0; i < sizeOfhSetBaseTmpUnique; i++){ @@ -232,6 +247,10 @@ void Hprior::transposePriorProbs(){ } +vector Hprior::getIBDconfigureHeader(){ + return this->ibdConfig.getIBDconfigureHeader(); +} + Hprior::~Hprior(){} @@ -312,3 +331,407 @@ bool twoVectorsAreSame(vector vec1, vector vec2){ return ret; } + +IBDpath::IBDpath(){}; + + +void IBDpath::init(DEploidIO &dEploidIO, RandomGenerator* rg){ + this->ibdRg_ = rg; + this->setNLoci(dEploidIO.nLoci()); + this->setKstrain(dEploidIO.kStrain()); + this->setTheta(1.0 / (double)kStrain()); + + this->IBDpathChangeAt = vector (this->nLoci()); + + // compute likelihood surface + this->makeLlkSurf(dEploidIO.altCount_, dEploidIO.refCount_); + + // initialize haplotype prior + this->hprior.buildHprior(kStrain(), dEploidIO.plaf_); + this->hprior.transposePriorProbs(); + + this->makeIbdTransProbs(); + + // initialize fm + this->fSumState = vector (this->hprior.nPattern()); + + // initialize ibdConfigurePath + this->ibdConfigurePath = vector (this->nLoci()); + + // initialize recombination probabilities; + this->ibdRecombProbs = IBDrecombProbs(dEploidIO.position_, dEploidIO.nLoci()); + this->ibdRecombProbs.computeRecombProbs( dEploidIO.averageCentimorganDistance(), + dEploidIO.parameterG(), + dEploidIO.useConstRecomb(), + dEploidIO.constRecombProb()); + this->currentIBDpathChangeAt = vector (this->nLoci()); + + this->computeUniqueEffectiveKCount(); +}; + + +void IBDpath::ibdSamplePath(vector statePrior){ + int lociIdx = this->nLoci()-1; + vector tmpProp = fm[lociIdx]; + (void)normalizeBySum(tmpProp); + ibdConfigurePath[lociIdx] = sampleIndexGivenProp(this->ibdRg_, tmpProp); + + assert(this->fm.size() == nLoci()); + while ( lociIdx > 0 ){ + lociIdx--; + vector vNoRecomb = vecProd(this->ibdTransProbs[this->hprior.stateIdx[ibdConfigurePath[lociIdx+1]]], fm[lociIdx]); + assert(vNoRecomb.size() == this->hprior.nState()); + vector vRecomb = fm[lociIdx]; + assert(vRecomb.size() == this->hprior.nState()); + vector prop (this->hprior.nState()); + for ( size_t i = 0; i < prop.size(); i++){ + prop[i] = vNoRecomb[i]*this->ibdRecombProbs.pNoRec_[lociIdx] + vRecomb[i]*this->ibdRecombProbs.pRec_[lociIdx]*statePrior[ibdConfigurePath[lociIdx+1]]; + } + tmpProp = prop; + (void)normalizeBySum(tmpProp); + ibdConfigurePath[lociIdx] = sampleIndexGivenProp(this->ibdRg_, tmpProp); + assert( ibdConfigurePath[lociIdx] < this->hprior.nState() ); + assert( ibdConfigurePath[lociIdx] >= 0 ); + } +} + + +vector IBDpath::findWhichIsSomething(vector tmpOp, size_t something){ + vector ret; + for ( size_t i = 0; i < tmpOp.size(); i++){ + if ( tmpOp[i] == something){ + ret.push_back(i); + } + } + return ret; +} + + +void IBDpath::buildPathProbabilityForPainting(vector proportion){ + //vector effectiveKPrior = this->computeEffectiveKPrior(this->theta()); + vector effectiveKPrior = vector (this->hprior.nPattern(), 1.0/this->hprior.nPattern()); + vector statePrior = this->computeStatePrior(effectiveKPrior); + // First building the path likelihood + this->computeIbdPathFwdProb(proportion, statePrior); + // Reshape Fwd + vector < vector > reshapedFwd = reshapeProbs(this->fm); + + this->computeIbdPathBwdProb(proportion, effectiveKPrior, statePrior); + // Reshape Bwd + vector < vector > reshapedBwd = reshapeProbs(this->bwd); + + // Combine Fwd Bwd + this->combineFwdBwd(reshapedFwd, reshapedBwd); +} + + + +void IBDpath::computeIbdPathBwdProb(vector proportion, vector effectiveKPrior, vector statePrior){ + //# assuming each ibd state has equal probabilities, transform it into ibd configurations + //dout << " start building ibd bwd "<< endl; + vector tmp = vector (hprior.stateIdxFreq.size()); + assert(effectiveKPrior.size() == hprior.stateIdxFreq.size()); + for (size_t i = 0; i < tmp.size(); i++){ + tmp[i] = effectiveKPrior[i] / (double)hprior.stateIdxFreq[i]; + } + + vector tmpBw = vector (hprior.nState()); + for (size_t j = 0; j < tmpBw.size(); j++){ + for (size_t i = 0; i < tmp.size(); i++){ + tmpBw[j] += tmp[i] * ibdTransProbs[i][j]; + } + } + + this->bwd.push_back(tmpBw); + for ( size_t rev_siteI = 1; rev_siteI < this->nLoci(); rev_siteI++ ){ + size_t siteI = this->nLoci()-rev_siteI; + + vector lk = computeLlkOfStatesAtSiteI(proportion, siteI); + //vector lk = vector (hprior.nState(), 1.0); + vector bSumState = vector (hprior.nPattern()); + for ( size_t i = 0; i < bSumState.size(); i++){ + for ( size_t j = 0; j < hprior.nState(); j++ ){ + bSumState[i] += ibdTransProbs[i][j]*this->bwd.back()[j]; + } + } + vector vNoRecomb(hprior.nState()); + for (size_t i = 0; i < hprior.stateIdx.size(); i++ ){ + vNoRecomb[i] = bSumState[hprior.stateIdx[i]]; + } + + for (size_t i = 0; i < hprior.nState(); i++ ){ + tmpBw[i] = 0; + for (size_t j = 0; j < lk.size(); j++){ + tmpBw[i] += (lk[j] * bwd.back()[j])*this->ibdRecombProbs.pRec_[siteI-1]; + } + tmpBw[i] *= statePrior[i]; + tmpBw[i] += lk[i] * (this->ibdRecombProbs.pNoRec_[siteI-1]) * vNoRecomb[i]; + tmpBw[i] *= hprior.priorProb[i][siteI]; + } + normalizeBySum(tmpBw); + this->bwd.push_back(tmpBw); + } + reverse(bwd.begin(),bwd.end()); +} + + +void IBDpath::computeIbdPathFwdProb(vector proportion, vector statePrior){ + this->fm.clear(); + vector vPrior = vecProd(statePrior, this->hprior.priorProbTrans[0]); + + vector lk = computeLlkOfStatesAtSiteI(proportion, 0); + this->updateFmAtSiteI(vPrior, lk); + for ( size_t siteI = 1; siteI < this->nLoci(); siteI++ ){ + vector vNoRec; + for ( size_t stateIdxTmp : hprior.stateIdx ){ + vNoRec.push_back(this->fSumState[stateIdxTmp]); + } + for ( size_t i = 0; i < hprior.nState(); i++ ){ + vPrior[i] = (vNoRec[i] * this->ibdRecombProbs.pNoRec_[siteI] + fSum * this->ibdRecombProbs.pRec_[siteI] * statePrior[i]) * hprior.priorProbTrans[siteI][i]; + } + + lk = computeLlkOfStatesAtSiteI(proportion, siteI); + //cout << "lk = " ; for (double l :lk){cout << l << " ";}cout<updateFmAtSiteI(vPrior, lk); + //for (double p : this->fm.back()){printf("%8.4f ", p);}cout< & prior, vector & llk){ + vector postAtSiteI = vecProd(prior, llk); + //normalizeByMax(postAtSiteI); + normalizeBySum(postAtSiteI); + this->fm.push_back(postAtSiteI); + this->fSum = sumOfVec(postAtSiteI); + for ( size_t i = 0; i < fSumState.size(); i++){ + this->fSumState[i] = 0; + for ( size_t j = 0; j < hprior.nState(); j++ ){ + this->fSumState[i] += ibdTransProbs[i][j]*postAtSiteI[j]; + } + } +} + + +double IBDpath::bestPath(vector proportion, double err){ + double sumLLK = 0.0; + for (size_t i = 0; i < nLoci(); i++ ){ + vector tmp; + for (size_t j = 0; j < fm[i].size(); j++){ + tmp.push_back(exp(log(fm[i][j])+log(bwd[i][j]))); + } + normalizeBySum(tmp); + size_t indx = distance(tmp.begin(), max_element(tmp.begin(), tmp.end())); + + vector hSetI = this->hprior.hSet[indx]; + double qs = 0; + for ( size_t j = 0; j < this->kStrain() ; j++ ){ + qs += (double)hSetI[j] * proportion[j]; + } + double qs2 = qs*(1-err) + (1-qs)*err ; + + if ( (qs > 0) & (qs < 1) ){ + sumLLK += logBetaPdf(qs2, this->llkSurf[i][0], this->llkSurf[i][1]); + } + } + return sumLLK; +} + + +void IBDpath::combineFwdBwd(vector < vector > &reshapedFwd, vector < vector > &reshapedBwd){ + for (size_t i = 0; i < nLoci(); i++ ){ + vector tmp; + //cout << " site " << i << endl; + for (size_t j = 0; j < reshapedFwd[i].size(); j++){ + + tmp.push_back(exp(log(reshapedFwd[i][j])+log(reshapedBwd[i][j]))); + //tmp.push_back(exp(log(bwd[i][j]))); + //tmp.push_back(exp(log(fm[i][j]))); + //cout << "fwd = "<::min(); + //cout << normalized< #include #include +#include #include "utility.hpp" +#include "mersenne_twister.hpp" +#include "dEploidIO.hpp" #ifndef IBD #define IBD using namespace std; -vector < vector > unique( vector < vector > &mat ); +int nchoose2(int n); bool twoVectorsAreSame(vector vec1, vector vec2); - - +vector < vector > unique( vector < vector > &mat ); vector convertIntToBinary(int x, size_t len); vector < vector > enumerateBinaryMatrixOfK(size_t k); -struct OutOfVectorSize : std::exception{ - - explicit OutOfVectorSize(){ } - virtual ~OutOfVectorSize() throw() {} - virtual const char* what () const noexcept { - return string("Out of vector size!").c_str(); - } -}; - -int nchoose2(int n); - // The IBDconfiguration is used for index, which should be non-negative, use int, any thing below zero should throw. class IBDconfiguration{ #ifdef UNITTEST @@ -58,6 +49,7 @@ class IBDconfiguration{ friend class TestHprior; #endif friend class Hprior; + friend class McmcMachinery; IBDconfiguration(); ~IBDconfiguration(); @@ -82,6 +74,7 @@ class IBDconfiguration{ vector makeTmpRow(); vector findWhichIsOne(vector tmpOp); bool twoVectorsAreSame(vector vec1, vector vec2); + vector getIBDconfigureHeader(); }; @@ -89,8 +82,11 @@ class Hprior{ #ifdef UNITTEST friend class TestHprior; friend class TestMcmcMachinery; + friend class TestIBDpath; #endif + friend class IBDpath; friend class McmcMachinery; + friend class DEploidIO; Hprior(); ~Hprior(); @@ -111,6 +107,7 @@ class Hprior{ void transposePriorProbs(); vector stateIdx; // size: nState + vector stateIdxFreq; vector > hSet; // size: nState x kStrain size_t nState_; @@ -118,9 +115,79 @@ class Hprior{ vector < size_t > effectiveK; size_t nPattern() const {return this->effectiveK.size();} + vector getIBDconfigureHeader(); }; +class IBDpath{ +#ifdef UNITTEST + friend class TestIBDpath; +#endif + friend class McmcMachinery; + friend class DEploidIO; + RandomGenerator* ibdRg_; + + double fSum; + Hprior hprior; + IBDrecombProbs ibdRecombProbs; + vector < vector > ibdTransProbs; + vector < vector > fm; + vector fSumState; + vector ibdConfigurePath; + + vector < vector > bwd; + vector < vector > fwdbwd; + + IBDpath(); + ~IBDpath(); + + size_t kStrain_; + void setKstrain ( const size_t setTo ){ this->kStrain_ = setTo;} + size_t kStrain() const { return this->kStrain_;} + + size_t nLoci_; + void setNLoci ( const size_t setTo ){ this->nLoci_ = setTo;} + size_t nLoci() const { return this->nLoci_; } + + double theta_; + void setTheta(const double setTo) {this->theta_ = setTo;} + double theta() const {return this->theta_;} + + vector currentIBDpathChangeAt; + + vector < vector > llkSurf; + vector uniqueEffectiveKCount; + + vector IBDpathChangeAt; + // Methods + void computeAndUpdateTheta(); + void updateFmAtSiteI(vector & prior, + vector & llk); + void ibdSamplePath(vector statePrior); + void makeIbdTransProbs(); + vector computeEffectiveKPrior(double theta); + vector computeStatePrior(vector effectiveKPrior); + void makeLlkSurf(vector altCount, + vector refCount, + double scalingConst = 100.0, + double err = 0.01, + size_t gridSize=99); + void computeUniqueEffectiveKCount(); + vector computeLlkOfStatesAtSiteI(vector proportion, size_t siteI, double err = 0.01); + vector findWhichIsSomething(vector tmpOp, size_t something); + + // For painting IBD + void buildPathProbabilityForPainting(vector proportion); + void computeIbdPathFwdProb(vector proportion, vector statePrior); + void computeIbdPathBwdProb(vector proportion, vector effectiveKPrior, vector statePrior); + void combineFwdBwd(vector < vector > &reshapedFwd, vector < vector > &reshapedBwd); + vector < vector > reshapeProbs(vector < vector >& probs); + double bestPath(vector proportion, double err = 0.01); + +public: + vector getIBDprobsHeader(); + void init(DEploidIO &dEploidIO, RandomGenerator* rg); +}; #endif diff --git a/src/DEploid/mcmc.cpp b/src/DEploid/mcmc.cpp index 1a6fc51..d95f9f0 100644 --- a/src/DEploid/mcmc.cpp +++ b/src/DEploid/mcmc.cpp @@ -23,13 +23,14 @@ * */ -#include "mcmc.hpp" -#include "utility.hpp" -#include /* ceil */ #include -#include "updateHap.hpp" #include -#include /* std::numeric_limits< double >::min() */ +#include // std::numeric_limits< double >::min() +#include // ceil +#include "global.h" // dout +#include "updateHap.hpp" +#include "mcmc.hpp" +#include "utility.hpp" McmcSample::McmcSample(){}; McmcSample::~McmcSample(){}; @@ -46,7 +47,7 @@ McmcMachinery::McmcMachinery(DEploidIO* dEploidIO, McmcSample *mcmcSample, Rando this->mcmcEventRg_ = this->hapRg_; this->propRg_ = this->hapRg_; this->initialHapRg_ = this->hapRg_; - this->ibdRg_ = this->hapRg_; + //this->ibdRg_ = this->hapRg_; //this->mcmcEventRg_ = new MersenneTwister(this->seed_); //this->propRg_ = new MersenneTwister(this->seed_); //this->initialHapRg_ = new MersenneTwister(this->seed_); @@ -56,7 +57,7 @@ McmcMachinery::McmcMachinery(DEploidIO* dEploidIO, McmcSample *mcmcSample, Rando this->calcMaxIteration( dEploidIO_->nMcmcSample_ , dEploidIO_->mcmcMachineryRate_, dEploidIO_->mcmcBurn_ ); } this->MN_LOG_TITRE = 0.0; - this->SD_LOG_TITRE = this->dEploidIO_->parameterSigma(); + this->SD_LOG_TITRE = (useIBD == true) ? this->dEploidIO_->ibdSigma() : this->dEploidIO_->parameterSigma(); this->PROP_SCALE = 40.0; stdNorm_ = new StandNormalRandomSample(this->seed_); @@ -101,10 +102,9 @@ void McmcMachinery::initializeMcmcChain(bool useIBD){ } if ( useIBD == true ){ - this->initializeIbdEssentials(); + this->ibdInitializeEssentials(); } - this->mcmcSample_->IBDpathChangeAt = vector (this->nLoci()); this->mcmcSample_->siteOfTwoSwitchOne = vector (this->nLoci()); this->mcmcSample_->siteOfTwoMissCopyOne = vector (this->nLoci()); this->mcmcSample_->siteOfTwoSwitchTwo = vector (this->nLoci()); @@ -112,7 +112,6 @@ void McmcMachinery::initializeMcmcChain(bool useIBD){ this->mcmcSample_->siteOfOneSwitchOne = vector (this->nLoci()); this->mcmcSample_->siteOfOneMissCopyOne = vector (this->nLoci()); - this->mcmcSample_->currentIBDpathChangeAt = vector (this->nLoci()); this->mcmcSample_->currentsiteOfTwoSwitchOne = vector (this->nLoci()); this->mcmcSample_->currentsiteOfTwoMissCopyOne = vector (this->nLoci()); this->mcmcSample_->currentsiteOfTwoSwitchTwo = vector (this->nLoci()); @@ -243,56 +242,19 @@ vector McmcMachinery::titre2prop(vector & tmpTitre){ } -void McmcMachinery::makeLlkSurf(vector altCount, vector refCount, double scalingConst, double err, size_t gridSize){ - double pGridSpacing = 1.0 / (double)(gridSize+1); - vector pGrid; - pGrid.push_back(pGridSpacing); - for (size_t i = 1; i < gridSize; i++){ - pGrid.push_back(pGrid.back() + pGridSpacing); - } - assert(pGrid.size() == gridSize); - - assert(llkSurf.size() == 0); - - for ( size_t i = 0 ; i < altCount.size(); i++) { - double alt = altCount[i]; - double ref = refCount[i]; - - vector ll; - for ( double unadjustedP : pGrid ){ - ll.push_back(calcLLK( ref, alt, unadjustedP, err, scalingConst)); - } - - double llmax = max_value(ll); - vector ln; - for ( double lltmp : ll ){ - ln.push_back(exp(lltmp-llmax)); - } - - double lnSum = sumOfVec(ln); - for (size_t i = 0; i < ln.size(); i++){ - ln[i] = ln[i]/lnSum; - } - - vector tmpVec1 = vecProd(ln, pGrid); - double mn = sumOfVec(tmpVec1); - vector pGridSq = vecProd(pGrid, pGrid); - vector tmpVec2 = vecProd(ln, pGridSq); - double vr = sumOfVec(tmpVec2) - mn*mn; - - double comm = (mn*(1.0-mn)/vr-1.0); - llkSurf.push_back(vector {mn*comm, (1-mn)*comm}); - } - assert(llkSurf.size() == this->nLoci()); -} void McmcMachinery::initializePropIBD(){ //#Initialise titres and convert to proportions - this->currentTitre_ = vector (this->kStrain(), 0.0); + //this->initializeTitre(); this->currentProp_ = ( this->dEploidIO_ -> initialPropWasGiven()) ? - this->dEploidIO_ ->initialProp : - vector (this->kStrain(), 1.0/(double)kStrain()); + this->dEploidIO_ ->initialProp: + this->titre2prop( this->currentTitre_ ); + + //this->currentTitre_ = vector (this->kStrain(), 0.0); + //this->currentProp_ = ( this->dEploidIO_ -> initialPropWasGiven()) ? + //this->dEploidIO_ ->initialProp : + //vector (this->kStrain(), 1.0/(double)kStrain()); } @@ -315,10 +277,9 @@ void McmcMachinery::runMcmcChain( bool showProgress, bool useIBD, bool notInR ){ this->writeLastFwdProb(useIBD); - this->dEploidIO_->filnalProp = this->mcmcSample_->proportion.back(); + this->dEploidIO_->finalProp = this->mcmcSample_->proportion.back(); for (size_t atSiteI = 0; atSiteI < nLoci(); atSiteI++ ){ - this->mcmcSample_->IBDpathChangeAt[atSiteI] /= (double)this->maxIteration_; this->mcmcSample_->siteOfTwoSwitchOne[atSiteI] /= (double)this->maxIteration_; this->mcmcSample_->siteOfTwoMissCopyOne[atSiteI] /= (double)this->maxIteration_; this->mcmcSample_->siteOfTwoSwitchTwo[atSiteI] /= (double)this->maxIteration_; @@ -332,6 +293,13 @@ void McmcMachinery::runMcmcChain( bool showProgress, bool useIBD, bool notInR ){ } if ( useIBD == true ){ + for (size_t atSiteI = 0; atSiteI < nLoci(); atSiteI++ ){ + this->ibdPath.IBDpathChangeAt[atSiteI] /= (double)this->maxIteration_; + } + //vector < vector > reshapedProbs = this->reshapeFm(hprior.stateIdx); + //this->dEploidIO_->ibdProbsHeader = getIBDprobsHeader(); + //this->dEploidIO_->ibdProbsIntegrated = getIBDprobsIntegrated(reshapedProbs); + //this->dEploidIO_->writeIBDpostProb(reshapedProbs, this->dEploidIO_->ibdProbsHeader); clog << "Proportion update acceptance rate: "<kStrain()*1.0*this->maxIteration_)<dEploidIO_->initialProp = averageProportion(this->mcmcSample_->proportion); this->dEploidIO_->setInitialPropWasGiven(true); @@ -348,6 +316,8 @@ void McmcMachinery::runMcmcChain( bool showProgress, bool useIBD, bool notInR ){ } + + void McmcMachinery::computeDiagnostics(){ //clog << "Proportion update acceptance rate: "<kStrain()*1.0*this->maxIteration_)<dEploidIO_->setacceptRatio(acceptUpdate / (1.0*this->maxIteration_)); @@ -406,7 +376,7 @@ vector McmcMachinery::averageProportion(vector < vector > &pro void McmcMachinery::sampleMcmcEvent( bool useIBD ){ this->recordingMcmcBool_ = ( currentMcmcIteration_ > this->mcmcThresh_ && currentMcmcIteration_ % this->McmcMachineryRate_ == 0 ); if ( useIBD == true ){ - sampleMcmcEventIbdStep(); + ibdSampleMcmcEventStep(); assert(doutProp()); } else { this->eventInt_ = this->mcmcEventRg_->sampleInt(3); @@ -427,209 +397,67 @@ void McmcMachinery::sampleMcmcEvent( bool useIBD ){ } -vector McmcMachinery::findWhichIsSomething(vector tmpOp, size_t something){ - vector ret; - for ( size_t i = 0; i < tmpOp.size(); i++){ - if ( tmpOp[i] == something){ - ret.push_back(i); - } - } - return ret; -} - - -void McmcMachinery::makeIbdTransProbs(){ - size_t nPattern = hprior.nPattern(); - size_t nState = hprior.nState(); - assert(ibdTransProbs.size() == 0); - - for ( size_t i = 0; i < nPattern; i++ ){ - vector transProbRow(nState); - vector wi = findWhichIsSomething(hprior.stateIdx, i); - for (size_t wii : wi){ - transProbRow[wii] = 1; - } - ibdTransProbs.push_back(transProbRow); - } -} -void McmcMachinery::computeUniqueEffectiveKCount(){ - this->uniqueEffectiveKCount = vector (this->kStrain()); - for (size_t effectiveKtmp : this->hprior.effectiveK) { - int effectiveKidx = effectiveKtmp-1; - assert(effectiveKidx>=0); - this->uniqueEffectiveKCount[effectiveKidx]++; - } -} -vector McmcMachinery::computeStatePrior( double theta ){ - //#Calculate state prior given theta (theta is prob IBD) - vector pr0(this->kStrain()); - for (int i = 0; i < (int)pr0.size(); i++){ - pr0[i] = binomialPdf(i, (int)(this->kStrain()-1), theta); - } - vector effectiveKPrior; - for ( size_t effectiveKtmp : this->hprior.effectiveK){ - int effectiveKidx = effectiveKtmp-1; - assert(effectiveKidx >= 0); - assert(effectiveKidx < (int)this->kStrain()); - effectiveKPrior.push_back(pr0[effectiveKidx]/uniqueEffectiveKCount[effectiveKidx]); - } - vector ret; - for (size_t stateIdxTmp : this->hprior.stateIdx){ - ret.push_back(effectiveKPrior[stateIdxTmp]); - } - return ret; -} +void McmcMachinery::ibdInitializeEssentials(){ -void McmcMachinery::initializeIbdEssentials(){ - // initialize haplotype prior - this->hprior.buildHprior(this->kStrain(), this->dEploidIO_->plaf_); - this->hprior.transposePriorProbs(); - - // compute likelihood surface - this->makeLlkSurf(this->dEploidIO_->altCount_, this->dEploidIO_->refCount_); + this->initializePropIBD(); + this->ibdPath.init(*this->dEploidIO_, this->hapRg_); vector llkOfData; for ( size_t i = 0; i < nLoci(); i++){ double wsaf = this->dEploidIO_->altCount_[i] / (this->dEploidIO_->refCount_[i] + this->dEploidIO_->altCount_[i] + 0.00000000000001); double adjustedWsaf = wsaf*(1-0.01) + (1-wsaf)*0.01; - llkOfData.push_back( logBetaPdf(adjustedWsaf, this->llkSurf[i][0], this->llkSurf[i][1])); + llkOfData.push_back( logBetaPdf(adjustedWsaf, this->ibdPath.llkSurf[i][0], this->ibdPath.llkSurf[i][1])); } dout << "LLK of data = " << sumOfVec(llkOfData) << endl; - this->initializePropIBD(); - this->setTheta(1.0 / (double)kStrain()); - this->makeIbdTransProbs(); - this->computeUniqueEffectiveKCount(); - - // initialize fm - this->fSumState = vector (this->hprior.nPattern()); - - // initialize ibdPath - this->ibdPath = vector (this->nLoci()); } -vector McmcMachinery::computeLlkOfStatesAtSiteI( size_t siteI, double err ){ - vector llks; - for ( vector hSetI : this->hprior.hSet ){ - double qs = 0; - for ( size_t j = 0; j < this->kStrain() ; j++ ){ - qs += (double)hSetI[j] * this->currentProp_[j]; - } - double qs2 = qs*(1-err) + (1-qs)*err ; - llks.push_back(logBetaPdf(qs2, this->llkSurf[siteI][0], this->llkSurf[siteI][1])); - } - - double maxllk = max_value(llks); - vector ret; - for ( double llk : llks ){ - double normalized = exp(llk-maxllk); - if ( normalized == 0 ){ - normalized = std::numeric_limits< double >::min(); - } - ret.push_back(normalized); - } - - return ret; -} +void McmcMachinery::ibdSampleMcmcEventStep(){ + vector effectiveKPrior = this->ibdPath.computeEffectiveKPrior(this->ibdPath.theta()); + vector statePrior = this->ibdPath.computeStatePrior(effectiveKPrior); + // First building the path likelihood + this->ibdPath.computeIbdPathFwdProb(this->currentProp_, statePrior); -void McmcMachinery::updateFmAtSiteI(vector & prior, vector & llk){ - vector postAtSiteI = vecProd(prior, llk); - normalizeByMax(postAtSiteI); - this->fm.push_back(postAtSiteI); - this->fSum = sumOfVec(postAtSiteI); - for ( size_t i = 0; i < fSumState.size(); i++){ - this->fSumState[i] = 0; - for ( size_t j = 0; j < hprior.nState(); j++ ){ - this->fSumState[i] += ibdTransProbs[i][j]*postAtSiteI[j]; - } - } -} + ////#Now sample path given matrix + this->ibdPath.ibdSamplePath(statePrior); + //#Get haplotypes and update LLK for each site + this->ibdUpdateHaplotypesFromPrior(); + vector llkAtAllSites = computeLlkAtAllSites(); + ////#Given current haplotypes, sample titres 1 by 1 using MH + this->ibdUpdateProportionGivenHap(llkAtAllSites); + // Compute new theta after all proportion and haplotypes are up to date. + this->ibdPath.computeAndUpdateTheta(); -vector McmcMachinery::computeLlkAtAllSites(double err){ - vector ret; - for ( size_t site = 0; site < this->nLoci(); site++ ){ - double qs = 0; - for ( size_t j = 0; j < this->kStrain() ; j++ ){ - qs += (double)this->currentHap_[site][j] * this->currentProp_[j]; - } - double qs2 = qs*(1-err) + (1-qs)*err ; - ret.push_back(logBetaPdf(qs2, this->llkSurf[site][0], this->llkSurf[site][1])); - } - return ret; + this->currentLLks_ = llkAtAllSites; + this->currentExpectedWsaf_ = this->calcExpectedWsaf( this->currentProp_ ); } -void McmcMachinery::sampleMcmcEventIbdStep(){ - this->fm.clear(); - double pRecomb = 0.01; // This should be parsed in - double pNoRecomb = 0.99; - vector statePrior = this->computeStatePrior(this->theta()); - vector vPrior = vecProd(statePrior, this->hprior.priorProbTrans[0]); - - vector lk = computeLlkOfStatesAtSiteI(0); - this->updateFmAtSiteI(vPrior, lk); - for ( size_t siteI = 1; siteI < this->nLoci(); siteI++ ){ - vector vNoRec; - for ( size_t stateIdxTmp : hprior.stateIdx ){ - vNoRec.push_back(this->fSumState[stateIdxTmp]); - } - - for ( size_t i = 0; i < hprior.nState(); i++ ){ - vPrior[i] = (vNoRec[i] * pNoRecomb + fSum * pRecomb * statePrior[i]) * hprior.priorProbTrans[siteI][i]; - } - - lk = computeLlkOfStatesAtSiteI(siteI); - this->updateFmAtSiteI(vPrior, lk); - } - - ////#Now sample path given matrix - int lociIdx = this->nLoci()-1; - vector tmpProp = fm[lociIdx]; - (void)normalizeBySum(tmpProp); - ibdPath[lociIdx] = sampleIndexGivenProp(this->ibdRg_, tmpProp); - - assert(this->fm.size() == nLoci()); - while ( lociIdx > 0 ){ - lociIdx--; - vector vNoRecomb = vecProd(this->ibdTransProbs[this->hprior.stateIdx[ibdPath[lociIdx+1]]], fm[lociIdx]); - assert(vNoRecomb.size() == this->hprior.nState()); - vector vRecomb = fm[lociIdx]; - assert(vRecomb.size() == this->hprior.nState()); - vector prop (this->hprior.nState()); - for ( size_t i = 0; i < prop.size(); i++){ - prop[i] = vNoRecomb[i]*pNoRecomb + vRecomb[i]*pRecomb*statePrior[ibdPath[lociIdx+1]]; - } - tmpProp = prop; - (void)normalizeBySum(tmpProp); - ibdPath[lociIdx] = sampleIndexGivenProp(this->ibdRg_, tmpProp); - assert( ibdPath[lociIdx] < this->hprior.nState() ); - assert( ibdPath[lociIdx] >= 0 ); - } - - //#Get haplotypes and update LLK for each site +void McmcMachinery::ibdUpdateHaplotypesFromPrior(){ for (size_t i = 0; i < this->nLoci(); i++){ for ( size_t j = 0; j < kStrain(); j++){ - this->currentHap_[i][j] = (double)this->hprior.hSet[ibdPath[i]][j]; + this->currentHap_[i][j] = (double)this->ibdPath.hprior.hSet[ibdPath.ibdConfigurePath[i]][j]; } } +} - vector llkAtAllSites = computeLlkAtAllSites(); - ////#Given current haplotypes, sample titres 1 by 1 using MH +void McmcMachinery::ibdUpdateProportionGivenHap(vector &llkAtAllSites){ for (size_t i = 0; i < kStrain(); i++){ double v0 = this->currentTitre_[i]; vector oldProp = this->currentProp_; - this->currentTitre_[i] += (this->stdNorm_->genReal() * 0.1 + 0.0); // tit.0[i]+rnorm(1, 0, scale.t.prop); + //this->currentTitre_[i] += (this->stdNorm_->genReal() * 0.1 + 0.0); // tit.0[i]+rnorm(1, 0, scale.t.prop); + this->currentTitre_[i] += (this->stdNorm_->genReal() * SD_LOG_TITRE* 1.0/PROP_SCALE + 0.0); // tit.0[i]+rnorm(1, 0, scale.t.prop); this->currentProp_ = this->titre2prop(this->currentTitre_); vector vv = computeLlkAtAllSites(); double rr = normal_pdf( this->currentTitre_[i], 0, 1) / @@ -643,42 +471,25 @@ void McmcMachinery::sampleMcmcEventIbdStep(){ this->currentProp_ = oldProp; } } - - this->computeAndUpdateTheta(); - - this->currentLLks_ = llkAtAllSites; - this->currentExpectedWsaf_ = this->calcExpectedWsaf( this->currentProp_ ); } -void McmcMachinery::computeAndUpdateTheta(){ - vector obsState; - size_t previousState = 0; - size_t atSiteI = 0; - for (size_t a : ibdPath){ - if ( a != previousState ){ - obsState.push_back(a); - } - if ( this->hprior.stateIdx[a] != this->hprior.stateIdx[previousState] ){ - this->mcmcSample_->IBDpathChangeAt[atSiteI] += 1.0; - this->mcmcSample_->currentIBDpathChangeAt[atSiteI] = 1.0; - } else { - this->mcmcSample_->currentIBDpathChangeAt[atSiteI] = 0.0; +vector McmcMachinery::computeLlkAtAllSites(double err){ + vector ret; + for ( size_t site = 0; site < this->nLoci(); site++ ){ + double qs = 0; + for ( size_t j = 0; j < this->kStrain() ; j++ ){ + qs += (double)this->currentHap_[site][j] * this->currentProp_[j]; } - previousState = a; - atSiteI++; - } - - size_t sumOfKeffStates = 0; - size_t sccs = 0; - for (size_t obs : obsState){ - sumOfKeffStates += this->hprior.effectiveK[obs] - 1; - sccs += this->kStrain() - this->hprior.effectiveK[obs]; + double qs2 = qs*(1-err) + (1-qs)*err ; + ret.push_back(logBetaPdf(qs2, this->ibdPath.llkSurf[site][0], this->ibdPath.llkSurf[site][1])); } - this->setTheta(rBeta(sccs+1.0, sumOfKeffStates+1.0, this->propRg_)); + return ret; } + + vector McmcMachinery::calcExpectedWsaf( vector &proportion ){ //assert ( sumOfVec(proportion) == 1.0); // this fails ... vector expectedWsaf (this->nLoci_, 0.0); diff --git a/src/DEploid/mcmc.hpp b/src/DEploid/mcmc.hpp index a3f944c..695fb92 100644 --- a/src/DEploid/mcmc.hpp +++ b/src/DEploid/mcmc.hpp @@ -29,9 +29,7 @@ #include "mersenne_twister.hpp" #include "dEploidIO.hpp" #include "panel.hpp" -#include "utility.hpp" -#include "global.h" -#include "randomSample.hpp" +#include "randomSample.hpp" // src/codeCogs/randomSample.hpp #include "ibd.hpp" #ifndef MCMC @@ -56,7 +54,6 @@ class McmcSample { moves.clear(); } - vector IBDpathChangeAt; vector siteOfTwoSwitchOne; vector siteOfTwoMissCopyOne; vector siteOfTwoSwitchTwo; @@ -64,7 +61,6 @@ class McmcSample { vector siteOfOneSwitchOne; vector siteOfOneMissCopyOne; - vector currentIBDpathChangeAt; vector currentsiteOfTwoSwitchOne; vector currentsiteOfTwoMissCopyOne; vector currentsiteOfTwoSwitchTwo; @@ -83,6 +79,7 @@ class McmcMachinery { #ifdef UNITTEST friend class TestMcmcMachinery; #endif + friend class DEploidIO; public: //McmcMachinery(); McmcMachinery( DEploidIO* dEplioidIO, McmcSample *mcmcSample, RandomGenerator* rg_, bool useIBD = false ); @@ -117,13 +114,13 @@ class McmcMachinery { RandomGenerator* mcmcEventRg_; RandomGenerator* propRg_; RandomGenerator* initialHapRg_; - RandomGenerator* ibdRg_; //std::normal_distribution* initialTitre_normal_distribution_;// (MN_LOG_TITRE, SD_LOG_TITRE); //std::normal_distribution* deltaX_normal_distribution_;// (0, 1/PROP_SCALE); StandNormalRandomSample* stdNorm_; double initialTitreNormalVariable(){ return this->stdNorm_->genReal() * SD_LOG_TITRE + MN_LOG_TITRE; } - double deltaXnormalVariable(){ return this->stdNorm_->genReal() * 1.0/PROP_SCALE + MN_LOG_TITRE; } + //double deltaXnormalVariable(){ return this->stdNorm_->genReal() * 1.0/PROP_SCALE + MN_LOG_TITRE; } + double deltaXnormalVariable(){ return this->stdNorm_->genReal() * SD_LOG_TITRE* 1.0/PROP_SCALE + MN_LOG_TITRE; } double MN_LOG_TITRE; double SD_LOG_TITRE; double PROP_SCALE; @@ -169,38 +166,24 @@ class McmcMachinery { void computeDiagnostics(); /* IBD */ - double theta_; - void setTheta(const double setTo) {this->theta_ = setTo;} - double theta() const {return this->theta_;} - - double fSum; - Hprior hprior; - vector < vector > llkSurf; - vector uniqueEffectiveKCount; - vector < vector > ibdTransProbs; - vector < vector > fm; - vector fSumState; - vector ibdPath; - - vector computeStatePrior(double theta); - vector findWhichIsSomething(vector tmpOp, size_t something); - vector computeLlkOfStatesAtSiteI( size_t siteI, double err = 0.01); + IBDpath ibdPath; + vector computeLlkAtAllSites(double err = 0.01); vector averageProportion(vector < vector > &proportion ); - void initializeIbdEssentials(); + void ibdInitializeEssentials(); void makeLlkSurf(vector altCount, vector refCount, double scalingConst = 100.0, double err = 0.01, size_t gridSize=99); - void sampleMcmcEventIbdStep(); - void makeIbdTransProbs(); + void ibdSampleMcmcEventStep(); void initializePropIBD(); - void computeUniqueEffectiveKCount(); - void updateFmAtSiteI(vector & prior, - vector & llk); - void computeAndUpdateTheta(); + void ibdSamplePath(vector statePrior); + void ibdUpdateHaplotypesFromPrior(); + void ibdUpdateProportionGivenHap(vector &llkAtAllSites); + //vector getIBDprobsIntegrated(vector < vector > &prob); + /* Moves */ void updateProportion(); diff --git a/src/DEploid/panel.cpp b/src/DEploid/panel.cpp index 938d9ba..f5c2904 100644 --- a/src/DEploid/panel.cpp +++ b/src/DEploid/panel.cpp @@ -185,6 +185,30 @@ void Panel::updatePanelWithHaps(size_t inbreedingPanelSizeSetTo, size_t excluded } +void IBDrecombProbs::computeRecombProbs( double averageCentimorganDistance, double G, bool useConstRecomb, double constRecombProb ){ + assert(pRec_.size() == 0 ); + assert(pNoRec_.size() == 0 ); + double averageMorganDistance = averageCentimorganDistance * 100; + double geneticDistance; + double rho; + for ( size_t i = 0; i < this->position_.size(); i++){ + for ( size_t j = 1; j < this->position_[i].size(); j++){ + geneticDistance = (double)(this->position_[i][j] - this->position_[i][j-1])/averageMorganDistance ; + //rho = geneticDistance * 2 * Ne; + rho = geneticDistance * G; + double pRecTmp = ( useConstRecomb ) ? constRecombProb : 1.0 - exp(-rho); + this->pRec_.push_back( pRecTmp ); + double pNoRecTmp = 1.0 - pRecTmp; + this->pNoRec_.push_back( pNoRecTmp ); + } + this->pRec_.push_back(1.0); + this->pNoRec_.push_back(0.0); + } + assert(pRec_.size() == this->nLoci_ ); + assert(pNoRec_.size() == this->nLoci_ ); +} + + //vector> outtrans(out[0].size(), //vector(out.size())); diff --git a/src/DEploid/panel.hpp b/src/DEploid/panel.hpp index 8881b07..98c522d 100644 --- a/src/DEploid/panel.hpp +++ b/src/DEploid/panel.hpp @@ -89,4 +89,27 @@ class InitialHaplotypes: public Panel{ ~InitialHaplotypes(){} }; + +class IBDrecombProbs: public VariantIndex{ + friend class IBDpath; + +#ifdef UNITTEST + friend class TestIBDpath; +#endif + + private: + vector < double > pRec_; + vector < double > pNoRec_; // = 1.0 - pRec; + + void computeRecombProbs( double averageCentimorganDistance, double Ne, bool useConstRecomb, double constRecombProb ); + + public: + IBDrecombProbs():VariantIndex(){}; + IBDrecombProbs(vector < vector < int> > position, size_t nLoci){ + this->position_ = position; + this->nLoci_ = nLoci; + } + ~IBDrecombProbs(){} +}; + #endif diff --git a/src/DEploid/updateHap.cpp b/src/DEploid/updateHap.cpp index 4ef6ebe..75b89cc 100644 --- a/src/DEploid/updateHap.cpp +++ b/src/DEploid/updateHap.cpp @@ -25,7 +25,7 @@ #include "updateHap.hpp" #include // std::reverse -#include // div +#include // div UpdateHap::~UpdateHap(){} diff --git a/src/DEploid/updateHap.hpp b/src/DEploid/updateHap.hpp index 721b3df..edfd8eb 100644 --- a/src/DEploid/updateHap.hpp +++ b/src/DEploid/updateHap.hpp @@ -27,8 +27,6 @@ #include #include "utility.hpp" #include "panel.hpp" -//#include "mersenne_twister.hpp" -//#include "global.h" #ifndef HAP #define HAP diff --git a/src/DEploid/variantIndex.hpp b/src/DEploid/variantIndex.hpp index 957edd7..a322972 100644 --- a/src/DEploid/variantIndex.hpp +++ b/src/DEploid/variantIndex.hpp @@ -47,6 +47,7 @@ class VariantIndex { friend class TxtReader; friend class ExcludeMarker; friend class Panel; + friend class IBDrecombProbs; friend class VcfReader; private: diff --git a/src/dEploidr.cpp b/src/dEploidr.cpp index 848a2b7..c60238b 100644 --- a/src/dEploidr.cpp +++ b/src/dEploidr.cpp @@ -26,7 +26,7 @@ #include #include // std::cout #include "mcmc.hpp" -#include "panel.hpp" +//#include "panel.hpp" #include "dEploidIO.hpp" #include #include "r_random_generator.h" @@ -169,7 +169,7 @@ List dEploid(std::string args) { std::shared_ptr ff = std::make_shared(); RRandomGenerator rrg(ff); - if ( dEploidIO.doPainting() ){ + if ( dEploidIO.doLsPainting() ){ //dEploidIO.chromPainting(); stop("Painting is not implemented yet!"); } diff --git a/tests/testthat/test-DEploid_tools.R b/tests/testthat/test-DEploid_tools.R index c9bc549..1040244 100644 --- a/tests/testthat/test-DEploid_tools.R +++ b/tests/testthat/test-DEploid_tools.R @@ -50,7 +50,7 @@ test_that("WSAF Related", { dev.off() expect_that(inherits(plotHistWSAFPlotly(obsWSAF), "plotly"), is_true()) p <- plotHistWSAFPlotly(obsWSAF) - if (htmlwidgets:::pandoc_available()){ + if (rmarkdown:::pandoc_available()){ htmlwidgets::saveWidget(p, file = "histWSAFPlotly.html") } @@ -67,7 +67,7 @@ test_that("WSAF Related", { p <- plotWSAFVsPLAFPlotly(plaf, obsWSAF, PG0390CoverageVcf$refCount, PG0390CoverageVcf$altCount) - if (htmlwidgets:::pandoc_available()){ + if (rmarkdown:::pandoc_available()){ htmlwidgets::saveWidget(p, file = "WSAFvsPLAFPlotly.html") } @@ -89,7 +89,7 @@ test_that("WSAF Related", { PG0390CoverageVcf$refCount, PG0390CoverageVcf$altCount, potentialOutliers = potentialOutliers) - if (htmlwidgets:::pandoc_available()){ + if (rmarkdown:::pandoc_available()){ htmlwidgets::saveWidget(p, file = "WSAFvsPLAFPlotlyOutlier.html") } ### @@ -100,7 +100,7 @@ test_that("WSAF Related", { expect_that(inherits(plotObsExpWSAFPlotly(obsWSAF, expWSAF), "plotly"), is_true()) p <- plotObsExpWSAFPlotly(obsWSAF, expWSAF) - if (htmlwidgets:::pandoc_available()){ + if (rmarkdown:::pandoc_available()){ htmlwidgets::saveWidget(p, file = "ObsExpWSAFPlotly.html") } }) @@ -117,7 +117,7 @@ test_that("plotAltVsRef", { "plotly"), is_true()) p <- plotAltVsRefPlotly(PG0390CoverageVcf$refCount, PG0390CoverageVcf$altCount) - if (htmlwidgets:::pandoc_available()){ + if (rmarkdown:::pandoc_available()){ htmlwidgets::saveWidget(p, file = "plotAltVsRefPlotly.html") } }) @@ -139,7 +139,7 @@ test_that("plotAltVsRefWithOutliers", { p <- plotAltVsRefPlotly(PG0390CoverageVcf$refCount, PG0390CoverageVcf$altCount, potentialOutliers = potentialOutliers) - if (htmlwidgets:::pandoc_available()){ + if (rmarkdown:::pandoc_available()){ htmlwidgets::saveWidget(p, file = "plotAltVsRefPlotlyOutlier.html") } })