diff --git a/src/ChrCopyNumber.h b/src/ChrCopyNumber.h index 57785a5..bc6f119 100644 --- a/src/ChrCopyNumber.h +++ b/src/ChrCopyNumber.h @@ -27,9 +27,11 @@ class ChrCopyNumber void addBAFinfo(SNPinGenome & snpingenome,int indexSNP); + void fillInRatio(bool islog); void calculateRatio(ChrCopyNumber control, float normalizationConst) ; void recalculateRatio (float constant); - void recalculateRatioWithContam(float contamination, float normGenytype); + void recalculateLogRatio (float constant) ; + void recalculateRatioWithContam(float contamination, float normGenytype, bool isLogged); void recalculateRatio(ChrCopyNumber control); void calculateRatio(ChrCopyNumber control, double a0, double a1); void calculateRatio(ChrCopyNumber control, const double * a, const int degree); @@ -39,7 +41,7 @@ class ChrCopyNumber double calculateXiSum(int ploidy, std::map &sds, std::map &meds); double calculateXiSum(int ploidy, std::map &sds); void calculateCopyNumberMedian(); //create median profiles using 'bpfinal_' and store them in medianProfile_, info about medians themselves is stored in medianValues_ and about SD in sd_, lengths of fragments in bpLengths_ - void calculateCopyNumberMedian(int ploidy, int minCNAlength, bool noisyData, bool CompleteGenomicsData); //create median profiles as calculateCopyNumberMedian(), but merges close regions (roundByPloidy(median)) + void calculateCopyNumberMedian(int ploidy, int minCNAlength, bool noisyData, bool CompleteGenomicsData, bool isLogged); //create median profiles as calculateCopyNumberMedian(), but merges close regions (roundByPloidy(median)) void recalcFlanksForIndeces (int i, int j); void recalcFlanks(int telo_centromeric_flanks, int minNumberOfWindows); //merge short notNA-segments around NA-segments @@ -52,7 +54,7 @@ class ChrCopyNumber void deleteFlanks(int telo_centromeric_flanks); void deleteFragment(int i) ; - + int removeLargeExons(float threshold); std::string getGeneNameAtBin(int i); float getValueAt(int i); diff --git 
a/src/GenomeCopyNumber.h b/src/GenomeCopyNumber.h index c4a3826..98fe751 100644 --- a/src/GenomeCopyNumber.h +++ b/src/GenomeCopyNumber.h @@ -35,13 +35,15 @@ class GenomeCopyNumber int processRead(InputFormat inputFormat, MateOrientation matesOrientation, const char* line_buffer, int& prevInd, std::string targetBed = "", std::string mateFileName = ""); int processReadWithBowtie(std::string const& inputFormat, std::string const& matesOrientation,std::string const line,std::string const line2); int focusOnCapture (std::string const& captureFile); + float removeLargeExons(float iqrToKeep); void initCopyNumber(std::string const& chrLenFileName, int windowSize , int step, std::string targetBed); void finishCopyNumber(long normalCount); void addBAFinfo(SNPinGenome & snpingenome); void removeLowReadCountWindows(GenomeCopyNumber & controlCopyNumber, int RCThresh); void removeLowReadCountWindowsFromControl (int RCThresh); - int calculateRatio( GenomeCopyNumber & controlCopyNumber, int degree, bool intercept,bool logLogNorm) ; + int fillInRatio(); + int calculateRatio( GenomeCopyNumber & controlCopyNumber, int degree, bool intercept) ; void calculateRatioUsingCG( GenomeCopyNumber & controlCopyNumber) ; void calculateRatioUsingCG_Regression( GenomeCopyNumber & controlCopyNumber) ; float calculateNormalizationConstant(GenomeCopyNumber & controlCopyNumber); @@ -123,7 +125,9 @@ class GenomeCopyNumber int findWinNumber(int position, std::string myName, std::string const& matefile); void setWESanalysis(bool WESgiven); - void setmakingPileup(bool makingPileup_given); + void setmakingPileup(bool makingPileup_given); + void setIfLogged(bool); + double Percentage_GenomeExplained(int &); long double calculateRSS(int ploidy); bool isMappUsed(); @@ -136,6 +140,8 @@ class GenomeCopyNumber bool makingPileup; bool SeekingSubc_; bool isMappUsed_; + bool isRatioLogged_; + void fillMyHash(std::string const& mateFileName , std::string const& inputFormat, std::string const& matesOrientation, int 
windowSize, int step, std::string targetBed = ""); int windowSize_; int step_; diff --git a/src/SNPinGenome.cpp b/src/SNPinGenome.cpp index 1356284..3c5f96a 100644 --- a/src/SNPinGenome.cpp +++ b/src/SNPinGenome.cpp @@ -189,8 +189,9 @@ void SNPinGenome::readMateFile(std::string const& mateFile, std::string const& i void SNPinGenome::readMateFile(std::string const& mateFile, std::string const& inputFormat, int minimalTotalLetterCountPerPosition, int minimalQualityPerPosition, GenomeCopyNumber& genomeCopyNumber, std::string const& chrLenFileName, int windowSize, int step, std::string targetBed) { // must perform partly GenomeCopyNumber::readCopyNumber line #114, all but fillMyHash - genomeCopyNumber.initCopyNumber(chrLenFileName, windowSize, step, targetBed); - assignValues(mateFile, inputFormat, minimalTotalLetterCountPerPosition,minimalQualityPerPosition, &genomeCopyNumber); + genomeCopyNumber.initCopyNumber(chrLenFileName, windowSize, step, targetBed); + assignValues(mateFile, inputFormat, minimalTotalLetterCountPerPosition,minimalQualityPerPosition, &genomeCopyNumber); + pileup_read = true; } diff --git a/src/SNPinGenome.h b/src/SNPinGenome.h index 4de1356..9f87f56 100644 --- a/src/SNPinGenome.h +++ b/src/SNPinGenome.h @@ -66,7 +66,7 @@ struct SNPinGenomePerformArgWrapper : public ThreadArg { int minCNAlength; const char* what; - SNPinGenomePerformArgWrapper(SNPinGenome& snpingenome, std::string const& mateFile, const std::string& inputFormat, int minimalTotalLetterCountPerPosition, int minimalQualityPerPosition, bool noisyData, bool CompleteGenomicsData, GenomeCopyNumber& genomeCopyNumber, double breakPointThreshold, int breakPointType, int minCNAlength, const char* what) : snpingenome(snpingenome), mateFile(mateFile), inputFormat(inputFormat), minimalTotalLetterCountPerPosition(minimalTotalLetterCountPerPosition), minimalQualityPerPosition(minimalQualityPerPosition), noisyData(noisyData),genomeCopyNumber(genomeCopyNumber), 
breakPointThreshold(breakPointThreshold), breakPointType(breakPointType), minCNAlength(minCNAlength), what(what) { } + SNPinGenomePerformArgWrapper(SNPinGenome& snpingenome, std::string const& mateFile, const std::string& inputFormat, int minimalTotalLetterCountPerPosition, int minimalQualityPerPosition, bool noisyData, bool CompleteGenomicsData, GenomeCopyNumber& genomeCopyNumber, double breakPointThreshold, int breakPointType, int minCNAlength, const char* what) : snpingenome(snpingenome), mateFile(mateFile), inputFormat(inputFormat), minimalTotalLetterCountPerPosition(minimalTotalLetterCountPerPosition), minimalQualityPerPosition(minimalQualityPerPosition), noisyData(noisyData),CompleteGenomicsData(CompleteGenomicsData),genomeCopyNumber(genomeCopyNumber), breakPointThreshold(breakPointThreshold), breakPointType(breakPointType), minCNAlength(minCNAlength), what(what) { } }; extern void* SNPinGenome_perform_wrapper(void *arg); diff --git a/src/SVfinder.h b/src/SVfinder.h index d05ef14..301b3a4 100644 --- a/src/SVfinder.h +++ b/src/SVfinder.h @@ -70,7 +70,7 @@ int runWithDefinedPloidy(int ploidy, GenomeCopyNumber & sampleCopyNumber, Genome int degree,int intercept,bool logLogNorm,float minExpectedGC,float maxExpectedGC,float knownContamination,float breakPointThreshold,int breakPointType,int minCNAlength, int teloCentroFlanks, std::vector & RSS, std::vector &percentage_GenExpl,bool contaminationAdjustment,std::vector &contamination, ThreadPool * thrPool,ThreadPoolManager * thrPoolManager,std::string makePileup, float seekSubclones, - std::string myName,std::vector &unexplainedChromosomes, bool CompleteGenomicsData) ; + std::string myName,std::vector &unexplainedChromosomes, bool CompleteGenomicsData,bool normalization) ; #endif //SVFINDER_H diff --git a/src/freec b/src/freec index ebdc7a0..c875777 100644 Binary files a/src/freec and b/src/freec differ diff --git a/src/main.cpp b/src/main.cpp index 3d2e43c..6f6dee5 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ 
-352,44 +352,52 @@ int main(int argc, char *argv[]) bool is_sample_pileup = (sample_inputFormat.compare("pileup") == 0 || sample_inputFormat.compare("SAMtools pileup") == 0); bool is_control_pileup = (control_inputFormat.compare("pileup") == 0 || control_inputFormat.compare("SAMtools pileup") == 0); - bool has_BAF = cf.hasValue("BAF","SNPfile"); std::string makePileup = (std::string)cf.Value("BAF","makePileup", "false"); std::string fastaFile = (std::string)cf.Value("BAF","fastaFile", "false"); + std::string miniPileupFileSample = (std::string)cf.Value("sample","miniPileup", "false"); + std::string miniPileupFileControl = (std::string)cf.Value("control","miniPileup", "false"); /* the control's pre-built mini pileup is read from the [control] section, not [sample] */ + + bool isHasMiniPileUPsample = (miniPileupFileSample=="false")?0:1; + bool isHasMiniPileUPcontrol = (miniPileupFileControl=="false")?0:1; - if (makePileup != "false" && fastaFile=="false") { + if (makePileup != "false" && fastaFile=="false" && (!isHasMiniPileUPsample || (isControlIsPresent && !isHasMiniPileUPcontrol))) { cerr << "To create a usable .pileup file from .BAM you need to provide a fasta file for the whole genome with option \"fastaFile\""< Done!\n"; + if (makePileup != "false" || isHasMiniPileUPcontrol || isHasMiniPileUPsample) { + if (!isHasMiniPileUPsample || (isControlIsPresent && !isHasMiniPileUPcontrol)) { + cout << "Creating Pileup file to compute BAF profile...\n"; + minipileup.makepileup(sampleCopyNumber, controlCopyNumber, sample_MateFile, control_MateFile, myName, makePileup, sample_MateFile, + sample_inputFormat, sample_mateOrientation, pathToSamtools, pathToSambamba, SambambaThreads, chrLenFile, controlName, targetBed, pathToBedtools, fastaFile, minimalQualityPerPosition); + cout << "... 
-> Done!\n"; + } + GenomeCopyNumberReadMateFileArgWrapper* readMateFileArg; cout << "..will use SNP positions from "<< SNPinfoFile << " to calculate BAF profiles\n"; thrPool = thrPoolManager->newThreadPool("GenomeCopyNumber_readMateFile"); snpingenome.readSNPs(SNPinfoFile); - controlPileup = controlName + "_minipileup" +".pileup"; - samplePileup = myName + "_minipileup" +".pileup"; + if (!isHasMiniPileUPsample || (isControlIsPresent && !isHasMiniPileUPcontrol)) { + controlPileup = controlName + "_minipileup" +".pileup"; + samplePileup = myName + "_minipileup" +".pileup"; + } + if (isHasMiniPileUPsample) { + samplePileup = miniPileupFileSample; + } + if (isControlIsPresent && isHasMiniPileUPcontrol) { + controlPileup = miniPileupFileControl; + } if (is_sample_pileup && !has_sample_mateCopyNumberFile && has_window) { @@ -992,7 +1012,7 @@ int main(int argc, char *argv[]) } double breakPointThreshold_BAF=1; - if (has_BAF || makePileup != "false") { + if (has_BAF || makePileup != "false" || isHasMiniPileUPsample) { breakPointThreshold_BAF = 0.8; if (ifTargeted) breakPointThreshold_BAF = 1.6; @@ -1004,7 +1024,7 @@ int main(int argc, char *argv[]) SNPinGenomePerformArgWrapper* snpArg; - if (makePileup == "false") + if (makePileup == "false" && !isHasMiniPileUPsample) { snpArg = new SNPinGenomePerformArgWrapper(snpingenome, sample_MateFile, sample_inputFormat, minimalTotalLetterCountPerPosition,minimalQualityPerPosition, noisyData,CompleteGenomicsData,sampleCopyNumber, breakPointThreshold_BAF, breakPointType, minCNAlength, "Sample"); thrPool->addThread(SNPinGenome_perform_wrapper, snpArg); @@ -1016,15 +1036,15 @@ int main(int argc, char *argv[]) } //the same for the control sample: if (isControlIsPresent) { - if (makePileup == "false") + if (makePileup == "false" && !isHasMiniPileUPcontrol) { snpArg = new SNPinGenomePerformArgWrapper(snpingenomeControl, control_MateFile, control_inputFormat, minimalTotalLetterCountPerPosition,minimalQualityPerPosition, noisyData, 
CompleteGenomicsData,controlCopyNumber, breakPointThreshold_BAF, breakPointType, minCNAlength, "Control"); thrPool->addThread(SNPinGenome_perform_wrapper, snpArg); } else - { - //snpArg = new SNPinGenomePerformArgWrapper(snpingenomeControl, control_MateFile,"pileup", minimalTotalLetterCountPerPosition,minimalQualityPerPosition, noisyData, controlCopyNumber, breakPointThreshold_BAF, breakPointType, minCNAlength, "Control"); - //thrPool->addThread(SNPinGenome_perform_wrapper, snpArg); + {// the two lines below were commented for some unknown reason.. + snpArg = new SNPinGenomePerformArgWrapper(snpingenomeControl, controlPileup,"pileup", minimalTotalLetterCountPerPosition,minimalQualityPerPosition, noisyData, CompleteGenomicsData, controlCopyNumber, breakPointThreshold_BAF, breakPointType, minCNAlength, "Control"); + thrPool->addThread(SNPinGenome_perform_wrapper, snpArg); } } diff --git a/src/myFunc.h b/src/myFunc.h index 5d158b3..6604826 100644 --- a/src/myFunc.h +++ b/src/myFunc.h @@ -77,6 +77,7 @@ char* getLine(char* buffer, int buffer_size, FILE* stream, std::string& line); float get_sd (const std::vector& data, float mean); float get_median(const std::vector& data) ; float get_median(const std::vector& data, int start, int end) ; +float get_medianNotNA(const std::vector & myvector) ; float get_mean(const std::vector& data) ; float get_weighted_mean(const std::vector& data, const std::vector& weights) ; float get_sum(const std::vector& data) ; diff --git a/src/version.h b/src/version.h index fa957ec..4417bbc 100644 --- a/src/version.h +++ b/src/version.h @@ -3,7 +3,7 @@ #define VERSION_H const double VERSION_OFFSET = 3; -const double FREEC_VERSION = 10.5; +const double FREEC_VERSION = 10.6; const double CONTROL_FREEC_VERSION = FREEC_VERSION - VERSION_OFFSET; #endif