Skip to content

Commit

Permalink
Fixing bugs: BAF fit
Browse files: browse the repository at this point in the history
  • Loading branch information
valeu committed Apr 23, 2017
1 parent fea52d3 commit a45206e
Show file tree
Hide file tree
Showing 9 changed files with 61 additions and 31 deletions.
8 changes: 5 additions & 3 deletions src/ChrCopyNumber.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,11 @@ class ChrCopyNumber

void addBAFinfo(SNPinGenome & snpingenome,int indexSNP);

void fillInRatio(bool islog);
void calculateRatio(ChrCopyNumber control, float normalizationConst) ;
void recalculateRatio (float constant);
void recalculateRatioWithContam(float contamination, float normGenytype);
void recalculateLogRatio (float constant) ;
void recalculateRatioWithContam(float contamination, float normGenytype, bool isLogged);
void recalculateRatio(ChrCopyNumber control);
void calculateRatio(ChrCopyNumber control, double a0, double a1);
void calculateRatio(ChrCopyNumber control, const double * a, const int degree);
Expand All @@ -39,7 +41,7 @@ class ChrCopyNumber
double calculateXiSum(int ploidy, std::map <float,float> &sds, std::map <float,float> &meds);
double calculateXiSum(int ploidy, std::map <float,float> &sds);
void calculateCopyNumberMedian(); //create median profiles using 'bpfinal_' and store them in medianProfile_, info about medians themselves is stored in medianValues_ and about SD in sd_, lengths of fragments in bpLengths_
void calculateCopyNumberMedian(int ploidy, int minCNAlength, bool noisyData, bool CompleteGenomicsData); //create median profiles as calculateCopyNumberMedian(), but merges close regions (roundByPloidy(median))
void calculateCopyNumberMedian(int ploidy, int minCNAlength, bool noisyData, bool CompleteGenomicsData, bool isLogged); //create median profiles as calculateCopyNumberMedian(), but merges close regions (roundByPloidy(median))
void recalcFlanksForIndeces (int i, int j);
void recalcFlanks(int telo_centromeric_flanks, int minNumberOfWindows); //merge short notNA-segments around NA-segments

Expand All @@ -52,7 +54,7 @@ class ChrCopyNumber

void deleteFlanks(int telo_centromeric_flanks);
void deleteFragment(int i) ;

int removeLargeExons(float threshold);

std::string getGeneNameAtBin(int i);
float getValueAt(int i);
Expand Down
10 changes: 8 additions & 2 deletions src/GenomeCopyNumber.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,15 @@ class GenomeCopyNumber
int processRead(InputFormat inputFormat, MateOrientation matesOrientation, const char* line_buffer, int& prevInd, std::string targetBed = "", std::string mateFileName = "");
int processReadWithBowtie(std::string const& inputFormat, std::string const& matesOrientation,std::string const line,std::string const line2);
int focusOnCapture (std::string const& captureFile);
float removeLargeExons(float iqrToKeep);
void initCopyNumber(std::string const& chrLenFileName, int windowSize , int step, std::string targetBed);
void finishCopyNumber(long normalCount);
void addBAFinfo(SNPinGenome & snpingenome);
void removeLowReadCountWindows(GenomeCopyNumber & controlCopyNumber, int RCThresh);
void removeLowReadCountWindowsFromControl (int RCThresh);

int calculateRatio( GenomeCopyNumber & controlCopyNumber, int degree, bool intercept,bool logLogNorm) ;
int fillInRatio();
int calculateRatio( GenomeCopyNumber & controlCopyNumber, int degree, bool intercept) ;
void calculateRatioUsingCG( GenomeCopyNumber & controlCopyNumber) ;
void calculateRatioUsingCG_Regression( GenomeCopyNumber & controlCopyNumber) ;
float calculateNormalizationConstant(GenomeCopyNumber & controlCopyNumber);
Expand Down Expand Up @@ -123,7 +125,9 @@ class GenomeCopyNumber

int findWinNumber(int position, std::string myName, std::string const& matefile);
void setWESanalysis(bool WESgiven);
void setmakingPileup(bool makingPileup_given);
void setmakingPileup(bool makingPileup_given);
void setIfLogged(bool);

double Percentage_GenomeExplained(int &);
long double calculateRSS(int ploidy);
bool isMappUsed();
Expand All @@ -136,6 +140,8 @@ class GenomeCopyNumber
bool makingPileup;
bool SeekingSubc_;
bool isMappUsed_;
bool isRatioLogged_;

void fillMyHash(std::string const& mateFileName , std::string const& inputFormat, std::string const& matesOrientation, int windowSize, int step, std::string targetBed = "");
int windowSize_;
int step_;
Expand Down
5 changes: 3 additions & 2 deletions src/SNPinGenome.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,9 @@ void SNPinGenome::readMateFile(std::string const& mateFile, std::string const& i

void SNPinGenome::readMateFile(std::string const& mateFile, std::string const& inputFormat, int minimalTotalLetterCountPerPosition, int minimalQualityPerPosition, GenomeCopyNumber& genomeCopyNumber, std::string const& chrLenFileName, int windowSize, int step, std::string targetBed) {
// must perform partly GenomeCopyNumber::readCopyNumber line #114, all but fillMyHash
genomeCopyNumber.initCopyNumber(chrLenFileName, windowSize, step, targetBed);
assignValues(mateFile, inputFormat, minimalTotalLetterCountPerPosition,minimalQualityPerPosition, &genomeCopyNumber);
genomeCopyNumber.initCopyNumber(chrLenFileName, windowSize, step, targetBed);
assignValues(mateFile, inputFormat, minimalTotalLetterCountPerPosition,minimalQualityPerPosition, &genomeCopyNumber);

pileup_read = true;
}

Expand Down
2 changes: 1 addition & 1 deletion src/SNPinGenome.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ struct SNPinGenomePerformArgWrapper : public ThreadArg {
int minCNAlength;
const char* what;

SNPinGenomePerformArgWrapper(SNPinGenome& snpingenome, std::string const& mateFile, const std::string& inputFormat, int minimalTotalLetterCountPerPosition, int minimalQualityPerPosition, bool noisyData, bool CompleteGenomicsData, GenomeCopyNumber& genomeCopyNumber, double breakPointThreshold, int breakPointType, int minCNAlength, const char* what) : snpingenome(snpingenome), mateFile(mateFile), inputFormat(inputFormat), minimalTotalLetterCountPerPosition(minimalTotalLetterCountPerPosition), minimalQualityPerPosition(minimalQualityPerPosition), noisyData(noisyData),genomeCopyNumber(genomeCopyNumber), breakPointThreshold(breakPointThreshold), breakPointType(breakPointType), minCNAlength(minCNAlength), what(what) { }
SNPinGenomePerformArgWrapper(SNPinGenome& snpingenome, std::string const& mateFile, const std::string& inputFormat, int minimalTotalLetterCountPerPosition, int minimalQualityPerPosition, bool noisyData, bool CompleteGenomicsData, GenomeCopyNumber& genomeCopyNumber, double breakPointThreshold, int breakPointType, int minCNAlength, const char* what) : snpingenome(snpingenome), mateFile(mateFile), inputFormat(inputFormat), minimalTotalLetterCountPerPosition(minimalTotalLetterCountPerPosition), minimalQualityPerPosition(minimalQualityPerPosition), noisyData(noisyData),CompleteGenomicsData(CompleteGenomicsData),genomeCopyNumber(genomeCopyNumber), breakPointThreshold(breakPointThreshold), breakPointType(breakPointType), minCNAlength(minCNAlength), what(what) { }
};

extern void* SNPinGenome_perform_wrapper(void *arg);
Expand Down
2 changes: 1 addition & 1 deletion src/SVfinder.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ int runWithDefinedPloidy(int ploidy, GenomeCopyNumber & sampleCopyNumber, Genome
int degree,int intercept,bool logLogNorm,float minExpectedGC,float maxExpectedGC,float knownContamination,float breakPointThreshold,int breakPointType,int minCNAlength,
int teloCentroFlanks, std::vector<double> & RSS, std::vector<double> &percentage_GenExpl,bool contaminationAdjustment,std::vector<double> &contamination,
ThreadPool * thrPool,ThreadPoolManager * thrPoolManager,std::string makePileup, float seekSubclones,
std::string myName,std::vector<int> &unexplainedChromosomes, bool CompleteGenomicsData) ;
std::string myName,std::vector<int> &unexplainedChromosomes, bool CompleteGenomicsData,bool normalization) ;


#endif //SVFINDER_H
Expand Down
Binary file modified src/freec
Binary file not shown.
62 changes: 41 additions & 21 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -352,44 +352,52 @@ int main(int argc, char *argv[])
bool is_sample_pileup = (sample_inputFormat.compare("pileup") == 0 || sample_inputFormat.compare("SAMtools pileup") == 0);
bool is_control_pileup = (control_inputFormat.compare("pileup") == 0 || control_inputFormat.compare("SAMtools pileup") == 0);


bool has_BAF = cf.hasValue("BAF","SNPfile");
std::string makePileup = (std::string)cf.Value("BAF","makePileup", "false");
std::string fastaFile = (std::string)cf.Value("BAF","fastaFile", "false");
std::string miniPileupFileSample = (std::string)cf.Value("sample","miniPileup", "false");
std::string miniPileupFileControl = (std::string)cf.Value("sample","miniPileup", "false");

bool isHasMiniPileUPsample = (miniPileupFileSample=="false")?0:1;
bool isHasMiniPileUPcontrol = (miniPileupFileControl=="false")?0:1;

if (makePileup != "false" && fastaFile=="false") {
if (makePileup != "false" && fastaFile=="false" && (!isHasMiniPileUPsample || isControlIsPresent && !isHasMiniPileUPcontrol)) {
cerr << "To create a usable .pileup file from .BAM you need to provide a fasta file for the whole genome with option \"fastaFile\""<<endl;
cerr << "If you only want copy number profiles (no genotypes), then remove or comment all the lines in the group of parameters [BAF]"<<endl;

exit(0);
}

if (makePileup != "false")
if (isHasMiniPileUPsample) {
has_BAF = false;
}

if (makePileup != "false" && ((!isControlIsPresent)&&(!isHasMiniPileUPsample) || isControlIsPresent&&(!isHasMiniPileUPcontrol||miniPileupFileSample=="false" )))
{
cout << "FREEC will create a pileup to compute BAF profile! \n";
cout << "...File with SNPs : " << makePileup << "\n";
has_BAF = false;
}

if (has_BAF && makePileup == "false" && !has_sample_MateFile) {
if (has_BAF && makePileup == "false" && !has_sample_MateFile && !isHasMiniPileUPsample) {
cerr << "ERROR: you need to provide a 'mateFile' for the [sample] (in SAMtools pileup format) to be able to calculate BAF profiles with options [BAF] or to provide a BED/VCF file with SNP positions (option \"makePileup\")\n";
exit (0);
}

if (has_BAF && !has_control_MateFile && isControlIsPresent && makePileup == "false") {
if (has_BAF && !has_control_MateFile && isControlIsPresent && makePileup == "false" && !isHasMiniPileUPcontrol) {
cerr << "ERROR: you need to provide a 'mateFile' for the [control] (in SAMtools pileup format) to be able to calculate BAF profiles with options [BAF] and detect somatic CNAs and LOH\n";
cerr << "..Otherwise, you may not to use the control data at all. Just comment or delete 'mateCopyNumberFile' in the [control] group of parameters\n";
exit (0);
}

if (!is_sample_pileup && has_BAF && makePileup == "false") {
if (!is_sample_pileup && has_BAF && makePileup == "false" && !isHasMiniPileUPsample) {
cerr << "Error: to calculate BAF values, you need to provide mateFile in SAMtools pileup format\n Or you can set 'makePileup' parameter true by providing a path to a VCF file with SNP positions\n";
cout << "..since you mateFile is not in SAMtools pileup format, the BAF values will not be calculated\n";
has_BAF=false;
}
string SNPinfoFile = std::string(cf.Value("BAF","SNPfile",""));

if (makePileup != "false" && SNPinfoFile=="") {
if (makePileup != "false" && SNPinfoFile=="" || isHasMiniPileUPsample&& SNPinfoFile=="") {
if (makePileup.substr(makePileup.size()-3,3)=="vcf" || makePileup.substr(makePileup.size()-6,6)=="vcf.gz") {
SNPinfoFile=makePileup;
} else {
Expand Down Expand Up @@ -574,9 +582,10 @@ int main(int argc, char *argv[])
cout << "..break-point type set to "<<breakPointType<<"\n";

bool noisyData = (bool)cf.Value("general","noisyData", "false");

if ((!noisyData) && ifTargeted && has_BAF) {
cout << "Warning: consider using '[general] noisyData=true' if you expect to have highly nonuniform coverage along the genome\n";
} else if (noisyData && !has_BAF && makePileup=="false"){
} else if (noisyData && !has_BAF && makePileup=="false" && !isHasMiniPileUPsample){
cout << "Warning: Parameter '[general] noisyData=true' will not have effect since FREEC won't use BAF information to correct predicted copy numbers\n";
}else if (noisyData && !ifTargeted ){
cout << "Warning: I would not recommend using '[general] noisyData=true' for whole genome data; you can miss some real CNAs in this case\n";
Expand Down Expand Up @@ -701,19 +710,30 @@ int main(int argc, char *argv[])
string controlPileup;
string samplePileup;

if (makePileup != "false") {
cout << "Creating Pileup file to compute BAF profile...\n";
minipileup.makepileup(sampleCopyNumber, controlCopyNumber, sample_MateFile, control_MateFile, myName, makePileup, sample_MateFile,
sample_inputFormat, sample_mateOrientation, pathToSamtools, pathToSambamba, SambambaThreads, chrLenFile, controlName, targetBed, pathToBedtools, fastaFile, minimalQualityPerPosition);
cout << "... -> Done!\n";
if (makePileup != "false" || isHasMiniPileUPcontrol || isHasMiniPileUPsample) {
if (!isHasMiniPileUPsample || (isControlIsPresent && !isHasMiniPileUPcontrol)) {
cout << "Creating Pileup file to compute BAF profile...\n";
minipileup.makepileup(sampleCopyNumber, controlCopyNumber, sample_MateFile, control_MateFile, myName, makePileup, sample_MateFile,
sample_inputFormat, sample_mateOrientation, pathToSamtools, pathToSambamba, SambambaThreads, chrLenFile, controlName, targetBed, pathToBedtools, fastaFile, minimalQualityPerPosition);
cout << "... -> Done!\n";
}

GenomeCopyNumberReadMateFileArgWrapper* readMateFileArg;
cout << "..will use SNP positions from "<< SNPinfoFile << " to calculate BAF profiles\n";

thrPool = thrPoolManager->newThreadPool("GenomeCopyNumber_readMateFile");
snpingenome.readSNPs(SNPinfoFile);

controlPileup = controlName + "_minipileup" +".pileup";
samplePileup = myName + "_minipileup" +".pileup";
if (!isHasMiniPileUPsample || (isControlIsPresent && !isHasMiniPileUPcontrol)) {
controlPileup = controlName + "_minipileup" +".pileup";
samplePileup = myName + "_minipileup" +".pileup";
}
if (isHasMiniPileUPsample) {
samplePileup = miniPileupFileSample;
}
if (isControlIsPresent && isHasMiniPileUPcontrol) {
controlPileup = miniPileupFileControl;
}

if (is_sample_pileup && !has_sample_mateCopyNumberFile && has_window)
{
Expand Down Expand Up @@ -992,7 +1012,7 @@ int main(int argc, char *argv[])
}

double breakPointThreshold_BAF=1;
if (has_BAF || makePileup != "false") {
if (has_BAF || makePileup != "false" || isHasMiniPileUPsample) {
breakPointThreshold_BAF = 0.8;
if (ifTargeted)
breakPointThreshold_BAF = 1.6;
Expand All @@ -1004,7 +1024,7 @@ int main(int argc, char *argv[])

SNPinGenomePerformArgWrapper* snpArg;

if (makePileup == "false")
if (makePileup == "false" && !isHasMiniPileUPsample)
{
snpArg = new SNPinGenomePerformArgWrapper(snpingenome, sample_MateFile, sample_inputFormat, minimalTotalLetterCountPerPosition,minimalQualityPerPosition, noisyData,CompleteGenomicsData,sampleCopyNumber, breakPointThreshold_BAF, breakPointType, minCNAlength, "Sample");
thrPool->addThread(SNPinGenome_perform_wrapper, snpArg);
Expand All @@ -1016,15 +1036,15 @@ int main(int argc, char *argv[])
}
//the same for the control sample:
if (isControlIsPresent) {
if (makePileup == "false")
if (makePileup == "false" && !isHasMiniPileUPcontrol)
{
snpArg = new SNPinGenomePerformArgWrapper(snpingenomeControl, control_MateFile, control_inputFormat, minimalTotalLetterCountPerPosition,minimalQualityPerPosition, noisyData, CompleteGenomicsData,controlCopyNumber, breakPointThreshold_BAF, breakPointType, minCNAlength, "Control");
thrPool->addThread(SNPinGenome_perform_wrapper, snpArg);
}
else
{
//snpArg = new SNPinGenomePerformArgWrapper(snpingenomeControl, control_MateFile,"pileup", minimalTotalLetterCountPerPosition,minimalQualityPerPosition, noisyData, controlCopyNumber, breakPointThreshold_BAF, breakPointType, minCNAlength, "Control");
//thrPool->addThread(SNPinGenome_perform_wrapper, snpArg);
{// the two lines below were commented for some unknown reason..
snpArg = new SNPinGenomePerformArgWrapper(snpingenomeControl, controlPileup,"pileup", minimalTotalLetterCountPerPosition,minimalQualityPerPosition, noisyData, CompleteGenomicsData, controlCopyNumber, breakPointThreshold_BAF, breakPointType, minCNAlength, "Control");
thrPool->addThread(SNPinGenome_perform_wrapper, snpArg);
}
}

Expand Down
1 change: 1 addition & 0 deletions src/myFunc.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ char* getLine(char* buffer, int buffer_size, FILE* stream, std::string& line);
float get_sd (const std::vector<float>& data, float mean);
float get_median(const std::vector<float>& data) ;
float get_median(const std::vector<float>& data, int start, int end) ;
float get_medianNotNA(const std::vector<float> & myvector) ;
float get_mean(const std::vector<float>& data) ;
float get_weighted_mean(const std::vector<float>& data, const std::vector<float>& weights) ;
float get_sum(const std::vector<float>& data) ;
Expand Down
2 changes: 1 addition & 1 deletion src/version.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#define VERSION_H

const double VERSION_OFFSET = 3;
const double FREEC_VERSION = 10.5;
const double FREEC_VERSION = 10.6;
const double CONTROL_FREEC_VERSION = FREEC_VERSION - VERSION_OFFSET;

#endif

0 comments on commit a45206e

Please sign in to comment.