Skip to content

Commit

Permalink
version 10.6
Browse files Browse the repository at this point in the history
  • Loading branch information
valeu committed Apr 20, 2017
1 parent 5df72ec commit fea52d3
Show file tree
Hide file tree
Showing 5 changed files with 280 additions and 95 deletions.
74 changes: 67 additions & 7 deletions src/ChrCopyNumber.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ ChrCopyNumber::ChrCopyNumber(int windowSize, int chrLength, std::string const& c
isMedianCalculated_ = false;
isSmoothed_ = false;
ploidy_=NA;
float meanTargetRegionLength=0;
if (targetBed == "") {
if (windowSize ==0) {
cerr << "Error: windowSize is set to Zero\n";
Expand Down Expand Up @@ -150,7 +151,7 @@ ChrCopyNumber::ChrCopyNumber(int windowSize, int chrLength, std::string const& c
oss.flush();
genes_names.push_back(oss.str());
}

meanTargetRegionLength+=atoi(endstr.c_str())-atoi(start.c_str());
++length_;

}
Expand All @@ -162,10 +163,14 @@ ChrCopyNumber::ChrCopyNumber(int windowSize, int chrLength, std::string const& c
}

exons_Countchr_ = length_;

meanTargetRegionLength/=length_;
readCount_ = vector<float>(exons_Countchr_,0);

cout << "Number of exons analysed in chromosome "<< chromosome_ << " : " << exons_Countchr_ << "\n";
cout << "Average exon length in chromosome "<< chromosome_ << " : " << meanTargetRegionLength << "\n";
if (meanTargetRegionLength <30) {
cerr << "WARNING: check your file with targeted regions: the average length of targeted regions is unexpectedly short\n";
}

}else {
std::cerr << "Failed to open " << captureFile << "\n";
Expand Down Expand Up @@ -415,6 +420,23 @@ void ChrCopyNumber::removeLowReadCountWindows(const int RCThresh) {
}
}

// Fill ratio_ directly from the per-window read counts of this chromosome.
//
// islog: when true each usable window stores log(readCount+1)/log(2.0),
//        i.e. the base-2 logarithm of (count + 1); otherwise the raw count is stored.
//
// A window is set to NA when its read count is negative, or when a mappability
// profile is present and the window's mappability is <= minMappabilityPerWindow.
void ChrCopyNumber::fillInRatio(bool islog) {
    if ((int)ratio_.size() != length_) {
        ratio_.resize(length_);
    }

    // The profile is not modified in the loop, so its presence is checked once.
    const bool hasMappability = mappabilityProfile_.size() > 0;

    for (int bin = 0; bin < length_; ++bin) {
        // Same short-circuit order as before: mappability is only consulted for non-negative counts.
        const bool usable = readCount_[bin]>=0
                            && !(hasMappability && mappabilityProfile_[bin] <= minMappabilityPerWindow);
        if (!usable) {
            ratio_[bin] = NA;
            continue;
        }

        if (islog) {
            ratio_[bin] = log(readCount_[bin]+1)/log(2.0);
        } else {
            ratio_[bin] = readCount_[bin];
        }
    }
}

void ChrCopyNumber::calculateRatioLog(ChrCopyNumber control, const double * a, const int degree){
if ((int)ratio_.size()!=length_)
ratio_.resize(length_);
Expand Down Expand Up @@ -623,15 +645,36 @@ void ChrCopyNumber::recalculateRatio (float constant) {
ratio_[i] /= constant;
}

void ChrCopyNumber::recalculateRatioWithContam (float contamination, float normGenytype) { //normGenytype==1 if AB, normGenytype==0.5 if A
// Log-scale counterpart of recalculateRatio(): where that function divides every
// ratio by `constant`, this one subtracts `constant` from every ratio.
// Windows whose ratio equals NA are left untouched.
void ChrCopyNumber::recalculateLogRatio (float constant) {
    for (int i = 0; i<length_; i++) {
        // A single NA guard; the duplicated guard line left the braces unbalanced.
        if (ratio_[i] != NA) {
            ratio_[i] -= constant;
        }
    }
}

// Correct every defined ratio of this chromosome for contamination of the sample.
//
// contamination: contamination fraction used in the correction formula below.
// normGenytype:  1 if AB, 0.5 if A (see the callers' handling of X/Y in XY samples).
// isLogged:      true when ratio_ holds log2 values; they are converted back with
//                pow(2, x), corrected, and stored again as log2.
//
// Windows whose ratio equals NA are skipped. In linear mode a negative corrected
// value is clamped to 0; in logged mode it becomes NA.
void ChrCopyNumber::recalculateRatioWithContam (float contamination, float normGenytype, bool isLogged) { //normGenytype==1 if AB, normGenytype==0.5 if A
    if (!isLogged) {
        for (int i = 0; i<length_; i++) {
            if (ratio_[i] != NA) {
                //ratio_[i] = (ratio_[i]-contamination*normGenytype)/(1-contamination); //correct only for ploidy 2
                // Applied exactly once: the statement used to appear twice, which corrected the value twice.
                ratio_[i] = (ratio_[i]*(1-contamination+2*contamination/ploidy_) -contamination*normGenytype/ploidy_*2)/(1-contamination);

                if (ratio_[i]<0)
                    ratio_[i] = 0;
            }
        }
    } else {
        for (int i = 0; i<length_; i++) {
            if (ratio_[i] != NA) {
                float realCopy = pow(2,ratio_[i]);
                ratio_[i] = (realCopy*(1-contamination+2*contamination/ploidy_) -contamination*normGenytype/ploidy_*2)/(1-contamination);
                if (ratio_[i]<0) {
                    ratio_[i] = NA;
                } else {
                    // NOTE(review): a corrected value of exactly 0 gives log2(0) = -infinity here; confirm downstream code copes.
                    ratio_[i]=log2(ratio_[i]);
                }
            }
        }
    }
}


Expand Down Expand Up @@ -659,7 +702,7 @@ int ChrCopyNumber::getCoveredPart(int breakPointStart, int breakPointEnd) { //fo
return lengthCovered;
}

void ChrCopyNumber::calculateCopyNumberMedian(int ploidy, int minCNAlength, bool noisyData, bool CompleteGenomicsData){ //create median profiles using 'bpfinal_' and store them in medianProfile_, info about medians themselves is stored in medianValues_ and about SD in sd_, lengths of fragments in bpLengths_
void ChrCopyNumber::calculateCopyNumberMedian(int ploidy, int minCNAlength, bool noisyData, bool CompleteGenomicsData, bool isLogged){ //create median profiles using 'bpfinal_' and store them in medianProfile_, info about medians themselves is stored in medianValues_ and about SD in sd_, lengths of fragments in bpLengths_
if (ploidy!=ploidy_) {
cerr << "..Warning: in calculateCopyNumberMedian() class's ploidy is different from "<<ploidy<<"\n";
ploidy_=ploidy;
Expand Down Expand Up @@ -733,8 +776,11 @@ void ChrCopyNumber::calculateCopyNumberMedian(int ploidy, int minCNAlength, bool

bool ifHomoz = false;
float locMedian=NA;
if (int(data.size())>=minCNAlength && data.size()>0)
if (int(data.size())>=minCNAlength && data.size()>0) {
locMedian = get_median(data); //including the last point
if (isLogged)
locMedian=pow(2, locMedian);
}
if (isBAFpresent && notNA > 100 && locMedian < 1 && noisyData ) {
vector<string>heteroValuesPerWindowStrings = split(BAFValuesInTheSegment, ';');
int numberofBAFpoints =heteroValuesPerWindowStrings.size();
Expand Down Expand Up @@ -768,6 +814,8 @@ void ChrCopyNumber::calculateCopyNumberMedian(int ploidy, int minCNAlength, bool
}
} else {
median = get_median(data); //including the last point
if (isLogged)
median=pow(2, median);
if (isBAFpresent) {
// if (dataBAF.size()>0)
// medianBAF = get_median(dataBAF);
Expand Down Expand Up @@ -1209,6 +1257,18 @@ void ChrCopyNumber::deleteFlanks(int telo_centromeric_flanks) {

}


// Mask every region of this chromosome whose length (ends_ - coordinates_)
// exceeds `threshold` by setting its read count to NA.
// Returns the number of regions that were masked.
int ChrCopyNumber::removeLargeExons(float threshold) {
    int maskedRegions = 0;
    for (int bin = 0; bin < length_; ++bin) {
        const bool tooLong = ends_[bin]-coordinates_[bin]>threshold;
        if (!tooLong) {
            continue;
        }
        readCount_[bin]=NA;
        ++maskedRegions;
    }
    return maskedRegions;
}

void ChrCopyNumber::recalcFlanks(int telo_centromeric_flanks, int minNumberOfWindows) {
int maxRegionLengthToDelete = int(telo_centromeric_flanks/step_);
for (int i = 0; i < (int)medianValues_.size(); i++) {
Expand Down
134 changes: 105 additions & 29 deletions src/GenomeCopyNumber.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -401,9 +401,9 @@ void GenomeCopyNumber::recalculateRatio (float contamination) {
for ( it=chrCopyNumber_.begin() ; it != chrCopyNumber_.end(); it++ ) {
if (sex_.compare("XY")==0 && (it->getChromosome().find("X")!=string::npos || it->getChromosome().find("Y")!=string::npos)) {
//should take into account that normally one has only one copy of X and Y..
it->recalculateRatioWithContam(contamination,0.5);
it->recalculateRatioWithContam(contamination,0.5, isRatioLogged_);
} else
it->recalculateRatioWithContam(contamination,1);
it->recalculateRatioWithContam(contamination,1, isRatioLogged_);
}

}
Expand Down Expand Up @@ -1101,19 +1101,21 @@ long double GenomeCopyNumber::calculateRSS(int ploidy)
int index = findIndex(chrNumber);
int length = chrCopyNumber_[index].getLength();
for (int i = 0; i< length; i++) {
float observed = 0;
float expected = 0;
{
observed = chrCopyNumber_[index].getRatioAtBin(i);
expected = observed;
float observed = chrCopyNumber_[index].getRatioAtBin(i);
if (observed!=NA) {
if (isRatioLogged_) {
observed=pow(2,observed);
}
float expected = observed;
if (chrCopyNumber_[index].isMedianCalculated()) {
expected = chrCopyNumber_[index].getMedianProfileAtI(i);
if (chrCopyNumber_[index].isSmoothed())
expected = chrCopyNumber_[index].getSmoothedProfileAtI(i);
}

}
observedvalues.push_back(observed);
expectedvalues.push_back(expected);
observedvalues.push_back(observed);
expectedvalues.push_back(expected);
}
}
}

Expand All @@ -1126,7 +1128,12 @@ long double GenomeCopyNumber::calculateRSS(int ploidy)
RSS = RSS + (long double)pow(diff,2);
}
}
return RSS;
if (observedvalues.size()==0) {
return 0;
}
double normRSS = (RSS/observedvalues.size());
observedvalues.clear();expectedvalues.clear();
return normRSS;
}


Expand Down Expand Up @@ -1223,7 +1230,35 @@ void GenomeCopyNumber::calculateRatioUsingCG_Regression( GenomeCopyNumber & cont
//XXX
}

int GenomeCopyNumber::calculateRatio( GenomeCopyNumber & controlCopyNumber, int degree, bool intercept,bool logLogNorm) {
int GenomeCopyNumber::fillInRatio() {
vector <float> countValues;
vector<ChrCopyNumber>::iterator it;

for ( it=chrCopyNumber_.begin() ; it != chrCopyNumber_.end(); it++ ) {
if ((sex_.compare("XY")==0) && (it->getChromosome().find("X")!=string::npos || it->getChromosome().find("Y")!=string::npos)) {
//should take into account that normally one has only one copy of X and Y..
it->fillInRatio(isRatioLogged_);
} else {
it->fillInRatio(isRatioLogged_);
for (int i = 0; i< it->getLength(); i++) {
if (it->getValueAt(i)>0) {
countValues.push_back(it->getRatioAtBin(i));
}
}
}
}
float median=get_medianNotNA(countValues);
for ( it=chrCopyNumber_.begin() ; it != chrCopyNumber_.end(); it++ ) {
if (!isRatioLogged_) {
it->recalculateRatio(median);
} else {
it->recalculateLogRatio(median);
}
}
return 1;
}

int GenomeCopyNumber::calculateRatio( GenomeCopyNumber & controlCopyNumber, int degree, bool intercept) {


int maximalNumberOfIterations = 300;
Expand All @@ -1232,7 +1267,7 @@ int GenomeCopyNumber::calculateRatio( GenomeCopyNumber & controlCopyNumber, int

int successfulFit = 0;

if(logLogNorm) {
if(isRatioLogged_) {
intercept=1; degree=1;//because it is loglogscale
vector <float> y; //y ~ a0x+a1
vector <float> x;
Expand Down Expand Up @@ -2920,6 +2955,8 @@ void GenomeCopyNumber::printRatioBedGraph(std::string const& chr, std::ofstream

for (int i = 0; i< length; i++) {
value=chrCopyNumber_[index].getRatioAtBin(i);
if (isRatioLogged_ && value!=NA)
value=pow(2,value);
position=chrCopyNumber_[index].getCoordinateAtBin(i);
float valueToPrint;
if (chrCopyNumber_[index].isSmoothed())
Expand Down Expand Up @@ -2962,8 +2999,12 @@ void GenomeCopyNumber::printRatio(std::string const& chr, std::ofstream & file,
int length = chrCopyNumber_[index].getLength();
//cout <<length<<" == "<<chrCopyNumber_[index].getValues().size() <<"\n";
for (int i = 0; i< length; i++) {
if (printNA || chrCopyNumber_[index].getRatioAtBin(i)!=NA) {//process this this window
file << chrNumber <<"\t"<<chrCopyNumber_[index].getCoordinateAtBin(i)+1<<"\t"<<chrCopyNumber_[index].getRatioAtBin(i) ;
float ratioToPrint = chrCopyNumber_[index].getRatioAtBin(i);
if (isRatioLogged_ && ratioToPrint!= NA) {
ratioToPrint=pow(2,ratioToPrint);
}
if (printNA || ratioToPrint!=NA) {//process this this window
file << chrNumber <<"\t"<<chrCopyNumber_[index].getCoordinateAtBin(i)+1<<"\t"<<ratioToPrint ;
if (chrCopyNumber_[index].isMedianCalculated()) {
file << "\t"<<chrCopyNumber_[index].getMedianProfileAtI(i) ;
float valueToPrint;
Expand Down Expand Up @@ -3127,7 +3168,7 @@ void GenomeCopyNumber::calculateCopyNumberMedians (int minCNAlength, bool noisyD
vector<ChrCopyNumber>::iterator it;
for ( it=chrCopyNumber_.begin() ; it != chrCopyNumber_.end(); it++ ) {
cout << "..calculating medians for " << it->getChromosome()<< "\n";
it->calculateCopyNumberMedian(ploidy_, minCNAlength, noisyData, CompleteGenomicsData);
it->calculateCopyNumberMedian(ploidy_, minCNAlength, noisyData, CompleteGenomicsData, isRatioLogged_);
}
}

Expand Down Expand Up @@ -3186,6 +3227,8 @@ void GenomeCopyNumber::calculateSDAndMed(int ploidy, map <float,float> &sds,map
med = it->getMedianProfileAtI(i);
float level = round_by_ploidy(med, ploidy);
value = it->getRatioAtBin(i);
if (isRatioLogged_&& value!=NA)
value=pow(2,value);
if (value != NA) {
if (mymap.count(level) == 0) {
vector <float> a;
Expand Down Expand Up @@ -3223,6 +3266,8 @@ void GenomeCopyNumber::calculateSDs(int ploidy, map <float,float> &sds) {
med = it->getMedianProfileAtI(i);
float level = round_by_ploidy(med, ploidy);
value = it->getRatioAtBin(i);
if (isRatioLogged_&& value!=NA)
value=pow(2,value);
if (value != NA) {
if (mymap.count(level) == 0) {
vector <float> a;
Expand Down Expand Up @@ -3267,6 +3312,8 @@ float GenomeCopyNumber::calculateVarianceForNormalCopy(int ploidy) { //geting th
med = it->getMedianProfileAtI(i);
if ((med>lowBoundary)&&(med < highBoundary)) {
value = it->getRatioAtBin(i);
if (isRatioLogged_&& value!=NA)
value=pow(2,value);
if (value != NA) {
myfile << value-1 << "\n";
variance += (value-1)*(value-1);
Expand Down Expand Up @@ -3319,19 +3366,23 @@ float GenomeCopyNumber::evaluateContamination () {
int length = chrCopyNumber_[index].getLength();
for (int i = 0; i< length; i++) {
float observed = chrCopyNumber_[index].getRatioAtBin(i);
float expected = observed;
if (chrCopyNumber_[index].isMedianCalculated()) {
expected = chrCopyNumber_[index].getMedianProfileAtI(i) ;
if (chrCopyNumber_[index].isSmoothed() && WESanalysis == false)
expected = chrCopyNumber_[index].getSmoothedProfileAtI(i);
}
if (!(expected == 1 || expected <= 0 || expected >= 2 || observed > 3 || observed <= 0)
&& (((1>observed)&&(1>expected))||((1<observed)&&(1<expected)))) {// should it be something related to ploidy_ and not 2
float p = (observed-expected)/(observed-expected+2/ploidy_*(1-observed));
if (p>-0.5 && p<1.5) {
values.push_back(p);
weights.push_back(chrCopyNumber_[index].getFragmentLengths_notNA_At(i));
}
if (observed!=NA) {
if(isRatioLogged_)
observed=pow(2,observed);
float expected = observed;
if (chrCopyNumber_[index].isMedianCalculated()) {
expected = chrCopyNumber_[index].getMedianProfileAtI(i) ;
if (chrCopyNumber_[index].isSmoothed() && WESanalysis == false)
expected = chrCopyNumber_[index].getSmoothedProfileAtI(i);
}
if (!(expected == 1 || expected <= 0 || expected >= 2 || observed > 3 || observed <= 0)
&& (((1>observed)&&(1>expected))||((1<observed)&&(1<expected)))) {// should it be something related to ploidy_ and not 2
float p = (observed-expected)/(observed-expected+2/ploidy_*(1-observed));
if (p>-0.5 && p<1.5) {
values.push_back(p);
weights.push_back(chrCopyNumber_[index].getFragmentLengths_notNA_At(i));
}
}
}
}
}
Expand Down Expand Up @@ -3373,6 +3424,8 @@ float GenomeCopyNumber::evaluateContaminationwithLR () {
for (int i = 0; i< length; i++) {
float observed = chrCopyNumber_[index].getRatioAtBin(i);
if (observed!=NA) {
if (isRatioLogged_)
observed=pow(2,observed);
float expected = observed;
if (chrCopyNumber_[index].isMedianCalculated()) {
expected = round_by_ploidy(chrCopyNumber_[index].getMedianProfileAtI(i),ploidy_) ;
Expand Down Expand Up @@ -4075,7 +4128,26 @@ int GenomeCopyNumber::processReadWithBowtie(std::string const& inputFormat, std:
}
return 0;
}
// Mask unusually long target regions across the whole genome.
//
// The length cut-off is  get_iqr(lengths)/2*iqrToKeep + get_median(lengths),
// computed over the lengths (end - start) of all regions on all chromosomes.
// Each chromosome then masks its regions longer than that cut-off
// (ChrCopyNumber::removeLargeExons).
//
// iqrToKeep: multiplier applied to half of the inter-quartile range.
// Returns the fraction of regions that were masked, or 0 when there are no
// regions at all (previously this case produced 0/0 = NaN and passed an empty
// vector to get_iqr/get_median).
float GenomeCopyNumber::removeLargeExons(float iqrToKeep) {
    vector<ChrCopyNumber>::iterator it;
    vector <float> exonLengths;

    for ( it=chrCopyNumber_.begin() ; it != chrCopyNumber_.end(); it++ ) {
        for (int i=0; i < it->getLength(); i++) {
            exonLengths.push_back(it->getEndAtBin(i)-it->getCoordinateAtBin(i));
        }
    }

    // Nothing to analyse: avoid statistics on an empty sample and a division by zero.
    if (exonLengths.empty()) {
        return 0;
    }

    float maxLength=get_iqr(exonLengths)/2*iqrToKeep+get_median(exonLengths);

    float numberOfRemovedExons = 0;
    for ( it=chrCopyNumber_.begin() ; it != chrCopyNumber_.end(); it++ ) {
        numberOfRemovedExons+=it->removeLargeExons(maxLength);
    }
    return numberOfRemovedExons/exonLengths.size();
}
int GenomeCopyNumber::focusOnCapture (std::string const& captureFile) {
ifstream file (captureFile.c_str());
string line;
Expand Down Expand Up @@ -4191,6 +4263,10 @@ void GenomeCopyNumber::setWESanalysis(bool WESgiven)
WESanalysis = WESgiven;
}

void GenomeCopyNumber::setIfLogged(bool isRatioLogged) {
isRatioLogged_=isRatioLogged;
}

void GenomeCopyNumber::setmakingPileup(bool makingPileup_given)
{
makingPileup = makingPileup_given;
Expand Down
Binary file modified src/freec
Binary file not shown.
Loading

0 comments on commit fea52d3

Please sign in to comment.