Skip to content

Commit

Permalink
Subclone detection finally working
Browse files Browse the repository at this point in the history
  • Loading branch information
valeu committed Nov 24, 2016
1 parent 103ba7c commit bebce82
Show file tree
Hide file tree
Showing 12 changed files with 205 additions and 167 deletions.
34 changes: 22 additions & 12 deletions src/BAFpileup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,15 @@ BAFpileup::BAFpileup()
void BAFpileup::makepileup(GenomeCopyNumber & sampleCopyNumber, GenomeCopyNumber & controlCopyNumber,
std::string sample_MateFile, std::string control_Matefile, std::string outputDir, std::string makeminipileup,
std::string const& mateFileName ,std::string const& inputFormat, std::string const& matesOrientation,
std::string pathToSamtools, std::string chrLenFileName, std::string controlName, std::string targetBed, std::string pathToBedtools,
std::string pathToSamtools, std::string pathToSambamba, std::string SambambaThreads, std::string chrLenFileName, std::string controlName, std::string targetBed, std::string pathToBedtools,
std::string fastaFile, int minQualPerPos)
{
//create a .bed file with regions of interest to create a minipileup: targeted + flanks for WES or all chromosomes for WGS:
std::string bedFileWithRegionsOfInterest = outputDir + "_NewCaptureRegions" + ".bed";

if (targetBed != "")
{
int flanks = calculateFlankLength(mateFileName, inputFormat, matesOrientation, pathToSamtools);
int flanks = calculateFlankLength(mateFileName, inputFormat, matesOrientation, pathToSamtools, pathToSambamba, SambambaThreads);
calculateNewBoundaries(targetBed, flanks, bedFileWithRegionsOfInterest);
}
else
Expand All @@ -27,17 +27,18 @@ void BAFpileup::makepileup(GenomeCopyNumber & sampleCopyNumber, GenomeCopyNumber
}
pathToBedtools_=pathToBedtools; // /*
string intersected = intersectWithBedtools(makeminipileup, outputDir, bedFileWithRegionsOfInterest, chrLenFileName);
string sampleOutFileName = createPileUpFile( outputDir, pathToSamtools, sample_MateFile, intersected, fastaFile,minQualPerPos);
string sampleOutFileName = createPileUpFile( outputDir, pathToSamtools , pathToSambamba, SambambaThreads, sample_MateFile, intersected, fastaFile,minQualPerPos);

//BAFtumor = computeBAF(sampleCopyNumber, _sample, outputDir, "_sample");

if (controlName.compare("")!=0) {
string controlOutFileName = createPileUpFile( controlName, pathToSamtools, control_Matefile, intersected, fastaFile,minQualPerPos);
string controlOutFileName = createPileUpFile( controlName, pathToSamtools, pathToSambamba, SambambaThreads, control_Matefile, intersected, fastaFile,minQualPerPos);
}
//computeBAF(controlCopyNumber, _control, outputDir, "_control");
remove(intersected.c_str()); // */
}
}

float BAFpileup::calculateFlankLength(std::string const& mateFileName, std::string const& inputFormat_str, std::string const& matesOrientation_str, std::string pathToSamtools_)
float BAFpileup::calculateFlankLength(std::string const& mateFileName, std::string const& inputFormat_str, std::string const& matesOrientation_str, std::string pathToSamtools_, std::string pathToSambamba, std::string SambambaThreads)
{
if (matesOrientation_str=="0") return 0; // do not add anything in case of single end data
if (getInputFormat(inputFormat_str)!=SAM_INPUT_FORMAT) return 0;
Expand Down Expand Up @@ -65,8 +66,12 @@ float BAFpileup::calculateFlankLength(std::string const& mateFileName, std::stri
int fragmentLength=0;
if(mateFileName.substr(mateFileName.size()-3,3).compare("bam")==0 || mateFileName.substr(mateFileName.size()-3,3).compare(".gz")==0) {
string command;
if (mateFileName.substr(mateFileName.size()-3,3).compare("bam")==0) {
command = pathToSamtools_ + " view "+mateFileName;
if (mateFileName.substr(mateFileName.size()-3,3).compare("bam")==0) {
if (pathToSambamba != "") {
command = pathToSambamba + " view -t " + SambambaThreads + " " + mateFileName;
} else {
command = pathToSamtools_ + " view "+mateFileName;
}
}
if (mateFileName.substr(mateFileName.size()-3,3).compare(".gz")==0) {
command = "gzip -c -d "+mateFileName;
Expand Down Expand Up @@ -264,15 +269,20 @@ std::string BAFpileup::intersectWithBedtools(std::string makeminipileup, std::st
return intersectedBed;
}


std::string BAFpileup::createPileUpFile(std::string outputDir, std::string samtools_path,std::string control_MateFile, std::string intersected, std::string fastaFile, int minQualPerPos)
std::string BAFpileup::createPileUpFile(std::string outputDir, std::string samtools_path, std::string pathToSambamba,std::string SambambaThreads , std::string control_MateFile, std::string intersected, std::string fastaFile, int minQualPerPos)
{
string minipileup = outputDir + "_minipileup" +".pileup";
FILE *stream;
string command = samtools_path + " mpileup -f "+fastaFile+" -d 8000 -Q "+int2string(minQualPerPos)+" -q 1 -l " + intersected + " " + control_MateFile + " > " + minipileup; //discard reads wit 0 mapping quality

string command;
if (pathToSambamba != "") {
string samtools_arg = "--samtools -f " +fastaFile+ " -d 8000 -Q "+int2string(minQualPerPos)+ " -q 1 -l " + intersected;
command = pathToSambamba + " mpileup -t " + SambambaThreads + " -o " + minipileup + " " + control_MateFile + " " + samtools_arg ;
} else {
command = samtools_path + " mpileup -f "+fastaFile+" -d 8000 -Q "+int2string(minQualPerPos)+" -q 1 -l " + intersected + " " + control_MateFile + " > " + minipileup; //discard reads wit 0 mapping quality
}

stream =
stream =
#if defined(_WIN32) || (defined(__APPLE__) && defined(__MACH__))
_popen(command.c_str(), "w");
#else
Expand Down
9 changes: 5 additions & 4 deletions src/BAFpileup.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,14 @@ class BAFpileup
std::string sample_MateFile,std::string control_MateFile,
std::string outputDir,std::string makepileup, std::string const& mateFileName,
std::string const& inputFormat, std::string const& matesOrientation,
std::string pathToSamtools, std::string chrLen, std::string controlName, std::string targetBed = "",
std::string pathToBedtools = "", std::string fastaFile="", int minQualPerPos=0);
float calculateFlankLength(std::string const& mateFileName ,std::string const& inputFormat, std::string const& matesOrientation, std::string pathToSamtools);
std::string pathToSamtools, std::string chrLen, std::string controlName,
std::string pathToSambamba, std::string SambambaThreads, std::string targetBed = "",
std::string pathToBedtools = "", std::string fastaFile="", int minQualPerPos=0);
float calculateFlankLength(std::string const& mateFileName ,std::string const& inputFormat, std::string const& matesOrientation, std::string pathToSamtools,std::string pathToSambamba, std::string SambambaThreads);
void calculateNewBoundaries(std::string targetBed, int flanks, std::string bedFileWithRegionsOfInterest);
std::string intersectWithBedtools(std::string makeminipileup, std::string outputDir, std::string bedFileWithRegionsOfInterest, std::string chrLen);
void createBedFileWithChromosomeLengths (std::string bedFileWithRegionsOfInterest, std::string chrLenFile);
std::string createPileUpFile(std::string outputDir, std::string samtools_path,std::string control_tumor, std::string intersected, std::string fastaFile,int minQualPerPos);
std::string createPileUpFile(std::string outputDir, std::string samtools_path, std::string pathToSambamba, std::string SambambaThreads, std::string control_tumor, std::string intersected, std::string fastaFile,int minQualPerPos);
std::vector < std::vector<float> >computeBAF(GenomeCopyNumber & sampleorcontrol, std::string minipileup, std::string outputDir, std::string filename);
std::vector <int> coordinates_;
std::vector <int> ends_;
Expand Down
48 changes: 32 additions & 16 deletions src/ChrCopyNumber.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ ChrCopyNumber::ChrCopyNumber(void)
}

ChrCopyNumber::ChrCopyNumber(std::string const& chrName) {
copy_number_subc = vector<int>(length_,0);
chromosome_ = chrName;
isMedianCalculated_ = false;
isSmoothed_ = false;
Expand All @@ -54,8 +53,6 @@ ChrCopyNumber::ChrCopyNumber(int windowSize, int chrLength, std::string const& c
length_ = chrLength/windowSize+1;
coordinates_ = vector<int>(length_);
readCount_ = vector<float>(length_,0);
copy_number_subc = vector<int>(length_,0);
population_subc = vector<float>(length_,0);
for (int i = 0; i<length_; i++) {
coordinates_[i] = i*windowSize;
}
Expand All @@ -69,8 +66,6 @@ ChrCopyNumber::ChrCopyNumber(int windowSize, int chrLength, std::string const& c
chromosome_ = chrName;
isMedianCalculated_ = false;
isSmoothed_ = false;
copy_number_subc = vector<int>(length_,0);
population_subc = vector<float>(length_,0);
ploidy_=NA;
if (targetBed == "") {
if (windowSize ==0) {
Expand Down Expand Up @@ -215,8 +210,6 @@ ChrCopyNumber::ChrCopyNumber(int windowSize, int chrLength, std::string const& c
exons_Countchr_ = length_;

readCount_ = vector<float>(exons_Countchr_,0);
copy_number_subc = vector<int>(exons_Countchr_,0);
population_subc = vector<float>(exons_Countchr_,0);

cout << "Number of exons analysed in chromosome "<< chromosome_ << " : " << exons_Countchr_ << "\n";

Expand Down Expand Up @@ -333,24 +326,24 @@ void ChrCopyNumber::setValueAt(int i, float val) {
}
// Stores CN_subc as the subclone copy number of bin i.
// The write is a plain operator[] access with no bounds check, so the vector
// must already hold more than i elements when this is called.
// NOTE(review): this span is taken from a rendered diff, so statements using
// the old member name (copy_number_subc) and the renamed member
// (copy_number_subc_) both appear — confirm which set is in the real file.
void ChrCopyNumber::setCN_subc(int i, int CN_subc)
{
cerr << copy_number_subc[i];
copy_number_subc[i] = CN_subc;
//cerr << copy_number_subc_[i]; //WHAT IS THIS OUTPUT, CARINO?
copy_number_subc_[i] = CN_subc; //THIS VECTOR IS EMPTY!
}

// Returns the subclone copy number stored for bin i.
// The read is an unchecked operator[] access; i must be a valid index.
// NOTE(review): this span is taken from a rendered diff, so the return using
// the old member name (copy_number_subc) and the one using the renamed member
// (copy_number_subc_) both appear — confirm which one is in the real file.
int ChrCopyNumber::getCN_subc(int i)
{
return copy_number_subc[i];
return copy_number_subc_[i];
}


// Stores pop_subc as the subclone population value of bin i.
// The write is an unchecked operator[] access; i must be a valid index.
// NOTE(review): this span is taken from a rendered diff, so the assignment to
// the old member name (population_subc) and the one to the renamed member
// (population_subc_) both appear — confirm which one is in the real file.
void ChrCopyNumber::setPopulation_subc(int i, float pop_subc)
{
population_subc[i] = pop_subc;
population_subc_[i] = pop_subc;
}

// Returns the subclone population value stored for bin i.
// The read is an unchecked operator[] access; i must be a valid index.
// NOTE(review): this span is taken from a rendered diff, so the return using
// the old member name (population_subc) and the one using the renamed member
// (population_subc_) both appear — confirm which one is in the real file.
float ChrCopyNumber::getPopulation_subc(int i)
{
return population_subc[i];
return population_subc_[i] ;
}


Expand Down Expand Up @@ -429,11 +422,13 @@ void ChrCopyNumber::setVectorLength(int length){

// Replaces the subclone copy-number vector with a fresh vector of len
// elements; any values stored previously are discarded.
// NOTE(review): this span is taken from a rendered diff. The line using the
// old member name fills with -1, the lines using the renamed member fill
// with 0 — confirm which version (and which fill value) is in the real file.
void ChrCopyNumber::setCN_subcLength(int len)
{
copy_number_subc = vector<int>(len,-1);
copy_number_subc_.clear();
copy_number_subc_ = vector<int>(len,0);
}
// Replaces the subclone population vector with a fresh vector of len
// elements; any values stored previously are discarded.
// NOTE(review): this span is taken from a rendered diff. The line using the
// old member name fills with -1, the lines using the renamed member fill
// with 0 — confirm which version (and which fill value) is in the real file.
void ChrCopyNumber::setpop_subcLength(int len)
{
population_subc = vector<float>(len,-1);
population_subc_.clear();
population_subc_ = vector<float>(len,0);
}

void ChrCopyNumber::setChrLength(int chrLength) {
Expand Down Expand Up @@ -1559,6 +1554,12 @@ float ChrCopyNumber::getEstimatedBAFuncertaintyAtI(int i) {
// Returns the smoothed-profile value stored at index i.
// No range check is done: i must be a valid index into smoothedProfile_.
float ChrCopyNumber::getSmoothedProfileAtI(int i)
{
    const float smoothedValue = smoothedProfile_[i];
    return smoothedValue;
}

// Returns the result of get_median() applied to smoothedProfile_ with the
// given start and end arguments.
// NOTE(review): whether `end` is inclusive and how NA values are treated is
// decided inside get_median, which is not visible here — confirm before
// relying on either.
float ChrCopyNumber::getSmoothedForInterval(int start , int end)
{
    const float intervalMedian = get_median(smoothedProfile_, start, end);
    return intervalMedian;
}


// Appends value as the new last element of smoothedProfile_.
void ChrCopyNumber::pushSmoothedProfile(float value)
{
    smoothedProfile_.insert(smoothedProfile_.end(), value);
}
Expand All @@ -1567,15 +1568,30 @@ int ChrCopyNumber::getEndsSize() {
return ends_.size();
}

// Records whether subclones are being searched for on this chromosome and,
// when the flag is switched on, makes sure the per-bin subclone vectors exist.
//
// value: new state of isLookingForSubclones_.
//
// When value is true:
//   - a warning is written to cerr if coordinates_ is still empty (the
//     vectors below would then be created with zero elements);
//   - copy_number_subc_ and population_subc_ are each filled with zeros, one
//     entry per element of coordinates_, but only if the vector is currently
//     empty. A vector that already has elements is left untouched.
// When value is false only the flag is updated.
void ChrCopyNumber::setLookingForSubclones(bool value)
{
    isLookingForSubclones_ = value;
    if (!value) {
        return;
    }

    if (coordinates_.empty()) {
        cerr << "Warning: you should intialize the ChrCopyNumber object before calling this function!!!\n";
    }

    // Existing contents are deliberately preserved: only empty vectors are sized.
    if (copy_number_subc_.empty()) {
        copy_number_subc_.assign(coordinates_.size(), 0);
    }
    if (population_subc_.empty()) {
        population_subc_.assign(coordinates_.size(), 0.0f);
    }
}



// Destructor: empties the member vectors listed below and resets length_ to 0.
// Other members are not explicitly cleared here (see the TODO).
// NOTE(review): this span is taken from a rendered diff, so clear() calls on
// the old member names (copy_number_subc, population_subc) and on the renamed
// members (copy_number_subc_, population_subc_) both appear — confirm which
// pair is in the real file.
ChrCopyNumber::~ChrCopyNumber(void)
{
coordinates_.clear();
readCount_.clear();
smoothedProfile_.clear();
fragmentNotNA_lengths_.clear(); //TODO all other vectors
length_ = 0;
copy_number_subc.clear();
population_subc.clear();
copy_number_subc_.clear();
population_subc_.clear();
}

void ChrCopyNumber::createBAF(float value) {
Expand Down
9 changes: 5 additions & 4 deletions src/ChrCopyNumber.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,8 @@ class ChrCopyNumber
float getPopulation_subc(int i);
void setCN_subcLength(int len);
void setpop_subcLength(int len);


void setLookingForSubclones(bool);
float getSmoothedForInterval(int start , int end);

private:
// std::vector <std::string> coordinatesTmp_;
Expand All @@ -159,9 +159,10 @@ class ChrCopyNumber
std::vector <std::string> genes_names;
// int exons_Count;
int exons_Countchr_;
std::vector <int> copy_number_subc;
std::vector <float> population_subc;
std::vector <int> copy_number_subc_;
std::vector <float> population_subc_;

bool isLookingForSubclones_;


int ploidy_;
Expand Down
38 changes: 21 additions & 17 deletions src/GenomeCopyNumber.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ GenomeCopyNumber::GenomeCopyNumber(void)
ifUsedControl_ = false;
normalContamination_=0;
sex_="";
SeekingSubc = false;
SeekingSubc_ = false;
}

void GenomeCopyNumber::readCopyNumber(std::string const& mateFileName ,std::string const& inputFormat, std::string const& matesOrientation, std::string const& chrLenFileName, float coefficientOfVariation ) {
Expand Down Expand Up @@ -893,6 +893,7 @@ void GenomeCopyNumber::setPloidy(int ploidy) {

}


double GenomeCopyNumber::calculateMedianRatioAround (float interval, float around) {

float maxCG = around+interval;
Expand Down Expand Up @@ -1069,11 +1070,11 @@ long double GenomeCopyNumber::calculateRSS(int ploidy)
map<string,int>::iterator it;
for (it=chromosomesInd_.begin() ; it != chromosomesInd_.end(); it++ ) {
string chrNumber = (*it).first;
if ( ( pos = chrNumber.find("chr", pos)) != string::npos )
if ( ( pos = chrNumber.find("chr")) != string::npos )
chrNumber.replace( pos, 3, "" );
if ( ( pos = chrNumber.find("X", pos)) != string::npos ) //exclude X and Y from the analysis
if ( ( pos = chrNumber.find("X")) != string::npos ) //exclude X and Y from the analysis
continue;
if ( ( pos = chrNumber.find("Y", pos)) != string::npos )
if ( ( pos = chrNumber.find("Y")) != string::npos )
continue;
int index = findIndex(chrNumber);
int length = chrCopyNumber_[index].getLength();
Expand Down Expand Up @@ -1485,9 +1486,9 @@ void GenomeCopyNumber::printRatio(std::string const& outFile, bool ifBedGraphOut
{
file << "\tGene";
}
if (SeekingSubc == true)
if (SeekingSubc_ == true)
{
file << "\tSubclones_cn\tSubclones_pop";
file << "\tSubclone_CN\tSubclone_Population";
}
file << "\n";
for ( it=chromosomesInd_.begin() ; it != chromosomesInd_.end(); it++ ) {
Expand Down Expand Up @@ -2649,7 +2650,7 @@ void GenomeCopyNumber::readCopyNumber(std::string const& inFile) {
chrCopyNumber_[(*it).second].setWindowSize(windowSize_);
int length = chrCopyNumber_[(*it).second].getValues().size();
chrCopyNumber_[(*it).second].setVectorLength(length);
if (SeekingSubc == true)
if (SeekingSubc_ == true)
{
chrCopyNumber_[(*it).second].setCN_subcLength(length+3);
chrCopyNumber_[(*it).second].setpop_subcLength(length+3);
Expand Down Expand Up @@ -2877,8 +2878,8 @@ void GenomeCopyNumber::printRatio(std::string const& chr, std::ofstream & file,
if (WESanalysis == true && chrCopyNumber_[index].getGeneNameAtBin(i)!= "") {
file << "\t" << chrCopyNumber_[index].getGeneNameAtBin(i);
}
if (SeekingSubc == true) {
file << "\t" << chrCopyNumber_[index].getCN_subc(i) << "\t" << chrCopyNumber_[index].getPopulation_subc(i);
if (SeekingSubc_ == true) {
file << "\t" << chrCopyNumber_[index].getCN_subc(i) << "\t" << chrCopyNumber_[index].getPopulation_subc(i); //check that it is still there
}

file << "\n";
Expand Down Expand Up @@ -3187,11 +3188,11 @@ float GenomeCopyNumber::evaluateContamination () {
map<string,int>::iterator it;
for ( it=chromosomesInd_.begin() ; it != chromosomesInd_.end(); it++ ) {
string chrNumber = (*it).first;
if ( ( pos = chrNumber.find("chr", pos)) != string::npos )
if ( ( pos = chrNumber.find("chr")) != string::npos )
chrNumber.replace( pos, 3, "" );
if ( ( pos = chrNumber.find("X", pos)) != string::npos ) //exclude X and Y from the analysis
if ( ( pos = chrNumber.find("X")) != string::npos ) //exclude X and Y from the analysis
continue;
if ( ( pos = chrNumber.find("Y", pos)) != string::npos )
if ( ( pos = chrNumber.find("Y")) != string::npos )
continue;
int index = findIndex(chrNumber);
if (index == NA) {
Expand Down Expand Up @@ -3241,11 +3242,11 @@ float GenomeCopyNumber::evaluateContaminationwithLR () {

for ( it=chromosomesInd_.begin() ; it != chromosomesInd_.end(); it++ ) {
string chrNumber = (*it).first;
if ( ( pos = chrNumber.find("chr", pos)) != string::npos )
if ( ( pos = chrNumber.find("chr")) != string::npos )
chrNumber.replace( pos, 3, "" );
if ( ( pos = chrNumber.find("X", pos)) != string::npos ) //exclude X and Y from the analysis
if ( ( pos = chrNumber.find("X")) != string::npos ) //exclude X and Y from the analysis
continue;
if ( ( pos = chrNumber.find("Y", pos)) != string::npos )
if ( ( pos = chrNumber.find("Y")) != string::npos )
continue;
int index = findIndex(chrNumber);
if (index == NA) {
Expand Down Expand Up @@ -4423,7 +4424,10 @@ makingPileup = makingPileup_given;

// Stores the subclone-search flag on the genome object and forwards it to
// every ChrCopyNumber currently held in chrCopyNumber_ via
// setLookingForSubclones().
// Only chromosomes present in chrCopyNumber_ at the time of the call are
// visited by the loop below.
// NOTE(review): this span is taken from a rendered diff, so the assignment to
// the old member name (SeekingSubc) and the one to the renamed member
// (SeekingSubc_) both appear — confirm which one is in the real file.
void GenomeCopyNumber::setSeekSubclones(bool seekSubclones)
{
SeekingSubc = seekSubclones;
SeekingSubc_ = seekSubclones;
vector<ChrCopyNumber>::iterator it;
for ( it=chrCopyNumber_.begin() ; it != chrCopyNumber_.end(); it++ )
it->setLookingForSubclones(seekSubclones);
}

void* GenomeCopyNumber_readMateFile_wrapper(void *arg)
Expand Down Expand Up @@ -4467,7 +4471,7 @@ double GenomeCopyNumber::Percentage_GenomeExplained(int & unexplainedChromosomes
if (fragmentLength>threshold) {
numberOfPoints+=fragmentLength;
if (fragment_median2!=NA && abs(fragment_median2-round_by_ploidy(fragment_median2,ploidy_)) >= 1.0/3/ploidy_) {
cout << "Unexplained segment: "<<fragment_median2 << "\t";
//cout << "Unexplained segment: "<<fragment_median2 << "\t";
unexplained=1;
sum_frags+=fragmentLength;
}
Expand Down
9 changes: 4 additions & 5 deletions src/GenomeCopyNumber.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,23 +118,22 @@ class GenomeCopyNumber
void setSambamba(std::string const& pathToSambamba, std::string const& SambambaThreads_);
bool ifHasBAF();
void setSex(std::string sex);

void setSeekSubclones(bool seekSubclones);

int findWinNumber(int position, std::string myName, std::string const& matefile);
void setWESanalysis(bool WESgiven);
void setSeekSubclones(bool seekSubclones);
void setmakingPileup(bool makingPileup_given);
double Percentage_GenomeExplained(int &);
long double calculateRSS(int ploidy);


private:

std::vector<ChrCopyNumber> chrCopyNumber_; //should stay private !!! why is it public now, Carino????
std::map<std::string, int> chromosomesInd_; //should stay private
std::vector<ChrCopyNumber> chrCopyNumber_; //should stay private !!!
std::map<std::string, int> chromosomesInd_; //should stay private!!!
bool WESanalysis;
bool makingPileup;
bool SeekingSubc;
bool SeekingSubc_;
void fillMyHash(std::string const& mateFileName , std::string const& inputFormat, std::string const& matesOrientation, int windowSize, int step, std::string targetBed = "");
int windowSize_;
int step_;
Expand Down
Loading

0 comments on commit bebce82

Please sign in to comment.