From 45d18095781249779c5a31aa290bc2b43635e9a1 Mon Sep 17 00:00:00 2001 From: Nadia Davidson Date: Tue, 25 Jun 2019 05:39:39 +0000 Subject: [PATCH] Tidying up code --- corset.cc | 78 ++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 51 insertions(+), 27 deletions(-) diff --git a/corset.cc b/corset.cc index 91ecf00..e111824 100644 --- a/corset.cc +++ b/corset.cc @@ -22,7 +22,7 @@ ** clusters and counts are output for each of these smaller groups. ** ** Author: Nadia Davidson, nadia.davidson@mcri.edu.au - ** Modified: 22 June 2019 + ** Modified: 25 June 2019 **/ #include @@ -57,6 +57,7 @@ using namespace std; const string corset_extension=".corset-reads"; + // a function to parse a bam file. It required samtools // to read it. The alignments are stored in a ReadList object. ReadList * read_bam_file(string all_file_names, TranscriptList * trans, int sample){ @@ -157,6 +158,35 @@ ReadList * read_fasta_file(string all_file_names, TranscriptList * trans, int sa return rList; //return the alignment list } + +#define READS_REDISTRIBUTED 0 +#define READS_COUNTED 1 +#define READS_FILTERED 2 + +/** Code to add an EC into the readList. This code is common to corset and + salmon EC file parsing **/ +int add_equivalence_class(ReadList * rList, int & sample, vector & transNames, int & weight) { + //case that the number of reads is smaller than threshold for a link b/n transcripts + if(weight this_tran; + this_tran.push_back(transNames.at(rr)); + rList->add_alignment(this_tran,sample,this_weight); + } + return READS_REDISTRIBUTED; + } + else if(transNames.size()<=Transcript::max_alignments || Transcript::max_alignments<=0){ + rList->add_alignment(transNames,sample,weight); + return READS_COUNTED; + } + return READS_FILTERED; +} + +/** Process a corset format equivalence class file **/ ReadList * read_corset_file(string all_file_names, TranscriptList * trans, int sample){ ReadList * rList = new ReadList(trans); string filename; @@ -176,6 +206,7 @@ ReadList * read_corset_file(string all_file_names, TranscriptList * trans, int s string line; int reads_counted=0; int reads_filtered=0; + int reads_redistributed=0; while(getline(file, line)){ istringstream istream(line); int weight; @@ -183,19 +214,19 @@ ReadList * read_corset_file(string all_file_names, TranscriptList * trans, int s string name; int alignments=-1; //start at -1 because first column is weight istream >> weight; - if(Transcript::max_alignments>=0){ // workout the number of alignments - istringstream tempstream(line); - while(tempstream >> name) alignments++; - } //check that number of alignments and supporting reads is okay - if(weight>=Transcript::min_reads_for_link & - alignments<=Transcript::max_alignments){ - reads_counted+=weight; - while(istream >> name) - transNames.push_back(name); - rList->add_alignment(transNames,sample,weight); //add - } else {reads_filtered+=weight; } + while(istream >> name) + transNames.push_back(name); + int ret_flag=add_equivalence_class(rList,sample,transNames,weight); + switch(ret_flag){ + case READS_REDISTRIBUTED : reads_redistributed+=weight; break; + case READS_COUNTED : reads_counted+=weight; break; + case READS_FILTERED : reads_filtered+=weight; break; + } } - cout<> weight; - //case that the number of reads is smaller than threshold for a link b/n transcripts - if(weight randTran; - randTran.push_back(randName); - rList->add_alignment(randTran,sample,weight); - reads_redistributed+=weight; - } - else if(eq_size<=Transcript::max_alignments || Transcript::max_alignments<=0){ - rList->add_alignment(transNames,sample,weight); - reads_counted+=weight; + int ret_flag=add_equivalence_class(rList,sample,transNames,weight); + switch(ret_flag){ + case READS_REDISTRIBUTED : reads_redistributed+=weight; break; + case READS_COUNTED : reads_counted+=weight; break; + case READS_FILTERED : reads_filtered+=weight; break; } - else { reads_filtered+=weight; } } cout< If running with -i corset or salmon_eq_classes, this will filter out a link between contigs" << endl; cout << "\t if the link is supported by less than this many reads (performed sample-wise). Reads will " << endl; - cout << "\t be randomly reassigned to one of the contigs in the equivalence class." << endl; + cout << "\t be reassigned uniformly to the contigs in the equivalence class. This option will" << endl; + cout << "\t improve runtime and memory usage, but will increase the number of clusters reported." << endl; cout << "\t Default: 1 (no filtering)" << endl; cout << endl; cout << "\t -x If running with -i corset or salmon_eq_classes, this option will filter out reads that" << endl;