-
Notifications
You must be signed in to change notification settings - Fork 19
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Nadia Davidson
committed
Jun 25, 2019
1 parent
c33e2c6
commit 45d1809
Showing
1 changed file
with
51 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,7 +22,7 @@ | |
** clusters and counts are output for each of these smaller groups. | ||
** | ||
** Author: Nadia Davidson, [email protected] | ||
** Modified: 22 June 2019 | ||
** Modified: 25 June 2019 | ||
**/ | ||
|
||
#include <iostream> | ||
|
@@ -57,6 +57,7 @@ using namespace std; | |
|
||
const string corset_extension=".corset-reads"; | ||
|
||
|
||
// a function to parse a bam file. It required samtools | ||
// to read it. The alignments are stored in a ReadList object. | ||
ReadList * read_bam_file(string all_file_names, TranscriptList * trans, int sample){ | ||
|
@@ -157,6 +158,35 @@ ReadList * read_fasta_file(string all_file_names, TranscriptList * trans, int sa | |
return rList; //return the alignment list | ||
} | ||
|
||
|
||
#define READS_REDISTRIBUTED 0 | ||
#define READS_COUNTED 1 | ||
#define READS_FILTERED 2 | ||
|
||
/** Code to add an EC into the readList. This code is common to corset and | ||
salmon EC file parsing **/ | ||
int add_equivalence_class(ReadList * rList, int & sample, vector<string> & transNames, int & weight) { | ||
//case that the number of reads is smaller than threshold for a link b/n transcripts | ||
if(weight<Transcript::min_reads_for_link){ | ||
shuffle(transNames.begin(),transNames.end(),default_random_engine()); | ||
for(int rr=0; rr < transNames.size() ; rr++){ | ||
int this_weight = weight/transNames.size(); | ||
if(rr < (weight%transNames.size()) ) | ||
this_weight++; | ||
vector<string> this_tran; | ||
this_tran.push_back(transNames.at(rr)); | ||
rList->add_alignment(this_tran,sample,this_weight); | ||
} | ||
return READS_REDISTRIBUTED; | ||
} | ||
else if(transNames.size()<=Transcript::max_alignments || Transcript::max_alignments<=0){ | ||
rList->add_alignment(transNames,sample,weight); | ||
return READS_COUNTED; | ||
} | ||
return READS_FILTERED; | ||
} | ||
|
||
/** Process a corset format equivalence class file **/ | ||
ReadList * read_corset_file(string all_file_names, TranscriptList * trans, int sample){ | ||
ReadList * rList = new ReadList(trans); | ||
string filename; | ||
|
@@ -176,26 +206,27 @@ ReadList * read_corset_file(string all_file_names, TranscriptList * trans, int s | |
string line; | ||
int reads_counted=0; | ||
int reads_filtered=0; | ||
int reads_redistributed=0; | ||
while(getline(file, line)){ | ||
istringstream istream(line); | ||
int weight; | ||
vector<string> transNames; | ||
string name; | ||
int alignments=-1; //start at -1 because first column is weight | ||
istream >> weight; | ||
if(Transcript::max_alignments>=0){ // workout the number of alignments | ||
istringstream tempstream(line); | ||
while(tempstream >> name) alignments++; | ||
} //check that number of alignments and supporting reads is okay | ||
if(weight>=Transcript::min_reads_for_link & | ||
alignments<=Transcript::max_alignments){ | ||
reads_counted+=weight; | ||
while(istream >> name) | ||
transNames.push_back(name); | ||
rList->add_alignment(transNames,sample,weight); //add | ||
} else {reads_filtered+=weight; } | ||
while(istream >> name) | ||
transNames.push_back(name); | ||
int ret_flag=add_equivalence_class(rList,sample,transNames,weight); | ||
switch(ret_flag){ | ||
case READS_REDISTRIBUTED : reads_redistributed+=weight; break; | ||
case READS_COUNTED : reads_counted+=weight; break; | ||
case READS_FILTERED : reads_filtered+=weight; break; | ||
} | ||
} | ||
cout<<reads_counted<< " reads counted, "<<reads_filtered<<" reads filtered."<<endl; | ||
cout<<reads_counted<< " reads counted, " | ||
<<reads_filtered<<" reads filtered, " | ||
<<reads_redistributed<<" reads redistributed." | ||
<<endl; | ||
} | ||
return rList; | ||
} | ||
|
@@ -235,20 +266,12 @@ ReadList * read_salmon_eq_classes_file(string all_file_names, TranscriptList * t | |
} | ||
int weight; | ||
istream >> weight; | ||
//case that the number of reads is smaller than threshold for a link b/n transcripts | ||
if(weight<Transcript::min_reads_for_link){ | ||
//randomly select a transcript to dump the reads to | ||
string randName = transNames.at(rand() % transNames.size()); | ||
vector<string> randTran; | ||
randTran.push_back(randName); | ||
rList->add_alignment(randTran,sample,weight); | ||
reads_redistributed+=weight; | ||
} | ||
else if(eq_size<=Transcript::max_alignments || Transcript::max_alignments<=0){ | ||
rList->add_alignment(transNames,sample,weight); | ||
reads_counted+=weight; | ||
int ret_flag=add_equivalence_class(rList,sample,transNames,weight); | ||
switch(ret_flag){ | ||
case READS_REDISTRIBUTED : reads_redistributed+=weight; break; | ||
case READS_COUNTED : reads_counted+=weight; break; | ||
case READS_FILTERED : reads_filtered+=weight; break; | ||
} | ||
else { reads_filtered+=weight; } | ||
} | ||
cout<<reads_counted<< " reads counted, " | ||
<<reads_filtered<<" reads filtered, " | ||
|
@@ -334,7 +357,8 @@ void print_usage(){ | |
cout << endl; | ||
cout << "\t -l <int> If running with -i corset or salmon_eq_classes, this will filter out a link between contigs" << endl; | ||
cout << "\t if the link is supported by less than this many reads (performed sample-wise). Reads will " << endl; | ||
cout << "\t be randomly reassigned to one of the contigs in the equivalence class." << endl; | ||
cout << "\t be reassigned uniformly to the contigs in the equivalence class. This option will" << endl; | ||
cout << "\t improve runtime and memory usage, but will increase the number of clusters reported." << endl; | ||
cout << "\t Default: 1 (no filtering)" << endl; | ||
cout << endl; | ||
cout << "\t -x <int> If running with -i corset or salmon_eq_classes, this option will filter out reads that" << endl; | ||
|