diff --git a/util/aligner2counts.cpp b/util/aligner2counts.cpp index 690f49b..b020011 100755 --- a/util/aligner2counts.cpp +++ b/util/aligner2counts.cpp @@ -13,6 +13,7 @@ #include #include #include +#include const unsigned int FLAG_0 = 0; // 0 primary alignment const unsigned int FLAG_1 = 1; // 0x1 template having multiple segments in sequencing @@ -158,7 +159,8 @@ std::pair get_seqid_alncov(std::pair &alnpos, std::strin // Sequence identity only considers aligned region (soft clip region is ignored) // Alignment coverage is calculated w.r.t full read length - for (unsigned int i = start; i < end; i++) { + assert(end >= 0); // assert that end does not accidentally overflows here + for (unsigned int i = start; i < (unsigned int) end; i++) { bool is_match; if (future_matches > 0) { future_matches--; @@ -196,8 +198,8 @@ std::pair get_seqid_alncov(std::pair &alnpos, std::strin exit(1); } - seq_id = (static_cast(matches) * 100) / (matches+mismatches); - alignment_coverage = (static_cast(alignment_length) * 100) / (qual_str.length() + start); // account for variable read length + seq_id = (static_cast(matches) * 100.0) / (matches+mismatches); + alignment_coverage = (static_cast(alignment_length) * 100.0) / (qual_str.length() + start); // account for variable read length // std::cout << matches << " matches " << mismatches << " mismatches " << seq_id << " " << alignment_coverage << " " << matches+mismatches << " in alnstats \n"; return std::make_pair(seq_id, alignment_coverage); } @@ -430,7 +432,7 @@ void process_alignment_line( AlnMapids& alnmapids, bool& strobealign) { if (contigs_map.size() == 0) { - std::cerr << "Input sam/bam file doesn't has header. Please provide input file with header \n"; + std::cerr << "Input sam file doesn't has header. Please provide input file with header \n"; exit(1); } std::string currentread_id, contig_id, cigar_str, qual_str, md_str, field; diff --git a/util/convertfasta_multi2single.cpp b/util/convertfasta_multi2single.cpp index a5422c3..5d9c657 100644 --- a/util/convertfasta_multi2single.cpp +++ b/util/convertfasta_multi2single.cpp @@ -6,69 +6,85 @@ #include #include - namespace fs = std::filesystem; -bool condition_header(std::string &line) { - if (line[0] == '>') { +bool condition_header(std::string &line) +{ + if (line[0] == '>') + { return true; } - else { + else + { return false; } } -void get_complete_sequence(std::ifstream &fasta, std::string &line, std::string &sequence) { +void get_complete_sequence(std::ifstream &fasta, std::string &line, std::string &sequence) +{ getline(fasta, line); - while (!condition_header(line) && !fasta.eof()) { + while (!condition_header(line) && !fasta.eof()) + { sequence.append(line); getline(fasta, line); } } -int main(int argc, char *argv[]) { +int main(int argc, char *argv[]) +{ - if(argc < 3 || strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-h") == 0) + if (argc < 3 || strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-h") == 0) { std::cout << "Usage: convertfasta_multi2single input.fasta outdir min_length (optional, int) --length (print length, optional) \n"; return EXIT_SUCCESS; } - - else { + + else + { auto start = std::chrono::high_resolution_clock::now(); std::string contigs_sequences = argv[1]; - auto pos = contigs_sequences.find_last_of ('/'); + auto pos = contigs_sequences.find_last_of('/'); std::string outname; - if (pos != std::string::npos) { - outname = contigs_sequences.substr(pos+1); - } else { + if (pos != std::string::npos) + { + outname = contigs_sequences.substr(pos + 1); + } + else + { outname = contigs_sequences; } - + std::string dir = argv[2]; - if (!fs::exists(dir)) { - if (!fs::create_directories(dir)) { + if (!fs::exists(dir)) + { + if (!fs::create_directories(dir)) + { std::cerr << "Error: Unable to create output directory!" << "\n"; return 1; } } unsigned int min_length = 0; - bool return_length = 0; + bool return_length = false; std::ofstream length_file; - for (int i = 3; i < argc; ++i) { - if (strcmp(argv[i], "--length") == 0) { + for (int i = 3; i < argc; ++i) + { + if (strcmp(argv[i], "--length") == 0) + { return_length = true; - length_file.open(dir +"/contig_length"); - } else { + length_file.open(dir + "/contig_length"); + } + else + { min_length = std::stoi(argv[i]); } } std::ifstream fasta(contigs_sequences); - std::cout<<"Input file: " << contigs_sequences << "\n"; - std::cout<<"Output directory: " << dir << "\n"; + std::cout << "Input file: " << contigs_sequences << "\n"; + std::cout << "Output directory: " << dir << "\n"; std::ofstream outfile(dir + "/single_" + outname); - if (!fasta.is_open()) { + if (!fasta.is_open()) + { std::cerr << "Error: Unable to open Fasta file!" << "\n"; return 1; } @@ -78,9 +94,10 @@ int main(int argc, char *argv[]) { getline(fasta, line); std::string header; unsigned int sequence_counter = 0; - while (condition_header(line)) { + while (condition_header(line)) + { header = line; - + // Find the start position, which is after '>' size_t start_pos = line.find('>') + 1; @@ -92,29 +109,34 @@ int main(int argc, char *argv[]) { sequence = ""; get_complete_sequence(fasta, line, sequence); - if (!sequence.empty()) { - if (sequence.length() >= min_length) { + if (!sequence.empty()) + { + if (sequence.length() >= min_length) + { outfile << header << "\n"; outfile << sequence << "\n"; - if (return_length==1) { + if (return_length) + { length_file << seq_id << "\t" << sequence.length() << "\n"; } } } - else { + else + { std::cerr << "empty sequence for a header" << "\n"; return 1; } sequence_counter++; } outfile.close(); - if (return_length && length_file.is_open()) { + if (return_length && length_file.is_open()) + { length_file.close(); } std::cout << sequence_counter << " sequences processed \n"; auto stop = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(stop - start); std::cout << duration.count() << " seconds\n"; - return EXIT_SUCCESS; + return EXIT_SUCCESS; } } \ No newline at end of file diff --git a/util/get_sequence_bybin.cpp b/util/get_sequence_bybin.cpp index 557c0bb..51b9c2b 100644 --- a/util/get_sequence_bybin.cpp +++ b/util/get_sequence_bybin.cpp @@ -1,4 +1,3 @@ -#include #include #include #include @@ -11,7 +10,7 @@ namespace fs = std::filesystem; -bool condition_header(std::ifstream &fasta, std::string &line) { +bool condition_header(std::string &line) { if (line[0] == '>') { return true; } @@ -32,7 +31,7 @@ bool line_check(std::ifstream &fasta, std::string &line) { void get_complete_sequence(std::ifstream &fasta, std::string &line, std::string &sequence) { fasta >> line; - while (!condition_header(fasta, line) && !fasta.eof()) { + while (!condition_header(line) && !fasta.eof()) { if (line_check(fasta, line)) { sequence.append(line); fasta >> line; @@ -114,7 +113,7 @@ int main(int argc, char *argv[]) { } fastaFile >> line; - while (condition_header(fastaFile, line)) { + while (condition_header(line)) { sequence = ""; auto range = bins_ids.equal_range(line.substr(1,-1)); if (range.first != range.second) { @@ -137,7 +136,7 @@ int main(int argc, char *argv[]) { } else { fastaFile >> line; - while (!condition_header(fastaFile, line) && !fastaFile.eof()) { + while (!condition_header(line) && !fastaFile.eof()) { fastaFile >> line; } }