diff --git a/src/compare_doubles.h b/src/compare_doubles.h index ba746e6..080d9de 100644 --- a/src/compare_doubles.h +++ b/src/compare_doubles.h @@ -7,6 +7,6 @@ namespace compare_doubles { /// checks if two doubles are approximately equal /// bool is_equal(double one, double two); -} +} #endif /* COMPARE_DOUBLES_H */ diff --git a/src/f_config.cpp b/src/f_config.cpp index 32f6b12..1975f42 100644 --- a/src/f_config.cpp +++ b/src/f_config.cpp @@ -218,8 +218,9 @@ void f_config::assign_feature_by_pattern(fasta::SequenceList& sequences, if (pattern.size() > 0) { boost::regex re(pattern); for (size_t i = 0; i < sequences.size(); ++i) { - std::string seq = fasta::sequence_to_string(sequences[i]); - std::string seq_nogaps = seq; + //std::string seq = fasta::sequence_to_string(sequences[i]); + auto seq = fasta::sequence_to_string(sequences[i]); + auto seq_nogaps = seq; seq_nogaps.erase(std::remove(seq_nogaps.begin(), seq_nogaps.end(), '-'), seq_nogaps.end()); for(auto it = boost::sregex_iterator(seq_nogaps.begin(), seq_nogaps.end(), @@ -227,8 +228,8 @@ void f_config::assign_feature_by_pattern(fasta::SequenceList& sequences, it != boost::sregex_iterator(); ++it) { - int match_start = find_real_pos(seq, it->position()); - int match_end = find_real_pos(seq, match_start + it->str().size()); + auto match_start = find_real_pos(seq, it->position()); + auto match_end = find_real_pos(seq, match_start + it->str().size()); for (int j = match_start; j < match_end; ++j) { if (sequences[i].residues[j].codon[0] != '-') { sequences[i].residues[j].features.push_back(feat_name); diff --git a/src/fasta.cpp b/src/fasta.cpp index e034675..c0dcfeb 100644 --- a/src/fasta.cpp +++ b/src/fasta.cpp @@ -41,7 +41,7 @@ fasta::FastaData fasta::parse_fasta(std::string const& filename, std::string line; std::string header; std::string seq_line; - bool in_sequence_section = true; + auto in_sequence_section = true; fasta::FastaData fd; while (std::getline(fastafile, line)) { if (line.substr(0, 1) == ">") { @@ -103,7 +103,7 @@ fasta::Sequence fasta::make_sequence(const std::string& description, for (unsigned i = 0; i < codons.size(); i += codon_length) { boost::regex re("[a-zA-Z0-9-]{" + std::to_string(codon_length) + "}"); - std::string codon = codons.substr(i, codon_length); + auto codon = codons.substr(i, codon_length); if (!boost::regex_match(codon, re)) { throw std::runtime_error("Invalid codon: " + codon); @@ -144,11 +144,11 @@ bool fasta::check_length(fasta::SequenceList const& sequences, int limit) { if (limit == 0) { limit = sequences.size(); } - bool result = true; - size_t prev_length = sequences[0].residues.size(); - int i = 1; + auto result = true; + auto prev_length = sequences[0].residues.size(); + auto i = 1; while (result && i < limit) { - size_t length = sequences[i].residues.size(); + auto length = sequences[i].residues.size(); if (length != prev_length) { result = false; } @@ -159,7 +159,7 @@ bool fasta::check_length(fasta::SequenceList const& sequences, int limit) { fasta::SequenceList fasta::remove_gaps( const fasta::SequenceList& sequences) { - fasta::SequenceList s = sequences; + auto s = sequences; for (auto& seq : s) { seq.residues.clear(); } diff --git a/src/feature_scores.cpp b/src/feature_scores.cpp index 07d5fbe..154cd8d 100644 --- a/src/feature_scores.cpp +++ b/src/feature_scores.cpp @@ -87,7 +87,7 @@ double FeatureScores::score_ptm( //pop back last character to get just the ptm type ptm_type.pop_back(); // level of annotation - last character of feature's name - char ptm_level = ptm_name.back(); + auto ptm_level = ptm_name.back(); double ptm_score = 0.; // first set ptm_score based on annotation level of the query ptm if (ptm_level == '0') { @@ -109,11 +109,11 @@ double FeatureScores::score_ptm( // get occurrence based score for (auto feat_it = m_occurences.begin(); feat_it != m_occurences.end(); ++feat_it) { - std::string i_name = feat_it->first; + auto i_name = feat_it->first; // get just the ptm type without its level of annotation (last char) std::string i_type = i_name.substr(0, i_name.size() - 1); if (i_type == ptm_type) { - char i_level = i_name.back(); + auto i_level = i_name.back(); double score = feat_it->second[position]; if (i_level == '0') { result += score; diff --git a/src/kmad.cpp b/src/kmad.cpp index 7015405..5a21f40 100644 --- a/src/kmad.cpp +++ b/src/kmad.cpp @@ -155,7 +155,6 @@ int main(int argc, char *argv[]) { } // Load sequence data - fasta::FastaData fasta_data; try { fasta_data = fasta::parse_fasta(filename, codon_length); @@ -163,10 +162,10 @@ int main(int argc, char *argv[]) { std::cerr << "Error: " << e.what() << std::endl; std::exit(EXIT_FAILURE); } - bool gapped = false; + auto gapped = false; // Combine sequence and feature settings // - fasta::FastaData fasta_data_cfg = f_config::get_conf_data( + auto fasta_data_cfg = f_config::get_conf_data( fasta_data, f_set, gapped); // Perform the alignment @@ -182,7 +181,7 @@ int main(int argc, char *argv[]) { first_gapped, optimize, fade_out, no_feat); } else { gapped = true; - fasta::FastaData fasta_data_cfg_aligned = f_config::get_conf_data( + auto fasta_data_cfg_aligned = f_config::get_conf_data( fasta_data, f_set, gapped); if (refine_limit == 0) { refine_limit = fasta_data.sequences.size(); @@ -201,7 +200,7 @@ int main(int argc, char *argv[]) { // Write alignment to file // TODO: al_out_index is always 1. Also, what is it? - int al_out_index = 1; + auto al_out_index = 1; if (first_gapped) { first_gapped = 0; } diff --git a/src/msa.cpp b/src/msa.cpp index c5763e8..e368340 100644 --- a/src/msa.cpp +++ b/src/msa.cpp @@ -27,8 +27,7 @@ std::vector msa::run_msa( // query_seq_list - the profiles are built based only on the first // sequence fasta::SequenceList query_seq_list = {fasta_data.sequences[0]}; - profile::ProfileMap profile = profile::create_score_profile( - query_seq_list, sbst_mat); + auto profile = profile::create_score_profile(query_seq_list, sbst_mat); std::vector identities = {1}; if (!no_feat) { f_profile.update_scores(query_seq_list, f_set, identities, fade_out); @@ -41,7 +40,7 @@ std::vector msa::run_msa( std::vector alignment; - int alignments_number = 0; + auto alignments_number = 0; double cutoff = 0; // pointer to the function performing single round of msa, can be either // for gapped or ungapped first sequence @@ -67,7 +66,7 @@ std::vector msa::run_msa( if (!one_round) { for (int i = 8; i >= 0; --i) { cutoff = double(i) / 10; - int prev_alignments = alignments_number; + auto prev_alignments = alignments_number; alignment = perform_msa_round_ptr(fasta_data, fasta_data, profile, f_profile, gap_open_pen, end_pen, gap_ext_pen, cutoff, @@ -88,7 +87,7 @@ std::vector msa::run_msa( } // set alignments number to 0 to align (again) // all sequences to the profile - int iterations = 1; + auto iterations = 1; if (one_round) { iterations = 1; } @@ -107,7 +106,7 @@ std::vector msa::run_msa( profile = profile::create_score_profile(alignment[0], sbst_mat); } if (optimize) { - int counter = 0; + auto counter = 0; std::vector previous; while (!msa::compare_alignments(previous, alignment) && counter < 15) { @@ -147,12 +146,11 @@ std::vector msa::refine_alignment( const bool fade_out, int refine_seq, const bool no_feat) { fasta::SequenceList query_seq = {fasta_data_plain.sequences[0]}; - profile::ProfileMap profile_single = profile::create_score_profile( - query_seq, sbst_mat); - FeatureScores f_profile_single(fasta_data_plain.feature_list, - domain_modifier, ptm_modifier, - motif_modifier, strct_modifier, - fasta_data_plain.probabilities); + auto profile_single = profile::create_score_profile(query_seq, sbst_mat); + FeatureScores f_profile_single( + fasta_data_plain.feature_list, domain_modifier, ptm_modifier, + motif_modifier, strct_modifier, fasta_data_plain.probabilities + ); std::vector identities = {1}; f_profile_single.update_scores(query_seq, f_set, identities, fade_out); identities = msa::set_identities(fasta_data_plain, profile_single, @@ -169,8 +167,7 @@ std::vector msa::refine_alignment( query_seq_list.push_back(fasta_data_alignment.sequences[i]); } // fasta::SequenceList query_seq_list = fasta_data_alignment.sequences; - profile::ProfileMap profile = profile::create_score_profile( - query_seq_list, sbst_mat); + auto profile = profile::create_score_profile(query_seq_list, sbst_mat); if (!no_feat) { f_profile.update_scores(query_seq_list, f_set, identities, fade_out); } @@ -178,7 +175,7 @@ std::vector msa::refine_alignment( // Align all sequences vs first to determine the identities std::vector alignment; - int alignments_number = 0; + auto alignments_number = 0; double cutoff = 0; // pointer to the function performing single round of msa, can be either // for gapped or ungapped first sequence @@ -228,7 +225,7 @@ std::vector msa::refine_alignment( alignments_number, f_set, alignment, refine_seq, no_feat); if (optimize) { - int counter = 0; + auto counter = 0; std::vector previous; while (!compare_alignments(previous, alignment) && counter < 15) { previous = alignment; @@ -236,15 +233,6 @@ std::vector msa::refine_alignment( motif_modifier, ptm_modifier, sbst_mat); ++counter; } - // f_profile.update_scores(alignment[0], f_set, identities, fade_out); - // profile = profile::create_score_profile(alignment[0], sbst_mat); - // alignments_number = 0; - // cutoff = 0; - // alignment = perform_msa_round_ptr(fasta_data_plain, profile, - // f_profile, gap_open_pen, - // end_pen, gap_ext_pen, cutoff, - // codon_length, identities, - // alignments_number, f_set, alignment); } return alignment; } @@ -263,7 +251,7 @@ std::vector msa::set_identities( fasta::Sequence aligned_seq_uppercase; //pairwise alignment with lowercase characters where chars were removed fasta::Sequence aligned_seq_with_lower; - bool gapped = true; + auto gapped = true; for (size_t i = 1; i < fasta_data.sequences.size(); ++i) { // aligned_sequence: vector // first element is a dummy polyA sequence to indicate where are the gaps @@ -276,9 +264,8 @@ std::vector msa::set_identities( gap_ext_pen, codon_length, gapped, no_feat); - double identity = msa::calc_identity(aligned_sequence[0], - aligned_sequence[1], - fasta_data.sequences[0]); + auto identity = msa::calc_identity( + aligned_sequence[0], aligned_sequence[1], fasta_data.sequences[0]); identities.push_back(identity); } return identities; @@ -288,7 +275,7 @@ double msa::calc_identity(const fasta::Sequence& dummy_sequence, const fasta::Sequence& aligned_sequence, const fasta::Sequence& query_sequence) { double identical_residues = 0; - int gap_count = 0; + auto gap_count = 0; // sequences should be aligned (therefore lengths should be equal) assert(aligned_sequence.residues.size() == dummy_sequence.residues.size()); for (unsigned i = 0; i < aligned_sequence.residues.size(); ++i) { @@ -306,8 +293,8 @@ double msa::calc_identity(const fasta::Sequence& dummy_sequence, fasta::SequenceList msa::remove_gaps(const fasta::SequenceList& alignment) { fasta::Sequence new_seq; fasta::SequenceList aligned_seq = {new_seq, new_seq}; - char gap = '-'; - bool lower_flag = false; + auto gap = '-'; + auto lower_flag = false; for (size_t i = 0; i < alignment[0].residues.size(); ++i) { if (alignment[0].residues[i].codon[0] == gap) { if (aligned_seq[1].residues.size() > 0) { @@ -348,7 +335,7 @@ fasta::SequenceList msa::align_pairwise(const fasta::Sequence& input_sequence, int codon_length, const bool first_gapped, const bool no_feat) { - int profile_length = profile.begin()->second.size(); + auto profile_length = profile.begin()->second.size(); ScoringMatrix scores(profile_length, input_sequence.residues.size(), gap_open_pen, end_pen, gap_ext_pen, no_feat); scores.calculate_scores(input_sequence, profile, f_profile, codon_length); @@ -389,7 +376,7 @@ std::vector msa::perform_msa_round_ungapped( alignment[1].push_back(fasta_data_alignment.sequences[i]); } } - int next_alignments = count_alignments(identity_cutoff, identities); + auto next_alignments = count_alignments(identity_cutoff, identities); if (next_alignments > prev_alignments) { fasta::SequenceList aligned_seq; for (size_t i = start; i < fasta_data.sequences.size(); ++i) { @@ -417,7 +404,7 @@ std::vector msa::perform_msa_round_ungapped( int msa::count_alignments(double identity_cutoff, const std::vector& identities) { - int count = 0; + auto count = 0; for (auto& item: identities) { if (item >= identity_cutoff) { ++count; @@ -444,7 +431,7 @@ std::vector msa::perform_msa_round_gapped( const bool no_feat) { std::vector alignment = {{}, {}}; - int next_alignments = count_alignments(identity_cutoff, identities); + auto next_alignments = count_alignments(identity_cutoff, identities); if (next_alignments > prev_alignments) { fasta::SequenceList aligned_seq; for (size_t i = 0; i < fasta_data.sequences.size(); ++i) { @@ -477,10 +464,10 @@ std::vector msa::merge_alignments( std::vector multi_alignment; /// TODO: change it so that changing the format is no longer needed at the /// end of the function - multi_alignment = {{pairwise_alignments[0][0], - pairwise_alignments[0][0]}, - {pairwise_alignments[1][0], - pairwise_alignments[1][0]}}; + multi_alignment = { + {pairwise_alignments[0][0], pairwise_alignments[0][0]}, + {pairwise_alignments[1][0], pairwise_alignments[1][0]} + }; assert(pairwise_alignments[0].size() == pairwise_alignments[1].size()); for (size_t i = 1; i < pairwise_alignments[0].size(); ++i) { @@ -507,10 +494,10 @@ std::vector msa::add_alignment( fasta::Sequence s; std::vector merged(multi_alignment.size() + 1, fasta::SequenceList(2, s)); - int i = 0; - int j = 0; - const std::vector *profile1 = &multi_alignment[0][0].residues; - const std::vector *profile2 = &pairwise_alignment[0].residues; + auto i = 0; + auto j = 0; + const auto *profile1 = &multi_alignment[0][0].residues; + const auto *profile2 = &pairwise_alignment[0].residues; int length1 = profile1->size(); int length2 = profile2->size(); @@ -563,10 +550,10 @@ std::vector msa::remove_gapcolumns( std::vector alignment) { std::vector result = alignment; - int erased = 0; + auto erased = 0; for (size_t i = 0; i < alignment[0][0].residues.size(); ++i) { if (alignment[0][0].residues[i].codon[0] == '-') { - bool gaps = true; + auto gaps = true; size_t j = 1; while (gaps && j < alignment[0].size()) { if (alignment[0][j].residues[i].codon[0] != '-') { @@ -591,7 +578,7 @@ std::vector msa::remove_gapcolumns( bool msa::compare_alignments(const std::vector& al1, const std::vector& al2) { - bool result = true; + auto result = true; if (al1.size() != al2.size() || al1[0].size() != al2[0].size()) { result = false; } diff --git a/src/optimizer.cpp b/src/optimizer.cpp index fe4375c..a31cfa7 100644 --- a/src/optimizer.cpp +++ b/src/optimizer.cpp @@ -21,7 +21,7 @@ std::vector optimizer::optimize_alignment( const std::vector& alignment, double domain_modifier, double motif_modifier, double ptm_modifier, const std::string& sbst_mat) { - std::vector m = optimizer::calculate_move_scores( + auto m = optimizer::calculate_move_scores( alignment, domain_modifier, motif_modifier, ptm_modifier, sbst_mat); optimizer::filter_move_data(m); std::vector new_alignment; @@ -36,7 +36,7 @@ std::vector optimizer::calculate_move_scores( double domain_modifier, double motif_modifier, double ptm_modifier, const std::string& sbst_mat) { - size_t alignment_length = alignment[0][0].residues.size(); + auto alignment_length = alignment[0][0].residues.size(); std::vector move_data; std::string side; const sbst::SimilarityScoresMap* sim_scores; @@ -114,7 +114,7 @@ std::vector optimizer::remove_residues( assert(alignment.size() == 2); assert(alignment[0].size() == alignment[1].size()); - std::vector new_alignment = alignment; + auto new_alignment = alignment; for (auto& i : move_data) { fasta::Residue tmp = new_alignment[0][i.seq_number].residues[i.new_position]; new_alignment[0][i.seq_number].residues[i.new_position] = @@ -135,30 +135,10 @@ double optimizer::get_two_res_score(fasta::Residue res1, fasta::Residue res2, double domain_modifier, double motif_modifier, double ptm_modifier) { double result = 0; - char aa2 = res2.codon[0]; + auto aa2 = res2.codon[0]; if (sim_scores->find(aa2) != sim_scores->end() && res1_index >= 0) { result += sim_scores->at(aa2)[res1_index]; } - // else if (aa2 == 'B' && res1_index >= 0) { - // result += sim_scores->at('D')[res1_index] * 0.5 - // + sim_scores->at('N')[res1_index] * 0.5; - // } else if (aa2 == 'Z' && res1_index >= 0) { - // result += sim_scores->at('Q')[res1_index] * 0.5 - // + sim_scores->at('E')[res1_index] * 0.5; - // } - // else if ((aa2 == 'B' && res1.codon[0] == 'Z') - // || (aa2 == 'Z' && res1.codon[0] == 'B')) { - // result += 0.25 * (sim_scores->at('N')[5] + sim_scores->at('N')[6] - // + sim_scores->at('D')[5] + sim_scores->at('D')[6]); - // } else if (res1.codon[0] == 'Z' - // && sim_scores->find(aa2) != sim_scores->end()) { - // result += sim_scores->at(aa2)[5] * 0.5 - // + sim_scores->at(aa2)[6] * 0.5; - // } else if (res1.codon[0] == 'B' - // && sim_scores->find(aa2) != sim_scores->end()) { - // result += sim_scores->at(aa2)[2] * 0.5 - // + sim_scores->at(aa2)[3] * 0.5; - // } result += optimizer::score_ptm(res1, res2, ptm_modifier); result += optimizer::score_domain(res1, res2, domain_modifier); result += optimizer::score_motif(res1, res2, motif_modifier); @@ -172,13 +152,13 @@ optimizer::MoveData optimizer::single_move_score( const sbst::SimilarityScoresMap* sim_scores, double domain_modifier, double motif_modifier, double ptm_modifier) { double pre_score = 0; - fasta::Residue res1 = alignment[0][seq_number].residues[position]; - char aa1 = res1.codon[0]; - int index = (std::find(sbst::ALPHABET.begin(), sbst::ALPHABET.end(), aa1) + auto res1 = alignment[0][seq_number].residues[position]; + auto aa1 = res1.codon[0]; + auto index = (std::find(sbst::ALPHABET.begin(), sbst::ALPHABET.end(), aa1) - sbst::ALPHABET.begin()); for (size_t i = 0; i < alignment[0].size(); ++i) { - fasta::Residue res2 = alignment[0][i].residues[position]; + auto res2 = alignment[0][i].residues[position]; if (i != seq_number && res2.codon[0] != '-') { pre_score += get_two_res_score(res1, res2, index, sim_scores, domain_modifier, motif_modifier, @@ -218,7 +198,7 @@ optimizer::MoveData optimizer::single_move_score( int optimizer::find_gap_end(const fasta::Sequence& seq, int start) { int gap_end = seq.residues.size(); - bool not_found = true; + auto not_found = true; for(size_t i = start; not_found && i < seq.residues.size(); i++) { if (seq.residues[i].codon[0] != '-') { not_found = false; @@ -231,7 +211,7 @@ int optimizer::find_gap_end(const fasta::Sequence& seq, int start) { int optimizer::find_gap_start(const fasta::Sequence& seq, int gap_end) { int gap_start = -1; - bool not_found = true; + auto not_found = true; for(size_t i = gap_end; not_found && i > 0; --i) { if (seq.residues[i].codon[0] != '-') { not_found = false; @@ -249,7 +229,7 @@ double optimizer::score_ptm(fasta::Residue res1, fasta::Residue res2, std::string ptm_type1; std::string ptm_type2; char ptm_level; - bool found1 = false; + auto found1 = false; double multiplier1 = 0; for (auto& f : res1.features) { if (f.substr(0, 2) == "p_") { @@ -261,7 +241,7 @@ double optimizer::score_ptm(fasta::Residue res1, fasta::Residue res2, } } if (found1) { - bool found2 = false; + auto found2 = false; double multiplier2 = 0; for (auto& f : res2.features) { if (f.substr(0, 2) == "p_") { @@ -285,7 +265,7 @@ double optimizer::score_motif(fasta::Residue res1, fasta::Residue res2, double score = 0; std::string name1; std::string name2; - bool found1 = false; + auto found1 = false; for (auto& f : res1.features) { if (f.substr(0, 2) == "m_") { name1 = f; @@ -293,7 +273,7 @@ double optimizer::score_motif(fasta::Residue res1, fasta::Residue res2, } } if (found1) { - bool found2 = false; + auto found2 = false; for (auto& f : res2.features) { if (f.substr(0, 2) == "m_") { name2 = f; @@ -313,7 +293,7 @@ double optimizer::score_domain(fasta::Residue res1, fasta::Residue res2, double score = 0; std::string name1; std::string name2; - bool found1 = false; + auto found1 = false; for (auto& f : res1.features) { if (f.substr(0, 2) == "d_") { name1 = f; @@ -321,7 +301,7 @@ double optimizer::score_domain(fasta::Residue res1, fasta::Residue res2, } } if (found1) { - bool found2 = false; + auto found2 = false; for (auto& f : res2.features) { if (f.substr(0, 2) == "d_") { name2 = f; diff --git a/src/optimizer.h b/src/optimizer.h index 2cb2f98..c07ece6 100644 --- a/src/optimizer.h +++ b/src/optimizer.h @@ -11,13 +11,13 @@ namespace optimizer { struct MoveData { MoveData(int seq_number, int old_position, int new_position, - double score_gain) + double score_gain) : seq_number(seq_number), old_position(old_position), new_position(new_position), score_gain(score_gain) {} MoveData() {} - int seq_number; + int seq_number; int old_position; int new_position; double score_gain; @@ -49,7 +49,7 @@ namespace optimizer { const std::vector& move_data); /// - /// + /// /// void filter_move_data(std::vector& move_data); bool reverse_sort(int i, int j); @@ -65,13 +65,12 @@ namespace optimizer { int find_gap_start(const fasta::Sequence& seq, int start); - double get_two_res_score(fasta::Residue res1, fasta::Residue res2, - int res1_index, - const sbst::SimilarityScoresMap* sim_scores, - double domain_modifier, - double motif_modifier, - double ptm_modifier); - + double get_two_res_score( + fasta::Residue res1, fasta::Residue res2, int res1_index, + const sbst::SimilarityScoresMap* sim_scores, double domain_modifier, + double motif_modifier, double ptm_modifier + ); + double score_ptm(fasta::Residue res1, fasta::Residue res2, double ptm_modifier); diff --git a/src/profile.cpp b/src/profile.cpp index 7d9218f..3c1fef5 100644 --- a/src/profile.cpp +++ b/src/profile.cpp @@ -49,13 +49,13 @@ namespace { {'S', { 3, 1, 4, 2, 2, 2, 1, 2, 1, 0, 0, 1, -1, 0, 2, 9, 4, -1, 0, 1}}, {'T', { 4, 1, 3, 2, 1, 2, 1, 1, 1, 2, 1, 2, 1, 0, 2, 4, 10, -2, 0, 3}}, {'W', {-1, 2, -2, -3, 3, 1, -2, 0, 1, 1, 2, -2, 0, 6, -1, -1, -2, 18, 6, 0}}, - {'Y', { 0, -1, 1, -1, 2, 0, -2, -2, 4, 2, 2, -1, 0, 8, -2, 0, 0, 6, 14, 1}}, + {'Y', { 0, -1, 1, -1, 2, 0, -2, -2, 4, 2, 2, -1, 0, 8, -2, 0, 0, 6, 14, 1}}, {'V', { 3, 0, 0, 0, 1, 1, 0, 0, 0, 7, 4, 0, 3, 3, 1, 1, 3, 0, 1, 11}}}; } profile::ProfileMap profile::create_score_profile( const fasta::SequenceList& sequences, const std::string& sbst_mat) { - profile::ProfileMap p = create_profile(sequences); + auto p = create_profile(sequences); // Convert the profile occurrences to probabilities. for (auto& occ: p) { @@ -72,9 +72,9 @@ profile::ProfileMap profile::create_score_profile( sim_scores = &DISORDER; } for (unsigned i = 0; i < p['A'].size(); ++i) { - std::vector score_column(ALPHABET.size(), 0); + std::vector score_column(ALPHABET.size(), 0); for (auto &prob: p) { - std::vector sbst_column = sim_scores->at(prob.first); + auto sbst_column = sim_scores->at(prob.first); for (size_t k = 0; k < sbst_column.size(); ++k) { score_column[k] += sbst_column[k] * prob.second[i]; } @@ -101,8 +101,7 @@ profile::ProfileMap profile::create_score_profile( profile::ProfileMap profile::create_profile( const fasta::SequenceList& sequences) { profile::ProfileMap p; - - // Initialise profile map with all letters except for B, X, Z to + // Initialise profile map with all letters except for B, X, Z to // a vector of the correct // size, with all values set to 0. for (size_t i = 0; i < ALPHABET.size(); ++i) { @@ -111,11 +110,7 @@ profile::ProfileMap profile::create_profile( for (size_t i = 0; i < sequences[0].residues.size(); ++i) { for (size_t j = 0; j < sequences.size(); ++j) { - char amino_acid = sequences[j].residues[i].codon[0]; - - // if (amino_acid == '-') { - // continue; - // } + auto amino_acid = sequences[j].residues[i].codon[0]; if (amino_acid == 'B') { assert(p.find('D') != p.end() && p.find('N') != p.end()); diff --git a/src/scoring_matrix.cpp b/src/scoring_matrix.cpp index 6d4dd3f..11610e4 100644 --- a/src/scoring_matrix.cpp +++ b/src/scoring_matrix.cpp @@ -51,7 +51,7 @@ void ScoringMatrix::calculate_scores(const fasta::Sequence& sequence, ///V // double profile_score = // profile.at(sequence.residues[j - 1].codon[0])[i - 1]; - double profile_score = profile::get_score(profile, i - 1, + auto profile_score = profile::get_score(profile, i - 1, sequence.residues[j - 1].codon[0]); double feature_score = 0; if (!m_no_feat) { @@ -84,8 +84,8 @@ void ScoringMatrix::calculate_scores(const fasta::Sequence& sequence, ValueCoords ScoringMatrix::find_best_score() { - int max_i = m_matrix_v.size()-1; - int max_j = m_matrix_v[0].size()-1; + auto max_i = m_matrix_v.size()-1; + auto max_j = m_matrix_v[0].size()-1; int n = max_i; // last row of m_matrix_v int m = max_j; // last column of m_matrix_v double max_i_val = m_matrix_v[max_i][max_j]; @@ -134,14 +134,14 @@ fasta::SequenceList ScoringMatrix::backtrace_alignment_path( fasta::Sequence new_s1; fasta::Sequence new_s2; - const size_t profile_length = profile.begin()->second.size(); + const auto profile_length = profile.begin()->second.size(); // TODO: i and j aren't great names...are they? - size_t i = profile_length; - size_t j = sequence.residues.size(); + auto i = profile_length; + auto j = sequence.residues.size(); //if bestScore isn't in the lower right corner, then add gaps //to new_s1 or new_s2 - ValueCoords best_score = find_best_score(); + auto best_score = find_best_score(); // TODO: comparing value to size of something? fishy! if (best_score[0] != (signed)m_matrix_v.size()-1 @@ -171,7 +171,7 @@ fasta::SequenceList ScoringMatrix::backtrace_alignment_path( new_res2 = sequence.residues[j - 1]; // double profile_score = profile.at( // sequence.residues[j - 1].codon[0])[i - 1]; - double profile_score = profile::get_score(profile, i - 1, + auto profile_score = profile::get_score(profile, i - 1, sequence.residues[j - 1].codon[0]); double feature_score = 0; if (!m_no_feat) { diff --git a/src/scoring_matrix.h b/src/scoring_matrix.h index 4864f8f..564f139 100644 --- a/src/scoring_matrix.h +++ b/src/scoring_matrix.h @@ -34,10 +34,9 @@ class ScoringMatrix { /// traces back the alignment path in the scoring matrices /// fasta::SequenceList backtrace_alignment_path( - const fasta::Sequence& sequence, - const profile::ProfileMap& profile, - const FeatureScores& f_profile, - int codon_length); + const fasta::Sequence& sequence, const profile::ProfileMap& profile, + const FeatureScores& f_profile, int codon_length + ); /// TODO: maybe remove, only used in tests SingleScoringMatrix get_V_matrix(); diff --git a/src/substitution_matrix.h b/src/substitution_matrix.h index c5d075b..4c0de9d 100644 --- a/src/substitution_matrix.h +++ b/src/substitution_matrix.h @@ -48,7 +48,7 @@ namespace substitution_matrix { {'S', { 3, 1, 4, 2, 2, 2, 1, 2, 1, 0, 0, 1, -1, 0, 2, 9, 4, -1, 0, 1}}, {'T', { 4, 1, 3, 2, 1, 2, 1, 1, 1, 2, 1, 2, 1, 0, 2, 4, 10, -2, 0, 3}}, {'W', {-1, 2, -2, -3, 3, 1, -2, 0, 1, 1, 2, -2, 0, 6, -1, -1, -2, 18, 6, 0}}, - {'Y', { 0, -1, 1, -1, 2, 0, -2, -2, 4, 2, 2, -1, 0, 8, -2, 0, 0, 6, 14, 1}}, + {'Y', { 0, -1, 1, -1, 2, 0, -2, -2, 4, 2, 2, -1, 0, 8, -2, 0, 0, 6, 14, 1}}, {'V', { 3, 0, 0, 0, 1, 1, 0, 0, 0, 7, 4, 0, 3, 3, 1, 1, 3, 0, 1, 11}}}; }