From 54790ef6bb988ea104353086a2fb70544a66c19f Mon Sep 17 00:00:00 2001
From: Andre Mueller
Date: Mon, 11 Mar 2024 08:10:16 +0100
Subject: [PATCH] changed default comment formatting to one space after //

---
 src/alignment.h                 |  30 +++----
 src/building.cpp                |  22 ++---
 src/candidate_generation.h      |  30 +++----
 src/candidate_structs.h         |   6 +-
 src/chunk_allocator.h           |  14 ++--
 src/classification.cpp          | 141 ++++++++++++++++----------------
 src/classification.h            |   4 +-
 src/classification_statistics.h |  18 ++--
 src/config.h                    |   4 +-
 src/database.cpp                |  26 +++---
 src/database.h                  |  12 +--
 src/database_query.h            |   2 +-
 src/dna_encoding.h              |  16 ++--
 src/filesys_utility.cpp         |   2 +-
 src/gpu_hashmap.cu              |  38 ++++-----
 src/gpu_hashmap.cuh             |   2 +-
 src/gpu_hashmap_operations.cuh  |   4 +-
 src/gpu_result_processing.cuh   |   4 +-
 src/hash_dna.h                  |   6 +-
 src/hash_int.h                  |  10 +--
 src/hash_multimap.h             |  36 ++++----
 src/host_hashmap.h              |  22 ++---
 src/io_serialize.h              |   6 +-
 src/mode_build.cpp              |   2 +-
 src/mode_build_query.cpp        |   2 +-
 src/mode_info.cpp               |   4 +-
 src/mode_merge.cpp              |  16 ++--
 src/mode_query.cpp              |   4 +-
 src/options.cpp                 |  44 +++++-----
 src/options.h                   |  58 ++++++-------
 src/query_batch.cu              |   2 +-
 src/querying.cpp                |  20 ++---
 src/sequence_batch.cuh          |   4 +-
 src/sequence_io.cpp             |  16 ++--
 src/sequence_io.h               |   2 +-
 src/sequence_iostream.h         |   4 +-
 src/stat_combined.cu            |   2 +-
 src/stat_combined.cuh           |   2 +-
 src/stat_combined.h             |   2 +-
 src/stat_confusion.h            |   6 +-
 src/stat_moments.h              |  10 +--
 src/taxonomy.h                  |  16 ++--
 src/taxonomy_io.cpp             |  42 +++++-----
 43 files changed, 358 insertions(+), 355 deletions(-)

diff --git a/src/alignment.h b/src/alignment.h
index 0cb1af2..85c6daa 100644
--- a/src/alignment.h
+++ b/src/alignment.h
@@ -195,43 +195,43 @@ align_semi_global(const QuerySequence& query,
                   typename SubjectSequence::value_type>::value,
                   "query and subject value type must be identical");
-    //datatypes from scoring scheme
+    // datatypes from scoring scheme
     using value_t = typename QuerySequence::value_type;
     using index_t = typename QuerySequence::size_type;
     using score_t = typename ScoringScheme::score_type;
     using predc_t = typename ScoringScheme::predecessor;
-    //get lengths from sequence containers
+    // get lengths from sequence containers
     const index_t len_q = query.size();
     const index_t len_s = subject.size();
-    //quadratic memory solution for relaxing and backtracking
+    // quadratic memory solution for relaxing and backtracking
    std::vector<score_t> score((len_q+1)*(len_s+1), 0);
    std::vector<predc_t> predc((len_q+1)*(len_s+1), predc_t(0));
     for (index_t q = 1; q < len_q+1; q++) {
-        //cache the query at position q
+        // cache the query at position q
         const auto vquery = query[q-1];
         for (index_t s = 1; s < len_s+1; s++) {
-            //cache the subject at position s
+            // cache the subject at position s
             auto vsubject = subject[s-1];
-            //cache diagonal, above and left entry of score matrix
+            // cache diagonal, above and left entry of score matrix
             score_t diag  = score[(q-1)*(len_s+1)+(s-1)];
             score_t above = score[(q-1)*(len_s+1)+(s-0)];
             score_t left  = score[(q-0)*(len_s+1)+(s-1)];
-            //relax ingoing edges
+            // relax ingoing edges
             auto argmax = scoring.relax(diag, above, left, vsubject, vquery);
-            //update current node
+            // update current node
             score[q*(len_s+1)+s] = argmax.score;
             predc[q*(len_s+1)+s] = argmax.predc;
         }
     }
-    //searching the best score
+    // searching the best score
     auto bsf_q = len_q;
     auto bsfS = len_s;
     auto bsf_v = score[len_q*(len_s+1)+len_s];
@@ -254,26 +254,26 @@ align_semi_global(const QuerySequence& query,
         }
     }
-    //construct the alignment
+    // construct the alignment
     auto res = alignment{};
     res.score = bsf_v;
     if (mode == alignment_mode::backtrace) {
         auto pred = predc[bsf_q*(len_s+1)+bsfS];
-        //backtracing predecessor information
+        // backtracing predecessor information
         do {
-            //caution, encode changes the values of bsf_q and bsfS
+            // caution, encode changes the values of bsf_q and bsfS
             auto symbol = scoring.encode(bsf_q, bsfS, pred, query, subject);
-            //write down the aligment
+            // write down the alignment
             res.query.push_back(std::get<0>(symbol));
             res.subject.push_back(std::get<1>(symbol));
-            //update pred
+            // update pred
             pred = predc[bsf_q*(len_s+1)+bsfS];
         } while (bool(pred));
-        //reverse the alignment
+        // reverse the alignment
         std::reverse(res.query.begin(), res.query.end());
         std::reverse(res.subject.begin(), res.subject.end());
     }
diff --git a/src/building.cpp b/src/building.cpp
index cf29708..4eff537 100644
--- a/src/building.cpp
+++ b/src/building.cpp
@@ -83,8 +83,8 @@ void rank_targets_with_mapping_file(taxonomy_cache& taxonomy,
     }
     bool showProgress = showInfo && fsize > 100000000;
-    //accession2taxid files have 40-450M lines
-    //update progress indicator every 1M lines
+    // accession2taxid files have 40-450M lines
+    // update progress indicator every 1M lines
     size_t step = 0;
     size_t statStep = 1UL << 20;
     if (showProgress) show_progress_indicator(cout, 0);
@@ -94,13 +94,13 @@ void rank_targets_with_mapping_file(taxonomy_cache& taxonomy,
     std::uint64_t taxid;
     string gi;
-    //skip header
+    // skip header
     getline(is, acc);
     acc.clear();
     while (is >> acc >> accver >> taxid >> gi) {
-        //target in database?
-        //accession.version is the default
+        // target in database?
+        // accession.version is the default
         const taxon* tax = taxonomy.taxon_with_name(accver);
         if (!tax) {
@@ -108,7 +108,7 @@ void rank_targets_with_mapping_file(taxonomy_cache& taxonomy,
             if (!tax) tax = taxonomy.taxon_with_name(gi);
         }
-        //if in database then set parent
+        // if in database then set parent
         if (tax) {
             auto i = targetTaxa.find(tax);
             if (i != targetTaxa.end()) {
@@ -218,16 +218,16 @@ taxon_id find_taxon_id(
     if (name2tax.empty()) return taxonomy::none_id();
     if (name.empty()) return taxonomy::none_id();
-    //try to find exact match
+    // try to find exact match
     auto i = name2tax.find(name);
     if (i != name2tax.end()) return i->second;
-    //find nearest match
+    // find nearest match
     i = name2tax.upper_bound(name);
     if (i == name2tax.end()) return taxonomy::none_id();
-    //if nearest match contains 'name' as prefix -> good enough
-    //e.g. accession vs. accession.version
+    // if nearest match contains 'name' as prefix -> good enough
+    // e.g. accession vs. accession.version
     if (i->first.compare(0,name.size(),name) != 0) return taxonomy::none_id();
     return i->second;
 }
@@ -490,7 +490,7 @@ void post_process_features(database& db, const build_options& opt)
     if (dbconf.removeOverpopulatedFeatures) {
         auto old = db.feature_count();
         auto maxlpf = db.max_locations_per_feature() - 1;
-        if (maxlpf > 0) { //always keep buckets with size 1
+        if (maxlpf > 0) { // always keep buckets with size 1
             if (notSilent) {
                 cout << "\nRemoving features with more than "
                      << maxlpf << " locations... " << flush;
diff --git a/src/candidate_generation.h b/src/candidate_generation.h
index f8a4a60..eb09f09 100644
--- a/src/candidate_generation.h
+++ b/src/candidate_generation.h
@@ -55,10 +55,10 @@ void for_all_contiguous_window_ranges(
 {
     using hit_count = match_candidate::count_type;
-    //list empty?
+    // list empty?
     if (fst == end) return;
-    //first entry in list
+    // first entry in list
     hit_count hits = 1;
     match_candidate curBest;
     curBest.tax = nullptr;
@@ -69,32 +69,32 @@
     auto lst = fst;
     ++lst;
-    //rest of list: check hits per query sequence
+    // rest of list: check hits per query sequence
     while (lst != end) {
-        //look for neighboring windows with the highest total hit count
-        //as long as we are in the same target and the windows are in a
-        //contiguous range
+        // look for neighboring windows with the highest total hit count
+        // as long as we are in the same target and the windows are in a
+        // contiguous range
         if (lst->tgt == curBest.tgt) {
-            //add new hits to the right
+            // add new hits to the right
             hits++;
-            //subtract hits to the left that fall out of range
+            // subtract hits to the left that fall out of range
             while (fst != lst && (lst->win - fst->win) >= numWindows) {
                 hits--;
-                //move left side of range
+                // move left side of range
                 ++fst;
             }
-            //track best of the local sub-ranges
+            // track best of the local sub-ranges
             if (hits > curBest.hits) {
                 curBest.hits = hits;
                 curBest.pos.beg = fst->win;
                 curBest.pos.end = lst->win;
             }
         }
-        else { //end of current target
+        else { // end of current target
             if (!consume(curBest)) return;
-            //reset to new target
+            // reset to new target
             fst = lst;
             hits = 1;
             curBest.tax = nullptr;
@@ -204,7 +204,7 @@ class best_distinct_matches_in_contiguous_window_ranges
                 top_.resize(rules.maxCandidates);
             }
         }
-        //above sequence level, taxa can occur more than once
+        // above sequence level, taxa can occur more than once
         else {
             auto i = std::find_if (top_.begin(), top_.end(),
                 [&] (const match_candidate& c) {
            });
             if (i != top_.end()) {
-                //taxon already in list, update, if more hits
+                // taxon already in list, update, if more hits
                 if (cand.hits > i->hits) {
                     *i = cand;
                     std::sort(top_.begin(), i+1, greater);
                 }
             }
-            //taxon not in list yet
+            // taxon not in list yet
             else {
                 auto j = std::upper_bound(top_.begin(), top_.end(),
                                           cand, greater);
diff --git a/src/candidate_structs.h b/src/candidate_structs.h
index ce9af7a..7a57fc6 100644
--- a/src/candidate_structs.h
+++ b/src/candidate_structs.h
@@ -116,13 +116,13 @@ struct candidate_generation_rules
 {
     constexpr candidate_generation_rules() noexcept = default;
-    //maximum length of contiguous window range
+    // maximum length of contiguous window range
     window_id maxWindowsInRange = 3;
-    //maximum number of candidates to be generated
+    // maximum number of candidates to be generated
    std::size_t maxCandidates = std::numeric_limits<std::size_t>::max();
-    //list only the best candidate of a taxon on rank
+    // list only the best candidate of a taxon on rank
     taxon_rank mergeBelow = taxon_rank::Sequence;
 };
diff --git a/src/chunk_allocator.h b/src/chunk_allocator.h
index 83965b8..65695d6 100644
--- a/src/chunk_allocator.h
+++ b/src/chunk_allocator.h
@@ -102,7 +102,7 @@ class chunk_allocator
     using value_type = T;
     chunk_allocator():
-        minChunkSize_(128*1024*1024/sizeof(T)), //128 MiB
+        minChunkSize_(128*1024*1024/sizeof(T)), // 128 MiB
         freeSize_(0),
         chunks_{}
     {}
@@ -144,8 +144,8 @@ class chunk_allocator
     T* allocate(std::size_t n)
     {
         // std::lock_guard lock(mutables_);
-        //at the moment chunks will only be used,
-        //if they have been reserved explicitly
+        // at the moment chunks will only be used,
+        // if they have been reserved explicitly
         if (n <= freeSize_) {
             for (auto& c : chunks_) {
                 auto p = c.next_buffer(n);
@@ -155,11 +155,11 @@ class chunk_allocator
             }
         }
-        //make new chunk
+        // make new chunk
         // chunks_.emplace_back(std::max(minChunkSize_,n));
         // auto p = chunks_.back().next_buffer(n);
         // if (p) return p;
-        //fallback
+        // fallback
         try {
             auto p = new T[n];
             return p;
@@ -172,7 +172,7 @@ class chunk_allocator
     {
         // std::lock_guard lock(mutables_);
-        //at the moment occupied chunk buffers are not given back
+        // at the moment occupied chunk buffers are not given back
         auto it = std::find_if (begin(chunks_), end(chunks_),
                                 [p](const chunk& c){ return c.owns(p); });
@@ -183,7 +183,7 @@ class chunk_allocator
     chunk_allocator
     select_on_container_copy_construction() const {
-        //don't propagate
+        // don't propagate
         return chunk_allocator{};
     }
diff --git a/src/classification.cpp b/src/classification.cpp
index 7de6531..59f5c26 100644
--- a/src/classification.cpp
+++ b/src/classification.cpp
@@ -56,8 +56,8 @@ using std::cerr;
 *****************************************************************************/
template<class Sequence>
 inline auto
-make_view_from_window_range(const Sequence& s, const window_range& range,
-                            int size, int stride)
+make_view_from_window_range (const Sequence& s, const window_range& range,
+                             int size, int stride)
 {
     auto end = s.begin() + (stride * range.end) + size;
     if (end > s.end()) end = s.end();
@@ -74,23 +74,23 @@ make_view_from_window_range(const Sequence& s, const window_range& range,
 *****************************************************************************/
template<class Subject>
 alignment
-make_semi_global_alignment(const sequence_query& query,
-                           const Subject& subject)
+make_semi_global_alignment (const sequence_query& query,
+                            const Subject& subject)
 {
     std::size_t score  = 0;
     std::size_t scorer = 0;
     const auto scheme = default_alignment_scheme{};
-    //compute alignment
+    // compute alignment
     auto align = align_semi_global(query.seq1, subject, scheme);
     score = align.score;
-    //reverse complement
+    // reverse complement
     auto query1r = make_reverse_complement(query.seq1);
     auto alignr = align_semi_global(query1r, subject, scheme);
     scorer = alignr.score;
-    //align paired read as well
+    // align paired read as well
     if (!query.seq2.empty()) {
         score += align_semi_global_score(query.seq2, subject, scheme);
         auto query2r = make_reverse_complement(query.seq2);
@@ -107,9 +107,9 @@ make_semi_global_alignment(const sequence_query& query,
 *****************************************************************************/
 const taxon*
-ground_truth(const taxonomy_cache& taxonomy, const string& header)
+ground_truth (const taxonomy_cache& taxonomy, const string& header)
 {
-    //try to extract query id and find the corresponding target in database
+    // try to extract query id and find the corresponding target in database
     const taxon* tax = nullptr;
     tax = taxonomy.taxon_with_name(extract_accession_string(header, sequence_id_type::acc_ver));
     if (tax) return taxonomy.cached_next_ranked_ancestor(tax);
@@ -117,11 +117,11 @@ ground_truth(const taxonomy_cache& taxonomy, const string& header)
     tax = taxonomy.taxon_with_similar_name(extract_accession_string(header, sequence_id_type::acc));
     if (tax) return taxonomy.cached_next_ranked_ancestor(tax);
-    //try to extract id from header
+    // try to extract id from header
     tax = taxonomy.taxon_with_id(extract_taxon_id(header));
     if (tax) return taxonomy.cached_next_ranked_ancestor(tax);
-    //try to find entire header as sequence identifier
+    // try to find entire header as sequence identifier
     tax = taxonomy.taxon_with_name(header);
     if (tax) return taxonomy.cached_next_ranked_ancestor(tax);
@@ -136,12 +136,12 @@ ground_truth(const taxonomy_cache& taxonomy, const string& header)
 *
 *****************************************************************************/
 const taxon*
-classify(const taxonomy_cache& taxonomy, const classification_options& opt,
-         const span cand)
+classify (const taxonomy_cache& taxonomy, const classification_options& opt,
+          const span cand)
 {
     if (cand.empty() || !cand[0].tax) return nullptr;
-    //hits below threshold => considered not classifiable
+    // hits below threshold => considered not classifiable
     if (cand[0].hits < opt.hitsMin) return nullptr;
     // begin lca with first candidate
@@ -199,7 +199,7 @@ struct classification
 *
 *****************************************************************************/
 classification
-make_classification(
+make_classification (
     const sequence_query& query,
     const span candidates,
     const taxonomy_cache& taxonomy,
@@ -224,26 +224,26 @@ make_classification(
 * @brief add difference between result and truth to statistics
 *
 *****************************************************************************/
-void update_coverage_statistics(const taxonomy_cache& taxonomy,
-                                const classification& cls,
-                                classification_statistics& stats)
+void update_coverage_statistics (const taxonomy_cache& taxonomy,
+                                 const classification& cls,
+                                 classification_statistics& stats)
 {
     if (!cls.groundTruth) return;
-    //check if taxa are covered in DB
+    // check if taxa are covered in DB
     for (const taxon* tax : taxonomy.cached_ranks(cls.groundTruth)) {
         if (tax) {
             auto r = tax->rank();
             if (taxonomy.covers(*tax)) {
-                if (!cls.best || r < cls.best->rank()) { //unclassified on rank
+                if (!cls.best || r < cls.best->rank()) { // unclassified on rank
                     stats.count_coverage_false_neg(r);
-                } else { //classified on rank
+                } else { // classified on rank
                     stats.count_coverage_true_pos(r);
                 }
             }
             else {
-                if (!cls.best || r < cls.best->rank()) { //unclassified on rank
+                if (!cls.best || r < cls.best->rank()) { // unclassified on rank
                     stats.count_coverage_true_neg(r);
-                } else { //classified on rank
+                } else { // classified on rank
                     stats.count_coverage_false_pos(r);
                 }
             }
@@ -258,7 +258,7 @@ void update_coverage_statistics(const taxonomy_cache& taxonomy,
 * @brief evaluate classification of one query
 *
 *****************************************************************************/
-void evaluate_classification(
+void evaluate_classification (
     const classification& cls,
     const taxonomy_cache& taxonomy,
     const classification_evaluation_options& opt,
@@ -290,9 +290,12 @@ void evaluate_classification(
 * @brief estimate read counts per taxon at specific level
 *
 *****************************************************************************/
-void estimate_abundance(const taxonomy_cache& taxonomy, taxon_count_map& allTaxCounts, const taxon_rank rank) {
+void estimate_abundance (const taxonomy_cache& taxonomy,
+                         taxon_count_map& allTaxCounts,
+                         const taxon_rank rank)
+{
     if (rank != taxon_rank::Sequence) {
-        //prune taxon below estimation rank
+        // prune taxon below estimation rank
         taxon t{0,0,"",rank-1};
         auto begin = allTaxCounts.lower_bound(&t);
         for (auto taxCount = begin; taxCount != allTaxCounts.end();) {
@@ -314,38 +317,38 @@ void estimate_abundance(const taxonomy_cache& taxonomy, taxon_count_map& allTaxC
    std::unordered_map<const taxon*, query_id> taxWeights;
     taxWeights.reserve(allTaxCounts.size());
-    //initialize weigths for fast lookup
+    // initialize weights for fast lookup
     for (const auto& taxCount : allTaxCounts) {
         taxWeights[taxCount.first] = 0;
     }
-    //for every taxon find its parent and add to their count
-    //traverse allTaxCounts from leafs to root
+    // for every taxon find its parent and add to their count
+    // traverse allTaxCounts from leaves to root
     for (auto taxCount = allTaxCounts.rbegin(); taxCount != allTaxCounts.rend(); ++taxCount) {
-        //find closest parent
+        // find closest parent
         auto lineage = taxonomy.cached_ranks(taxCount->first);
         const taxon* parent = nullptr;
         auto index = static_cast(taxCount->first->rank()+1);
         while (index < lineage.size()) {
             parent = lineage[index++];
             if (parent && taxWeights.count(parent)) {
-                //add own count to parent
+                // add own count to parent
                 taxWeights[parent] += taxWeights[taxCount->first] + taxCount->second;
-                //link from parent to child
+                // link from parent to child
                 taxChildren[parent].emplace_back(taxCount->first);
                 break;
             }
         }
     }
-    //distribute counts to children and erase parents
-    //traverse allTaxCounts from root to leafs
+    // distribute counts to children and erase parents
+    // traverse allTaxCounts from root to leaves
     for (auto taxCount = allTaxCounts.begin(); taxCount != allTaxCounts.end();) {
         auto children = taxChildren.find(taxCount->first);
         if (children != taxChildren.end()) {
             query_id sumChildren = taxWeights[taxCount->first];
-            //distribute proportionally
+            // distribute proportionally
             for (const auto& child : children->second) {
                 allTaxCounts[child] += taxCount->second * (allTaxCounts[child]+taxWeights[child]) / sumChildren;
             }
         }
         else {
             ++taxCount;
         }
     }
-    //remaining tax counts are leafs
+    // remaining tax counts are leaves
 }
@@ -365,18 +368,18 @@ void estimate_abundance(const taxonomy_cache& taxonomy, taxon_count_map& allTaxC
 * @brief compute alignment of top hits and optionally show it
 *
 *****************************************************************************/
-void show_alignment(std::ostream& os,
-                    const sketching_opt& targetSketching,
-                    const classification_output_options& opt,
-                    const sequence_query& query,
-                    const span tophits)
+void show_alignment (std::ostream& os,
+                     const sketching_opt& targetSketching,
+                     const classification_output_options& opt,
+                     const sequence_query& query,
+                     const span tophits)
 {
-    //try to align to top target
+    // try to align to top target
     const taxon* tgtTax = tophits[0].tax;
     if (tgtTax && tgtTax->rank() == taxon_rank::Sequence) {
         const auto& src = tgtTax->source();
         try {
-            //load candidate file and forward to sequence
+            // load candidate file and forward to sequence
             sequence_reader reader{src.filename};
             reader.skip(src.index-1);
@@ -389,7 +392,7 @@ void show_alignment(std::ostream& os,
             auto align = make_semi_global_alignment(query, subject);
-            //print alignment to top candidate
+            // print alignment to top candidate
             const auto w = targetSketching.winstride;
             const auto& comment = opt.format.tokens.comment;
             os << '\n'
@@ -471,7 +474,7 @@ void show_query_mapping(
     if (fmt.showQueryIds) os << query.id << colsep;
-    //print query header (first contiguous string only)
+    // print query header (first contiguous string only)
     auto l = query.header.find(' ');
     if (l != string::npos) {
        auto oit = std::ostream_iterator<char>{os, ""};
@@ -583,7 +586,7 @@ void filter_targets_by_coverage(
     float coveragePercentagesSum = 0;
-    //calculate coverage percentages
+    // calculate coverage percentages
     for (const auto& mapping : tgtMatches) {
         target_id target = mapping.first;
         const taxon* tax = taxonomy.cached_taxon_of_target(target);
@@ -598,13 +601,13 @@ void filter_targets_by_coverage(
         coveragePercentages.emplace_back(target, covP);
     }
-    //sort by coverage descending
+    // sort by coverage descending
     std::sort(coveragePercentages.begin(), coveragePercentages.end(),
         [](coverage_percentage& a, coverage_percentage& b){
             return a.second < b.second;
         });
-    //filter out targets
+    // filter out targets
     float coveragePercentagesPartSum = 0;
     for (auto it = coveragePercentages.begin(); it != coveragePercentages.end(); ++it) {
         coveragePercentagesPartSum += it->second;
@@ -665,7 +668,7 @@ void redo_classification_batched(
     const query_options& opt,
     classification_results& results)
 {
-    //parallel
+    // parallel
    std::vector<std::future<void>> threads;
     std::mutex mtx;
@@ -695,7 +698,7 @@ void redo_classification_batched(
         }));
     }
-    //wait for all threads to finish
+    // wait for all threads to finish
     for (auto& thread : threads) {
         thread.get();
     }
@@ -730,27 +733,27 @@ void map_queries_to_targets_default(
 {
     const auto& fmt = opt.output.format;
-    //global target -> query_id/win:hits... list
+    // global target -> query_id/win:hits... list
     matches_per_target tgtMatches;
     moodycamel::ConcurrentQueue queryMappingsQueue;
     if (opt.output.evaluate.precision ||
         opt.output.evaluate.determineGroundTruth) {
-        //groundtruth may be outside of target lineages
-        //cache lineages of *all* taxa
+        // groundtruth may be outside of target lineages
+        // cache lineages of *all* taxa
         db.taxo_cache().update_cached_lineages(taxon_rank::none);
     }
-    //input queries are divided into batches;
-    //each batch might be processed by a different thread;
-    //the following 4 lambdas define actions that should be performed
-    //on such a batch and its associated buffer;
-    //the batch buffer can be used to cache intermediate results
+    // input queries are divided into batches;
+    // each batch might be processed by a different thread;
+    // the following 4 lambdas define actions that should be performed
+    // on such a batch and its associated buffer;
+    // the batch buffer can be used to cache intermediate results
-    //creates an empty batch buffer
+    // creates an empty batch buffer
     const auto makeBatchBuffer = [] { return mappings_buffer(); };
-    //updates buffer with the database answer of a single query
+    // updates buffer with the database answer of a single query
     const auto processQuery = [&] (
         mappings_buffer& buf,
         const sequence_query& query,
@@ -760,8 +763,8 @@ void map_queries_to_targets_default(
         if (query.empty()) return;
         if (opt.output.analysis.showHitsPerTargetList || opt.classify.covPercentile > 0) {
-            //insert all candidates with at least 'hitsMin' hits into
-            //target -> match list
+            // insert all candidates with at least 'hitsMin' hits into
+            // target -> match list
             buf.hitsPerTarget.insert(query.id, tophits,
                                      opt.classify.hitsMin);
         }
@@ -784,14 +787,14 @@ void map_queries_to_targets_default(
         }
     };
-    //runs before a batch buffer is discarded
+    // runs before a batch buffer is discarded
     const auto finalizeBatch = [&] (mappings_buffer&& buf) {
         if (opt.output.analysis.showHitsPerTargetList || opt.classify.covPercentile > 0) {
-            //merge batch (target->hits) lists into global one
+            // merge batch (target->hits) lists into global one
             tgtMatches.merge(std::move(buf.hitsPerTarget));
         }
         if (opt.classify.covPercentile > 0) {
-            //move mappings to global map
+            // move mappings to global map
             queryMappingsQueue.enqueue(std::move(buf.queryMappings));
             buf.queryMappings.clear();
         }
@@ -800,17 +803,17 @@ void map_queries_to_targets_default(
         }
     };
-    //runs if something needs to be appended to the output
+    // runs if something needs to be appended to the output
     const auto appendToOutput = [&] (const std::string& msg) {
         results.perReadOut << fmt.tokens.comment << msg << '\n';
     };
-    //run (parallel) database queries according to processing options
+    // run (parallel) database queries according to processing options
     query_database(infiles, db, opt,
                    makeBatchBuffer, processQuery, finalizeBatch,
                    appendToOutput);
-    //filter all matches by coverage
+    // filter all matches by coverage
     if (opt.classify.covPercentile > 0) {
         filter_targets_by_coverage(db.taxo_cache(), tgtMatches,
                                    opt.classify.covPercentile);
@@ -874,7 +877,7 @@ void map_candidates_to_targets(vector<string>&& queryHeaders,
         show_query_mapping_header(results.perReadOut, opt.output);
     }
-    //taxon -> read count
+    // taxon -> read count
     taxon_count_map allTaxCounts;
     for (size_t i = 0; i < queryHeaders.size(); ++i) {
diff --git a/src/classification.h b/src/classification.h
index 9199bba..78c41ae 100644
--- a/src/classification.h
+++ b/src/classification.h
@@ -36,7 +36,7 @@ namespace mc {
 /// @brief forward declarations
 struct query_options;
@@ -90,7 +90,7 @@ struct classification_results
     std::ostream& status;
     timer time;
     classification_statistics statistics;
-    taxon_count_map taxCounts; //global taxon -> read count
+    taxon_count_map taxCounts; // global taxon -> read count
 };
diff --git a/src/classification_statistics.h b/src/classification_statistics.h
index 020ae74..1524edd 100644
--- a/src/classification_statistics.h
+++ b/src/classification_statistics.h
@@ -89,18 +89,18 @@ class classification_statistics
     {
         assign(assigned);
-        //plausibility check
+        // plausibility check
         if (correct < assigned) correct = assigned;
         if (correct < known)    correct = known;
-        //if ground truth known -> count correct and wrong assignments
+        // if ground truth known -> count correct and wrong assignments
         ++known_[int(known)];
         if (known != rank::none) {
             ++correct_[int(correct)];
-            //if ranks below the correct rank are known and assigned,
-            //then all ranks below the correct rank are wrong
+            // if ranks below the correct rank are known and assigned,
+            // then all ranks below the correct rank are wrong
             if (correct > known && correct > assigned) {
                 ++wrong_[int(correct)-1];
             }
         }
     }
     //---------------------------------------------------------------
     /// @details concurrency-safe
     void count_coverage_true_pos(rank r) {
         coverage_[int(r)].count_true_pos();
     }
     /// @details concurrency-safe
     void count_coverage_false_pos(rank r) {
         coverage_[int(r)].count_false_pos();
     }
     /// @details concurrency-safe
     void count_coverage_true_neg(rank r) {
         coverage_[int(r)].count_true_neg();
     }
     /// @details concurrency-safe
     void count_coverage_false_neg(rank r) {
         coverage_[int(r)].count_false_neg();
     }
@@ -222,7 +222,7 @@ class classification_statistics
         return known(r) > 0 ? correct(r) / double(known(r)) : 0;
     }
     double precision(rank r) const noexcept {
-        //note that in general tot != assigned(r) and tot != known(r)
+        // note that in general tot != assigned(r) and tot != known(r)
         double tot = correct(r) + wrong(r);
         return tot > 0 ? correct(r) / tot : 0;
     }
diff --git a/src/config.h b/src/config.h
index d0c9a27..7e7ef75 100644
--- a/src/config.h
+++ b/src/config.h
@@ -88,7 +88,7 @@ using query_id = std::uint_least64_t;
 /**************************************************************************
  * @brief define sequence batch sizes
  */
-//TODO tune sizes
+// TODO tune sizes
 #define MAX_TARGETS_PER_BATCH 100
 #define MAX_LENGTH_PER_BATCH 10000000
@@ -111,7 +111,7 @@ using sketch_size_type = typename sketcher::sketch_type::size_type;
 * note: std::hash
 * is mostly implemented as the identity function
 */
-//using feature_hash = std::hash<feature>;
+// using feature_hash = std::hash<feature>;
using feature_hash = same_size_hash<feature>;
diff --git a/src/database.cpp b/src/database.cpp
index 92ea25b..ce22012 100644
--- a/src/database.cpp
+++ b/src/database.cpp
@@ -85,7 +85,7 @@ part_id database::read_meta(const std::string& filename, std::future& taxo
         throw file_access_error{"Could not read database file '" + filename + "'"};
     }
-    //database version info
+    // database version info
     using std::uint64_t;
     using std::uint8_t;
     uint64_t dbVer = 0;
@@ -98,7 +98,7 @@ part_id database::read_meta(const std::string& filename, std::future& taxo
             + " (uses version " + std::to_string(MC_DB_VERSION) + ")" };
     }
-    //data type widths
+    // data type widths
     uint8_t featureSize = 0; read_binary(is, featureSize);
     uint8_t targetSize = 0;  read_binary(is, targetSize);
     uint8_t windowSize = 0;  read_binary(is, windowSize);
@@ -130,12 +130,12 @@ part_id database::read_meta(const std::string& filename, std::future& taxo
     clear();
-    //sketching parameters
+    // sketching parameters
     read_binary(is, targetSketchingOptions_);
-    //TODO: remove this and change db version
+    // TODO: remove this and change db version
     read_binary(is, targetSketchingOptions_);
-    //target insertion parameters
+    // target insertion parameters
     uint64_t maxLocationsPerFeature = 0;
     read_binary(is, maxLocationsPerFeature);
     max_locations_per_feature(maxLocationsPerFeature);
@@ -165,7 +165,7 @@ void database::read_cache(const std::string& filename, part_id partId,
         throw file_access_error{"Could not read database file '" + filename + "'"};
     }
-    //hash table
+    // hash table
     read_binary(is, featureStore_, partId, readingProgress);
 }
@@ -242,10 +242,10 @@ void database::write_meta(const std::string& filename) const
         throw file_access_error{"can't open file " + filename};
     }
-    //database version info
+    // database version info
     write_binary(os, uint64_t( MC_DB_VERSION ));
-    //data type widths
+    // data type widths
     write_binary(os, uint8_t(sizeof(feature)));
     write_binary(os, uint8_t(sizeof(target_id)));
     write_binary(os, uint8_t(sizeof(window_id)));
@@ -254,19 +254,19 @@ void database::write_meta(const std::string& filename) const
     write_binary(os, uint8_t(sizeof(taxon_id)));
     write_binary(os, uint8_t(taxonomy::num_ranks));
-    //sketching parameters
+    // sketching parameters
     write_binary(os, targetSketchingOptions_);
-    //TODO: remove this and change db version
+    // TODO: remove this and change db version
     write_binary(os, targetSketchingOptions_);
-    //target insertion parameters
+    // target insertion parameters
     write_binary(os, uint64_t(max_locations_per_feature()));
     write_binary(os, target_id(targetCount_));
     write_binary(os, num_parts());
-    //taxon & target metadata
+    // taxon & target metadata
     write_binary(os, taxonomyCache_);
     std::cerr << "done.\n";
@@ -287,7 +287,7 @@ void database::write_cache(const std::string& filename, part_id partId) const
         throw file_access_error{"can't open file " + filename};
     }
-    //hash table
+    // hash table
     write_binary(os, featureStore_, partId);
     std::cerr << "done.\n";
diff --git a/src/database.h b/src/database.h
index fa9c2ea..1372efd 100644
--- a/src/database.h
+++ b/src/database.h
@@ -144,7 +144,7 @@ class database
     * these are stored in the in-memory database and on disk
     */
 #ifndef GPU_MODE
-    //avoid padding bits
+    // avoid padding bits
     #pragma pack(push, 1)
 #endif
     struct location
@@ -171,17 +171,17 @@ class database
         }
     };
 #ifndef GPU_MODE
-    //avoid padding bits
+    // avoid padding bits
     #pragma pack(pop)
 #endif
     //-----------------------------------------------------
-    using sketch  = typename sketcher::sketch_type; //range of features
+    using sketch  = typename sketcher::sketch_type; // range of features
     using feature = typename sketcher::feature_type;
 private:
-    //use negative numbers for sequence level taxon ids
+    // use negative numbers for sequence level taxon ids
     static constexpr taxon_id
     taxon_id_of_target(target_id id) noexcept {
         return ranked_lineages_of_targets::taxon_id_of_target(id);
     }
     //-----------------------------------------------------
     /// @brief "heart of the database": maps features to target locations
 #ifndef GPU_MODE
     using feature_store = host_hashmap;
     using result_handler = query_handler;
 #else
-    using feature_store = gpu_hashmap<feature, location>; //key, value
+    using feature_store = gpu_hashmap<feature, location>; // key, value
     using result_handler = query_batch;
 #endif
diff --git a/src/database_query.h b/src/database_query.h
index a2dcd74..1e14475 100644
--- a/src/database_query.h
+++ b/src/database_query.h
@@ -347,7 +347,7 @@ void query_database(
     // files -> infiles[i], infiles[i+1]
     for (size_t i = 0; i < infilenames.size(); i += stride+1) {
-        //pair up reads from two consecutive files in the list
+        // pair up reads from two consecutive files in the list
         const auto& fname1 = infilenames[i];
         const auto& fname2 = (opt.pairing == pairing_mode::none)
diff --git a/src/dna_encoding.h b/src/dna_encoding.h
index c7fb8c7..4e297f2 100644
--- a/src/dna_encoding.h
+++ b/src/dna_encoding.h
@@ -149,7 +149,7 @@
 using dna_2bit_encoding_t = typename detail::dna_2bit_encoding::type;
-namespace { //internal linkage
+namespace { // internal linkage
 /*************************************************************************//**
 * @param  k : length to consider (in 2-bit letters, so #bits = 2*k)
@@ -283,13 +283,13 @@ for_each_kmer_2bit(numk_t k,
     auto kmerMsk = UInt(~0);
     kmerMsk >>= (sizeof(kmerMsk) * CHAR_BIT) - (k * 2);
-    auto ambig = ambig_t(0); //bitfield marking ambiguous nucleotides
+    auto ambig = ambig_t(0); // bitfield marking ambiguous nucleotides
     auto ambigMsk = ambig_t(~0);
     ambigMsk >>= (sizeof(ambigMsk) * CHAR_BIT) - k;
     ++last;
     for (auto ssend = next(first); ssend != last; ++first, ++ssend) {
-        //encode next letter
+        // encode next letter
         kmer <<= 2;
         ambig <<= 1;
         switch (*first) {
@@ -300,14 +300,14 @@ for_each_kmer_2bit(numk_t k,
             default: ambig |= 1; break;
         }
         --k;
-        //make sure we load k letters at the beginning
+        // make sure we load k letters at the beginning
         if (k == 0) {
-            kmer  &= kmerMsk;  //stamp out 2*k lower bits
-            ambig &= ambigMsk; //stamp out k lower bits
+            kmer  &= kmerMsk;  // stamp out 2*k lower bits
+            ambig &= ambigMsk; // stamp out k lower bits
-            //do something with the kmer (and the ambiguous letters flag)
+            // do something with the kmer (and the ambiguous letters flag)
             consume(kmer, ambig);
-            ++k; //we want only one letter next time
+            ++k; // we want only one letter next time
         }
     }
 }
diff --git a/src/filesys_utility.cpp b/src/filesys_utility.cpp
index 31d4ae5..4999cad 100644
--- a/src/filesys_utility.cpp
+++ b/src/filesys_utility.cpp
@@ -23,7 +23,7 @@
 #include "filesys_utility.h"
 #include
-#include //POSIX header
+#include // POSIX header
 #include
diff --git a/src/gpu_hashmap.cu b/src/gpu_hashmap.cu
index 1297583..66ef335 100644
--- a/src/gpu_hashmap.cu
+++ b/src/gpu_hashmap.cu
@@ -648,7 +648,7 @@ class gpu_hashmap::query_hash_table {
         key_type, value_type,
         // warpcore::defaults::empty_key(), //=0
         key_type(-2),
-        warpcore::defaults::tombstone_key(), //=-1
+        warpcore::defaults::tombstone_key(), // =-1
         warpcore::defaults::probing_scheme_t,
         // warpcore::storage::key_value::SoAStore>;
         warpcore::storage::key_value::AoSStore>;
@@ -750,27 +750,27 @@ private:
         auto batchValuesOffset = valuesOffset;
-        //load batch
+        // load batch
         read_binary(is, h_keyBuffer, batchSize);
         read_binary(is, bsizeBuffer.data(), batchSize);
         for (len_t i = 0; i < batchSize; ++i) {
-            //store offset and size together in 64bit
-            //default is 56bit offset, 8bit size
+            // store offset and size together in 64bit
+            // default is 56bit offset, 8bit size
             h_offsetBuffer[i] = (valuesOffset << sizeof(bucket_size_type)*CHAR_BIT)
                                 + bsizeBuffer[i];
             valuesOffset += bsizeBuffer[i];
         }
-        //check status from previous batch
-        //implicit sync
+        // check status from previous batch
+        // implicit sync
         const auto tableStatus = hashTable_.pop_status(stream);
         if (tableStatus.has_any()) {
             std::cerr << tableStatus << '\n';
         }
-        //insert batch
+        // insert batch
         cudaMemcpy(d_keyBuffer, h_keyBuffer, batchSize*sizeof(key_type),
                    cudaMemcpyHostToDevice);
         cudaMemcpy(d_offsetBuffer, h_offsetBuffer, batchSize*sizeof(uint64_t),
@@ -781,7 +781,7 @@ private:
         std::uint64_t batchValuesCount = valuesOffset - batchValuesOffset;
-        //read batches of locations and copy to device
+        // read batches of locations and copy to device
         const len_t numBatches = batchValuesCount / valBatchSize;
         const size_t remainingSize = batchValuesCount % valBatchSize;
@@ -797,7 +797,7 @@ private:
             d_values += valBatchSize;
         }
-        //read remaining locations and copy to device
+        // read remaining locations and copy to device
         const len_t id = numBatches % 2;
         cudaEventSynchronize(events[id]);
         read_binary(is, valueBuffers[id], remainingSize);
@@ -819,18 +819,18 @@ public:
         len_t batchSize = 0;
         read_binary(is, batchSize);
-        //TODO tune sizes
+        // TODO tune sizes
         const len_t valBatchSize = 1UL << 20;
         cudaStream_t stream = 0;
-        //allocate large memory chunk for all locations,
-        //individual buckets will then point into this array
+        // allocate large memory chunk for all locations,
+        // individual buckets will then point into this array
         cudaMalloc(&locations_, nlocations*sizeof(location)); CUERR
         uint64_t locsOffset = 0;
-        {//load hash table
-            //allocate insert buffers
+        {// load hash table
+            // allocate insert buffers
             key_type * h_keyBuffer;
             key_type * d_keyBuffer;
             cudaMallocHost(&h_keyBuffer, batchSize*sizeof(key_type));
             cudaMallocManaged(&status, batchSize*sizeof(handler_base_type));
             cudaMemset(status, 0, batchSize*sizeof(handler_base_type));
-            //load full batches
+            // load full batches
             const len_t numBatches = nkeys / batchSize;
             for (len_t b = 0; b < numBatches; ++b) {
                 auto batchValuesCount = deserialize_batch_of_buckets(is,
                     + batchValuesCount*sizeof(location);
             }
-            //load last batch
+            // load last batch
             const size_t remainingSize = nkeys % batchSize;
             if (remainingSize) {
                 auto batchValuesCount = deserialize_batch_of_buckets(is,
@@ -887,8 +887,8 @@ public:
                     batchSize*(sizeof(key_type)+sizeof(bucket_size_type))
                     + batchValuesCount*sizeof(location);
-                //check status from last batch
-                //implicit sync
+                // check status from last batch
+                // implicit sync
                 const auto tableStatus = hashTable_.pop_status(stream);
                 if (tableStatus.has_any()) {
                     std::cerr << tableStatus << '\n';
@@ -1162,7 +1162,7 @@ window_id gpu_hashmap::add_target(
          distance(first, last) >= targetSketching.kmerlen;
          first += processedWindows*targetSketching.winstride)
     {
-        //fill sequence batch
+        // fill sequence batch
         processedWindows = insertBuffers_[gpuId].current_seq_batch().add_target(
             first, last, tgt, totalWindows, targetSketching);
diff --git a/src/gpu_hashmap.cuh b/src/gpu_hashmap.cuh
index 924ac52..a93415d 100644
--- a/src/gpu_hashmap.cuh
+++ b/src/gpu_hashmap.cuh
@@ -59,7 +59,7 @@ class gpu_hashmap
     class query_hash_table;
     //-----------------------------------------------------
-    /// @brief needed for batched, asynchonous insertion into build hash table
+    /// @brief needed for batched, asynchronous insertion into build hash table
     struct insert_buffer
     {
         insert_buffer() :
diff --git a/src/gpu_hashmap_operations.cuh b/src/gpu_hashmap_operations.cuh
index 42b1a9a..77b8349 100644
--- a/src/gpu_hashmap_operations.cuh
+++ b/src/gpu_hashmap_operations.cuh
@@ -561,7 +561,7 @@ void query_hashtable(
     for (int i = groupId; i < sketchSize; i += groupsPerWarp) {
         typename Hashtable::value_type valuesOffset = 0;
-        //if key not found valuesOffset stays 0
+        // if key not found valuesOffset stays 0
         const auto status = hashtable.retrieve(
             sketch[i], valuesOffset, group);
@@ -637,7 +637,7 @@ void copy_loctions(
         bucket_size_type bucketSize = min(bucket_size_type(bucketOffset), maxLocationsPerFeature);
         bucketOffset >>= sizeof(bucket_size_type)*CHAR_BIT;
-        //copy locations
+        // copy locations
         for (uint32_t i = warpLane; i < bucketSize; i += WARPSIZE) {
             out[i] = locations[bucketOffset + i];
         }
diff --git a/src/gpu_result_processing.cuh b/src/gpu_result_processing.cuh
index c1395c9..4a65470 100644
--- a/src/gpu_result_processing.cuh
+++ b/src/gpu_result_processing.cuh
@@ -328,7 +328,7 @@ void generate_top_candidates(
 {
     using hit_count = match_candidate::count_type;
-    //TODO reduce size
+    // TODO reduce size
     __shared__ struct {
         location locs[96];
         uint32_t hits[96];
     }
@@ -420,7 +420,7 @@ void generate_top_candidates(
             maxHits = otherHits;
         }
-        //TODO use whole warp for insertion
+        // TODO use whole warp for insertion
         bool insert = false;
         if (32-1 - tid == (maxHits & ((1 << 5) - 1))) {
             insert = true;
diff --git a/src/hash_dna.h b/src/hash_dna.h
index 5f0004b..4194616 100644
--- a/src/hash_dna.h
+++ b/src/hash_dna.h
@@ -56,7 +56,7 @@
 for_each_window(InputIterator first, InputIterator last, Consumer&& consume)
 {
     using std::distance;
-    //sequence not longer than window?
+    // sequence not longer than window?
     if (size_t(distance(first,last)) <= len) {
         consume(first,last);
     }
@@ -225,7 +225,7 @@ class single_function_unique_min_hasher
             auto h = hash_(kmer);
             if (h < sketch.back()) {
                 auto pos = std::lower_bound(sketch.begin(), sketch.end(), h);
-                //make sure we don't insert the same feature more than once
+                // make sure we don't insert the same feature more than once
                 if (pos != sketch.end() && *pos != h) {
                     sketch.pop_back();
                     sketch.insert(pos, h);
                 }
         });
-        //check if some features are invalid (in case of many ambiguous kmers)
+        // check if some features are invalid (in case of many ambiguous kmers)
         if (!sketch.empty() && sketch.back() == feature_type(~0)) {
             for (auto i = sketch.begin(), e = sketch.end(); i != e; ++i) {
                 if (*i == feature_type(~0)) {
diff --git a/src/hash_int.h b/src/hash_int.h
index e145e31..18d4a54 100644
--- a/src/hash_int.h
+++ b/src/hash_int.h
@@ -48,7 +48,7 @@ thomas_mueller_hash(std::uint32_t x) noexcept {
     return x;
 }
-//makes sure we cant't use the wrong types
+// makes sure we can't use the wrong types
template<class T>
void thomas_mueller_hash(T) = delete;
@@ -70,7 +70,7 @@ nvidia_hash(std::uint32_t x) noexcept {
     return x;
 }
-//makes sure we cant't use the wrong types
+// makes sure we can't use the wrong types
template<class T>
void nvidia_hash(T) = delete;
@@ -102,7 +102,7 @@ murmur3_fmix(std::uint32_t x) noexcept {
     return x;
 }
-//makes sure we cant't use the wrong types
+// makes sure we can't use the wrong types
template<class T>
void murmur3_fmix(T) = delete;
@@ -123,7 +123,7 @@ splitmix64_hash(std::uint64_t x) noexcept
     return x;
 }
-//makes sure we cant't use the wrong types
+// makes sure we can't use the wrong types
template<class T>
void splitmix64_hash(T) = delete;
@@ -146,7 +146,7 @@ halve_size_hash(std::uint64_t x) noexcept
     return std::uint32_t(x);
 }
-//makes sure we cant't use the wrong types
+// makes sure we can't use the wrong types
template<class T>
void halve_size_hash(T) = delete;
diff --git a/src/hash_multimap.h b/src/hash_multimap.h
index 62548aa..6d46ab6 100644
--- a/src/hash_multimap.h
+++ b/src/hash_multimap.h
@@ -372,7 +372,7 @@ class hash_multimap
         }
         //-----------------------------------------------------
         /// @brief does not change size!
         bool reserve(value_allocator& alloc, std::size_t n)
         {
             if (n > max_bucket_size()) return false;
             if (n > capacity_) {
                 auto ncap = std::size_t(n + 0.3*size_);
                 if (ncap > max_bucket_size()) ncap = max_bucket_size();
-                //make new array and copy old values
+                // make new array and copy old values
                 auto nvals = value_alloc::allocate(alloc, ncap);
                 if (!nvals) return false;
                 std::copy(values_, values_ + size_, nvals);
             }
             else {
-                //make new array
+                // make new array
                 auto nvals = value_alloc::allocate(alloc, n);
                 if (!nvals) return false;
                 values_ = nvals;
@@ -639,13 +639,13 @@ class hash_multimap
     {
         if (!rehash_possible(n)) return false;
-        //make temporary new map
-        //buckets resize might throw
+        // make temporary new map
+        // buckets resize might throw
         hash_multimap newmap{n};
         newmap.maxLoadFactor_ = maxLoadFactor_;
-        //move old bucket contents into new hash slots
-        //this should use only non-throwing operations
+        // move old bucket contents into new hash slots
+        // this should use only non-throwing operations
         for (auto& b : buckets_) {
             if (!b.unused()) {
                 newmap.insert_into_slot(std::move(b.key_),
             }
         }
-        //should all be noexcept
+        // should all be noexcept
         buckets_ = std::move(newmap.buckets_);
         hash_ = std::move(newmap.hash_);
         return true;
@@ -752,7 +752,7 @@ class hash_multimap
     {
         if (numKeys_ < 1) return;
-        //free bucket memory
+        // free bucket memory
         for (auto& b : buckets_) {
             b.free(alloc_);
         }
@@ -1059,9 +1059,9 @@ class hash_multimap
         read_binary(is, batchSize);
         if (nkeys > 0) {
-            //if the allocator supports it: reserve one large memory chunk
-            //for all values; individual buckets will then point into this
-            //array; the default chunk_allocator does this
+            // if the allocator supports it: reserve one large memory chunk
+            // for all values; individual buckets will then point into this
+            // array; the default chunk_allocator does this
             reserve_keys(nkeys);
             reserve_values(nvalues);
             const auto valuesPointer = alloc_.allocate(nvalues);
@@ -1168,7 +1168,7 @@ class hash_multimap
             buckets_.begin() + (hash_(key) % buckets_.size()),
             buckets_.begin(), buckets_.end()};
-        //find bucket
+        // find bucket
         do {
             if (it->unused()) return buckets_.end();
             if (keyEqual_(it->key(), key)) return iterator(it);
@@ -1187,7 +1187,7 @@ class hash_multimap
             buckets_.begin(), buckets_.end()};
         do {
-            //empty slot found
+            // empty slot found
             if (it->unused()) {
                 if (it->insert(alloc_, std::forward(newvalues)...)) {
                     it->key_ = std::move(key);
@@ -1195,10 +1195,10 @@ class hash_multimap
                     numValues_ += it->size();
                     return iterator(it);
                 }
-                //could not insert
+                // could not insert
                 return buckets_.end();
             }
-            //key already inserted
+            // key already inserted
             if (keyEqual_(it->key(), key)) {
                 auto oldsize = it->size();
                 if (it->insert(alloc_, std::forward(newvalues)...)) {
@@ -1230,10 +1230,10 @@ class hash_multimap
     //---------------------------------------------------------------
     bool rehash_possible(size_type n) const noexcept
     {
-        //number of buckets must be greater or equal to the number of keys
+        // number of buckets must be greater or equal to the number of keys
         if (n == bucket_count() || n < key_count()) return false;
-        //make sure we stay below the maximum load factor
+        // make sure we stay below the maximum load factor
         auto newload = (load_factor() * (float(n)/bucket_count()));
         if (n < bucket_count() && newload > max_load_factor()) return false;
         return true;
diff --git a/src/host_hashmap.h b/src/host_hashmap.h
index d2a7a67..b155ed4 100644
--- a/src/host_hashmap.h
+++ b/src/host_hashmap.h
@@ -46,21 +46,21 @@ class host_hashmap
     using location = ValueT;
     using bucket_size_type = mc::loclist_size_t;
-    using sketch  = typename sketcher::sketch_type; //range of features
+    using sketch  = typename sketcher::sketch_type; // range of features
     using feature = typename sketcher::feature_type;
 private:
     //-----------------------------------------------------
     /// @brief "heart of the database": maps features to target locations
-    using hash_table = hash_multimap<feature, location,    //key, value
-                                  feature_hash,            //key hasher
-                                  std::equal_to<feature>,  //key comparator
-                                  chunk_allocator<location>, //value allocator
-                                  std::allocator<feature>, //bucket+key allocator
-                                  bucket_size_type>;       //location list size
+    using hash_table = hash_multimap<feature, location,    // key, value
+                                  feature_hash,            // key hasher
+                                  std::equal_to<feature>,  // key comparator
+                                  chunk_allocator<location>, // value allocator
+                                  std::allocator<feature>, // bucket+key allocator
+                                  bucket_size_type>;       // location list size
     //-----------------------------------------------------
-    /// @brief needed for batched, asynchonous insertion into feature_store
+    /// @brief needed for batched, asynchronous insertion into feature_store
     struct window_sketch
     {
         window_sketch() :
@@ -414,7 +414,7 @@ class host_hashmap
         sketchers_[part].for_each_sketch(seq, opt,
             [&, this] (const auto& sk) {
                 if (inserters_[part]->valid()) {
-                    //insert sketch into batch
+                    // insert sketch into batch
                     auto& sketch = inserters_[part]->next_item();
                     sketch.tgt = tgt;
                     sketch.win = win;
@@ -430,7 +430,7 @@ class host_hashmap
     //---------------------------------------------------------------
     void add_sketch_batch(part_id part, const sketch_batch& batch) {
         for (const auto& windowSketch : batch) {
-            //insert features from sketch into database
+            // insert features from sketch into database
             for (const auto& f : windowSketch.sk) {
                 auto it = hashTables_[part].insert(
                     f, location{windowSketch.win, windowSketch.tgt});
diff --git a/src/io_serialize.h b/src/io_serialize.h
index 3cb20d5..81ee8cb 100644
--- a/src/io_serialize.h
+++ b/src/io_serialize.h
@@ -73,7 +73,7 @@
template<class T, std::size_t n>
inline void
write_binary(std::ostream& os, const std::array<T,n>& a)
 {
-    //dummy
+    // dummy
     std::uint64_t l = n;
    os.write(reinterpret_cast<const char*>(&l), sizeof(l));
     if (l > 0)
@@ -138,10 +138,10 @@
template<class T, std::size_t n>
inline void
read_binary(std::istream& is, std::array<T,n>& a)
 {
-    //dummy
+    // dummy
     std::uint64_t l = 0;
    is.read(reinterpret_cast<char*>(&l), sizeof(l));
-    //what if l > n?
+    // what if l > n?
     if (l > 0 && l <= n)
        is.read(reinterpret_cast<char*>(a.data()), l * sizeof(T));
 }
diff --git a/src/mode_build.cpp b/src/mode_build.cpp
index a00798d..68fb4c9 100644
--- a/src/mode_build.cpp
+++ b/src/mode_build.cpp
@@ -57,7 +57,7 @@ void add_to_database_and_save(database& db, const build_options& opt)
         cout << "Total build time: " << time.seconds() << " s" << endl;
     }
-    //prevents slow deallocation
+    // prevents slow deallocation
     db.clear_without_deallocation();
 }
diff --git a/src/mode_build_query.cpp b/src/mode_build_query.cpp
index 032b430..b334b0a 100644
--- a/src/mode_build_query.cpp
+++ b/src/mode_build_query.cpp
@@ -73,7 +73,7 @@ void add_to_database_and_query(database& db, build_query_options& opt)
         write_database(db, opt.build);
     }
-    //prevents slow deallocation
+    // prevents slow deallocation
     db.clear_without_deallocation();
 }
diff --git a/src/mode_info.cpp b/src/mode_info.cpp
index 704996f..d02c9b1 100644
--- a/src/mode_info.cpp
+++ b/src/mode_info.cpp
@@ -185,7 +185,7 @@ void show_lineage_table(const info_options& opt)
     auto db = make_database(opt.dbfile, dbPart, database::scope::metadata_only);
     if (db.target_count() < 1) return;
-    //table header
+    // table header
     cout << "name";
     for (auto r = rank::Sequence; r <= rank::Domain; ++r) {
         cout << '\t' << taxonomy::rank_name(r);
     }
@@ -194,7 +194,7 @@ void show_lineage_table(const info_options& opt)
     const auto& taxonomy = db.taxo_cache();
-    //rows
+    // rows
     for (const auto& ranks : taxonomy.target_lineages()) {
         cout << ranks[0]->name();
         for (auto r = rank::Sequence; r <= rank::Domain; ++r) {
diff --git a/src/mode_merge.cpp b/src/mode_merge.cpp
index cdb4ca6..d21f4ad 100644
--- a/src/mode_merge.cpp
+++ b/src/mode_merge.cpp
@@ -90,7 +90,7 @@ get_results_file_properties(const string& filename)
     string line;
-    //check classification rank
+    // check classification rank
     while (ifs.good()) {
         getline(ifs, line);
         if (line[0] != '#') {
@@ -104,7 +104,7 @@ get_results_file_properties(const string& filename)
         }
     }
-    //get layout
+    // get layout
     while (ifs.good()) {
         getline(ifs, line);
         if (line[0] != '#') {
@@ -134,12 +134,12 @@ get_results_file_properties(const string& filename)
         throw io_format_error("no top_hits in file " + filename);
     char lineBegin = ifs.peek();
-    //skip comments
+    // skip comments
     while (ifs.good() && lineBegin == '#') {
         forward(ifs, '\n');
         lineBegin = ifs.peek();
     }
-    //count query results
+    // count query results
     res.resultsBegin = ifs.tellg();
     while (ifs.good()) {
         if (lineBegin != '#') ++res.numQueries;
@@ -251,7 +251,7 @@ void merge_result_files(const vector<string>& infiles,
     rules.mergeBelow = opt.classify.lowestRank;
     if (opt.classify.maxNumCandidatesPerQuery > 0)
         rules.maxCandidates = opt.classify.maxNumCandidatesPerQuery;
-    //else default to 2
+    // else default to 2
     const auto& comment = opt.output.format.tokens.comment;
@@ -307,7 +307,7 @@ void process_result_files(const vector<string>& infiles,
     if (mapFile.good()) {
         cout << "Per-Read mappings will be written to file: " << queryMappingsFilename << endl;
         perReadOut = &mapFile;
-        //default: auxiliary output same as mappings output
+        // default: auxiliary output same as mappings output
         perTargetOut = perReadOut;
         perTaxonOut = perReadOut;
     }
@@ -372,7 +372,7 @@ void process_result_files(const vector<string>& infiles,
             "None of the query sequence files could be opened"};
     }
-    //process all input files at once
+    // process all input files at once
     process_result_files(infiles, db, opt,
                          opt.queryMappingsFile,
                          opt.output.analysis.abundanceFile,
@@ -406,7 +406,7 @@ void main_mode_merge(const cmdline_args& args)
         }
     }
-    //TODO parallelize result processing?
//TODO parallelize result processing? + // TODO parallelize result processing? if (opt.infiles.size() >= 2) { cerr << "Merging result files.\n"; diff --git a/src/mode_query.cpp b/src/mode_query.cpp index 5409e7e..b1ae2cf 100644 --- a/src/mode_query.cpp +++ b/src/mode_query.cpp @@ -69,7 +69,7 @@ read_database(const query_options& opt) maxlpf = database::max_supported_locations_per_feature() - 1; maxlpf = std::min(maxlpf, db.max_locations_per_feature() - 1); - if (maxlpf > 0) { //always keep buckets with size 1 + if (maxlpf > 0) { // always keep buckets with size 1 cerr << "\nRemoving features with more than " << maxlpf << " locations...\n"; @@ -77,7 +77,7 @@ read_database(const query_options& opt) cerr << rem << " of " << old << " removed.\n"; } - //in case new max is less than the database setting + // in case new max is less than the database setting db.max_locations_per_feature(dbopt.maxLocationsPerFeature); } else if (dbopt.maxLocationsPerFeature > 1) { diff --git a/src/options.cpp b/src/options.cpp index 333905e..84e8eda 100644 --- a/src/options.cpp +++ b/src/options.cpp @@ -110,7 +110,7 @@ string taxon_rank_names(const string& separator = ", ") //------------------------------------------------------------------- -/// @brief remove extension from database filename and extract part id +// / @brief remove extension from database filename and extract part id void sanitize_database_name(string& name, int& partId) { auto pos = name.find(".meta"); @@ -134,7 +134,7 @@ void sanitize_database_name(string& name, int& partId) //------------------------------------------------------------------- -/// @return replaces '\t' with tab char and remove other special chars +// / @return replaces '\t' with tab char and remove other special chars string sanitize_special_chars(const string& text) { return std::regex_replace( std::regex_replace(text, @@ -192,7 +192,7 @@ auto cli_usage_formatting() //------------------------------------------------------------------- -/// @brief adds 'parameter' that catches unknown args with '-' prefix +// / @brief adds 'parameter' that catches unknown args with '-' prefix clipp::parameter catch_unknown(error_messages& err) { return clipp::any(clipp::match::prefix{"-"}, @@ -248,7 +248,7 @@ void raise_default_error(const error_messages& err, * *****************************************************************************/ -/// @brief +// / @brief clipp::parameter database_parameter(string& filename, int& partId, error_messages& err) { @@ -289,7 +289,7 @@ info_level_cli(info_level& lvl, error_messages& err) //------------------------------------------------------------------- -/// @brief shared command-line options for taxonomy +// / @brief shared command-line options for taxonomy clipp::group taxonomy_cli(taxonomy_options& opt, error_messages& err) { @@ -316,7 +316,7 @@ taxonomy_cli(taxonomy_options& opt, error_messages& err) //------------------------------------------------------------------- -/// @brief shared command-line options for sequence sketching +// / @brief shared command-line options for sequence sketching clipp::group sketching_options_cli(sketching_opt& opt, error_messages& err) { @@ -362,7 +362,7 @@ sketching_options_cli(sketching_opt& opt, error_messages& err) //------------------------------------------------------------------- -/// @brief shared command-line options for sequence sketching +// / @brief shared command-line options for sequence sketching clipp::group database_storage_options_cli(database_storage_options& opt, error_messages& err) { @@ -446,13 
+446,13 @@ void augment_taxonomy_options(taxonomy_options& opt) opt.mappingPreFilesGlobal.push_back(opt.path + "assembly_summary_genbank.txt"); opt.mappingPreFilesGlobal.push_back(opt.path + "assembly_summary_genbank_historical.txt"); - //default NCBI accession to taxon map file names + // default NCBI accession to taxon map file names opt.mappingPostFiles.push_back(opt.path + "nucl_gb.accession2taxid"); opt.mappingPostFiles.push_back(opt.path + "nucl_wgs.accession2taxid"); opt.mappingPostFiles.push_back(opt.path + "nucl_est.accession2taxid"); opt.mappingPostFiles.push_back(opt.path + "nucl_gss.accession2taxid"); - //find additional maps by file extension ".accession2taxid" + // find additional maps by file extension ".accession2taxid" for (const auto& f : files_in_directory(opt.path)) { if (f.find(".accession2taxid") != string::npos) { if (std::find(opt.mappingPostFiles.begin(), @@ -476,7 +476,7 @@ void augment_taxonomy_options(taxonomy_options& opt) * * *****************************************************************************/ -/// @brief build mode command-line options +// / @brief build mode command-line options clipp::group build_mode_cli(build_options& opt, error_messages& err) { @@ -640,7 +640,7 @@ string build_mode_docs() { * *****************************************************************************/ -/// @brief modify mode command-line options +// / @brief modify mode command-line options clipp::group modify_mode_cli(build_options& opt, error_messages& err) { @@ -780,7 +780,7 @@ string modify_mode_docs() { * * *****************************************************************************/ -/// @brief command line interface for classification parameter tuning +// / @brief command line interface for classification parameter tuning clipp::group classification_params_cli(classification_options& opt, error_messages& err) { @@ -856,7 +856,7 @@ classification_params_cli(classification_options& opt, error_messages& err) //------------------------------------------------------------------- -/// @brief query mode command-line options +// / @brief query mode command-line options clipp::group classification_output_format_cli(classification_output_formatting& opt, error_messages& err) @@ -927,7 +927,7 @@ classification_output_format_cli(classification_output_formatting& opt, //------------------------------------------------------------------- -/// @brief query mode command-line options +// / @brief query mode command-line options clipp::group classification_analysis_cli(classification_analysis_options& opt, error_messages& err) { @@ -1004,7 +1004,7 @@ classification_analysis_cli(classification_analysis_options& opt, error_messages //------------------------------------------------------------------- -/// @brief query mode command-line options +// / @brief query mode command-line options clipp::group classification_evaluation_cli(classification_evaluation_options& opt, error_messages&) @@ -1043,7 +1043,7 @@ classification_evaluation_cli(classification_evaluation_options& opt, //------------------------------------------------------------------- -/// @brief query mode command-line options +// / @brief query mode command-line options clipp::group performance_options_cli(performance_tuning_options& opt, error_messages& err) { @@ -1084,7 +1084,7 @@ performance_options_cli(performance_tuning_options& opt, error_messages& err) //------------------------------------------------------------------- -/// @brief query mode command-line options +// / @brief query mode command-line options 
@@ -1084,7 +1084,7 @@ performance_options_cli(performance_tuning_options& opt, error_messages& err)
 //-------------------------------------------------------------------
-/// @brief query mode command-line options
+/// @brief query mode command-line options
 clipp::group
 query_mode_cli(query_options& opt, error_messages& err)
 {
@@ -1244,14 +1244,14 @@ void process_query_options(query_options& opt)
     if (perf.queryLimit < 0) perf.queryLimit = 0;
-    //output file consistency checks
+    // output file consistency checks
     auto& ana = opt.output.analysis;
     if (ana.targetMappingsFile == opt.queryMappingsFile) ana.targetMappingsFile.clear();
     if (ana.abundanceFile == opt.queryMappingsFile) ana.abundanceFile.clear();
     // output option checks and consistency
-    //always show query ids if hits per target list requested
+    // always show query ids if hits per target list requested
     auto& fmt = opt.output.format;
     // output ranks are the same as classification ranks
     fmt.lowestRank = cl.lowestRank;
@@ -1409,7 +1409,7 @@ string query_mode_docs() {
  *
  *
  *****************************************************************************/
-/// @brief build+query mode command-line options
+/// @brief build+query mode command-line options
 clipp::group
 build_query_mode_cli(build_query_options& opt, error_messages& err)
 {
@@ -1667,7 +1667,7 @@ string build_query_mode_docs() {
  *
  *****************************************************************************/
-/// @brief merge mode command-line options
+/// @brief merge mode command-line options
 clipp::group
 merge_mode_cli(merge_options& opt, error_messages& err)
 {
@@ -1849,7 +1849,7 @@ string merge_mode_docs() {
  *
  *
  *****************************************************************************/
-/// @brief shared command-line options for taxonomy
+/// @brief info mode command-line options
 clipp::group
 info_mode_cli(info_options& opt, error_messages& err)
 {
diff --git a/src/options.h b/src/options.h
index 9b23d2f..eb5348f 100644
--- a/src/options.h
+++ b/src/options.h
@@ -225,7 +225,7 @@ struct performance_tuning_options
     unsigned numThreads = std::min(std::thread::hardware_concurrency(), 8U);
     std::size_t batchSize = 8192;
 #endif
-    //limits number of reads per sequence source (file)
+    // limits number of reads per sequence source (file)
     std::int_least64_t queryLimit = std::numeric_limits<std::int_least64_t>::max();
     unsigned replication = 1;
@@ -239,13 +239,13 @@ struct performance_tuning_options
  *****************************************************************************/
 struct classification_options
 {
-    //ranks/taxa to classify on
+    // ranks/taxa to classify on
     taxon_rank lowestRank  = taxon_rank::Sequence;
     taxon_rank highestRank = taxon_rank::Domain;
-    std::uint16_t hitsMin = 0; //< 1 : deduced from database parameters
+    std::uint16_t hitsMin = 0; // < 1 : deduced from database parameters
     float hitsDiffFraction = 1.0f;
-    //maximum range in sequence that read (pair) is expected to be in
+    // maximum range in sequence that a read (pair) is expected to be in
     std::size_t insertSizeMax = 0;
     std::size_t maxNumCandidatesPerQuery = 2;
@@ -261,14 +261,14 @@ struct classification_options
  *****************************************************************************/
 struct classification_evaluation_options
 {
-    //show ground thruth if available
+    // show ground truth if available
     bool showGroundTruth = false;
-    //test precision (ground truth must be available)
+    // test precision (ground truth must be available)
     bool precision = false;
     bool taxonCoverage = false;
-    //show known taxon (or complete lineage if 'showLineage' on)
+    // show known taxon (or complete lineage if 'showLineage' is on)
     bool determineGroundTruth = false;
 };
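The hitsMin sentinel above ("< 1 : deduced from database parameters")
means that a value of 0 tells the classifier to derive the hit threshold
from the database. A hypothetical resolver that shows the convention;
the deduction formula below is invented for illustration, while the real
rule (see adapt_options_to_database in querying.cpp further down) depends
on the database's sketch length:

    #include <cstdint>
    #include <iostream>

    // hypothetical helper; only the "0 means deduce" convention is real
    std::uint16_t effective_hits_min(std::uint16_t configured, int dbSketchLen) {
        if (configured >= 1) return configured;            // explicit setting wins
        if (dbSketchLen >= 6) return dbSketchLen / 8 + 2;  // deduced (illustrative)
        return 2;                                          // fallback (illustrative)
    }

    int main() {
        std::cout << effective_hits_min(0, 16) << '\n';  // deduced: 4
        std::cout << effective_hits_min(5, 16) << '\n';  // explicit: 5
    }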
@@ -283,17 +283,17 @@ struct classification_evaluation_options
  *
  *****************************************************************************/
 struct formatting_tokens {
-    //prefix for each non-mapping line
+    // prefix for each non-mapping line
     std::string comment = "# ";
     std::string none = "--";
-    //column separator
+    // column separator
     std::string column = "\t|\t";
-    //taxon separator (in lineage output)
+    // taxon separator (in lineage output)
     std::string taxSeparator = ",";
-    //separates rank and taxon name or rank and taxid
+    // separates rank and taxon name or rank and taxid
     std::string rankSuffix = ":";
-    //if both taxid AND taxon name are to be printed,
-    //taxids will be enclosed by these:
+    // if both taxid AND taxon name are to be printed,
+    // taxids will be enclosed by these:
     std::string taxidPrefix = "(";
     std::string taxidSuffix = ")";
 };
@@ -306,19 +306,19 @@ struct formatting_tokens {
  *****************************************************************************/
 struct classification_output_formatting
 {
-    //how to show classification (read mappings), if 'none', only summary will be shown
+    // how to show classification (read mappings), if 'none', only summary will be shown
     map_view_mode mapViewMode = map_view_mode::all;
     bool showQueryIds = false;
-    //show all ranks that a sequence could be classified on
+    // show all ranks that a sequence could be classified on
     bool showLineage = false;
-    //don't print full lineage for unclassified queries
+    // don't print full lineage for unclassified queries
     bool collapseUnclassifiedLineages = true;
-    //print all classification info in separate columns
+    // print all classification info in separate columns
     bool useSeparateCols = false;
-    //ranks/taxa to show
+    // ranks/taxa to show
     taxon_rank lowestRank  = taxon_rank::Sequence;
     taxon_rank highestRank = taxon_rank::Domain;
@@ -335,27 +335,27 @@ struct classification_output_formatting
  *****************************************************************************/
 struct classification_analysis_options
 {
-    //show top candidate sequences and their associated k-mer hash hit count
+    // show top candidate sequences and their associated k-mer hash hit count
     bool showTopHits = false;
-    //show all k-mer-hash hits in database for each given read
+    // show all k-mer-hash hits in database for each given read
     bool showAllHits = false;
-    //show candidate position(s) in reference sequence(s)
+    // show candidate position(s) in reference sequence(s)
     bool showLocations = false;
-    //make statistics of semi-global alignment scores of queries against
-    //target candidate(s)
+    // make statistics of semi-global alignment scores of queries against
+    // target candidate(s)
     bool showAlignment = false;
-    //show list of target -> hit mappings
+    // show list of target -> hit mappings
     bool showHitsPerTargetList = false;
-    //output filename for mappings per target
+    // output filename for mappings per target
     std::string targetMappingsFile;
-    //show list of taxon -> number of reads
+    // show list of taxon -> number of reads
     bool showTaxAbundances = false;
-    //show estimated number of reads at specific rank
+    // show estimated number of reads at specific rank
     taxon_rank showAbundanceEstimatesOnRank = taxon_rank::none;
-    //output filename for mappings per taxon
+    // output filename for mappings per taxon
     std::string abundanceFile;
 };
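To see how the formatting_tokens above combine into one mapping line,
here is a stripped-down sketch; the field values and the line layout are
invented for illustration, the real writer lives in the classification
output code:

    #include <iostream>
    #include <sstream>
    #include <string>

    struct tokens {   // subset of formatting_tokens
        std::string comment = "# ", column = "\t|\t";
        std::string rankSuffix = ":", taxidPrefix = "(", taxidSuffix = ")";
    };

    int main() {
        tokens t;
        std::ostringstream line;
        line << "read_1" << t.column
             << "species" << t.rankSuffix << "Escherichia coli"
             << t.taxidPrefix << 562 << t.taxidSuffix;
        std::cout << t.comment << "query" << t.column << "taxon" << '\n'
                  << line.str() << '\n';
    }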
@@ -371,7 +371,7 @@ struct classification_output_options
     classification_output_formatting format;
     classification_evaluation_options evaluate;
-    //show classification summary
+    // show classification summary
     bool showQueryParams = true;
     bool showSummary = true;
     bool showDBproperties = false;
diff --git a/src/query_batch.cu b/src/query_batch.cu
index 461f628..bc21b46 100644
--- a/src/query_batch.cu
+++ b/src/query_batch.cu
@@ -607,7 +607,7 @@ void query_batch::generate_and_copy_top_candidates_async(
     const index_type numBlocks = hostData_[hostId].num_queries();
-    //TODO different max cand cases
+    // TODO different max cand cases
     if (maxCandidatesPerQuery_ <= 2) {
         constexpr int maxCandidates = 2;
diff --git a/src/querying.cpp b/src/querying.cpp
index 1d16703..2f7e1b4 100644
--- a/src/querying.cpp
+++ b/src/querying.cpp
@@ -70,7 +70,7 @@ void process_input_files(const vector<string>& infiles,
     if (mapFile.good()) {
         cout << "Per-Read mappings will be written to file: " << queryMappingsFilename << endl;
         perReadOut = &mapFile;
-        //default: auxiliary output same as mappings output
+        // default: auxiliary output same as mappings output
         perTargetOut = perReadOut;
         perTaxonOut = perReadOut;
     }
@@ -152,7 +152,7 @@ void process_input_files(const database& db,
         }
     }
-    //process files / file pairs separately
+    // process files / file pairs separately
     const auto& ano = opt.output.analysis;
     if (opt.splitOutputPerInput) {
@@ -167,7 +167,7 @@ void process_input_files(const database& db,
             vector<string> input;
             if (stride == 2) {
-                //process each input file pair separately
+                // process each input file pair separately
                 const auto& f1 = infiles[i];
                 const auto& f2 = infiles[i+1];
                 suffix = "_" + extract_filename(f1)
@@ -176,7 +176,7 @@ void process_input_files(const database& db,
                 input = vector<string>{f1,f2};
             }
             else {
-                //process each input file separately
+                // process each input file separately
                 const auto& f = infiles[i];
                 suffix = "_" + extract_filename(f) + ".txt";
                 input = vector<string>{f};
@@ -199,7 +199,7 @@ void process_input_files(const database& db,
                                 queryMappingsFile, targetMappingsFile, abundanceFile);
         }
     }
-    //process all input files at once
+    // process all input files at once
     else {
         process_input_files(infiles, db, opt,
                             opt.queryMappingsFile,
@@ -220,7 +220,7 @@ void adapt_options_to_database(query_options& opt, const database& db)
 {
     sketching_opt& skopt = opt.sketching;
-    //use sketching scheme from database?
+    // use sketching scheme from database?
     const auto& dbsk = db.target_sketching();
     skopt.kmerlen = dbsk.kmerlen;
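adapt_options_to_database copies the database's sketching scheme into
the query options because query sketches can only hit database features
if both sides use the same k-mer length and sketch parameters. Reduced
to its core, with simplified stand-in types (the real sketching_opt
carries more fields):

    // simplified stand-in for the sketching parameter struct
    struct sketching_params { int kmerlen = 16; int sketchlen = 16; int winstride = 113; };

    // mirror the database's scheme; a mismatched k would yield zero hits
    void adapt_to_database(sketching_params& query, const sketching_params& db) {
        query.kmerlen   = db.kmerlen;
        query.sketchlen = db.sketchlen;
        query.winstride = db.winstride;
    }

    int main() {
        sketching_params query, db;
        db.kmerlen = 22;
        adapt_to_database(query, db);
        return query.kmerlen == 22 ? 0 : 1;
    }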
@@ -245,7 +245,7 @@ void adapt_options_to_database(query_options& opt, const database& db)
     classification_options& clopt = opt.classify;
-    //deduce hit threshold from database?
+    // deduce hit threshold from database?
     if (clopt.hitsMin < 1) {
         auto sks = db.target_sketching().sketchlen;
         if (sks >= 6) {
@@ -288,15 +288,15 @@ void run_interactive_query_mode(const database& db,
         }
         else if (input.find("#") == 0) {
-            //comment line, do nothing
+            // comment line, do nothing
         }
         else {
-            //tokenize input into whitespace-separated words and build args list
+            // tokenize input into whitespace-separated words and build args list
             vector<string> args {initOpt.dbfile};
             std::istringstream iss(input);
             while (iss >> input) { args.push_back(input); }
-            //read command line options (use initial ones as defaults)
+            // read command line options (use initial ones as defaults)
             try {
                 auto opt = get_query_options(args, initOpt);
                 adapt_options_to_database(opt, db);
diff --git a/src/sequence_batch.cuh b/src/sequence_batch.cuh
index 7eebd7d..a7a48cb 100644
--- a/src/sequence_batch.cuh
+++ b/src/sequence_batch.cuh
@@ -148,7 +148,7 @@ public:
         const size_t seqLength = distance(first, last);
         // no kmers in sequence, nothing to do here
-        //TODO different case than batch full
+        // TODO different case than batch full
         if (seqLength < kmerSize) return processedWindows;
         // batch full, nothing processed
@@ -167,7 +167,7 @@ public:
             const size_t providedLength = windowSize + (availableWindows-1) * windowStride;
-            //split sequence into [first,end] and [next,last] with overlap
+            // split sequence into [first,end] and [next,last] with overlap
             end = first + providedLength;
             processedWindows = availableWindows;
         }
diff --git a/src/sequence_io.cpp b/src/sequence_io.cpp
index 29fd444..4fc8c25 100644
--- a/src/sequence_io.cpp
+++ b/src/sequence_io.cpp
@@ -310,7 +310,7 @@ void sequence_pair_reader::next (sequence_pair& seq)
             // pair = 2 consecutive sequences from same file
             const auto idx = reader1_.index();
             reader1_.next(seq.first);
-            //make sure the index is only increased after the 2nd 'next()'
+            // make sure the index is only increased after the 2nd 'next()'
             reader1_.index_offset(idx);
             reader1_.next(seq.second);
             break;
@@ -338,7 +338,7 @@ sequence_pair_reader::next_header ()
             // pair = 2 consecutive sequences from same file
             const auto idx = reader1_.index();
             auto header = reader1_.next_header();
-            //make sure the index is only increased after the 2nd 'next()'
+            // make sure the index is only increased after the 2nd 'next()'
             reader1_.index_offset(idx);
             reader1_.next_header();
             return header;
@@ -368,7 +368,7 @@ sequence_pair_reader::next_data (sequence::data_type& data1,
             // pair = 2 consecutive sequences from same file
             const auto idx = reader1_.index();
             reader1_.next_data(data1);
-            //make sure the index is only increased after the 2nd 'next()'
+            // make sure the index is only increased after the 2nd 'next()'
             reader1_.index_offset(idx);
             return reader1_.next_data(data2);
         }
@@ -398,7 +398,7 @@ sequence_pair_reader::next_header_and_data (sequence::header_type& header1,
             // pair = 2 consecutive sequences from same file
             const auto idx = reader1_.index();
             reader1_.next_header_and_data(header1, data1);
-            //make sure the index is only increased after the 2nd 'next()'
+            // make sure the index is only increased after the 2nd 'next()'
             reader1_.index_offset(idx);
             return reader1_.next_data(data2);
         }
@@ -512,9 +512,9 @@ extract_genbank_identifier (const string& text)
     auto i = text.find("gi|");
     if (i != string::npos) {
-        //skip prefix
+        // skip prefix
         i += 3;
-        //find end of number
+        // find end of number
         auto j = text.find('|', i);
         if (j == string::npos) {
             j = text.find(' ', i);
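extract_genbank_identifier above and extract_taxon_id in the next hunk
share one pattern: find a known prefix, skip it, then read digits up to
the next separator. A standalone sketch of that pattern; the helper name
is ours and, unlike the real code, it assumes digits directly follow the
prefix:

    #include <cstdint>
    #include <iostream>
    #include <string>

    std::int_least64_t extract_id_after(const std::string& text,
                                        const std::string& prefix)
    {
        auto i = text.find(prefix);
        if (i == std::string::npos) return 0;  // not found (illustrative choice)
        i += prefix.size();                    // skip prefix
        auto j = text.find_first_not_of("0123456789", i);  // find end of number
        return std::stoll(text.substr(i, j - i));
    }

    int main() {
        std::cout << extract_id_after(">gi|455281|ref|X17276.1|", "gi|") << '\n'; // 455281
        std::cout << extract_id_after(">seq7 taxid|562|", "taxid|") << '\n';      // 562
    }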
@@ -561,9 +561,9 @@ extract_taxon_id (const string& text)
     auto i = text.find("taxid");
     if (i != string::npos) {
-        //skip "taxid" + separator char
+        // skip "taxid" + separator char
         i += 6;
-        //find end of number
+        // find end of number
         auto j = text.find('|', i);
         if (j == string::npos) {
             j = text.find(' ', i);
diff --git a/src/sequence_io.h b/src/sequence_io.h
index 8c4bb4b..89248c4 100644
--- a/src/sequence_io.h
+++ b/src/sequence_io.h
@@ -228,6 +228,6 @@
 std::int_least64_t extract_taxon_id (const std::string&);
-} //namespace mc
+} // namespace mc
 #endif
diff --git a/src/sequence_iostream.h b/src/sequence_iostream.h
index 24b6681..31dbeed 100644
--- a/src/sequence_iostream.h
+++ b/src/sequence_iostream.h
@@ -18,7 +18,7 @@
  * You should have received a copy of the GNU General Public License
  * along with this program. If not, see <http://www.gnu.org/licenses/>.
  *
- * FASTA / FASTQ file parsing inspired by https://github.com/attractivechaos/klib
+ * FASTA / FASTQ file parsing inspired by https://github.com/attractivechaos/klib
  * by Attractive Chaos
  *
  *****************************************************************************/
@@ -395,6 +395,6 @@ class char_istream
 };
-} //namespace mc
+} // namespace mc
 #endif
diff --git a/src/stat_combined.cu b/src/stat_combined.cu
index 59f0020..5ca3926 100644
--- a/src/stat_combined.cu
+++ b/src/stat_combined.cu
@@ -164,4 +164,4 @@ void statistics_accumulator_gpu::accumulate(Value * values, size_type numValues)
 template void statistics_accumulator_gpu::accumulate(uint64_t *, size_type);
-} //namespace mc
+} // namespace mc
diff --git a/src/stat_combined.cuh b/src/stat_combined.cuh
index 48d639c..dada08a 100644
--- a/src/stat_combined.cuh
+++ b/src/stat_combined.cuh
@@ -215,7 +215,7 @@ private:
 };
-} //namespace mc
+} // namespace mc
 #endif
diff --git a/src/stat_combined.h b/src/stat_combined.h
index 97817b0..7ce8436 100644
--- a/src/stat_combined.h
+++ b/src/stat_combined.h
@@ -118,7 +118,7 @@ class statistics_accumulator
-} //namespace mc
+} // namespace mc
 #endif
diff --git a/src/stat_confusion.h b/src/stat_confusion.h
index 926440a..3f87dbb 100644
--- a/src/stat_confusion.h
+++ b/src/stat_confusion.h
@@ -96,16 +96,16 @@ class confusion_statistics
         return (true_pos() + true_neg()) / double(total());
     }
-    //true positive rate, hit rate, recall
+    // true positive rate, hit rate, recall
     double sensitivity() const noexcept {
         return true_pos() / double(condition_pos());
     }
-    //true negative rate
+    // true negative rate
     double specificity() const noexcept {
         return true_neg() / double(condition_neg());
     }
-    //positive predictive value
+    // positive predictive value
     double precision() const noexcept {
         return true_pos() / double(outcome_pos());
     }
diff --git a/src/stat_moments.h b/src/stat_moments.h
index bc65f93..5135b82 100644
--- a/src/stat_moments.h
+++ b/src/stat_moments.h
@@ -253,10 +253,10 @@ skewness(InputIterator begin, InputIterator end)
     const auto n2 = n*n;
     s *= s;
-    //2nd central moment
+    // 2nd central moment
     auto cm2 = (s2 - s /n) /(n - fp_t(1));
-    //3rd central moment
+    // 3rd central moment
     auto cm3 = (n2*s3 - fp_t(3)*n*(s2*s2) + fp_t(2)*s)/(n*n2);
     return fp_t(cm3 / pow(cm2, fp_t(3)/fp_t(2)) );
@@ -351,10 +351,10 @@ kurtosis(InputIterator begin, InputIterator end)
     const auto n2 = n*n;
     const auto ss = s*s ;
-    //2nd central moment
+    // 2nd central moment
     const auto cm2 = ((s2 - ss /n) /(n - fp_t(1)));
-    //4th central moment
+    // 4th central moment
     const auto cm4 = ((n2*n*s4 - fp_t(4)*n2*(s*s3)
                      + fp_t(6)*n*(ss*s2) - fp_t(3)*(ss*s))
                     / (n2*n2));
@@ -1049,7 +1049,7 @@
 template
 using kurtosis_accumulator = moments_accumulator;
-} //namespace mc
+} // namespace mc
 #endif
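For reference, the central-moment identities that skewness() and
kurtosis() evaluate from the raw power sums s1..s4 of n samples:
cm2 = (s2 - s1^2/n) / (n - 1), cm3 = (n^2*s3 - 3n*s1*s2 + 2*s1^3) / n^3,
cm4 = (n^3*s4 - 4n^2*s1*s3 + 6n*s1^2*s2 - 3*s1^4) / n^4, with
skewness = cm3 / cm2^(3/2) and kurtosis = cm4 / cm2^2. A self-contained
recomputation for checking (it mixes the sample-variance cm2 with the
population cm3/cm4, as the functions above do):

    #include <cmath>
    #include <cstdio>
    #include <vector>

    int main() {
        std::vector<double> x { 1, 2, 2, 3, 7 };
        double n = double(x.size()), s1 = 0, s2 = 0, s3 = 0, s4 = 0;
        for (double v : x) { s1 += v; s2 += v*v; s3 += v*v*v; s4 += v*v*v*v; }
        double cm2 = (s2 - s1*s1/n) / (n - 1);
        double cm3 = (n*n*s3 - 3*n*s1*s2 + 2*s1*s1*s1) / (n*n*n);
        double cm4 = (n*n*n*s4 - 4*n*n*s1*s3 + 6*n*s1*s1*s2 - 3*s1*s1*s1*s1)
                     / (n*n*n*n);
        std::printf("skewness: %.4f  kurtosis: %.4f\n",
                    cm3 / std::pow(cm2, 1.5), cm4 / (cm2*cm2));
    }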
diff --git a/src/taxonomy.h b/src/taxonomy.h
index 28ab5d3..48b0598 100644
--- a/src/taxonomy.h
+++ b/src/taxonomy.h
@@ -283,7 +283,7 @@ class taxonomy
         index_t index;
     };
-    //default: empty taxon
+    // default: empty taxon
     explicit
     taxon(taxon_id taxonId = none_id(),
           taxon_id parentId = none_id(),
@@ -569,7 +569,7 @@ class taxonomy
             if (it->rank() != rank::none) {
                 return &(*it);
             }
-            if (it->parent_ == id) break; //break cycles
+            if (it->parent_ == id) break; // break cycles
             id = it->parent_;
         }
         return nullptr;
@@ -594,7 +594,7 @@ class taxonomy
             if (it->rank() != rank::none) {
                 lin[static_cast<int>(it->rank())] = &(*it);
             }
-            if (it->parent_ == id) break; //break cycles
+            if (it->parent_ == id) break; // break cycles
             id = it->parent_;
         }
         return lin;
@@ -635,7 +635,7 @@ class taxonomy
         auto it = taxa_.find(taxon{id});
         if (it != taxa_.end()) {
             lin.push_back(&(*it));
-            if (it->parent_ != id) { //break cycles
+            if (it->parent_ != id) { // break cycles
                 id = it->parent_;
             } else {
                 id = none_id();
@@ -888,13 +888,13 @@ class ranked_lineages_cache
     }
     //---------------------------------------------------------------
-    /// @brief only works if tax is cached - make sure to call update first
+    /// @brief only works if tax is cached - make sure to call update() first
     const ranked_lineage&
     operator [](const taxon* tax) const {
         return tax ? operator [](*tax) : empty_;
     }
     //-----------------------------------------------------
-    /// @brief only works if tax is cached - make sure to call update first
+    /// @brief only works if tax is cached - make sure to call update() first
     const ranked_lineage&
     operator [](const taxon& tax) const {
         assert(outdated_ == false);
@@ -930,7 +930,7 @@ class ranked_lineages_of_targets
     using taxon_id   = taxonomy::taxon_id;
     using taxon_rank = taxonomy::rank;
-    //use negative numbers for sequence level taxon ids
+    // use negative numbers for sequence-level taxon ids
     static constexpr taxon_id taxon_id_of_target(target_id id) noexcept {
         return -taxon_id(id)-1;
     }
@@ -1380,7 +1380,7 @@ class taxonomy_cache
         for (auto& t : tax.non_target_taxa()) {
             taxa_.insert_or_replace_non_target_taxon(std::move(t));
         }
-        //re-initialize ranks cache
+        // re-initialize ranks cache
         targetLineages_.reset();
         taxonLineages_.init_from_targets(targetLineages_.lineages());
     }
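ranked_lineages_of_targets above keys sequence-level targets by negative
taxon ids: taxon_id_of_target(id) = -taxon_id(id) - 1, which keeps target
ids disjoint from the nonnegative ids used by the NCBI taxonomy. A quick
round-trip sketch with simplified types (the real taxon_id/target_id
definitions live in taxonomy.h):

    #include <cassert>
    #include <cstdint>

    using taxon_id  = std::int64_t;    // simplified
    using target_id = std::uint32_t;   // simplified

    constexpr taxon_id taxon_id_of_target(target_id id) noexcept {
        return -taxon_id(id) - 1;      // 0 -> -1, 1 -> -2, ...
    }
    constexpr target_id target_of_taxon_id(taxon_id tid) noexcept {
        return target_id(-(tid + 1));  // inverse mapping
    }

    int main() {
        static_assert(taxon_id_of_target(0) == -1, "first target maps to -1");
        assert(target_of_taxon_id(taxon_id_of_target(42)) == 42);
        return 0;
    }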
diff --git a/src/taxonomy_io.cpp b/src/taxonomy_io.cpp
index 8389c3c..f9f29a0 100644
--- a/src/taxonomy_io.cpp
+++ b/src/taxonomy_io.cpp
@@ -64,8 +64,8 @@ make_taxonomic_hierarchy(const string& taxNodesFile,
     const bool showInfo = infoLvl != info_level::silent;
-    //read scientific taxon names
-    //failure to do so will not be fatal
+    // read scientific taxon names
+    // failure to do so will not be fatal
     auto taxonNames = std::map<taxon_id,string>{};
     std::ifstream is{taxNamesFile};
@@ -103,7 +103,7 @@ make_taxonomic_hierarchy(const string& taxNodesFile,
     }
     is.close();
-    //read merged taxa
+    // read merged taxa
     taxonomy tax;
     auto mergedTaxa = std::map<taxon_id,taxon_id>{};
@@ -129,7 +129,7 @@ make_taxonomic_hierarchy(const string& taxNodesFile,
     }
     is.close();
-    //read taxonomic structure
+    // read taxonomic structure
     is.open(taxNodesFile);
     // each line consists of taxonId, parentId, rank, ...
     // field terminator is "\t|\t"
@@ -148,7 +148,7 @@ make_taxonomic_hierarchy(const string& taxNodesFile,
         getline(is, rankName, '\t');
         forward(is, '\n');
-        //get taxon name
+        // get taxon name
         auto it = taxonNames.find(taxonId);
         auto taxonName = (it != taxonNames.end()) ? it->second : string("--");
@@ -156,8 +156,8 @@ make_taxonomic_hierarchy(const string& taxNodesFile,
             taxonName = "<" + std::to_string(taxonId) + ">";
         }
-        //replace ids with new ids according to mergers
-        //TODO this is stupid, handle mergers properly
+        // replace ids with new ids according to mergers
+        // TODO this is stupid, handle mergers properly
         auto mi = mergedTaxa.find(taxonId);
         if (mi != mergedTaxa.end()) taxonId = mi->second;
         mi = mergedTaxa.find(parentId);
@@ -173,10 +173,10 @@ make_taxonomic_hierarchy(const string& taxNodesFile,
         return tax;
     }
-    //set rank of root
+    // set rank of root
     tax.reset_rank(1, taxonomy::rank::root);
-    //make sure every taxon has a rank designation
+    // make sure every taxon has a rank designation
     // tax.rank_all_unranked();
     return tax;
 }
@@ -201,15 +201,15 @@ void read_sequence_to_taxon_id_mapping(const string& mappingFile,
     const auto fsize = file_size(mappingFile);
     bool showProgress = showInfo && fsize > 100000000;
-    //assembly_summary files have up to 550K lines
-    //update progress indicator every 128K lines
+    // assembly_summary files have up to 550K lines
+    // update progress indicator every 128K lines
     size_t step = 0;
     size_t statStep = 1UL << 17;
     if (showProgress) show_progress_indicator(cout, 0);
-    //read first line(s) and determine the columns which hold
-    //sequence ids (keys) and taxon ids
+    // read first line(s) and determine the columns which hold
+    // sequence ids (keys) and taxon ids
     int headerRow = 0;
     {
         string line;
@@ -220,7 +220,7 @@ void read_sequence_to_taxon_id_mapping(const string& mappingFile,
         if (headerRow > 0) --headerRow;
     }
-    //reopen and forward to header row
+    // reopen and forward to header row
     is.close();
     is.open(mappingFile);
     {
@@ -229,7 +229,7 @@ void read_sequence_to_taxon_id_mapping(const string& mappingFile,
     }
     if (is.good()) {
-        //process header row
+        // process header row
         int keycol = 0;
         int taxcol = 0;
         {
@@ -253,9 +253,9 @@ void read_sequence_to_taxon_id_mapping(const string& mappingFile,
             ++col;
         }
     }
-    //taxid column assignment not found
+    // taxid column assignment not found
     if (taxcol < 1) {
-        //reopen file and use 1st column as key and 2nd column as taxid
+        // reopen file and use 1st column as key and 2nd column as taxid
         is.close();
         is.open(mappingFile);
         taxcol = 1;
@@ -264,10 +264,10 @@ void read_sequence_to_taxon_id_mapping(const string& mappingFile,
     string key;
     taxon_id taxonId;
     while (is.good()) {
-        //forward to column with key
+        // forward to column with key
         for (int i = 0; i < keycol; ++i) forward(is, '\t');
         is >> key;
-        //forward to column with taxid
+        // forward to column with taxid
         for (int i = 0; i < taxcol; ++i) forward(is, '\t');
         is >> taxonId;
         forward(is, '\n');
@@ -294,8 +294,8 @@ make_sequence_to_taxon_id_map(const std::vector<string>& localMappingFilenames,
                               const std::vector<string>& infilenames,
                               info_level infoLvl)
 {
-    //gather all taxonomic mapping files that can be found in any
-    //of the input directories
+    // gather all taxonomic mapping files that can be found in any
+    // of the input directories
     auto indirs = unique_directories(infilenames);
     auto map = std::map<string,taxon_id>{};