From 8c15147be4c3b7e38e7cbd03449bfffd89f3329b Mon Sep 17 00:00:00 2001 From: Greg Gibeling Date: Fri, 29 Jan 2021 09:55:05 -0800 Subject: [PATCH 1/2] Convert "properly read" constants to command line parameters --- bwamem.c | 5 +++++ bwamem.h | 4 ++++ bwamem_pair.c | 13 +++++++------ fastmap.c | 8 +++++++- 4 files changed, 23 insertions(+), 7 deletions(-) diff --git a/bwamem.c b/bwamem.c index 966e8d0f..6fac0032 100644 --- a/bwamem.c +++ b/bwamem.c @@ -105,6 +105,11 @@ mem_opt_t *mem_opt_init() o->min_chain_weight = 0; o->max_chain_extend = 1<<30; o->mapQ_coef_len = 50; o->mapQ_coef_fac = log(o->mapQ_coef_len); + + o->outlier_bound_mem = 2.0; + o->mapping_bound_mem = 3.0; + o->max_stddev_mem = 4.0; + bwa_fill_scmat(o->a, o->b, o->mat); return o; } diff --git a/bwamem.h b/bwamem.h index 0a0e3bb4..76341fba 100644 --- a/bwamem.h +++ b/bwamem.h @@ -81,6 +81,10 @@ typedef struct { int max_matesw; // perform maximally max_matesw rounds of mate-SW for each end int max_XA_hits, max_XA_hits_alt; // if there are max_hits or fewer, output them all int8_t mat[25]; // scoring matrix; mat[0] == 0 if unset + + float outlier_bound_mem; // controls how to calculate low and high boundaries for computing mean and std.dev + float mapping_bound_mem; // controls how to calculate low and high boundaries for proper pairs. Low and high boundaries is calculated : F(Q1, Q2, Q3, mapping_bound_mem, max_stddev_mem) where Q2 is median insert size, Q1 and Q3 are lower and higher quartile (also see max_stddev_mem). + float max_stddev_mem; // controls how to calculate low and high boundaries for proper pairs. Low and high boundaries is calculated : F(Q1, Q2, Q3, mapping_bound_mem, max_stddev_mem) where Q2 is median insert size, Q1 and Q3 are lower and higher quartile (also see mapping_bound_mem). } mem_opt_t; typedef struct { diff --git a/bwamem_pair.c b/bwamem_pair.c index ef795218..6d9fb108 100644 --- a/bwamem_pair.c +++ b/bwamem_pair.c @@ -42,6 +42,7 @@ #define MIN_RATIO 0.8 #define MIN_DIR_CNT 10 #define MIN_DIR_RATIO 0.05 + #define OUTLIER_BOUND 2.0 #define MAPPING_BOUND 3.0 #define MAX_STDDEV 4.0 @@ -103,9 +104,9 @@ void mem_pestat(const mem_opt_t *opt, int64_t l_pac, int n, const mem_alnreg_v * p25 = q->a[(int)(.25 * q->n + .499)]; p50 = q->a[(int)(.50 * q->n + .499)]; p75 = q->a[(int)(.75 * q->n + .499)]; - r->low = (int)(p25 - OUTLIER_BOUND * (p75 - p25) + .499); + r->low = (int)(p25 - opt->outlier_bound_mem * (p75 - p25) + .499); if (r->low < 1) r->low = 1; - r->high = (int)(p75 + OUTLIER_BOUND * (p75 - p25) + .499); + r->high = (int)(p75 + opt->outlier_bound_mem * (p75 - p25) + .499); fprintf(stderr, "[M::%s] (25, 50, 75) percentile: (%d, %d, %d)\n", __func__, p25, p50, p75); fprintf(stderr, "[M::%s] low and high boundaries for computing mean and std.dev: (%d, %d)\n", __func__, r->low, r->high); for (i = x = 0, r->avg = 0; i < q->n; ++i) @@ -117,10 +118,10 @@ void mem_pestat(const mem_opt_t *opt, int64_t l_pac, int n, const mem_alnreg_v * r->std += (q->a[i] - r->avg) * (q->a[i] - r->avg); r->std = sqrt(r->std / x); fprintf(stderr, "[M::%s] mean and std.dev: (%.2f, %.2f)\n", __func__, r->avg, r->std); - r->low = (int)(p25 - MAPPING_BOUND * (p75 - p25) + .499); - r->high = (int)(p75 + MAPPING_BOUND * (p75 - p25) + .499); - if (r->low > r->avg - MAX_STDDEV * r->std) r->low = (int)(r->avg - MAX_STDDEV * r->std + .499); - if (r->high < r->avg + MAX_STDDEV * r->std) r->high = (int)(r->avg + MAX_STDDEV * r->std + .499); + r->low = (int)(p25 - opt->mapping_bound_mem * (p75 - p25) + .499); + r->high = (int)(p75 + opt->mapping_bound_mem * (p75 - p25) + .499); + if (r->low > r->avg - opt->max_stddev_mem * r->std) r->low = (int)(r->avg - opt->max_stddev_mem * r->std + .499); + if (r->high < r->avg + opt->max_stddev_mem * r->std) r->high = (int)(r->avg + opt->max_stddev_mem * r->std + .499); if (r->low < 1) r->low = 1; fprintf(stderr, "[M::%s] low and high boundaries for proper pairs: (%d, %d)\n", __func__, r->low, r->high); free(q->a); diff --git a/fastmap.c b/fastmap.c index c865426c..53d791d5 100644 --- a/fastmap.c +++ b/fastmap.c @@ -156,8 +156,11 @@ int main_mem(int argc, char *argv[]) aux.opt = opt = mem_opt_init(); memset(&opt0, 0, sizeof(mem_opt_t)); - while ((c = getopt(argc, argv, "51qpaMCSPVYjuk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:o:f:W:x:G:h:y:K:X:H:")) >= 0) { + while ((c = getopt(argc, argv, "51qpaMCSPVYjuk:b:e:g:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:o:f:W:x:G:h:y:K:X:H:")) >= 0) { if (c == 'k') opt->min_seed_len = atoi(optarg), opt0.min_seed_len = 1; + else if (c == 'b') opt->outlier_bound_mem = atof(optarg); + else if (c == 'e') opt->mapping_bound_mem = atof(optarg); + else if (c == 'g') opt->max_stddev_mem = atof(optarg); else if (c == '1') no_mt_io = 1; else if (c == 'x') mode = optarg; else if (c == 'w') opt->w = atoi(optarg), opt0.w = 1; @@ -267,6 +270,9 @@ int main_mem(int argc, char *argv[]) fprintf(stderr, "Usage: bwa mem [options] [in2.fq]\n\n"); fprintf(stderr, "Algorithm options:\n\n"); fprintf(stderr, " -t INT number of threads [%d]\n", opt->n_threads); + fprintf(stderr, " -b FLOAT controls how to calculate low and high boundaries for computing mean and std.dev [%f]\n", opt->outlier_bound_mem); + fprintf(stderr, " -e FLOAT mapping bound mem. Controls how to calculate low and high boundaries for proper pairs. [%f]\n", opt->mapping_bound_mem); + fprintf(stderr, " -g FLOAT max stddev mem. Controls how to calculate low and high boundaries for proper pairs. [%f]\n", opt->max_stddev_mem); fprintf(stderr, " -k INT minimum seed length [%d]\n", opt->min_seed_len); fprintf(stderr, " -w INT band width for banded alignment [%d]\n", opt->w); fprintf(stderr, " -d INT off-diagonal X-dropoff [%d]\n", opt->zdrop); From de39326ecb525b617a5688615cd85dfda7193b25 Mon Sep 17 00:00:00 2001 From: Greg Gibeling Date: Fri, 29 Jan 2021 09:56:29 -0800 Subject: [PATCH 2/2] Remove unused constants --- bwamem_pair.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/bwamem_pair.c b/bwamem_pair.c index 6d9fb108..d681891f 100644 --- a/bwamem_pair.c +++ b/bwamem_pair.c @@ -43,10 +43,6 @@ #define MIN_DIR_CNT 10 #define MIN_DIR_RATIO 0.05 -#define OUTLIER_BOUND 2.0 -#define MAPPING_BOUND 3.0 -#define MAX_STDDEV 4.0 - static inline int mem_infer_dir(int64_t l_pac, int64_t b1, int64_t b2, int64_t *dist) { int64_t p2;