Skip to content

Commit

Permalink
Output the MD tag for each XA record with -D.
Browse files Browse the repository at this point in the history
  • Loading branch information
nh13 committed Aug 22, 2024
1 parent 8d9f50c commit 31fdac3
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 9 deletions.
27 changes: 22 additions & 5 deletions bwase.c
Original file line number Diff line number Diff line change
Expand Up @@ -291,11 +291,17 @@ void bwa_correct_trimmed(bwa_seq_t *s)
s->len = s->full_len;
}

void bwa_refine_gapped(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs, ubyte_t *_pacseq)

void bwa_refine_gapped(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs, ubyte_t *_pacseq)
{
bwa_refine_gapped2(bns, n_seqs, seqs, _pacseq, 0);
}

void bwa_refine_gapped2(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs, ubyte_t *_pacseq, int with_md)
{
ubyte_t *pacseq;
int i, j, k;
kstring_t *str;
kstring_t *str = (kstring_t*)calloc(1, sizeof(kstring_t));

if (!_pacseq) {
pacseq = (ubyte_t*)calloc(bns->l_pac/4+1, 1);
Expand All @@ -312,15 +318,25 @@ void bwa_refine_gapped(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs, ubyte_t
q->cigar = bwa_refine_gapped_core(bns->l_pac, pacseq, s->len, q->strand? s->rseq : s->seq, q->ref_shift, &q->pos, &n_cigar);
q->n_cigar = n_cigar;
if (q->cigar) s->multi[k++] = *q;
} else s->multi[k++] = *q;
} else {
s->multi[k++] = *q;
if (with_md) { // create the cigar, needed for bwa_cal_md1 below
q->n_cigar = 1;
q->cigar = calloc(q->n_cigar, sizeof(uint32_t));
q->cigar[0] = __cigar_create(FROM_M, s->len);
}
}
if (with_md) {
int nm;
q->md = bwa_cal_md1(q->n_cigar, q->cigar, s->len, q->pos, q->strand? s->rseq : s->seq, bns->l_pac, pacseq, str, &nm);
}
}
s->n_multi = k; // this squeezes out gapped alignments which failed the CIGAR generation
if (s->type == BWA_TYPE_NO_MATCH || s->type == BWA_TYPE_MATESW || s->n_gapo == 0) continue;
s->cigar = bwa_refine_gapped_core(bns->l_pac, pacseq, s->len, s->strand? s->rseq : s->seq, s->ref_shift, &s->pos, &s->n_cigar);
if (s->cigar == 0) s->type = BWA_TYPE_NO_MATCH;
}
// generate MD tag
str = (kstring_t*)calloc(1, sizeof(kstring_t));
for (i = 0; i != n_seqs; ++i) {
bwa_seq_t *s = seqs + i;
if (s->type != BWA_TYPE_NO_MATCH) {
Expand Down Expand Up @@ -480,7 +496,8 @@ void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, in
for (k = 0; k < q->n_cigar; ++k)
err_printf("%d%c", __cigar_len(q->cigar[k]), "MIDS"[__cigar_op(q->cigar[k])]);
} else err_printf("%dM", p->len);
err_printf(",%d;", q->gap + q->mm);
if (q->md) err_printf(",%d,%s;", q->gap + q->mm, q->md);
else err_printf(",%d,.;", q->gap + q->mm);
}
}
}
Expand Down
1 change: 1 addition & 0 deletions bwase.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ extern "C" {
void bwa_cal_pac_pos(const bntseq_t *bns, const char *prefix, int n_seqs, bwa_seq_t *seqs, int max_mm, float fnr);
void bwa_cal_pac_pos_with_bwt(const bntseq_t *bns, const char *prefix, int n_seqs, bwa_seq_t *seqs, int max_mm, float fnr, bwt_t *bwt);
void bwa_refine_gapped(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs, ubyte_t *_pacseq);
void bwa_refine_gapped2(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs, ubyte_t *_pacseq, int with_md);
void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, int mode, int max_top2);

#ifdef __cplusplus
Expand Down
11 changes: 7 additions & 4 deletions bwtaln.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,11 @@ gap_opt_t *gap_init_opt()
o->n_threads = 1;
o->max_top2 = 30;
o->trim_qual = 0;
o->n_occ = 3;
o->sam = 0;
o->interactive_mode = 0;
o->rg_line = NULL;
o->n_occ = 3;
o->interactive_mode = 0;
o->with_md = 0;
return o;
}

Expand Down Expand Up @@ -247,7 +248,7 @@ void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt)
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock();

fprintf(stderr, "[bwa_aln_core] refine gapped alignments... ");
bwa_refine_gapped(bns, n_seqs, seqs, 0);
bwa_refine_gapped2(bns, n_seqs, seqs, 0, opt->with_md);
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock();

fprintf(stderr, "[bwa_aln_core] print alignments... ");
Expand Down Expand Up @@ -283,7 +284,7 @@ int bwa_aln(int argc, char *argv[])
char *prefix;

opt = gap_init_opt();
while ((c = getopt(argc, argv, "n:o:e:i:d:l:k:LR:m:t:NM:O:E:q:f:b012IYB:Sr:X:Z")) >= 0) {
while ((c = getopt(argc, argv, "n:o:e:i:d:l:k:LR:m:t:NM:O:E:q:f:b012IYB:Sr:X:ZD")) >= 0) {
switch (c) {
case 'n':
if (strstr(optarg, ".")) opt->fnr = atof(optarg), opt->max_diff = -1;
Expand Down Expand Up @@ -318,6 +319,7 @@ int bwa_aln(int argc, char *argv[])
break;
case 'X': opt->n_occ = atoi(optarg); break;
case 'Z': opt->interactive_mode = 1; break;
case 'D': opt->with_md = 1; break;
default: return 1;
}
}
Expand Down Expand Up @@ -359,6 +361,7 @@ int bwa_aln(int argc, char *argv[])
fprintf(stderr, " -r STR read group line for SAM output\n");
fprintf(stderr, " -X INT maximum # of hits to report (SAM output only, equivalent to -n in samse)\n");
fprintf(stderr, " -Z interactive mode (no input buffer and empty lines between records force processing)\n");
fprintf(stderr, " -D output the MD to each alignment in the XA tag, otherwise use \".\"\n");
fprintf(stderr, "\n");
return 1;
}
Expand Down
2 changes: 2 additions & 0 deletions bwtaln.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ typedef struct {
int ref_shift;
bwtint_t pos;
bwa_cigar_t *cigar;
char *md;
} bwt_multi1_t;

typedef struct {
Expand Down Expand Up @@ -117,6 +118,7 @@ typedef struct {
char *rg_line;
int n_occ;
int interactive_mode;
int with_md;
} gap_opt_t;

#define BWA_PET_STD 1
Expand Down

0 comments on commit 31fdac3

Please sign in to comment.