Skip to content

Commit

Permalink
Added option to add chimera score in fasta output files
Browse files Browse the repository at this point in the history
  • Loading branch information
torognes committed Jan 26, 2016
1 parent 082d86b commit 50eebf2
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 21 deletions.
11 changes: 9 additions & 2 deletions man/vsearch.1
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,11 @@ database.
No vote pseudo-count (parameter \fIn\fR in the chimera scoring
function) (default value is 1.4).
.TP
.B \-\-fasta_score
Add the chimera score to the headers in the fasta output files for chimeras,
non-chimeras and borderline sequences. A string similar to
";uchime_denovo=0.1234;" or ";uchime_ref=5.6789;" will be added.
.TP
.BI \-\-mindiffs\~ "positive integer"
Minimum number of differences per segment (default value is 3).
.TP
Expand Down Expand Up @@ -2369,7 +2374,8 @@ Fixed bug in aligned sequences produced with \-\-fastapairs and
.TP
.BR v1.9.7\~ "released January 12th, 2016"
Masking behavior is changed somewhat to keep the letter case of the
input sequences unchanged when no masking is performed. Masking is now performed also during chimera detection. Documentation updated.
input sequences unchanged when no masking is performed. Masking is now
performed also during chimera detection. Documentation updated.
.TP
.BR v1.9.8\~ "released January 22nd, 2016"
Fixed bug causing segfault when chimera detection is performed on
Expand All @@ -2384,7 +2390,8 @@ Fixed bug related to masking and lower case database sequences.
.TP
.BR v1.10.1\~ "released January 26, 2016"
Improved merging of paired-end reads and adjusted defaults slightly.
Removed progress indicator when stderr is not a terminal.
Removed progress indicator when stderr is not a terminal. Added
\-\-fasta_score option to report chimera scores in FASTA files.
.RE
.LP
.\" ============================================================================
Expand Down
100 changes: 85 additions & 15 deletions src/chimera.cc
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ struct chimera_info_s
char * ignore;

struct hit * all_hits;
double best_h;
};

static struct chimera_info_s * cia;
Expand Down Expand Up @@ -674,6 +675,8 @@ int eval_parents(struct chimera_info_s * ci)
}
}

ci->best_h = best_h > 0 ? best_h : 0.0;

if (best_h >= 0.0)
{
status = 2;
Expand Down Expand Up @@ -1082,6 +1085,45 @@ void chimera_thread_exit(struct chimera_info_s * ci)
free(ci->query_head);
}

/*
  Print a FASTA entry whose header carries a chimera score annotation.

  A new header is built from `head` followed by "<score_name>=<score>;",
  with the score printed with four decimals. If `head` does not already
  end with a semicolon (or is empty), a ';' separator is inserted first,
  so that the added text always reads like ";uchime_denovo=0.1234;".

  fp          output stream handed on to fasta_print
  head        NUL-terminated original header
  seq, len    sequence and its length, handed on to fasta_print unchanged
  score_name  annotation key, e.g. "uchime_ref" or "uchime_denovo"
  score       chimera score to report
*/
void fasta_print_with_score(FILE * fp,
                            char * head,
                            char * seq,
                            int len,
                            const char * score_name,
                            double score)
{
  size_t head_len = strlen(head);

  /* Buffer size: header + key + the integer digits of the score
     (estimated with log10) + 16 bytes of slack, which covers the leading
     digit, the decimal point, four decimals, '=', the optional ';'
     separator, the trailing ';' and the terminating NUL. */
  int alloc = head_len + strlen(score_name) + log10(MAX(score,1)) + 16;
  char * newheader = (char*) xmalloc(alloc);

  /* Only look at the last character when there is one; an empty header
     is treated like one that lacks the trailing ';'. */
  const char * separator =
    ((head_len > 0) && (head[head_len-1] == ';')) ? "" : ";";

  snprintf(newheader, alloc, "%s%s%s=%.4lf;",
           head, separator, score_name, score);

  fasta_print(fp, newheader, seq, len);
  free(newheader);
}

/*
  Like fasta_print_relabel, but with a chimera score annotation added to
  the header before it is passed on.

  A new header is built from the first `head_len` characters of `header`
  followed by "<score_name>=<score>;", with the score printed with four
  decimals. If the header does not already end with a semicolon (or is
  empty), a ';' separator is inserted first, so that the added text always
  reads like ";uchime_denovo=0.1234;".

  fp                  output stream handed on to fasta_print_relabel
  seq, len            sequence and its length, handed on unchanged
  header, head_len    original header and its length
  abundance, ordinal  handed on to fasta_print_relabel unchanged
  score_name          annotation key, e.g. "uchime_ref" or "uchime_denovo"
  score               chimera score to report
*/
void fasta_print_relabel_with_score(FILE * fp,
                                    char * seq,
                                    int len,
                                    char * header,
                                    int head_len,
                                    int abundance,
                                    int ordinal,
                                    const char * score_name,
                                    double score)
{
  /* Buffer size: header + key + the integer digits of the score
     (estimated with log10) + 16 bytes of slack, which covers the leading
     digit, the decimal point, four decimals, '=', the optional ';'
     separator, the trailing ';' and the terminating NUL. */
  int alloc = head_len + strlen(score_name) + log10(MAX(score,1)) + 16;
  char * newheader = (char*) xmalloc(alloc);

  /* Only look at the last character when there is one; an empty header
     is treated like one that lacks the trailing ';'. */
  const char * separator =
    ((head_len > 0) && (header[head_len-1] == ';')) ? "" : ";";

  /* Print at most head_len header characters so that the output always
     fits the buffer, which was sized from head_len. */
  snprintf(newheader, alloc, "%.*s%s%s=%.4lf;",
           head_len, header, separator, score_name, score);

  fasta_print_relabel(fp, seq, len,
                      newheader, strlen(newheader),
                      abundance, ordinal);
  free(newheader);
}

unsigned long chimera_thread_core(struct chimera_info_s * ci)
{
chimera_thread_init(ci);
Expand Down Expand Up @@ -1291,10 +1333,18 @@ unsigned long chimera_thread_core(struct chimera_info_s * ci)

if (opt_chimeras)
{
fasta_print(fp_chimeras,
ci->query_head,
ci->query_seq,
ci->query_len);
if (opt_fasta_score)
fasta_print_with_score(fp_chimeras,
ci->query_head,
ci->query_seq,
ci->query_len,
opt_uchime_ref ? "uchime_ref" : "uchime_denovo",
ci->best_h);
else
fasta_print(fp_chimeras,
ci->query_head,
ci->query_seq,
ci->query_len);
}
}

Expand All @@ -1304,10 +1354,18 @@ unsigned long chimera_thread_core(struct chimera_info_s * ci)
borderline_abundance += ci->query_size;
if (opt_borderline)
{
fasta_print(fp_borderline,
ci->query_head,
ci->query_seq,
ci->query_len);
if (opt_fasta_score)
fasta_print_with_score(fp_borderline,
ci->query_head,
ci->query_seq,
ci->query_len,
opt_uchime_ref ? "uchime_ref" : "uchime_denovo",
ci->best_h);
else
fasta_print(fp_borderline,
ci->query_head,
ci->query_seq,
ci->query_len);
}
}

Expand Down Expand Up @@ -1337,13 +1395,25 @@ unsigned long chimera_thread_core(struct chimera_info_s * ci)

if (opt_nonchimeras)
{
fasta_print_relabel(fp_nonchimeras,
ci->query_seq,
ci->query_len,
ci->query_head,
ci->query_head_len,
ci->query_size,
nonchimera_count);
if (opt_fasta_score)
fasta_print_relabel_with_score(fp_nonchimeras,
ci->query_seq,
ci->query_len,
ci->query_head,
ci->query_head_len,
ci->query_size,
nonchimera_count,
opt_uchime_ref ?
"uchime_ref" : "uchime_denovo",
ci->best_h);
else
fasta_print_relabel(fp_nonchimeras,
ci->query_seq,
ci->query_len,
ci->query_head,
ci->query_head_len,
ci->query_size,
nonchimera_count);
}
}

Expand Down
8 changes: 8 additions & 0 deletions src/vsearch.cc
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@

/* options */

bool opt_fasta_score;
bool opt_fastq_allowmergestagger;
bool opt_fastq_nostagger;
bool opt_fastq_eeout;
Expand Down Expand Up @@ -532,6 +533,7 @@ void args_init(int argc, char **argv)
opt_eetabbedout = 0;
opt_fastaout_notmerged_fwd = 0;
opt_fastaout_notmerged_rev = 0;
opt_fasta_score = 0;
opt_fasta_width = 80;
opt_fastaout = 0;
opt_fastaout_discarded = 0;
Expand Down Expand Up @@ -849,6 +851,7 @@ void args_init(int argc, char **argv)
{"fastaout_notmerged_rev",required_argument, 0, 0 },
{"reverse", required_argument, 0, 0 },
{"eetabbedout", required_argument, 0, 0 },
{"fasta_score", no_argument, 0, 0 },
{ 0, 0, 0, 0 }
};

Expand Down Expand Up @@ -1555,6 +1558,10 @@ void args_init(int argc, char **argv)
opt_eetabbedout = optarg;
break;

case 166:
opt_fasta_score = 1;
break;

default:
fatal("Internal error in option parsing");
}
Expand Down Expand Up @@ -1781,6 +1788,7 @@ void cmd_help()
" --chimeras FILENAME output chimeric sequences to file\n"
" --db FILENAME reference database for --uchime_ref\n"
" --dn REAL 'no' vote pseudo-count (1.4)\n"
" --fasta_score include chimera score in fasta output\n"
" --mindiffs INT minimum number of differences in segment (3)\n"
" --mindiv REAL minimum divergence from closest parent (0.8)\n"
" --minh REAL minimum score (0.28)\n"
Expand Down
9 changes: 5 additions & 4 deletions src/vsearch.h
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@
extern bool opt_clusterout_id;
extern bool opt_clusterout_sort;
extern bool opt_eeout;
extern bool opt_fasta_score;
extern bool opt_fastq_allowmergestagger;
extern bool opt_fastq_eeout;
extern bool opt_fastq_nostagger;
Expand Down Expand Up @@ -187,20 +188,20 @@ extern char * opt_dbnotmatched;
extern char * opt_derep_fulllength;
extern char * opt_derep_prefix;
extern char * opt_eetabbedout;
extern char * opt_fastaout_notmerged_fwd;
extern char * opt_fastaout_notmerged_rev;
extern char * opt_fastaout;
extern char * opt_fastaout_discarded;
extern char * opt_fastaout_notmerged_fwd;
extern char * opt_fastaout_notmerged_rev;
extern char * opt_fastapairs;
extern char * opt_fastq_chars;
extern char * opt_fastq_convert;
extern char * opt_fastq_filter;
extern char * opt_fastq_mergepairs;
extern char * opt_fastqout_notmerged_fwd;
extern char * opt_fastqout_notmerged_rev;
extern char * opt_fastq_stats;
extern char * opt_fastqout;
extern char * opt_fastqout_discarded;
extern char * opt_fastqout_notmerged_fwd;
extern char * opt_fastqout_notmerged_rev;
extern char * opt_fastx_mask;
extern char * opt_fastx_revcomp;
extern char * opt_fastx_subsample;
Expand Down

0 comments on commit 50eebf2

Please sign in to comment.