Skip to content

Commit

Permalink
VSEARCH 1.0.14: Fix for ignored strand in multiple sequence alignment…
Browse files Browse the repository at this point in the history
… after clustering
  • Loading branch information
torognes committed Feb 17, 2015
1 parent 9c0f3a1 commit d82dc91
Show file tree
Hide file tree
Showing 10 changed files with 47 additions and 21 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,22 +32,22 @@ If you can't find an answer in the VSEARCH documentation, please visit the [VSEA

In the example below, VSEARCH will identify sequences in the file database.fsa that are at least 90% identical on the plus strand to the query sequences in the file queries.fsa and write the results to the file alnout.txt.

`./vsearch-1.0.13-linux-x86_64 --usearch_global queries.fsa --db database.fsa --id 0.9 --alnout alnout.txt`
`./vsearch-1.0.14-linux-x86_64 --usearch_global queries.fsa --db database.fsa --id 0.9 --alnout alnout.txt`

## Download and install

The latest releases of VSEARCH are available [here](https://github.com/torognes/vsearch/releases).

Binary executables of VSEARCH are available in the `bin` folder for [GNU/Linux on x86-64 systems](https://github.com/torognes/vsearch/blob/master/bin/vsearch-1.0.13-linux-x86_64) and [Apple Mac OS X on x86-64 systems](https://github.com/torognes/vsearch/blob/master/bin/vsearch-1.0.13-osx-x86_64). These executables include support for input files compressed by zlib and bzip2 (with files usually ending in .gz or .bz2).
Binary executables of VSEARCH are available in the `bin` folder for [GNU/Linux on x86-64 systems](https://github.com/torognes/vsearch/blob/master/bin/vsearch-1.0.14-linux-x86_64) and [Apple Mac OS X on x86-64 systems](https://github.com/torognes/vsearch/blob/master/bin/vsearch-1.0.14-osx-x86_64). These executables include support for input files compressed by zlib and bzip2 (with files usually ending in .gz or .bz2).

Download the appropriate executable and make a symbolic link in a folder included in your `$PATH` from `vsearch` to the appropriate binary. You may use the following commands (assuming `~/bin` is in your `$PATH`):

```sh
cd ~
mkdir -p bin
cd bin
wget https://github.com/torognes/vsearch/releases/download/v1.0.13/vsearch-1.0.13-linux-x86_64
ln -s vsearch-1.0.13-linux-x86_64 vsearch
wget https://github.com/torognes/vsearch/releases/download/v1.0.14/vsearch-1.0.14-linux-x86_64
ln -s vsearch-1.0.14-linux-x86_64 vsearch
```

Substitute `linux` with `osx` in those lines if you're on a Mac.
Expand Down
10 changes: 8 additions & 2 deletions doc/vsearch.1
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
.\" ============================================================================
.TH vsearch 1 "February 17, 2015" "version 1.0.13" "USER COMMANDS"
.TH vsearch 1 "February 17, 2015" "version 1.0.14" "USER COMMANDS"
.\" ============================================================================
.SH NAME
vsearch \(em chimera detection, clustering, dereplication, masking, pairwise alignment, searching, shuffling and sorting of amplicons from metagenomic projects.
Expand Down Expand Up @@ -1524,10 +1524,16 @@ clustering, pairwise alignment and searching).
.BR v1.0.12\~ "released February 6th, 2015"
Temporarily fixes a problem with long headers in FASTA files.
.TP
.BR v1.0.13\~ "released February 6th, 2015"
.BR v1.0.13\~ "released February 17th, 2015"
Fix a memory allocation problem when computing multiple sequence alignments
with the --msaout and --consout options, as well as a memory leak.
Also increased line buffer for reading FASTA files to 4MB.
.TP
.BR v1.0.14\~ "released February 17th, 2015"
Fix a bug where the multiple alignment and consensus sequence computed
after clustering ignored the strand of the sequences.
Also decreased the size of the line buffer for reading FASTA files back
to 1MB due to excessive stack memory usage.
.LP
.\" ============================================================================
.\" TODO:
Expand Down
Binary file modified doc/vsearch_manual.pdf
Binary file not shown.
7 changes: 7 additions & 0 deletions src/cluster.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ typedef struct clusterinfo_s
int seqno;
int clusterno;
char * cigar;
int strand;
} clusterinfo_t;

static clusterinfo_t * clusterinfo = 0;
Expand Down Expand Up @@ -711,6 +712,7 @@ void cluster_core_parallel()
clusterinfo[seqno].seqno = seqno;
clusterinfo[seqno].clusterno = clusterinfo[target].clusterno;
clusterinfo[seqno].cigar = best->nwalignment;
clusterinfo[seqno].strand = best->strand;
best->nwalignment = 0;
}
else
Expand All @@ -724,6 +726,7 @@ void cluster_core_parallel()
clusterinfo[seqno].seqno = seqno;
clusterinfo[seqno].clusterno = clusters;
clusterinfo[seqno].cigar = 0;
clusterinfo[seqno].strand = 0;

/* add current sequence to database */
dbindex_addsequence(seqno);
Expand Down Expand Up @@ -826,13 +829,15 @@ void cluster_core_serial()
clusterinfo[seqno].seqno = seqno;
clusterinfo[seqno].clusterno = clusterinfo[target].clusterno;
clusterinfo[seqno].cigar = best->nwalignment;
clusterinfo[seqno].strand = best->strand;
best->nwalignment = 0;
}
else
{
clusterinfo[seqno].seqno = seqno;
clusterinfo[seqno].clusterno = clusters;
clusterinfo[seqno].cigar = 0;
clusterinfo[seqno].strand = 0;
dbindex_addsequence(seqno);
cluster_core_results_nohit(clusters,
si_p->query_head,
Expand Down Expand Up @@ -1130,6 +1135,7 @@ void cluster(char * dbname,
int clusterno = clusterinfo[i].clusterno;
int seqno = clusterinfo[i].seqno;
char * cigar = clusterinfo[i].cigar;
int strand = clusterinfo[i].strand;

if (clusterno != lastcluster)
{
Expand All @@ -1145,6 +1151,7 @@ void cluster(char * dbname,
/* add current sequence to the cluster */
msa_target_list[msa_target_count].seqno = seqno;
msa_target_list[msa_target_count].cigar = cigar;
msa_target_list[msa_target_count].strand = strand;
msa_target_count++;

progress_update(i);
Expand Down
2 changes: 1 addition & 1 deletion src/db.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ seqinfo_t * seqindex = 0;
static char * datap = 0;

#define MEMCHUNK 10485760
#define LINEALLOC 4194304
#define LINEALLOC 1048576

regex_t db_regexp;

Expand Down
22 changes: 22 additions & 0 deletions src/msa.cc
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,19 @@ void msa(FILE * fp_msaout, FILE * fp_consout,
memset(profile, 0, 4 * sizeof(int) * alnlen);
aln = (char *) xmalloc(alnlen+1);
char * cons = (char *) xmalloc(alnlen+1);

/* Find longest target sequence on reverse strand and allocate buffer */
long longest_reversed = 0;
for(int i=0; i < target_count; i++)
if (target_list[i].strand)
{
long len = db_getsequencelen(target_list[i].seqno);
if (len > longest_reversed)
longest_reversed = len;
}
char * rc_buffer = 0;
if (longest_reversed > 0)
rc_buffer = (char*) xmalloc(longest_reversed + 1);

/* blank line before each msa */
if (fp_msaout)
Expand All @@ -161,6 +174,13 @@ void msa(FILE * fp_msaout, FILE * fp_consout,
int target_seqno = target_list[j].seqno;
char * target_seq = db_getsequence(target_seqno);

if (target_list[j].strand)
{
reverse_complement(rc_buffer, target_seq,
db_getsequencelen(target_seqno));
target_seq = rc_buffer;
}

int inserted = 0;
int qpos = 0;
int tpos = 0;
Expand Down Expand Up @@ -235,6 +255,8 @@ void msa(FILE * fp_msaout, FILE * fp_consout,
}
}

if (rc_buffer)
free(rc_buffer);

/* consensus */

Expand Down
3 changes: 2 additions & 1 deletion src/msa.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
Copyright (C) 2014 Torbjorn Rognes
Copyright (C) 2014-2015 Torbjorn Rognes
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
Expand All @@ -23,6 +23,7 @@ struct msa_target_s
{
int seqno;
char * cigar;
int strand;
};

void msa(FILE * fp_msaout, FILE * fp_consout,
Expand Down
2 changes: 1 addition & 1 deletion src/query.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
be overwritten on the next call of query_getnext. */

#define MEMCHUNK 4096
#define LINEALLOC 4194304
#define LINEALLOC 1048576

extern unsigned int chrstatus[256];

Expand Down
12 changes: 1 addition & 11 deletions src/util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -183,19 +183,9 @@ void reverse_complement(char * rc, char * seq, long len)
The memory for rc must be large enough to hold the reverse complement of
the sequence plus a terminating zero (i.e. the length of seq + 1). */

#if 0
printf("Sequence:\n");
fprint_fasta_seq_only(stdout, seq, len, 60);
#endif
for(long i=0; i<len; i++)
{
rc[i] = chrmap_complement[(int)(seq[len-1-i])];
}
rc[i] = chrmap_complement[(int)(seq[len-1-i])];
rc[len] = 0;
#if 0
printf("RC:\n");
fprint_fasta_seq_only(stdout, rc, len, 60);
#endif
}

#ifdef HAVE_BZLIB
Expand Down
2 changes: 1 addition & 1 deletion src/vsearch.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@
#include "allpairs.h"

#define PROG_NAME "vsearch"
#define PROG_VERSION "v1.0.13"
#define PROG_VERSION "v1.0.14"

#ifdef __APPLE__
#define PROG_ARCH "osx_x86_64"
Expand Down

0 comments on commit d82dc91

Please sign in to comment.