From 31b6e7d1773caaabce82db0fb22aee31b978c7f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torbj=C3=B8rn=20Rognes?= Date: Fri, 10 Mar 2017 17:26:54 +0100 Subject: [PATCH] VSEARCH 2.4.2: Improved paired-end merging --- README.md | 24 ++++++++++++------------ configure.ac | 2 +- man/vsearch.1 | 9 +++++++-- src/mergepairs.cc | 13 +------------ 4 files changed, 21 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index 213f9128..5038d907 100644 --- a/README.md +++ b/README.md @@ -35,9 +35,9 @@ In the example below, VSEARCH will identify sequences in the file database.fsa t **Source distribution** To download the source distribution from a [release](https://github.com/torognes/vsearch/releases) and build the executable and the documentation, use the following commands: ``` -wget https://github.com/torognes/vsearch/archive/v2.4.1.tar.gz -tar xzf v2.4.1.tar.gz -cd vsearch-2.4.1 +wget https://github.com/torognes/vsearch/archive/v2.4.2.tar.gz +tar xzf v2.4.2.tar.gz +cd vsearch-2.4.2 ./autogen.sh ./configure make @@ -68,33 +68,33 @@ Binary distributions are provided for x86-64 systems running GNU/Linux, macOS (v Download the appropriate executable for your system using the following commands if you are using a Linux x86_64 system: ```sh -wget https://github.com/torognes/vsearch/releases/download/v2.4.1/vsearch-2.4.1-linux-x86_64.tar.gz -tar xzf vsearch-2.4.1-linux-x86_64.tar.gz +wget https://github.com/torognes/vsearch/releases/download/v2.4.2/vsearch-2.4.2-linux-x86_64.tar.gz +tar xzf vsearch-2.4.2-linux-x86_64.tar.gz ``` Or these commands if you are using a Linux ppc64le system: ```sh -wget https://github.com/torognes/vsearch/releases/download/v2.4.1/vsearch-2.4.1-linux-ppc64le.tar.gz -tar xzf vsearch-2.4.1-linux-ppc64le.tar.gz +wget https://github.com/torognes/vsearch/releases/download/v2.4.2/vsearch-2.4.2-linux-ppc64le.tar.gz +tar xzf vsearch-2.4.2-linux-ppc64le.tar.gz ``` Or these commands if you are using a Mac: ```sh -wget 
https://github.com/torognes/vsearch/releases/download/v2.4.1/vsearch-2.4.1-macos-x86_64.tar.gz -tar xzf vsearch-2.4.1-macos-x86_64.tar.gz +wget https://github.com/torognes/vsearch/releases/download/v2.4.2/vsearch-2.4.2-macos-x86_64.tar.gz +tar xzf vsearch-2.4.2-macos-x86_64.tar.gz ``` Or if you are using Windows, download and extract (unzip) the contents of this file: ``` -https://github.com/torognes/vsearch/releases/download/v2.4.1/vsearch-2.4.1-win-x86_64.zip +https://github.com/torognes/vsearch/releases/download/v2.4.2/vsearch-2.4.2-win-x86_64.zip ``` -Linux and Mac: You will now have the binary distribution in a folder called `vsearch-2.4.1-linux-x86_64` or `vsearch-2.4.1-macos-x86_64` in which you will find three subfolders `bin`, `man` and `doc`. We recommend making a copy or a symbolic link to the vsearch binary `bin/vsearch` in a folder included in your `$PATH`, and a copy or a symbolic link to the vsearch man page `man/vsearch.1` in a folder included in your `$MANPATH`. The PDF version of the manual is available in `doc/vsearch_manual.pdf`. +Linux and Mac: You will now have the binary distribution in a folder called `vsearch-2.4.2-linux-x86_64` or `vsearch-2.4.2-macos-x86_64` in which you will find three subfolders `bin`, `man` and `doc`. We recommend making a copy or a symbolic link to the vsearch binary `bin/vsearch` in a folder included in your `$PATH`, and a copy or a symbolic link to the vsearch man page `man/vsearch.1` in a folder included in your `$MANPATH`. The PDF version of the manual is available in `doc/vsearch_manual.pdf`. -Windows: You will now have the binary distribution in a folder called `vsearch-2.4.1-win-x86_64`. The vsearch executable is called `vsearch.exe`. The manual in PDF format is called `vsearch_manual.pdf`. +Windows: You will now have the binary distribution in a folder called `vsearch-2.4.2-win-x86_64`. The vsearch executable is called `vsearch.exe`. The manual in PDF format is called `vsearch_manual.pdf`. 
**Documentation** The VSEARCH user's manual is available in the `man` folder in the form of a [man page](https://github.com/torognes/vsearch/blob/master/doc/vsearch.1). A pdf version (vsearch_manual.pdf) will be generated by `make`. To install the manpage manually, copy the `vsearch.1` file or a create a symbolic link to `vsearch.1` in a folder included in your `$MANPATH`. The manual in both formats is also available with the binary distribution. The manual in PDF form (vsearch_manual.pdf) is also attached to the latest [release](https://github.com/torognes/vsearch/releases). diff --git a/configure.ac b/configure.ac index a1ca9048..8fbb592e 100644 --- a/configure.ac +++ b/configure.ac @@ -2,7 +2,7 @@ # Process this file with autoconf to produce a configure script. AC_PREREQ([2.63]) -AC_INIT([vsearch], [2.4.1], [torognes@ifi.uio.no]) +AC_INIT([vsearch], [2.4.2], [torognes@ifi.uio.no]) AC_CANONICAL_TARGET AM_INIT_AUTOMAKE([subdir-objects]) AC_LANG([C++]) diff --git a/man/vsearch.1 b/man/vsearch.1 index fc056ebb..7bc7db9e 100644 --- a/man/vsearch.1 +++ b/man/vsearch.1 @@ -1,5 +1,5 @@ .\" ============================================================================ -.TH vsearch 1 "March 1, 2017" "version 2.4.1" "USER COMMANDS" +.TH vsearch 1 "March 10, 2017" "version 2.4.2" "USER COMMANDS" .\" ============================================================================ .SH NAME vsearch \(em chimera detection, clustering, dereplication and @@ -1054,7 +1054,7 @@ with the \-\-fastq_maxns are also discarded (no limit by default). Staggered reads are not merged unless the \-\-fastq_allowmergestagger option is specified. The minimum length of the overlap region between the reads may be specified with the -\-\-minovlen option (default 10), and the overlap region may not +\-\-fastq_minovlen option (default 16), and the overlap region may not include more mismatches than specified with the \-\-maxdiffs option (5 by default), otherwise the read pair is discarded. 
The mimimum and maximum length of the merged sequence may be specified with the @@ -3058,6 +3058,11 @@ command in help text. Fixed an overflow bug in fastq_stats and fastq_eestats affecting analysis of very large FASTQ files. Fixed maximum memory usage reporting on Windows. +.TP +.BR v2.4.2\~ "released March 10th, 2017" +Default value for fastq_minovlen increased to 16 in accordance with +help text and for compatibility with usearch. Minor changes for +improved accuracy of paired-end read merging. .RE .LP .\" ============================================================================ diff --git a/src/mergepairs.cc b/src/mergepairs.cc index 599610fe..71729777 100644 --- a/src/mergepairs.cc +++ b/src/mergepairs.cc @@ -61,12 +61,11 @@ #include "vsearch.h" #define INPUTCHUNKSIZE 10000 -#define SCOREMETHOD 2 /* scores */ const double alpha = 4.0; -const double beta = -5.0; +const double beta = -22.0; /* static variables */ @@ -207,11 +206,7 @@ void precompute_qual() p = 1.0 - px - py + px * py * 4.0 / 3.0; -#if SCOREMETHOD == 2 - match_score[x][y] = alpha * p + beta * (1.0 - p); -#else match_score[x][y] = alpha * p; -#endif /* Mismatch */ @@ -221,11 +216,7 @@ void precompute_qual() p = 1.0 - (px + py) / 3.0 + px * py * 4.0 / 9.0; -#if SCOREMETHOD == 2 - mism_score[x][y] = alpha * (1.0 - p) + beta * p; -#else mism_score[x][y] = beta * p; -#endif } } @@ -462,7 +453,6 @@ double overlap_score(merge_data_t * ip, int64_t optimize(merge_data_t * ip) { - // int64_t i1 = opt_fastq_minovlen; int64_t i1 = 1; i1 = MAX(i1, ip->fwd_trunc + ip->rev_trunc - opt_fastq_maxmergelen); @@ -582,7 +572,6 @@ void process(merge_data_t * ip) if (!skip) ip->offset = optimize(ip); - // if (ip->offset) if (ip->offset >= opt_fastq_minovlen) merge(ip); }