Skip to content

Commit

Permalink
VSEARCH 2.4.1: Bug fixes for fastq_stats and _eestats
Browse files Browse the repository at this point in the history
  • Loading branch information
torognes committed Mar 1, 2017
1 parent b30ba14 commit 6eee7c2
Show file tree
Hide file tree
Showing 7 changed files with 64 additions and 60 deletions.
2 changes: 1 addition & 1 deletion LICENSE.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
VSEARCH: a versatile open source tool for metagenomics

Copyright (C) 2014-2015, Torbjorn Rognes, Frederic Mahe and Tomas Flouri
Copyright (C) 2014-2017, Torbjorn Rognes, Frederic Mahe and Tomas Flouri
All rights reserved.

Contact: Torbjorn Rognes <[email protected]>,
Expand Down
24 changes: 12 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ In the example below, VSEARCH will identify sequences in the file database.fsa t
**Source distribution** To download the source distribution from a [release](https://github.com/torognes/vsearch/releases) and build the executable and the documentation, use the following commands:

```
wget https://github.com/torognes/vsearch/archive/v2.4.0.tar.gz
tar xzf v2.4.0.tar.gz
cd vsearch-2.4.0
wget https://github.com/torognes/vsearch/archive/v2.4.1.tar.gz
tar xzf v2.4.1.tar.gz
cd vsearch-2.4.1
./autogen.sh
./configure
make
Expand Down Expand Up @@ -68,33 +68,33 @@ Binary distributions are provided for x86-64 systems running GNU/Linux, macOS (v
Download the appropriate executable for your system using the following commands if you are using a Linux x86_64 system:

```sh
wget https://github.com/torognes/vsearch/releases/download/v2.4.0/vsearch-2.4.0-linux-x86_64.tar.gz
tar xzf vsearch-2.4.0-linux-x86_64.tar.gz
wget https://github.com/torognes/vsearch/releases/download/v2.4.1/vsearch-2.4.1-linux-x86_64.tar.gz
tar xzf vsearch-2.4.1-linux-x86_64.tar.gz
```

Or these commands if you are using a Linux ppc64le system:

```sh
wget https://github.com/torognes/vsearch/releases/download/v2.4.0/vsearch-2.4.0-linux-ppc64le.tar.gz
tar xzf vsearch-2.4.0-linux-ppc64le.tar.gz
wget https://github.com/torognes/vsearch/releases/download/v2.4.1/vsearch-2.4.1-linux-ppc64le.tar.gz
tar xzf vsearch-2.4.1-linux-ppc64le.tar.gz
```

Or these commands if you are using a Mac:

```sh
wget https://github.com/torognes/vsearch/releases/download/v2.4.0/vsearch-2.4.0-macos-x86_64.tar.gz
tar xzf vsearch-2.4.0-macos-x86_64.tar.gz
wget https://github.com/torognes/vsearch/releases/download/v2.4.1/vsearch-2.4.1-macos-x86_64.tar.gz
tar xzf vsearch-2.4.1-macos-x86_64.tar.gz
```

Or if you are using Windows, download and extract (unzip) the contents of this file:

```
https://github.com/torognes/vsearch/releases/download/v2.4.0/vsearch-2.4.0-win-x86_64.zip
https://github.com/torognes/vsearch/releases/download/v2.4.1/vsearch-2.4.1-win-x86_64.zip
```

Linux and Mac: You will now have the binary distribution in a folder called `vsearch-2.4.0-linux-x86_64` or `vsearch-2.4.0-macos-x86_64` in which you will find three subfolders `bin`, `man` and `doc`. We recommend making a copy or a symbolic link to the vsearch binary `bin/vsearch` in a folder included in your `$PATH`, and a copy or a symbolic link to the vsearch man page `man/vsearch.1` in a folder included in your `$MANPATH`. The PDF version of the manual is available in `doc/vsearch_manual.pdf`.
Linux and Mac: You will now have the binary distribution in a folder called `vsearch-2.4.1-linux-x86_64` or `vsearch-2.4.1-macos-x86_64` in which you will find three subfolders `bin`, `man` and `doc`. We recommend making a copy or a symbolic link to the vsearch binary `bin/vsearch` in a folder included in your `$PATH`, and a copy or a symbolic link to the vsearch man page `man/vsearch.1` in a folder included in your `$MANPATH`. The PDF version of the manual is available in `doc/vsearch_manual.pdf`.

Windows: You will now have the binary distribution in a folder called `vsearch-2.4.0-win-x86_64`. The vsearch executable is called `vsearch.exe`. The manual in PDF format is called `vsearch_manual.pdf`.
Windows: You will now have the binary distribution in a folder called `vsearch-2.4.1-win-x86_64`. The vsearch executable is called `vsearch.exe`. The manual in PDF format is called `vsearch_manual.pdf`.

**Documentation** The VSEARCH user's manual is available in the `man` folder in the form of a [man page](https://github.com/torognes/vsearch/blob/master/doc/vsearch.1). A pdf version (vsearch_manual.pdf) will be generated by `make`. To install the manpage manually, copy the `vsearch.1` file or create a symbolic link to `vsearch.1` in a folder included in your `$MANPATH`. The manual in both formats is also available with the binary distribution. The manual in PDF form (vsearch_manual.pdf) is also attached to the latest [release](https://github.com/torognes/vsearch/releases).

Expand Down
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Process this file with autoconf to produce a configure script.

AC_PREREQ([2.63])
AC_INIT([vsearch], [2.4.0], [[email protected]])
AC_INIT([vsearch], [2.4.1], [[email protected]])
AC_CANONICAL_TARGET
AM_INIT_AUTOMAKE([subdir-objects])
AC_LANG([C++])
Expand Down
16 changes: 10 additions & 6 deletions man/vsearch.1
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
.\" ============================================================================
.TH vsearch 1 "February 8, 2017" "version 2.4.0" "USER COMMANDS"
.TH vsearch 1 "March 1, 2017" "version 2.4.1" "USER COMMANDS"
.\" ============================================================================
.SH NAME
vsearch \(em chimera detection, clustering, dereplication and
Expand Down Expand Up @@ -265,11 +265,10 @@ fatal error messages.
Number of computation threads to use (1 to 256). The number of threads
should be less than or equal to the number of available CPU cores. The
default is to use all available resources and to launch one thread per
logical core. The following commands are multi-threaded: uchime_ref,
cluster_fast, cluster_size, cluster_smallmem, fastq_mergepairs,
maskfasta, allpairs_global, usearch_global. Only one thread is used
for the other commands.
.\" fastq_mergepairs is not multithreaded in v2.1.0
logical core. The following commands are multi-threaded:
allpairs_global, cluster_fast, cluster_size, cluster_smallmem,
fastq_mergepairs, maskfasta, search_exact, uchime_ref, and
usearch_global. Only one thread is used for the other commands.
.TP
.B \-\-version | \-v
Output version information and exit.
Expand Down Expand Up @@ -3054,6 +3053,11 @@ Added support for Linux on Power8 systems (ppc64le) and Windows on
x86_64. Improved detection of pipes when reading FASTA and FASTQ
files. Corrected option for specifying output from fastq_eestats
command in help text.
.TP
.BR v2.4.1\~ "released March 1st, 2017"
Fixed an overflow bug in fastq_stats and fastq_eestats affecting
analysis of very large FASTQ files. Fixed maximum memory usage
reporting on Windows.
.RE
.LP
.\" ============================================================================
Expand Down
2 changes: 1 addition & 1 deletion src/arch.cc
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ uint64_t arch_get_memused()
GetProcessMemoryInfo(GetCurrentProcess(),
&pmc,
sizeof(PROCESS_MEMORY_COUNTERS));
return pmc.WorkingSetSize;
return pmc.PeakWorkingSetSize;

#else

Expand Down
32 changes: 16 additions & 16 deletions src/eestats.cc
Original file line number Diff line number Diff line change
Expand Up @@ -97,15 +97,15 @@ void fastq_eestats()

int64_t ee_size = ee_start(len_alloc, resolution);

int * read_length_table = (int*) xmalloc(sizeof(int) * len_alloc);
memset(read_length_table, 0, sizeof(int) * len_alloc);
uint64_t * read_length_table = (uint64_t*) xmalloc(sizeof(uint64_t) * len_alloc);
memset(read_length_table, 0, sizeof(uint64_t) * len_alloc);

int * qual_length_table = (int*) xmalloc(sizeof(int) * len_alloc *
uint64_t * qual_length_table = (uint64_t*) xmalloc(sizeof(uint64_t) * len_alloc *
(max_quality+1));
memset(qual_length_table, 0, sizeof(int) * len_alloc * (max_quality+1));
memset(qual_length_table, 0, sizeof(uint64_t) * len_alloc * (max_quality+1));

int * ee_length_table = (int*) xmalloc(sizeof(int) * ee_size);
memset(ee_length_table, 0, sizeof(int) * ee_size);
uint64_t * ee_length_table = (uint64_t*) xmalloc(sizeof(uint64_t) * ee_size);
memset(ee_length_table, 0, sizeof(uint64_t) * ee_size);

double * sum_ee_length_table = (double*) xmalloc(sizeof(double) * len_alloc);
memset(sum_ee_length_table, 0, sizeof(double) * len_alloc);
Expand All @@ -131,20 +131,20 @@ void fastq_eestats()
{
int64_t new_ee_size = ee_start(new_alloc, resolution);

read_length_table = (int*) xrealloc(read_length_table,
sizeof(int) * new_alloc);
read_length_table = (uint64_t*) xrealloc(read_length_table,
sizeof(uint64_t) * new_alloc);
memset(read_length_table + len_alloc, 0,
sizeof(int) * (new_alloc - len_alloc));
sizeof(uint64_t) * (new_alloc - len_alloc));

qual_length_table = (int*) xrealloc(qual_length_table, sizeof(int) *
qual_length_table = (uint64_t*) xrealloc(qual_length_table, sizeof(uint64_t) *
new_alloc * (max_quality+1));
memset(qual_length_table + (max_quality+1) * len_alloc, 0,
sizeof(int) * (new_alloc - len_alloc) * (max_quality+1));
sizeof(uint64_t) * (new_alloc - len_alloc) * (max_quality+1));

ee_length_table = (int*) xrealloc(ee_length_table, sizeof(int) *
ee_length_table = (uint64_t*) xrealloc(ee_length_table, sizeof(uint64_t) *
new_ee_size);
memset(ee_length_table + ee_size, 0,
sizeof(int) * (new_ee_size - ee_size));
sizeof(uint64_t) * (new_ee_size - ee_size));

sum_ee_length_table = (double*) xrealloc(sum_ee_length_table,
sizeof(double) * new_alloc);
Expand Down Expand Up @@ -210,7 +210,7 @@ void fastq_eestats()

ee += pe;

int e_int = MIN(resolution*(i+1), (int)(resolution * ee));
int64_t e_int = MIN(resolution*(i+1), (int)(resolution * ee));
ee_length_table[ee_start(i, resolution) + e_int]++;

sum_ee_length_table[i] += ee;
Expand Down Expand Up @@ -285,9 +285,9 @@ void fastq_eestats()
int64_t max_errors = resolution * (i+1);

n = 0;
for(int e=0; e<=max_errors; e++)
for(int64_t e=0; e<=max_errors; e++)
{
int x = ee_length_table[ee_offset + e];
int64_t x = ee_length_table[ee_offset + e];
n += x;

if ((min_ee<0) && (x > 0))
Expand Down
46 changes: 23 additions & 23 deletions src/fastqops.cc
Original file line number Diff line number Diff line change
Expand Up @@ -561,17 +561,17 @@ void fastq_stats()

int64_t read_length_alloc = 512;

int * read_length_table = (int*) xmalloc(sizeof(int) * read_length_alloc);
memset(read_length_table, 0, sizeof(int) * read_length_alloc);
uint64_t * read_length_table = (uint64_t*) xmalloc(sizeof(uint64_t) * read_length_alloc);
memset(read_length_table, 0, sizeof(uint64_t) * read_length_alloc);

int * qual_length_table = (int*) xmalloc(sizeof(int) * read_length_alloc * 256);
memset(qual_length_table, 0, sizeof(int) * read_length_alloc * 256);
uint64_t * qual_length_table = (uint64_t*) xmalloc(sizeof(uint64_t) * read_length_alloc * 256);
memset(qual_length_table, 0, sizeof(uint64_t) * read_length_alloc * 256);

int * ee_length_table = (int *) xmalloc(sizeof(int) * read_length_alloc * 4);
memset(ee_length_table, 0, sizeof(int) * read_length_alloc * 4);
uint64_t * ee_length_table = (uint64_t *) xmalloc(sizeof(uint64_t) * read_length_alloc * 4);
memset(ee_length_table, 0, sizeof(uint64_t) * read_length_alloc * 4);

int * q_length_table = (int *) xmalloc(sizeof(int) * read_length_alloc * 4);
memset(q_length_table, 0, sizeof(int) * read_length_alloc * 4);
uint64_t * q_length_table = (uint64_t *) xmalloc(sizeof(uint64_t) * read_length_alloc * 4);
memset(q_length_table, 0, sizeof(uint64_t) * read_length_alloc * 4);

double * sumee_length_table = (double *) xmalloc(sizeof(double) * read_length_alloc);
memset(sumee_length_table, 0, sizeof(double) * read_length_alloc);
Expand All @@ -597,25 +597,25 @@ void fastq_stats()

if (len+1 > read_length_alloc)
{
read_length_table = (int*) xrealloc(read_length_table,
sizeof(int) * (len+1));
read_length_table = (uint64_t*) xrealloc(read_length_table,
sizeof(uint64_t) * (len+1));
memset(read_length_table + read_length_alloc, 0,
sizeof(int) * (len + 1 - read_length_alloc));
sizeof(uint64_t) * (len + 1 - read_length_alloc));

qual_length_table = (int*) xrealloc(qual_length_table,
sizeof(int) * (len+1) * 256);
qual_length_table = (uint64_t*) xrealloc(qual_length_table,
sizeof(uint64_t) * (len+1) * 256);
memset(qual_length_table + 256 * read_length_alloc, 0,
sizeof(int) * (len + 1 - read_length_alloc) * 256);
sizeof(uint64_t) * (len + 1 - read_length_alloc) * 256);

ee_length_table = (int*) xrealloc(ee_length_table,
sizeof(int) * (len+1) * 4);
ee_length_table = (uint64_t*) xrealloc(ee_length_table,
sizeof(uint64_t) * (len+1) * 4);
memset(ee_length_table + 4 * read_length_alloc, 0,
sizeof(int) * (len + 1 - read_length_alloc) * 4);
sizeof(uint64_t) * (len + 1 - read_length_alloc) * 4);

q_length_table = (int*) xrealloc(q_length_table,
sizeof(int) * (len+1) * 4);
q_length_table = (uint64_t*) xrealloc(q_length_table,
sizeof(uint64_t) * (len+1) * 4);
memset(q_length_table + 4 * read_length_alloc, 0,
sizeof(int) * (len + 1 - read_length_alloc) * 4);
sizeof(uint64_t) * (len + 1 - read_length_alloc) * 4);

sumee_length_table = (double *) xrealloc(sumee_length_table,
sizeof(double) * (len+1));
Expand Down Expand Up @@ -698,7 +698,7 @@ void fastq_stats()

/* compute various distributions */

int * length_dist = (int*) xmalloc(sizeof(int) * (len_max+1));
uint64_t * length_dist = (uint64_t*) xmalloc(sizeof(uint64_t) * (len_max+1));
int64_t * symb_dist = (int64_t*) xmalloc(sizeof(int64_t) * (len_max+1));

double * rate_dist = (double*) xmalloc(sizeof(double) * (len_max+1));
Expand Down Expand Up @@ -744,7 +744,7 @@ void fastq_stats()
for(int64_t i = len_max; i >= len_min; i--)
{
if (read_length_table[i] > 0)
fprintf(fp_log, "%2s%5" PRId64 " %10d %5.1lf%% %5.1lf%%\n",
fprintf(fp_log, "%2s%5" PRId64 " %10" PRIu64 " %5.1lf%% %5.1lf%%\n",
(i == len_max ? ">=" : " "),
i,
read_length_table[i],
Expand Down Expand Up @@ -849,7 +849,7 @@ void fastq_stats()
}

fprintf(fp_log, "\n");
fprintf(fp_log, "%10" PRIu64 " Recs (%.1lfM), 0 too int64_t\n",
fprintf(fp_log, "%10" PRIu64 " Recs (%.1lfM), 0 too long\n",
seq_count, seq_count / 1.0e6);
fprintf(fp_log, "%10.1lf Avg length\n", 1.0 * symbols / seq_count);
fprintf(fp_log, "%9.1lfM Bases\n", symbols / 1.0e6);
Expand Down

0 comments on commit 6eee7c2

Please sign in to comment.