diff --git a/LICENSE.txt b/LICENSE.txt index 96432e89..a8d3aa1a 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,6 +1,6 @@ VSEARCH: a versatile open source tool for metagenomics - Copyright (C) 2014-2015, Torbjorn Rognes, Frederic Mahe and Tomas Flouri + Copyright (C) 2014-2017, Torbjorn Rognes, Frederic Mahe and Tomas Flouri All rights reserved. Contact: Torbjorn Rognes , diff --git a/README.md b/README.md index 588db67e..213f9128 100644 --- a/README.md +++ b/README.md @@ -35,9 +35,9 @@ In the example below, VSEARCH will identify sequences in the file database.fsa t **Source distribution** To download the source distribution from a [release](https://github.com/torognes/vsearch/releases) and build the executable and the documentation, use the following commands: ``` -wget https://github.com/torognes/vsearch/archive/v2.4.0.tar.gz -tar xzf v2.4.0.tar.gz -cd vsearch-2.4.0 +wget https://github.com/torognes/vsearch/archive/v2.4.1.tar.gz +tar xzf v2.4.1.tar.gz +cd vsearch-2.4.1 ./autogen.sh ./configure make @@ -68,33 +68,33 @@ Binary distributions are provided for x86-64 systems running GNU/Linux, macOS (v Download the appropriate executable for your system using the following commands if you are using a Linux x86_64 system: ```sh -wget https://github.com/torognes/vsearch/releases/download/v2.4.0/vsearch-2.4.0-linux-x86_64.tar.gz -tar xzf vsearch-2.4.0-linux-x86_64.tar.gz +wget https://github.com/torognes/vsearch/releases/download/v2.4.1/vsearch-2.4.1-linux-x86_64.tar.gz +tar xzf vsearch-2.4.1-linux-x86_64.tar.gz ``` Or these commands if you are using a Linux ppc64le system: ```sh -wget https://github.com/torognes/vsearch/releases/download/v2.4.0/vsearch-2.4.0-linux-ppc64le.tar.gz -tar xzf vsearch-2.4.0-linux-ppc64le.tar.gz +wget https://github.com/torognes/vsearch/releases/download/v2.4.1/vsearch-2.4.1-linux-ppc64le.tar.gz +tar xzf vsearch-2.4.1-linux-ppc64le.tar.gz ``` Or these commands if you are using a Mac: ```sh -wget https://github.com/torognes/vsearch/releases/download/v2.4.0/vsearch-2.4.0-macos-x86_64.tar.gz -tar xzf vsearch-2.4.0-macos-x86_64.tar.gz +wget https://github.com/torognes/vsearch/releases/download/v2.4.1/vsearch-2.4.1-macos-x86_64.tar.gz +tar xzf vsearch-2.4.1-macos-x86_64.tar.gz ``` Or if you are using Windows, download and extract (unzip) the contents of this file: ``` -https://github.com/torognes/vsearch/releases/download/v2.4.0/vsearch-2.4.0-win-x86_64.zip +https://github.com/torognes/vsearch/releases/download/v2.4.1/vsearch-2.4.1-win-x86_64.zip ``` -Linux and Mac: You will now have the binary distribution in a folder called `vsearch-2.4.0-linux-x86_64` or `vsearch-2.4.0-macos-x86_64` in which you will find three subfolders `bin`, `man` and `doc`. We recommend making a copy or a symbolic link to the vsearch binary `bin/vsearch` in a folder included in your `$PATH`, and a copy or a symbolic link to the vsearch man page `man/vsearch.1` in a folder included in your `$MANPATH`. The PDF version of the manual is available in `doc/vsearch_manual.pdf`. +Linux and Mac: You will now have the binary distribution in a folder called `vsearch-2.4.1-linux-x86_64` or `vsearch-2.4.1-macos-x86_64` in which you will find three subfolders `bin`, `man` and `doc`. We recommend making a copy or a symbolic link to the vsearch binary `bin/vsearch` in a folder included in your `$PATH`, and a copy or a symbolic link to the vsearch man page `man/vsearch.1` in a folder included in your `$MANPATH`. The PDF version of the manual is available in `doc/vsearch_manual.pdf`. -Windows: You will now have the binary distribution in a folder called `vsearch-2.4.0-win-x86_64`. The vsearch executable is called `vsearch.exe`. The manual in PDF format is called `vsearch_manual.pdf`. +Windows: You will now have the binary distribution in a folder called `vsearch-2.4.1-win-x86_64`. The vsearch executable is called `vsearch.exe`. The manual in PDF format is called `vsearch_manual.pdf`. **Documentation** The VSEARCH user's manual is available in the `man` folder in the form of a [man page](https://github.com/torognes/vsearch/blob/master/doc/vsearch.1). A pdf version (vsearch_manual.pdf) will be generated by `make`. To install the manpage manually, copy the `vsearch.1` file or a create a symbolic link to `vsearch.1` in a folder included in your `$MANPATH`. The manual in both formats is also available with the binary distribution. The manual in PDF form (vsearch_manual.pdf) is also attached to the latest [release](https://github.com/torognes/vsearch/releases). diff --git a/configure.ac b/configure.ac index 6e2d5de7..a1ca9048 100644 --- a/configure.ac +++ b/configure.ac @@ -2,7 +2,7 @@ # Process this file with autoconf to produce a configure script. AC_PREREQ([2.63]) -AC_INIT([vsearch], [2.4.0], [torognes@ifi.uio.no]) +AC_INIT([vsearch], [2.4.1], [torognes@ifi.uio.no]) AC_CANONICAL_TARGET AM_INIT_AUTOMAKE([subdir-objects]) AC_LANG([C++]) diff --git a/man/vsearch.1 b/man/vsearch.1 index 86cdd6b1..fc056ebb 100644 --- a/man/vsearch.1 +++ b/man/vsearch.1 @@ -1,5 +1,5 @@ .\" ============================================================================ -.TH vsearch 1 "February 8, 2017" "version 2.4.0" "USER COMMANDS" +.TH vsearch 1 "March 1, 2017" "version 2.4.1" "USER COMMANDS" .\" ============================================================================ .SH NAME vsearch \(em chimera detection, clustering, dereplication and @@ -265,11 +265,10 @@ fatal error messages. Number of computation threads to use (1 to 256). The number of threads should be lesser or equal to the number of available CPU cores. The default is to use all available resources and to launch one thread per -logical core. The following commands are multi-threaded: uchime_ref, -cluster_fast, cluster_size, cluster_smallmem, fastq_mergepairs, -maskfasta, allpairs_global, usearch_global. Only one thread is used -for the other commands. -.\" fastq_mergepairs is not multithreaded in v2.1.0 +logical core. The following commands are multi-threaded: +allpairs_global, cluster_fast, cluster_size, cluster_smallmem, +fastq_mergepairs, maskfasta, search_exact, uchime_ref, and +usearch_global. Only one thread is used for the other commands. .TP .B \-\-version | \-v Output version information and exit. @@ -3054,6 +3053,11 @@ Added support for Linux on Power8 systems (ppc64le) and Windows on x86_64. Improved detection of pipes when reading FASTA and FASTQ files. Corrected option for specifiying output from fastq_eestats command in help text. +.TP +.BR v2.4.1\~ "released March 1st, 2017" +Fixed an overflow bug in fastq_stats and fastq_eestats affecting +analysis of very large FASTQ files. Fixed maximum memory usage +reporting on Windows. .RE .LP .\" ============================================================================ diff --git a/src/arch.cc b/src/arch.cc index 2db9cc02..2a0ff11b 100644 --- a/src/arch.cc +++ b/src/arch.cc @@ -70,7 +70,7 @@ uint64_t arch_get_memused() GetProcessMemoryInfo(GetCurrentProcess(), &pmc, sizeof(PROCESS_MEMORY_COUNTERS)); - return pmc.WorkingSetSize; + return pmc.PeakWorkingSetSize; #else diff --git a/src/eestats.cc b/src/eestats.cc index e3b36e48..0d50dd76 100644 --- a/src/eestats.cc +++ b/src/eestats.cc @@ -97,15 +97,15 @@ void fastq_eestats() int64_t ee_size = ee_start(len_alloc, resolution); - int * read_length_table = (int*) xmalloc(sizeof(int) * len_alloc); - memset(read_length_table, 0, sizeof(int) * len_alloc); + uint64_t * read_length_table = (uint64_t*) xmalloc(sizeof(uint64_t) * len_alloc); + memset(read_length_table, 0, sizeof(uint64_t) * len_alloc); - int * qual_length_table = (int*) xmalloc(sizeof(int) * len_alloc * + uint64_t * qual_length_table = (uint64_t*) xmalloc(sizeof(uint64_t) * len_alloc * (max_quality+1)); - memset(qual_length_table, 0, sizeof(int) * len_alloc * (max_quality+1)); + memset(qual_length_table, 0, sizeof(uint64_t) * len_alloc * (max_quality+1)); - int * ee_length_table = (int*) xmalloc(sizeof(int) * ee_size); - memset(ee_length_table, 0, sizeof(int) * ee_size); + uint64_t * ee_length_table = (uint64_t*) xmalloc(sizeof(uint64_t) * ee_size); + memset(ee_length_table, 0, sizeof(uint64_t) * ee_size); double * sum_ee_length_table = (double*) xmalloc(sizeof(double) * len_alloc); memset(sum_ee_length_table, 0, sizeof(double) * len_alloc); @@ -131,20 +131,20 @@ void fastq_eestats() { int64_t new_ee_size = ee_start(new_alloc, resolution); - read_length_table = (int*) xrealloc(read_length_table, - sizeof(int) * new_alloc); + read_length_table = (uint64_t*) xrealloc(read_length_table, + sizeof(uint64_t) * new_alloc); memset(read_length_table + len_alloc, 0, - sizeof(int) * (new_alloc - len_alloc)); + sizeof(uint64_t) * (new_alloc - len_alloc)); - qual_length_table = (int*) xrealloc(qual_length_table, sizeof(int) * + qual_length_table = (uint64_t*) xrealloc(qual_length_table, sizeof(uint64_t) * new_alloc * (max_quality+1)); memset(qual_length_table + (max_quality+1) * len_alloc, 0, - sizeof(int) * (new_alloc - len_alloc) * (max_quality+1)); + sizeof(uint64_t) * (new_alloc - len_alloc) * (max_quality+1)); - ee_length_table = (int*) xrealloc(ee_length_table, sizeof(int) * + ee_length_table = (uint64_t*) xrealloc(ee_length_table, sizeof(uint64_t) * new_ee_size); memset(ee_length_table + ee_size, 0, - sizeof(int) * (new_ee_size - ee_size)); + sizeof(uint64_t) * (new_ee_size - ee_size)); sum_ee_length_table = (double*) xrealloc(sum_ee_length_table, sizeof(double) * new_alloc); @@ -210,7 +210,7 @@ void fastq_eestats() ee += pe; - int e_int = MIN(resolution*(i+1), (int)(resolution * ee)); + int64_t e_int = MIN(resolution*(i+1), (int)(resolution * ee)); ee_length_table[ee_start(i, resolution) + e_int]++; sum_ee_length_table[i] += ee; @@ -285,9 +285,9 @@ void fastq_eestats() int64_t max_errors = resolution * (i+1); n = 0; - for(int e=0; e<=max_errors; e++) + for(int64_t e=0; e<=max_errors; e++) { - int x = ee_length_table[ee_offset + e]; + int64_t x = ee_length_table[ee_offset + e]; n += x; if ((min_ee<0) && (x > 0)) diff --git a/src/fastqops.cc b/src/fastqops.cc index 4ed4347c..d56bbe9c 100644 --- a/src/fastqops.cc +++ b/src/fastqops.cc @@ -561,17 +561,17 @@ void fastq_stats() int64_t read_length_alloc = 512; - int * read_length_table = (int*) xmalloc(sizeof(int) * read_length_alloc); - memset(read_length_table, 0, sizeof(int) * read_length_alloc); + uint64_t * read_length_table = (uint64_t*) xmalloc(sizeof(uint64_t) * read_length_alloc); + memset(read_length_table, 0, sizeof(uint64_t) * read_length_alloc); - int * qual_length_table = (int*) xmalloc(sizeof(int) * read_length_alloc * 256); - memset(qual_length_table, 0, sizeof(int) * read_length_alloc * 256); + uint64_t * qual_length_table = (uint64_t*) xmalloc(sizeof(uint64_t) * read_length_alloc * 256); + memset(qual_length_table, 0, sizeof(uint64_t) * read_length_alloc * 256); - int * ee_length_table = (int *) xmalloc(sizeof(int) * read_length_alloc * 4); - memset(ee_length_table, 0, sizeof(int) * read_length_alloc * 4); + uint64_t * ee_length_table = (uint64_t *) xmalloc(sizeof(uint64_t) * read_length_alloc * 4); + memset(ee_length_table, 0, sizeof(uint64_t) * read_length_alloc * 4); - int * q_length_table = (int *) xmalloc(sizeof(int) * read_length_alloc * 4); - memset(q_length_table, 0, sizeof(int) * read_length_alloc * 4); + uint64_t * q_length_table = (uint64_t *) xmalloc(sizeof(uint64_t) * read_length_alloc * 4); + memset(q_length_table, 0, sizeof(uint64_t) * read_length_alloc * 4); double * sumee_length_table = (double *) xmalloc(sizeof(double) * read_length_alloc); memset(sumee_length_table, 0, sizeof(double) * read_length_alloc); @@ -597,25 +597,25 @@ void fastq_stats() if (len+1 > read_length_alloc) { - read_length_table = (int*) xrealloc(read_length_table, - sizeof(int) * (len+1)); + read_length_table = (uint64_t*) xrealloc(read_length_table, + sizeof(uint64_t) * (len+1)); memset(read_length_table + read_length_alloc, 0, - sizeof(int) * (len + 1 - read_length_alloc)); + sizeof(uint64_t) * (len + 1 - read_length_alloc)); - qual_length_table = (int*) xrealloc(qual_length_table, - sizeof(int) * (len+1) * 256); + qual_length_table = (uint64_t*) xrealloc(qual_length_table, + sizeof(uint64_t) * (len+1) * 256); memset(qual_length_table + 256 * read_length_alloc, 0, - sizeof(int) * (len + 1 - read_length_alloc) * 256); + sizeof(uint64_t) * (len + 1 - read_length_alloc) * 256); - ee_length_table = (int*) xrealloc(ee_length_table, - sizeof(int) * (len+1) * 4); + ee_length_table = (uint64_t*) xrealloc(ee_length_table, + sizeof(uint64_t) * (len+1) * 4); memset(ee_length_table + 4 * read_length_alloc, 0, - sizeof(int) * (len + 1 - read_length_alloc) * 4); + sizeof(uint64_t) * (len + 1 - read_length_alloc) * 4); - q_length_table = (int*) xrealloc(q_length_table, - sizeof(int) * (len+1) * 4); + q_length_table = (uint64_t*) xrealloc(q_length_table, + sizeof(uint64_t) * (len+1) * 4); memset(q_length_table + 4 * read_length_alloc, 0, - sizeof(int) * (len + 1 - read_length_alloc) * 4); + sizeof(uint64_t) * (len + 1 - read_length_alloc) * 4); sumee_length_table = (double *) xrealloc(sumee_length_table, sizeof(double) * (len+1)); @@ -698,7 +698,7 @@ void fastq_stats() /* compute various distributions */ - int * length_dist = (int*) xmalloc(sizeof(int) * (len_max+1)); + uint64_t * length_dist = (uint64_t*) xmalloc(sizeof(uint64_t) * (len_max+1)); int64_t * symb_dist = (int64_t*) xmalloc(sizeof(int64_t) * (len_max+1)); double * rate_dist = (double*) xmalloc(sizeof(double) * (len_max+1)); @@ -744,7 +744,7 @@ void fastq_stats() for(int64_t i = len_max; i >= len_min; i--) { if (read_length_table[i] > 0) - fprintf(fp_log, "%2s%5" PRId64 " %10d %5.1lf%% %5.1lf%%\n", + fprintf(fp_log, "%2s%5" PRId64 " %10" PRIu64 " %5.1lf%% %5.1lf%%\n", (i == len_max ? ">=" : " "), i, read_length_table[i], @@ -849,7 +849,7 @@ void fastq_stats() } fprintf(fp_log, "\n"); - fprintf(fp_log, "%10" PRIu64 " Recs (%.1lfM), 0 too int64_t\n", + fprintf(fp_log, "%10" PRIu64 " Recs (%.1lfM), 0 too long\n", seq_count, seq_count / 1.0e6); fprintf(fp_log, "%10.1lf Avg length\n", 1.0 * symbols / seq_count); fprintf(fp_log, "%9.1lfM Bases\n", symbols / 1.0e6);