From 727f65c88c2bf716037944523aa03f4a603c1697 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Torbj=C3=B8rn=20Rognes?= <torognes@ifi.uio.no>
Date: Fri, 23 Jan 2015 17:31:53 +0100
Subject: [PATCH] VSEARCH 1.0.10: Fix bug with ignored sizein, update copyright
 years, README.md

---
 README.md           | 22 ++++++++++---
 src/Makefile        |  2 +-
 src/Makefile.BZLIB  |  2 +-
 src/Makefile.ZLIB   |  2 +-
 src/Makefile.static |  2 +-
 src/align.cc        |  2 +-
 src/align.h         |  2 +-
 src/align_simd.cc   |  2 +-
 src/allpairs.cc     |  2 +-
 src/chimera.cc      |  2 +-
 src/cluster.cc      |  6 ++--
 src/cluster.h       |  2 +-
 src/db.cc           |  2 +-
 src/derep.cc        |  2 +-
 src/linmemalign.cc  | 79 +++++----------------------------------------
 src/results.cc      |  2 +-
 src/searchcore.cc   |  2 +-
 src/searchcore.h    |  2 +-
 src/sortbylength.cc |  2 +-
 src/userfields.cc   |  2 +-
 src/util.cc         |  2 +-
 src/util.h          |  2 +-
 src/vsearch.cc      | 10 +++---
 src/vsearch.h       |  8 ++---
 24 files changed, 56 insertions(+), 107 deletions(-)

diff --git a/README.md b/README.md
index 9aede18f..ac8558ba 100644
--- a/README.md
+++ b/README.md
@@ -30,13 +30,13 @@ If you can't find an answer in the VSEARCH documentation, please visit the [VSEA
 
 In the example below, VSEARCH will identify sequences in the file database.fsa that are at least 90% identical on the plus strand to the query sequences in the file queries.fsa and write the results to the file alnout.txt.
 
-`./vsearch-1.0.9-linux-x86_64 --usearch_global queries.fsa --db database.fsa --id 0.9 --alnout alnout.txt`
+`./vsearch-1.0.10-linux-x86_64 --usearch_global queries.fsa --db database.fsa --id 0.9 --alnout alnout.txt`
 
 ## Download and install
 
 The latest releases of VSEARCH are available [here](https://github.com/torognes/vsearch/releases).
 
-Binary executables of VSEARCH are available in the `bin` folder for [GNU/Linux on x86-64 systems](https://github.com/torognes/vsearch/blob/master/bin/vsearch-1.0.9-linux-x86_64) and [Apple Mac OS X on x86-64 systems](https://github.com/torognes/vsearch/blob/master/bin/vsearch-1.0.9-osx-x86_64). These executables include support for  input files compressed by zlib and bzip2 (with files usually ending in .gz or .bz2).
+Binary executables of VSEARCH are available in the `bin` folder for [GNU/Linux on x86-64 systems](https://github.com/torognes/vsearch/blob/master/bin/vsearch-1.0.10-linux-x86_64) and [Apple Mac OS X on x86-64 systems](https://github.com/torognes/vsearch/blob/master/bin/vsearch-1.0.10-osx-x86_64). These executables include support for  input files compressed by zlib and bzip2 (with files usually ending in .gz or .bz2).
 
 Download the appropriate executable and make a symbolic link in a folder included in your `$PATH` from `vsearch` to the appropriate binary. You may use the following commands (assuming `~/bin` is in your `$PATH`):
 
@@ -44,8 +44,8 @@ Download the appropriate executable and make a symbolic link in a folder include
 cd ~
 mkdir -p bin
 cd bin
-wget https://github.com/torognes/vsearch/releases/download/v1.0.9/vsearch-1.0.9-linux-x86_64
-ln -s vsearch-1.0.9-linux-x86_64 vsearch
+wget https://github.com/torognes/vsearch/releases/download/v1.0.10/vsearch-1.0.10-linux-x86_64
+ln -s vsearch-1.0.10-linux-x86_64 vsearch
 ```
 
 Substitute `linux` with `osx` in those lines if you're on a Mac.
@@ -68,7 +68,7 @@ We have chosen to select all unique kmers from the query. At least 6 of these km
 
 It appears that there are differences in usearch between the searches performed by the `--usearch_global` command and the clustering commands. Notably, it appears like `--usearch_global` simply ignores the options `--wordlength`, `--slots` and `--pattern`, while the clustering commands takes them into account. VSEARCH supports the `--wordlength` option for kmer lengths from 3 to 15, but the options `--slots` and `--pattern` are ignored.
 
-**Alignment:** VSEARCH uses a 8-way 16-bit SIMD vectorized implementation of the full dynamic programming algorithm (Needleman-Wunsch) for global sequence alignment. It is an adaptation of the method described by Rognes (2011). USEARCH by default uses a heuristic procedure involving seeding, extension and banded dynamic programming. If the `--fulldp` option is specified to USEARCH it will also use a full dynamic programming approach, but USEARCH is then considerably slower.
+**Alignment:** VSEARCH uses an 8-way 16-bit SIMD vectorized implementation of the full dynamic programming algorithm (Needleman-Wunsch) for global sequence alignment. It is an adaptation of the method described by Rognes (2011). Due to the extreme memory requirements of this method when aligning two long sequences (e.g. more than 5000 bp long), an alternative algorithm described by Hirschberg (1975) and Myers and Miller (1988) is used when aligning a pair of long sequences. This alternative algorithm uses only a linear amount of memory but is much slower. USEARCH by default uses a heuristic procedure involving seeding, extension and banded dynamic programming. If the `--fulldp` option is specified to USEARCH it will also use a full dynamic programming approach, but USEARCH is then considerably slower.
 
 **Search Accuracy:** The accuracy of VSEARCH searches has been assessed and compared to USEARCH version 7.0.1090. The Rfam 11.0 database was used for the assessment, as described on the [USEARCH website](http://drive5.com/usearch/benchmark_rfam.html). A similar procedure was described in the USEARCH paper using the Rfam 9.1 database.
 
@@ -365,25 +365,37 @@ the [Protist Ribosomal Database](http://ssu-rrna.org/) (Guillou et al. 2012).
 **Search and clustering orders of magnitude faster than BLAST.**
 *Bioinformatics*, 26 (19): 2460-2461.
 doi:[10.1093/bioinformatics/btq461](http://dx.doi.org/10.1093/bioinformatics/btq461)
+
 * Edgar RC, Haas BJ, Clemente JC, Quince C, Knight R (2011)
 **UCHIME improves sensitivity and speed of chimera detection.**
 *Bioinformatics*, 27 (16): 2194-2200.
 doi:[10.1093/bioinformatics/btr381](http://dx.doi.org/10.1093/bioinformatics/btr381)
+
 * Farrar M (2007)
 **Striped Smith-Waterman speeds database searches six times over other SIMD implementations.**
 *Bioinformatics* (2007) 23 (2): 156-161.
 doi:[10.1093/bioinformatics/btl582](http://dx.doi.org/10.1093/bioinformatics/btl582)
+
 * Guillou L., Bachar D., Audic S., Bass D., Berney C., Bittner L., Boutte C., Burgaud G., de Vargas C., Decelle J., del Campo J., Dolan J., Dunthorn M., Edvardsen B., Holzmann M., Kooistra W., Lara E., Lebescot N., Logares R., Mahé F., Massana R., Montresor M., Morard R., Not F., Pawlowski J., Probert I., Sauvadet A.-L., Siano R., Stoeck T., Vaulot D., Zimmermann P. & Christen R. (2013)
 **The Protist Ribosomal Reference database (PR2): a catalog of unicellular eukaryote Small Sub-Unit rRNA sequences with curated taxonomy.**
 *Nucleic Acids Research*, 41 (D1), D597-D604.
 doi:[10.1093/nar/gks1160](http://dx.doi.org/10.1093/nar/gks1160)
+
+* Hirschberg D.S. (1975) **A linear space algorithm for computing maximal common subsequences.** *Comm ACM*, 18(6), 341-343. doi:[10.1145/360825.360861](http://dx.doi.org/10.1145/360825.360861)
+
 * Karsenti E., González Acinas S., Bork P., Bowler C., de Vargas C., Raes J., Sullivan M. B., Arendt D., Benzoni F., Claverie J.-M., Follows M., Jaillon O., Gorsky G., Hingamp P., Iudicone D., Kandels-Lewis S., Krzic U., Not F., Ogata H., Pesant S., Reynaud E. G., Sardet C., Sieracki M. E., Speich S., Velayoudon D., Weissenbach J., Wincker P. & the Tara Oceans Consortium (2011)
 **A holistic approach to marine eco-systems biology.**
 *PLoS Biology*, 9(10), e1001177.
 doi:[10.1371/journal.pbio.1001177](http://dx.doi.org/10.1371/journal.pbio.1001177)
+
 * Logares R., Audic S., Bass D., Bittner L., Boutte C., Christen R., Claverie J.-M., Decelle J., Dolan J. R., Dunthorn M., Edvardsen B., Gobet A., Kooistra W. H. C. F., Mahé F., Not F., Ogata H., Pawlowski J., Pernice M. C., Romac S., Shalchian-Tabrizi K., Simon N., Stoeck T., Santini S., Siano R., Wincker P., Zingone A., Richards T., de Vargas C. & Massana R. (2014) The patterning of rare and abundant community assemblages in coastal marine-planktonic microbial eukaryotes.
 *Current Biology*, 24(8), 813-821.
 doi:[10.1016/j.cub.2014.02.050](http://dx.doi.org/10.1016/j.cub.2014.02.050)
+
+* Myers E.W. & Miller W. (1988) **Optimal alignments in linear space.**
+*Comput Appl Biosci*, 4(1), 11-17.
+doi:[10.1093/bioinformatics/4.1.11](http://dx.doi.org/10.1093/bioinformatics/4.1.11)
+
 * Rognes T (2011)
 **Faster Smith-Waterman database searches by inter-sequence SIMD parallelisation.**
 *BMC Bioinformatics*, 12: 221.
diff --git a/src/Makefile b/src/Makefile
index 6f300b8b..ba927e9a 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -1,4 +1,4 @@
-# Copyright (C) 2014 Torbjorn Rognes
+# Copyright (C) 2014-2015 Torbjorn Rognes
 # 
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as
diff --git a/src/Makefile.BZLIB b/src/Makefile.BZLIB
index f74bd8bb..1db2aba9 100644
--- a/src/Makefile.BZLIB
+++ b/src/Makefile.BZLIB
@@ -1,4 +1,4 @@
-# Copyright (C) 2014 Torbjorn Rognes & Tomas Fluori
+# Copyright (C) 2014-2015 Torbjorn Rognes & Tomas Flouri
 # 
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as
diff --git a/src/Makefile.ZLIB b/src/Makefile.ZLIB
index 842bd67f..1003deb5 100644
--- a/src/Makefile.ZLIB
+++ b/src/Makefile.ZLIB
@@ -1,4 +1,4 @@
-# Copyright (C) 2014 Torbjorn Rognes & Tomas Flouri
+# Copyright (C) 2014-2015 Torbjorn Rognes & Tomas Flouri
 # 
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as
diff --git a/src/Makefile.static b/src/Makefile.static
index 6bb43154..34e9cc99 100644
--- a/src/Makefile.static
+++ b/src/Makefile.static
@@ -1,4 +1,4 @@
-# Copyright (C) 2014 Torbjorn Rognes & Tomas Flouri
+# Copyright (C) 2014-2015 Torbjorn Rognes & Tomas Flouri
 # 
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as
diff --git a/src/align.cc b/src/align.cc
index fff9ef9f..9d0608d8 100644
--- a/src/align.cc
+++ b/src/align.cc
@@ -1,5 +1,5 @@
 /*
-    Copyright (C) 2014 Torbjorn Rognes
+    Copyright (C) 2014-2015 Torbjorn Rognes
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU Affero General Public License as
diff --git a/src/align.h b/src/align.h
index 5d6948fb..027fe981 100644
--- a/src/align.h
+++ b/src/align.h
@@ -1,5 +1,5 @@
 /*
-    Copyright (C) 2014 Torbjorn Rognes
+    Copyright (C) 2014-2015 Torbjorn Rognes
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU Affero General Public License as
diff --git a/src/align_simd.cc b/src/align_simd.cc
index 34da1064..e9e1c914 100644
--- a/src/align_simd.cc
+++ b/src/align_simd.cc
@@ -1,5 +1,5 @@
 /*
-    Copyright (C) 2012-2014 Torbjorn Rognes & Frederic Mahe
+    Copyright (C) 2012-2015 Torbjorn Rognes & Frederic Mahe
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU Affero General Public License as
diff --git a/src/allpairs.cc b/src/allpairs.cc
index 369ab2c6..1f1b9203 100644
--- a/src/allpairs.cc
+++ b/src/allpairs.cc
@@ -1,5 +1,5 @@
 /*
-  Copyright (C) 2014 Torbjorn Rognes
+  Copyright (C) 2014-2015 Torbjorn Rognes
 
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU Affero General Public License as
diff --git a/src/chimera.cc b/src/chimera.cc
index a0f8ac06..8e9b3ce3 100644
--- a/src/chimera.cc
+++ b/src/chimera.cc
@@ -1,5 +1,5 @@
 /*
-  Copyright (C) 2014 Torbjorn Rognes
+  Copyright (C) 2014-2015 Torbjorn Rognes
 
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU Affero General Public License as
diff --git a/src/cluster.cc b/src/cluster.cc
index 6211664c..715e30b8 100644
--- a/src/cluster.cc
+++ b/src/cluster.cc
@@ -1,5 +1,5 @@
 /*
-    Copyright (C) 2014 Torbjorn Rognes
+    Copyright (C) 2014-2015 Torbjorn Rognes
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU Affero General Public License as
@@ -996,7 +996,7 @@ void cluster(char * dbname,
                                     db_getsequencelen(seqno),
                                     opt_fasta_width);
             }
-          abundance += db_getabundance(seqno);
+          abundance += opt_sizein ? db_getabundance(seqno) : 1;
           size++;
         }
       else
@@ -1044,7 +1044,7 @@ void cluster(char * dbname,
             }
   
           centroid = clusterinfo[i].seqno;
-          abundance = db_getabundance(seqno);
+          abundance = opt_sizein ? db_getabundance(seqno) : 1;
           size = 1;
           lastcluster = clusterno;
         }
diff --git a/src/cluster.h b/src/cluster.h
index dc177efa..ea83623d 100644
--- a/src/cluster.h
+++ b/src/cluster.h
@@ -1,5 +1,5 @@
 /*
-    Copyright (C) 2014 Torbjorn Rognes
+    Copyright (C) 2014-2015 Torbjorn Rognes
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU Affero General Public License as
diff --git a/src/db.cc b/src/db.cc
index c65d7849..ec9a4862 100644
--- a/src/db.cc
+++ b/src/db.cc
@@ -1,5 +1,5 @@
 /*
-    Copyright (C) 2014 Torbjorn Rognes & Tomas Flouri
+    Copyright (C) 2014-2015 Torbjorn Rognes & Tomas Flouri
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU Affero General Public License as
diff --git a/src/derep.cc b/src/derep.cc
index af8eea23..47f908f9 100644
--- a/src/derep.cc
+++ b/src/derep.cc
@@ -248,7 +248,7 @@ void derep_fulllength()
             }
         }
 
-      long ab = db_getabundance(i); 
+      long ab = opt_sizein ? db_getabundance(i) : 1;
       sumsize += ab;
 
       if (bp->size)
diff --git a/src/linmemalign.cc b/src/linmemalign.cc
index 182c9701..297d4198 100644
--- a/src/linmemalign.cc
+++ b/src/linmemalign.cc
@@ -28,9 +28,11 @@
   
   These functions are based on the following articles:
   - Hirschberg (1975) Comm ACM 18:341-343
-  - Huang, Hardison & Miller (1990) CABIOS 6:373-381
   - Myers & Miller (1988) CABIOS 4:11-17
   
+  The method has been adapted for the use of different
+  gap penalties for query/target/left/interior/right gaps.
+
   scorematrix consists of 16x16 long integers
   
   Sequences and alignment matrix:
@@ -42,8 +44,6 @@
 
 */
 
-//#define DEBUG
-
 LinearMemoryAligner::LinearMemoryAligner()
 {
   scorematrix = 0;
@@ -183,13 +183,6 @@ void LinearMemoryAligner::diff(long a_start,
                                bool b_left,      /* includes left end of b  */
                                bool b_right)     /* includes right end of b */
 {
-#ifdef DEBUG
-  printf("diff(a_start=%ld, b_start=%ld, a_len=%ld, b_len=%ld, gap_b_left=%d, gap_b_right=%d, a_left=%d, a_right=%d, b_left=%d, b_right=%d)\n",
-         a_start, b_start, a_len, b_len,
-         gap_b_left, gap_b_right,
-         a_left, a_right, b_left, b_right);
-#endif
-
   long MaxScore = 0;
 
   if (b_len == 0)
@@ -299,10 +292,6 @@ void LinearMemoryAligner::diff(long a_start,
 	    }
 	}
 
-#ifdef DEBUG
-      printf("MaxScore=%ld best=%ld\n", MaxScore, best);
-#endif
-
       if (best == -1)
 	{
 	  cigar_add('D', 1);
@@ -347,10 +336,6 @@ void LinearMemoryAligner::diff(long a_start,
 
       /* compute matrix */
 
-#ifdef DEBUG
-          printf("Forward:\n");
-#endif
-
       for (i = 1; i <= I; i++)
 	{
 	  long p = HH[0];
@@ -362,10 +347,6 @@ void LinearMemoryAligner::diff(long a_start,
           HH[0] = h;
 	  long f = LONG_MIN;
           
-#ifdef DEBUG
-          printf("%4ld", h);
-#endif
-
 	  for (j = 1; j <= b_len; j++)
 	    {
               f = MAX(f, h - go_q_i) - ge_q_i;
@@ -382,17 +363,9 @@ void LinearMemoryAligner::diff(long a_start,
 		h = EE[j];
 	      p = HH[j];
 	      HH[j] = h;
-
-#ifdef DEBUG
-              printf(" %4ld", h);
-#endif
-
 	    }
-#ifdef DEBUG
-          printf("\n");
-#endif
-
 	}
+
       EE[0] = HH[0];
 
       // Compute XX & YY in reverse phase
@@ -411,10 +384,7 @@ void LinearMemoryAligner::diff(long a_start,
       
       /* compute matrix */
 
-#ifdef DEBUG
-      printf("Reverse:\n");
-#endif
-for (i = 1; i <= a_len - I; i++)
+      for (i = 1; i <= a_len - I; i++)
 	{
 	  long p = XX[0];
 
@@ -424,10 +394,6 @@ for (i = 1; i <= a_len - I; i++)
           XX[0] = h;
 	  long f = LONG_MIN;
 
-#ifdef DEBUG
-          printf("%4ld", h);
-#endif
-          
 	  for (j = 1; j <= b_len; j++)
 	    {
               f = MAX(f, h - go_q_i) - ge_q_i;
@@ -444,15 +410,9 @@ for (i = 1; i <= a_len - I; i++)
 		h = YY[j];
 	      p = XX[j];
 	      XX[j] = h;
-
-#ifdef DEBUG
-              printf(" %4ld", h);
-#endif
 	    }
-#ifdef DEBUG
-          printf("\n");
-#endif
 	}
+
       YY[0] = XX[0];
 
 
@@ -463,38 +423,24 @@ for (i = 1; i <= a_len - I; i++)
 
       /* solutions with diagonal at break */
       
-#ifdef DEBUG
-      printf("Forward scores:");
-#endif
-
       for (j=0; j <= b_len; j++)
 	{
 	  long Score = HH[j] + XX[b_len - j];
-#ifdef DEBUG
-          printf(" %ld+%ld=%ld", HH[j], XX[b_len-j], Score);
-#endif
+
 	  if (Score > MaxScore0)
 	    {
 	      MaxScore0 = Score;
 	      best0 = j;
 	    }
 	}
-#ifdef DEBUG
-      printf("\n");
-#endif
 
       long MaxScore1 = LONG_MIN;
       long best1 = -1;
 
       /* solutions that end with a gap in b from both ends at break */
 
-#ifdef DEBUG
-      printf("Reverse scores:");
-#endif
-
       for (j=0; j <= b_len; j++)
 	{
-
           long g;
           if (b_left && (j==0))
             g = go_t_l;
@@ -504,18 +450,13 @@ for (i = 1; i <= a_len - I; i++)
             g = go_t_i;
 
 	  long Score = EE[j] + YY[b_len - j] + g;
-#ifdef DEBUG
-          printf(" %ld+%ld=%ld", EE[j], YY[b_len-j], Score);
-#endif
+
 	  if (Score > MaxScore1)
 	    {
 	      MaxScore1 = Score;
 	      best1 = j;
 	    }
 	}
-#ifdef DEBUG
-      printf("\n");
-#endif
       
       long P;
       long best;
@@ -545,10 +486,6 @@ for (i = 1; i <= a_len - I; i++)
             }
         }
 
-#ifdef DEBUG
-      printf("Selection: P=%ld best=%ld, MaxScore=%ld\n", P, best, MaxScore);
-#endif
-
       /* recursively compute upper left and lower right parts */
 
       if (P == 0)
diff --git a/src/results.cc b/src/results.cc
index 25a9d112..06d026b6 100644
--- a/src/results.cc
+++ b/src/results.cc
@@ -1,5 +1,5 @@
 /*
-  Copyright (C) 2014 Torbjorn Rognes
+  Copyright (C) 2014-2015 Torbjorn Rognes
 
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU Affero General Public License as
diff --git a/src/searchcore.cc b/src/searchcore.cc
index 85c09dcb..7649a832 100644
--- a/src/searchcore.cc
+++ b/src/searchcore.cc
@@ -1,5 +1,5 @@
 /*
-  Copyright (C) 2014 Torbjorn Rognes
+  Copyright (C) 2014-2015 Torbjorn Rognes
 
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU Affero General Public License as
diff --git a/src/searchcore.h b/src/searchcore.h
index 8bd54e43..75583dec 100644
--- a/src/searchcore.h
+++ b/src/searchcore.h
@@ -1,5 +1,5 @@
 /*
-    Copyright (C) 2014 Torbjorn Rognes
+    Copyright (C) 2014-2015 Torbjorn Rognes
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU Affero General Public License as
diff --git a/src/sortbylength.cc b/src/sortbylength.cc
index 50494c50..920a5bd0 100644
--- a/src/sortbylength.cc
+++ b/src/sortbylength.cc
@@ -1,5 +1,5 @@
 /*
-  Copyright (C) 2014 Torbjorn Rognes
+  Copyright (C) 2014-2015 Torbjorn Rognes
 
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU Affero General Public License as
diff --git a/src/userfields.cc b/src/userfields.cc
index f2819b38..3b804f28 100644
--- a/src/userfields.cc
+++ b/src/userfields.cc
@@ -1,5 +1,5 @@
 /*
-    Copyright (C) 2014 Torbjorn Rognes
+    Copyright (C) 2014-2015 Torbjorn Rognes
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU Affero General Public License as
diff --git a/src/util.cc b/src/util.cc
index 177ba2cd..a06e13ae 100644
--- a/src/util.cc
+++ b/src/util.cc
@@ -1,5 +1,5 @@
 /*
-    Copyright (C) 2014 Torbjorn Rognes & Tomas Flouri
+    Copyright (C) 2014-2015 Torbjorn Rognes & Tomas Flouri
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU Affero General Public License as
diff --git a/src/util.h b/src/util.h
index 65f9e9e7..820659c8 100644
--- a/src/util.h
+++ b/src/util.h
@@ -1,5 +1,5 @@
 /*
-    Copyright (C) 2014 Torbjorn Rognes & Tomas Flouri
+    Copyright (C) 2014-2015 Torbjorn Rognes & Tomas Flouri
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU Affero General Public License as
diff --git a/src/vsearch.cc b/src/vsearch.cc
index 56d5a1ce..561ba670 100644
--- a/src/vsearch.cc
+++ b/src/vsearch.cc
@@ -1,5 +1,5 @@
 /*
-    Copyright (C) 2014 Torbjorn Rognes & Frederic Mahe
+    Copyright (C) 2014-2015 Torbjorn Rognes & Frederic Mahe
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU Affero General Public License as
@@ -1352,7 +1352,7 @@ void cmd_help()
           "  --iddef INT                 id definition, 0-4=CD-HIT,all,int,MBL,BLAST (2)\n"
           "  --msaout FILENAME           output multiple seq. alignments to FASTA file\n"
           "  --qmask none|dust|soft      mask seqs with dust, soft or no method (dust)\n"
-          "  --sizein                    read abundance annotation from input\n"
+          "  --sizein                    propagate abundance annotation from input\n"
           "  --sizeout                   write cluster abundances to centroid file\n"
           "  --strand plus|both          cluster using plus or both strands (plus)\n"
           "  --uc FILENAME               filename for UCLUST-like output\n"
@@ -1363,7 +1363,7 @@ void cmd_help()
           "  --maxuniquesize INT         maximum abundance for output from dereplication\n"
           "  --minuniquesize INT         minimum abundance for output from dereplication\n"
           "  --output FILENAME           output FASTA file\n"
-          "  --sizein                    read abundance annotation from input\n"
+          "  --sizein                    propagate abundance annotation from input\n"
           "  --sizeout                   write abundance annotation to output\n"
           "  --strand plus|both          dereplicate plus or both strands (plus)\n"
           "  --topn INT                  output just the n most abundant sequences\n"
@@ -1420,7 +1420,7 @@ void cmd_help()
           "  --rowlen INT                width of alignment lines in alnout output (64)\n"
           "  --self                      reject if labels identical\n"
           "  --selfid                    reject if sequences identical\n"
-          "  --sizeout                   write abundance annotation to output\n"
+          "  --sizeout                   write abundance annotation to dbmatched file\n"
           "  --strand plus|both          search plus or both strands (plus)\n"
           "  --target_cov REAL           reject if fraction of target seq. aligned lower\n"
           "  --top_hits_only             output only hits with identity equal to the best\n"
@@ -1443,7 +1443,7 @@ void cmd_help()
           "  --minsize INT               minimum abundance for sortbysize\n"
           "  --output FILENAME           output FASTA file\n"
           "  --relabel STRING            relabel with this prefix string after sorting\n"
-          "  --sizeout                   add abundance annotation to output\n"
+          "  --sizeout                   add abundance to output when relabelling\n"
           "  --sortbylength FILENAME     sort sequences by length in given FASTA file\n"
           "  --sortbysize FILENAME       abundance sort sequences in given FASTA file\n"
           "  --topn INT                  output just top n seqs after sorting\n"
diff --git a/src/vsearch.h b/src/vsearch.h
index 3bb48661..891fdd87 100644
--- a/src/vsearch.h
+++ b/src/vsearch.h
@@ -1,5 +1,5 @@
 /*
-    Copyright (C) 2014 Torbjorn Rognes and Tomas Flouri
+    Copyright (C) 2014-2015 Torbjorn Rognes and Tomas Flouri
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU Affero General Public License as
@@ -82,6 +82,9 @@
 #include "cpu.h"
 #include "allpairs.h"
 
+#define PROG_NAME "vsearch"
+#define PROG_VERSION "v1.0.10"
+
 #ifdef __APPLE__
 #define PROG_ARCH "osx_x86_64"
 #else
@@ -102,9 +105,6 @@
 #define FORMAT_BZIP  2
 #define FORMAT_GZIP  3
 
-#define PROG_NAME "vsearch"
-#define PROG_VERSION "v1.0.9"
-
 /* options */
 
 extern char * opt_allpairs_global;