Skip to content

Commit

Permalink
SWARM 2.1.1: Fix a bug with the fastidious option
Browse files Browse the repository at this point in the history
  • Loading branch information
torognes committed Mar 31, 2015
1 parent 7fce762 commit 57e9c7b
Show file tree
Hide file tree
Showing 6 changed files with 69 additions and 54 deletions.
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ Table of Content
* [Get fasta sequences for all amplicons in a swarm](#extract_all)
* [Troubleshooting](#troubleshooting)
* [New features](#features)
* [version 2.1.1](#version211)
* [version 2.1.0](#version210)
* [version 2.0.7](#version207)
* [version 2.0.6](#version206)
Expand Down Expand Up @@ -322,6 +323,12 @@ released since 2004.
<a name="features"/>
## New features##

<a name="version211"/>
### version 2.1.1 ###

**swarm** 2.1.1 fixes a bug with the fastidious option that caused it
to ignore some connections between heavy and light swarms.

<a name="version210"/>
### version 2.1.0 ###

Expand Down
6 changes: 5 additions & 1 deletion man/swarm.1
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
.\" ============================================================================
.TH swarm 1 "March 24, 2015" "version 2.1.0" "USER COMMANDS"
.TH swarm 1 "March 31, 2015" "version 2.1.1" "USER COMMANDS"
.\" ============================================================================
.SH NAME
swarm \(em find clusters of nearly-identical nucleotide amplicons
Expand Down Expand Up @@ -294,6 +294,10 @@ New features and important modifications of \fBswarm\fR (short lived
or minor bug releases are not mentioned):
.RS
.TP
.BR v2.1.1\~ "released March 31, 2015"
Version 2.1.1 fixes a bug with the fastidious option that caused it
to ignore some connections between heavy and light swarms.
.TP
.BR v2.1.0\~ "released March 24, 2015"
Version 2.1.0 marks the first official release of swarm 2.
.TP
Expand Down
Binary file modified man/swarm_manual.pdf
Binary file not shown.
104 changes: 54 additions & 50 deletions src/algod1.cc
Original file line number Diff line number Diff line change
Expand Up @@ -696,8 +696,18 @@ bool hash_check_attach(char * seq,
return 0;
}

/*
  Compute the reference variant count for a sequence, for use as a
  sanity check against a counted number of variants.

  seq  the sequence to inspect (only positions 0..len-1 are read)
  len  the number of positions in seq

  Returns 6*len + 5 + b, where b is the number of positions whose
  symbol differs from the symbol immediately before it.
*/
long expected_variant_count(char * seq, int len)
{
  int boundaries = 0;
  int pos = 1;

  /* count every position that starts a new run of identical symbols,
     not counting the very first position */
  while (pos < len)
    {
      boundaries += (seq[pos] != seq[pos - 1]) ? 1 : 0;
      pos++;
    }

  return 6 * len + 5 + boundaries;
}


long fastidious_mark_small_var(BloomFilter * bloom,
char * buffer,
char * varseq,
int seed)
{
/*
Expand All @@ -710,17 +720,14 @@ long fastidious_mark_small_var(BloomFilter * bloom,

long variants = 0;

char * varseq = buffer;
unsigned char * seq = (unsigned char*) db_getsequence(seed);
unsigned long start = 0;
unsigned long seqlen = db_getsequencelen(seed);
unsigned long end = seqlen;

/* make an exact copy */
memcpy(varseq, seq, seqlen);

/* substitutions */
for(unsigned int i=start; i<end; i++)
for(unsigned int i=0; i<seqlen; i++)
{
for (int v=1; v<5; v++)
if (v != seq[i])
Expand All @@ -733,10 +740,9 @@ long fastidious_mark_small_var(BloomFilter * bloom,
}

/* deletions */
memcpy(varseq, seq, start);
if (start < seqlen-1)
memcpy(varseq+start, seq+start+1, seqlen-start-1);
for(unsigned int i=start; i<end; i++)
if (seqlen > 1)
memcpy(varseq, seq+1, seqlen-1);
for(unsigned int i=0; i<seqlen; i++)
{
if ((i==0) || (seq[i] != seq[i-1]))
{
Expand All @@ -747,9 +753,8 @@ long fastidious_mark_small_var(BloomFilter * bloom,
}

/* insertions */
memcpy(varseq, seq, start);
memcpy(varseq+start+1, seq+start, seqlen-start);
for(unsigned int i=start; i<end; i++)
memcpy(varseq+1, seq, seqlen);
for(unsigned int i=0; i<seqlen+1; i++)
{
for(int v=1; v<5; v++)
{
Expand All @@ -763,6 +768,11 @@ long fastidious_mark_small_var(BloomFilter * bloom,
if (i<seqlen)
varseq[i] = seq[i];
}
#if 0
long e = expected_variant_count((char*)seq, seqlen);
if (variants != e)
fprintf(logfile, "Incorrect number of variants: %ld Expected: %ld\n", variants, e);
#endif
return variants;
}

Expand All @@ -777,14 +787,11 @@ long fastidious_check_large_var_2(char * seq,

long matches = 0;

unsigned long start = 0;
unsigned long end = seqlen;

/* make an exact copy */
memcpy(varseq, seq, seqlen);

/* substitutions */
for(unsigned int i=start; i<end; i++)
for(unsigned int i=0; i<seqlen; i++)
{
for (int v=1; v<5; v++)
if (v != seq[i])
Expand All @@ -797,10 +804,9 @@ long fastidious_check_large_var_2(char * seq,
}

/* deletions */
memcpy(varseq, seq, start);
if (start < seqlen-1)
memcpy(varseq+start, seq+start+1, seqlen-start-1);
for(unsigned int i=start; i<end; i++)
if (seqlen > 1)
memcpy(varseq, seq+1, seqlen-1);
for(unsigned int i=0; i<seqlen; i++)
{
if ((i==0) || (seq[i] != seq[i-1]))
{
Expand All @@ -811,9 +817,8 @@ long fastidious_check_large_var_2(char * seq,
}

/* insertions */
memcpy(varseq, seq, start);
memcpy(varseq+start+1, seq+start, seqlen-start);
for(unsigned int i=start; i<end; i++)
memcpy(varseq+1, seq, seqlen);
for(unsigned int i=0; i<seqlen+1; i++)
{
for(int v=1; v<5; v++)
{
Expand All @@ -831,11 +836,11 @@ long fastidious_check_large_var_2(char * seq,
}

void fastidious_check_large_var(BloomFilter * bloom,
char * buffer1,
char * buffer2,
int seed,
long * m,
long * v)
char * varseq,
char * buffer2,
int seed,
long * m,
long * v)
{
/*
bloom is a BloomFilter in which to enter the variants
Expand All @@ -849,17 +854,14 @@ void fastidious_check_large_var(BloomFilter * bloom,
long variants = 0;
long matches = 0;

char * varseq = buffer1;
unsigned char * seq = (unsigned char*) db_getsequence(seed);
unsigned long start = 0;
unsigned long seqlen = db_getsequencelen(seed);
unsigned long end = seqlen;

/* make an exact copy */
memcpy(varseq, seq, seqlen);

/* substitutions */
for(unsigned int i=start; i<end; i++)
for(unsigned int i=0; i<seqlen; i++)
{
for (int v=1; v<5; v++)
if (v != seq[i])
Expand All @@ -868,35 +870,33 @@ void fastidious_check_large_var(BloomFilter * bloom,
variants++;
if (bloom->get(varseq, seqlen))
matches += fastidious_check_large_var_2(varseq,
seqlen,
buffer2,
seed);
seqlen,
buffer2,
seed);
}
varseq[i] = seq[i];
}

/* deletions */
memcpy(varseq, seq, start);
if (start < seqlen-1)
memcpy(varseq+start, seq+start+1, seqlen-start-1);
for(unsigned int i=start; i<end; i++)
if (seqlen > 1)
memcpy(varseq, seq+1, seqlen-1);
for(unsigned int i=0; i<seqlen; i++)
{
if ((i==0) || (seq[i] != seq[i-1]))
{
variants++;
if (bloom->get(varseq, seqlen-1))
matches += fastidious_check_large_var_2(varseq,
seqlen-1,
buffer2,
seed);
seqlen-1,
buffer2,
seed);
}
varseq[i] = seq[i];
}

/* insertions */
memcpy(varseq, seq, start);
memcpy(varseq+start+1, seq+start, seqlen-start);
for(unsigned int i=start; i<end; i++)
memcpy(varseq+1, seq, seqlen);
for(unsigned int i=0; i<seqlen+1; i++)
{
for(int v=1; v<5; v++)
{
Expand All @@ -906,16 +906,22 @@ void fastidious_check_large_var(BloomFilter * bloom,
variants++;
if (bloom->get(varseq, seqlen+1))
matches += fastidious_check_large_var_2(varseq,
seqlen+1,
buffer2,
seed);
seqlen+1,
buffer2,
seed);
}
}
if (i<seqlen)
varseq[i] = seq[i];
}
*m = matches;
*v = variants;

#if 0
long e = expected_variant_count((char*)seq, seqlen);
if (variants != e)
fprintf(logfile, "Incorrect number of variants: %ld Expected: %ld\n", variants, e);
#endif
}


Expand Down Expand Up @@ -1142,8 +1148,6 @@ void algo_d1_run()
if (opt_fastidious)
{
fprintf(logfile, "\n");
fprintf(logfile, "WARNING: The fastidious option is a beta feature "
"in rapid development.\n");
fprintf(logfile, "Results before fastidious processing:\n");
fprintf(logfile, "Number of swarms: %lu\n", swarmcount);
fprintf(logfile, "Largest swarm: %d\n", largest);
Expand Down
4 changes: 2 additions & 2 deletions src/swarm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ void args_usage()
/* 0 1 2 3 4 5 6 7 */
/* 01234567890123456789012345678901234567890123456789012345678901234567890123456789 */

fprintf(stderr, "Usage: %s [OPTIONS] [filename]\n", progname);
fprintf(stderr, "Usage: swarm [OPTIONS] [filename]\n");
fprintf(stderr, " -d, --differences INTEGER resolution (1)\n");
fprintf(stderr, " -h, --help display this help and exit\n");
fprintf(stderr, " -o, --output-file FILENAME output result filename (stdout)\n");
Expand All @@ -212,9 +212,9 @@ void args_usage()
fprintf(stderr, " -i, --internal-structure FILENAME write internal swarm structure to file\n");
fprintf(stderr, " -l, --log FILENAME log to file, not to stderr\n");
fprintf(stderr, " -n, --no-otu-breaking never break OTUs\n");
fprintf(stderr, " -w, --seeds FILENAME write seed seqs with abundances to FASTA\n");
fprintf(stderr, " -f, --fastidious link nearby low-abundance swarms\n");
fprintf(stderr, " -b, --boundary INTEGER min mass of large OTU for fastidious (3)\n");
fprintf(stderr, " -w, --seeds FILENAME write seed seqs with abundances to FASTA\n");
fprintf(stderr, " -y, --bloom-bits INTEGER bits used per Bloom filter entry (16)\n");
fprintf(stderr, " -c, --ceiling INTEGER max memory in MB used for fastidious\n");
fprintf(stderr, "\n");
Expand Down
2 changes: 1 addition & 1 deletion src/swarm.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
#define LINE_MAX 2048
#endif

#define SWARM_VERSION "2.1.0"
#define SWARM_VERSION "2.1.1"
#define WIDTH 32
#define WIDTH_SHIFT 5
#define BLOCKWIDTH 32
Expand Down

0 comments on commit 57e9c7b

Please sign in to comment.