Skip to content

Commit

Permalink
merged, new docs
Browse files Browse the repository at this point in the history
  • Loading branch information
swo committed Jun 13, 2017
2 parents d5603fc + 7b43951 commit 61713a1
Show file tree
Hide file tree
Showing 10 changed files with 440 additions and 412 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ __pycache__/*
dist/*
build/*
.tox/*
*.pyc
8 changes: 6 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,12 @@ Version history
- 1.3.0: Improved OTU file header. Split the log file into a debug log and a progress log.
- 1.4.0: Made an improvement to the Levenshtein-based genetic dissimilarity metric.
- 1.4.1: Accounted for a pandas API change to ``MultiIndex``
- 1.5.0: Added the restart and rep seq scripts

To-do
-----

- Fix the output format (maybe put ``OTU_ID`` in the first field and row?)
- Find a way to avoid reading the entire count file? (The fasta file is not loaded in full because we use ``SeqIO.index``.)
- Testing for the restart scripts
- Better coverage for unit tests

Citation
Expand All @@ -70,4 +70,8 @@ distribution-based OTU calling. *PLoS ONE* (2017) doi:10.1371/journal.pone.01763
Author
------

If you find a bug or have a request for a new feature, open an issue_.

.. _issue: https://github.com/swo/dbotu3/issues

Scott Olesen / *swo at alum.mit.edu*
230 changes: 1 addition & 229 deletions data/output/debug_log.txt

Large diffs are not rendered by default.

260 changes: 130 additions & 130 deletions data/output/log.txt
Original file line number Diff line number Diff line change
@@ -1,132 +1,132 @@
---
time_started: 2017-04-21 15:57:44.341285
genetic_criterion_threshold: 0.1
abundance_criterion_threshold: 10.0
distribution_criterion_threshold: 0.0005
sequence_table_filename: /Users/scott/lib/dbotu/data/input/counts.txt
fasta_filename: /Users/scott/lib/dbotu/data/input/seq.fa
otu_table_output_filename: /Users/scott/lib/dbotu/data/output/otu.txt
progress_log_output_filename: /Users/scott/lib/dbotu/data/output/log.txt
membership_output_filename: /Users/scott/lib/dbotu/data/output/membership.txt
debug_log_output_filename: /Users/scott/lib/dbotu/data/output/debug_log.txt
time_started 2017-06-02 12:20:26.231840
genetic_criterion_threshold 0.1
abundance_criterion_threshold 10.0
distribution_criterion_threshold 0.0005
sequence_table_filename /Users/scott/lib/dbotu/data/input/counts.txt
fasta_filename /Users/scott/lib/dbotu/data/input/seq.fa
otu_table_output_filename /Users/scott/lib/dbotu/data/output/otu.txt
progress_log_output_filename /Users/scott/lib/dbotu/data/output/log.txt
membership_output_filename /Users/scott/lib/dbotu/data/output/membership.txt
debug_log_output_filename /Users/scott/lib/dbotu/data/output/debug_log.txt
---
- "seq106"
- "seq53"
- "seq86"
- "seq89"
- "seq118"
- "seq0"
- "seq7"
- "seq47"
- "seq17"
- "seq67"
- "seq38"
- "seq87"
- "seq62"
- "seq50"
- "seq60"
- "seq48"
- "seq93"
- "seq34"
- "seq110"
- "seq65"
- "seq6"
- "seq79"
- "seq85"
- "seq72"
- "seq28"
- "seq91"
- "seq109"
- "seq96"
- "seq74"
- "seq10"
- "seq71"
- "seq75"
- "seq76"
- "seq84"
- "seq108"
- "seq13"
- "seq78"
- "seq44"
- "seq9"
- "seq113"
- "seq57"
- "seq8"
- "seq12"
- "seq64"
- "seq63"
- "seq56"
- "seq25"
- "seq31"
- "seq92"
- "seq29"
- "seq95"
- "seq82"
- "seq20"
- "seq15"
- "seq54"
- "seq21"
- "seq80"
- "seq27"
- "seq99"
- "seq102"
- "seq5"
- "seq112"
- "seq2"
- "seq43"
- "seq24"
- "seq14"
- "seq49"
- "seq41"
- "seq94"
- "seq68"
- "seq77"
- "seq52"
- "seq37"
- "seq104"
- ["seq32", "seq84"]
- ["seq40", "seq84"]
- "seq3"
- ["seq16", "seq84"]
- ["seq103", "seq84"]
- ["seq45", "seq84"]
- ["seq55", "seq84"]
- "seq58"
- "seq30"
- ["seq19", "seq84"]
- "seq97"
- "seq88"
- ["seq115", "seq17"]
- ["seq4", "seq64"]
- "seq66"
- ["seq73", "seq34"]
- "seq1"
- "seq22"
- ["seq105", "seq34"]
- ["seq11", "seq58"]
- "seq70"
- "seq18"
- ["seq119", "seq109"]
- "seq23"
- ["seq83", "seq63"]
- ["seq117", "seq84"]
- ["seq116", "seq41"]
- "seq114"
- ["seq111", "seq110"]
- ["seq107", "seq86"]
- "seq101"
- "seq100"
- "seq98"
- ["seq90", "seq29"]
- ["seq81", "seq86"]
- ["seq26", "seq30"]
- ["seq69", "seq2"]
- ["seq61", "seq15"]
- ["seq51", "seq31"]
- ["seq46", "seq63"]
- ["seq42", "seq12"]
- ["seq39", "seq14"]
- ["seq36", "seq38"]
- "seq35"
- ["seq33", "seq17"]
- ["seq59", "seq24"]
seq106
seq53
seq86
seq89
seq118
seq0
seq7
seq47
seq17
seq67
seq38
seq87
seq62
seq50
seq60
seq48
seq93
seq34
seq110
seq65
seq6
seq79
seq85
seq72
seq28
seq91
seq109
seq96
seq74
seq10
seq71
seq75
seq76
seq84
seq108
seq13
seq78
seq44
seq9
seq113
seq57
seq8
seq12
seq64
seq63
seq56
seq25
seq31
seq92
seq29
seq95
seq82
seq20
seq15
seq54
seq21
seq80
seq27
seq99
seq102
seq5
seq112
seq2
seq43
seq24
seq14
seq49
seq41
seq94
seq68
seq77
seq52
seq37
seq104
seq32 seq84
seq40 seq84
seq3
seq16 seq84
seq103 seq84
seq45 seq84
seq55 seq84
seq58
seq30
seq19 seq84
seq97
seq88
seq115 seq17
seq4 seq64
seq66
seq73 seq34
seq1
seq22
seq105 seq34
seq11 seq58
seq70
seq18
seq119 seq109
seq23
seq83 seq63
seq117 seq84
seq116 seq41
seq114
seq111 seq110
seq107 seq86
seq101
seq100
seq98
seq90 seq29
seq81 seq86
seq26 seq30
seq69 seq2
seq61 seq15
seq51 seq31
seq46 seq63
seq42 seq12
seq39 seq14
seq36 seq38
seq35
seq33 seq17
seq59 seq24
45 changes: 23 additions & 22 deletions dbotu.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,14 +148,14 @@ def _print_debug_log(self, *fields):
if self.debug_log is not None:
print(*fields, sep='\t', file=self.debug_log)

def _print_progress_log(self, line):
def _print_progress_log(self, *fields):
'''
Write fields to progress log file (if present).
returns: nothing
'''
if self.progress_log is not None:
print(line, file=self.progress_log)
print(*fields, sep='\t', file=self.progress_log)

def ga_matches(self, candidate):
'''
Expand Down Expand Up @@ -226,7 +226,7 @@ def _merge_sequence(self, member, otu):
otu.absorb(member)
self.membership[otu.name].append(member.name)

self._print_progress_log('- ["{}", "{}"]'.format(member.name, otu.name))
self._print_progress_log(member.name, otu.name)
self._print_debug_log(member.name, 'merged_into', otu.name)

def _make_otu(self, otu):
Expand All @@ -241,7 +241,7 @@ def _make_otu(self, otu):
self.otus.append(otu)
self.membership[otu.name] = [otu.name]

self._print_progress_log('- "{}"'.format(otu.name))
self._print_progress_log(otu.name)
self._print_debug_log(otu.name, 'new_otu')

def run(self):
Expand Down Expand Up @@ -348,28 +348,29 @@ def call_otus(seq_table_fh, fasta_fn, output_fh, gen_crit, abund_crit, pval_crit
# set up the input fasta records
records = SeqIO.index(fasta_fn, 'fasta')

# write the setup values to the log file, if present
if log is not None:
print('---', file=log)
print('time_started:', datetime.datetime.now(), file=log)
print('genetic_criterion_threshold:', gen_crit, file=log)
print('abundance_criterion_threshold:', abund_crit, file=log)
print('distribution_criterion_threshold:', pval_crit, file=log)
print('sequence_table_filename:', os.path.realpath(seq_table_fh.name), file=log)
print('fasta_filename:', os.path.realpath(fasta_fn), file=log)
print('otu_table_output_filename:', os.path.realpath(output_fh.name), file=log)
print('progress_log_output_filename:', os.path.realpath(log.name), file=log)
# generate the caller object
caller = DBCaller(seq_table, records, gen_crit, abund_crit, pval_crit, log, debug)

# write the setup values to the log file (if present)
caller._print_progress_log('---')
caller._print_progress_log('time_started', datetime.datetime.now())
caller._print_progress_log('genetic_criterion_threshold', gen_crit)
caller._print_progress_log('abundance_criterion_threshold', abund_crit)
caller._print_progress_log('distribution_criterion_threshold', pval_crit)
caller._print_progress_log('sequence_table_filename', os.path.realpath(seq_table_fh.name))
caller._print_progress_log('fasta_filename', os.path.realpath(fasta_fn))
caller._print_progress_log('otu_table_output_filename', os.path.realpath(output_fh.name))
caller._print_progress_log('progress_log_output_filename', os.path.realpath(log.name))

if membership is not None:
print('membership_output_filename:', os.path.realpath(membership.name), file=log)
if membership is not None:
caller._print_progress_log('membership_output_filename', os.path.realpath(membership.name))

if debug is not None:
print('debug_log_output_filename:', os.path.realpath(debug.name), file=log)
if debug is not None:
caller._print_progress_log('debug_log_output_filename', os.path.realpath(debug.name))

print('---', file=log)
caller._print_progress_log('---')

# generate the caller object
caller = DBCaller(seq_table, records, gen_crit, abund_crit, pval_crit, log, debug)
# run it!
caller.run()
caller.write_otu_table(output_fh)

Expand Down
Loading

0 comments on commit 61713a1

Please sign in to comment.