generated from coderefinery/word-count
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Snakefile
50 lines (44 loc) · 1.41 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# a list of all the books we are analyzing
# glob_wildcards() matches every file under data/ ending in .txt and
# collects the {book} part, so DATA is a list of base names without
# the directory or the .txt extension
DATA = glob_wildcards('data/{book}.txt').book
# this is for running on HPC resources
# localrules marks cheap bookkeeping rules to execute directly on the
# submit/login node instead of being dispatched as cluster jobs
localrules: all, make_archive
# the default rule
# Snakemake builds the first rule in the file when invoked with no
# target; requesting the final archive pulls in the entire workflow
# through the dependency graph
rule all:
    input:
        'zipf_analysis.tar.gz'
# count words in one of our books
# logfiles from each run are put in .log files
rule count_words:
    input:
        # named inputs: the script itself is an input so edits to it
        # trigger a re-run of this rule
        wc='source/wordcount.py',
        book='data/{file}.txt'
    output: 'processed_data/{file}.dat'
    # cores requested from the scheduler; only reported in the echo
    # below — NOTE(review): wordcount.py itself appears serial, confirm
    threads: 4
    log: 'processed_data/{file}.log'
    shell:
        '''
        echo "Running {input.wc} with {threads} cores on {input.book}." &> {log} &&
        python {input.wc} {input.book} {output} >> {log} 2>&1
        '''
# create a plot for each book
# consumes the per-book .dat table from count_words and renders a PNG;
# the shared {file} wildcard links the .dat input to the .png output
rule make_plot:
    input:
        plotcount='source/plotcount.py',
        book='processed_data/{file}.dat'
    output: 'results/{file}.png'
    shell: 'python {input.plotcount} {input.book} {output}'
# generate summary table
# aggregates over ALL books: expand() substitutes every entry of DATA
# into the pattern, so this rule waits for every count_words job
rule zipf_test:
    input:
        zipf='source/zipf_test.py',
        books=expand('processed_data/{book}.dat', book=DATA)
    output: 'results/results.txt'
    # the script writes its report to stdout; redirect into the output file
    shell: 'python {input.zipf} {input.books} > {output}'
# create an archive with all of our results
# bundles every plot, every word-count table, and the summary into one
# gzipped tarball — the single target requested by rule all
rule make_archive:
    input:
        expand('results/{book}.png', book=DATA),
        expand('processed_data/{book}.dat', book=DATA),
        'results/results.txt'
    output: 'zipf_analysis.tar.gz'
    shell: 'tar -czvf {output} {input}'