-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSnakefile
78 lines (61 loc) · 2.99 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
"""
Juno-typing
Author(s): Alejandra Hernandez-Segura, Kaitlin Weber, Edwin van der Kind and Maaike van den Beld
Organization: Rijksinstituut voor Volksgezondheid en Milieu (RIVM)
Department: Infektieziekteonderzoek, Diagnostiek en Laboratorium Surveillance (IDS), Bacteriologie (BPD)
Date: 12-01-2021
"""
#################################################################################
##### Import config file, sample_sheet and set output folder names #####
#################################################################################
from os.path import getsize, exists, abspath
from yaml import safe_load
#################################################################################
##### Load samplesheet, load genus dict and define output directory #####
#################################################################################
# Load the sample sheet (YAML) as a dictionary
# ("R1" and "R2" keys for fastq, and "assembly" for fasta).
# SAMPLES is later passed to expand() as the values of the {sample} wildcard.
sample_sheet = config["sample_sheet"]
with open(sample_sheet) as sample_sheet_file:
    # safe_load() returns None for an empty document; fall back to an empty
    # mapping so SAMPLES is always a dictionary.
    SAMPLES = safe_load(sample_sheet_file) or {}
# Base output directory used by most rules.
OUT = config["out"]

# The "threads" and "mem_gb" entries are not always integers when the workflow
# is launched through the Snakemake Python API, so coerce every value in place.
for section_name in ("threads", "mem_gb"):
    section = config[section_name]
    for key in section:
        section[key] = int(section[key])
# @################################################################################
# @#### Processes #####
# @################################################################################
# Rule definitions live in separate files under bin/rules/ and are pulled in
# here in the order listed. The targets requested by `rule all` (mlst7, 16s,
# serotype) are presumably produced by rules in these files - verify there.
include: "bin/rules/mlst7_fastq.smk"
include: "bin/rules/mlst7_multireport.smk"
include: "bin/rules/serotype.smk"
include: "bin/rules/serotype_multireports.smk"
include: "bin/rules/16s_extraction.smk"
# @################################################################################
# @#### Finalize pipeline (error/success) #####
# @################################################################################
# TODO: eventually these files should be stored somewhere else and included in the pipeline as tmp files
onerror:
shell(
"""
find -maxdepth 1 -type d -empty -exec rm -rf {{}} \;
find -maxdepth 1 -type f -name "*.depth.txt*" -exec rm -rf {{}} \;
echo -e "Something went wrong with Juno-typing pipeline. Please check the logging files in {OUT}/log/"
"""
)
#################################################################################
##### Specify final output #####
#################################################################################
localrules:
all,
aggregate_serotypes,
no_serotyper,
build_seroba_db,
rule all:
input:
expand(OUT + "/mlst7/{sample}/results.txt", sample=SAMPLES),
expand(OUT + "/16s/{sample}/16S_seq.fasta", sample=SAMPLES),
OUT + "/serotype/serotyper_multireport.csv",
OUT + "/mlst7/mlst7_multireport.csv",