-
Notifications
You must be signed in to change notification settings - Fork 2
/
Snakefilev7
85 lines (72 loc) · 2.74 KB
/
Snakefilev7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#Adding support to GCS
from pathlib import Path
from snakemake.remote.GS import RemoteProvider as GSRemoteProvider
GS = GSRemoteProvider()
GS_INPUT = "hn-snakemake/big"
GS_PREFIX = "mw_test_metagenomics"
shell.executable("/bin/bash")
shell.prefix("source $HOME/.bashrc; ")
IDS, = glob_wildcards("runs/{id}.txt")
IDS2, = glob_wildcards("runs/{id}.txt")
rule all:
input: GS.remote(expand(GS_PREFIX + "/bam/{sample}.{sample2}.bam", sample=IDS, sample2=IDS2))
#rule download deleted as it is no longer necessary
rule cutadapt:
input:
R1=GS.remote(GS_INPUT + "/fastq/{id}_1.fastq.gz"),
R2=GS.remote(GS_INPUT + "/fastq/{id}_2.fastq.gz")
output:
R1=GS.remote(GS_PREFIX + "/trimmed/{id}_1.t.fastq.gz"),
R2=GS.remote(GS_PREFIX + "/trimmed/{id}_2.t.fastq.gz")
conda: "envs/cutadapt.yaml"
threads: 4
shell: "cutadapt -j 4 -a AGATCGGAAGAGC -A AGATCGGAAGAGC -o {output.R1} -p {output.R2} -O 5 --minimum-length=50 {input.R1} {input.R2}"
rule megahit:
input:
R1=GS.remote(GS_PREFIX + "/trimmed/{id}_1.t.fastq.gz"),
R2=GS.remote(GS_PREFIX + "/trimmed/{id}_2.t.fastq.gz")
params:
di=GS.remote(GS_PREFIX + "/megahit/{id}")
output:
# di=GS.remote(GS_PREFIX + "/megahit/{id}/"),
fa=GS.remote(GS_PREFIX + "/megahit/{id}/final.contigs.fa")
conda: "envs/megahit.yaml"
threads: 8
shell: "mkdir -p {params.di} && megahit --continue --k-list 27,47,67,87 --kmin-1pass -m 12e+10 --presets meta-large --min-contig-len 1000 -t {threads} -1 {input.R1} -2 {input.R2} -o {params.di}"
rule bwa_index:
input: GS.remote(GS_PREFIX + "/megahit/{id}/final.contigs.fa")
output:
ann=GS.remote(GS_PREFIX + "/bwa_indices/{id}.fa.ann"),
pac=GS.remote(GS_PREFIX + "/bwa_indices/{id}.fa.pac"),
amb=GS.remote(GS_PREFIX + "/bwa_indices/{id}.fa.amb"),
bwt=GS.remote(GS_PREFIX + "/bwa_indices/{id}.fa.bwt"),
sa =GS.remote(GS_PREFIX + "/bwa_indices/{id}.fa.sa")
params:
idx=GS.remote(GS_PREFIX + "/bwa_indices/{id}.fa")
conda: "envs/bwa.yaml"
threads: 8
shell:
'''
bwa index -p {params.idx} {input}
'''
rule bwa_mem:
input:
R1=GS.remote(GS_PREFIX + "/trimmed/{id}_1.t.fastq.gz"),
R2=GS.remote(GS_PREFIX + "/trimmed/{id}_2.t.fastq.gz"),
ann=GS.remote(GS_PREFIX + "/bwa_indices/{id}.fa.ann"),
pac=GS.remote(GS_PREFIX + "/bwa_indices/{id}.fa.pac"),
amb=GS.remote(GS_PREFIX + "/bwa_indices/{id}.fa.amb"),
bwt=GS.remote(GS_PREFIX + "/bwa_indices/{id}.fa.bwt"),
sa =GS.remote(GS_PREFIX + "/bwa_indices/{id}.fa.sa")
output:
bam=GS.remote(GS_PREFIX + "/bam/{id}.{id2}.bam")
params:
idx=GS.remote(GS_PREFIX + "/bwa_indices/{id}.fa")
conda: "envs/bwa.yaml"
threads: 8
shell:
'''
bwa mem -t 8 {params.idx} {input.R1} {input.R2} | samtools sort -@8 -m 500M -o {output.bam} -
samtools index {output.bam}
samtools flagstat {output.bam} > {output.bam}.flagstat
'''