-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathSnakefile
executable file
·103 lines (85 loc) · 3.13 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
configfile : "config.yaml"
include: "rules/00_download_data.skm"
#################################### Mapping and Quantification ################################
#
# In this module, we declare five rules that index the genome and map all the reads to it
# (hisat2), sort the alignments and collect QC statistics (samtools), and count the reads
# that map to each gene (featureCounts).
#
#########################################################################################
rule hisat2_Genome_index:
    # First step of the hisat2 mapping: build the genome index from the dm6 FASTA.
    input:
        "Genome/dm6.fa"
    output:
        # hisat2-build is given the prefix Genome/Index/dm6; this file under that
        # prefix is the one downstream rules depend on.
        "Genome/Index/dm6.1.ht2"
    log:
        "logs/hisat2_Genome_index.log"
    conda:
        "envs/core.yaml"
    threads: 7
    shell:
        # stderr of the index build is captured in the rule's log file.
        "hisat2-build -p {threads} {input} "
        "Genome/Index/dm6 2> {log}"
rule hisat2_to_Genome:
    # Align the reads of one sample (passed with -U) to the dm6 hisat2 index and
    # write the alignments as SAM.
    input:
        fastq = "FASTQ/{sample}.fastq.gz",
        # Declared only so that the index rule runs first; the command itself
        # addresses the index through its prefix (Genome/Index/dm6).
        genome = "Genome/Index/dm6.1.ht2"
    output:
        # temp(): the SAM is an intermediate file that Snakemake may delete once
        # the rules consuming it have finished.
        temp("hisat2/{sample}.sam")
    threads: 3
    conda:
        "envs/core.yaml"
    shell:
        # Use {threads} instead of a hard-coded 3 so hisat2 follows the thread count
        # Snakemake actually grants (it is scaled down when fewer cores are available).
        "hisat2 -p {threads} -U {input.fastq} -x Genome/Index/dm6 > {output} "
rule samTobam:
    # Convert a sample's SAM to BAM, sort it, and index the sorted BAM.
    input:
        "hisat2/{sample}.sam"
    output:
        "hisat2/{sample}.sorted.bam"
    conda:
        "envs/core.yaml"
    shell:
        # view -> sort are chained through a pipe; the index step only runs
        # if the sort succeeded (&&).
        "samtools view -b {input} | "
        "samtools sort - -o {output} && "
        "samtools index {output} "
rule bamstats:
    # Per-sample alignment QC: write `samtools stats` output to a text file and
    # render it with plot-bamstats.
    input:
        "hisat2/{sample}.sorted.bam"
    output:
        stats_txt = "QC/{sample}/{sample}.stats",
        stats_html = "QC/{sample}/{sample}.plots.html"
    conda:
        "envs/core.yaml"
    params:
        # Prefix handed to `plot-bamstats -p`; the declared HTML output is this
        # prefix followed by ".html".
        "QC/{sample}/{sample}.plots"
    shell:
        "samtools stats {input} > {output.stats_txt} && "
        "plot-bamstats -p {params} {output.stats_txt}"
rule featureCounts:
    # Count reads per gene across every sample's sorted BAM in a single
    # featureCounts run, producing one combined table.
    input:
        # One sorted BAM per entry of SAMPLES (defined outside this section).
        bam = expand("hisat2/{sample}.sorted.bam", sample=SAMPLES),
        gtf = "Gene_annotation/dm6.ensGene.gtf"
    output:
        "featureCounts/total_samples.gene_count.txt"
    log:
        "logs/featureCounts.total.log"
    conda:
        "envs/core.yaml"
    threads: 1
    shell:
        # stderr of featureCounts is redirected to the rule's log file.
        "featureCounts -a {input.gtf} -o {output} "
        "{input.bam} 2> {log}"
############# Downstream analysis #############
#
# Everything below corresponds to workflows that perform different analyses to get meaningful
# quantitative data. In the rules/ folder you can see the different snakemake modules (.skm files)
# which are `included` to be connected with the previous rules that are explicit on this
# current script. The `include` statement allows the integration of the .skm files. Notice
# that all these snakemake scripts work under python, thus any python syntax can be used.
#
###############################################
include: "rules/Pseudoalignment.skm"
rule run_salmon:
    # Target rule: it has no command of its own and only requests the salmon
    # quantification file of every sample in SAMPLES.
    input:
        expand("salmon/{sample}/quant.sf", sample=SAMPLES)
include: "rules/01_stringtie.skm"
include: "rules/02_bridge.skm"
include: "rules/03_whippet_quant.skm"
rule get_whippet_quant:
    # Calling point for the whole whippet analysis: requesting this rule asks for
    # the .psi.gz quantification of every sample in SAMPLES.
    input:
        expand(
            "Whippet/Quant/{sample}.psi.gz",
            sample=SAMPLES,
        )
include: "rules/04_whippet_delta.skm"