Skip to content

Commit ea4a446

Browse files
committed
Initial commit
1 parent 42feb56 commit ea4a446

18 files changed

+299
-0
lines changed

.test/config/config.yaml

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Test configuration; mirrors config/config.yaml plus cluster settings.
genome: "hg38" # human or mouse
ensembl_genome_build: "110"
paired_end: False # test reads are single-end (reads/<dir>/<sample>.fastq.gz)
resources: # computing resources
  account: XXX
  partition: cclake
  max_jobs: 300
  trim:
    cpu: 8
    time: 60
  fastqc:
    cpu: 4
    time: 60
  damid:
    cpu: 8
    time: 120
  index: # read by rule bowtie2_build_index; values copied from config/config.yaml
    cpu: 36
    time: 60
  plotting:
    cpu: 2
    time: 20

.test/reads/README.md

Whitespace-only changes.

.test/reads/exp1/dam.fastq.gz

108 KB
Binary file not shown.

.test/reads/exp1/polII.fastq.gz

105 KB
Binary file not shown.

.test/reads/exp2/dam.fastq.gz

108 KB
Binary file not shown.

.test/reads/exp2/polII.fastq.gz

105 KB
Binary file not shown.

config/config.yaml

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
genome: "hg38" # human or mouse
2+
ensembl_genome_build: "110"
3+
paired_end: True # paired-end or single-end
4+
resources: # computing resources
5+
trim:
6+
cpu: 8
7+
time: 60
8+
fastqc:
9+
cpu: 4
10+
time: 60
11+
damid:
12+
cpu: 8
13+
time: 120
14+
index:
15+
cpu: 36
16+
time: 60
17+
plotting:
18+
cpu: 2
19+
time: 20

workflow/envs/damid.yaml

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
name: damid
2+
channels:
3+
- conda-forge
4+
- bioconda
5+
- defaults
6+
dependencies:
7+
- damidseq_pipeline=1.5.3
8+
- trim-galore=0.6.10
9+
- perl-inline-c=0.81

workflow/envs/deeptools.yaml

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
name: deeptools
2+
channels:
3+
- conda-forge
4+
- bioconda
5+
- defaults
6+
dependencies:
7+
- deeptools=3.5.4
8+
- python=3.10
9+
- ucsc-bedgraphtobigwig=445
+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Convert one "<sample>-vs-Dam" bedgraph track into a bigWig file with
# bedGraphToBigWig, using the genome's chrom.sizes file.
rule bedgraph2bigwig:
    input:
        cs=f"resources/{resources.genome}_chrom.sizes",
        # TODO: exclude the Dam sample from the sample wildcard here
        bg="results/bedgraph/{dir}/{sample}-vs-Dam.gatc.bedgraph",
    output:
        bw="results/bigwig/{dir}/{sample}.bw",
    params:
        extra="",
    threads: config["resources"]["fastqc"]["cpu"]
    resources:
        runtime=config["resources"]["fastqc"]["time"],
    # The shell command redirects to {log}, so the rule must declare a log
    # file; it carries the same wildcards as the output.
    log:
        "logs/bedgraph2bigwig/{dir}/{sample}.log",
    conda:
        "../envs/deeptools.yaml"
    shell:
        "bedGraphToBigWig "
        "{params.extra} "
        "{input.bg} "
        "{input.cs} "
        "{output.bw} > {log} 2>&1"
20+
21+
22+
# Average the bigWig tracks of one sample across all replicate directories.
# The script receives every bigWig and selects the ones for {sample} itself.
rule average_bigwigs:
    input:
        expand("results/bigwig/{dir}/{sample}.bw", dir=DIRS, sample=SAMPLES),
    output:
        bw="results/bigwig/average_bw/{sample}.bw",
    params:
        extra="",
    # The config files define no "deeptools" resources section, so reuse the
    # "fastqc" one, as rule bedgraph2bigwig does for the same conda env.
    threads: config["resources"]["fastqc"]["cpu"]
    resources:
        runtime=config["resources"]["fastqc"]["time"],
    # Log and output must use the same wildcards ({sample}, not {condition}).
    log:
        "logs/deeptools/bw_average_{sample}.log",
    conda:
        "../envs/deeptools.yaml"
    script:
        "../scripts/average_bigwig.py"

workflow/rules/deeptools.smk

Whitespace-only changes.

workflow/rules/peak_calling.smk

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Peak calling on all non-Dam bedgraph tracks.
# Work in progress: no output path or command has been defined yet.
rule peak_calling:
    input:
        fp="resources/find_peaks",
        # expand() performs plain string formatting and does not parse
        # wildcard constraints, so the Dam sample is excluded by filtering
        # SAMPLES (same meaning as the regex ^((?!Dam).)*$).
        bg=expand(
            "results/bedgraph/{dir}/{sample}-vs-Dam.gatc.bedgraph",
            dir=DIRS,
            sample=[s for s in SAMPLES if "Dam" not in s],
        ),
    output:
        "",  # TODO: output path still to be defined
7+
8+
# Annotate peaks with genes from the GTF file.
# Work in progress: peak input, output path and command are still placeholders.
rule peaks2genes:
    input:
        fp="resources/find_peaks",
        gtf=resources.gtf,
        peaks="",  # TODO: peak file produced by rule peak_calling
    output:
        "",  # TODO: output path still to be defined

workflow/rules/setup.smk

+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Build the GATC fragment track (GFF) for the genome fasta with
# gatc.track.maker.pl; --name is the output prefix, hence the
# "resources/<genome>.GATC.gff" output.
rule make_gatc_tracks:
    input:
        fa=resources.fasta,
    output:
        gatc=f"resources/{resources.genome}.GATC.gff",
    params:
        genome=f"resources/{resources.genome}",
    threads: config["resources"]["fastqc"]["cpu"]
    resources:
        # "runtime" matches the resource name used by the other rules
        runtime=config["resources"]["fastqc"]["time"],
    conda:
        "../envs/damid.yaml"
    # Log paths cannot reference params; "{params.genome}" would be read as a
    # wildcard. Build the path with an f-string instead.
    log:
        f"logs/make_gatc_tracks/{resources.genome}.log",
    shell:
        "gatc.track.maker.pl "
        "--name={params.genome} "
        "{input.fa} > {log} 2>&1 "
19+
20+
21+
# Build the Bowtie2 index for the genome fasta via the Snakemake wrapper.
rule bowtie2_build_index:
    input:
        ref=resources.fasta,
    output:
        # the six index files share one prefix
        multiext(
            f"resources/bowtie2_index/{resources.genome}/index",
            ".1.bt2", ".2.bt2", ".3.bt2", ".4.bt2",
            ".rev.1.bt2", ".rev.2.bt2",
        ),
    params:
        extra="",  # optional parameters
    threads: config["resources"]["index"]["cpu"]
    resources:
        runtime=config["resources"]["index"]["time"],
    log:
        "logs/bowtie2_build_index/build.log",
    wrapper:
        "v3.3.3/bio/bowtie2/build"
43+

workflow/scripts/average_bigwig.py

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
"""Average the bigWig files of one sample with bigwigAverage."""
import os

from snakemake.shell import shell

# Load Snakemake variables
log = snakemake.log_fmt_shell(stdout=True, stderr=True)
threads = snakemake.threads

all_bw = snakemake.input
sample = snakemake.wildcards["sample"]
out = snakemake.output["bw"]

# Keep only this sample's files. Compare the exact file name instead of
# testing for a substring of the whole path, so that sample "A" does not also
# select "A2.bw" or files in a directory whose name contains "A".
bw = [x for x in all_bw if os.path.basename(x) == f"{sample}.bw"]
if not bw:
    raise ValueError(f"No bigWig files found for sample {sample}")

# Create average bigwig file
shell(
    "bigwigAverage "
    "--bigwigs {bw} "
    "--outFileName {out} "
    "--numberOfProcessors {threads} "
    "{log}"
)
22+

workflow/scripts/damidseq_pipeline.sh

+33
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#!/usr/bin/env bash
#
# Run damidseq_pipeline inside reads/<dir> and move the resulting bedgraph
# files to the Snakemake output directory.

set -e

# get the current working directory
WORKDIR=$(pwd)

# Turn a path into an absolute one. The pipeline runs from reads/<dir>, which
# is two levels below WORKDIR, so a "../" prefix would point at reads/ rather
# than at the directory the Snakemake paths are relative to.
abs_path() {
    case "$1" in
        /*) printf '%s\n' "$1" ;;
        *) printf '%s\n' "${WORKDIR}/$1" ;;
    esac
}

GATC=$(abs_path "${snakemake_input[gatc]}")
IDXDIR=$(abs_path "${snakemake_params[idxdir]}")
LOG=$(abs_path "${snakemake_log[0]}")
OUTDIR=$(abs_path "${snakemake_output[dir]}")

# go to directory with fastq files
SAMPLEDIR=${snakemake_wildcards[dir]}
cd "reads/${SAMPLEDIR}"

# Check if data is paired-end or single-end
ARGS=()
if [ "${snakemake_params[paired]}" == "True" ]; then
    ARGS+=(--paired)
fi

# run DamID-seq pipeline (it works on the files in the current directory)
damidseq_pipeline "${ARGS[@]}" --gatc_frag_file="${GATC}" --bowtie2_genome_dir="${IDXDIR}" > "${LOG}" 2>&1

# move output files to the output directory; the bedgraph files are expected
# here in reads/<dir>, so move them before leaving it
mkdir -p "${OUTDIR}"
mv ./*.bedgraph "${OUTDIR}"

# go back to working directory
cd "${WORKDIR}"
32+
33+
+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
def dirs():
    """Return the names of the replicate directories found under ``reads/``.

    Each directory contains one replicate set of fastq files. Plain files that
    sit next to those directories (for example a README) are skipped, since
    only directories can hold fastq files.

    Returns:
        list[str]: Sorted directory base names; empty if there are none.
    """
    return sorted(
        os.path.basename(path)
        for path in glob.glob("reads/*")
        if os.path.isdir(path)
    )
8+
9+
10+
def samples():
    """Check sample names/files and return the sample wildcard values.

    Reads ``csv["sample"]``, ``DIRS`` and ``config["paired_end"]``, all of
    which are defined outside this function.

    Returns:
        The ``csv["sample"]`` object, unchanged.

    Raises:
        ValueError: If a sample name is empty or contains characters other
            than ASCII letters, digits and underscores, or if an expected
            fastq file is missing under ``reads/<dir>/``.
    """
    SAMPLES = csv["sample"]

    # fullmatch with "+" also rejects empty names and names ending in a
    # newline, which the previous "^...*$" pattern let through.
    illegal = [s for s in SAMPLES if not re.fullmatch(r"[a-zA-Z0-9_]+", s)]
    if illegal:
        illegal = "\n".join(illegal)
        raise ValueError(f"ERROR: following samples contain illegal characters:\n{illegal}")

    # File name suffixes expected for every sample in every directory
    if config["paired_end"]:
        suffixes = ("_R1_001.fastq.gz", "_R2_001.fastq.gz")
    else:
        suffixes = (".fastq.gz",)

    # Check if sample names match file names
    not_found = []
    for sample in SAMPLES:
        for read_dir in DIRS:
            for suffix in suffixes:
                fastq = f"reads/{read_dir}/{sample}{suffix}"
                if not os.path.isfile(fastq):
                    not_found.append(fastq)
    if not_found:
        not_found = "\n".join(not_found)
        raise ValueError(f"ERROR: following files not found:\n{not_found}")

    return SAMPLES
44+
45+
46+
def targets():
    """Build the list of target files for rule all.

    Returns:
        list: One element, the ``expand()`` result holding a
        ``results/bedgraph/<dir>`` path for every entry of ``DIRS``.
    """
    bedgraph_dirs = expand("results/bedgraph/{dir}", dir=DIRS)
    return [bedgraph_dirs]
54+
55+
56+
def dam_control():
    """Check whether a Dam-only control is present.

    Not implemented yet: performs no check and returns ``None``.
    """
    return None
60+

workflow/scripts/get_resource.sh

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
#!/usr/bin/env bash
#
# Download a gzip-compressed resource from params.url and decompress it to
# the Snakemake output path.

# Stop at the first failure, so pigz never runs on a failed download
set -euo pipefail

LOG=${snakemake_log[0]}
URL=${snakemake_params["url"]}
OUTPUT=${snakemake_output[0]}

wget -q "$URL" -O "$OUTPUT.gz" 2> "$LOG"
pigz -df "$OUTPUT.gz" 2>> "$LOG"

workflow/scripts/trim_galore.sh

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#!/usr/bin/env bash
#
# Trim the reads of one sample with Trim Galore and write the results to
# results/trimmed/<dir>.

set -e

DEST_DIR="results/trimmed/${snakemake_wildcards[dir]}"
BASENAME="${snakemake_wildcards[sample]}"

mkdir -p "$DEST_DIR"

# NOTE(review): this script compares params.paired against "YES", while
# damidseq_pipeline.sh compares its own against "True" - confirm what the
# rule passes.
# Quoting keeps the test valid when the parameter is empty.
if [ "${snakemake_params[paired]}" == "YES" ]
then
    INPUT=("${snakemake_input[r1]}" "${snakemake_input[r2]}")
else
    INPUT=("${snakemake_input[r1]}")
fi

# params.extra stays unquoted on purpose so that several options are split
# into separate arguments
trim_galore ${snakemake_params[extra]} --cores "${snakemake[threads]}" --output_dir "$DEST_DIR" --basename "$BASENAME" "${INPUT[@]}" > "${snakemake_log[0]}" 2>&1
27+

0 commit comments

Comments
 (0)