-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_patformm.smk
132 lines (121 loc) · 4.18 KB
/
run_patformm.smk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import os
# ---------------------------------------------------------------------------
# Workflow-wide settings: tool locations and the config file to load.
# ---------------------------------------------------------------------------

# Conda/mamba installation and the environment that is activated inside the
# wgbstools rules.
MAMBA_PATH = "/g/data/pq08/software/mambaforge"
METHYL_ENV = f"{MAMBA_PATH}/envs/methyl_env"

# Directory holding the patformm project, and the executable inside it.
patformm_path = "/g/data/pq08/projects/biomodal/patformm"
patformm = os.path.join(patformm_path, "patformm")

# wgbstools lives in the methyl_env environment; derive its path from
# METHYL_ENV so the environment location is defined in exactly one place.
# Resolves to /g/data/pq08/software/mambaforge/envs/methyl_env/bin/wgbstools.
wgbstools = f"{METHYL_ENV}/bin/wgbstools"

# Active configuration. The rules below read config["bam_files"], keyed by
# sample name, with "input_path" and "bam" entries per sample.
configfile: "config_271124.yaml"
# Alternative configurations (swap in by replacing the line above):
# configfile: "config.yaml"
# configfile: "test_config.yaml"
# Default target: request every final product for every configured sample.
# Sample names are the keys of config["bam_files"].
SAMPLES = list(config["bam_files"].keys())

rule all:
    input:
        # Completion marker written by calculate_cpos.
        expand("output/{sample}.done", sample=SAMPLES),
        # Output of homogeneity_analysis.
        expand("output_homog/{sample}.uxm.bed.gz", sample=SAMPLES),
        # Output of pat2beta.
        expand("output/{sample}.beta", sample=SAMPLES)
# Step 1: run `patformm parse_mm_tags` on a sample's BAM to produce a
# temporary per-sample BED file.
def _bam_for_sample(wildcards):
    """Return "<input_path>/<bam>" for the sample, as listed in config["bam_files"]."""
    entry = config["bam_files"][wildcards.sample]
    return "{}/{}".format(entry["input_path"], entry["bam"])


rule parse_mm_tags:
    input:
        _bam_for_sample
    output:
        # temp(): Snakemake deletes this file once no remaining job needs it.
        bed=temp("/scratch/pq08/rd6078/patformm_tmp/{sample}.bed")
    params:
        patformm=patformm
    threads: 8
    resources:
        mem_mb=64000,
        walltime=36000,
        ncpus=8,
        jobfs="5G"
    shell:
        """
        echo "running patformm parse_mm_tags on {input}" &&
        {params.patformm} parse_mm_tags --threads {threads} -o {output.bed} {input}
        """
# Step 2: run `patformm calculate_cpos` on the BED from parse_mm_tags, then
# touch a `.done` marker for the sample.
#
# Everything the shell block prints (stdout and stderr) is redirected to the
# files declared under `log:`. Snakemake does not fill log files by itself,
# so without the redirect they are never written.
rule calculate_cpos:
    input:
        bed=rules.parse_mm_tags.output.bed
    output:
        # NOTE(review): the command is passed `-o output/<sample>.pat` (no
        # `.gz`), while this rule declares `output/<sample>.pat.gz`. This
        # presumably relies on patformm compressing its own output -- confirm.
        pat_gz="output/{sample}.pat.gz",
        done="output/{sample}.done"
    params:
        patformm=patformm,
        chunk_size=10000000,
        pat="output/{sample}.pat"
    threads: 8
    log:
        stderr="/g/data/pq08/projects/biomodal/patformm/snakemake_logs/{sample}.stderr",
        stdout="/g/data/pq08/projects/biomodal/patformm/snakemake_logs/{sample}.stdout"
    # Previously considered (left disabled): submitting to the "hugemem" queue.
    # cluster:
    #     queue="hugemem"
    # NOTE(review): unlike parse_mm_tags, no `ncpus` resource is set here even
    # though `threads` is 8 -- confirm whether the cluster profile needs it.
    resources:
        mem_mb=100000,
        walltime=144000,
        jobfs="5G"
    shell:
        """
        exec > {log.stdout} 2> {log.stderr}
        echo "running patformm calculate_cpos on {input.bed}" &&
        {params.patformm} calculate_cpos --threads {threads} --chunk-size {params.chunk_size} -o {params.pat} {input.bed} &&
        touch {output.done} &&
        echo "{wildcards.sample} done."
        """
# Step 3a: run `wgbstools homog` on a sample's pat.gz against the blocks file
# in `params.bed_file`, writing into `params.output_dir`.
#
# Everything the shell block prints (stdout and stderr) is redirected to the
# files declared under `log:`. Snakemake does not fill log files by itself,
# so without the redirect they are never written.
rule homogeneity_analysis:
    input:
        pat_gz=rules.calculate_cpos.output.pat_gz
    output:
        # NOTE(review): the command is only given the output directory; this
        # rule presumably relies on wgbstools naming the result
        # `<sample>.uxm.bed.gz` after the input pat file -- confirm.
        uxm="output_homog/{sample}.uxm.bed.gz"
    params:
        output_dir="output_homog",
        wgbstools=wgbstools,
        bed_file="data/wgbs_probes_methylonco_segments.chunk.bed.gz"
    # NOTE(review): `threads` is reserved with the scheduler but not forwarded
    # to wgbstools -- confirm whether a thread-count option should be passed.
    threads: 8
    resources:
        mem_mb=64000,
        walltime=36000,
        jobfs="5G"
    log:
        stderr="/g/data/pq08/projects/biomodal/patformm/snakemake_logs/{sample}_homogeneity.stderr",
        stdout="/g/data/pq08/projects/biomodal/patformm/snakemake_logs/{sample}_homogeneity.stdout"
    shell:
        """
        exec > {log.stdout} 2> {log.stderr}
        source {MAMBA_PATH}/etc/profile.d/conda.sh
        conda activate {METHYL_ENV}
        echo "Running homogeneity analysis for {wildcards.sample}" &&
        {params.wgbstools} homog \
            {input.pat_gz} \
            -b {params.bed_file} \
            -o {params.output_dir} \
            --thresholds 0.25,0.75
        """
# Step 3b: run `wgbstools pat2beta` on a sample's pat.gz, writing into the
# `output/` directory.
#
# Everything the shell block prints (stdout and stderr) is redirected to the
# files declared under `log:`. Snakemake does not fill log files by itself,
# so without the redirect they are never written.
rule pat2beta:
    input:
        pat_gz=rules.calculate_cpos.output.pat_gz
    output:
        # NOTE(review): the command hard-codes `-o output/`; this rule
        # presumably relies on wgbstools naming the result `<sample>.beta`
        # after the input pat file -- confirm.
        beta="output/{sample}.beta"
    params:
        wgbstools=wgbstools
    # NOTE(review): `threads` is reserved with the scheduler but not forwarded
    # to wgbstools -- confirm whether a thread-count option should be passed.
    threads: 8
    resources:
        mem_mb=64000,
        walltime=36000,
        jobfs="5G"
    log:
        stderr="/g/data/pq08/projects/biomodal/patformm/snakemake_logs/{sample}_pat2beta.stderr",
        stdout="/g/data/pq08/projects/biomodal/patformm/snakemake_logs/{sample}_pat2beta.stdout"
    shell:
        """
        exec > {log.stdout} 2> {log.stderr}
        source {MAMBA_PATH}/etc/profile.d/conda.sh
        conda activate {METHYL_ENV}
        echo "Converting pat to beta values for {wildcards.sample}" &&
        {params.wgbstools} pat2beta \
            -o output/ \
            {input.pat_gz}
        """