forked from algaebrown/oligoCLIP
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Snake_peakanno_chi.py
124 lines (102 loc) · 3.93 KB
/
Snake_peakanno_chi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import pandas as pd
#snakemake -j 8 -s Snake_peakanno_chi.py --configfile config/annotate_chi/multiplex.yaml --cluster "qsub -l walltime={params.run_time}:00:00 -l nodes=1:ppn={params.cores} -q home-yeo" -n --use-conda
try:
WORKDIR=config['WORKDIR']
workdir: WORKDIR
print('WORKDIR', os.getcwd())
sample_labels,= glob_wildcards("input/{sample_label}.peak.chisq.normed.bed")
GTF = config['GTF']
SCRIPT_PATH=config['SCRIPT_PATH']
ANNOTATOR_SPECIES = config['ANNOTATOR_SPECIES']
GENOMEFA = config['GENOMEFA']
CHI_LOG10PVAL_THRES = config['CHI_LOG10PVAL_THRES']
config['CLIPper_pvalthes'] = None # to use module
all_rbfox = [s for s in sample_labels if 'RBFOX' in s]
except Exception as e:
print('config:', e)
module motif:
snakefile:
"rules/snake_scoreRBNS_SELEX.py"
config: config
module peak_anno:
snakefile:
"rules/Snake_peakanno.py"
config: config
rule all:
input:
expand("output/{sample_label}.peak.chisq.normed.annotate.bed", sample_label = sample_labels),
# expand("output/{sample_label}peak.chisq.normed. gc", sample_label = sample_labels)+
# expand("output/{sample_label}peak.chisq.normed. filtered.bed", sample_label = sample_labels)+
expand("output/motif/{sample_label}.peak.chisq.normed.filtered.annotate.svg", sample_label = sample_labels),
expand("output/{sample_label}.peak.chisq.normed.annotate.motifscore.csv", sample_label = all_rbfox),
expand("summary/chisq/{sample_label}.peaks.summary", sample_label = sample_labels)
output:
"snakepeak_annochi.txt"
params:
error_out_file = "error_files/all",
run_time = 1,
cores = "1",
memory = "20",
job_name = "all"
shell:
"""
echo $(date) > {output};
echo created by HLH and the Yeo lab >> {output}
"""
use rule annotate from peak_anno as annotate with:
input:
peak="input/{sample_label}.peak.chisq.normed.bed"
output:
"output/{sample_label}.peak.chisq.normed.annotate.bed"
use rule calc_partition_nuc from peak_anno as calc_partition_nuc with:
input:
partition = "input/{sample_label}.peak.chisq.normed.bed",
genome = GENOMEFA
output:
nuc = "output/{sample_label}.peak.chisq.normed.nuc",
gc = "output/{sample_label}.peak.chisq.normed.gc",
rule filter_peak_fc_pval:
input:
"input/{sample_label}.peak.chisq.normed.bed"
output:
"output/{sample_label}.peak.chisq.normed.filtered.bed"
params:
run_time=1,
error_out_file = "error_files/annotate",
cores = "1",
thres = CHI_LOG10PVAL_THRES, # log10 pval
shell:
"""
awk '{{ if ($5 > {params.thres}) print }}' {input} > {output}
"""
use rule motif_analysis from peak_anno as motif_analysis with:
input:
peak="output/{sample_label}.peak.chisq.normed.filtered.bed"
output:
pickle="output/motif/{sample_label}.peak.chisq.normed.filtered.annotate.pickle",
svg="output/motif/{sample_label}.peak.chisq.normed.filtered.annotate.svg",
params:
run_time=12,
error_out_file = "error_files/annotate",
cores = "1",
fa = config['GENOMEFA'],
sps = config['ANNOTATOR_SPECIES'],
homer="output/CLIPper/{sample_label}.peaks.chisq.normed.filtered.annotate.homer",
rule peak_summary_chi:
input:
peak="output/{sample_label}.peak.chisq.normed.annotate.bed"
output:
summary_csv = "summary/chisq/{sample_label}.peaks.summary"
params:
run_time=1,
error_out_file = "error_files/annotate",
cores = "1",
conda:
"rules/envs/metadensity.yaml"
shell:
"""
python {SCRIPT_PATH}/summarize_peak.py --bed {input.peak} \
--is_chi \
--outfile {output.summary_csv}
"""
use rule * from motif as motif_*