spp_nodups.py
#!/bin/env python
import sjm
import os
import chr_maps
import idr
import conf
from spp import *
BIN_DIR = conf.BIN_DIR
SPP_NODUPS_BINARY = conf.SPP_BINARY_NO_DUPS
QUEUE = conf.QUEUE
PROJECT = conf.SGE_PROJECT
NAME = 'spp_nodups'
USE_CONTROL_LOCK = True

def run_peakcaller(name, control, sample, options=None):
    if not options:
        options = {}
    jobs = []
    control_file = os.path.join(control.results_dir, '%s_merged.tagAlign' % control.run_name)
    for r in sample.replicates + [sample.combined_replicate,]:
        # Regular Run
        r.merged_ta = os.path.join(r.temp_dir(sample), '%s.tagAlign' % r.rep_name(sample))
        convert_cmd = os.path.join(BIN_DIR, 'eland2tagAlign.py')
        convert_cmd += ' %s' % r.merged_file_location
        convert_cmd += ' %s' % r.merged_ta
        convert_cmd += ' %s' % sample.genome
        spp_cmd = SPP_NODUPS_BINARY
        spp_cmd += ' -rf'  # overwrite existing plot files
        spp_cmd += ' -c=%s' % r.merged_ta  # sample
        spp_cmd += ' -i=%s' % control_file  # control
        spp_cmd += ' -npeak=300000'  # number of peaks to call
        spp_cmd += ' -odir=%s' % r.results_dir(sample)  # output directory
        spp_cmd += ' -savr'
        spp_cmd += ' -savp'
        spp_cmd += ' -x=-500:50'  # exclude frag lengths within this range
        spp_cmd += ' -out=%s' % os.path.join(r.results_dir(sample), 'phantomPeakStatsReps.tab')
        if 'filtchr' in options:
            for filtchr in options['filtchr']:
                spp_cmd += ' -filtchr=%s' % filtchr  # ignore reads from filtchr chr
        jobs.append(sjm.Job('SPP_' + sample.run_name + '_' + r.rep_name(sample), [convert_cmd, spp_cmd,], queue=QUEUE, project=PROJECT, memory='8G', sched_options="-m e"))
        r.spp_results = os.path.join(r.results_dir(sample), r.rep_name(sample) + '_VS_' + control.run_name + '_merged.regionPeak.gz')

        # Pseudoreplicate Runs
        r.merged_ta_pr1 = os.path.join(r.temp_dir(sample), '%s_PR1.tagAlign' % r.rep_name(sample))
        convert_cmd = os.path.join(BIN_DIR, 'eland2tagAlign.py')
        convert_cmd += ' %s' % r.pr1_merged
        convert_cmd += ' %s' % r.merged_ta_pr1
        convert_cmd += ' %s' % sample.genome
        spp_cmd = SPP_NODUPS_BINARY
        spp_cmd += ' -rf'  # overwrite existing plot files
        spp_cmd += ' -c=%s' % r.merged_ta_pr1  # sample
        spp_cmd += ' -i=%s' % control_file  # control
        spp_cmd += ' -npeak=300000'  # number of peaks to call
        spp_cmd += ' -odir=%s' % r.pr1_results_dir  # output directory
        spp_cmd += ' -savr'
        spp_cmd += ' -savp'
        spp_cmd += ' -x=-500:50'  # exclude frag lengths within this range
        spp_cmd += ' -out=%s' % os.path.join(r.results_dir(sample), 'phantomPeakStatsReps.tab')
        if 'filtchr' in options:
            for filtchr in options['filtchr']:
                spp_cmd += ' -filtchr=%s' % filtchr  # ignore reads from filtchr chr
        jobs.append(sjm.Job('SPP_' + sample.run_name + '_' + r.rep_name(sample) + '_PR1', [convert_cmd, spp_cmd,], queue=QUEUE, project=PROJECT, memory='8G', sched_options="-m e"))
        r.spp_results_pr1 = os.path.join(r.pr1_results_dir, r.rep_name(sample) + '_PR1_VS_' + control.run_name + '_merged.regionPeak.gz')

        r.merged_ta_pr2 = os.path.join(r.temp_dir(sample), '%s_PR2.tagAlign' % r.rep_name(sample))
        convert_cmd = os.path.join(BIN_DIR, 'eland2tagAlign.py')
        convert_cmd += ' %s' % r.pr2_merged
        convert_cmd += ' %s' % r.merged_ta_pr2
        convert_cmd += ' %s' % sample.genome
        spp_cmd = SPP_NODUPS_BINARY
        spp_cmd += ' -rf'  # overwrite existing plot files
        spp_cmd += ' -c=%s' % r.merged_ta_pr2  # sample
        spp_cmd += ' -i=%s' % control_file  # control
        spp_cmd += ' -npeak=300000'  # number of peaks to call
        spp_cmd += ' -odir=%s' % r.pr2_results_dir  # output directory
        spp_cmd += ' -savr'
        spp_cmd += ' -savp'
        spp_cmd += ' -x=-500:50'  # exclude frag lengths within this range
        spp_cmd += ' -out=%s' % os.path.join(r.results_dir(sample), 'phantomPeakStatsReps.tab')
        if 'filtchr' in options:
            for filtchr in options['filtchr']:
                spp_cmd += ' -filtchr=%s' % filtchr  # ignore reads from filtchr chr
        jobs.append(sjm.Job('SPP_' + sample.run_name + '_' + r.rep_name(sample) + '_PR2', [convert_cmd, spp_cmd,], queue=QUEUE, project=PROJECT, memory='8G', sched_options="-m e"))
        r.spp_results_pr2 = os.path.join(r.pr2_results_dir, r.rep_name(sample) + '_PR2_VS_' + control.run_name + '_merged.regionPeak.gz')
    sample.add_jobs(name, jobs)
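
# Hypothetical usage sketch (not part of the original pipeline code): a driver
# script is assumed to build the control and sample objects from the pipeline's
# own conf/metadata layer and then hand them to run_peakcaller(). The 'filtchr'
# option key comes from the code above; the 'chrM' value is an illustrative
# assumption.
#
#   import spp_nodups
#   spp_nodups.run_peakcaller('spp_nodups', control, sample,
#                             options={'filtchr': ['chrM']})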