Skip to content

Commit 360615d

Browse files
authored
Merge pull request #383 from lcdb/v1.10.3rc
V1.10.3rc
2 parents d4f87aa + 9e0b485 commit 360615d

17 files changed

+382
-96
lines changed

.circleci/config.yml

+23-9
Original file line numberDiff line numberDiff line change
@@ -54,13 +54,16 @@ variables:
5454
locales \
5555
locales-all \
5656
rsync \
57+
tree \
5758
wget \
5859
x11-utils
5960
6061
# support en_US.utf8
6162
rm -rf /var/lib/apt/lists/*
6263
localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8
6364
65+
# Set env vars to be used throughout; this is specific to how
66+
# circleci handles env vars.
6467
echo 'export DEPLOY=/tmp/lcdb-wf-test' >> $BASH_ENV
6568
echo 'export LCDBWF_ENV=lcdb-wf-test' >> $BASH_ENV
6669
echo 'export LCDBWF_ENV_R=lcdb-wf-test-r' >> $BASH_ENV
@@ -129,24 +132,35 @@ variables:
129132
conda info --envs
130133
conda config --show
131134
135+
# Copy the deploy script to a different location to simulate the
136+
# suggested deployment method of downloading just the script.
137+
cp deploy.py /tmp/deploy.py
138+
cd /tmp/
139+
132140
# Deploy to the new directory, so we are testing the real-world case of post-deployment.
133141
# Note that $DEPLOY is set in the "set-paths" step configured above.
134142
python deploy.py --flavor full --dest $DEPLOY --branch $CIRCLE_BRANCH --clone
135143
144+
set -x
145+
tree $DEPLOY
146+
tree $ORIG
147+
set +x
148+
136149
# Separately copy over some test-specific files
137-
cp workflows/chipseq/run_test.sh $DEPLOY/workflows/chipseq
138-
cp workflows/rnaseq/run_test.sh $DEPLOY/workflows/rnaseq
139-
cp workflows/rnaseq/run_downstream_test.sh $DEPLOY/workflows/rnaseq
140-
cp workflows/colocalization/run_test.sh $DEPLOY/workflows/references
141-
cp workflows/colocalization/run_test.sh $DEPLOY/workflows/colocalization
150+
cp $ORIG/workflows/chipseq/run_test.sh $DEPLOY/workflows/chipseq/run_test.sh
151+
cp $ORIG/workflows/rnaseq/run_test.sh $DEPLOY/workflows/rnaseq/run_test.sh
152+
cp $ORIG/workflows/rnaseq/run_downstream_test.sh $DEPLOY/workflows/rnaseq/run_downstream_test.sh
153+
cp $ORIG/workflows/references/run_test.sh $DEPLOY/workflows/references/run_test.sh
154+
cp $ORIG/workflows/colocalization/run_test.sh $DEPLOY/workflows/colocalization/run_test.sh
155+
142156
mkdir $DEPLOY/ci
143157
mkdir $DEPLOY/test
144-
cp test/lcdb-wf-test $DEPLOY/test
145-
cp test/workflow_test_params.yaml $DEPLOY/test
146-
cp ci/get-data.py $DEPLOY/ci
158+
cp $ORIG/test/lcdb-wf-test $DEPLOY/test/lcdb-wf-test
159+
cp $ORIG/test/workflow_test_params.yaml $DEPLOY/test/workflow_test_params.yaml
160+
cp $ORIG/ci/get-data.py $DEPLOY/ci/get-data.py
147161
148162
# the ./run_test.sh scripts run this
149-
cp ci/preprocessor.py $DEPLOY/ci
163+
cp $ORIG/ci/preprocessor.py $DEPLOY/ci/preprocessor.py
150164
151165
# download example data
152166
cd $DEPLOY

deploy.py

+42-37
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,34 @@
1212
import logging
1313
import hashlib
1414
from pathlib import Path
15-
from distutils import filelist, log
15+
from distutils import filelist
16+
17+
# Determine default staging area, used in help
18+
default_staging = "/tmp/{0}-lcdb-wf-staging".format(os.getenv('USER'))
19+
20+
usage = f"""
21+
This script assists in the deployment of relevant code from the lcdb-wf
22+
repository to a new deployment directory for running an analysis. It is
23+
intended to be run in a standalone fashion such that with just the script you
24+
can download and deploy a specified version of the workflows.
25+
26+
For example, the following command will clone the GitHub repo to {default_staging},
27+
check out the v9.999 branch, copy the files needed for RNA-seq over to the
28+
"my_analysis_dir" directory, store a read-only file .lcdb-wf-deployment.yaml
29+
with the metadata of the repo used for cloning, and build the conda
30+
environments within "my_analysis_dir":
31+
32+
./deploy.py \\
33+
--clone \\
34+
--dest my_analysis_dir \\
35+
--flavor rnaseq \\
36+
--build-envs \\
37+
--branch v9.999
38+
39+
Compared to directly cloning the repo, this results in a cleaner deployment
40+
directory that does not have various test infrastructure or workflows not
41+
relevant to the project.
42+
"""
1643

1744
logging.basicConfig(
1845
format="%(asctime)s [%(module)s] %(message)s",
@@ -31,10 +58,6 @@
3158
RESET = "\x1b[0m"
3259

3360

34-
# Determine default staging area
35-
default_staging = "/tmp/{0}-lcdb-wf-staging".format(os.getenv('USER'))
36-
37-
3861
def debug(s):
3962
logging.debug(GRAY + s + RESET)
4063

@@ -51,28 +74,11 @@ def error(s):
5174
logging.error(RED + s + RESET)
5275

5376

77+
def write_include_file(source, flavor='all'):
5478

55-
usage = f"""
56-
This script assists in the deployment of relevant code from the lcdb-wf
57-
repository to a new deployment directory for running an analysis.
58-
59-
For example, the following command will clone the GitHub repo to {default_staging},
60-
check out the v9.999 branch, copy the files needed for RNA-seq over to the
61-
"my_analysis_dir" directory, store a read-only file .lcdb-wf-deployment.yaml
62-
with the metadata of the repo used for cloning, and build the conda
63-
environments within "my_analysis_dir":
64-
65-
./deploy.py \\
66-
--clone \\
67-
--dest my_analysis_dir \\
68-
--flavor rnaseq \\
69-
--build-envs \\
70-
--branch v9.999
71-
72-
"""
73-
74-
75-
def write_include_file(flavor=None):
79+
# Patterns follow that of MANIFEST.in
80+
# (https://packaging.python.org/en/latest/guides/using-manifest-in/),
81+
# and distutils.filelist is used below to parse them.
7682

7783
PATTERN_DICT = {
7884
'rnaseq': [
@@ -107,17 +113,14 @@ def write_include_file(flavor=None):
107113
}
108114

109115
patterns = []
110-
if flavor is None or 'rnaseq':
116+
if flavor in ('full', 'rnaseq'):
111117
patterns.extend(PATTERN_DICT['rnaseq'])
112-
if flavor is None or 'chipseq':
118+
if flavor in ('full', 'chipseq'):
113119
patterns.extend(PATTERN_DICT['chipseq'])
114-
if flavor is None or 'full':
120+
if flavor == 'full':
115121
patterns.extend(PATTERN_DICT['full'])
116122
patterns.extend(PATTERN_DICT['all'])
117123

118-
HERE = Path(__file__).resolve().parent
119-
os.chdir(HERE)
120-
121124
def fastwalk(path):
122125
"""
123126
Find all files recursively, but short-circuit if we get to a conda env to
@@ -134,7 +137,7 @@ def fastwalk(path):
134137
yield os.path.join(root, f).replace(path + '/', '')
135138

136139
f = filelist.FileList()
137-
f.allfiles = list(fastwalk(str(HERE)))
140+
f.allfiles = list(fastwalk(source))
138141
for pattern in patterns:
139142
f.process_template_line(pattern)
140143
f.sort()
@@ -144,7 +147,7 @@ def fastwalk(path):
144147
sp.check_output(
145148
["git", "ls-tree", "-r", "HEAD", "--name-only"],
146149
universal_newlines=True,
147-
cwd=str(HERE),
150+
cwd=source,
148151
).splitlines(False),
149152
)
150153

@@ -153,6 +156,7 @@ def fastwalk(path):
153156
with open(include, 'w') as fout:
154157
fout.write('\n\n')
155158
fout.write('\n'.join(to_transfer))
159+
156160
return include
157161

158162

@@ -341,7 +345,6 @@ def build_envs(dest, conda_frontend="mamba"):
341345

342346
ap.add_argument(
343347
"--staging",
344-
default=default_staging,
345348
help="""Only used when --clone is specified. Clone the main git repo to
346349
this directory and do a diff on the deploy.py script found there to
347350
ensure this one is up-to-date, and if so then proceed using the new clone as the source.
@@ -384,12 +387,14 @@ def build_envs(dest, conda_frontend="mamba"):
384387
print("ERROR: --staging was specified but --clone was not. Did you want to use --clone?", file=sys.stderr)
385388
sys.exit(1)
386389
if args.clone:
387-
source = args.staging
390+
if args.staging is None:
391+
args.staging = default_staging
392+
source = os.path.abspath(args.staging)
388393
clone_repo(args.staging, args.branch, mismatch_ok=args.mismatch_ok)
389394
else:
390395
source = Path(__file__).parent.resolve()
391396

392-
include = write_include_file(source)
397+
include = write_include_file(source, flavor)
393398
rsync(include, source, dest, args.rsync_args)
394399
deployment_json(source, dest)
395400

docs/changelog.rst

+7
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11
Changelog
22
=========
33

4+
v1.10.3
5+
-------
6+
7+
- improve the deploy script (thanks @aliciaaevans)
8+
- support the epic2 peak-caller for the ChIP-seq workflow (thanks @Mira0507)
9+
- for later versions of featureCounts, add ``--countReadPairs`` argument to RNA-seq workflow (thanks @therealgenna)
10+
411
v1.10.2
512
-------
613

docs/config-yaml.rst

+17
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ The major differences between ChIP-seq and RNA-seq configs are:
114114
115115
sampletable: 'config/sampletable.tsv'
116116
organism: 'dmel'
117+
genome: 'dm6'
117118
118119
aligner:
119120
index: 'bowtie2'
@@ -154,6 +155,22 @@ The major differences between ChIP-seq and RNA-seq configs are:
154155
- input-wingdisc-1
155156
- input-wingdisc-2
156157
158+
- label: gaf-wingdisc-pooled-1
159+
algorithm: epic2
160+
ip:
161+
- gaf-wingdisc-1
162+
control:
163+
- input-wingdisc-1
164+
extra: ''
165+
166+
- label: gaf-wingdisc-pooled-2
167+
algorithm: epic2
168+
ip:
169+
- gaf-wingdisc-2
170+
control:
171+
- input-wingdisc-2
172+
extra: ''
173+
157174
fastq_screen:
158175
- label: Human
159176
organism: human

docs/tests.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ Testing the installation
55
This section describes how to set up and run the example data.
66
It is useful for verifying everything is working correctly. This
77
reproduces the steps that are performed during the automated tests
8-
on `Circle CI<https://circleci.com>`_. You can see the latest test
9-
results `here<https://circleci.com/gh/lcdb/lcdb-wf/tree/master>`_.
8+
on `Circle CI <https://circleci.com>`_. You can see the latest test
9+
results `here <https://circleci.com/gh/lcdb/lcdb-wf/tree/master>`_.
1010

1111
The example run takes up about 360 MB of space and runs in about 15 mins on
1212
2 cores.

env.yml

+9-4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
name: null
21
channels:
32
- conda-forge
43
- bioconda
@@ -59,8 +58,9 @@ dependencies:
5958
- deeptools=3.5.2
6059
- deeptoolsintervals=0.1.9
6160
- dnaio=0.10.0
62-
- docutils=0.20
61+
- docutils=0.20.1
6362
- dpath=2.1.5
63+
- epic2=0.0.52
6464
- exceptiongroup=1.1.1
6565
- execnet=1.9.0
6666
- executing=1.2.0
@@ -93,6 +93,7 @@ dependencies:
9393
- gitpython=3.1.31
9494
- glib=2.74.1
9595
- glib-tools=2.74.1
96+
- gmp=6.2.1
9697
- graphite2=1.3.13
9798
- gsl=2.7
9899
- gst-plugins-base=1.18.5
@@ -195,6 +196,7 @@ dependencies:
195196
- mysql-common=8.0.32
196197
- mysql-connector-c=6.1.11
197198
- mysql-libs=8.0.32
199+
- natsort=8.4.0
198200
- nbformat=5.8.0
199201
- ncbi-vdb=3.0.2
200202
- ncurses=6.3
@@ -243,7 +245,11 @@ dependencies:
243245
- perl-storable=3.15
244246
- perl-sub-info=0.002
245247
- perl-term-table=0.016
248+
- perl-test-fatal=0.016
249+
- perl-test-warnings=0.031
246250
- perl-test2-suite=0.000145
251+
- perl-try-tiny=0.31
252+
- perl-uri=5.17
247253
- perl-xml-libxml=2.0207
248254
- perl-xml-namespacesupport=1.12
249255
- perl-xml-sax=1.02
@@ -334,7 +340,7 @@ dependencies:
334340
- toml=0.10.2
335341
- tomli=2.0.1
336342
- toposort=1.10
337-
- tornado=6.3
343+
- tornado=6.3.2
338344
- trackhub=0.2.4
339345
- traitlets=5.9.0
340346
- typing-extensions=4.5.0
@@ -376,4 +382,3 @@ dependencies:
376382
- zlib=1.2.13
377383
- zstandard=0.19.0
378384
- zstd=1.5.2
379-
prefix: /gpfs/gsfs10/users/NICHD-core0/test/dalerr/lcdb-wf/env

include/autosql/epic2InputPeak.as

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
table epic2InputPeak
2+
"BED6+4 Peaks of signal enrichment based on pooled, normalized (interpreted) data."
3+
(
4+
string chrom; "Reference sequence chromosome or scaffold"
5+
uint chromStart; "Start position in chromosome"
6+
uint chromEnd; "End position in chromosome"
7+
string name; "PValue"
8+
uint score; "Indicates how dark the peak will be displayed in the browser (0-1000)"
9+
char[1] strand; "+ or - or . for unknown"
10+
int ChIPCount; "The number of ChIP counts in the region (also including counts from windows with a count below the cutoff)"
11+
int InputCount; "The number of Input counts in the region"
12+
float FDR; "Benjamini-Hochberg correction of the p-values"
13+
float log2FoldChange; "Log2 of the region ChIP count vs. the library-size corrected region Input count"
14+
)

include/autosql/epic2NoInputPeak.as

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
table epic2NoInputPeak
2+
"BED6 Peaks of signal enrichment based on pooled, normalized (interpreted) data."
3+
(
4+
string chrom; "Reference sequence chromosome or scaffold"
5+
uint chromStart; "Start position in chromosome"
6+
uint chromEnd; "End position in chromosome"
7+
string name; "The number of ChIP counts in the region (also including counts from windows with a count below the cutoff)"
8+
uint score; "Indicates how dark the peak will be displayed in the browser (0-1000)"
9+
char[1] strand; "+ or - or . for unknown"
10+
)

lib/chipseq.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -182,11 +182,18 @@ def detect_peak_format(fn):
182182
Returns None if undetermined.
183183
184184
This is useful for figuring out which autoSql file we should use or which
185-
bigBed 6+4 or bigBed 6+3 format to use.
185+
bigBed 6, 6+4, or 6+3 format to use.
186186
"""
187187
line = open(fn).readline().strip()
188188
toks = line.split('\t')
189189
if len(toks) == 10:
190-
return 'narrowPeak'
191-
if len(toks) == 9:
190+
if 'epic2' in fn:
191+
return 'epic2Input'
192+
else:
193+
return 'narrowPeak'
194+
elif len(toks) == 9:
192195
return 'broadPeak'
196+
elif len(toks) == 6:
197+
return 'epic2NoInput'
198+
else:
199+
raise ValueError("Invalid peak format in the number of fields.")

lib/patterns_targets.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
HERE = os.path.abspath(os.path.dirname(__file__))
1414

1515
# Note: when adding support for new peak callers, add them here.
16-
PEAK_CALLERS = ['macs2', 'spp', 'sicer']
16+
PEAK_CALLERS = ['macs2', 'spp', 'sicer', 'epic2']
1717

1818

1919
def update_recursive(d, u):

0 commit comments

Comments
 (0)