lcdb
diff --git a/‎.circleci/config.yml
+23-9 b/‎.circleci/config.yml
+23-9
diff --git a/‎deploy.py
+42-37 b/‎deploy.py
+42-37
diff --git a/‎docs/changelog.rst
+7 b/‎docs/changelog.rst
+7
diff --git a/‎docs/config-yaml.rst
+17 b/‎docs/config-yaml.rst
+17
diff --git a/‎docs/tests.rst
+2-2 b/‎docs/tests.rst
+2-2
diff --git a/‎env.yml
+9-4 b/‎env.yml
+9-4
diff --git a/‎include/autosql/epic2InputPeak.as
+14 b/‎include/autosql/epic2InputPeak.as
+14
diff --git a/‎include/autosql/epic2NoInputPeak.as
+10 b/‎include/autosql/epic2NoInputPeak.as
+10
diff --git a/‎lib/chipseq.py
+10-3 b/‎lib/chipseq.py
+10-3
diff --git a/‎lib/patterns_targets.py
+1-1 b/‎lib/patterns_targets.py
+1-1
@@ -54,13 +54,16 @@ variables:
             locales \
             locales-all \
             rsync \
+            tree \
             wget \
             x11-utils
 
           # support en_US.utf8
           rm -rf /var/lib/apt/lists/*
           localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8
 
+          # Set env vars to be used throughout; this is specific to how
+          # circleci handles env vars.
           echo 'export DEPLOY=/tmp/lcdb-wf-test' >> $BASH_ENV
           echo 'export LCDBWF_ENV=lcdb-wf-test' >> $BASH_ENV
           echo 'export LCDBWF_ENV_R=lcdb-wf-test-r' >> $BASH_ENV
@@ -129,24 +132,35 @@ variables:
         conda info --envs
         conda config --show
 
+        # Copy the deploy script to a different location to simulate the
+        # suggested deployment method of downloading just the script.
+        cp deploy.py /tmp/deploy.py
+        cd /tmp/
+
         # Deploy to the new directory, so we are testing the real-world case of post-deployment.
         # Note that $DEPLOY is set in the "set-paths" step configured above.
         python deploy.py --flavor full --dest $DEPLOY --branch $CIRCLE_BRANCH --clone
 
+        set -x
+        tree $DEPLOY
+        tree $ORIG
+        set +x
+
         # Separately copy over some test-specific files
-        cp workflows/chipseq/run_test.sh $DEPLOY/workflows/chipseq
-        cp workflows/rnaseq/run_test.sh $DEPLOY/workflows/rnaseq
-        cp workflows/rnaseq/run_downstream_test.sh $DEPLOY/workflows/rnaseq
-        cp workflows/colocalization/run_test.sh $DEPLOY/workflows/references
-        cp workflows/colocalization/run_test.sh $DEPLOY/workflows/colocalization
+        cp $ORIG/workflows/chipseq/run_test.sh $DEPLOY/workflows/chipseq/run_test.sh
+        cp $ORIG/workflows/rnaseq/run_test.sh $DEPLOY/workflows/rnaseq/run_test.sh
+        cp $ORIG/workflows/rnaseq/run_downstream_test.sh $DEPLOY/workflows/rnaseq/run_downstream_test.sh
+        cp $ORIG/workflows/references/run_test.sh $DEPLOY/workflows/references/run_test.sh
+        cp $ORIG/workflows/colocalization/run_test.sh $DEPLOY/workflows/colocalization/run_test.sh
+
         mkdir $DEPLOY/ci
         mkdir $DEPLOY/test
-        cp test/lcdb-wf-test $DEPLOY/test
-        cp test/workflow_test_params.yaml $DEPLOY/test
-        cp ci/get-data.py $DEPLOY/ci
+        cp $ORIG/test/lcdb-wf-test $DEPLOY/test/lcdb-wf-test
+        cp $ORIG/test/workflow_test_params.yaml $DEPLOY/test/workflow_test_params.yaml
+        cp $ORIG/ci/get-data.py $DEPLOY/ci/get-data.py
 
         # the ./run_test.sh scripts run this
-        cp ci/preprocessor.py $DEPLOY/ci
+        cp $ORIG/ci/preprocessor.py $DEPLOY/ci/preprocessor.py
 
         # download example data
         cd $DEPLOY
 
@@ -12,7 +12,34 @@
 import logging
 import hashlib
 from pathlib import Path
-from distutils import filelist, log
+from distutils import filelist
+
+# Determine default staging area, used in help
+default_staging = "/tmp/{0}-lcdb-wf-staging".format(os.getenv('USER'))
+
+usage = f"""
+This script assists in the deployment of relevant code from the lcdb-wf
+repository to a new deployment directory for running an analysis. It is
+intended to be run in a standalone fashion such that with just the script you
+can download and deploy a specified version of the workflows.
+
+For example, the following command will clone the GitHub repo to {default_staging},
+check out the v9.999 branch, copy the files needed for RNA-seq over to the
+"my_analysis_dir" directory, store a read-only file .lcdb-wf-deployment.yaml
+with the metadata of the repo used for cloning, and build the conda
+environments within "my_analysis_dir":
+
+    ./deploy.py \\
+        --clone \\
+        --dest my_analysis_dir \\
+        --flavor rnaseq \\
+        --build-envs \\
+        --branch v9.999
+
+Compared to directly cloning the repo, this results in a cleaner deployment
+directory that does not have various test infrastructure or workflows not
+relevant to the project.
+"""
 
 logging.basicConfig(
     format="%(asctime)s [%(module)s] %(message)s",
@@ -31,10 +58,6 @@
 RESET = "\x1b[0m"
 
 
-# Determine default staging area
-default_staging = "/tmp/{0}-lcdb-wf-staging".format(os.getenv('USER'))
-
-
 def debug(s):
     logging.debug(GRAY + s + RESET)
 
@@ -51,28 +74,11 @@ def error(s):
     logging.error(RED + s + RESET)
 
 
+def write_include_file(source, flavor='all'):
 
-usage = f"""
-This script assists in the deployment of relevant code from the lcdb-wf
-repository to a new deployment directory for running an analysis.
-
-For example, the following command will clone the GitHub repo to {default_staging},
-check out the v9.999 branch, copy the files needed for RNA-seq over to the
-"my_analysis_dir" directory, store a read-only file .lcdb-wf-deployment.yaml
-with the metadata of the repo used for cloning, and build the conda
-environments within "my_analysis_dir":
-
-    ./deploy.py \\
-        --clone \\
-        --dest my_analysis_dir \\
-        --flavor rnaseq \\
-        --build-envs \\
-        --branch v9.999
-
-"""
-
-
-def write_include_file(flavor=None):
+    # Patterns use the same syntax as MANIFEST.in
+    # (https://packaging.python.org/en/latest/guides/using-manifest-in/);
+    # distutils.filelist is used below to parse them.
 
     PATTERN_DICT = {
         'rnaseq': [
@@ -107,17 +113,14 @@ def write_include_file(flavor=None):
     }
 
     patterns = []
-    if flavor is None or 'rnaseq':
+    if flavor in ('full', 'rnaseq'):
         patterns.extend(PATTERN_DICT['rnaseq'])
-    if flavor is None or 'chipseq':
+    if flavor in ('full', 'chipseq'):
         patterns.extend(PATTERN_DICT['chipseq'])
-    if flavor is None or 'full':
+    if flavor == 'full':
         patterns.extend(PATTERN_DICT['full'])
     patterns.extend(PATTERN_DICT['all'])
 
-    HERE = Path(__file__).resolve().parent
-    os.chdir(HERE)
-
     def fastwalk(path):
         """
         Find all files recursively, but short-circuit if we get to a conda env to
@@ -134,7 +137,7 @@ def fastwalk(path):
                 yield os.path.join(root, f).replace(path + '/', '')
 
     f = filelist.FileList()
-    f.allfiles = list(fastwalk(str(HERE)))
+    f.allfiles = list(fastwalk(source))
     for pattern in patterns:
         f.process_template_line(pattern)
     f.sort()
@@ -144,7 +147,7 @@ def fastwalk(path):
         sp.check_output(
             ["git", "ls-tree", "-r", "HEAD", "--name-only"],
             universal_newlines=True,
-            cwd=str(HERE),
+            cwd=source,
         ).splitlines(False),
     )
 
@@ -153,6 +156,7 @@ def fastwalk(path):
     with open(include, 'w') as fout:
         fout.write('\n\n')
         fout.write('\n'.join(to_transfer))
+
     return include
 
 
@@ -341,7 +345,6 @@ def build_envs(dest, conda_frontend="mamba"):
 
     ap.add_argument(
         "--staging",
-        default=default_staging,
         help="""Only used when --clone is specified. Clone the main git repo to
         this directory and do a diff on the deploy.py script found there to
         ensure this one is up-to-date, and if so then proceed using the new clone as the source.
@@ -384,12 +387,14 @@ def build_envs(dest, conda_frontend="mamba"):
             print("ERROR: --staging was specified but --clone was not. Did you want to use --clone?", file=sys.stderr)
             sys.exit(1)
     if args.clone:
-        source = args.staging
+        if args.staging is None:
+            args.staging = default_staging
+        source = os.path.abspath(args.staging)
         clone_repo(args.staging, args.branch, mismatch_ok=args.mismatch_ok)
     else:
         source = Path(__file__).parent.resolve()
 
-    include = write_include_file(source)
+    include = write_include_file(source, flavor)
     rsync(include, source, dest, args.rsync_args)
     deployment_json(source, dest)
 
 
@@ -1,6 +1,13 @@
 Changelog
 =========
 
+v1.10.3
+-------
+
+- improve the deploy script (thanks @aliciaaevans)
+- support the epic2 peak-caller for the ChIP-seq workflow (thanks @Mira0507)
+- for later versions of featureCounts, add the ``--countReadPairs`` argument to the RNA-seq workflow (@therealgenna)
+
 v1.10.2
 -------
 
 
@@ -114,6 +114,7 @@ The major differences between ChIP-seq and RNA-seq configs are:
 
     sampletable: 'config/sampletable.tsv'
     organism: 'dmel'
+    genome: 'dm6'
 
     aligner:
       index: 'bowtie2'
@@ -154,6 +155,22 @@ The major differences between ChIP-seq and RNA-seq configs are:
             - input-wingdisc-1
             - input-wingdisc-2
 
+        - label: gaf-wingdisc-pooled-1
+          algorithm: epic2
+          ip:
+            - gaf-wingdisc-1
+          control:
+            - input-wingdisc-1
+          extra: ''
+
+        - label: gaf-wingdisc-pooled-2
+          algorithm: epic2
+          ip:
+            - gaf-wingdisc-2
+          control:
+            - input-wingdisc-2
+          extra: ''
+
     fastq_screen:
       - label: Human
         organism: human
 
@@ -5,8 +5,8 @@ Testing the installation
 This section describes how to set up and run the example data.
 It is useful for verifying everything is working correctly. This
 reproduces the steps that are performed during the automated tests
-on `Circle CI<https://circleci.com>`_. You can see the latest test
-results `here<https://circleci.com/gh/lcdb/lcdb-wf/tree/master>`_.
+on `Circle CI <https://circleci.com>`_. You can see the latest test
+results `here <https://circleci.com/gh/lcdb/lcdb-wf/tree/master>`_.
 
 The example run takes up about 360 MB of space and runs in about 15 mins on
 2 cores.
 
@@ -1,4 +1,3 @@
-name: null
 channels:
   - conda-forge
   - bioconda
@@ -59,8 +58,9 @@ dependencies:
   - deeptools=3.5.2
   - deeptoolsintervals=0.1.9
   - dnaio=0.10.0
-  - docutils=0.20
+  - docutils=0.20.1
   - dpath=2.1.5
+  - epic2=0.0.52
   - exceptiongroup=1.1.1
   - execnet=1.9.0
   - executing=1.2.0
@@ -93,6 +93,7 @@ dependencies:
   - gitpython=3.1.31
   - glib=2.74.1
   - glib-tools=2.74.1
+  - gmp=6.2.1
   - graphite2=1.3.13
   - gsl=2.7
   - gst-plugins-base=1.18.5
@@ -195,6 +196,7 @@ dependencies:
   - mysql-common=8.0.32
   - mysql-connector-c=6.1.11
   - mysql-libs=8.0.32
+  - natsort=8.4.0
   - nbformat=5.8.0
   - ncbi-vdb=3.0.2
   - ncurses=6.3
@@ -243,7 +245,11 @@ dependencies:
   - perl-storable=3.15
   - perl-sub-info=0.002
   - perl-term-table=0.016
+  - perl-test-fatal=0.016
+  - perl-test-warnings=0.031
   - perl-test2-suite=0.000145
+  - perl-try-tiny=0.31
+  - perl-uri=5.17
   - perl-xml-libxml=2.0207
   - perl-xml-namespacesupport=1.12
   - perl-xml-sax=1.02
@@ -334,7 +340,7 @@ dependencies:
   - toml=0.10.2
   - tomli=2.0.1
   - toposort=1.10
-  - tornado=6.3
+  - tornado=6.3.2
   - trackhub=0.2.4
   - traitlets=5.9.0
   - typing-extensions=4.5.0
@@ -376,4 +382,3 @@ dependencies:
   - zlib=1.2.13
   - zstandard=0.19.0
   - zstd=1.5.2
-prefix: /gpfs/gsfs10/users/NICHD-core0/test/dalerr/lcdb-wf/env
 
@@ -0,0 +1,14 @@
+table epic2InputPeak
+"BED6+4 Peaks of signal enrichment based on pooled, normalized (interpreted) data."
+(
+    string chrom;        "Reference sequence chromosome or scaffold"
+    uint   chromStart;   "Start position in chromosome"
+    uint   chromEnd;     "End position in chromosome"
+    string name;	 "PValue"
+    uint   score;        "Indicates how dark the peak will be displayed in the browser (0-1000) "
+    char[1]  strand;     "+ or - or . for unknown"
+    int  ChIPCount;  "The number of ChIP counts in the region (also including counts from windows with a count below the cutoff)"
+    int  InputCount;       "The number of Input counts in the region"
+    float  FDR;       "Benjamini-Hochberg correction of the p-values"
+    float  log2FoldChange;       "Log2 of the region ChIP count vs. the library-size corrected region Input count"
+)
@@ -0,0 +1,10 @@
+table epic2NoInputPeak
+"BED6 Peaks of signal enrichment based on pooled, normalized (interpreted) data."
+(
+    string chrom;        "Reference sequence chromosome or scaffold"
+    uint   chromStart;   "Start position in chromosome"
+    uint   chromEnd;     "End position in chromosome"
+    string name;	 "The number of ChIP counts in the region (also including counts from windows with a count below the cutoff)"
+    uint   score;        "Indicates how dark the peak will be displayed in the browser (0-1000) "
+    char[1]  strand;     "+ or - or . for unknown"
+)
@@ -182,11 +182,18 @@ def detect_peak_format(fn):
     Raises ValueError if the number of fields matches no known peak format.
 
     This is useful for figuring out which autoSql file we should use or which
-    bigBed 6+4 or bigBed 6+3 format to use.
+    bigBed 6, 6+4, or 6+3 format to use.
     """
     line = open(fn).readline().strip()
     toks = line.split('\t')
     if len(toks) == 10:
-        return 'narrowPeak'
-    if len(toks) == 9:
+        if 'epic2' in fn:
+            return 'epic2Input'
+        else:
+            return 'narrowPeak'
+    elif len(toks) == 9:
         return 'broadPeak'
+    elif len(toks) == 6:
+        return 'epic2NoInput'
+    else:
+        raise ValueError("Invalid peak format in the number of fields.")
@@ -13,7 +13,7 @@
 HERE = os.path.abspath(os.path.dirname(__file__))
 
 # Note: when adding support for new peak callers, add them here.
-PEAK_CALLERS = ['macs2', 'spp', 'sicer']
+PEAK_CALLERS = ['macs2', 'spp', 'sicer', 'epic2']
 
 
 def update_recursive(d, u):