diff --git a/.circleci/config.yml b/.circleci/config.yml index 95128536..e13b0fd1 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -16,17 +16,6 @@ make_tag: &make_tag echo "export TAG=encodedcc/atac-seq-pipeline:${CIRCLE_BRANCH}_${CIRCLE_WORKFLOW_ID}" > ${BASH_ENV} commands: - download_task_test_data: - description: "Download task test data. This is based on py2 so run this before installing py3." - steps: - - run: - command: | - cd dev/test/test_task/ - rm -rf atac-seq-pipeline-test-data - export BOTO_CONFIG=/dev/null - gsutil -m cp -r gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data . - ./download_hg38_fasta_for_test_ataqc.sh - install_python3_caper_gcs: description: "Install py3, caper and gcs. Set py3 as default python." @@ -88,7 +77,7 @@ jobs: name: build image command: | source ${BASH_ENV} - export DOCKER_CACHE_TAG=v1.9.3 + export DOCKER_CACHE_TAG=v2.0.0 echo "pulling ${DOCKER_CACHE_TAG}!" docker pull encodedcc/atac-seq-pipeline:${DOCKER_CACHE_TAG} docker login -u=${DOCKERHUB_USER} -p=${DOCKERHUB_PASS} @@ -99,7 +88,6 @@ jobs: <<: *machine_defaults steps: - checkout - - download_task_test_data - install_python3_caper_gcs - run: *make_tag - run: @@ -108,6 +96,9 @@ jobs: source ${BASH_ENV} cd dev/test/test_task/ + echo ${GCLOUD_SERVICE_ACCOUNT_SECRET_JSON} > tmp_secret_key.json + export GOOGLE_APPLICATION_CREDENTIALS=$PWD/tmp_secret_key.json + for wdl in test_*.wdl do prefix=${wdl%.*} @@ -118,12 +109,17 @@ jobs: # add docker image to input JSON cat ${input} | jq ".+{\"${prefix}.docker\": \"${TAG}\"}" > ${input_with_docker} - caper run ${wdl} -i ${input_with_docker} -m ${metadata} - if [[ "${wdl}" != "test_choose_ctl.wdl" ]]; then - echo "Validating outputs of ${prefix}" - res=$(jq '.outputs["'${prefix}'.compare_md5sum.match_overall"]' "${metadata}") - [[ "$res" != true ]] && exit 100 + if [[ "${wdl}" == test_macs2* ]]; then + # run heavy MACS2 tasks on GCP + caper run ${wdl} -i ${input_with_docker} -m 
${metadata} \ + --backend gcp --gcp-prj ${GOOGLE_PROJECT_ID} --docker ${TAG} \ + --gcp-service-account-key-json $PWD/tmp_secret_key.json \ + --out-gcs-bucket ${CAPER_OUT_DIR} --tmp-gcs-bucket ${CAPER_TMP_DIR} + else + # run other tasks locally + caper run ${wdl} -i ${input_with_docker} -m ${metadata} fi + rm -f ${metadata} done diff --git a/.gitignore b/.gitignore index af55da96..f92f2f60 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ -# Byte-compiled / optimized / DLL files +# Byte-compiled / optimized / DLL files/ __pycache__/ *.py[cod] *$py.class @@ -121,4 +121,5 @@ tmp_db* *.local.json temp_db* cromwell.out +cromwell.out.* .dev diff --git a/README.md b/README.md index 0220c86d..61095924 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,10 @@ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.156534.svg)](https://doi.org/10.5281/zenodo.156534)[![CircleCI](https://circleci.com/gh/ENCODE-DCC/atac-seq-pipeline/tree/master.svg?style=svg)](https://circleci.com/gh/ENCODE-DCC/atac-seq-pipeline/tree/master) +## Updated genome TSV files (v3 -> v4) + + + ## Download new Caper>=2.1 New Caper is out. You need to update your Caper to work with the latest ENCODE ATAC-seq pipeline. @@ -138,8 +142,8 @@ You cannot use these input JSON files directly. Go to the destination directory We have a separate project on DNANexus to provide example FASTQs and `genome_tsv` for `hg38` and `mm10`. We recommend to make copies of these directories on your own project. 
`genome_tsv` -- AWS: https://platform.dnanexus.com/projects/BKpvFg00VBPV975PgJ6Q03v6/data/pipeline-genome-data/genome_tsv/v3 -- Azure: https://platform.dnanexus.com/projects/F6K911Q9xyfgJ36JFzv03Z5J/data/pipeline-genome-data/genome_tsv/v3 +- AWS: https://platform.dnanexus.com/projects/BKpvFg00VBPV975PgJ6Q03v6/data/pipeline-genome-data/genome_tsv/v4 +- Azure: https://platform.dnanexus.com/projects/F6K911Q9xyfgJ36JFzv03Z5J/data/pipeline-genome-data/genome_tsv/v4 Example FASTQs - AWS: https://platform.dnanexus.com/projects/BKpvFg00VBPV975PgJ6Q03v6/data/pipeline-test-samples/encode-atac-seq-pipeline/ENCSR356KRQ/fastq_subsampled diff --git a/atac.wdl b/atac.wdl index aa7c5bf3..168e0204 100644 --- a/atac.wdl +++ b/atac.wdl @@ -7,10 +7,10 @@ struct RuntimeEnvironment { } workflow atac { - String pipeline_ver = 'v2.0.3' + String pipeline_ver = 'v2.1.0' meta { - version: 'v2.0.3' + version: 'v2.1.0' author: 'Jin wook Lee' email: 'leepc12@gmail.com' @@ -19,8 +19,8 @@ workflow atac { specification_document: 'https://docs.google.com/document/d/1f0Cm4vRyDQDu0bMehHD7P7KOMxTOP-HiNoIvL1VcBt8/edit?usp=sharing' - default_docker: 'encodedcc/atac-seq-pipeline:v2.0.3' - default_singularity: 'https://encode-pipeline-singularity-image.s3.us-west-2.amazonaws.com/atac-seq-pipeline_v2.0.3.sif' + default_docker: 'encodedcc/atac-seq-pipeline:v2.1.0' + default_singularity: 'https://encode-pipeline-singularity-image.s3.us-west-2.amazonaws.com/atac-seq-pipeline_v2.1.0.sif' default_conda: 'encode-atac-seq-pipeline' croo_out_def: 'https://storage.googleapis.com/encode-pipeline-output-definition/atac.croo.v5.json' @@ -72,8 +72,8 @@ workflow atac { } input { # group: runtime_environment - String docker = 'encodedcc/atac-seq-pipeline:v2.0.3' - String singularity = 'https://encode-pipeline-singularity-image.s3.us-west-2.amazonaws.com/atac-seq-pipeline_v2.0.3.sif' + String docker = 'encodedcc/atac-seq-pipeline:v2.1.0' + String singularity = 
'https://encode-pipeline-singularity-image.s3.us-west-2.amazonaws.com/atac-seq-pipeline_v2.1.0.sif' String conda = 'encode-atac-seq-pipeline' String conda_macs2 = 'encode-atac-seq-pipeline-macs2' String conda_spp = 'encode-atac-seq-pipeline-spp' diff --git a/dev/test/test_task/compare_md5sum.wdl b/dev/test/test_task/compare_md5sum.wdl index 43639abd..9c1cb837 100644 --- a/dev/test/test_task/compare_md5sum.wdl +++ b/dev/test/test_task/compare_md5sum.wdl @@ -12,6 +12,13 @@ task compare_md5sum { import os import json import hashlib + import struct + + def getuncompressedsize(filename): + # https://stackoverflow.com/a/22348071 + with open(filename, 'rb') as f: + f.seek(-4, 2) + return struct.unpack('I', f.read(4))[0] def md5sum(filename, blocksize=65536): hash = hashlib.md5() @@ -39,6 +46,10 @@ task compare_md5sum { ref_f = ref_files[i] md5 = md5sum(f) ref_md5 = md5sum(ref_f) + + filesize = os.path.getsize(f) + ref_filesize = os.path.getsize(ref_f) + # if text file, read in contents if f.endswith('.qc') or f.endswith('.txt') or \ f.endswith('.log') or f.endswith('.out'): @@ -46,9 +57,22 @@ task compare_md5sum { contents = fp.read() with open(ref_f,'r') as fp: ref_contents = fp.read() + elif f.endswith('.gz'): + uncompressed_filesize = getuncompressedsize(f) + ref_uncompressed_filesize = getuncompressedsize(ref_f) + + contents = 'filesize={filesize}, uncompressed_filesize={uncompressed_filesize}'.format( + filesize=filesize, + uncompressed_filesize=uncompressed_filesize, + ) + ref_contents = 'filesize={filesize}, uncompressed_filesize={uncompressed_filesize}'.format( + filesize=ref_filesize, + uncompressed_filesize=ref_uncompressed_filesize, + ) else: - contents = '' - ref_contents = '' + contents = 'filesize={filesize}'.format(filesize=filesize) + ref_contents = 'filesize={filesize}'.format(filesize=ref_filesize) + matched = md5==ref_md5 result['tasks'].append(OrderedDict([ ('label', label), diff --git a/dev/test/test_task/download_hg38_fasta_for_test_ataqc.sh 
b/dev/test/test_task/download_hg38_fasta_for_test_ataqc.sh deleted file mode 100755 index 07e5832f..00000000 --- a/dev/test/test_task/download_hg38_fasta_for_test_ataqc.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e # exit on error - -wget -N -c https://storage.googleapis.com/encode-pipeline-genome-data/hg38/GRCh38_no_alt_analysis_set_GCA_000001405.15.fasta.gz diff --git a/dev/test/test_task/ref_output/test_bam2ta/pe/subsample/ENCFF341MYG.subsampled.400.trim.merged.nodup.5K.tn5.tagAlign.gz b/dev/test/test_task/ref_output/test_bam2ta/pe/subsample/ENCFF341MYG.subsampled.400.trim.merged.nodup.5K.tn5.tagAlign.gz deleted file mode 100644 index 8250889c..00000000 Binary files a/dev/test/test_task/ref_output/test_bam2ta/pe/subsample/ENCFF341MYG.subsampled.400.trim.merged.nodup.5K.tn5.tagAlign.gz and /dev/null differ diff --git a/dev/test/test_task/ref_output/test_bam2ta/se/subsample/ENCFF439VSY.subsampled.400.trim.merged.nodup.5K.tn5.tagAlign.gz b/dev/test/test_task/ref_output/test_bam2ta/se/subsample/ENCFF439VSY.subsampled.400.trim.merged.nodup.5K.tn5.tagAlign.gz deleted file mode 100644 index 5f13d9bb..00000000 Binary files a/dev/test/test_task/ref_output/test_bam2ta/se/subsample/ENCFF439VSY.subsampled.400.trim.merged.nodup.5K.tn5.tagAlign.gz and /dev/null differ diff --git a/dev/test/test_task/ref_output/test_spr/pe/ENCFF341MYG.subsampled.400.trim.merged.nodup.tn5.pr1.tagAlign.gz b/dev/test/test_task/ref_output/test_spr/pe/ENCFF341MYG.subsampled.400.trim.merged.nodup.tn5.pr1.tagAlign.gz deleted file mode 100644 index 6e06b0a0..00000000 Binary files a/dev/test/test_task/ref_output/test_spr/pe/ENCFF341MYG.subsampled.400.trim.merged.nodup.tn5.pr1.tagAlign.gz and /dev/null differ diff --git a/dev/test/test_task/ref_output/test_spr/pe/ENCFF341MYG.subsampled.400.trim.merged.nodup.tn5.pr2.tagAlign.gz b/dev/test/test_task/ref_output/test_spr/pe/ENCFF341MYG.subsampled.400.trim.merged.nodup.tn5.pr2.tagAlign.gz deleted file mode 100644 index eb9d26fd..00000000 Binary files 
a/dev/test/test_task/ref_output/test_spr/pe/ENCFF341MYG.subsampled.400.trim.merged.nodup.tn5.pr2.tagAlign.gz and /dev/null differ diff --git a/dev/test/test_task/ref_output/test_spr/se/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.pr1.tagAlign.gz b/dev/test/test_task/ref_output/test_spr/se/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.pr1.tagAlign.gz deleted file mode 100644 index faeeeb0e..00000000 Binary files a/dev/test/test_task/ref_output/test_spr/se/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.pr1.tagAlign.gz and /dev/null differ diff --git a/dev/test/test_task/ref_output/test_spr/se/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.pr2.tagAlign.gz b/dev/test/test_task/ref_output/test_spr/se/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.pr2.tagAlign.gz deleted file mode 100644 index 8b0f8fe6..00000000 Binary files a/dev/test/test_task/ref_output/test_spr/se/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.pr2.tagAlign.gz and /dev/null differ diff --git a/dev/test/test_task/ref_output/test_xcor/pe/subsample/ENCFF341MYG.subsampled.400.trim.merged.nodup.tn5.no_chrM.R1.5K.cc.qc b/dev/test/test_task/ref_output/test_xcor/pe/subsample/ENCFF341MYG.subsampled.400.trim.merged.nodup.tn5.no_chrM.R1.5K.cc.qc deleted file mode 100644 index d82f6b7e..00000000 --- a/dev/test/test_task/ref_output/test_xcor/pe/subsample/ENCFF341MYG.subsampled.400.trim.merged.nodup.tn5.no_chrM.R1.5K.cc.qc +++ /dev/null @@ -1 +0,0 @@ -ENCFF341MYG.subsampled.400.trim.merged.nodup.tn5.no_chrM.R1.5K.tagAlign.gz 5000 0 0.0058943241165373 45 0.009822283 1500 0.003930345 1.499696 0.3333333 -1 diff --git a/dev/test/test_task/ref_output/test_xcor/se/subsample/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.no_chrM.5K.cc.qc b/dev/test/test_task/ref_output/test_xcor/se/subsample/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.no_chrM.5K.cc.qc deleted file mode 100644 index 79daf005..00000000 --- 
a/dev/test/test_task/ref_output/test_xcor/se/subsample/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.no_chrM.5K.cc.qc +++ /dev/null @@ -1 +0,0 @@ -ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.no_chrM.5K.tagAlign.gz 5000 0 0.00775551842446131 45 0.01939483 1500 0.003487772 2.223631 0.2682927 -1 diff --git a/dev/test/test_task/test_annot_enrich.json b/dev/test/test_task/test_annot_enrich.json index 2e62b46a..4517275c 100644 --- a/dev/test/test_task/test_annot_enrich.json +++ b/dev/test/test_task/test_annot_enrich.json @@ -1,10 +1,10 @@ { - "test_annot_enrich.blacklist" : "atac-seq-pipeline-test-data/genome_data/hg38_chr19_chrM/hg38.blacklist.bed.gz", - "test_annot_enrich.dnase" : "atac-seq-pipeline-test-data/genome_data/hg38/ataqc/reg2map_honeybadger2_dnase_all_p10_ucsc.hg19_to_hg38.bed.gz", - "test_annot_enrich.prom" : "atac-seq-pipeline-test-data/genome_data/hg38/ataqc/reg2map_honeybadger2_dnase_prom_p2.hg19_to_hg38.bed.gz", - "test_annot_enrich.enh" : "atac-seq-pipeline-test-data/genome_data/hg38/ataqc/reg2map_honeybadger2_dnase_enh_p2.hg19_to_hg38.bed.gz", + "test_annot_enrich.blacklist" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/genome_data/hg38_chr19_chrM/hg38.blacklist.bed.gz", + "test_annot_enrich.dnase" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/genome_data/hg38/ataqc/reg2map_honeybadger2_dnase_all_p10_ucsc.hg19_to_hg38.bed.gz", + "test_annot_enrich.prom" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/genome_data/hg38/ataqc/reg2map_honeybadger2_dnase_prom_p2.hg19_to_hg38.bed.gz", + "test_annot_enrich.enh" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/genome_data/hg38/ataqc/reg2map_honeybadger2_dnase_enh_p2.hg19_to_hg38.bed.gz", - "test_annot_enrich.ta" : "atac-seq-pipeline-test-data/input/pe/ataqc/ENCFF341MYG.subsampled.400.trim.merged.nodup.tn5.tagAlign.gz", + 
"test_annot_enrich.ta" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/pe/ataqc/ENCFF341MYG.subsampled.400.trim.merged.nodup.tn5.tagAlign.gz", - "test_annot_enrich.ref_annot_enrich_qc" : "atac-seq-pipeline-test-data/ref_output/test_annot_enrich/ENCFF341MYG.subsampled.400.trim.merged.nodup.tn5.annot_enrich.qc" + "test_annot_enrich.ref_annot_enrich_qc" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_annot_enrich/ENCFF341MYG.subsampled.400.trim.merged.nodup.tn5.annot_enrich.qc" } diff --git a/dev/test/test_task/test_bam2ta.json b/dev/test/test_task/test_bam2ta.json index 2ae3ee91..a65c0fda 100644 --- a/dev/test/test_task/test_bam2ta.json +++ b/dev/test/test_task/test_bam2ta.json @@ -1,14 +1,14 @@ { - "test_bam2ta.pe_nodup_bam" : "atac-seq-pipeline-test-data/input/pe/nodup_bams/rep1/ENCFF341MYG.subsampled.400.trim.merged.nodup.bam", - "test_bam2ta.se_nodup_bam" : "atac-seq-pipeline-test-data/input/se/nodup_bams/rep1/ENCFF439VSY.subsampled.400.trim.merged.nodup.bam", + "test_bam2ta.pe_nodup_bam" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/pe/nodup_bams/rep1/ENCFF341MYG.subsampled.400.trim.merged.nodup.bam", + "test_bam2ta.se_nodup_bam" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/se/nodup_bams/rep1/ENCFF439VSY.subsampled.400.trim.merged.nodup.bam", - "test_bam2ta.ref_pe_ta" : "atac-seq-pipeline-test-data/ref_output/test_bam2ta/pe/ENCFF341MYG.subsampled.400.trim.merged.nodup.tn5.tagAlign.gz", - "test_bam2ta.ref_pe_ta_disable_tn5_shift" : "atac-seq-pipeline-test-data/ref_output/test_bam2ta/pe/disable_tn5_shift/ENCFF341MYG.subsampled.400.trim.merged.nodup.tagAlign.gz", - "test_bam2ta.ref_pe_ta_subsample" : "atac-seq-pipeline-test-data/ref_output/test_bam2ta/pe/subsample/fix_PIP-917/ENCFF341MYG.subsampled.400.trim.merged.nodup.5K.tn5.tagAlign.gz", + 
"test_bam2ta.ref_pe_ta" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_bam2ta/pe/ENCFF341MYG.subsampled.400.trim.merged.nodup.tn5.tagAlign.gz", + "test_bam2ta.ref_pe_ta_disable_tn5_shift" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_bam2ta/pe/disable_tn5_shift/ENCFF341MYG.subsampled.400.trim.merged.nodup.tagAlign.gz", + "test_bam2ta.ref_pe_ta_subsample" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_bam2ta/pe/subsample/fix_PIP-917/ENCFF341MYG.subsampled.400.trim.merged.nodup.5K.tn5.tagAlign.gz", - "test_bam2ta.ref_se_ta" : "atac-seq-pipeline-test-data/ref_output/test_bam2ta/se/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.tagAlign.gz", - "test_bam2ta.ref_se_ta_disable_tn5_shift" : "atac-seq-pipeline-test-data/ref_output/test_bam2ta/se/disable_tn5_shift/ENCFF439VSY.subsampled.400.trim.merged.nodup.tagAlign.gz", - "test_bam2ta.ref_se_ta_subsample" : "atac-seq-pipeline-test-data/ref_output/test_bam2ta/se/subsample/ENCFF439VSY.subsampled.400.trim.merged.nodup.5K.tn5.tagAlign.gz", + "test_bam2ta.ref_se_ta" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_bam2ta/se/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.tagAlign.gz", + "test_bam2ta.ref_se_ta_disable_tn5_shift" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_bam2ta/se/disable_tn5_shift/ENCFF439VSY.subsampled.400.trim.merged.nodup.tagAlign.gz", + "test_bam2ta.ref_se_ta_subsample" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_bam2ta/se/subsample/ENCFF439VSY.subsampled.400.trim.merged.nodup.5K.tn5.tagAlign.gz", "test_bam2ta.bam2ta_subsample" : 5000 } diff --git a/dev/test/test_task/test_bowtie2.json b/dev/test/test_task/test_bowtie2.json index 792fb656..067a0256 
100644 --- a/dev/test/test_task/test_bowtie2.json +++ b/dev/test/test_task/test_bowtie2.json @@ -1,28 +1,28 @@ { "test_bowtie2.pe_fastqs_R1" : [ - "atac-seq-pipeline-test-data/input/pe/fastqs/rep1/pair1/ENCFF341MYG.subsampled.400.fastq.gz", - "atac-seq-pipeline-test-data/input/pe/fastqs/rep1/pair1/ENCFF106QGY.subsampled.400.fastq.gz" + "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/pe/fastqs/rep1/pair1/ENCFF341MYG.subsampled.400.fastq.gz", + "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/pe/fastqs/rep1/pair1/ENCFF106QGY.subsampled.400.fastq.gz" ], "test_bowtie2.pe_fastqs_R2" : [ - "atac-seq-pipeline-test-data/input/pe/fastqs/rep1/pair2/ENCFF248EJF.subsampled.400.fastq.gz", - "atac-seq-pipeline-test-data/input/pe/fastqs/rep1/pair2/ENCFF368TYI.subsampled.400.fastq.gz" + "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/pe/fastqs/rep1/pair2/ENCFF248EJF.subsampled.400.fastq.gz", + "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/pe/fastqs/rep1/pair2/ENCFF368TYI.subsampled.400.fastq.gz" ], "test_bowtie2.se_fastqs_R1" : [ - "atac-seq-pipeline-test-data/input/se/fastqs/rep1/ENCFF439VSY.subsampled.400.fastq.gz", - "atac-seq-pipeline-test-data/input/se/fastqs/rep1/ENCFF325FCQ.subsampled.400.fastq.gz", - "atac-seq-pipeline-test-data/input/se/fastqs/rep1/ENCFF683IQS.subsampled.400.fastq.gz", - "atac-seq-pipeline-test-data/input/se/fastqs/rep1/ENCFF744CHW.subsampled.400.fastq.gz" + "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/se/fastqs/rep1/ENCFF439VSY.subsampled.400.fastq.gz", + "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/se/fastqs/rep1/ENCFF325FCQ.subsampled.400.fastq.gz", + 
"gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/se/fastqs/rep1/ENCFF683IQS.subsampled.400.fastq.gz", + "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/se/fastqs/rep1/ENCFF744CHW.subsampled.400.fastq.gz" ], - "test_bowtie2.pe_bowtie2_idx_tar" : "atac-seq-pipeline-test-data/genome_data/hg38_chr19_chrM/bowtie2_index/GRCh38_no_alt_analysis_set_GCA_000001405.15.chr19_chrM.fasta.tar", - "test_bowtie2.se_bowtie2_idx_tar" : "atac-seq-pipeline-test-data/genome_data/mm10_chr19_chrM/bowtie2_index/mm10_no_alt_analysis_set_ENCODE.chr19_chrM.fasta.tar", + "test_bowtie2.pe_bowtie2_idx_tar" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/genome_data/hg38_chr19_chrM/bowtie2_index/GRCh38_no_alt_analysis_set_GCA_000001405.15.chr19_chrM.fasta.tar", + "test_bowtie2.se_bowtie2_idx_tar" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/genome_data/mm10_chr19_chrM/bowtie2_index/mm10_no_alt_analysis_set_ENCODE.chr19_chrM.fasta.tar", - "test_bowtie2.se_chrsz" : "atac-seq-pipeline-test-data/genome_data/mm10_chr19_chrM/mm10_chr19_chrM.chrom.sizes", - "test_bowtie2.pe_chrsz" : "atac-seq-pipeline-test-data/genome_data/hg38_chr19_chrM/hg38_chr19_chrM.chrom.sizes", + "test_bowtie2.se_chrsz" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/genome_data/mm10_chr19_chrM/mm10_chr19_chrM.chrom.sizes", + "test_bowtie2.pe_chrsz" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/genome_data/hg38_chr19_chrM/hg38_chr19_chrM.chrom.sizes", - "test_bowtie2.ref_pe_flagstat" : "atac-seq-pipeline-test-data/ref_output/test_bowtie2/pe/multimapping/merge_fastqs_R1_ENCFF341MYG.subsampled.400.trim.merged.samstats.qc", - "test_bowtie2.ref_pe_flagstat_no_multimapping" : 
"atac-seq-pipeline-test-data/ref_output/test_bowtie2/pe/no_multimapping/merge_fastqs_R1_ENCFF341MYG.subsampled.400.trim.merged.samstats.qc", - "test_bowtie2.ref_se_flagstat" : "atac-seq-pipeline-test-data/ref_output/test_bowtie2/se/multimapping/merge_fastqs_R1_ENCFF439VSY.subsampled.400.trim.merged.samstats.qc", - "test_bowtie2.ref_se_flagstat_no_multimapping" : "atac-seq-pipeline-test-data/ref_output/test_bowtie2/se/no_multimapping/merge_fastqs_R1_ENCFF439VSY.subsampled.400.trim.merged.samstats.qc", + "test_bowtie2.ref_pe_flagstat" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_bowtie2/pe/multimapping/merge_fastqs_R1_ENCFF341MYG.subsampled.400.trim.merged.samstats.qc", + "test_bowtie2.ref_pe_flagstat_no_multimapping" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_bowtie2/pe/no_multimapping/merge_fastqs_R1_ENCFF341MYG.subsampled.400.trim.merged.samstats.qc", + "test_bowtie2.ref_se_flagstat" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_bowtie2/se/multimapping/merge_fastqs_R1_ENCFF439VSY.subsampled.400.trim.merged.samstats.qc", + "test_bowtie2.ref_se_flagstat_no_multimapping" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_bowtie2/se/no_multimapping/merge_fastqs_R1_ENCFF439VSY.subsampled.400.trim.merged.samstats.qc", "test_bowtie2.multimapping" : 4 } diff --git a/dev/test/test_task/test_compare_signal_to_roadmap.json b/dev/test/test_task/test_compare_signal_to_roadmap.json index 3b684815..af262518 100644 --- a/dev/test/test_task/test_compare_signal_to_roadmap.json +++ b/dev/test/test_task/test_compare_signal_to_roadmap.json @@ -1,9 +1,9 @@ { - "test_compare_signal_to_roadmap.reg2map_bed" : "atac-seq-pipeline-test-data/genome_data/hg38/ataqc/hg38_celltype_compare_subsample.bed.gz", - "test_compare_signal_to_roadmap.reg2map" : 
"atac-seq-pipeline-test-data/genome_data/hg38/ataqc/hg38_dnase_avg_fseq_signal_formatted.txt.gz", - "test_compare_signal_to_roadmap.roadmap_meta" : "atac-seq-pipeline-test-data/genome_data/hg38/ataqc/hg38_dnase_avg_fseq_signal_metadata.txt", + "test_compare_signal_to_roadmap.reg2map_bed" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/genome_data/hg38/ataqc/hg38_celltype_compare_subsample.bed.gz", + "test_compare_signal_to_roadmap.reg2map" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/genome_data/hg38/ataqc/hg38_dnase_avg_fseq_signal_formatted.txt.gz", + "test_compare_signal_to_roadmap.roadmap_meta" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/genome_data/hg38/ataqc/hg38_dnase_avg_fseq_signal_metadata.txt", - "test_compare_signal_to_roadmap.pval_bw" : "atac-seq-pipeline-test-data/input/pe/ataqc/ENCFF341MYG.subsampled.400.trim.merged.nodup.tn5.pval.signal.bigwig", + "test_compare_signal_to_roadmap.pval_bw" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/pe/ataqc/ENCFF341MYG.subsampled.400.trim.merged.nodup.tn5.pval.signal.bigwig", - "test_compare_signal_to_roadmap.ref_roadmap_compare_log" : "atac-seq-pipeline-test-data/ref_output/test_compare_signal_to_roadmap/ENCFF341MYG.subsampled.400.trim.merged.nodup.tn5.pval.signal.roadmap_compare.log" + "test_compare_signal_to_roadmap.ref_roadmap_compare_log" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_compare_signal_to_roadmap/ENCFF341MYG.subsampled.400.trim.merged.nodup.tn5.pval.signal.roadmap_compare.log" } diff --git a/dev/test/test_task/test_count_signal_track.json b/dev/test/test_task/test_count_signal_track.json index 5077f690..b0cef402 100644 --- a/dev/test/test_task/test_count_signal_track.json +++ b/dev/test/test_task/test_count_signal_track.json @@ -1,8 +1,8 @@ { - 
"test_count_signal_track.se_chrsz" : "atac-seq-pipeline-test-data/genome_data/hg38_chr19_chrM/hg38_chr19_chrM.chrom.sizes", + "test_count_signal_track.se_chrsz" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/genome_data/hg38_chr19_chrM/hg38_chr19_chrM.chrom.sizes", - "test_count_signal_track.se_ta" : "atac-seq-pipeline-test-data/input/se/tas/rep1/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.tagAlign.gz", + "test_count_signal_track.se_ta" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/se/tas/rep1/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.tagAlign.gz", - "test_count_signal_track.ref_se_count_signal_track_pos_bw" : "atac-seq-pipeline-test-data/ref_output/test_count_signal_track/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.positive.bigwig", - "test_count_signal_track.ref_se_count_signal_track_neg_bw" : "atac-seq-pipeline-test-data/ref_output/test_count_signal_track/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.negative.bigwig" + "test_count_signal_track.ref_se_count_signal_track_pos_bw" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_count_signal_track/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.positive.bigwig", + "test_count_signal_track.ref_se_count_signal_track_neg_bw" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_count_signal_track/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.negative.bigwig" } diff --git a/dev/test/test_task/test_filter.json b/dev/test/test_task/test_filter.json index 7a899abd..4ca55a56 100644 --- a/dev/test/test_task/test_filter.json +++ b/dev/test/test_task/test_filter.json @@ -1,18 +1,18 @@ { - "test_filter.se_chrsz" : "atac-seq-pipeline-test-data/genome_data/mm10_chr19_chrM/mm10_chr19_chrM.chrom.sizes", - "test_filter.pe_chrsz" : 
"atac-seq-pipeline-test-data/genome_data/hg38_chr19_chrM/hg38_chr19_chrM.chrom.sizes", - "test_filter.pe_bam" : "atac-seq-pipeline-test-data/input/pe/bams/rep1/ENCFF341MYG.subsampled.400.trim.merged.bam", - "test_filter.pe_bam_no_multimapping" : "atac-seq-pipeline-test-data/input/pe/bams_no_multimapping/rep1/ENCFF341MYG.subsampled.400.trim.merged.bam", - "test_filter.se_bam" : "atac-seq-pipeline-test-data/input/se/bams/rep1/ENCFF439VSY.subsampled.400.trim.merged.bam", - "test_filter.se_bam_no_multimapping" : "atac-seq-pipeline-test-data/input/se/bams_no_multimapping/rep1/ENCFF439VSY.subsampled.400.trim.merged.bam", + "test_filter.se_chrsz" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/genome_data/mm10_chr19_chrM/mm10_chr19_chrM.chrom.sizes", + "test_filter.pe_chrsz" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/genome_data/hg38_chr19_chrM/hg38_chr19_chrM.chrom.sizes", + "test_filter.pe_bam" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/pe/bams/rep1/ENCFF341MYG.subsampled.400.trim.merged.bam", + "test_filter.pe_bam_no_multimapping" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/pe/bams_no_multimapping/rep1/ENCFF341MYG.subsampled.400.trim.merged.bam", + "test_filter.se_bam" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/se/bams/rep1/ENCFF439VSY.subsampled.400.trim.merged.bam", + "test_filter.se_bam_no_multimapping" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/se/bams_no_multimapping/rep1/ENCFF439VSY.subsampled.400.trim.merged.bam", - "test_filter.ref_pe_nodup_samstat_qc" : "atac-seq-pipeline-test-data/ref_output/test_filter/pe/multimapping/ENCFF341MYG.subsampled.400.trim.merged.nodup.no_chrM.samstats.qc", - "test_filter.ref_pe_nodup_samstat_qc_no_multimapping" : 
"atac-seq-pipeline-test-data/ref_output/test_filter/pe/no_multimapping/ENCFF341MYG.subsampled.400.trim.merged.nodup.no_chrM.samstats.qc", - "test_filter.ref_pe_filt_samstat_qc" : "atac-seq-pipeline-test-data/ref_output/test_filter/pe/no_dup_removal/ENCFF341MYG.subsampled.400.trim.merged.filt.no_chrM.samstats.qc", + "test_filter.ref_pe_nodup_samstat_qc" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_filter/pe/multimapping/ENCFF341MYG.subsampled.400.trim.merged.nodup.no_chrM.samstats.qc", + "test_filter.ref_pe_nodup_samstat_qc_no_multimapping" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_filter/pe/no_multimapping/ENCFF341MYG.subsampled.400.trim.merged.nodup.no_chrM.samstats.qc", + "test_filter.ref_pe_filt_samstat_qc" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_filter/pe/no_dup_removal/ENCFF341MYG.subsampled.400.trim.merged.filt.no_chrM.samstats.qc", - "test_filter.ref_se_nodup_samstat_qc" : "atac-seq-pipeline-test-data/ref_output/test_filter/se/multimapping/ENCFF439VSY.subsampled.400.trim.merged.nodup.no_chrM.samstats.qc", - "test_filter.ref_se_nodup_samstat_qc_no_multimapping" : "atac-seq-pipeline-test-data/ref_output/test_filter/se/no_multimapping/ENCFF439VSY.subsampled.400.trim.merged.nodup.no_chrM.samstats.qc", - "test_filter.ref_se_filt_samstat_qc" : "atac-seq-pipeline-test-data/ref_output/test_filter/se/no_dup_removal/ENCFF439VSY.subsampled.400.trim.merged.filt.no_chrM.samstats.qc", + "test_filter.ref_se_nodup_samstat_qc" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_filter/se/multimapping/ENCFF439VSY.subsampled.400.trim.merged.nodup.no_chrM.samstats.qc", + "test_filter.ref_se_nodup_samstat_qc_no_multimapping" : 
"gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_filter/se/no_multimapping/ENCFF439VSY.subsampled.400.trim.merged.nodup.no_chrM.samstats.qc", + "test_filter.ref_se_filt_samstat_qc" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_filter/se/no_dup_removal/ENCFF439VSY.subsampled.400.trim.merged.filt.no_chrM.samstats.qc", "test_filter.multimapping" : 4 } diff --git a/dev/test/test_task/test_frac_mito.json b/dev/test/test_task/test_frac_mito.json index 3904f2fe..032d3ac8 100644 --- a/dev/test/test_task/test_frac_mito.json +++ b/dev/test/test_task/test_frac_mito.json @@ -1,5 +1,5 @@ { - "test_frac_mito.non_mito_samstat" : "atac-seq-pipeline-test-data/input/pe/samstat_qcs/ENCFF341MYG.subsampled.400.trim.merged.non_mito.samstats.qc", - "test_frac_mito.mito_samstat" : "atac-seq-pipeline-test-data/input/pe/samstat_qcs/ENCFF341MYG.subsampled.400.trim.merged.samstats.qc", - "test_frac_mito.ref_frac_mito_qc" : "atac-seq-pipeline-test-data/ref_output/test_frac_mito/ENCFF341MYG.subsampled.400.trim.merged.frac_mito.qc" + "test_frac_mito.non_mito_samstat" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/pe/samstat_qcs/ENCFF341MYG.subsampled.400.trim.merged.non_mito.samstats.qc", + "test_frac_mito.mito_samstat" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/pe/samstat_qcs/ENCFF341MYG.subsampled.400.trim.merged.samstats.qc", + "test_frac_mito.ref_frac_mito_qc" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_frac_mito/ENCFF341MYG.subsampled.400.trim.merged.frac_mito.qc" } diff --git a/dev/test/test_task/test_fraglen_stat_pe.json b/dev/test/test_task/test_fraglen_stat_pe.json index d36ca7e3..fb8fa8c1 100644 --- a/dev/test/test_task/test_fraglen_stat_pe.json +++ b/dev/test/test_task/test_fraglen_stat_pe.json @@ -1,5 +1,5 @@ 
{ - "test_fraglen_stat_pe.nodup_bam" : "atac-seq-pipeline-test-data/input/pe/ataqc/ENCFF341MYG.subsampled.400.trim.merged.nodup.bam", + "test_fraglen_stat_pe.nodup_bam" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/pe/ataqc/ENCFF341MYG.subsampled.400.trim.merged.nodup.bam", - "test_fraglen_stat_pe.ref_nucleosomal_qc" : "atac-seq-pipeline-test-data/ref_output/test_fraglen_stat_pe/ENCFF341MYG.subsampled.400.trim.merged.nodup.nucleosomal.qc" + "test_fraglen_stat_pe.ref_nucleosomal_qc" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_fraglen_stat_pe/ENCFF341MYG.subsampled.400.trim.merged.nodup.nucleosomal.qc" } diff --git a/dev/test/test_task/test_gc_bias.json b/dev/test/test_task/test_gc_bias.json index 2f6bd37a..3610c963 100644 --- a/dev/test/test_task/test_gc_bias.json +++ b/dev/test/test_task/test_gc_bias.json @@ -1,6 +1,6 @@ { "test_gc_bias.ref_fa" : "GRCh38_no_alt_analysis_set_GCA_000001405.15.fasta.gz", - "test_gc_bias.nodup_bam" : "atac-seq-pipeline-test-data/input/pe/ataqc/ENCFF341MYG.subsampled.400.trim.merged.nodup.bam", + "test_gc_bias.nodup_bam" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/pe/ataqc/ENCFF341MYG.subsampled.400.trim.merged.nodup.bam", - "test_gc_bias.ref_gc_log" : "atac-seq-pipeline-test-data/ref_output/test_gc_bias/ENCFF341MYG.subsampled.400.trim.merged.nodup.gc.txt" + "test_gc_bias.ref_gc_log" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_gc_bias/ENCFF341MYG.subsampled.400.trim.merged.nodup.gc.txt" } diff --git a/dev/test/test_task/test_idr.json b/dev/test/test_task/test_idr.json index 3b97ffeb..22493b2d 100644 --- a/dev/test/test_task/test_idr.json +++ b/dev/test/test_task/test_idr.json @@ -1,15 +1,15 @@ { - "test_idr.se_blacklist" : "atac-seq-pipeline-test-data/genome_data/mm10_chr19_chrM/mm10.blacklist.bed.gz", - 
"test_idr.se_chrsz" : "atac-seq-pipeline-test-data/genome_data/mm10_chr19_chrM/mm10_chr19_chrM.chrom.sizes", + "test_idr.se_blacklist" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/genome_data/mm10_chr19_chrM/mm10.blacklist.bed.gz", + "test_idr.se_chrsz" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/genome_data/mm10_chr19_chrM/mm10_chr19_chrM.chrom.sizes", - "test_idr.se_peak_rep1" : "atac-seq-pipeline-test-data/input/se/peaks/rep1/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.pval0.01.300K.narrowPeak.gz", - "test_idr.se_peak_rep2" : "atac-seq-pipeline-test-data/input/se/peaks/rep2/ENCFF463QCX.subsampled.400.trim.merged.nodup.tn5.pval0.01.300K.narrowPeak.gz", - "test_idr.se_peak_pooled" : "atac-seq-pipeline-test-data/input/se/peaks/pooled_rep/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.pooled.pval0.01.300K.narrowPeak.gz", - "test_idr.se_ta_pooled" : "atac-seq-pipeline-test-data/input/se/tas/pooled_rep/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.pooled.tagAlign.gz", + "test_idr.se_peak_rep1" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/se/peaks/rep1/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.pval0.01.300K.narrowPeak.gz", + "test_idr.se_peak_rep2" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/se/peaks/rep2/ENCFF463QCX.subsampled.400.trim.merged.nodup.tn5.pval0.01.300K.narrowPeak.gz", + "test_idr.se_peak_pooled" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/se/peaks/pooled_rep/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.pooled.pval0.01.300K.narrowPeak.gz", + "test_idr.se_ta_pooled" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/se/tas/pooled_rep/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.pooled.tagAlign.gz", - "test_idr.ref_se_idr_peak" : 
"atac-seq-pipeline-test-data/ref_output/test_idr/rep1-rep2.idr0.05.narrowPeak.gz", - "test_idr.ref_se_idr_bfilt_peak" : "atac-seq-pipeline-test-data/ref_output/test_idr/rep1-rep2.idr0.05.bfilt.narrowPeak.gz", - "test_idr.ref_se_idr_frip_qc" : "atac-seq-pipeline-test-data/ref_output/test_idr/rep1-rep2.idr0.05.bfilt.frip.qc", + "test_idr.ref_se_idr_peak" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_idr/rep1-rep2.idr0.05.narrowPeak.gz", + "test_idr.ref_se_idr_bfilt_peak" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_idr/rep1-rep2.idr0.05.bfilt.narrowPeak.gz", + "test_idr.ref_se_idr_frip_qc" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_idr/rep1-rep2.idr0.05.bfilt.frip.qc", "test_idr.idr_thresh" : 0.05 } diff --git a/dev/test/test_task/test_jsd.json b/dev/test/test_task/test_jsd.json index b272126b..94b1f1db 100644 --- a/dev/test/test_task/test_jsd.json +++ b/dev/test/test_task/test_jsd.json @@ -1,15 +1,15 @@ { "test_jsd.se_nodup_bams" : [ - "atac-seq-pipeline-test-data/input/se/nodup_bams/rep1/ENCFF439VSY.subsampled.400.trim.merged.nodup.bam" + "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/se/nodup_bams/rep1/ENCFF439VSY.subsampled.400.trim.merged.nodup.bam" ], - "test_jsd.se_blacklist" : "atac-seq-pipeline-test-data/genome_data/mm10_chr19_chrM/mm10.blacklist.bed.gz", + "test_jsd.se_blacklist" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/genome_data/mm10_chr19_chrM/mm10.blacklist.bed.gz", - "test_jsd.se_fake_blacklist" : "atac-seq-pipeline-test-data/input/se/fake_blacklist/mm10.whole_chr19.blacklist.bed.gz", + "test_jsd.se_fake_blacklist" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/se/fake_blacklist/mm10.whole_chr19.blacklist.bed.gz", 
"test_jsd.ref_se_jsd_logs" : [ - "atac-seq-pipeline-test-data/ref_output/test_jsd/rep1.ENCFF439VSY.subsampled.400.trim.merged.nodup.bfilt.jsd.qc" + "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_jsd/rep1.ENCFF439VSY.subsampled.400.trim.merged.nodup.bfilt.jsd.qc" ], "test_jsd.ref_se_jsd_fake_blacklist_logs" : [ - "atac-seq-pipeline-test-data/ref_output/test_jsd/fake_blacklist/rep1.ENCFF439VSY.subsampled.400.trim.merged.nodup.bfilt.jsd.qc" + "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_jsd/fake_blacklist/rep1.ENCFF439VSY.subsampled.400.trim.merged.nodup.bfilt.jsd.qc" ] } diff --git a/dev/test/test_task/test_macs2.json b/dev/test/test_task/test_macs2.json index b844d163..b84ff641 100644 --- a/dev/test/test_task/test_macs2.json +++ b/dev/test/test_task/test_macs2.json @@ -1,13 +1,13 @@ { - "test_macs2.se_blacklist" : "atac-seq-pipeline-test-data/genome_data/mm10_chr19_chrM/mm10.blacklist.bed.gz", - "test_macs2.se_chrsz" : "atac-seq-pipeline-test-data/genome_data/mm10_chr19_chrM/mm10_chr19_chrM.chrom.sizes", + "test_macs2.se_blacklist" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/genome_data/mm10_chr19_chrM/mm10.blacklist.bed.gz", + "test_macs2.se_chrsz" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/genome_data/mm10_chr19_chrM/mm10_chr19_chrM.chrom.sizes", "test_macs2.se_gensz" : "mm", - "test_macs2.se_ta" : "atac-seq-pipeline-test-data/input/se/tas/rep1/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.tagAlign.gz", + "test_macs2.se_ta" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/se/tas/rep1/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.tagAlign.gz", - "test_macs2.ref_se_macs2_npeak" : "atac-seq-pipeline-test-data/ref_output/test_macs2/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.pval0.01.300K.narrowPeak.gz", - 
"test_macs2.ref_se_macs2_bfilt_npeak" : "atac-seq-pipeline-test-data/ref_output/test_macs2/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.pval0.01.300K.bfilt.narrowPeak.gz", - "test_macs2.ref_se_macs2_frip_qc" : "atac-seq-pipeline-test-data/ref_output/test_macs2/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.pval0.01.300K.bfilt.frip.qc", + "test_macs2.ref_se_macs2_npeak" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_macs2/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.pval0.01.300K.narrowPeak.gz", + "test_macs2.ref_se_macs2_bfilt_npeak" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_macs2/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.pval0.01.300K.bfilt.narrowPeak.gz", + "test_macs2.ref_se_macs2_frip_qc" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_macs2/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.pval0.01.300K.bfilt.frip.qc", "test_macs2.cap_num_peak" : 300000, "test_macs2.pval_thresh" : 0.01, diff --git a/dev/test/test_task/test_macs2_signal_track.json b/dev/test/test_task/test_macs2_signal_track.json index aa37690c..7824405e 100644 --- a/dev/test/test_task/test_macs2_signal_track.json +++ b/dev/test/test_task/test_macs2_signal_track.json @@ -1,10 +1,10 @@ { - "test_macs2_signal_track.se_chrsz" : "atac-seq-pipeline-test-data/genome_data/mm10_chr19_chrM/mm10_chr19_chrM.chrom.sizes", + "test_macs2_signal_track.se_chrsz" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/genome_data/mm10_chr19_chrM/mm10_chr19_chrM.chrom.sizes", "test_macs2_signal_track.se_gensz" : "mm", - "test_macs2_signal_track.se_ta" : "atac-seq-pipeline-test-data/input/se/tas/rep1/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.tagAlign.gz", + "test_macs2_signal_track.se_ta" : 
"gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/se/tas/rep1/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.tagAlign.gz", - "test_macs2_signal_track.ref_se_macs2_pval_bw" : "atac-seq-pipeline-test-data/ref_output/test_macs2/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.pval.signal.bigwig", + "test_macs2_signal_track.ref_se_macs2_pval_bw" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_macs2/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.pval.signal.bigwig", "test_macs2_signal_track.pval_thresh" : 0.01, "test_macs2_signal_track.smooth_win" : 150 diff --git a/dev/test/test_task/test_overlap.json b/dev/test/test_task/test_overlap.json index 8e69dab9..d67af68d 100644 --- a/dev/test/test_task/test_overlap.json +++ b/dev/test/test_task/test_overlap.json @@ -1,13 +1,13 @@ { - "test_overlap.se_blacklist" : "atac-seq-pipeline-test-data/genome_data/mm10_chr19_chrM/mm10.blacklist.bed.gz", - "test_overlap.se_chrsz" : "atac-seq-pipeline-test-data/genome_data/mm10_chr19_chrM/mm10_chr19_chrM.chrom.sizes", + "test_overlap.se_blacklist" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/genome_data/mm10_chr19_chrM/mm10.blacklist.bed.gz", + "test_overlap.se_chrsz" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/genome_data/mm10_chr19_chrM/mm10_chr19_chrM.chrom.sizes", - "test_overlap.se_peak_rep1" : "atac-seq-pipeline-test-data/input/se/peaks/rep1/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.pval0.01.300K.narrowPeak.gz", - "test_overlap.se_peak_rep2" : "atac-seq-pipeline-test-data/input/se/peaks/rep2/ENCFF463QCX.subsampled.400.trim.merged.nodup.tn5.pval0.01.300K.narrowPeak.gz", - "test_overlap.se_peak_pooled" : "atac-seq-pipeline-test-data/input/se/peaks/pooled_rep/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.pooled.pval0.01.300K.narrowPeak.gz", - "test_overlap.se_ta_pooled" : 
"atac-seq-pipeline-test-data/input/se/tas/pooled_rep/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.pooled.tagAlign.gz", + "test_overlap.se_peak_rep1" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/se/peaks/rep1/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.pval0.01.300K.narrowPeak.gz", + "test_overlap.se_peak_rep2" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/se/peaks/rep2/ENCFF463QCX.subsampled.400.trim.merged.nodup.tn5.pval0.01.300K.narrowPeak.gz", + "test_overlap.se_peak_pooled" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/se/peaks/pooled_rep/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.pooled.pval0.01.300K.narrowPeak.gz", + "test_overlap.se_ta_pooled" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/se/tas/pooled_rep/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.pooled.tagAlign.gz", - "test_overlap.ref_se_overlap_peak" : "atac-seq-pipeline-test-data/ref_output/test_overlap/rep1-rep2.overlap.narrowPeak.gz", - "test_overlap.ref_se_overlap_bfilt_peak" : "atac-seq-pipeline-test-data/ref_output/test_overlap/rep1-rep2.overlap.bfilt.narrowPeak.gz", - "test_overlap.ref_se_overlap_frip_qc" : "atac-seq-pipeline-test-data/ref_output/test_overlap/rep1-rep2.overlap.bfilt.frip.qc" + "test_overlap.ref_se_overlap_peak" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_overlap/rep1-rep2.overlap.narrowPeak.gz", + "test_overlap.ref_se_overlap_bfilt_peak" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_overlap/rep1-rep2.overlap.bfilt.narrowPeak.gz", + "test_overlap.ref_se_overlap_frip_qc" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_overlap/rep1-rep2.overlap.bfilt.frip.qc" } diff --git 
a/dev/test/test_task/test_pool_ta.json b/dev/test/test_task/test_pool_ta.json index 9b7a922b..a8849d50 100644 --- a/dev/test/test_task/test_pool_ta.json +++ b/dev/test/test_task/test_pool_ta.json @@ -1,6 +1,6 @@ { - "test_pool_ta.se_ta_rep1" : "atac-seq-pipeline-test-data/input/se/tas/rep1/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.tagAlign.gz", - "test_pool_ta.se_ta_rep2" : "atac-seq-pipeline-test-data/input/se/tas/rep2/ENCFF463QCX.subsampled.400.trim.merged.nodup.tn5.tagAlign.gz", + "test_pool_ta.se_ta_rep1" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/se/tas/rep1/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.tagAlign.gz", + "test_pool_ta.se_ta_rep2" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/se/tas/rep2/ENCFF463QCX.subsampled.400.trim.merged.nodup.tn5.tagAlign.gz", - "test_pool_ta.ref_se_pooled_ta" : "atac-seq-pipeline-test-data/ref_output/test_pool_ta/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.pooled.tagAlign.gz" + "test_pool_ta.ref_se_pooled_ta" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_pool_ta/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.pooled.tagAlign.gz" } diff --git a/dev/test/test_task/test_preseq.json b/dev/test/test_task/test_preseq.json index 78c257b7..e54bd123 100644 --- a/dev/test/test_task/test_preseq.json +++ b/dev/test/test_task/test_preseq.json @@ -1,7 +1,7 @@ { "test_preseq.paired_end" : true, - "test_preseq.bam" : "atac-seq-pipeline-test-data/input/pe/ataqc/ENCFF341MYG.subsampled.400.trim.merged.bam", + "test_preseq.bam" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/pe/ataqc/ENCFF341MYG.subsampled.400.trim.merged.bam", - "test_preseq.ref_picard_est_lib_size_qc" : "atac-seq-pipeline-test-data/ref_output/test_preseq/ENCFF341MYG.subsampled.400.trim.merged.picard_est_lib_size.qc", - "test_preseq.ref_preseq_log" : 
"atac-seq-pipeline-test-data/ref_output/test_preseq/ENCFF341MYG.subsampled.400.trim.merged.preseq.log" + "test_preseq.ref_picard_est_lib_size_qc" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_preseq/ENCFF341MYG.subsampled.400.trim.merged.picard_est_lib_size.qc", + "test_preseq.ref_preseq_log" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_preseq/ENCFF341MYG.subsampled.400.trim.merged.preseq.log" } diff --git a/dev/test/test_task/test_reproducibility.json b/dev/test/test_task/test_reproducibility.json index 4d915ac9..fff72b2c 100644 --- a/dev/test/test_task/test_reproducibility.json +++ b/dev/test/test_task/test_reproducibility.json @@ -1,9 +1,9 @@ { - "test_reproducibility.se_overlap_peak_rep1_vs_rep2" : "atac-seq-pipeline-test-data/input/se/overlap_peaks/rep1-rep2.overlap.bfilt.narrowPeak.gz", - "test_reproducibility.se_overlap_peak_rep1_pr" : "atac-seq-pipeline-test-data/input/se/overlap_peaks/rep1-pr.overlap.bfilt.narrowPeak.gz", - "test_reproducibility.se_overlap_peak_rep2_pr" : "atac-seq-pipeline-test-data/input/se/overlap_peaks/rep2-pr.overlap.bfilt.narrowPeak.gz", - "test_reproducibility.se_overlap_peak_ppr" : "atac-seq-pipeline-test-data/input/se/overlap_peaks/ppr.overlap.bfilt.narrowPeak.gz", - "test_reproducibility.se_chrsz" : "atac-seq-pipeline-test-data/genome_data/mm10_chr19_chrM/mm10_chr19_chrM.chrom.sizes", + "test_reproducibility.se_overlap_peak_rep1_vs_rep2" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/se/overlap_peaks/rep1-rep2.overlap.bfilt.narrowPeak.gz", + "test_reproducibility.se_overlap_peak_rep1_pr" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/se/overlap_peaks/rep1-pr.overlap.bfilt.narrowPeak.gz", + "test_reproducibility.se_overlap_peak_rep2_pr" : 
"gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/se/overlap_peaks/rep2-pr.overlap.bfilt.narrowPeak.gz", + "test_reproducibility.se_overlap_peak_ppr" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/se/overlap_peaks/ppr.overlap.bfilt.narrowPeak.gz", + "test_reproducibility.se_chrsz" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/genome_data/mm10_chr19_chrM/mm10_chr19_chrM.chrom.sizes", - "test_reproducibility.ref_se_reproducibility_qc" : "atac-seq-pipeline-test-data/ref_output/test_reproducibility/overlap.reproducibility.qc" + "test_reproducibility.ref_se_reproducibility_qc" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_reproducibility/overlap.reproducibility.qc" } diff --git a/dev/test/test_task/test_spr.json b/dev/test/test_task/test_spr.json index 33a4d943..6dbb7444 100644 --- a/dev/test/test_task/test_spr.json +++ b/dev/test/test_task/test_spr.json @@ -1,14 +1,14 @@ { - "test_spr.pe_ta" : "atac-seq-pipeline-test-data/input/pe/tas/rep1/ENCFF341MYG.subsampled.400.trim.merged.nodup.tn5.tagAlign.gz", - "test_spr.se_ta" : "atac-seq-pipeline-test-data/input/se/tas/rep1/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.tagAlign.gz", + "test_spr.pe_ta" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/pe/tas/rep1/ENCFF341MYG.subsampled.400.trim.merged.nodup.tn5.tagAlign.gz", + "test_spr.se_ta" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/se/tas/rep1/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.tagAlign.gz", - "test_spr.ref_pe_ta_pr1" : "atac-seq-pipeline-test-data/ref_output/test_spr/pe/ENCFF341MYG.subsampled.400.trim.merged.nodup.tn5.pr1.tagAlign.gz", - "test_spr.ref_pe_ta_pr2" : 
"atac-seq-pipeline-test-data/ref_output/test_spr/pe/ENCFF341MYG.subsampled.400.trim.merged.nodup.tn5.pr2.tagAlign.gz", - "test_spr.ref_se_ta_pr1" : "atac-seq-pipeline-test-data/ref_output/test_spr/se/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.pr1.tagAlign.gz", - "test_spr.ref_se_ta_pr2" : "atac-seq-pipeline-test-data/ref_output/test_spr/se/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.pr2.tagAlign.gz", + "test_spr.ref_pe_ta_pr1" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_spr/pe/ENCFF341MYG.subsampled.400.trim.merged.nodup.tn5.pr1.tagAlign.gz", + "test_spr.ref_pe_ta_pr2" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_spr/pe/ENCFF341MYG.subsampled.400.trim.merged.nodup.tn5.pr2.tagAlign.gz", + "test_spr.ref_se_ta_pr1" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_spr/se/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.pr1.tagAlign.gz", + "test_spr.ref_se_ta_pr2" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_spr/se/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.pr2.tagAlign.gz", - "test_spr.ref_pe_seed_10_ta_pr1" : "atac-seq-pipeline-test-data/ref_output/test_spr/pe/pseudoreplication_random_seed_10/ENCFF341MYG.subsampled.400.trim.merged.nodup.tn5.pr1.tagAlign.gz", - "test_spr.ref_pe_seed_10_ta_pr2" : "atac-seq-pipeline-test-data/ref_output/test_spr/pe/pseudoreplication_random_seed_10/ENCFF341MYG.subsampled.400.trim.merged.nodup.tn5.pr2.tagAlign.gz", - "test_spr.ref_se_seed_10_ta_pr1" : "atac-seq-pipeline-test-data/ref_output/test_spr/se/pseudoreplication_random_seed_10/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.pr1.tagAlign.gz", - "test_spr.ref_se_seed_10_ta_pr2" : "atac-seq-pipeline-test-data/ref_output/test_spr/se/pseudoreplication_random_seed_10/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.pr2.tagAlign.gz" + 
"test_spr.ref_pe_seed_10_ta_pr1" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_spr/pe/pseudoreplication_random_seed_10/ENCFF341MYG.subsampled.400.trim.merged.nodup.tn5.pr1.tagAlign.gz", + "test_spr.ref_pe_seed_10_ta_pr2" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_spr/pe/pseudoreplication_random_seed_10/ENCFF341MYG.subsampled.400.trim.merged.nodup.tn5.pr2.tagAlign.gz", + "test_spr.ref_se_seed_10_ta_pr1" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_spr/se/pseudoreplication_random_seed_10/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.pr1.tagAlign.gz", + "test_spr.ref_se_seed_10_ta_pr2" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_spr/se/pseudoreplication_random_seed_10/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.pr2.tagAlign.gz" } diff --git a/dev/test/test_task/test_tss_enrich.json b/dev/test/test_task/test_tss_enrich.json index 4597e645..3edc891d 100644 --- a/dev/test/test_task/test_tss_enrich.json +++ b/dev/test/test_task/test_tss_enrich.json @@ -1,9 +1,9 @@ { - "test_tss_enrich.tss" : "atac-seq-pipeline-test-data/genome_data/hg38/ataqc/hg38_gencode_tss_unique.bed.gz", - "test_tss_enrich.chrsz" : "atac-seq-pipeline-test-data/genome_data/hg38/hg38.chrom.sizes", + "test_tss_enrich.tss" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/genome_data/hg38/ataqc/ENCFF766FGL.bed.gz", + "test_tss_enrich.chrsz" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/genome_data/hg38/hg38.chrom.sizes", - "test_tss_enrich.read_len_log" : "atac-seq-pipeline-test-data/input/pe/ataqc/ENCFF341MYG.subsampled.400.trim.merged.read_length.txt", - "test_tss_enrich.nodup_bam" : 
"atac-seq-pipeline-test-data/input/pe/ataqc/ENCFF341MYG.subsampled.400.trim.merged.nodup.bam", + "test_tss_enrich.read_len_log" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/pe/ataqc/ENCFF341MYG.subsampled.400.trim.merged.read_length.txt", + "test_tss_enrich.nodup_bam" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/pe/ataqc/ENCFF341MYG.subsampled.400.trim.merged.nodup.bam", - "test_tss_enrich.ref_tss_enrich_qc" : "atac-seq-pipeline-test-data/ref_output/test_tss_enrich/ENCFF341MYG.subsampled.400.trim.merged.nodup.tss_enrich.qc" + "test_tss_enrich.ref_tss_enrich_qc" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_tss_enrich/ENCFF341MYG.subsampled.400.trim.merged.nodup.tss_enrich.qc" } diff --git a/dev/test/test_task/test_xcor.json b/dev/test/test_task/test_xcor.json index 707d1f57..d165d67e 100644 --- a/dev/test/test_task/test_xcor.json +++ b/dev/test/test_task/test_xcor.json @@ -1,11 +1,11 @@ { - "test_xcor.pe_ta" : "atac-seq-pipeline-test-data/input/pe/tas/rep1/ENCFF341MYG.subsampled.400.trim.merged.nodup.tn5.tagAlign.gz", - "test_xcor.se_ta" : "atac-seq-pipeline-test-data/input/se/tas/rep1/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.tagAlign.gz", + "test_xcor.pe_ta" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/pe/tas/rep1/ENCFF341MYG.subsampled.400.trim.merged.nodup.tn5.tagAlign.gz", + "test_xcor.se_ta" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/input/se/tas/rep1/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.tagAlign.gz", - "test_xcor.ref_pe_xcor_log" : "atac-seq-pipeline-test-data/ref_output/test_xcor/pe/ENCFF341MYG.subsampled.400.trim.merged.nodup.tn5.no_chrM.R1.25M.cc.qc", - "test_xcor.ref_pe_xcor_log_subsample" : 
"atac-seq-pipeline-test-data/ref_output/test_xcor/pe/subsample/fix_PIP-917/ENCFF341MYG.subsampled.400.trim.merged.nodup.tn5.no_chrM.R1.5K.cc.qc", - "test_xcor.ref_se_xcor_log" : "atac-seq-pipeline-test-data/ref_output/test_xcor/se/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.25M.cc.qc", - "test_xcor.ref_se_xcor_log_subsample" : "atac-seq-pipeline-test-data/ref_output/test_xcor/se/subsample/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.no_chrM.5K.cc.qc", + "test_xcor.ref_pe_xcor_log" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_xcor/pe/ENCFF341MYG.subsampled.400.trim.merged.nodup.tn5.no_chrM.R1.25M.cc.qc", + "test_xcor.ref_pe_xcor_log_subsample" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_xcor/pe/subsample/fix_PIP-917/ENCFF341MYG.subsampled.400.trim.merged.nodup.tn5.no_chrM.R1.5K.cc.qc", + "test_xcor.ref_se_xcor_log" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_xcor/se/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.25M.cc.qc", + "test_xcor.ref_se_xcor_log_subsample" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/atac-seq-pipeline-test-data/ref_output/test_xcor/se/subsample/ENCFF439VSY.subsampled.400.trim.merged.nodup.tn5.no_chrM.5K.cc.qc", "test_xcor.xcor_subsample" : 5000 } diff --git a/dev/test/test_workflow/ENCSR356KRQ.json b/dev/test/test_workflow/ENCSR356KRQ.json index 6a097902..1b1900ea 100644 --- a/dev/test/test_workflow/ENCSR356KRQ.json +++ b/dev/test/test_workflow/ENCSR356KRQ.json @@ -1,7 +1,7 @@ { "atac.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/ref_output/v1.1.6/ENCSR356KRQ/qc.json", "atac.pipeline_type" : "atac", - "atac.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v3/hg38.tsv", + "atac.genome_tsv" : 
"https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v4/hg38.tsv", "atac.fastqs_rep1_R1" : [ "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/ENCSR356KRQ/fastq/rep1/pair1/ENCFF341MYG.fastq.gz", "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/ENCSR356KRQ/fastq/rep1/pair1/ENCFF106QGY.fastq.gz" diff --git a/dev/test/test_workflow/ENCSR356KRQ_subsampled.json b/dev/test/test_workflow/ENCSR356KRQ_subsampled.json index 7f8eac61..cb224938 100644 --- a/dev/test/test_workflow/ENCSR356KRQ_subsampled.json +++ b/dev/test/test_workflow/ENCSR356KRQ_subsampled.json @@ -1,7 +1,7 @@ { - "atac.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/ref_output/v1.8.0/ENCSR356KRQ_subsampled/qc.json", + "atac.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/ref_output/v2.1.0/ENCSR356KRQ_subsampled/qc.json", "atac.pipeline_type" : "atac", - "atac.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v3/hg38.tsv", + "atac.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v4/hg38.tsv", "atac.fastqs_rep1_R1" : [ "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/ENCSR356KRQ/fastq_subsampled/rep1/pair1/ENCFF341MYG.subsampled.400.fastq.gz", "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/ENCSR356KRQ/fastq_subsampled/rep1/pair1/ENCFF106QGY.subsampled.400.fastq.gz" diff --git a/dev/test/test_workflow/ENCSR356KRQ_subsampled_chr19_only.json b/dev/test/test_workflow/ENCSR356KRQ_subsampled_chr19_only.json index 1290565a..fcf7013a 100644 --- a/dev/test/test_workflow/ENCSR356KRQ_subsampled_chr19_only.json +++ b/dev/test/test_workflow/ENCSR356KRQ_subsampled_chr19_only.json @@ -1,7 +1,7 @@ { "atac.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/ref_output/v1.1.7.2/ENCSR356KRQ_subsampled_chr19_only/qc.json", "atac.pipeline_type" : "atac", - "atac.genome_tsv" : 
"https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v3/hg38_chr19_chrM.tsv", + "atac.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v4/hg38_chr19_chrM.tsv", "atac.fastqs_rep1_R1" : [ "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/ENCSR356KRQ/fastq_subsampled/rep1/pair1/ENCFF341MYG.subsampled.400.fastq.gz", "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/ENCSR356KRQ/fastq_subsampled/rep1/pair1/ENCFF106QGY.subsampled.400.fastq.gz" diff --git a/dev/test/test_workflow/ENCSR356KRQ_subsampled_start_from_bam.json b/dev/test/test_workflow/ENCSR356KRQ_subsampled_start_from_bam.json index 7e8805ed..2d6836f1 100644 --- a/dev/test/test_workflow/ENCSR356KRQ_subsampled_start_from_bam.json +++ b/dev/test/test_workflow/ENCSR356KRQ_subsampled_start_from_bam.json @@ -1,7 +1,7 @@ { - "atac.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/ref_output/v1.8.0/ENCSR356KRQ_subsampled_start_from_bam/qc.json", + "atac.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/ref_output/v2.1.0/ENCSR356KRQ_subsampled_start_from_bam/qc.json", "atac.pipeline_type" : "atac", - "atac.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v3/hg38.tsv", + "atac.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v4/hg38.tsv", "atac.read_len" : [76, 76], "atac.nodup_bams" : [ "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/ENCSR356KRQ/bam_subsampled/rep1/ENCFF341MYG.subsampled.400.trim.merged.nodup.no_chrM_MT.bam", diff --git a/dev/test/test_workflow/ENCSR889WQX.json b/dev/test/test_workflow/ENCSR889WQX.json index 4c5a1e30..69eea40c 100644 --- a/dev/test/test_workflow/ENCSR889WQX.json +++ b/dev/test/test_workflow/ENCSR889WQX.json @@ -1,7 +1,7 @@ { "atac.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/ref_output/v1.1.6/ENCSR889WQX/qc.json", 
"atac.pipeline_type" : "atac", - "atac.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v3/mm10.tsv", + "atac.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v4/mm10.tsv", "atac.fastqs_rep1_R1" : [ "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/ENCSR889WQX/fastq/rep1/ENCFF439VSY.fastq.gz", "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/ENCSR889WQX/fastq/rep1/ENCFF325FCQ.fastq.gz", diff --git a/dev/test/test_workflow/ENCSR889WQX_subsampled.json b/dev/test/test_workflow/ENCSR889WQX_subsampled.json index b42a95cd..b28cff72 100644 --- a/dev/test/test_workflow/ENCSR889WQX_subsampled.json +++ b/dev/test/test_workflow/ENCSR889WQX_subsampled.json @@ -1,7 +1,7 @@ { "atac.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/ref_output/v1.8.0/ENCSR889WQX_subsampled/qc.json", "atac.pipeline_type" : "atac", - "atac.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v3/mm10.tsv", + "atac.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v4/mm10.tsv", "atac.fastqs_rep1_R1" : [ "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/ENCSR889WQX/fastq_subsampled/rep1/ENCFF439VSY.subsampled.400.fastq.gz", "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/ENCSR889WQX/fastq_subsampled/rep1/ENCFF325FCQ.subsampled.400.fastq.gz", diff --git a/dev/test/test_workflow/ENCSR889WQX_subsampled_chr19_only.json b/dev/test/test_workflow/ENCSR889WQX_subsampled_chr19_only.json index ffa7b847..966dacd5 100644 --- a/dev/test/test_workflow/ENCSR889WQX_subsampled_chr19_only.json +++ b/dev/test/test_workflow/ENCSR889WQX_subsampled_chr19_only.json @@ -1,7 +1,7 @@ { "atac.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/ref_output/v1.1.6.a/ENCSR889WQX_subsampled_chr19_only/qc.json", "atac.pipeline_type" : "atac", - "atac.genome_tsv" : 
"https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v3/mm10_chr19_chrM.tsv", + "atac.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v4/mm10_chr19_chrM.tsv", "atac.fastqs_rep1_R1" : [ "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/ENCSR889WQX/fastq_subsampled/rep1/ENCFF439VSY.subsampled.400.fastq.gz", "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/ENCSR889WQX/fastq_subsampled/rep1/ENCFF325FCQ.subsampled.400.fastq.gz", diff --git a/dev/test/test_workflow/ENCSR889WQX_subsampled_unrep.json b/dev/test/test_workflow/ENCSR889WQX_subsampled_unrep.json index deb51a65..4928fb23 100644 --- a/dev/test/test_workflow/ENCSR889WQX_subsampled_unrep.json +++ b/dev/test/test_workflow/ENCSR889WQX_subsampled_unrep.json @@ -1,7 +1,7 @@ { "atac.qc_report.qc_json_ref" : "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/ref_output/v1.8.0/ENCSR889WQX_subsampled_unrep/qc.json", "atac.pipeline_type" : "atac", - "atac.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v3/mm10.tsv", + "atac.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v4/mm10.tsv", "atac.fastqs_rep1_R1" : [ "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/ENCSR889WQX/fastq_subsampled/rep1/ENCFF439VSY.subsampled.400.fastq.gz", "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/ENCSR889WQX/fastq_subsampled/rep1/ENCFF325FCQ.subsampled.400.fastq.gz", diff --git a/dev/test/test_workflow/ref_output/v2.1.0/ENCSR356KRQ_subsampled/qc.json b/dev/test/test_workflow/ref_output/v2.1.0/ENCSR356KRQ_subsampled/qc.json new file mode 100644 index 00000000..6fc302ba --- /dev/null +++ b/dev/test/test_workflow/ref_output/v2.1.0/ENCSR356KRQ_subsampled/qc.json @@ -0,0 +1,405 @@ +{ + "general": { + "date": "2021-12-17 20:42:30", + "title": "ENCSR356KRQ (subsampled 1/400 reads)", + "description": "ATAC-seq on primary keratinocytes in day 0.0 of differentiation", + 
"pipeline_ver": "v2.1.0", + "pipeline_type": "atac", + "genome": "hg38", + "aligner": "bowtie2", + "seq_endedness": { + "rep1": { + "paired_end": true + }, + "rep2": { + "paired_end": true + } + }, + "peak_caller": "macs2" + }, + "align": { + "samstat": { + "rep1": { + "total_reads": 691166, + "total_reads_qc_failed": 0, + "duplicate_reads": 0, + "duplicate_reads_qc_failed": 0, + "mapped_reads": 680479, + "mapped_reads_qc_failed": 0, + "pct_mapped_reads": 98.5, + "paired_reads": 691166, + "paired_reads_qc_failed": 0, + "read1": 345583, + "read1_qc_failed": 0, + "read2": 345583, + "read2_qc_failed": 0, + "properly_paired_reads": 556054, + "properly_paired_reads_qc_failed": 0, + "pct_properly_paired_reads": 80.5, + "with_itself": 676210, + "with_itself_qc_failed": 0, + "singletons": 4269, + "singletons_qc_failed": 0, + "pct_singletons": 0.6, + "diff_chroms": 15177, + "diff_chroms_qc_failed": 0 + }, + "rep2": { + "total_reads": 848854, + "total_reads_qc_failed": 0, + "duplicate_reads": 0, + "duplicate_reads_qc_failed": 0, + "mapped_reads": 838515, + "mapped_reads_qc_failed": 0, + "pct_mapped_reads": 98.8, + "paired_reads": 848854, + "paired_reads_qc_failed": 0, + "read1": 424427, + "read1_qc_failed": 0, + "read2": 424427, + "read2_qc_failed": 0, + "properly_paired_reads": 682808, + "properly_paired_reads_qc_failed": 0, + "pct_properly_paired_reads": 80.4, + "with_itself": 832920, + "with_itself_qc_failed": 0, + "singletons": 5595, + "singletons_qc_failed": 0, + "pct_singletons": 0.7000000000000001, + "diff_chroms": 20376, + "diff_chroms_qc_failed": 0 + } + }, + "dup": { + "rep1": { + "unpaired_reads": 0, + "paired_reads": 249183, + "unmapped_reads": 0, + "unpaired_duplicate_reads": 0, + "paired_duplicate_reads": 960, + "paired_optical_duplicate_reads": 3, + "pct_duplicate_reads": 0.38530000000000003 + }, + "rep2": { + "unpaired_reads": 0, + "paired_reads": 301770, + "unmapped_reads": 0, + "unpaired_duplicate_reads": 0, + "paired_duplicate_reads": 1855, + 
"paired_optical_duplicate_reads": 6, + "pct_duplicate_reads": 0.6147 + } + }, + "frac_mito": { + "rep1": { + "non_mito_reads": 665674, + "mito_reads": 54279, + "frac_mito_reads": 0.07539242144973353 + }, + "rep2": { + "non_mito_reads": 817862, + "mito_reads": 77218, + "frac_mito_reads": 0.08626938374223533 + } + }, + "nodup_samstat": { + "rep1": { + "total_reads": 461170, + "total_reads_qc_failed": 0, + "duplicate_reads": 0, + "duplicate_reads_qc_failed": 0, + "mapped_reads": 461170, + "mapped_reads_qc_failed": 0, + "pct_mapped_reads": 100.0, + "paired_reads": 461170, + "paired_reads_qc_failed": 0, + "read1": 230585, + "read1_qc_failed": 0, + "read2": 230585, + "read2_qc_failed": 0, + "properly_paired_reads": 461170, + "properly_paired_reads_qc_failed": 0, + "pct_properly_paired_reads": 100.0, + "with_itself": 461170, + "with_itself_qc_failed": 0, + "singletons": 0, + "singletons_qc_failed": 0, + "pct_singletons": 0.0, + "diff_chroms": 0, + "diff_chroms_qc_failed": 0 + }, + "rep2": { + "total_reads": 550916, + "total_reads_qc_failed": 0, + "duplicate_reads": 0, + "duplicate_reads_qc_failed": 0, + "mapped_reads": 550916, + "mapped_reads_qc_failed": 0, + "pct_mapped_reads": 100.0, + "paired_reads": 550916, + "paired_reads_qc_failed": 0, + "read1": 275458, + "read1_qc_failed": 0, + "read2": 275458, + "read2_qc_failed": 0, + "properly_paired_reads": 550916, + "properly_paired_reads_qc_failed": 0, + "pct_properly_paired_reads": 100.0, + "with_itself": 550916, + "with_itself_qc_failed": 0, + "singletons": 0, + "singletons_qc_failed": 0, + "pct_singletons": 0.0, + "diff_chroms": 0, + "diff_chroms_qc_failed": 0 + } + }, + "frag_len_stat": { + "rep1": { + "frac_reads_in_nfr": 0.5035996900871412, + "frac_reads_in_nfr_qc_pass": true, + "frac_reads_in_nfr_qc_reason": "OK", + "nfr_over_mono_nuc_reads": 1.5384349444850742, + "nfr_over_mono_nuc_reads_qc_pass": false, + "nfr_over_mono_nuc_reads_qc_reason": "out of range [2.5, inf]", + "nfr_peak_exists": true, + 
"mono_nuc_peak_exists": true, + "di_nuc_peak_exists": true + }, + "rep2": { + "frac_reads_in_nfr": 0.5476324967292632, + "frac_reads_in_nfr_qc_pass": true, + "frac_reads_in_nfr_qc_reason": "OK", + "nfr_over_mono_nuc_reads": 1.815339639280494, + "nfr_over_mono_nuc_reads_qc_pass": false, + "nfr_over_mono_nuc_reads_qc_reason": "out of range [2.5, inf]", + "nfr_peak_exists": true, + "mono_nuc_peak_exists": true, + "di_nuc_peak_exists": true + } + }, + "frac_reads_in_annot": { + "rep1": { + "fri_dhs": 0.46376173645293495, + "fri_blacklist": 0.007832252748444174, + "fri_prom": 0.15181603313311795, + "fri_enh": 0.3902769043953423 + }, + "rep2": { + "fri_dhs": 0.4277004116780054, + "fri_blacklist": 0.009758293460346042, + "fri_prom": 0.13602799700861837, + "fri_enh": 0.372750110724684 + } + } + }, + "lib_complexity": { + "lib_complexity": { + "rep1": { + "total_fragments": 230808, + "distinct_fragments": 230603, + "positions_with_one_read": 197, + "NRF": 0.999112, + "PBC1": 0.999133, + "PBC2": 1169.558376 + }, + "rep2": { + "total_fragments": 275697, + "distinct_fragments": 275503, + "positions_with_one_read": 175, + "NRF": 0.999296, + "PBC1": 0.999339, + "PBC2": 1573.262857 + } + } + }, + "replication": { + "reproducibility": { + "overlap": { + "Nt": 29961, + "N1": 13398, + "N2": 14855, + "Np": 30170, + "N_opt": 30170, + "N_consv": 29961, + "opt_set": "pooled-pr1_vs_pooled-pr2", + "consv_set": "rep1_vs_rep2", + "rescue_ratio": 1.0069757351223256, + "self_consistency_ratio": 1.1087475742648156, + "reproducibility": "pass" + }, + "idr": { + "Nt": 343, + "N1": 27, + "N2": 46, + "Np": 446, + "N_opt": 446, + "N_consv": 343, + "opt_set": "pooled-pr1_vs_pooled-pr2", + "consv_set": "rep1_vs_rep2", + "rescue_ratio": 1.3002915451895043, + "self_consistency_ratio": 1.7037037037037037, + "reproducibility": "pass" + } + }, + "num_peaks": { + "rep1": { + "num_peaks": 236085 + }, + "rep2": { + "num_peaks": 269945 + } + } + }, + "peak_stat": { + "peak_region_size": { + "rep1": { + 
"min_size": 150.0, + "25_pct": 150.0, + "50_pct": 182.0, + "75_pct": 249.0, + "max_size": 976.0, + "mean": 209.35886650994345 + }, + "rep2": { + "min_size": 150.0, + "25_pct": 150.0, + "50_pct": 190.0, + "75_pct": 255.0, + "max_size": 1058.0, + "mean": 214.83504788012374 + }, + "idr_opt": { + "min_size": 168.0, + "25_pct": 379.25, + "50_pct": 462.5, + "75_pct": 558.0, + "max_size": 907.0, + "mean": 475.30941704035877 + }, + "overlap_opt": { + "min_size": 150.0, + "25_pct": 229.0, + "50_pct": 311.0, + "75_pct": 411.0, + "max_size": 1212.0, + "mean": 332.8175339741465 + } + } + }, + "align_enrich": { + "xcor_score": { + "rep1": { + "subsampled_reads": 230808, + "estimated_fragment_len": 0, + "corr_estimated_fragment_len": 0.0111972879681259, + "phantom_peak": 70, + "corr_phantom_peak": 0.01143303, + "argmin_corr": 1500, + "min_corr": 0.004724189, + "NSC": 2.370203, + "RSC": 0.9648616 + }, + "rep2": { + "subsampled_reads": 275697, + "estimated_fragment_len": 0, + "corr_estimated_fragment_len": 0.0135895835739089, + "phantom_peak": 70, + "corr_phantom_peak": 0.01393697, + "argmin_corr": 1500, + "min_corr": 0.007344452, + "NSC": 1.85032, + "RSC": 0.9473054 + } + }, + "tss_enrich": { + "rep1": { + "tss_enrich": 19.469529156258762 + }, + "rep2": { + "tss_enrich": 16.786519165410933 + } + }, + "jsd": { + "rep1": { + "auc": 0.014297736952048858, + "syn_auc": 0.43674234633678954, + "x_intercept": 0.9580399307429933, + "syn_x_intercept": 0.03713794486602452, + "elbow_pt": 0.9585437634705278, + "syn_elbow_pt": 0.5981251752734491, + "syn_jsd": 0.09928508053140922 + }, + "rep2": { + "auc": 0.016935627964213763, + "syn_auc": 0.4413757539559547, + "x_intercept": 0.9508103309701179, + "syn_x_intercept": 0.018620135168457327, + "elbow_pt": 0.9514281258622137, + "syn_elbow_pt": 0.45900785671043565, + "syn_jsd": 0.09419160218976005 + } + } + }, + "peak_enrich": { + "frac_reads_in_peaks": { + "macs2": { + "rep1": { + "frip": 0.9184682438146453 + }, + "rep2": { + "frip": 
0.9103747213731312 + }, + "rep1-pr1": { + "frip": 0.964616238626803 + }, + "rep2-pr1": { + "frip": 0.9598595793188073 + }, + "rep1-pr2": { + "frip": 0.9648804773965236 + }, + "rep2-pr2": { + "frip": 0.9598849915413602 + }, + "pooled": { + "frip": 0.6919402106145129 + }, + "pooled-pr1": { + "frip": 0.9143552734544822 + }, + "pooled-pr2": { + "frip": 0.9153765892949597 + } + }, + "overlap": { + "rep1_vs_rep2": { + "frip": 0.18861638240228598 + }, + "rep1-pr1_vs_rep1-pr2": { + "frip": 0.14657935251642562 + }, + "rep2-pr1_vs_rep2-pr2": { + "frip": 0.13989791547168715 + }, + "pooled-pr1_vs_pooled-pr2": { + "frip": 0.1893801514891027 + } + }, + "idr": { + "rep1_vs_rep2": { + "frip": 0.007678201259576755 + }, + "rep1-pr1_vs_rep1-pr2": { + "frip": 0.0011297352386321748 + }, + "rep2-pr1_vs_rep2-pr2": { + "frip": 0.001506581765641223 + }, + "pooled-pr1_vs_pooled-pr2": { + "frip": 0.00959602247239859 + } + } + } + } +} diff --git a/dev/test/test_workflow/ref_output/v2.1.0/ENCSR356KRQ_subsampled_start_from_bam/qc.json b/dev/test/test_workflow/ref_output/v2.1.0/ENCSR356KRQ_subsampled_start_from_bam/qc.json new file mode 100644 index 00000000..cec695c6 --- /dev/null +++ b/dev/test/test_workflow/ref_output/v2.1.0/ENCSR356KRQ_subsampled_start_from_bam/qc.json @@ -0,0 +1,225 @@ +{ + "general": { + "date": "2021-12-17 23:28:15", + "title": "ENCSR356KRQ (subsampled 1/400 reads, starting from BAM)", + "description": "ATAC-seq on primary keratinocytes in day 0.0 of differentiation", + "pipeline_ver": "v2.1.0", + "pipeline_type": "atac", + "genome": "hg38", + "aligner": "bowtie2", + "seq_endedness": { + "rep1": { + "paired_end": true + }, + "rep2": { + "paired_end": true + } + }, + "peak_caller": "macs2" + }, + "align": { + "frag_len_stat": { + "rep1": { + "frac_reads_in_nfr": 0.5025463237588045, + "frac_reads_in_nfr_qc_pass": true, + "frac_reads_in_nfr_qc_reason": "OK", + "nfr_over_mono_nuc_reads": 1.5317476261266691, + "nfr_over_mono_nuc_reads_qc_pass": false, + 
"nfr_over_mono_nuc_reads_qc_reason": "out of range [2.5, inf]", + "nfr_peak_exists": true, + "mono_nuc_peak_exists": true, + "di_nuc_peak_exists": true + }, + "rep2": { + "frac_reads_in_nfr": 0.5463799114259104, + "frac_reads_in_nfr_qc_pass": true, + "frac_reads_in_nfr_qc_reason": "OK", + "nfr_over_mono_nuc_reads": 1.805493716774327, + "nfr_over_mono_nuc_reads_qc_pass": false, + "nfr_over_mono_nuc_reads_qc_reason": "out of range [2.5, inf]", + "nfr_peak_exists": true, + "mono_nuc_peak_exists": true, + "di_nuc_peak_exists": true + } + }, + "frac_reads_in_annot": { + "rep1": { + "fri_dhs": 0.46616961177166133, + "fri_blacklist": 0.006523024371017539, + "fri_prom": 0.15275788828308043, + "fri_enh": 0.39283540257067157 + }, + "rep2": { + "fri_dhs": 0.43002061885928544, + "fri_blacklist": 0.008300741838673936, + "fri_prom": 0.13694297894821805, + "fri_enh": 0.3754631905666884 + } + } + }, + "replication": { + "reproducibility": { + "overlap": { + "Nt": 29773, + "N1": 13112, + "N2": 14769, + "Np": 29827, + "N_opt": 29827, + "N_consv": 29773, + "opt_set": "pooled-pr1_vs_pooled-pr2", + "consv_set": "rep1_vs_rep2", + "rescue_ratio": 1.001813723843751, + "self_consistency_ratio": 1.1263727882855399, + "reproducibility": "pass" + }, + "idr": { + "Nt": 339, + "N1": 25, + "N2": 44, + "Np": 434, + "N_opt": 434, + "N_consv": 339, + "opt_set": "pooled-pr1_vs_pooled-pr2", + "consv_set": "rep1_vs_rep2", + "rescue_ratio": 1.28023598820059, + "self_consistency_ratio": 1.76, + "reproducibility": "pass" + } + }, + "num_peaks": { + "rep1": { + "num_peaks": 234228 + }, + "rep2": { + "num_peaks": 267741 + } + } + }, + "peak_stat": { + "peak_region_size": { + "rep1": { + "min_size": 150.0, + "25_pct": 150.0, + "50_pct": 181.0, + "75_pct": 249.0, + "max_size": 976.0, + "mean": 209.40041754188226 + }, + "rep2": { + "min_size": 150.0, + "25_pct": 150.0, + "50_pct": 190.0, + "75_pct": 255.0, + "max_size": 1058.0, + "mean": 214.8737772698242 + }, + "idr_opt": { + "min_size": 150.0, + "25_pct": 
379.25, + "50_pct": 467.0, + "75_pct": 576.0, + "max_size": 1025.0, + "mean": 477.16129032258067 + }, + "overlap_opt": { + "min_size": 150.0, + "25_pct": 229.0, + "50_pct": 309.0, + "75_pct": 411.0, + "max_size": 1212.0, + "mean": 332.02715660307774 + } + } + }, + "align_enrich": { + "tss_enrich": { + "rep1": { + "tss_enrich": 19.550830939157237 + }, + "rep2": { + "tss_enrich": 16.81303293753921 + } + }, + "jsd": { + "rep1": { + "auc": 0.014246947896647228, + "syn_auc": 0.4366366802369166, + "x_intercept": 0.9581778849421992, + "syn_x_intercept": 0.03765498070607418, + "elbow_pt": 0.9586817176697336, + "syn_elbow_pt": 0.601024988228519, + "syn_jsd": 0.09951945187613959 + }, + "rep2": { + "auc": 0.01686937393823306, + "syn_auc": 0.44126863265942623, + "x_intercept": 0.9509822738850702, + "syn_x_intercept": 0.018958523536221666, + "elbow_pt": 0.9515980694409456, + "syn_elbow_pt": 0.4624411418150721, + "syn_jsd": 0.0943388369132815 + } + } + }, + "peak_enrich": { + "frac_reads_in_peaks": { + "macs2": { + "rep1": { + "frip": 0.9203157502901311 + }, + "rep2": { + "frip": 0.9125257735741068 + }, + "rep1-pr1": { + "frip": 0.9667303127709703 + }, + "rep2-pr1": { + "frip": 0.9637078725886574 + }, + "rep1-pr2": { + "frip": 0.9681182776862979 + }, + "rep2-pr2": { + "frip": 0.9613818304557429 + }, + "pooled": { + "frip": 0.6966157660742128 + }, + "pooled-pr1": { + "frip": 0.9175643626026129 + }, + "pooled-pr2": { + "frip": 0.917779641053283 + } + }, + "overlap": { + "rep1_vs_rep2": { + "frip": 0.18944882424223067 + }, + "rep1-pr1_vs_rep1-pr2": { + "frip": 0.14654579693008388 + }, + "rep2-pr1_vs_rep2-pr2": { + "frip": 0.1402779510283748 + }, + "pooled-pr1_vs_pooled-pr2": { + "frip": 0.18980717133382977 + } + }, + "idr": { + "rep1_vs_rep2": { + "frip": 0.007711948829233138 + }, + "rep1-pr1_vs_rep1-pr2": { + "frip": 0.0010816965556504413 + }, + "rep2-pr1_vs_rep2-pr2": { + "frip": 0.0015794339719554162 + }, + "pooled-pr1_vs_pooled-pr2": { + "frip": 0.009460762490741868 + } + } + } 
+ } +} diff --git a/docs/input.md b/docs/input.md index 42de789f..6d3b40b7 100644 --- a/docs/input.md +++ b/docs/input.md @@ -59,30 +59,28 @@ We currently provide TSV files for 4 genomes as shown in the below table. You ca Genome|URL -|- -hg38|`https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v3/hg38.tsv` -mm10|`https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v3/mm10.tsv` -hg19|`https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v1/hg19_caper.tsv` -mm9|`https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v1/mm9_caper.tsv` +hg38|`https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v4/hg38.tsv` +mm10|`https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v4/mm10.tsv` For DNAnexus CLI (AWS project): Genome|DX URI -|- -hg38|`dx://project-BKpvFg00VBPV975PgJ6Q03v6:pipeline-genome-data/genome_tsv/v3/hg38.dx.tsv` -mm10|`dx://project-BKpvFg00VBPV975PgJ6Q03v6:pipeline-genome-data/genome_tsv/v3/mm10.dx.tsv` +hg38|`dx://project-BKpvFg00VBPV975PgJ6Q03v6:pipeline-genome-data/genome_tsv/v4/hg38.dx.tsv` +mm10|`dx://project-BKpvFg00VBPV975PgJ6Q03v6:pipeline-genome-data/genome_tsv/v4/mm10.dx.tsv` For DNAnexus CLI (Azure project): Genome|DX URI -|- -hg38|`dx://project-F6K911Q9xyfgJ36JFzv03Z5J:pipeline-genome-data/genome_tsv/v3/hg38.dx_azure.tsv` -mm10|`dx://project-F6K911Q9xyfgJ36JFzv03Z5J:pipeline-genome-data/genome_tsv/v3/mm10.dx_azure.tsv` +hg38|`dx://project-F6K911Q9xyfgJ36JFzv03Z5J:pipeline-genome-data/genome_tsv/v4/hg38.dx_azure.tsv` +mm10|`dx://project-F6K911Q9xyfgJ36JFzv03Z5J:pipeline-genome-data/genome_tsv/v4/mm10.dx_azure.tsv` -For DNAnexus Web UI (AWS project): Choose one of the following TSV file on `https://platform.DNAnexus.com/projects/BKpvFg00VBPV975PgJ6Q03v6/data/pipeline-genome-data/genome_tsv/v3`. 
+For DNAnexus Web UI (AWS project): Choose one of the following TSV files on `https://platform.DNAnexus.com/projects/BKpvFg00VBPV975PgJ6Q03v6/data/pipeline-genome-data/genome_tsv/v4`. Genome|File name -|- hg38|`hg38.dx.tsv` mm10|`mm10.dx.tsv` -For DNAnexus Web UI (Azure project): Choose one of the following TSV file on `https://platform.DNAnexus.com/projects/F6K911Q9xyfgJ36JFzv03Z5J/data/pipeline-genome-data/genome_tsv/v3`. +For DNAnexus Web UI (Azure project): Choose one of the following TSV files on `https://platform.DNAnexus.com/projects/F6K911Q9xyfgJ36JFzv03Z5J/data/pipeline-genome-data/genome_tsv/v4`. Genome|File name -|- hg38|`hg38.dx_azure.tsv` diff --git a/docs/input_short.md b/docs/input_short.md index 1bb73dcc..c5a1327a 100644 --- a/docs/input_short.md +++ b/docs/input_short.md @@ -20,33 +20,31 @@ Mandatory parameters: * (Optional) `atac.paired_ends`: For samples with mixed read ends, you can define read endedness for each biological replicate (e.g. `[true, false]` means paired-ended biorep-1 and single-ended biorep-2). 4) Reference genome - * `atac.genome_tsv`: Choose one from the following genome TSVs. `v3` is a standard for >=ENCODE4. + * `atac.genome_tsv`: Choose one from the following genome TSVs. `v3` was the standard for >=ENCODE4 and pipeline versions <=v2.0.2. It was updated to `v4` for pipeline versions >=v2.1.0 (on 12/01/2021). See the version history of genome TSV files [here](https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/history.txt).
Genome|URL -|- - hg38|`https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v3/hg38.tsv` - mm10|`https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v3/mm10.tsv` - hg19|`https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v1/hg19_caper.tsv` - mm9|`https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v1/mm9_caper.tsv` + hg38|`https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v4/hg38.tsv` + mm10|`https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v4/mm10.tsv` For DNAnexus CLI (AWS project): Genome|DX URI -|- - hg38|`dx://project-BKpvFg00VBPV975PgJ6Q03v6:pipeline-genome-data/genome_tsv/v3/hg38.dx.tsv` - mm10|`dx://project-BKpvFg00VBPV975PgJ6Q03v6:pipeline-genome-data/genome_tsv/v3/mm10.dx.tsv` + hg38|`dx://project-BKpvFg00VBPV975PgJ6Q03v6:pipeline-genome-data/genome_tsv/v4/hg38.dx.tsv` + mm10|`dx://project-BKpvFg00VBPV975PgJ6Q03v6:pipeline-genome-data/genome_tsv/v4/mm10.dx.tsv` For DNAnexus CLI (Azure project): Genome|DX URI -|- - hg38|`dx://project-F6K911Q9xyfgJ36JFzv03Z5J:pipeline-genome-data/genome_tsv/v3/hg38.dx_azure.tsv` - mm10|`dx://project-F6K911Q9xyfgJ36JFzv03Z5J:pipeline-genome-data/genome_tsv/v3/mm10.dx_azure.tsv` + hg38|`dx://project-F6K911Q9xyfgJ36JFzv03Z5J:pipeline-genome-data/genome_tsv/v4/hg38.dx_azure.tsv` + mm10|`dx://project-F6K911Q9xyfgJ36JFzv03Z5J:pipeline-genome-data/genome_tsv/v4/mm10.dx_azure.tsv` - For DNAnexus Web UI (AWS project): Choose one of the following TSV file on `https://platform.DNAnexus.com/projects/BKpvFg00VBPV975PgJ6Q03v6/data/pipeline-genome-data/genome_tsv/v3`. + For DNAnexus Web UI (AWS project): Choose one of the following TSV files on `https://platform.DNAnexus.com/projects/BKpvFg00VBPV975PgJ6Q03v6/data/pipeline-genome-data/genome_tsv/v4`.
Genome|File name -|- hg38|`hg38.dx.tsv` mm10|`mm10.dx.tsv` - For DNAnexus Web UI (Azure project): Choose one of the following TSV file on `https://platform.DNAnexus.com/projects/F6K911Q9xyfgJ36JFzv03Z5J/data/pipeline-genome-data/genome_tsv/v3`. + For DNAnexus Web UI (Azure project): Choose one of the following TSV files on `https://platform.DNAnexus.com/projects/F6K911Q9xyfgJ36JFzv03Z5J/data/pipeline-genome-data/genome_tsv/v4`. Genome|File name -|- hg38|`hg38.dx_azure.tsv` diff --git a/example_input_json/ENCSR356KRQ_subsampled.json b/example_input_json/ENCSR356KRQ_subsampled.json index 35642e6d..36d35fc7 100644 --- a/example_input_json/ENCSR356KRQ_subsampled.json +++ b/example_input_json/ENCSR356KRQ_subsampled.json @@ -1,6 +1,6 @@ { "atac.pipeline_type" : "atac", - "atac.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v3/hg38.tsv", + "atac.genome_tsv" : "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v4/hg38.tsv", "atac.fastqs_rep1_R1" : [ "https://storage.googleapis.com/encode-pipeline-test-samples/encode-atac-seq-pipeline/ENCSR356KRQ/fastq_subsampled/rep1/pair1/ENCFF341MYG.subsampled.400.fastq.gz", "https://storage.googleapis.com/encode-pipeline-test-samples/encode-atac-seq-pipeline/ENCSR356KRQ/fastq_subsampled/rep1/pair1/ENCFF106QGY.subsampled.400.fastq.gz" diff --git a/example_input_json/dx/ENCSR356KRQ_subsampled_dx.json b/example_input_json/dx/ENCSR356KRQ_subsampled_dx.json index b8abe4b8..3b44b4a4 100644 --- a/example_input_json/dx/ENCSR356KRQ_subsampled_dx.json +++ b/example_input_json/dx/ENCSR356KRQ_subsampled_dx.json @@ -1,6 +1,6 @@ { "atac.pipeline_type" : "atac", - "atac.genome_tsv" : "dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-genome-data/genome_tsv/v3/hg38.dx.tsv", + "atac.genome_tsv" : "dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-genome-data/genome_tsv/v4/hg38.dx.tsv", "atac.fastqs_rep1_R1" : [
"dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-test-samples/encode-atac-seq-pipeline/ENCSR356KRQ/fastq_subsampled/rep1/pair1/ENCFF341MYG.subsampled.400.fastq.gz", "dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-test-samples/encode-atac-seq-pipeline/ENCSR356KRQ/fastq_subsampled/rep1/pair1/ENCFF106QGY.subsampled.400.fastq.gz" diff --git a/example_input_json/dx/ENCSR356KRQ_subsampled_rep1_dx.json b/example_input_json/dx/ENCSR356KRQ_subsampled_rep1_dx.json index 9d96bb59..18e53517 100644 --- a/example_input_json/dx/ENCSR356KRQ_subsampled_rep1_dx.json +++ b/example_input_json/dx/ENCSR356KRQ_subsampled_rep1_dx.json @@ -1,6 +1,6 @@ { "atac.pipeline_type" : "atac", - "atac.genome_tsv" : "dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-genome-data/genome_tsv/v3/hg38.dx.tsv", + "atac.genome_tsv" : "dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-genome-data/genome_tsv/v4/hg38.dx.tsv", "atac.fastqs_rep1_R1" : [ "dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-test-samples/encode-atac-seq-pipeline/ENCSR356KRQ/fastq_subsampled/rep1/pair1/ENCFF341MYG.subsampled.400.fastq.gz", "dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-test-samples/encode-atac-seq-pipeline/ENCSR356KRQ/fastq_subsampled/rep1/pair1/ENCFF106QGY.subsampled.400.fastq.gz" diff --git a/example_input_json/dx/template_hg38.json b/example_input_json/dx/template_hg38.json index b1dcaa36..fdf602d5 100644 --- a/example_input_json/dx/template_hg38.json +++ b/example_input_json/dx/template_hg38.json @@ -1,4 +1,4 @@ { "atac.pipeline_type" : "atac", - "atac.genome_tsv" : "dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-genome-data/genome_tsv/v3/hg38.dx.tsv" + "atac.genome_tsv" : "dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-genome-data/genome_tsv/v4/hg38.dx.tsv" } diff --git a/example_input_json/dx/template_mm10.json b/example_input_json/dx/template_mm10.json index 2860a8a0..71cc9a77 100644 --- a/example_input_json/dx/template_mm10.json +++ b/example_input_json/dx/template_mm10.json @@ -1,4 +1,4 @@ { 
"atac.pipeline_type" : "atac", - "atac.genome_tsv" : "dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-genome-data/genome_tsv/v3/mm10.dx.tsv" + "atac.genome_tsv" : "dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-genome-data/genome_tsv/v4/mm10.dx.tsv" } diff --git a/example_input_json/dx_azure/ENCSR356KRQ_subsampled_dx_azure.json b/example_input_json/dx_azure/ENCSR356KRQ_subsampled_dx_azure.json index 07fb70ab..402f615d 100644 --- a/example_input_json/dx_azure/ENCSR356KRQ_subsampled_dx_azure.json +++ b/example_input_json/dx_azure/ENCSR356KRQ_subsampled_dx_azure.json @@ -1,6 +1,6 @@ { "atac.pipeline_type" : "atac", - "atac.genome_tsv" : "dx://project-F6K911Q9xyfgJ36JFzv03Z5J:/pipeline-genome-data/genome_tsv/v3/hg38.dx_azure.tsv", + "atac.genome_tsv" : "dx://project-F6K911Q9xyfgJ36JFzv03Z5J:/pipeline-genome-data/genome_tsv/v4/hg38.dx_azure.tsv", "atac.fastqs_rep1_R1" : [ "dx://project-F6K911Q9xyfgJ36JFzv03Z5J:/pipeline-test-samples/encode-atac-seq-pipeline/ENCSR356KRQ/fastq_subsampled/rep1/pair1/ENCFF341MYG.subsampled.400.fastq.gz", "dx://project-F6K911Q9xyfgJ36JFzv03Z5J:/pipeline-test-samples/encode-atac-seq-pipeline/ENCSR356KRQ/fastq_subsampled/rep1/pair1/ENCFF106QGY.subsampled.400.fastq.gz" diff --git a/example_input_json/dx_azure/template_hg38.json b/example_input_json/dx_azure/template_hg38.json index a5c8d359..117c1469 100644 --- a/example_input_json/dx_azure/template_hg38.json +++ b/example_input_json/dx_azure/template_hg38.json @@ -1,4 +1,4 @@ { "atac.pipeline_type" : "atac", - "atac.genome_tsv" : "dx://project-F6K911Q9xyfgJ36JFzv03Z5J:/pipeline-genome-data/genome_tsv/v3/hg38.dx_azure.tsv" + "atac.genome_tsv" : "dx://project-F6K911Q9xyfgJ36JFzv03Z5J:/pipeline-genome-data/genome_tsv/v4/hg38.dx_azure.tsv" } diff --git a/example_input_json/dx_azure/template_mm10.json b/example_input_json/dx_azure/template_mm10.json index 76cc5cc7..0f0e3677 100644 --- a/example_input_json/dx_azure/template_mm10.json +++ b/example_input_json/dx_azure/template_mm10.json @@ -1,4 +1,4 
@@ { "atac.pipeline_type" : "atac", - "atac.genome_tsv" : "dx://project-F6K911Q9xyfgJ36JFzv03Z5J:/pipeline-genome-data/genome_tsv/v3/mm10.dx_azure.tsv" + "atac.genome_tsv" : "dx://project-F6K911Q9xyfgJ36JFzv03Z5J:/pipeline-genome-data/genome_tsv/v4/mm10.dx_azure.tsv" } diff --git a/example_input_json/terra/ENCSR356KRQ_subsampled.terra.json b/example_input_json/terra/ENCSR356KRQ_subsampled.terra.json index f948d159..b1111c11 100644 --- a/example_input_json/terra/ENCSR356KRQ_subsampled.terra.json +++ b/example_input_json/terra/ENCSR356KRQ_subsampled.terra.json @@ -1,6 +1,6 @@ { "atac.pipeline_type" : "atac", - "atac.genome_tsv" : "gs://encode-pipeline-genome-data/genome_tsv/v3/hg38.terra.tsv", + "atac.genome_tsv" : "gs://encode-pipeline-genome-data/genome_tsv/v4/hg38.terra.tsv", "atac.fastqs_rep1_R1" : [ "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/ENCSR356KRQ/fastq_subsampled/rep1/pair1/ENCFF341MYG.subsampled.400.fastq.gz", "gs://encode-pipeline-test-samples/encode-atac-seq-pipeline/ENCSR356KRQ/fastq_subsampled/rep1/pair1/ENCFF106QGY.subsampled.400.fastq.gz"