diff --git a/.circleci/config.yml b/.circleci.back/config.bak similarity index 68% rename from .circleci/config.yml rename to .circleci.back/config.bak index c5ab1074..f28289a4 100644 --- a/.circleci/config.yml +++ b/.circleci.back/config.bak @@ -26,7 +26,7 @@ variables: save_cache: &save_cache save_cache: - key: v0-{{ checksum "env.yml" }}-{{ checksum "env-r.yml" }} + key: v4-{{ checksum "env.yml" }}-{{ checksum "env-r.yml" }} paths: - /opt/conda @@ -38,7 +38,7 @@ variables: restore_cache: &restore_cache restore_cache: keys: - - v0-{{ checksum "env.yml" }}-{{ checksum "env-r.yml" }} + - v4-{{ checksum "env.yml" }}-{{ checksum "env-r.yml" }} # -------------------------------------------------------------------------- # The path needs to be set each time; in jobs below this will be called as @@ -48,10 +48,13 @@ variables: name: Set path command: | # x11-utils required to avoid R::png() segfaulting - apt update && apt install -y locales-all locales x11-utils + apt update && apt install -y locales-all locales rsync x11-utils echo 'export DEPLOY=/tmp/lcdb-wf-test' >> $BASH_ENV + echo 'export LCDBWF_ENV=lcdb-wf-test' >> $BASH_ENV + echo 'export LCDBWF_ENV_R=lcdb-wf-test-r' >> $BASH_ENV echo 'export LC_ALL=en_US.utf8' >> $BASH_ENV echo 'export LANG=en_US.utf8' >> $BASH_ENV + echo 'export ORIG=$(pwd)' >> $BASH_ENV source $BASH_ENV # -------------------------------------------------------------------------- # Set up conda if the environments do not already exist @@ -60,20 +63,21 @@ variables: name: Setup conda command: | set -e - apt update - apt install -y locales-all locales - export LC_ALL=en_US.utf8 - export LANG=en_US.utf8 + source $BASH_ENV # We only do the installation if the conda environment does not already # exist. if ! conda env list | grep -q "lcdb-wf-test"; then echo "Setting up conda..." 
+ conda update conda conda config --system --add channels defaults conda config --system --add channels bioconda conda config --system --add channels conda-forge - conda install mamba -y - mamba env create -n lcdb-wf-test --file env.yml - mamba env create -n lcdb-wf-test-r --file env-r.yml + conda config --system --set channel_priority strict + conda install gcc mamba yaml -y + which mamba + mamba --version + mamba env create -n $LCDBWF_ENV --file env.yml + mamba env create -n $LCDBWF_ENV_R --file env-r.yml fi # -------------------------------------------------------------------------- @@ -82,6 +86,7 @@ variables: run: name: Download example data command: | + conda info --envs source activate lcdb-wf-test # rsync is required for the deployment process @@ -98,6 +103,9 @@ variables: cp workflows/colocalization/run_test.sh $DEPLOY/workflows/references cp workflows/colocalization/run_test.sh $DEPLOY/workflows/colocalization mkdir $DEPLOY/ci + mkdir $DEPLOY/test + cp test/lcdb-wf-test $DEPLOY/test + cp test/workflow_test_params.yaml $DEPLOY/test cp ci/get-data.py $DEPLOY/ci # the ./run_test.sh scripts run this @@ -105,7 +113,7 @@ variables: # download example data cd $DEPLOY - python ci/get-data.py + test/lcdb-wf-test data --kind=all --verbose # -------------------------------------------------------------------------- # Run the doctests across the included modules @@ -115,29 +123,22 @@ variables: command: | source activate lcdb-wf-test - # Ensure that the chunks in rnaseq.Rmd have matching documentation - (cd ci && python ensure_docs.py) + echo LCDBWF_ENV_R=$LCDBWF_ENV_R + echo LCDBWF_ENV=$LCDBWF_ENV + + # run unit tests and doctests for the modules in lib + test/lcdb-wf-test unit_tests --pytest - pytest --doctest-modules lib + # Ensure that the chunks in rnaseq.Rmd have matching documentation + test/lcdb-wf-test unit_tests --ensure-docs # find all URLs in reference configs and make sure they exist - python -c 'from lib.common import check_all_urls_found; 
check_all_urls_found()' + test/lcdb-wf-test unit_tests --url-check - source deactivate - source activate lcdb-wf-test-r - Rscript -e "devtools::test('lib/lcdbwf')" + # run R package unit tests using the R env + test/lcdb-wf-test unit_tests --r-test - # -------------------------------------------------------------------------- - # Other downstream jobs depend on this so that we can catch trivial errors - # quickly - rnaseq-dryrun: &rnaseq-dryrun - run: - name: dry run - command: | - cd $DEPLOY/workflows/rnaseq - source activate lcdb-wf-test - ./run_test.sh --use-conda -n # -------------------------------------------------------------------------- # Standard chipseq workflow chipseq-step: &chipseq-step @@ -146,8 +147,9 @@ variables: command: | cd $DEPLOY/workflows/chipseq source activate lcdb-wf-test - ./run_test.sh --use-conda -j2 -k -p -r - python chipseq_trackhub.py config/config.yaml config/hub_config.yaml + $DEPLOY/test/lcdb-wf-test chipseq --run-workflow --use-conda -j2 -k -p -r + $DEPLOY/test/lcdb-wf-test chipseq --trackhub + # -------------------------------------------------------------------------- # Previous versions had an error where chipseq peaks needed to be defined for # every caller. 
This does a (relatively) quick test to only run a single @@ -156,7 +158,6 @@ variables: run: name: chipseq misc command: | - ORIG=$(pwd) cd $DEPLOY/workflows/chipseq source activate lcdb-wf-test ./run_test.sh --use-conda -j2 -k -p -r \ @@ -185,32 +186,29 @@ variables: run: name: references workflow command: | - cd $DEPLOY/workflows/references source activate lcdb-wf-test - ./run_test.sh --use-conda -j2 -k -p -r --configfile config/config.yaml + $DEPLOY/test/lcdb-wf-test references --run-workflow --configfile=config/config.yaml -j2 -p -r -k --orig $ORIG + # -------------------------------------------------------------------------- # Standard RNA-seq workflow rnaseq-step: &rnaseq-step run: name: rnaseq workflow command: | - cd $DEPLOY/workflows/rnaseq + cd $DEPLOY source activate lcdb-wf-test - ./run_test.sh --use-conda -j2 -k -p -r - python rnaseq_trackhub.py config/config.yaml config/hub_config.yaml + $DEPLOY/test/lcdb-wf-test rnaseq --run-workflow -n + $DEPLOY/test/lcdb-wf-test rnaseq --run-workflow --use-conda -j2 -k -p -r --orig $ORIG - # Starting in v1.6, we separate out the non-R from the - # R environments. So we need to test the rnaseq.Rmd separately - # outside the context of the Snakefile. 
- source activate lcdb-wf-test-r + $DEPLOY/test/lcdb-wf-test rnaseq --trackhub --orig $ORIG - # This script runs the preprocessor on the Rmd files and stores them + # This run the preprocessor on the Rmd files and stores them # in a new download-test directory (see the comments in the script # for details) - bash run_downstream_test.sh + $DEPLOY/test/lcdb-wf-test rnaseq --downstream # bundle up the entire directory to be used as an artifact - tar -zcf downstream.tar.gz downstream-test/ + tar -zcf downstream.tar.gz workflows/rnaseq/downstream-test/ # -------------------------------------------------------------------------- # Various tests on RNA-seq workflow that don't warrant the overhead of a new @@ -221,63 +219,20 @@ variables: command: | ORIG=$(pwd) cd $DEPLOY - cp -r workflows/rnaseq workflows/rnaseq-misc-test - - # make a copy of the data so we can get a fresh copy each time below - cp -r workflows/rnaseq/data /tmp/data - cd workflows/rnaseq-misc-test source activate lcdb-wf-test - echo "test SRA, PE" - rm -r data - ./run_test.sh -j 1 --use-conda -k -p -r --until cutadapt \ - --configfile $ORIG/test/test_configs/test_rnaseq_config.yaml \ - --config sampletable=$ORIG/test/test_configs/test_sra_sampletable.tsv - - echo "test SRA, SE" - rm -r data - ./run_test.sh -j 1 --use-conda -k -p -r --until cutadapt \ - --configfile $ORIG/test/test_configs/test_rnaseq_config.yaml \ - --config sampletable=$ORIG/test/test_configs/test_sra_sampletable_SE_only.tsv - - echo "test strandedness, PE" - rm -r data && cp -r /tmp/data data - ./run_test.sh -j 2 --use-conda -k -p -r --until strand_check \ - --configfile $ORIG/test/test_configs/test_rnaseq_config.yaml \ - --config sampletable=$ORIG/test/test_configs/test_pe_sampletable.tsv - - echo "test strandedness, SE" - rm -r data && cp -r /tmp/data data - ./run_test.sh -j 2 --use-conda -k -p -r --until strand_check \ - --configfile $ORIG/test/test_configs/test_rnaseq_config.yaml \ - --config 
sampletable=$ORIG/test/test_configs/two_samples.tsv - - echo "test STAR 2-pass alignment" - rm -r data && cp -r /tmp/data data - ./run_test.sh -j 2 --use-conda -k -p -r \ - --forcerun star_pass1 \ - --configfile \ - $ORIG/test/test_configs/test_rnaseq_config.yaml \ - $ORIG/test/test_configs/star_override_2pass.yaml \ - --config sampletable=$ORIG/test/test_configs/two_samples.tsv \ - --until star_pass2 - - echo "test STAR 1-pass alignment" - rm -r data && cp -r /tmp/data data - ./run_test.sh -j 2 --use-conda -k -p -r \ - --forcerun star \ - --configfile \ - $ORIG/test/test_configs/test_rnaseq_config.yaml \ - $ORIG/test/test_configs/star_override_1pass.yaml \ - --config sampletable=$ORIG/test/test_configs/two_samples.tsv \ - --until star - - echo "test PE reads" - rm -r data && cp -r /tmp/data data - ./run_test.sh -j 2 --use-conda -k -p -r --until multiqc \ - --configfile \ - $ORIG/test/test_configs/test_rnaseq_config.yaml \ - --config sampletable=$ORIG/test/test_configs/test_pe_sampletable.tsv + # Check the help for test/lcdb-wf-test to see what args these + # provide; some of them use the --until argument to restrict the + # rules that are run. Note the use of --orig $ORIG to use the test + # configs from the original clone rather than the deployed directory. 
+ $DEPLOY/test/lcdb-wf-test rnaseq --run-workflow --sra-pe -k -r -p -j2 --use-conda --orig $ORIG + $DEPLOY/test/lcdb-wf-test rnaseq --run-workflow --sra-se -k -r -p -j2 --use-conda --orig $ORIG + $DEPLOY/test/lcdb-wf-test rnaseq --run-workflow --strandedness-pe -k -r -p -j2 --use-conda --orig $ORIG + $DEPLOY/test/lcdb-wf-test rnaseq --run-workflow --star-2pass -k -r -p -j2 --use-conda --orig $ORIG + $DEPLOY/test/lcdb-wf-test rnaseq --run-workflow --star-1pass -k -r -p -j2 --use-conda --orig $ORIG + $DEPLOY/test/lcdb-wf-test rnaseq --run-workflow --pe -k -r -p -j2 --use-conda --orig $ORIG + + # -------------------------------------------------------------------------- # Standard colocalization workflow @@ -307,6 +262,7 @@ jobs: <<: *defaults steps: - checkout + - *set-path # Check the hashes of requirements files. If they match a cache, load it. # The cache is set up to be the entire miniconda installation, so that @@ -337,6 +293,7 @@ jobs: steps: - checkout - *restore_cache + - *set-path - *setup - *pytest-step @@ -449,41 +406,41 @@ workflows: test-suite: jobs: - initial-setup - - pytest: - requires: - - initial-setup - - chipseq: - requires: - - initial-setup - - pytest +# - pytest: +# requires: +# - initial-setup +# - chipseq: +# requires: +# - initial-setup +# - pytest - chipseq-misc: requires: - initial-setup - - pytest - - rnaseq: - requires: - - initial-setup - - pytest - - rnaseq-misc: - requires: - - initial-setup - - pytest - - references: - requires: - - initial-setup - - pytest - - colocalization: - requires: - - initial-setup - - pytest - - build-docs: - requires: - - initial-setup - - report-env: - requires: - - rnaseq - - rnaseq-misc - - chipseq - - chipseq-misc - - references - - colocalization +# - pytest +# - rnaseq: +# requires: +# - initial-setup +# - pytest +# - rnaseq-misc: +# requires: +# - initial-setup +# - pytest +# - references: +# requires: +# - initial-setup +# - pytest +# - colocalization: +# requires: +# - initial-setup +# - pytest +# 
- build-docs: +# requires: +# - initial-setup +# - report-env: +# requires: +# - rnaseq +# - rnaseq-misc +# - chipseq +# - chipseq-misc +# - references +# - colocalization diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml new file mode 100644 index 00000000..f8a41faf --- /dev/null +++ b/.github/workflows/main.yaml @@ -0,0 +1,78 @@ +on: [push] +env: + DEPLOY: /tmp/lcdb-wf-test + LCDBWF_ENV: lcdb-wf-test + LCDBWF_ENV_R: lcdb-wf-test-r + LC_ALL: en_US.utf8 + LANG: en_US.utf8 + +jobs: + + build: + runs-on: ubuntu-latest + strategy: + fail-fast: true + container: continuumio/miniconda3 + steps: + + - uses: actions/checkout@v1 + + - name: Cache conda env + id: cache-conda + uses: actions/cache@v3 + with: + path: /opt/conda + key: ${{ hashFiles('env.yml') }}-${{ hashFiles('env-r.yml') }} + + - name: Rebuild cache + if: steps.cache-conda.outputs.cache-hit != 'true' + run: | + conda config --add channels defaults + conda config --add channels bioconda + conda config --add channels conda-forge + conda config --set channel_priority strict + + conda install mamba -y + mamba env create -n lcdb-wf-test --file env.yml + mamba env create -n lcdb-wf-test-r --file env-r.yml + + - name: Download example data + run: | + conda info --envs + source activate lcdb-wf-test + + # rsync is required for the deployment process + apt install -y rsync + + # Deploy to the new directory, so we are testing the real-world case of post-deployment. + # Note that $DEPLOY is set in the "set-paths" step configured above. 
+ python deploy.py --flavor full --dest $DEPLOY --branch $GITHUB_HEAD_REF --clone + + # Separately copy over some test-specific files + cp workflows/chipseq/run_test.sh $DEPLOY/workflows/chipseq + cp workflows/rnaseq/run_test.sh $DEPLOY/workflows/rnaseq + cp workflows/rnaseq/run_downstream_test.sh $DEPLOY/workflows/rnaseq + cp workflows/colocalization/run_test.sh $DEPLOY/workflows/references + cp workflows/colocalization/run_test.sh $DEPLOY/workflows/colocalization + mkdir $DEPLOY/ci + mkdir $DEPLOY/test + cp test/lcdb-wf-test $DEPLOY/test + cp test/workflow_test_params.yaml $DEPLOY/test + cp ci/get-data.py $DEPLOY/ci + + # the ./run_test.sh scripts run this + cp ci/preprocessor.py $DEPLOY/ci + + # download example data + cd $DEPLOY + test/lcdb-wf-test data --kind=all --verbose + + - name: chipseq misc + run: | + cd $DEPLOY/workflows/chipseq + source activate lcdb-wf-test + ./run_test.sh --use-conda -j2 -k -p -r \ + --configfile $ORIG/test/test_configs/test_chipseq_regression.yaml \ + --config sampletable=$ORIG/test/test_configs/chipseq_one_run.tsv \ + merged_bigwigs="{}" \ + --until bed_to_bigbed diff --git a/ci/ensure_docs.py b/ci/ensure_docs.py old mode 100644 new mode 100755 index cee62f6d..232d8fab --- a/ci/ensure_docs.py +++ b/ci/ensure_docs.py @@ -96,3 +96,7 @@ def get_headings(rst, underline="-"): print("\n") print("\n".join(errors)) sys.exit(1) + +else: + print("OK: found documentation for these identified chunks:") + print("- " + "\n- ".join(sorted(headings))) diff --git a/docs/changelog.rst b/docs/changelog.rst index e1fb489d..160bba54 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,5 +1,17 @@ Changelog ========= +v1.10 +----- + +General +~~~~~~~ +- Migrated to a unified testing script that simplifies local and CI testing +- Added "--gres:lscratch" to all slurm-related params +- If sampletable is from SRA, raise an error if a Layout column can't be found + (to prevent incorrect interpretation of samples as single-end) +- Fix R tests +- 
ensure bam indexes are made for the markdups bams, even if bigwigs are not created +- fix library loads in rnaseq.Rmd to ensure they come before parallelization configuration v1.9 ---- diff --git a/env-r.yml b/env-r.yml index 4b30a89a..a23ff911 100644 --- a/env-r.yml +++ b/env-r.yml @@ -1,387 +1,403 @@ -name: null channels: - conda-forge - bioconda dependencies: - - _libgcc_mutex=0.1=conda_forge + - _libgcc_mutex=0.1 - _openmp_mutex=4.5=2_gnu - - _r-mutex=1.0.1=anacondar_1 - - binutils_impl_linux-64=2.39=h6ceecb4_0 - - bioconductor-all=1.32.0=r40hdfd78af_1 - - bioconductor-annotate=1.68.0=r40hdfd78af_1 - - bioconductor-annotationdbi=1.52.0=r40hdfd78af_1 - - bioconductor-annotationhub=2.22.0=r40hdfd78af_1 - - bioconductor-apeglm=1.12.0=r40h399db7b_1 - - bioconductor-biobase=2.50.0=r40hd029910_1 - - bioconductor-biocfilecache=1.14.0=r40hdfd78af_1 - - bioconductor-biocgenerics=0.36.0=r40hdfd78af_1 - - bioconductor-biocparallel=1.24.1=r40h399db7b_0 - - bioconductor-biocversion=3.12.0=r40hdfd78af_1 - - bioconductor-biomart=2.46.3=r40hdfd78af_0 - - bioconductor-biostrings=2.58.0=r40hd029910_1 - - bioconductor-clusterprofiler=3.18.1=r40hdfd78af_0 - - bioconductor-complexheatmap=2.6.2=r40hdfd78af_1 - - bioconductor-consensusclusterplus=1.54.0=r40hdfd78af_1 - - bioconductor-degreport=1.26.0=r40hdfd78af_1 - - bioconductor-delayedarray=0.16.3=r40hd029910_0 - - bioconductor-deseq2=1.30.1=r40h399db7b_0 - - bioconductor-do.db=2.9=r40hdfd78af_10 - - bioconductor-dose=3.16.0=r40hdfd78af_1 - - bioconductor-dupradar=1.20.0=r40hdfd78af_1 - - bioconductor-edger=3.32.1=r40h399db7b_0 - - bioconductor-enrichplot=1.10.2=r40hdfd78af_0 - - bioconductor-fgsea=1.16.0=r40h399db7b_1 - - bioconductor-genefilter=1.72.1=r40hba52eb8_0 - - bioconductor-geneplotter=1.68.0=r40hdfd78af_1 - - bioconductor-genomeinfodb=1.26.4=r40hdfd78af_0 - - bioconductor-genomeinfodbdata=1.2.4=r40hdfd78af_2 - - bioconductor-genomicalignments=1.26.0=r40hd029910_1 - - bioconductor-genomicfeatures=1.42.2=r40hdfd78af_0 - - 
bioconductor-genomicranges=1.42.0=r40hd029910_1 - - bioconductor-go.db=3.12.1=r40hdfd78af_1 - - bioconductor-gosemsim=2.16.1=r40h399db7b_0 - - bioconductor-graph=1.68.0=r40hd029910_1 - - bioconductor-graphite=1.36.0=r40hdfd78af_1 - - bioconductor-ihw=1.18.0=r40hdfd78af_1 - - bioconductor-interactivedisplaybase=1.28.0=r40hdfd78af_1 - - bioconductor-iranges=2.24.1=r40hd029910_0 - - bioconductor-limma=3.46.0=r40hd029910_1 - - bioconductor-lpsymphony=1.18.0=r40h399db7b_1 - - bioconductor-matrixgenerics=1.2.1=r40hdfd78af_0 - - bioconductor-qvalue=2.22.0=r40hdfd78af_1 - - bioconductor-reactome.db=1.74.0=r40hdfd78af_1 - - bioconductor-reactomepa=1.34.0=r40hdfd78af_1 - - bioconductor-rhdf5=2.34.0=r40h399db7b_1 - - bioconductor-rhdf5filters=1.2.0=r40h399db7b_1 - - bioconductor-rhdf5lib=1.12.1=r40hd029910_0 - - bioconductor-rhtslib=1.22.0=r40hd029910_1 - - bioconductor-rsamtools=2.6.0=r40h399db7b_1 - - bioconductor-rsubread=2.4.3=r40hd029910_0 - - bioconductor-rtracklayer=1.50.0=r40h7f5ccec_2 - - bioconductor-s4vectors=0.28.1=r40hd029910_0 - - bioconductor-summarizedexperiment=1.20.0=r40hdfd78af_1 - - bioconductor-sva=3.38.0=r40hd029910_1 - - bioconductor-tximport=1.18.0=r40hdfd78af_1 - - bioconductor-xvector=0.30.0=r40hd029910_1 - - bioconductor-zlibbioc=1.36.0=r40hd029910_1 - - bwidget=1.9.14=ha770c72_1 - - bzip2=1.0.8=h7f98852_4 - - c-ares=1.18.1=h7f98852_0 - - ca-certificates=2022.9.24=ha878542_0 - - cairo=1.16.0=ha61ee94_1014 - - curl=7.86.0=h7bff187_1 - - expat=2.5.0=h27087fc_0 - - font-ttf-dejavu-sans-mono=2.37=hab24e00_0 - - font-ttf-inconsolata=3.000=h77eed37_0 - - font-ttf-source-code-pro=2.038=h77eed37_0 - - font-ttf-ubuntu=0.83=hab24e00_0 - - fontconfig=2.14.1=hc2a2eb6_0 + - _r-mutex=1.0.1 + - binutils_impl_linux-64=2.40 + - bioconductor-all=1.40.0 + - bioconductor-annotate=1.76.0 + - bioconductor-annotationdbi=1.60.0 + - bioconductor-annotationhub=3.6.0 + - bioconductor-apeglm=1.20.0 + - bioconductor-biobase=2.58.0 + - bioconductor-biocfilecache=2.6.0 + - 
bioconductor-biocgenerics=0.44.0 + - bioconductor-biocio=1.8.0 + - bioconductor-biocparallel=1.32.5 + - bioconductor-biocversion=3.16.0 + - bioconductor-biomart=2.54.0 + - bioconductor-biostrings=2.66.0 + - bioconductor-clusterprofiler=4.6.0 + - bioconductor-complexheatmap=2.14.0 + - bioconductor-consensusclusterplus=1.62.0 + - bioconductor-data-packages=20230202 + - bioconductor-degreport=1.34.0 + - bioconductor-delayedarray=0.24.0 + - bioconductor-deseq2=1.38.0 + - bioconductor-dose=3.24.0 + - bioconductor-edger=3.40.0 + - bioconductor-enrichplot=1.18.0 + - bioconductor-fgsea=1.24.0 + - bioconductor-genefilter=1.80.0 + - bioconductor-geneplotter=1.76.0 + - bioconductor-genomeinfodb=1.34.8 + - bioconductor-genomeinfodbdata=1.2.9 + - bioconductor-genomicalignments=1.34.0 + - bioconductor-genomicfeatures=1.50.2 + - bioconductor-genomicranges=1.50.0 + - bioconductor-ggtree=3.6.0 + - bioconductor-go.db=3.16.0 + - bioconductor-gosemsim=2.24.0 + - bioconductor-hdo.db=0.99.1 + - bioconductor-ihw=1.26.0 + - bioconductor-interactivedisplaybase=1.36.0 + - bioconductor-iranges=2.32.0 + - bioconductor-keggrest=1.38.0 + - bioconductor-limma=3.54.0 + - bioconductor-lpsymphony=1.26.0 + - bioconductor-matrixgenerics=1.10.0 + - bioconductor-qvalue=2.30.0 + - bioconductor-rhdf5=2.42.0 + - bioconductor-rhdf5filters=1.10.0 + - bioconductor-rhdf5lib=1.20.0 + - bioconductor-rhtslib=2.0.0 + - bioconductor-rsamtools=2.14.0 + - bioconductor-rtracklayer=1.58.0 + - bioconductor-s4vectors=0.36.0 + - bioconductor-summarizedexperiment=1.28.0 + - bioconductor-sva=3.46.0 + - bioconductor-treeio=1.22.0 + - bioconductor-tximport=1.26.0 + - bioconductor-xvector=0.38.0 + - bioconductor-zlibbioc=1.44.0 + - bwidget=1.9.14 + - bzip2=1.0.8 + - c-ares=1.18.1 + - ca-certificates=2022.12.7 + - cairo=1.16.0 + - curl=7.87.0 + - expat=2.5.0 + - font-ttf-dejavu-sans-mono=2.37 + - font-ttf-inconsolata=3.000 + - font-ttf-source-code-pro=2.038 + - font-ttf-ubuntu=0.83 + - fontconfig=2.14.2 - 
fonts-conda-ecosystem=1=0 - fonts-conda-forge=1=0 - - freetype=2.12.1=hca18f0e_0 - - fribidi=1.0.10=h36c2ea0_0 - - gcc_impl_linux-64=12.2.0=hcc96c02_19 - - gettext=0.21.1=h27087fc_0 - - gfortran_impl_linux-64=12.2.0=h55be85b_19 - - glpk=5.0=h445213a_0 - - gmp=6.2.1=h58526e2_0 - - graphite2=1.3.13=h58526e2_1001 - - gsl=2.7=he838d99_0 - - gxx_impl_linux-64=12.2.0=hcc96c02_19 - - harfbuzz=5.3.0=h418a68e_0 - - icu=70.1=h27087fc_0 - - jpeg=9e=h166bdaf_2 - - kernel-headers_linux-64=2.6.32=he073ed8_15 - - keyutils=1.6.1=h166bdaf_0 - - krb5=1.19.3=h3790be6_0 - - ld_impl_linux-64=2.39=hc81fddc_0 - - lerc=4.0.0=h27087fc_0 + - freetype=2.12.1 + - fribidi=1.0.10 + - gcc_impl_linux-64=12.2.0 + - gettext=0.21.1 + - gfortran_impl_linux-64=12.2.0 + - glpk=5.0 + - gmp=6.2.1 + - graphite2=1.3.13 + - gsl=2.7 + - gxx_impl_linux-64=12.2.0 + - harfbuzz=6.0.0 + - icu=70.1 + - jpeg=9e + - jq=1.6 + - kernel-headers_linux-64=2.6.32 + - keyutils=1.6.1 + - krb5=1.20.1 + - ld_impl_linux-64=2.40 + - lerc=4.0.0 - libblas=3.9.0=16_linux64_openblas - libcblas=3.9.0=16_linux64_openblas - - libcurl=7.86.0=h7bff187_1 - - libdeflate=1.14=h166bdaf_0 - - libedit=3.1.20191231=he28a2e2_2 - - libev=4.33=h516909a_1 - - libffi=3.4.2=h7f98852_5 - - libgcc-devel_linux-64=12.2.0=h3b97bd3_19 - - libgcc-ng=12.2.0=h65d4601_19 - - libgfortran-ng=12.2.0=h69a702a_19 - - libgfortran5=12.2.0=h337968e_19 - - libgit2=1.5.0=hde0c96a_1 - - libglib=2.74.1=h606061b_1 - - libgomp=12.2.0=h65d4601_19 - - libiconv=1.17=h166bdaf_0 + - libcurl=7.87.0 + - libdeflate=1.17 + - libedit=3.1.20191231 + - libev=4.33 + - libffi=3.4.2 + - libgcc-devel_linux-64=12.2.0 + - libgcc-ng=12.2.0 + - libgfortran-ng=12.2.0 + - libgfortran5=12.2.0 + - libgit2=1.5.1 + - libglib=2.74.1 + - libgomp=12.2.0 + - libiconv=1.17 - liblapack=3.9.0=16_linux64_openblas - - libnghttp2=1.47.0=hdcd2b5c_1 - - libopenblas=0.3.21=pthreads_h78a6416_3 - - libpng=1.6.38=h753d276_0 - - libsanitizer=12.2.0=h46fd767_19 - - libssh2=1.10.0=haa6b8db_3 - - 
libstdcxx-devel_linux-64=12.2.0=h3b97bd3_19 - - libstdcxx-ng=12.2.0=h46fd767_19 - - libtiff=4.4.0=h55922b4_4 - - libuuid=2.32.1=h7f98852_1000 - - libwebp-base=1.2.4=h166bdaf_0 - - libxcb=1.13=h7f98852_1004 - - libxml2=2.10.3=h7463322_0 - - libzlib=1.2.13=h166bdaf_4 - - make=4.3=hd18ef5c_1 - - ncurses=6.3=h27087fc_1 - - openssl=1.1.1s=h166bdaf_0 - - pandoc=2.19.2=h32600fe_1 - - pango=1.50.11=h382ae3d_0 - - pcre2=10.40=hc3806b6_0 - - pixman=0.40.0=h36c2ea0_0 - - pthread-stubs=0.4=h36c2ea0_1001 - - r-ashr=2.2_54=r40h7525677_0 - - r-askpass=1.1=r40hcfec24a_2 - - r-assertthat=0.2.1=r40hc72bb7e_2 - - r-babelgene=22.3=r40hc72bb7e_0 - - r-backports=1.4.1=r40hcfec24a_0 - - r-base=4.0.5=hb87df5d_8 - - r-base64enc=0.1_3=r40hcfec24a_1004 - - r-bbmle=1.0.25=r40hc72bb7e_0 - - r-bdsmatrix=1.3_6=r40h06615bd_0 - - r-bh=1.78.0_0=r40hc72bb7e_0 - - r-biocmanager=1.30.18=r40hc72bb7e_0 - - r-bit=4.0.4=r40hcfec24a_0 - - r-bit64=4.0.5=r40hcfec24a_0 - - r-bitops=1.0_7=r40h06615bd_0 - - r-blob=1.2.3=r40hc72bb7e_0 - - r-brew=1.0_7=r40hc72bb7e_0 - - r-brio=1.1.3=r40hcfec24a_0 - - r-broom=1.0.1=r40hc72bb7e_0 - - r-bslib=0.4.0=r40hc72bb7e_0 - - r-cachem=1.0.6=r40hcfec24a_0 - - r-cairo=1.6_0=r40h06615bd_0 - - r-callr=3.7.2=r40hc72bb7e_0 - - r-catools=1.18.2=r40h7525677_0 - - r-checkmate=2.1.0=r40h06615bd_0 - - r-circlize=0.4.15=r40hc72bb7e_0 - - r-cli=3.4.1=r40h7525677_0 - - r-clipr=0.8.0=r40hc72bb7e_0 - - r-clue=0.3_60=r40hcfec24a_0 - - r-cluster=2.1.3=r40h8da6f51_0 - - r-coda=0.19_4=r40hc72bb7e_0 - - r-codetools=0.2_18=r40hc72bb7e_0 - - r-colorspace=2.0_3=r40h06615bd_0 - - r-commonmark=1.8.0=r40h06615bd_0 - - r-cowplot=1.1.1=r40hc72bb7e_0 - - r-cpp11=0.4.2=r40hc72bb7e_0 - - r-crayon=1.5.1=r40hc72bb7e_0 - - r-credentials=1.3.2=r40hc72bb7e_0 - - r-crosstalk=1.2.0=r40hc72bb7e_0 - - r-curl=4.3.2=r40hcfec24a_0 - - r-data.table=1.14.2=r40hcfec24a_0 - - r-dbi=1.1.3=r40hc72bb7e_0 - - r-dbplyr=2.2.1=r40hc72bb7e_0 - - r-dendextend=1.16.0=r40hc72bb7e_0 - - r-desc=1.4.2=r40hc72bb7e_0 - - 
r-devtools=2.4.4=r40hc72bb7e_0 - - r-diffobj=0.3.5=r40hcfec24a_0 - - r-digest=0.6.29=r40h03ef668_0 - - r-downlit=0.4.2=r40hc72bb7e_0 - - r-downloader=0.4=r40hc72bb7e_1003 - - r-dplyr=1.0.10=r40h7525677_0 - - r-dt=0.25=r40hc72bb7e_0 - - r-egg=0.4.5=r40hc72bb7e_2 - - r-ellipsis=0.3.2=r40hcfec24a_0 - - r-emdbook=1.3.12=r40hc72bb7e_1 - - r-etrunct=0.1=r40hc72bb7e_1003 - - r-evaluate=0.16=r40hc72bb7e_0 - - r-fansi=1.0.3=r40h06615bd_0 - - r-farver=2.1.1=r40h7525677_0 - - r-fastmap=1.1.0=r40h03ef668_0 - - r-fastmatch=1.1_3=r40hcfec24a_0 - - r-fdrtool=1.2.17=r40hcfec24a_0 - - r-fontawesome=0.3.0=r40hc72bb7e_0 - - r-forcats=0.5.2=r40hc72bb7e_0 - - r-foreach=1.5.2=r40hc72bb7e_0 - - r-formatr=1.12=r40hc72bb7e_0 - - r-fs=1.5.2=r40h7525677_1 - - r-futile.logger=1.4.3=r40hc72bb7e_1003 - - r-futile.options=1.0.1=r40hc72bb7e_1002 - - r-gclus=1.3.2=r40hc72bb7e_2 - - r-generics=0.1.3=r40hc72bb7e_0 - - r-gert=1.5.0=r40h163148b_2 - - r-getoptlong=1.0.5=r40hc72bb7e_0 - - r-ggally=2.1.2=r40hc72bb7e_0 - - r-ggdendro=0.1.23=r40hc72bb7e_0 - - r-ggforce=0.3.4=r40h7525677_0 - - r-ggfun=0.0.7=r40hc72bb7e_0 - - r-ggnewscale=0.4.7=r40hc72bb7e_0 - - r-ggplot2=3.3.6=r40hc72bb7e_0 - - r-ggraph=2.0.6=r40h7525677_0 - - r-ggrepel=0.9.1=r40h03ef668_0 - - r-gh=1.3.1=r40hc72bb7e_0 - - r-gitcreds=0.1.2=r40hc72bb7e_0 - - r-globaloptions=0.1.2=r40ha770c72_0 - - r-glue=1.6.2=r40h06615bd_0 - - r-gplots=3.1.3=r40hc72bb7e_0 - - r-graphlayouts=0.8.1=r40h7525677_0 - - r-gridextra=2.3=r40hc72bb7e_1003 - - r-gtable=0.3.1=r40hc72bb7e_0 - - r-gtools=3.9.3=r40h06615bd_0 - - r-heatmaply=1.3.0=r40hc72bb7e_0 - - r-hexbin=1.28.2=r40h8da6f51_0 - - r-highr=0.9=r40hc72bb7e_0 - - r-hms=1.1.2=r40hc72bb7e_0 - - r-htmltools=0.5.3=r40h7525677_0 - - r-htmlwidgets=1.5.4=r40hc72bb7e_0 - - r-httpuv=1.6.6=r40h7525677_0 - - r-httr=1.4.4=r40hc72bb7e_0 - - r-igraph=1.3.4=r40hb34fc8a_0 - - r-ini=0.3.1=r40hc72bb7e_1003 - - r-invgamma=1.1=r40hc72bb7e_1 - - r-irlba=2.3.5=r40h5f7b363_0 - - r-isoband=0.2.5=r40h03ef668_0 - - 
r-iterators=1.0.14=r40hc72bb7e_0 - - r-jquerylib=0.1.4=r40hc72bb7e_0 - - r-jsonlite=1.8.0=r40h06615bd_0 - - r-kernsmooth=2.23_20=r40h742201e_0 - - r-knitr=1.40=r40hc72bb7e_0 - - r-labeling=0.4.2=r40hc72bb7e_1 - - r-lambda.r=1.2.4=r40hc72bb7e_1 - - r-lasso2=1.2_22=r40hcfec24a_0 - - r-later=1.2.0=r40h03ef668_0 - - r-lattice=0.20_45=r40hcfec24a_0 - - r-lazyeval=0.2.2=r40hcfec24a_2 - - r-lifecycle=1.0.2=r40hc72bb7e_0 - - r-locfit=1.5_9.4=r40hcfec24a_1 - - r-logging=0.10_108=r40ha770c72_2 - - r-magrittr=2.0.3=r40h06615bd_0 - - r-mass=7.3_58.1=r40h06615bd_0 - - r-matrix=1.4_1=r40h0154571_0 - - r-matrixstats=0.62.0=r40h06615bd_0 - - r-memoise=2.0.1=r40hc72bb7e_0 - - r-mgcv=1.8_40=r40h0154571_0 - - r-mime=0.12=r40hcfec24a_0 - - r-miniui=0.1.1.1=r40hc72bb7e_1002 - - r-mixsqp=0.3_43=r40h306847c_1 - - r-mnormt=2.1.0=r40h8da6f51_0 - - r-msigdbr=7.5.1=r40hc72bb7e_0 - - r-munsell=0.5.0=r40hc72bb7e_1004 - - r-mvtnorm=1.1_3=r40h859d828_0 - - r-nlme=3.1_159=r40h8da6f51_0 - - r-nozzle.r1=1.1_1.1=r40ha770c72_0 - - r-numderiv=2016.8_1.1=r40hc72bb7e_3 - - r-openssl=2.0.3=r40hfaab4ff_0 - - r-openxlsx=4.2.5=r40h03ef668_0 - - r-pheatmap=1.0.12=r40hc72bb7e_2 - - r-pillar=1.8.1=r40hc72bb7e_0 - - r-pkgbuild=1.3.1=r40hc72bb7e_0 - - r-pkgconfig=2.0.3=r40hc72bb7e_1 - - r-pkgdown=2.0.6=r40hc72bb7e_0 - - r-pkgload=1.3.0=r40hc72bb7e_0 - - r-plogr=0.2.0=r40hc72bb7e_1003 - - r-plotly=4.10.0=r40hc72bb7e_0 - - r-plyr=1.8.7=r40h7525677_0 - - r-png=0.1_7=r40hcfec24a_1004 - - r-polyclip=1.10_0=r40h7525677_2 - - r-praise=1.0.0=r40hc72bb7e_1005 - - r-prettyunits=1.1.1=r40hc72bb7e_1 - - r-processx=3.7.0=r40h06615bd_0 - - r-profvis=0.3.7=r40hcfec24a_0 - - r-progress=1.2.2=r40hc72bb7e_2 - - r-promises=1.2.0.1=r40h03ef668_0 - - r-ps=1.7.1=r40h06615bd_0 - - r-psych=2.2.5=r40hc72bb7e_0 - - r-purrr=0.3.4=r40hcfec24a_1 - - r-qap=0.1_2=r40h8da6f51_0 - - r-r6=2.5.1=r40hc72bb7e_0 - - r-ragg=1.2.2=r40hc1f6985_0 - - r-rappdirs=0.3.3=r40hcfec24a_0 - - r-rcmdcheck=1.4.0=r40h785f33e_0 - - 
r-rcolorbrewer=1.1_3=r40h785f33e_0 - - r-rcpp=1.0.9=r40h7525677_1 - - r-rcpparmadillo=0.11.2.3.1=r40h9f5de39_0 - - r-rcppeigen=0.3.3.9.2=r40h43535f1_0 - - r-rcppnumerical=0.4_0=r40h03ef668_1 - - r-rcurl=1.98_1.8=r40h06615bd_0 - - r-readr=2.1.2=r40h03ef668_0 - - r-registry=0.5_1=r40hc72bb7e_2 - - r-rematch2=2.1.2=r40hc72bb7e_1 - - r-remotes=2.4.2=r40hc72bb7e_0 - - r-reshape=0.8.9=r40hc72bb7e_0 - - r-reshape2=1.4.4=r40h03ef668_1 - - r-rjson=0.2.21=r40h7525677_1 - - r-rlang=1.0.6=r40h7525677_0 - - r-rmarkdown=2.16=r40hc72bb7e_0 - - r-roxygen2=7.2.1=r40h7525677_0 - - r-rprojroot=2.0.3=r40hc72bb7e_0 - - r-rsqlite=2.2.8=r40h03ef668_0 - - r-rstudioapi=0.14=r40hc72bb7e_0 - - r-rvcheck=0.2.1=r40hc72bb7e_0 - - r-rversions=2.1.2=r40hc72bb7e_0 - - r-sass=0.4.2=r40h7525677_0 - - r-scales=1.2.1=r40hc72bb7e_0 - - r-scatterpie=0.1.8=r40hc72bb7e_0 - - r-seriation=1.3.6=r40h8da6f51_0 - - r-sessioninfo=1.2.2=r40hc72bb7e_0 - - r-shadowtext=0.1.2=r40hc72bb7e_0 - - r-shape=1.4.6=r40ha770c72_0 - - r-shiny=1.7.2=r40h785f33e_0 - - r-slam=0.1_50=r40hb699f27_1 - - r-snow=0.4_4=r40hc72bb7e_0 - - r-sourcetools=0.1.7=r40h03ef668_1002 - - r-sparsem=1.81=r40h859d828_0 - - r-spp=1.16.0=r40h52a8340_4 - - r-squarem=2021.1=r40hc72bb7e_0 - - r-stringi=1.7.8=r40h30a9eb7_0 - - r-stringr=1.4.1=r40hc72bb7e_0 - - r-survival=3.4_0=r40h06615bd_0 - - r-sys=3.4=r40hcfec24a_0 - - r-systemfonts=1.0.4=r40hef9c87a_0 - - r-testthat=3.1.4=r40h7525677_0 - - r-textshaping=0.3.6=r40h9354b80_2 - - r-tibble=3.1.8=r40h06615bd_0 - - r-tidygraph=1.2.2=r40h7525677_0 - - r-tidyr=1.2.1=r40h7525677_0 - - r-tidyselect=1.1.2=r40hc72bb7e_0 - - r-tinytex=0.42=r40hc72bb7e_0 - - r-tmvnsim=1.0_2=r40h859d828_3 - - r-truncnorm=1.0_8=r40hcfec24a_1002 - - r-tsp=1.2_1=r40h06615bd_0 - - r-tweenr=2.0.2=r40h7525677_0 - - r-tzdb=0.3.0=r40h7525677_0 - - r-upsetr=1.4.0=r40hc72bb7e_2 - - r-urlchecker=1.0.1=r40hc72bb7e_0 - - r-usethis=2.1.6=r40hc72bb7e_0 - - r-utf8=1.2.2=r40hcfec24a_0 - - r-vctrs=0.4.1=r40h7525677_0 - - 
r-viridis=0.6.2=r40hc72bb7e_0 - - r-viridislite=0.4.1=r40hc72bb7e_0 - - r-vroom=1.5.7=r40h03ef668_0 - - r-waldo=0.4.0=r40hc72bb7e_0 - - r-webshot=0.5.4=r40hc72bb7e_0 - - r-whisker=0.4=r40hc72bb7e_1 - - r-withr=2.5.0=r40hc72bb7e_0 - - r-xfun=0.33=r40h7525677_0 - - r-xml=3.99_0.10=r40h06615bd_0 - - r-xml2=1.3.3=r40h7525677_1 - - r-xopen=1.0.0=r40hc72bb7e_1003 - - r-xtable=1.8_4=r40hc72bb7e_3 - - r-yaml=2.3.5=r40h06615bd_0 - - r-yulab.utils=0.0.5=r40hc72bb7e_0 - - r-zip=2.2.1=r40h06615bd_0 - - readline=8.1.2=h0f457ee_0 - - sed=4.8=he412f7d_0 - - sysroot_linux-64=2.12=he073ed8_15 - - tk=8.6.12=h27826a3_0 - - tktable=2.10=hb7b940f_3 - - xorg-kbproto=1.0.7=h7f98852_1002 - - xorg-libice=1.0.10=h7f98852_0 - - xorg-libsm=1.2.3=hd9c2040_1000 - - xorg-libx11=1.7.2=h7f98852_0 - - xorg-libxau=1.0.9=h7f98852_0 - - xorg-libxdmcp=1.1.3=h7f98852_0 - - xorg-libxext=1.3.4=h7f98852_1 - - xorg-libxrender=0.9.10=h7f98852_1003 - - xorg-libxt=1.2.1=h7f98852_2 - - xorg-renderproto=0.11.1=h7f98852_1002 - - xorg-xextproto=7.3.0=h7f98852_1002 - - xorg-xproto=7.0.31=h7f98852_1007 - - xz=5.2.6=h166bdaf_0 - - zlib=1.2.13=h166bdaf_4 - - zstd=1.5.2=h6239696_4 -prefix: /home/dalerr/proj/lcdb-wf/env-r + - libnghttp2=1.51.0 + - libnsl=2.0.0 + - libopenblas=0.3.21 + - libpng=1.6.39 + - libsanitizer=12.2.0 + - libsqlite=3.40.0 + - libssh2=1.10.0 + - libstdcxx-devel_linux-64=12.2.0 + - libstdcxx-ng=12.2.0 + - libtiff=4.5.0 + - libuuid=2.38.1 + - libwebp-base=1.3.0 + - libxcb=1.13 + - libxml2=2.10.3 + - libzlib=1.2.13 + - make=4.3 + - ncurses=6.3 + - oniguruma=6.9.8 + - openssl=1.1.1t + - pandoc=2.19.2 + - pango=1.50.14 + - pcre2=10.40 + - pixman=0.40.0 + - pthread-stubs=0.4 + - python=3.11.0 + - python_abi=3.11=3_cp311 + - r-ape=5.7_1 + - r-aplot=0.1.10 + - r-ashr=2.2_54 + - r-askpass=1.1 + - r-assertthat=0.2.1 + - r-babelgene=22.9 + - r-backports=1.4.1 + - r-base=4.2.2 + - r-base64enc=0.1_3 + - r-bbmle=1.0.25 + - r-bdsmatrix=1.3_6 + - r-bh=1.81.0_1 + - r-biocmanager=1.30.20 + - r-bit=4.0.5 + - 
r-bit64=4.0.5 + - r-bitops=1.0_7 + - r-blob=1.2.4 + - r-brew=1.0_8 + - r-brio=1.1.3 + - r-broom=1.0.4 + - r-bslib=0.4.2 + - r-ca=0.71.1 + - r-cachem=1.0.7 + - r-callr=3.7.3 + - r-circlize=0.4.15 + - r-cli=3.6.1 + - r-clipr=0.8.0 + - r-clue=0.3_64 + - r-cluster=2.1.4 + - r-coda=0.19_4 + - r-codetools=0.2_19 + - r-colorspace=2.1_0 + - r-commonmark=1.9.0 + - r-cowplot=1.1.1 + - r-cpp11=0.4.3 + - r-crayon=1.5.2 + - r-credentials=1.3.2 + - r-crosstalk=1.2.0 + - r-curl=4.3.3 + - r-data.table=1.14.8 + - r-dbi=1.1.3 + - r-dbplyr=2.3.2 + - r-dendextend=1.17.1 + - r-desc=1.4.2 + - r-devtools=2.4.5 + - r-diffobj=0.3.5 + - r-digest=0.6.31 + - r-doparallel=1.0.17 + - r-downlit=0.4.2 + - r-downloader=0.4 + - r-dplyr=1.1.1 + - r-dt=0.27 + - r-egg=0.4.5 + - r-ellipsis=0.3.2 + - r-emdbook=1.3.12 + - r-etrunct=0.1 + - r-evaluate=0.20 + - r-fansi=1.0.4 + - r-farver=2.1.1 + - r-fastmap=1.1.1 + - r-fastmatch=1.1_3 + - r-fdrtool=1.2.17 + - r-filelock=1.0.2 + - r-fontawesome=0.5.0 + - r-forcats=1.0.0 + - r-foreach=1.5.2 + - r-formatr=1.14 + - r-fs=1.6.1 + - r-futile.logger=1.4.3 + - r-futile.options=1.0.1 + - r-gclus=1.3.2 + - r-generics=0.1.3 + - r-gert=1.9.2 + - r-getoptlong=1.0.5 + - r-ggally=2.1.2 + - r-ggdendro=0.1.23 + - r-ggforce=0.4.1 + - r-ggfun=0.0.9 + - r-ggnewscale=0.4.8 + - r-ggplot2=3.4.1 + - r-ggplotify=0.1.0 + - r-ggraph=2.1.0 + - r-ggrepel=0.9.3 + - r-gh=1.4.0 + - r-gitcreds=0.1.2 + - r-globaloptions=0.1.2 + - r-glue=1.6.2 + - r-graphlayouts=0.8.4 + - r-gridextra=2.3 + - r-gridgraphics=0.5_1 + - r-gson=0.1.0 + - r-gtable=0.3.3 + - r-heatmaply=1.4.2 + - r-hexbin=1.28.3 + - r-highr=0.10 + - r-hms=1.1.3 + - r-htmltools=0.5.5 + - r-htmlwidgets=1.6.2 + - r-httpuv=1.6.9 + - r-httr=1.4.5 + - r-httr2=0.2.2 + - r-igraph=1.4.1 + - r-ini=0.3.1 + - r-invgamma=1.1 + - r-irlba=2.3.5.1 + - r-isoband=0.2.7 + - r-iterators=1.0.14 + - r-jquerylib=0.1.4 + - r-jsonlite=1.8.4 + - r-knitr=1.42 + - r-labeling=0.4.2 + - r-lambda.r=1.2.4 + - r-later=1.3.0 + - r-lattice=0.20_45 + - 
r-lazyeval=0.2.2 + - r-lifecycle=1.0.3 + - r-locfit=1.5_9.7 + - r-logging=0.10_108 + - r-magrittr=2.0.3 + - r-mass=7.3_58.3 + - r-matrix=1.5_3 + - r-matrixstats=0.63.0 + - r-memoise=2.0.1 + - r-mgcv=1.8_42 + - r-mime=0.12 + - r-miniui=0.1.1.1 + - r-mixsqp=0.3_48 + - r-mnormt=2.1.1 + - r-msigdbr=7.5.1 + - r-munsell=0.5.0 + - r-mvtnorm=1.1_3 + - r-nlme=3.1_162 + - r-numderiv=2016.8_1.1 + - r-openssl=2.0.5 + - r-openxlsx=4.2.5.2 + - r-patchwork=1.1.2 + - r-pheatmap=1.0.12 + - r-pillar=1.9.0 + - r-pkgbuild=1.4.0 + - r-pkgconfig=2.0.3 + - r-pkgdown=2.0.7 + - r-pkgload=1.3.2 + - r-plogr=0.2.0 + - r-plotly=4.10.1 + - r-plyr=1.8.8 + - r-png=0.1_8 + - r-polyclip=1.10_4 + - r-praise=1.0.0 + - r-prettyunits=1.1.1 + - r-processx=3.8.0 + - r-profvis=0.3.7 + - r-progress=1.2.2 + - r-promises=1.2.0.1 + - r-ps=1.7.3 + - r-psych=2.3.3 + - r-purrr=1.0.1 + - r-qap=0.1_2 + - r-r6=2.5.1 + - r-ragg=1.2.5 + - r-rappdirs=0.3.3 + - r-rcmdcheck=1.4.0 + - r-rcolorbrewer=1.1_3 + - r-rcpp=1.0.10 + - r-rcpparmadillo=0.11.4.4.0 + - r-rcppeigen=0.3.3.9.3 + - r-rcppnumerical=0.5_0 + - r-rcurl=1.98_1.10 + - r-readr=2.1.4 + - r-registry=0.5_1 + - r-rematch2=2.1.2 + - r-remotes=2.4.2 + - r-reshape=0.8.9 + - r-reshape2=1.4.4 + - r-restfulr=0.0.15 + - r-rjson=0.2.21 + - r-rlang=1.1.0 + - r-rmarkdown=2.21 + - r-roxygen2=7.2.3 + - r-rprojroot=2.0.3 + - r-rsqlite=2.3.0 + - r-rstudioapi=0.14 + - r-rvcheck=0.2.1 + - r-rversions=2.1.2 + - r-sass=0.4.5 + - r-scales=1.2.1 + - r-scatterpie=0.1.8 + - r-seriation=1.4.2 + - r-sessioninfo=1.2.2 + - r-shadowtext=0.1.2 + - r-shape=1.4.6 + - r-shiny=1.7.4 + - r-slam=0.1_50 + - r-snow=0.4_4 + - r-sourcetools=0.1.7_1 + - r-sparsem=1.81 + - r-squarem=2021.1 + - r-stringi=1.7.12 + - r-stringr=1.5.0 + - r-survival=3.5_5 + - r-sys=3.4.1 + - r-systemfonts=1.0.4 + - r-testthat=3.1.7 + - r-textshaping=0.3.6 + - r-tibble=3.2.1 + - r-tidygraph=1.2.3 + - r-tidyr=1.3.0 + - r-tidyselect=1.2.0 + - r-tidytree=0.4.2 + - r-tinytex=0.44 + - r-tmvnsim=1.0_2 + - r-truncnorm=1.0_9 + - 
r-tsp=1.2_3 + - r-tweenr=2.0.2 + - r-tzdb=0.3.0 + - r-upsetr=1.4.0 + - r-urlchecker=1.0.1 + - r-usethis=2.1.6 + - r-utf8=1.2.3 + - r-vctrs=0.6.1 + - r-viridis=0.6.2 + - r-viridislite=0.4.1 + - r-vroom=1.6.1 + - r-waldo=0.4.0 + - r-webshot=0.5.4 + - r-whisker=0.4.1 + - r-withr=2.5.0 + - r-xfun=0.38 + - r-xml=3.99_0.14 + - r-xml2=1.3.3 + - r-xopen=1.0.0 + - r-xtable=1.8_4 + - r-yaml=2.3.7 + - r-yulab.utils=0.0.6 + - r-zip=2.2.2 + - readline=8.2 + - sed=4.8 + - sysroot_linux-64=2.12 + - tk=8.6.12 + - tktable=2.10 + - tzdata=2023c + - xorg-kbproto=1.0.7 + - xorg-libice=1.0.10 + - xorg-libsm=1.2.3 + - xorg-libx11=1.8.4 + - xorg-libxau=1.0.9 + - xorg-libxdmcp=1.1.3 + - xorg-libxext=1.3.4 + - xorg-libxrender=0.9.10 + - xorg-libxt=1.2.1 + - xorg-renderproto=0.11.1 + - xorg-xextproto=7.3.0 + - xorg-xproto=7.0.31 + - xz=5.2.6 + - yaml=0.2.5 + - zlib=1.2.13 + - zstd=1.5.2 + - argcomplete=3.0.5 + - pip=23.0.1 + - pyyaml=6.0 + - setuptools=67.6.1 + - toml=0.10.2 + - wheel=0.40.0 + - xmltodict=0.13.0 + - yq=3.1.1 diff --git a/env.yml b/env.yml index 66a8bd01..4594f5ea 100644 --- a/env.yml +++ b/env.yml @@ -8,21 +8,21 @@ dependencies: - alsa-lib=1.2.3.2 - amply=0.1.5 - appdirs=1.4.4 - - argcomplete=2.0.0 - - argh=0.26.2 - - asttokens=2.0.8 + - argcomplete=3.0.5 + - argh=0.27.2 + - asttokens=2.2.1 - attr=2.5.1 - - attrs=22.1.0 + - attrs=22.2.0 - backcall=0.2.0 - backports=1.0 - backports.functools_lru_cache=1.6.4 - bedtools=2.30.0 - binutils_impl_linux-64=2.39 - binutils_linux-64=2.39 - - biopython=1.79 + - biopython=1.81 - boost-cpp=1.74.0 - bowtie=1.3.1 - - bowtie2=2.5.0 + - bowtie2=2.5.1 - brotli=1.0.9 - brotli-bin=1.0.9 - brotlipy=0.7.0 @@ -30,9 +30,9 @@ dependencies: - bx-python=0.9.0 - bzip2=1.0.8 - c-ares=1.18.1 - - ca-certificates=2022.9.24 + - ca-certificates=2022.12.7 - cairo=1.16.0 - - certifi=2022.9.24 + - certifi=2022.12.7 - cffi=1.15.1 - charset-normalizer=2.1.1 - click=8.1.3 @@ -45,42 +45,40 @@ dependencies: - colorama=0.4.6 - coloredlogs=15.0.1 - colormath=3.0.0 - 
- commonmark=0.9.1 - configargparse=1.5.3 - connection_pool=0.0.3 - - contourpy=1.0.6 - - cryptography=38.0.2 + - contourpy=1.0.7 + - cryptography=39.0.0 - curl=7.86.0 - - cutadapt=4.1 + - cutadapt=4.3 - cycler=0.11.0 - - dataclasses=0.8 - datrie=0.8.2 - dbus=1.13.6 - decorator=5.1.1 - deeptools=3.5.1 - deeptoolsintervals=0.1.9 - - dnaio=0.9.1 + - dnaio=0.10.0 - docutils=0.19 - - dpath=2.0.6 - - exceptiongroup=1.0.0 + - dpath=2.1.5 + - exceptiongroup=1.1.1 - execnet=1.9.0 - - executing=1.1.1 + - executing=1.2.0 - expat=2.5.0 - fastq-screen=0.15.2 - - fastqc=0.11.9 + - fastqc=0.12.1 - fftw=3.3.10 - - filelock=3.8.0 + - filelock=3.10.7 - font-ttf-dejavu-sans-mono=2.37 - font-ttf-inconsolata=3.000 - font-ttf-source-code-pro=2.038 - font-ttf-ubuntu=0.83 - - fontconfig=2.14.1 + - fontconfig=2.14.2 - fonts-conda-ecosystem=1 - fonts-conda-forge=1 - - fonttools=4.38.0 + - fonttools=4.39.3 - freetype=2.12.1 - fribidi=1.0.10 - - future=0.18.2 + - future=0.18.3 - gat=1.3.6 - gcc_impl_linux-64=10.4.0 - gcc_linux-64=10.4.0 @@ -90,8 +88,8 @@ dependencies: - gfortran_impl_linux-64=10.4.0 - gfortran_linux-64=10.4.0 - giflib=5.2.1 - - gitdb=4.0.9 - - gitpython=3.1.29 + - gitdb=4.0.10 + - gitpython=3.1.31 - glib=2.74.1 - glib-tools=2.74.1 - graphite2=1.3.13 @@ -107,25 +105,24 @@ dependencies: - humanfriendly=10.0 - icu=69.1 - idna=3.4 - - importlib-metadata=4.11.4 - - importlib_metadata=4.11.4 - - importlib_resources=5.10.0 - - iniconfig=1.1.1 + - importlib-metadata=6.1.0 + - importlib_resources=5.12.0 + - iniconfig=2.0.0 - intervalstats=1.01 - - ipython=8.6.0 + - ipython=8.11.0 - isa-l=2.30.0 - jack=1.9.18 - - jedi=0.18.1 + - jedi=0.18.2 - jinja2=3.1.2 - jpeg=9e - - jsonschema=4.16.0 - - jupyter_core=4.11.1 + - jsonschema=4.17.3 + - jupyter_core=5.3.0 - kallisto=0.48.0 - kernel-headers_linux-64=2.6.32 - keyutils=1.6.1 - kiwisolver=1.4.4 - krb5=1.19.3 - - lcms2=2.13.1 + - lcms2=2.14 - ld_impl_linux-64=2.39 - lerc=4.0.0 - libblas=3.9.0 @@ -151,156 +148,196 @@ dependencies: - 
libgfortran5=12.2.0 - libglib=2.74.1 - libgomp=12.2.0 + - libhwloc=2.8.0 - libiconv=1.17 - - libjemalloc=5.2.1 + - libjemalloc=5.3.0 - liblapack=3.9.0 - liblapacke=3.9.0 - libllvm13=13.0.1 - - libnghttp2=1.47.0 + - libnghttp2=1.51.0 - libnsl=2.0.0 - libogg=1.3.4 - libopenblas=0.3.21 - libopus=1.3.1 - - libpng=1.6.38 + - libpng=1.6.39 - libpq=14.5 - libsanitizer=10.4.0 - libsndfile=1.0.31 - - libsqlite=3.39.4 + - libsqlite=3.40.0 - libssh2=1.10.0 - libstdcxx-devel_linux-64=10.4.0 - libstdcxx-ng=12.2.0 - libtiff=4.4.0 - - libtool=2.4.6 - - libudev1=251 - - libuuid=2.32.1 + - libtool=2.4.7 + - libudev1=253 + - libuuid=2.38.1 - libvorbis=1.3.7 - libwebp=1.2.4 - libwebp-base=1.2.4 - libxcb=1.13 - libxkbcommon=1.0.3 - - libxml2=2.9.12 + - libxml2=2.9.14 - libzlib=1.2.13 - lzo=2.10 - lzstring=1.0.4 - make=4.3 - - markdown=3.4.1 - - markupsafe=2.1.1 - - matplotlib=3.6.0 - - matplotlib-base=3.6.0 + - markdown=3.4.3 + - markdown-it-py=2.2.0 + - markupsafe=2.1.2 + - matplotlib=3.7.1 + - matplotlib-base=3.7.1 - matplotlib-inline=0.1.6 - - multiqc=1.13 + - mdurl=0.1.0 + - multiqc=1.14 - munkres=1.1.4 - - mysql-common=8.0.31 + - mysql-common=8.0.32 - mysql-connector-c=6.1.11 - - mysql-libs=8.0.31 - - nbformat=5.7.0 + - mysql-libs=8.0.32 + - nbformat=5.8.0 + - ncbi-vdb=3.0.2 - ncurses=6.3 - - networkx=2.8.7 - - nspr=4.32 - - nss=3.78 - - numpy=1.23.4 + - networkx=3.0 + - nspr=4.35 + - nss=3.89 + - numpy=1.23.5 - openjdk=11.0.1 - openjpeg=2.5.0 - - openssl=1.1.1q - - packaging=21.3 - - pandas=1.5.1 - - pandoc=2.19.2 + - openssl=1.1.1t + - ossuuid=1.6.2 + - packaging=23.0 + - pandas=1.5.3 + - pandoc=3.1.1 - pango=1.50.7 - parso=0.8.3 - patsy=0.5.3 - pbzip2=1.1.13 - pcre2=10.37 - perl=5.32.1 + - perl-alien-build=2.48 + - perl-alien-libxml2=0.17 + - perl-business-isbn=3.007 + - perl-business-isbn-data=20210112.006 + - perl-capture-tiny=0.48 + - perl-carp=1.50 + - perl-constant=1.33 + - perl-data-dumper=2.183 + - perl-encode=3.19 + - perl-exporter=5.74 + - perl-extutils-makemaker=7.70 
+ - perl-ffi-checklib=0.28 + - perl-file-chdir=0.1011 + - perl-file-path=2.18 + - perl-file-temp=0.2304 + - perl-file-which=1.24 - perl-gd=2.76 - perl-gdgraph=1.54 - perl-gdtextutil=0.86 + - perl-importer=0.026 + - perl-mime-base64=3.16 + - perl-parent=0.241 + - perl-path-tiny=0.124 + - perl-pathtools=3.75 + - perl-scope-guard=0.21 + - perl-storable=3.15 + - perl-sub-info=0.002 + - perl-term-table=0.016 + - perl-test2-suite=0.000145 + - perl-uri=5.12 + - perl-xml-libxml=2.0207 + - perl-xml-namespacesupport=1.12 + - perl-xml-sax=1.02 + - perl-xml-sax-base=1.09 - pexpect=4.8.0 - picard=2.27.4 - pickleshare=0.7.5 - pigz=2.6 - pillow=9.2.0 - - pip=22.3 + - pip=23.0.1 - pixman=0.40.0 - pkgutil-resolve-name=1.3.10 - plac=1.3.5 - - plotly=5.11.0 + - platformdirs=3.2.0 + - plotly=5.14.0 - pluggy=1.0.0 + - pooch=1.7.0 - preseq=3.2.0 - - prompt-toolkit=3.0.31 - - psutil=5.9.3 + - prompt-toolkit=3.0.38 + - prompt_toolkit=3.0.38 + - psutil=5.9.4 - pthread-stubs=0.4 - ptyprocess=0.7.0 - - pulp=2.6.0 + - pulp=2.7.0 - pulseaudio=14.0 - pure_eval=0.2.2 - py2bit=0.3.0 - pybedtools=0.9.0 - pybigwig=0.3.18 - pycparser=2.21 - - pyfaidx=0.7.1 - - pygments=2.13.0 - - pyopenssl=22.1.0 + - pyfaidx=0.7.2.1 + - pygments=2.14.0 + - pyopenssl=23.1.1 - pyparsing=3.0.9 - pyqt=5.15.4 - pyqt5-sip=12.9.0 - - pyrsistent=0.18.1 - - pysam=0.19.1 + - pyrsistent=0.19.3 + - pysam=0.20.0 - pysocks=1.7.1 - - pytest=7.2.0 - - pytest-xdist=3.0.2 - - python=3.10.6 + - pytest=7.2.2 + - pytest-xdist=3.2.1 + - python=3.10.8 - python-dateutil=2.8.2 - - python-fastjsonschema=2.16.2 + - python-fastjsonschema=2.16.3 - python-isal=1.1.0 - python-lzo=1.14 - python_abi=3.10 - - pytz=2022.5 + - pytz=2023.3 - pyvcf3=1.0.3 - pyyaml=6.0 - qt-main=5.15.2 - r-base=4.1.3 - - ratelimiter=1.2.0 - - readline=8.1.2 - - requests=2.28.1 - - reretry=0.11.1 - - rich=12.6.0 - - rich-click=1.5.2 + - readline=8.2 + - requests=2.28.2 + - reretry=0.11.8 + - rich=13.3.3 + - rich-click=1.6.1 - rseqc=5.0.1 - - salmon=1.9.0 + - salmon=1.10.1 
- samtools=1.16.1 - - scipy=1.9.3 - - seaborn=0.12.1 - - seaborn-base=0.12.1 + - scipy=1.10.1 + - seaborn=0.12.2 + - seaborn-base=0.12.2 - sed=4.8 - - setuptools=65.5.0 - - simplejson=3.17.6 + - setuptools=67.6.1 + - simplejson=3.18.4 - sip=6.5.1 - six=1.16.0 - - smart_open=6.2.0 + - smart_open=6.3.0 - smmap=3.0.5 - - snakemake-minimal=7.17.1 + - snakemake-minimal=7.25.0 - spectra=0.0.11 - - sqlite=3.39.4 - - sra-tools=2.9.6 - - stack_data=0.5.1 - - star=2.7.10a - - statsmodels=0.13.2 + - sqlite=3.40.0 + - sra-tools=3.0.3 + - stack_data=0.6.2 + - star=2.7.10b + - statsmodels=0.13.5 - stopit=1.1.2 - - subread=2.0.1 + - subread=2.0.3 - sysroot_linux-64=2.12 - tabulate=0.9.0 - - tbb=2021.6.0 - - tenacity=8.1.0 + - tbb=2021.7.0 + - tenacity=8.2.2 + - throttler=1.2.1 - tk=8.6.12 - tktable=2.10 - toml=0.10.2 - tomli=2.0.1 - - toposort=1.7 + - toposort=1.10 - tornado=6.2 - trackhub=0.2.4 - - traitlets=5.5.0 - - typing_extensions=4.4.0 - - tzdata=2022f + - traitlets=5.9.0 + - typing-extensions=4.5.0 + - typing_extensions=4.5.0 + - tzdata=2023c - ucsc-bedgraphtobigwig=377 - ucsc-bedsort=377 - ucsc-bedtobigbed=377 @@ -313,15 +350,15 @@ dependencies: - ucsc-twobittofa=377 - ucsc-wigtobigwig=377 - unicodedata2=15.0.0 - - urllib3=1.26.11 - - wcwidth=0.2.5 - - wheel=0.37.1 - - wrapt=1.14.1 - - xopen=1.6.0 + - urllib3=1.26.15 + - wcwidth=0.2.6 + - wheel=0.40.0 + - wrapt=1.15.0 + - xopen=1.7.0 - xorg-kbproto=1.0.7 - xorg-libice=1.0.10 - xorg-libsm=1.2.3 - - xorg-libx11=1.7.2 + - xorg-libx11=1.8.4 - xorg-libxau=1.0.9 - xorg-libxdmcp=1.1.3 - xorg-libxext=1.3.4 @@ -333,6 +370,8 @@ dependencies: - xz=5.2.6 - yaml=0.2.5 - yte=1.5.1 - - zipp=3.10.0 + - zipp=3.15.0 - zlib=1.2.13 + - zstandard=0.19.0 - zstd=1.5.2 +# diff --git a/include/WRAPPER_SLURM b/include/WRAPPER_SLURM index 548f6afb..841058f4 100755 --- a/include/WRAPPER_SLURM +++ b/include/WRAPPER_SLURM @@ -2,23 +2,40 @@ #SBATCH --job-name="lcdb-wf" #SBATCH --partition="norm" #SBATCH --time=12:00:00 +#SBATCH --gres=lscratch:5 # 
make logdir if [[ ! -e logs ]]; then mkdir -p logs; fi +# use SNAKEMAKE_PROFILE if one is set in the environment variables +if [ -z "$LCDBWF_SNAKEMAKE_PROFILE" ]; then + if [ -z "$SNAKEMAKE_PROFILE" ]; then + # no snakemake profile found + PROFILE_CMD="" + echo "No environment variable SNAKEMAKE_PROFILE or LCDBWF_SNAKE_PROFILE found." + echo "snakemake will run in single job." + else + # generic SNAKEMAKE_PROFILE found + PROFILE_CMD="--profile $SNAKEMAKE_PROFILE" + fi +else +# LCDBWF_SNAKEMAKE_PROFILE found, this takes priority if both profile variables are set +PROFILE_CMD="--profile $LCDBWF_SNAKEMAKE_PROFILE" +fi + # Run snakemake ( time snakemake \ -p \ --directory $PWD \ -k \ + --restart-times 3 \ --rerun-incomplete \ --jobname "s.{rulename}.{jobid}.sh" \ -j 999 \ - --cluster-config config/clusterconfig.yaml \ - --cluster 'sbatch {cluster.prefix} --cpus-per-task={threads} --output=logs/{rule}.o.%j --error=logs/{rule}.e.%j' \ --use-conda \ --configfile config/config.yaml \ + $PROFILE_CMD \ --latency-wait=300 \ --max-jobs-per-second 1 \ --max-status-checks-per-second 0.01 \ diff --git a/include/reference_configs/ERCC.yaml b/include/reference_configs/ERCC.yaml index cea79ccc..22aa25af 100644 --- a/include/reference_configs/ERCC.yaml +++ b/include/reference_configs/ERCC.yaml @@ -2,14 +2,14 @@ references: ercc: srm2374: genome: - url: 'https://www-s.nist.gov/srmors/certificates/documents/SRM2374_Sequence_v1.FASTA' + url: 'https://tsapps.nist.gov/srmext/certificates/documents/SRM2374_Sequence_v1.FASTA' postprocess: "lib.postprocess.ercc.fasta_postprocess" indexes: - 'bowtie2' - 'hisat2' - 'star' annotation: - url: 'https://www-s.nist.gov/srmors/certificates/documents/SRM2374_Sequence_v1.FASTA' + url: 'https://tsapps.nist.gov/srmext/certificates/documents/SRM2374_Sequence_v1.FASTA' postprocess: "lib.postprocess.ercc.gtf_postprocess" fisher92: diff --git a/lib/common.py b/lib/common.py index 24b7e95c..829cc129 100644 --- a/lib/common.py +++ b/lib/common.py @@ -629,6 
+629,9 @@ def load_config(config, missing_references_ok=False): # Here we populate a list of reference sections. Items later on the list # will have higher priority includes = config.get('include_references', []) + for i in includes: + if not os.path.exists(i): + raise ValueError("include_references: '{}' does not exist".format(i)) reference_sections = [] # First the directories. Directories that come earlier lose to those that @@ -716,6 +719,21 @@ def is_paired_end(sampletable, sample): sample : str Assumed to be found in the first column of `sampletable` """ + # We can't fall back to detecting PE based on two fastq files provided for + # each sample when it's an SRA sampletable (which only has SRR accessions). + # + # So detect first detect if SRA sampletable based on presence of "Run" + # column and all values of that column starting with "SRR", and then raise + # an error if the Layout column does not exist. + + if "Run" in sampletable.columns: + if all(sampletable["Run"].str.startswith("SRR")): + if "Layout" not in sampletable.columns and "layout" not in sampletable.columns: + raise ValueError( + "Sampletable appears to be SRA, but no 'Layout' column " + "found. 
This is required to specify single- or paired-end " + "libraries.") + row = sampletable.set_index(sampletable.columns[0]).loc[sample] if 'orig_filename_R2' in row: return True diff --git a/lib/lcdbwf/DESCRIPTION b/lib/lcdbwf/DESCRIPTION index d6781bb2..97dc6126 100644 --- a/lib/lcdbwf/DESCRIPTION +++ b/lib/lcdbwf/DESCRIPTION @@ -23,4 +23,4 @@ License: MIT Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.1.1 +RoxygenNote: 7.2.1 diff --git a/lib/lcdbwf/R/annotations.R b/lib/lcdbwf/R/annotations.R index 473c4b95..d1a40b7d 100644 --- a/lib/lcdbwf/R/annotations.R +++ b/lib/lcdbwf/R/annotations.R @@ -119,10 +119,11 @@ get_annotation_db <- function(config, dbtype, genus_species=NULL, orgdb_key_over #' #' @param res_list List of DESeqResults objects #' @param config Full config object, at least containing config$orgdb +#' @param use_orgdb Boolean to use or bypass orgDb extra columns #' #' @return List of same results objects, but each one with additional columns #' attached as specified in the config -attach_extra <- function(res_list, config, force_intersect){ +attach_extra <- function(res_list, config, force_intersect, use_orgdb=TRUE){ if (missing(force_intersect)) force_intersect <- config$main$force_intersect if (is.null(force_intersect)) force_intersect <- FALSE @@ -149,28 +150,39 @@ attach_extra <- function(res_list, config, force_intersect){ keys <- rownames(res_list[[1]]$res) - orgdb <- lcdbwf:::get_annotation_db(config, dbtype="OrgDb") + if (use_orgdb == TRUE) { + orgdb <- lcdbwf:::get_annotation_db(config, dbtype="OrgDb") - # Create a dataframe mapping gene IDs to the various configured columns - lookups <- list() - for (col in config$annotation$orgdb_columns){ - lookups[[col]] <- mapIds(orgdb, keys=keys, column=col, keytype=config$annotation$keytype, multiVal='first') - if (col %in% config$annotation$fill){ - lookups[[col]] <- ifelse(is.na(lookups[[col]]), keys, lookups[[col]]) - } - } - lookups <- data.frame(lookups) - - # Use that 
dataframe to attach additional columns to each results object - for (name in names(res_list)){ - res <- res_list[[name]]$res - orig_colnames <- colnames(res) + # Create a dataframe mapping gene IDs to the various configured columns + lookups <- list() for (col in config$annotation$orgdb_columns){ - res[[col]] <- lookups[[col]] + lookups[[col]] <- mapIds(orgdb, keys=keys, column=col, keytype=config$annotation$keytype, multiVal='first') + if (col %in% config$annotation$fill){ + lookups[[col]] <- ifelse(is.na(lookups[[col]]), keys, lookups[[col]]) + } + } + lookups <- data.frame(lookups) + + # Use that dataframe to attach additional columns to each results object + for (name in names(res_list)){ + res <- res_list[[name]]$res + orig_colnames <- colnames(res) + for (col in config$annotation$orgdb_columns){ + res[[col]] <- lookups[[col]] + } + res$gene <- rownames(res) + res <- res[, c('gene', config$annotation$orgdb_columns, orig_colnames)] + res_list[[name]]$res <- res + } + } else { + # attach the genes as SYMBOLs in absence of OrgDb data + for (name in names(res_list)){ + res <- res_list[[name]]$res + res$gene <- rownames(res) + res$SYMBOL <- rownames(res) + res_list[[name]]$res <- res } - res$gene <- rownames(res) - res <- res[, c('gene', config$annotation$orgdb_columns, orig_colnames)] - res_list[[name]]$res <- res } + return(res_list) } diff --git a/lib/lcdbwf/R/loading.R b/lib/lcdbwf/R/loading.R index 06422033..68fcc818 100644 --- a/lib/lcdbwf/R/loading.R +++ b/lib/lcdbwf/R/loading.R @@ -19,9 +19,12 @@ #' Additional args are passed to DESeq2::DESeqDataSetFromMatrix. DESeqDataSetFromCombinedFeatureCounts <- function(filename, sampletable, design, - sample_func=lcdbwf.samplename, + sample_func=lcdbwf_samplename, subset_counts=FALSE, ...){ + if (is.null(subset_counts)){ + subset_counts <- FALSE + } # The sampletable may be data.frame or tibble; if it's a tibble then it # likely doesn't have rownames. 
So in this function we assume that it's the # first column that contains the samplenames. diff --git a/lib/lcdbwf/R/plotting.R b/lib/lcdbwf/R/plotting.R index 216871ec..c850b9dc 100644 --- a/lib/lcdbwf/R/plotting.R +++ b/lib/lcdbwf/R/plotting.R @@ -340,7 +340,7 @@ counts.plot <- function(df, rank.nb=NULL, no.aes=FALSE, facet='label') { #' Plot a histogram of raw pvals #' -#' This is right out of the DESeq2 vignette, from the section about independent +#' This is edited from the DESeq2 vignette, from the section about independent #' filtering. The resulting histogram indicates pvals for those genes kept and #' removed before multiple testing adjustment. #' @@ -352,11 +352,17 @@ pval_hist <- function(res){ h1 <- hist(res$pvalue[!use], breaks=0:50/50, plot=FALSE) h2 <- hist(res$pvalue[use], breaks=0:50/50, plot=FALSE) colori <- c(`counts too low`='khaki', `pass`="powderblue") - barplot(height = rbind(h1$counts, h2$counts), beside = FALSE, - col = colori, space = 0, main = "", ylab="frequency") - text(x = c(0, length(h1$counts)), y = 0, label = paste(c(0,1)), - adj = c(0.5,1.7), xpd=NA) - legend("topright", fill=rev(colori), legend=rev(names(colori))) + df <- rbind(data.frame(x=h1$mids, counts=h1$counts, label='counts too low'), + data.frame(x=h2$mids, counts=h2$counts, label='pass') + ) + plt <- ggplot2::ggplot(df, aes(x=x, y=counts, fill=label)) + + geom_bar(stat = 'identity', color='gray20') + + theme_classic() + + scale_fill_manual(values=c("#EBE379", "#A3DAE0")) + + xlab('p-value') + + ylab('frequency') + + theme(legend.position = c(0.8, 0.8)) + return(plt) } #' Barplot of size factors by sample @@ -412,3 +418,80 @@ plotSparsity2 <- function(dds){ } + + +#' Plot a scatterplot of two contrasts' LFCs, color-coded by significance +#' +#' This is edited from the DESeq2 vignette, from the section about independent +#' filtering. The resulting histogram indicates pvals for those genes kept and +#' removed before multiple testing adjustment. 
+#' +#' @param res_i DESeq2 results object +#' @param res_j second DESeq2 results object +#' @param padj.thr float, p.adj threshold +#' @param name.col string, gene name column to merge the 2 results, also used for labelling plots +#' @param label_i, label_j string, label for res_i and res_j +#' @param return.values boolean, whether to return the ggplot object (FALSE) or the dataframe (TRUE) +#' @param' color.palette list of string, colors to use for significance categories 'Both - same LFC sign', +#' 'Both - opposite LFC sign 'None', label_i, label_j + +#' @return Either returns a ggplot object of the scatterplot, or the corresponding dataframe if return.values=TRUE + +lfc_scatter <- function(res_i, res_j, padj.thr=0.1, name.col='SYMBOL', label_i=NULL, label_j=NULL, + return.values=FALSE, color.palette=c('#FF3333', "#FF6699", '#999999', '#66CCCC', '#0072B2')) { + # colors from color-blind palette red pink grey cyan blue + # check whether the genes match in res_i and res_j, emits a warning if not + diff.genes <- c(setdiff(rownames(res_i), rownames(res_j)), + setdiff(rownames(res_j), rownames(res_i))) %>% + unlist() %>% + unique() + if( length(diff.genes) > 0 ) { + warning(paste0(length(diff.genes), + ' genes were discarded because found in one res but not the other')) + } + + # use generic labels if not provided + if (is.null(label_i)) { + label_i <- 'LFCs contrast 1' + } + if (is.null(label_j)) { + label_j <- 'LFCs contrast 2' + } + + # join results into dataframe + cols.sub <- c('log2FoldChange', 'padj', name.col) + df <- merge(as.data.frame(res_i)[cols.sub], + as.data.frame(res_j)[cols.sub], + by= name.col) + # add significance column + df <- df %>% + mutate('Significance' = case_when( + (padj.x <= padj.thr) & (padj.y <= padj.thr) & (log2FoldChange.x * log2FoldChange.y >= 0) ~ 'Both - same LFC sign', + (padj.x <= padj.thr) & (padj.y <= padj.thr) & (log2FoldChange.x * log2FoldChange.y < 0) ~ 'Both - opposite LFC sign', + (padj.x <= padj.thr) ~ label_i, + (padj.y 
<= padj.thr) ~ label_j, + TRUE ~ 'None')) + + # if return.values, return the dataframe now, no need to generate the plot + if (return.values == TRUE) { + return(df) + } + + # Significance as factor, to reorder in the graph + df[['Significance']] <- factor(df[['Significance']], levels=c('None', label_j, label_i, 'Both - opposite LFC sign', 'Both - same LFC sign')) + + names(color.palette) <- c('Both - same LFC sign', 'Both - opposite LFC sign', 'None', label_i, label_j) + + p <- ggplot(df %>% arrange(Significance), aes_string(x='log2FoldChange.x', y='log2FoldChange.y', + color='Significance', label=name.col)) + + geom_point(size=1) + + theme_bw() + + scale_color_manual(values=color.palette) + + geom_abline(color="#333333", linetype="dashed", size=0.5, alpha=0.7) + + geom_hline(yintercept=0, color="#333333", linetype="dashed", size=0.5, alpha=0.7) + + geom_vline(xintercept=0, color="#333333", linetype="dashed", size=0.5, alpha=0.7) + + xlab(label_i) + + ylab(label_j) + + return(p) +} diff --git a/lib/lcdbwf/R/results.R b/lib/lcdbwf/R/results.R index d84ecd4c..b1cec3aa 100644 --- a/lib/lcdbwf/R/results.R +++ b/lib/lcdbwf/R/results.R @@ -56,7 +56,7 @@ build_results_tabs <- function(res_list, dds_list, config, text){ lcdbwf:::mdcat('### P-value distribution') lcdbwf:::folded_markdown(text$results_plots$pval_hist, "Help") - lcdbwf:::pval_hist(res_i) + print(lcdbwf:::pval_hist(res_i)) if (config$toggle$results_diagnostics){ lcdbwf:::results_diagnostics(res=res_i, dds=res_list[[name]]$dds, name=name, config=config, text=text) diff --git a/lib/lcdbwf/tests/testthat/test-dds.R b/lib/lcdbwf/tests/testthat/test-dds.R index a9043b3b..64f2da7f 100644 --- a/lib/lcdbwf/tests/testthat/test-dds.R +++ b/lib/lcdbwf/tests/testthat/test-dds.R @@ -5,13 +5,13 @@ test_that("stripping gene versions", { rownames(dds) <- paste(rownames(dds), seq(1000), sep='.') - expect_error(lcdbwf::strip_dotted_version_from_dds(dds), "Gene names don't appear to be Ensembl") + 
expect_error(lcdbwf:::strip_dotted_version_from_dds(dds), "Gene names don't appear to be Ensembl") - forced <- lcdbwf::strip_dotted_version_from_dds(dds, force=TRUE) + forced <- lcdbwf:::strip_dotted_version_from_dds(dds, force=TRUE) expect_equal(rownames(forced)[1], "gene1") rownames(dds) <- paste0("ENS", rownames(dds), '.', seq(1000)) - fixed <- lcdbwf::strip_dotted_version_from_dds(dds) + fixed <- lcdbwf:::strip_dotted_version_from_dds(dds) expect_equal(rownames(fixed)[1], "ENSgene1") }) diff --git a/lib/lcdbwf/tests/testthat/test-helpers.R b/lib/lcdbwf/tests/testthat/test-helpers.R index 491fc605..9938cf8f 100644 --- a/lib/lcdbwf/tests/testthat/test-helpers.R +++ b/lib/lcdbwf/tests/testthat/test-helpers.R @@ -532,8 +532,8 @@ res <- new("DESeqResults", priorInfo = list(), rownames = c("FBgn0000061", lfcThreshold = 0)) -test_that("lcdbwf.samplename works", { - res <- lcdbwf.samplename("data/rnaseq_samples/sampleNNN.cutadapt.bam") +test_that("lcdbwf_samplename works", { + res <- lcdbwf_samplename("data/rnaseq_samples/sampleNNN.cutadapt.bam") expect_equal(res, "sampleNNN") }) diff --git a/lib/lcdbwf/tests/testthat/test-loading.R b/lib/lcdbwf/tests/testthat/test-loading.R index 762813c5..9611d676 100644 --- a/lib/lcdbwf/tests/testthat/test-loading.R +++ b/lib/lcdbwf/tests/testthat/test-loading.R @@ -5,7 +5,7 @@ sampletable <- data.frame( test_that("load from combined featurecounts", { - dds <- lcdbwf::DESeqDataSetFromCombinedFeatureCounts( + dds <- lcdbwf:::DESeqDataSetFromCombinedFeatureCounts( "featurecounts.txt", sampletable, design=~group @@ -15,7 +15,7 @@ test_that("load from combined featurecounts", { test_that("load from combined featurecounts, subset sampletable, no subset.counts", { expect_error( - lcdbwf::DESeqDataSetFromCombinedFeatureCounts( + lcdbwf:::DESeqDataSetFromCombinedFeatureCounts( "featurecounts.txt", sampletable %>% dplyr::filter(samplename != "sample4"), design=~group), @@ -27,10 +27,10 @@ test_that("load from combined featurecounts, subset 
sampletable, no subset.count }) test_that("load from combined featurecounts, subset sampletable, with subset.counts", { - dds <- lcdbwf::DESeqDataSetFromCombinedFeatureCounts( + dds <- lcdbwf:::DESeqDataSetFromCombinedFeatureCounts( "featurecounts.txt", sampletable %>% dplyr::filter(samplename != "sample4"), - design=~group, subset.counts=TRUE) + design=~group, subset_counts=TRUE) expect_s4_class(dds, "DESeqDataSet") }) @@ -47,10 +47,10 @@ test_that("load from combined featurecounts, using custom func", { stringr::str_split(stringr::fixed('/'), simplify=TRUE) x[,1] } - dds <- lcdbwf::DESeqDataSetFromCombinedFeatureCounts( + dds <- lcdbwf:::DESeqDataSetFromCombinedFeatureCounts( "featurecounts.txt", sampletable2, - sample.func=custom.func, + sample_func=custom.func, design=~group) expect_s4_class(dds, "DESeqDataSet") diff --git a/test/lcdb-wf-test b/test/lcdb-wf-test new file mode 100755 index 00000000..375c921a --- /dev/null +++ b/test/lcdb-wf-test @@ -0,0 +1,482 @@ +#!/usr/bin/env python + +""" +This script aims to make it more convenient to run various tests using +different configs. +""" +import os +import shlex +import subprocess as sp +import sys +from pathlib import Path +import argparse +import yaml + +HERE = Path(__file__).resolve().parent +TOPLEVEL = Path(__file__).resolve().parent.parent + +WORKFLOW_ARGS = yaml.safe_load(open(TOPLEVEL / "test" / "workflow_test_params.yaml")) + + +def print_header(name): + print("-" * 80) + print("lcdb-wf-test: ", name) + print("-" * 80) + + +class Runner(object): + """ + To add a new command, create a new method with a name starting with + "_cmd_", create a new ArgumentParser. + """ + + default_env = os.getenv("LCDBWF_ENV", str(TOPLEVEL / "env")) + default_env_r = os.getenv("LCDBWF_ENV_R", str(TOPLEVEL / "env-r")) + global_parser = argparse.ArgumentParser(add_help=False) + global_parser.add_argument( + "--env", default=default_env, + help=f"""Main conda environment to use. 
Override + by setting $LCDBWF_ENV or override that by explicity setting --env. Currently will use {default_env}""" + ) + global_parser.add_argument( + "--env-r", + default=default_env_r, + help=f"""Main R conda environment to use. Override by setting + $LCDBWF_ENV_R or override that by explicity setting --env-r. Currently + will use {default_env_r}""" + ) + global_parser.add_argument( + "--orig", + default=str(TOPLEVEL), + help=f"""If specified, you can use the special string '__ORIG__' in + command line arguments which will be filled in with the value provided + here. Mostly used in CI.""", + ) + + def __init__(self): + parser = argparse.ArgumentParser( + description=""" + Test runner for lcdb-wf. There are many things to test; as a first pass use + these with appropriate Snakemake args (-n, -j, --use-conda, etc) + + %(prog)s data --kind all + %(prog)s unit_tests --pytest + %(prog)s unit_tests --r-test + %(prog)s rnaseq --run-workflow + %(prog)s rnaseq --trackhub + %(prog)s rnaseq --downstream + %(prog)s chipseq --run-workflow + %(prog)s references --run-workflow --configfile=config/config.yaml + + DATA + ---- + %(prog)s data --kind all --verbose + + UNIT TESTS + ---------- + # Run the pytest unit tests on the lib/ + %(prog)s unit_tests --pytest + + # Run tests on lcdbwf R package + %(prog)s unit_tests --r-test + + # Ensure URLs in the configs exist + %(prog)s unit_tests --url-check + + # Ensure rnaseq.Rmd has matching sections in the docs + %(prog)s unit_tests --ensure-docs + + RNASEQ + ------ + # Run main workflow + %(prog)s rnaseq --run-workflow + + # Build RNA-seq trackhub from output of main workflow + %(prog)s rnaseq --trackhub + + # Run rnaseq.Rmd + %(prog)s rnaseq --downstream + + # Each of these runs a restricted subset of the workflow with + # customized configs; they should be run one at a time. 
+ %(prog)s rnaseq --run-workflow --sra-pe + %(prog)s rnaseq --run-workflow --sra-se + %(prog)s rnaseq --run-workflow --strandedness-pe + %(prog)s rnaseq --run-workflow --strandedness-se + %(prog)s rnaseq --run-workflow --star-2pass + %(prog)s rnaseq --run-workflow --star-1pass + %(prog)s rnaseq --run-workflow --pe + + """, + formatter_class=argparse.RawDescriptionHelpFormatter + ) + + choices = [i.replace("_cmd_", "") for i in dir(self) if i.startswith("_cmd_")] + + parser.add_argument("command", help="Subcommand to run", choices=choices) + args = parser.parse_args(sys.argv[1:2]) + if not hasattr(self, "_cmd_" + args.command): + print("Unrecognized command") + parser.print_help() + sys.exit(1) + getattr(self, "_cmd_" + args.command)() + + def _cmd_data(self): + parser = argparse.ArgumentParser( + description="Download data", + parents=[self.global_parser], + ) + + parser.add_argument( + "--kind", + default="all", + choices=["all", "rnaseq", "chipseq"], + help="Kind of data to download", + ) + parser.add_argument( + "--branch", default="master", help="Branch from lcdb-test-data to use" + ) + parser.add_argument( + "--verbose", + action="store_true", + help="Be verbose about what's being downloaded", + ) + + args = parser.parse_args(sys.argv[2:]) + + repo = "lcdb-test-data" + URL = f"https://github.com/lcdb/{repo}/blob/{args.branch}/data/{{}}?raw=true" + + # This dict maps files in the `data` directory of the repo to a local + # path to which it should be downloaded. 
+ data_files = { + "rnaseq": [ + ( + "rnaseq_samples/sample1/sample1.small_R1.fastq.gz", + "workflows/rnaseq/data/example_data/rnaseq_sample1.fq.gz", + ), + ( + "rnaseq_samples/sample2/sample2.small_R1.fastq.gz", + "workflows/rnaseq/data/example_data/rnaseq_sample2.fq.gz", + ), + ( + "rnaseq_samples/sample3/sample3.small_R1.fastq.gz", + "workflows/rnaseq/data/example_data/rnaseq_sample3.fq.gz", + ), + ( + "rnaseq_samples/sample4/sample4.small_R1.fastq.gz", + "workflows/rnaseq/data/example_data/rnaseq_sample4.fq.gz", + ), + ( + "rnaseq_samples/sample1/sample1.small_R1.fastq.gz", + "workflows/rnaseq/data/example_data/rnaseq_sample1PE_1.fq.gz", + ), + ( + "rnaseq_samples/sample1/sample1.small_R2.fastq.gz", + "workflows/rnaseq/data/example_data/rnaseq_sample1PE_2.fq.gz", + ), + ( + "rnaseq_samples/sample2/sample2.small_R1.fastq.gz", + "workflows/rnaseq/data/example_data/rnaseq_sample2PE_1.fq.gz", + ), + ( + "rnaseq_samples/sample2/sample2.small_R2.fastq.gz", + "workflows/rnaseq/data/example_data/rnaseq_sample2PE_2.fq.gz", + ), + ], + "chipseq": [ + ( + "chipseq_samples/input_1/input_1.tiny_R1.fastq.gz", + "workflows/chipseq/data/example_data/chipseq_input1.fq.gz", + ), + ( + "chipseq_samples/input_2/input_2.tiny_R1.fastq.gz", + "workflows/chipseq/data/example_data/chipseq_input2.fq.gz", + ), + ( + "chipseq_samples/input_3/input_3.tiny_R1.fastq.gz", + "workflows/chipseq/data/example_data/chipseq_input3.fq.gz", + ), + ( + "chipseq_samples/ip_1/ip_1.tiny_R1.fastq.gz", + "workflows/chipseq/data/example_data/chipseq_ip1.fq.gz", + ), + ( + "chipseq_samples/ip_2/ip_2.tiny_R1.fastq.gz", + "workflows/chipseq/data/example_data/chipseq_ip2.fq.gz", + ), + ( + "chipseq_samples/ip_3/ip_3.tiny_R1.fastq.gz", + "workflows/chipseq/data/example_data/chipseq_ip3.fq.gz", + ), + ( + "chipseq_samples/ip_4/ip_4.tiny_R1.fastq.gz", + "workflows/chipseq/data/example_data/chipseq_ip4.fq.gz", + ), + ], + } + + if args.kind == "all": + kinds = list(data_files.keys()) + else: + kinds = [args.kind] + 
for kind in kinds: + for fn, dest in data_files[kind]: + url = URL.format(fn) + if args.verbose: + print(f"downloading {url}") + if dest is None: + dest = fn + dest = Path(dest) + dest.parent.mkdir(parents=True, exist_ok=True) + sp.run( + f"wget -q -O- {url} > {dest}", shell=True, check=True, cwd=TOPLEVEL + ) + + def _cmd_unit_tests(self): + parser = argparse.ArgumentParser( + description="Run various unit tests and checks", + parents=[self.global_parser], + ) + parser.add_argument( + "--pytest", + action="store_true", + help="Run pytest unit tests and module doctests on lib/ directory", + ) + parser.add_argument( + "--url-check", + action="store_true", + help="Ensure that URLs found in config files (e.g., to genome references) are still valid", + ) + parser.add_argument( + "--r-test", + action="store_true", + help="""Run devtools::test on the lcdbwf R package. Activates the + conda environment specified by --env-r just before running.""", + ) + + parser.add_argument( + "--ensure-docs", + action="store_true", + help="Ensure that all named R chunks are documented in the online help docs", + ) + + args = parser.parse_args(sys.argv[2:]) + + if args.pytest: + print_header("pytest") + sp.run(["pytest", "--doctest-modules", "lib"], check=True, cwd=TOPLEVEL) + + if args.url_check: + print_header("url check") + sys.path.insert(0, str(TOPLEVEL)) + from lib.common import check_all_urls_found + + check_all_urls_found() + + if args.r_test: + print_header("R test") + sp.run( + 'eval "$(conda shell.bash hook)" ' + f"&& conda activate {args.env_r} " + '''&& Rscript -e "devtools::test('lib/lcdbwf', export_all=TRUE)"''', + shell=True, + check=True, + executable="/bin/bash" + ) + + if args.ensure_docs: + sp.run(["./ensure_docs.py"], check=True, cwd=TOPLEVEL / "ci") + + def _cmd_rnaseq(self): + """ + This function handles the "rnaseq" subcommand. 
+ """ + + parser = argparse.ArgumentParser( + description="Run rnaseq workflow and downstream tests", + parents=[self.global_parser], + ) + parser.add_argument( + "--run-workflow", + action="store_true", + help="""Run rnaseq workflow using the run_test.sh harness, which + edits the Snakefile to use test settings before running. Additional + args not specified here are passed to Snakemake, or use other flags + below to easily specify config sets.""", + ) + parser.add_argument( + "--trackhub", action="store_true", help="Build the rnaseq track hub" + ) + parser.add_argument( + "--downstream", + action="store_true", + help="""Run the downstream rnaseq.Rmd, via + workflows/rnaseq/run_downstream_test.sh. This runs the preprocessor + on the files to allow the use of # [TEST SETTINGS] comments; see + that script for details. Activates environment configured in + --env-r before running.""", + ) + + # Here we programmatically build the parser from the + # workflow_test_params.yaml file. They are added to + # a mutually-exclusive group to avoid overwriting each others' config + # file params. They all write their params to the args.additional_args + # attribute. + group = parser.add_mutually_exclusive_group() + workflow_prefix = "bash run_test.sh" + workflow_dir = TOPLEVEL / "workflows/rnaseq" + for key, val in WORKFLOW_ARGS["rnaseq"].items(): + group.add_argument( + "--" + key, + action="store_const", + default="", + dest="additional_args", + const=val["args"], + help=f"""GROUP: Part of mutually exclusive rnaseq group. Runs + the following: cd {workflow_dir} && {workflow_prefix} + {val['args']}. 
{val['desc']} Configured in + workflow_test_params.yaml.""", + ) + + args, extra = parser.parse_known_args(sys.argv[2:]) + + if args.run_workflow: + print(args) + if args.additional_args: + extra.extend(shlex.split(args.additional_args)) + + extra = [i.replace("__ORIG__", args.orig) for i in extra] + strargs = " ".join(extra) + cmd = ( + 'eval "$(conda shell.bash hook)" ' + f"&& conda activate {args.env} " + f"&& (cd {workflow_dir} && {workflow_prefix} {strargs})" + ) + print_header(f"Running the following command:\n{cmd}") + sp.run( + cmd, + check=True, + shell=True, + executable="/bin/bash" + ) + if args.trackhub: + cmd = ( + 'eval "$(conda shell.bash hook)" ' + f"&& conda activate {args.env} " + f"&& (cd {workflow_dir} " + "&& python rnaseq_trackhub.py config/config.yaml config/hub_config.yaml)" + ) + print_header(f"Building trackhub with command: {cmd}") + + sp.run( + cmd, + shell=True, + check=True, + executable="/bin/bash" + ) + print("See workflows/rnaseq/staging for the built trackhub") + + if args.downstream: + print_header("running downstream rnaseq.Rmd") + sp.run( + 'eval "$(conda shell.bash hook)" ' + f"&& conda activate {args.env_r} " + "&& (cd workflows/rnaseq && bash run_downstream_test.sh)", + shell=True, + check=True, + executable="/bin/bash" + ) + + def _cmd_chipseq(self): + """ + This function handles the "chipseq" subcommand. + """ + + parser = argparse.ArgumentParser( + description="Run chipseq workflow", + parents=[self.global_parser], + ) + parser.add_argument( + "--run-workflow", + action="store_true", + help="""Run chipseq workflow using the run_tesh.sh harness, which + edits the Snakefile to use test settings before running. 
Additional + args not specified here are passed to Snakemake, or use other flags + below to easily specify config sets.""", + ) + parser.add_argument( + "--trackhub", action="store_true", help="Build the chipseq track hub" + ) + args, extra = parser.parse_known_args(sys.argv[2:]) + workflow_prefix = "bash run_test.sh" + workflow_dir = TOPLEVEL / "workflows/chipseq" + + if args.run_workflow: + extra = [i.replace("__ORIG__", args.orig) for i in extra] + strargs = " ".join(extra) + cmd = ( + 'eval "$(conda shell.bash hook)" ' + f"&& conda activate {args.env} " + f"&& (cd {workflow_dir} && {workflow_prefix} {strargs})" + ) + print_header(f"Running the following command:\n{cmd}") + sp.run( + cmd, + shell=True, + check=True, + executable="/bin/bash" + ) + if args.trackhub: + cmd = ( + 'eval "$(conda shell.bash hook)" ' + f"&& conda activate {args.env} " + f"&& (cd {workflow_dir} " + "&& python chipseq_trackhub.py config/config.yaml config/hub_config.yaml)" + ) + print_header(f"Building trackhub with command: {cmd}") + + sp.run( + cmd, + shell=True, + check=True, + executable="/bin/bash" + ) + print("See workflows/chipseq/staging for the built trackhub") + + def _cmd_references(self): + parser = argparse.ArgumentParser( + description="Run references workflow", + parents=[self.global_parser], + ) + parser.add_argument( + "--run-workflow", + action="store_true", + help="""Run references workflow using the run_test.sh harness, which + edits the Snakefile to use test settings before running.""" + ) + args, extra = parser.parse_known_args(sys.argv[2:]) + + workflow_prefix = "bash run_test.sh" + workflow_dir = TOPLEVEL / "workflows/references" + if args.run_workflow: + extra = [i.replace("__ORIG__", args.orig) for i in extra] + strargs = " ".join(extra) + cmd = ( + 'eval "$(conda shell.bash hook)" ' + f"&& conda activate {args.env} " + f"&& (cd {workflow_dir} && {workflow_prefix} {strargs})" + ) + print_header(f"Running the following command:\n{cmd}") + sp.run( + cmd, + 
shell=True, + check=True, + executable="/bin/bash" + ) + + +if __name__ == "__main__": + Runner() + +# vim: ft=python diff --git a/test/test_configs/star_1pass.tsv b/test/test_configs/star_1pass.tsv new file mode 100644 index 00000000..3c73275e --- /dev/null +++ b/test/test_configs/star_1pass.tsv @@ -0,0 +1,3 @@ +samplename group layout orig_filename +sample1-star-1pass control SE data/example_data/rnaseq_sample1PE_1.fq.gz +sample2-star-1pass control SE data/example_data/rnaseq_sample2.fq.gz diff --git a/test/test_configs/star_2pass.tsv b/test/test_configs/star_2pass.tsv new file mode 100644 index 00000000..8cf98eb0 --- /dev/null +++ b/test/test_configs/star_2pass.tsv @@ -0,0 +1,3 @@ +samplename group layout orig_filename +sample1-star-2pass control SE data/example_data/rnaseq_sample1PE_1.fq.gz +sample2-star-2pass control SE data/example_data/rnaseq_sample2.fq.gz diff --git a/test/test_configs/test_sra_sampletable_SE_only.tsv b/test/test_configs/test_sra_sampletable_SE_only.tsv index 486e9396..6b403643 100644 --- a/test/test_configs/test_sra_sampletable_SE_only.tsv +++ b/test/test_configs/test_sra_sampletable_SE_only.tsv @@ -1,4 +1,4 @@ samplename Run layout -sra1 SRR948304 SINGLE -sra2 SRR948304 SINGLE -sra3 SRR948305 SINGLE +sra4 SRR948304 SINGLE +sra5 SRR948304 SINGLE +sra6 SRR948305 SINGLE diff --git a/test/test_configs/two_samples.tsv b/test/test_configs/two_samples.tsv index 30886104..57ccb7d9 100644 --- a/test/test_configs/two_samples.tsv +++ b/test/test_configs/two_samples.tsv @@ -1,3 +1,3 @@ samplename group layout orig_filename -sample1 control SE data/example_data/rnaseq_sample1PE_1.fq.gz -sample2 control SE data/example_data/rnaseq_sample2.fq.gz +sample1_of_2 control SE data/example_data/rnaseq_sample1PE_1.fq.gz +sample2_of_2 control SE data/example_data/rnaseq_sample2.fq.gz diff --git a/test/workflow_test_params.yaml b/test/workflow_test_params.yaml new file mode 100644 index 00000000..70e57da6 --- /dev/null +++ b/test/workflow_test_params.yaml @@ -0,0 
+1,67 @@ +# This file configures arguments for running various workflows that are pulled +# into the test/lcdb-wf-test runner script automatically. +# +# NOTE: +# +# The "__ORIG__" placeholder is a mechanism for allowing the CI to run config +# files living in a full cloned repo, but within a (different) deployed +# directory. The __ORIG__ indicates the full cloned repo and will be filled in +# with the lcdb-wf-test "--orig" argument, which defaults to the top-level repo +# dir. +# +# For local testing, don't specify --orig and leave the default. This will +# automatically fill in the top-level dir of the current repo. + +rnaseq: + sra-pe: + desc: Tests paired-end data downloaded directly from SRA. Only run until the cutadapt rule. + args: | + --until cutadapt + --configfile __ORIG__/test/test_configs/test_rnaseq_config.yaml + --config sampletable=__ORIG__/test/test_configs/test_sra_sampletable.tsv + + sra-se: + desc: Tests single-end data downloaded directly from SRA. Only run until the cutadapt rule. + args: | + --until cutadapt + --configfile __ORIG__/test/test_configs/test_rnaseq_config.yaml + --config sampletable=__ORIG__/test/test_configs/test_sra_sampletable_SE_only.tsv + + strandedness-pe: + desc: Tests running the strandedness pre-check using paired-end data. + args: | + --until strand_check + --configfile __ORIG__/test/test_configs/test_rnaseq_config.yaml + --config sampletable=__ORIG__/test/test_configs/test_pe_sampletable.tsv + + strandedness-se: + desc: Tests running the strandedness pre-check using single-ended data. + args: | + --until strand_check + --configfile __ORIG__/test/test_configs/test_rnaseq_config.yaml + --config sampletable=__ORIG__/test/test_configs/two_samples.tsv + + star-2pass: + desc: Tests running STAR in 2-pass mode. Only runs until the star_pass2 rule. 
+ args: | + --until star_pass2 + --configfile + __ORIG__/test/test_configs/test_rnaseq_config.yaml + __ORIG__/test/test_configs/star_override_2pass.yaml + --config sampletable=__ORIG__/test/test_configs/star_2pass.tsv + + star-1pass: + desc: Tests running STAR in 1-pass (default) mode. Only runs until the star rule. + args: | + --until star + --configfile + __ORIG__/test/test_configs/test_rnaseq_config.yaml + __ORIG__/test/test_configs/star_override_1pass.yaml + --config sampletable=__ORIG__/test/test_configs/star_1pass.tsv + + pe: + desc: Tests paired-end data + args: | + --until multiqc + --configfile __ORIG__/test/test_configs/test_rnaseq_config.yaml + --config sampletable=__ORIG__/test/test_configs/test_pe_sampletable.tsv diff --git a/workflows/chipseq/Snakefile b/workflows/chipseq/Snakefile index 401448c6..1ccdee16 100644 --- a/workflows/chipseq/Snakefile +++ b/workflows/chipseq/Snakefile @@ -24,11 +24,13 @@ config = common.load_config(config) include: '../references/Snakefile' -shell.prefix( - 'set -euo pipefail; export R_PROFILE_USER=; export TMPDIR={};' - .format(cluster_specific.tempdir_for_biowulf()) -) -shell.executable('/bin/bash') +def autobump(value): + """ + Increments value for each attempt. 
+ """ + def f(wildcards, attempt): + return attempt * value + return f # Verify configuration of config and sampletable files helpers.preflight(config) @@ -59,9 +61,7 @@ def wrapper_for(path): final_targets = utils.flatten(( c.targets['bam'], utils.flatten(c.targets['fastqc']), - utils.flatten(c.targets['libsizes']), [c.targets['fastq_screen']], - [c.targets['libsizes_table']], [c.targets['multiqc']], utils.flatten(c.targets['markduplicates']), utils.flatten(c.targets['bigwig']), @@ -116,6 +116,10 @@ if 'orig_filename' in c.sampletable.columns: orig_for_sample output: render_r1_r2(c.patterns['fastq']) + threads: 1 + resources: + mem_mb=100, + runtime=10, run: assert len(output) == len(input), (input, output) for src, linkname in zip(input, output): @@ -141,6 +145,9 @@ if 'Run' in c.sampletable.columns and sum(c.sampletable['Run'].str.startswith('S is_paired=c.is_paired, sampletable=_st, # limit = 100000, # [TEST SETTINGS] + resources: + mem_mb=autobump(1024), + runtime=autobump(120) conda: '../../wrappers/wrappers/fastq-dump/environment.yaml' script: @@ -154,6 +161,9 @@ rule cutadapt: fastq=render_r1_r2(c.patterns['fastq']) output: fastq=render_r1_r2(c.patterns['cutadapt']) + resources: + mem_mb=1024 * 2, + runtime=autobump(120) log: render_r1_r2(c.patterns['cutadapt'])[0] + '.log' threads: 6 @@ -198,6 +208,9 @@ rule fastqc: output: html='{sample_dir}/{sample}/fastqc/{sample}{suffix}_fastqc.html', zip='{sample_dir}/{sample}/fastqc/{sample}{suffix}_fastqc.zip', + resources: + mem_mb=1024 * 2, + runtime=autobump(120) script: wrapper_for('fastqc/wrapper.py') @@ -214,6 +227,9 @@ rule bowtie2: log: c.patterns['bam'] + '.log' threads: 16 + resources: + mem_mb=1024 * 32, + runtime=autobump(120) run: prefix = aligners.prefix_from_bowtie2_index(input.index) sam = output.bam.replace('.bam', '.sam') @@ -250,6 +266,10 @@ rule unique: c.patterns['bam'] output: c.patterns['unique'] + threads: 1 + resources: + mem_mb=1024, + runtime=autobump(120) shell: # NOTE: the quality score 
chosen here should reflect the scores output # by the aligner used. For example, STAR uses 255 as max mapping @@ -265,6 +285,10 @@ rule fastq_count: fastq='{sample_dir}/{sample}/{sample}{suffix}.fastq.gz' output: '{sample_dir}/{sample}/{sample}{suffix}.fastq.gz.libsize' + threads: 1 + resources: + mem_mb=1024 * 1, + runtime=autobump(120) shell: 'zcat {input} | echo $((`wc -l`/4)) > {output}' @@ -277,6 +301,10 @@ rule bam_count: bam='{sample_dir}/{sample}/{suffix}.bam' output: '{sample_dir}/{sample}/{suffix}.bam.libsize' + threads: 1 + resources: + mem_mb=1024 * 2, + runtime=autobump(120) shell: 'samtools view -c {input} > {output}' @@ -289,6 +317,10 @@ rule bam_index: bam='{prefix}.bam' output: bai='{prefix}.bam.bai' + threads: 1 + resources: + mem_mb=1024 * 2, + runtime=autobump(120) shell: 'samtools index {input} {output}' @@ -315,67 +347,17 @@ rule fastq_screen: txt=c.patterns['fastq_screen'] log: c.patterns['fastq_screen'] + '.log' + threads: 6 + resources: + mem_mb=autobump(1024 * 4), + runtime=autobump(120) params: subset=100000 script: wrapper_for('fastq_screen/wrapper.py') -rule libsizes_table: - """ - Aggregate fastq and bam counts in to a single table - """ - input: - utils.flatten(c.targets['libsizes']) - output: - json=c.patterns['libsizes_yaml'], - tsv=c.patterns['libsizes_table'] - run: - def sample(f): - return os.path.basename(os.path.dirname(f)) - - def million(f): - return float(open(f).read()) / 1e6 - - def stage(f): - return os.path.basename(f).split('.', 1)[1].replace('.gz', '').replace('.count', '') - - df = pd.DataFrame(dict(filename=list(map(str, input)))) - df['sample'] = df.filename.apply(sample) - df['million'] = df.filename.apply(million) - df['stage'] = df.filename.apply(stage) - df = df.set_index('filename') - df = df.pivot('sample', columns='stage', values='million') - - # make nicer column names - convert = { - 'fastq.libsize': 'stage1_raw', - 'cutadapt.fastq.libsize' : 'stage2_trimmed', - 'cutadapt.bam.libsize': 'stage3_aligned', - 
'cutadapt.unique.bam.libsize': 'stage4_unique', - 'cutadapt.unique.nodups.bam.libsize': 'stage5_nodups', - } - - df.columns = [convert[i] for i in df.columns] - - df.to_csv(output.tsv, sep='\t') - y = { - 'id': 'libsizes_table', - 'section_name': 'Library sizes', - 'description': 'Library sizes at various stages of the pipeline', - 'plot_type': 'table', - 'pconfig': { - 'id': 'libsizes_table_table', - 'title': 'Library size table', - 'min': 0 - }, - 'data': yaml.load(df.transpose().to_json(), Loader=yaml.FullLoader), - } - with open(output.json, 'w') as fout: - yaml.dump(y, fout, default_flow_style=False) - multiqc_inputs = [ utils.flatten(c.targets['fastqc']) + - utils.flatten(c.targets['libsizes_yaml']) + utils.flatten(c.targets['cutadapt']) + utils.flatten(c.targets['bam']) + utils.flatten(c.targets['markduplicates']) + @@ -401,6 +383,10 @@ rule multiqc: c.targets['multiqc'] log: c.targets['multiqc'][0] + '.log' + threads: 1 + resources: + mem_mb=1024 * 2, + runtime=autobump(120) run: analysis_directory = set([os.path.dirname(i) for i in input]) outdir = os.path.dirname(c.targets['multiqc'][0]) @@ -429,6 +415,11 @@ rule markduplicates: metrics=c.patterns['markduplicates']['metrics'] log: c.patterns['markduplicates']['bam'] + '.log' + threads: 1 + resources: + mem_mb=1024 * 32, + runtime=autobump(120), + disk_mb=1024 *100 params: # NOTE: Be careful with the memory here; make sure you have enough # and/or it matches the resources you're requesting in the cluster @@ -464,6 +455,11 @@ rule merge_techreps: metrics=c.patterns['merged_techreps'] + '.metrics' log: c.patterns['merged_techreps'] + '.log' + threads: 1 + resources: + mem_mb=1024 * 32, + runtime=autobump(120), + disk_mb=1024 *100 params: # NOTE: Be careful with the memory here; make sure you have enough # and/or it matches the resources you're requesting in the cluster @@ -482,6 +478,10 @@ if c.is_paired: metrics=c.patterns['collectinsertsizemetrics']['metrics'] log: 
c.patterns['collectinsertsizemetrics']['metrics'] + '.log' + threads: 1 + resources: + mem_mb=1024 * 32, + runtime=autobump(120), params: java_args='-Xmx20g' # java_args='-Xmx2g' # [TEST SETTINGS -1] @@ -507,6 +507,10 @@ rule bigwig: c.patterns['bigwig'] log: c.patterns['bigwig'] + '.log' + threads: 1 + resources: + mem_mb=1024 * 16, + runtime=autobump(120) shell: 'bamCoverage ' '--bam {input.bam} ' @@ -540,11 +544,15 @@ rule fingerprint: metrics=c.patterns['fingerprint']['metrics'] threads: 8 log: c.patterns['fingerprint']['metrics'] + '.log' + threads: 1 + resources: + mem_mb=1024 * 32, + runtime=autobump(120) run: if len(input.control) == 0: jsdsample_arg = "" else: - jsdsample_arg = '--JSDsample {input.control}' + jsdsample_arg = '--JSDsample ' + str(input.control) shell( 'plotFingerprint ' '--bamfiles {input.bams} ' '-p {threads} ' @@ -585,6 +593,9 @@ rule sicer: bed=c.patterns['peaks']['sicer'] log: c.patterns['peaks']['sicer'] + '.log' + resources: + mem_mb=1024 * 16, + runtime=autobump(120) params: block=lambda wc: chipseq.block_for_run(config, wc.sicer_run, 'sicer') wrapper: @@ -608,6 +619,9 @@ rule macs2: chromsizes=refdict[c.organism][config['aligner']['tag']]['chromsizes'], output: bed=c.patterns['peaks']['macs2'] + resources: + mem_mb=1024 * 16, + runtime=autobump(120) log: c.patterns['peaks']['macs2'] + '.log' params: @@ -639,6 +653,9 @@ rule spp: rdata=c.patterns['peaks']['spp'] + '.RData' log: c.patterns['peaks']['spp'] + '.log' + resources: + mem_mb=1024 * 16, + runtime=autobump(120) params: block=lambda wc: chipseq.block_for_run(config, wc.spp_run, 'spp'), keep_tempfiles=False, @@ -660,6 +677,9 @@ rule bed_to_bigbed: bed='{prefix}.bed', chromsizes=refdict[c.organism][config['aligner']['tag']]['chromsizes'] output: '{prefix}.bigbed' + resources: + mem_mb=1024 * 2, + runtime=autobump(120) log: '{prefix}.bigbed.log' run: # Based on the filename, identify the algorithm. 
Based on the contents, @@ -708,6 +728,9 @@ rule multibigwigsummary: npz=c.targets['multibigwigsummary']['npz'], tab=c.targets['multibigwigsummary']['tab'] threads: 16 + resources: + mem_mb=1024 * 16, + runtime=autobump(120) run: # from the input files, figure out the sample name. labels = ' '.join([i.split('/')[-2] for i in input]) @@ -731,6 +754,9 @@ rule plotcorrelation: output: heatmap=c.targets['plotcorrelation']['heatmap'], tab=c.targets['plotcorrelation']['tab'] + resources: + mem_mb=1024 * 2, + runtime=autobump(120) shell: 'plotCorrelation ' '--corData {input} ' @@ -761,6 +787,9 @@ if 'merged_bigwigs' in config: chromsizes=refdict[c.organism][config['aligner']['tag']]['chromsizes'], output: c.patterns['merged_bigwig'] + resources: + mem_mb=1024 * 16, + runtime=autobump(120) log: c.patterns['merged_bigwig'] + '.log' script: @@ -775,6 +804,9 @@ rule idxstats: bai=c.patterns['markduplicates']['bam'] + '.bai' output: txt=c.patterns['samtools']['idxstats'] + resources: + mem_mb=1024 * 16, + runtime=autobump(120) log: c.patterns['samtools']['idxstats'] + '.log' run: diff --git a/workflows/chipseq/config/chipseq_patterns.yaml b/workflows/chipseq/config/chipseq_patterns.yaml index c93562f2..84d59520 100644 --- a/workflows/chipseq/config/chipseq_patterns.yaml +++ b/workflows/chipseq/config/chipseq_patterns.yaml @@ -24,6 +24,7 @@ patterns_by_sample: markduplicates: bam: 'data/chipseq_samples/{sample}/{sample}.cutadapt.unique.nodups.bam' + bai: 'data/chipseq_samples/{sample}/{sample}.cutadapt.unique.nodups.bam.bai' metrics: 'data/chipseq_samples/{sample}/{sample}.cutadapt.unique.nodups.bam.metrics' merged_techreps: 'data/chipseq_merged/{label}/{label}.cutadapt.unique.nodups.merged.bam' diff --git a/workflows/rnaseq/Snakefile b/workflows/rnaseq/Snakefile index 29d5af4a..2798827f 100644 --- a/workflows/rnaseq/Snakefile +++ b/workflows/rnaseq/Snakefile @@ -23,11 +23,13 @@ config = common.load_config(config) include: '../references/Snakefile' -shell.prefix( - 'set -euo 
pipefail; export R_PROFILE_USER=; export TMPDIR={};' - .format(cluster_specific.tempdir_for_biowulf()) -) -shell.executable('/bin/bash') +def autobump(value): + """ + Increments value for each attempt. + """ + def f(wildcards, attempt): + return attempt * value + return f # Verify configuration of config and sampletable files helpers.preflight(config) @@ -51,9 +53,7 @@ def wrapper_for(path): # See "patterns and targets" in the documentation for what's going on here. final_targets = utils.flatten(( utils.flatten(c.targets['fastqc']), - utils.flatten(c.targets['libsizes']), [c.targets['fastq_screen']], - [c.targets['libsizes_table']], [c.targets['rrna_percentages_table']], [c.targets['multiqc']], utils.flatten(c.targets['featurecounts']), @@ -84,7 +84,7 @@ rule targets: if 'orig_filename' in c.sampletable.columns: - localrules: symlinks + localrules: symlinks, symlink_targets # Convert the sampletable to be indexed by the first column, for # convenience in generating the input/output filenames. 
@@ -108,6 +108,10 @@ if 'orig_filename' in c.sampletable.columns: orig_for_sample output: render_r1_r2(c.patterns['fastq']) + threads: 1 + resources: + mem_mb=100, + runtime=10, run: assert len(output) == len(input), (input, output) for src, linkname in zip(input, output): @@ -133,6 +137,10 @@ if 'Run' in c.sampletable.columns and sum(c.sampletable['Run'].str.startswith('S is_paired=c.is_paired, sampletable=_st, # limit = 100000, # [TEST SETTINGS] + resources: + mem_mb=1024, + disk_mb=autobump(1024), + runtime=autobump(120) conda: '../../wrappers/wrappers/fastq-dump/environment.yaml' script: @@ -154,6 +162,9 @@ rule sample_strand_check: log: c.patterns['strand_check']['tsv'] + '.log' threads: 6 + resources: + mem_mb=1024 * 8, + runtime=autobump(120) run: prefix = aligners.prefix_from_bowtie2_index(input.index) nreads = int(config['strand_check_reads']) * 4 @@ -190,6 +201,9 @@ rule strand_check: filelist=temporary('strand_check/filelist') log: 'strand_check/strandedness.log' + resources: + mem_mb=1024 * 1, + runtime=autobump(120) run: with open(output.filelist, 'w') as fout: for i in input: @@ -214,6 +228,9 @@ rule cutadapt: log: render_r1_r2(c.patterns['cutadapt'])[0] + '.log' threads: 6 + resources: + mem_mb=1024 * 2, + runtime=autobump(120) run: # NOTE: Change cutadapt params here @@ -255,6 +272,9 @@ rule fastqc: output: html='{sample_dir}/{sample}/fastqc/{sample}{suffix}_fastqc.html', zip='{sample_dir}/{sample}/fastqc/{sample}{suffix}_fastqc.zip', + resources: + mem_mb=1024 * 2, + runtime=autobump(120) script: wrapper_for('fastqc/wrapper.py') @@ -272,6 +292,9 @@ if config['aligner']['index'] == 'hisat2': log: c.patterns['bam'] + '.log' threads: 6 + resources: + mem_mb=1024 * 32, + runtime=autobump(120) run: prefix = aligners.prefix_from_bowtie2_index(input.index) sam = output.bam.replace('.bam', '.sam') @@ -344,6 +367,9 @@ if config['aligner']['index'] == 'star': log: c.patterns['bam'].replace('.bam', '.star.bam.log') threads: 16 + resources: + mem_mb=1024 * 64, 
+ runtime=autobump(120) run: genomedir = os.path.dirname(input.index[0]) outdir = os.path.dirname(output[0]) @@ -376,6 +402,9 @@ if config['aligner']['index'] == 'star-twopass': log: c.patterns['bam'].replace('.bam', '.star-pass1.bam.log') threads: 16 + resources: + mem_mb=1024 * 64, + runtime=autobump(120) run: genomedir = os.path.dirname(input.index[0]) outdir = os.path.dirname(output[0]) @@ -413,6 +442,9 @@ if config['aligner']['index'] == 'star-twopass': log: c.patterns['bam'].replace('.bam', '.star-pass2.bam.log') threads: 16 + resources: + mem_mb=1024 * 64, + runtime=autobump(120) run: genomedir = os.path.dirname(input.index[0]) outdir = os.path.dirname(output[0]) @@ -451,6 +483,9 @@ rule rRNA: log: c.patterns['rrna']['bam'] + '.log' threads: 6 + resources: + mem_mb=1024 * 2, + runtime=autobump(120) run: prefix = aligners.prefix_from_bowtie2_index(input.index) sam = output.bam.replace('.bam', '.sam') @@ -481,6 +516,10 @@ rule fastq_count: fastq='{sample_dir}/{sample}/{sample}{suffix}.fastq.gz' output: '{sample_dir}/{sample}/{sample}{suffix}.fastq.gz.libsize' + threads: 1 + resources: + mem_mb=1024 * 1, + runtime=autobump(120) shell: 'zcat {input} | echo $((`wc -l`/4)) > {output}' @@ -493,6 +532,10 @@ rule bam_count: bam='{sample_dir}/{sample}/{suffix}.bam' output: '{sample_dir}/{sample}/{suffix}.bam.libsize' + threads: 1 + resources: + mem_mb=1024 * 2, + runtime=autobump(120) shell: 'samtools view -c {input} > {output}' @@ -505,6 +548,10 @@ rule bam_index: bam='{prefix}.bam' output: bai='{prefix}.bam.bai' + threads: 1 + resources: + mem_mb=1024 * 2, + runtime=autobump(120) shell: 'samtools index {input} {output}' @@ -531,6 +578,10 @@ rule fastq_screen: txt=c.patterns['fastq_screen'] log: c.patterns['fastq_screen'] + '.log' + threads: 6 + resources: + mem_mb=1024 * 4, + runtime=autobump(120) params: subset=100000 script: wrapper_for('fastq_screen/wrapper.py') @@ -548,6 +599,9 @@ rule featurecounts: log: '{sample_dir}/rnaseq_aggregation/featurecounts.txt.log' 
threads: 8 + resources: + mem_mb=1024 * 2, + runtime=autobump(120) run: # NOTE: By default, we use -p for paired-end p_arg = '' @@ -584,6 +638,10 @@ rule rrna_libsizes_table: output: json=c.patterns['rrna_percentages_yaml'], tsv=c.patterns['rrna_percentages_table'] + threads: 1 + resources: + mem_mb=1024 * 2, + runtime=autobump(120) run: def rrna_sample(f): return helpers.extract_wildcards(c.patterns['rrna']['libsize'], f)['sample'] @@ -625,59 +683,6 @@ rule rrna_libsizes_table: yaml.dump(y, fout, default_flow_style=False) -rule libsizes_table: - """ - Aggregate fastq and bam counts in to a single table - """ - input: - utils.flatten(c.targets['libsizes']) - output: - json=c.patterns['libsizes_yaml'], - tsv=c.patterns['libsizes_table'] - run: - def sample(f): - return os.path.basename(os.path.dirname(f)) - - def million(f): - return float(open(f).read()) / 1e6 - - def stage(f): - return os.path.basename(f).split('.', 1)[1].replace('.gz', '').replace('.count', '') - - df = pd.DataFrame(dict(filename=list(map(str, input)))) - df['sample'] = df.filename.apply(sample) - df['million'] = df.filename.apply(million) - df['stage'] = df.filename.apply(stage) - df = df.set_index('filename') - df = df.pivot('sample', columns='stage', values='million') - - # make nicer column names - convert = { - 'fastq.libsize': 'stage1_raw', - 'cutadapt.fastq.libsize' : 'stage2_trimmed', - 'cutadapt.bam.libsize': 'stage3_aligned', - } - - df.columns = [convert[i] for i in df.columns] - - df.to_csv(output.tsv, sep='\t') - y = { - 'id': 'libsizes_table', - 'section_name': 'Library sizes', - 'description': 'Library sizes at various stages of the pipeline', - 'plot_type': 'table', - 'pconfig': { - 'id': 'libsizes_table_table', - 'title': 'Library size table', - 'min': 0 - }, - 'data': yaml.load(df.transpose().to_json(), Loader=yaml.FullLoader), - } - with open(output.json, 'w') as fout: - yaml.dump(y, fout, default_flow_style=False) - - - rule multiqc: """ Aggregate various QC stats and logs 
into a single HTML report with MultiQC @@ -687,7 +692,6 @@ rule multiqc: input: files=( utils.flatten(c.targets['fastqc']) + - utils.flatten(c.targets['libsizes_yaml']) + utils.flatten(c.targets['rrna_percentages_yaml']) + utils.flatten(c.targets['cutadapt']) + utils.flatten(c.targets['featurecounts']) + @@ -702,6 +706,10 @@ rule multiqc: config='config/multiqc_config.yaml' output: c.targets['multiqc'] log: c.targets['multiqc'][0] + '.log' + threads: 1 + resources: + mem_mb=1024 * 2, + runtime=autobump(120) run: analysis_directory = set([os.path.dirname(i) for i in input]) outdir = os.path.dirname(c.targets['multiqc'][0]) @@ -736,6 +744,11 @@ rule markduplicates: # config. java_args='-Xmx20g' # java_args='-Xmx2g' # [TEST SETTINGS -1] + threads: 1 + resources: + mem_mb=1024 * 32, + runtime=autobump(120), + disk_mb=1024 *100 shell: 'picard ' '{params.java_args} ' @@ -764,6 +777,10 @@ rule collectrnaseqmetrics: # java_args='-Xmx2g' # [TEST SETTINGS -1] log: c.patterns['collectrnaseqmetrics']['metrics'] + '.log' + threads: 1 + resources: + mem_mb=1024 * 32, + runtime=autobump(120) run: strand_arg = helpers.strand_arg_lookup( c, { @@ -793,6 +810,10 @@ rule preseq: bam=c.patterns['bam'] output: c.patterns['preseq'] + threads: 1 + resources: + mem_mb=1024 * 1, + runtime=autobump(120) shell: 'preseq ' 'c_curve ' @@ -815,6 +836,9 @@ rule salmon: log: c.patterns['salmon'] + '.log' threads: 6 + resources: + mem_mb=1024 * 32, + runtime=autobump(120) run: if c.is_paired: fastq_arg = f'-1 {input.fastq[0]} -2 {input.fastq[1]} ' @@ -855,6 +879,9 @@ rule kallisto: c.patterns['kallisto'] + '.log' threads: 8 + resources: + mem_mb=1024 * 32, + runtime=autobump(120) run: if c.is_paired: se_args = '' @@ -898,6 +925,9 @@ rule rseqc_infer_experiment: txt=c.patterns['rseqc']['infer_experiment'] log: c.patterns['rseqc']['infer_experiment'] + '.log' + resources: + mem_mb=1024 * 2, + runtime=autobump(120) shell: 'infer_experiment.py -r {input.bed12} -i {input.bam} > {output} &> {log}' @@ 
-913,15 +943,38 @@ rule rseqc_read_distribution: txt=c.patterns['rseqc']['read_distribution'] log: c.patterns['rseqc']['read_distribution'] + '.log' + resources: + mem_mb=1024 * 2, + runtime=autobump(120) shell: 'read_distribution.py -i {input.bam} -r {input.bed12} > {output} &> {log}' +rule idxstats: + """ + Run samtools idxstats on sample bams + """ + input: + bam=c.patterns['markduplicates']['bam'], + bai=c.patterns['markduplicates']['bam'] + '.bai' + output: + txt=c.patterns['samtools']['idxstats'] + log: + c.patterns['samtools']['idxstats'] + '.log' + resources: + mem_mb=1024 * 16, + runtime=autobump(120) + run: + shell( + 'samtools idxstats {input.bam} 2> {log} 1> {output.txt}' + ) + + # Common arguments used for bamCoverage rules below BAMCOVERAGE_ARGS = ( '--minMappingQuality 20 ' # excludes multimappers '--smoothLength 10 ' # smooth signal with specified window - '--normalizeUsing BPM ' # equivalent to TPM + # '--normalizeUsing BPM ' # equivalent to TPM # [TEST SETTINGS] ) rule bigwig_neg: @@ -933,6 +986,9 @@ rule bigwig_neg: bai=c.patterns['markduplicates']['bam'] + '.bai', output: c.patterns['bigwig']['neg'] threads: 8 + resources: + mem_mb=1024 * 16, + runtime=autobump(120) log: c.patterns['bigwig']['neg'] + '.log' run: @@ -963,6 +1019,9 @@ rule bigwig_pos: bai=c.patterns['markduplicates']['bam'] + '.bai', output: c.patterns['bigwig']['pos'] threads: 8 + resources: + mem_mb=1024 * 16, + runtime=autobump(120) log: c.patterns['bigwig']['pos'] + '.log' @@ -1011,6 +1070,9 @@ if 'merged_bigwigs' in config: c.patterns['merged_bigwig'] log: c.patterns['merged_bigwig'] + '.log' + resources: + mem_mb=1024 * 16, + runtime=autobump(120) script: wrapper_for('average-bigwigs/wrapper.py') @@ -1036,22 +1098,4 @@ rule rnaseq_rmarkdown: '> {log} 2>&1' # [TEST_SETTINGS -1] - -rule idxstats: - """ - Run samtools idxstats on sample bams - """ - input: - bam=c.patterns['markduplicates']['bam'], - bai=c.patterns['markduplicates']['bam'] + '.bai' - output: - 
txt=c.patterns['samtools']['idxstats'] - log: - c.patterns['samtools']['idxstats'] + '.log' - run: - shell( - 'samtools idxstats {input.bam} 2> {log} 1> {output.txt}' - ) - - # vim: ft=python diff --git a/workflows/rnaseq/config/clusterconfig.yaml b/workflows/rnaseq/config/clusterconfig.yaml index f002fffd..e71837e1 100644 --- a/workflows/rnaseq/config/clusterconfig.yaml +++ b/workflows/rnaseq/config/clusterconfig.yaml @@ -20,16 +20,16 @@ collectrnaseqmetrics: prefix: "--gres=lscratch:20 --time=4:00:00 --mem=32g --partition=quick" salmon_index: - prefix: "--mem=32g" + prefix: "--gres=lscratch:20 --mem=32g" salmon: - prefix: "--mem=32g" + prefix: "--gres=lscratch:20 --mem=32g" kallisto_index: - prefix: "--mem=32g" + prefix: "--gres=lscratch:20 --mem=32g" kallisto: - prefix: "--mem=32g" + prefix: "--gres=lscratch:20 --mem=32g" bigwig_neg: prefix: "--gres=lscratch:20 --mem=16g" @@ -38,7 +38,7 @@ bigwig_pos: prefix: "--gres=lscratch:20 --mem=16g" star: - prefix: "--time=8:00:00 --mem=64g" + prefix: "--gres=lscratch:20 --time=8:00:00 --mem=64g" star_index: - prefix: "--time=8:00:00 --mem=64g" + prefix: "--gres=lscratch:20 --time=8:00:00 --mem=64g" diff --git a/workflows/rnaseq/config/rnaseq_patterns.yaml b/workflows/rnaseq/config/rnaseq_patterns.yaml index ecd95d7d..e7a6bdd1 100644 --- a/workflows/rnaseq/config/rnaseq_patterns.yaml +++ b/workflows/rnaseq/config/rnaseq_patterns.yaml @@ -25,6 +25,7 @@ rrna: multiqc: 'data/rnaseq_aggregation/multiqc.html' markduplicates: bam: 'data/rnaseq_samples/{sample}/{sample}.cutadapt.markdups.bam' + bai: 'data/rnaseq_samples/{sample}/{sample}.cutadapt.markdups.bam.bai' metrics: 'data/rnaseq_samples/{sample}/{sample}.cutadapt.markdups.bam.metrics' collectrnaseqmetrics: metrics: 'data/rnaseq_samples/{sample}/{sample}.collectrnaseqmetrics.metrics' diff --git a/workflows/rnaseq/downstream/functional-enrichment.Rmd b/workflows/rnaseq/downstream/functional-enrichment.Rmd index 17df5b9b..f0626f15 100644 --- 
a/workflows/rnaseq/downstream/functional-enrichment.Rmd +++ b/workflows/rnaseq/downstream/functional-enrichment.Rmd @@ -33,7 +33,7 @@ a particular annotation category. devtools::document('../../../lib/lcdbwf') devtools::load_all('../../../lib/lcdbwf') -config <- lcdbwf::load_config('config.yaml') +config <- lcdbwf:::load_config('config.yaml') ``` ```{r load, cache=TRUE, cache.extra=file.info('combined.Rds')$mtime} @@ -45,8 +45,8 @@ dds_list <- obj$dds_list ```{r functional_enrichment_prep, cache=TRUE, config=config$annotation$keytype, eval=config$toggle$functional_enrichment, dependson='load'} # We assume that keys are unique across all term2gene_lists. -term2gene_list <- lcdbwf::get_go_term2gene(config) -term2name <- lcdbwf::get_go_descriptions() +term2gene_list <- lcdbwf:::get_go_term2gene(config) +term2name <- lcdbwf:::get_go_descriptions() # We need to assign each key to its respective term2name dataframe (or NULL if # none) @@ -57,9 +57,9 @@ ontology_list <- c(term2gene_list) # This can take up a lot of memory on CI/CD, so we only do this if not doing # a test. 
if (!config$toggle$test){ - msigdb_df <- lcdbwf::get_msigdb_df(config) - msigdb_term2gene_list <- lcdbwf::get_msigdb_term2gene_list(msigdb_df) - msigdb_term2name <- lcdbwf::get_msigdb_term2name(msigdb_df) + msigdb_df <- lcdbwf:::get_msigdb_df(config) + msigdb_term2gene_list <- lcdbwf:::get_msigdb_term2gene_list(msigdb_df) + msigdb_term2name <- lcdbwf:::get_msigdb_term2name(msigdb_df) ontology_term2name_mapping <- c(ontology_term2name_mapping, lapply(msigdb_term2gene_list, function(x) NULL)) ontology_list <- c(ontology_list, msigdb_term2gene_list) } @@ -78,7 +78,7 @@ for (name in names(res_list)){ for (ont in names(config$functional_enrichment$ontologies)){ term2gene <- ontology_list[[ont]] term2name <- ontology_term2name_mapping[[ont]] - enrich_res <- lcdbwf::run_enrichment( + enrich_res <- lcdbwf:::run_enrichment( res_list[[name]], direction=direction, TERM2GENE=term2gene, @@ -99,13 +99,13 @@ for (name in names(res_list)){ ```{r functional_enrichment_plots} # Interestingly, it's the *caching* that causes this to hang for a loooong time. 
-dotplot_list <- lcdbwf::enrich_list_lapply(all_enrich, dotplots, config=config, send_names=TRUE) -emapplot_list <- lcdbwf::enrich_list_lapply(all_enrich, emapplots, config=config, send_names=TRUE) -cnetplot_list <- lcdbwf::enrich_list_lapply(all_enrich, cnetplots, config=config, send_names=TRUE) +dotplot_list <- lcdbwf:::enrich_list_lapply(all_enrich, dotplots, config=config, send_names=TRUE) +emapplot_list <- lcdbwf:::enrich_list_lapply(all_enrich, emapplots, config=config, send_names=TRUE) +cnetplot_list <- lcdbwf:::enrich_list_lapply(all_enrich, cnetplots, config=config, send_names=TRUE) ``` ```{r, results='asis'} -lcdbwf::mdcat("There are many different databases that annotate genes into sets.", +lcdbwf:::mdcat("There are many different databases that annotate genes into sets.", "The following sets are used here:") knitr::kable(config$functional_enrichment$ontologies %>% as.data.frame %>% t()) @@ -120,19 +120,19 @@ for (name in names(res_list)){ for (direction in config$functional_enrichment$directions){ mdcat("### ", direction, "{.tabset}") if (length(all_enrich[[name]][[direction]]) == 0){ - lcdbwf::mdcat("Too few genes differentially expressed.") + lcdbwf:::mdcat("Too few genes differentially expressed.") next } for (ont in names(all_enrich[[name]][[direction]])){ - lcdbwf::mdcat("#### ", ont, "{.tabset}") + lcdbwf:::mdcat("#### ", ont, "{.tabset}") - lcdbwf::mdcat("##### dotplot") + lcdbwf:::mdcat("##### dotplot") print(dotplot_list[[name]][[direction]][[ont]]) - lcdbwf::mdcat("##### emapplot") + lcdbwf:::mdcat("##### emapplot") print(emapplot_list[[name]][[direction]][[ont]]) - lcdbwf::mdcat("##### cnetplot") + lcdbwf:::mdcat("##### cnetplot") print(cnetplot_list[[name]][[direction]][[ont]]) } } diff --git a/workflows/rnaseq/downstream/gene-patterns.Rmd b/workflows/rnaseq/downstream/gene-patterns.Rmd index 56d514c7..e2bf94e0 100644 --- a/workflows/rnaseq/downstream/gene-patterns.Rmd +++ b/workflows/rnaseq/downstream/gene-patterns.Rmd @@ -1,3 +1,45 @@ +--- 
+title: Gene pattern analysis +output: + html_document: + code_folding: hide + toc: true + toc_float: true + toc_depth: 3 +--- + +```{r global_options, include=FALSE} +# Sets up global options for rendering RMarkdown into HTML. +knitr::opts_chunk$set( + warning=FALSE, + message=FALSE +) +``` + +```{r} +library(dplyr) +library(tidyr) +library(clusterProfiler) +library(DESeq2) +library(DEGreport) +``` + +```{r load_helpers} +# Load the lcdbwf R package, which is stored locally. +# This package has many custom functions used throughout this document. +devtools::document('../../../lib/lcdbwf') +devtools::load_all('../../../lib/lcdbwf') + +config <- lcdbwf:::load_config('config.yaml') +``` + +```{r load, cache=TRUE, cache.extra=file.info('combined.Rds')$mtime} +obj <- readRDS('combined.Rds') +res.list <- obj$res_list +dds.list <- obj$dds_list +``` + + # Gene patterns {.tabset} We can roughly group genes into expression patterns. This uses the [DEGreport @@ -55,17 +97,23 @@ low.minc <- 1 more, they are merged together - Clusters with fewer than `r minc` genes are not shown. +Gene patterns for changed genes in individual contrasts are also indicated below. + +```{r finalclusters, fig.width=12, results='asis', cache=TRUE, dependson='selections'} # Run the clustering, identify patterns, and generate plots. # Docs: https://lcdb.github.io/lcdb-wf/rnaseq-rmd.html#finalclusters # NOTE: which genes to cluster?------------------------------------------------ # By default, we get all the changed genes, but you may want only the up or # down genes. -ll <- lapply(res.list, function (x) get.sig(x[['res']], 'changed')) +ll <- lapply(res.list, function (x) lcdbwf:::get_sig(x[['res']], 'changed')) # Filter out results where there were zero genes detected. 
ll <- ll[lapply(ll, length) > 0] +# get the list of all changed in any contrast +all.changed <- list('union_all_contrasts' = unlist(ll) %>% unique()) +ll <- c(all.changed, ll) + add.cluster.id <- function(clusters, res, label){ # Merges the degPattern cluster IDs `cluster` with DESeqresults `res` # `label` will be used to create a cluster column with a unique column name @@ -79,9 +127,16 @@ add.cluster.id <- function(clusters, res, label){ return(res) } +# n.list will store all the clusters to later add the cluster ID to res.list +n.list <- list() + for (name in names(ll)){ # Print a nice Markdown header - mdcat('## ', res.list[[name]][['label']]) + if (name == 'union_all_contrasts') { + mdcat('## ', name) + } else { + mdcat('## ', res.list[[name]][['label']]) + } genes <- ll[[name]] @@ -98,15 +153,23 @@ for (name in names(ll)){ } # Extract the normalized counts for these genes - vsd.i <- varianceStabilizingTransformation(dds.list[[ res.list[[name]][['dds']] ]], blind=TRUE) + if (name == 'union_all_contrasts') { + vsd.i <- varianceStabilizingTransformation(dds.list[[ 1 ]], blind=TRUE) + } else { + vsd.i <- varianceStabilizingTransformation(dds.list[[ res.list[[name]][['dds']] ]], blind=TRUE) + } idx <- rownames(vsd.i) %in% genes ma <- assay(vsd.i)[idx,] # Remove genes with identical normalized counts across all samples ma <- ma[apply(as.data.frame(ma), 1, n_distinct) > 1, ] - colData.i <- colData(dds.list[[ res.list[[name]][['dds']] ]]) - colData.i <- colData.i[,!(colnames(colData.i) %in% exclude.for.printing)] + if (name == 'union_all_contrasts') { + colData.i <- colData(dds.list[[ 1 ]]) + } else { + colData.i <- colData(dds.list[[ res.list[[name]][['dds']] ]]) + } + #colData.i <- colData.i[,!(colnames(colData.i) %in% exclude.for.printing)] # Sometimes, if there are limited clusters, degPattern fails. 
The solution @@ -151,6 +214,7 @@ for (name in names(ll)){ ) ) + n.list[[name]] <- n2 # In the final_clusters directory, this creates files containing lists of # the genes in each cluster, and adds a link to the Markdown. @@ -167,8 +231,37 @@ for (name in names(ll)){ dev.copy(pdf, file=pdf.file) dev.off() mdcat('- [', pdf.file, '](', pdf.file, '), PDF') +} +``` - # merge the degPattern cluster IDs with res.list - res.list[[name]][['res']] <- add.cluster.id(clusters=n2, res=res.list[[name]][['res']], label=name) +```{r add_cluster_id_res} +# merge the degPattern cluster IDs with res.list +for (name in names(res.list)) { + for (llname in names(ll)) { + res.list[[name]][['res']] <- add.cluster.id(clusters=n.list[[llname]], + res=res.list[[name]][['res']], + label=llname) + } } ``` + + + +# Exported results + +```{r excel, results='asis'} +lcdbwf:::exported_excel(res.list, dds.list, file='final_clusters/consolidated_results.xlsx') +``` + +Here is a single Excel file with one worksheet for each contrast: +[final_clusters/consolidated_results.xlsx](final_clusters/consolidated_results.xlsx) + +Alternatively, the files below are TSVs that can be opened in Excel or used +programmatically with downstream tools: + +```{r write_output, results='asis'} +# Write out files for full and each selection, and create a link to them in the +# HTML generated by this RMarkdown. 
+tbl <- lcdbwf:::exported_tsvs(res.list, directory='final_clusters') +knitr::kable(tbl, row.names=FALSE) +``` diff --git a/workflows/rnaseq/downstream/rnaseq.Rmd b/workflows/rnaseq/downstream/rnaseq.Rmd index 8d94e011..b56979cf 100644 --- a/workflows/rnaseq/downstream/rnaseq.Rmd +++ b/workflows/rnaseq/downstream/rnaseq.Rmd @@ -27,6 +27,26 @@ devtools::document('../../../lib/lcdbwf') devtools::load_all('../../../lib/lcdbwf') ``` +```{r libraries} +library(AnnotationHub) +library(BiocParallel) +library(clusterProfiler) +library(cowplot) +library(DESeq2) +library(dplyr) +library(DT) +library(genefilter) +library(ggplot2) +library(gridExtra) +library(plotly) +library(purrr) +library(readr) +library(reshape) +library(tibble) +library(tximport) +library(UpSetR) +``` + ```{r config} # HOW TO CONFIGURE ------------------------------------------------------ @@ -60,29 +80,6 @@ if (config$parallel$parallel){ Last run: `r date()` - -```{r libraries} -library(AnnotationHub) -library(BiocParallel) -library(clusterProfiler) -library(cowplot) -library(DESeq2) -library(dplyr) -library(DT) -library(genefilter) -library(ggplot2) -library(gridExtra) -library(plotly) -library(purrr) -library(readr) -library(reshape) -library(tibble) -library(tximport) -library(UpSetR) -``` - - - ```{r coldata_setup} # Set up all of the metadata for the samples and experimental design. Use this # chunk to modify if needed. diff --git a/workflows/rnaseq/run_downstream_test.sh b/workflows/rnaseq/run_downstream_test.sh index 6440d5a1..7544c5dd 100755 --- a/workflows/rnaseq/run_downstream_test.sh +++ b/workflows/rnaseq/run_downstream_test.sh @@ -18,5 +18,4 @@ done # Make sure we move the config file there too cp downstream/config.yaml downstream-test/config.yaml cp downstream/text.yaml downstream-test/text.yaml - Rscript -e "rmarkdown::render('downstream-test/rnaseq.Rmd')"