diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 9688452..3c2bccd 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -9,33 +9,35 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out the repo - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Sanitize Repo Name for Tagging run: echo ${{ github.repository }} | tr '[:upper:]' '[:lower:]' | (read; echo REPO_LOWER=$REPLY) >> $GITHUB_ENV - name: Set up QEMU - uses: docker/setup-qemu-action@v1 + uses: docker/setup-qemu-action@v2 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 + uses: docker/setup-buildx-action@v2 - name: Login to Docker Hub - uses: docker/login-action@v1 + uses: docker/login-action@v2 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - name: Login to GitHub Container Registry - uses: docker/login-action@v1 + uses: docker/login-action@v2 with: registry: ghcr.io username: ${{ secrets.GH_USERNAME }} password: ${{ secrets.GH_TOKEN }} - name: Build and Push - uses: docker/build-push-action@v2 + uses: docker/build-push-action@v4 with: push: true tags: | ghcr.io/${{ env.REPO_LOWER }}:${{ github.event.release.tag_name }} + ghcr.io/${{ env.REPO_LOWER }}:latest ${{ env.REPO_LOWER }}:${{ github.event.release.tag_name }} + ${{ env.REPO_LOWER }}:latest diff --git a/.github/workflows/docker_branches.yml b/.github/workflows/docker_branches.yml index d701a1f..ab756c2 100644 --- a/.github/workflows/docker_branches.yml +++ b/.github/workflows/docker_branches.yml @@ -2,7 +2,7 @@ name: Publish Docker image on: push: branches-ignore: - - 'master' + - 'main' jobs: push_to_registry: @@ -10,7 +10,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out the repo - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Sanitize Repo Name for Tagging run: echo ${{ github.repository }} | tr '[:upper:]' '[:lower:]' | (read; echo REPO_LOWER=$REPLY) >> $GITHUB_ENV @@ -25,18 +25,18 @@ jobs: 
type=ref,event=pr - name: Set up QEMU - uses: docker/setup-qemu-action@v1 + uses: docker/setup-qemu-action@v2 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 + uses: docker/setup-buildx-action@v2 - name: Login to Docker Hub - uses: docker/login-action@v1 + uses: docker/login-action@v2 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - name: Push to Docker Hub - uses: docker/build-push-action@v2 + uses: docker/build-push-action@v4 with: push: true tags: ${{ steps.meta.outputs.tags }} diff --git a/.github/workflows/lint-check.yml b/.github/workflows/lint-check.yml index 946a9f0..d4dbfc9 100644 --- a/.github/workflows/lint-check.yml +++ b/.github/workflows/lint-check.yml @@ -3,49 +3,40 @@ name: lint-check on: push: branches: - - master + - main pull_request: branches: - - master + - main jobs: import_syntax_check: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Check import syntax run: | EXITCODE=0 for file in $(find . -name '*.wdl'); do >&2 echo "Checking file $file..." import_lines=$(awk '/import/' "$file") - bad_lines=$(echo "$import_lines" | awk '!/https:\/\/raw.githubusercontent.com\/stjude\/xenocp\/master/ && !/https:\/\/raw.githubusercontent.com\/stjudecloud\/workflows\/master/' | grep -v '# lint-check: ignore') || true + # Self-imports by URL are rejected regardless of letter case (existing imports spell the repo "stjude/xenocp"), so compare in lowercase. + bad_lines=$(echo "$import_lines" | awk 'tolower($0) ~ /https:\/\/raw\.githubusercontent\.com\/stjude\/xenocp/') || true if [ -n "$bad_lines" ]; then - >&2 echo "Must import files from the master branch on Github." + >&2 echo "Imports from this repo must use relative paths!" >&2 echo "The following lines are bad:" >&2 echo "$bad_lines" >&2 echo "" EXITCODE=1 fi - done - exit $EXITCODE - docker_pull_check: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Ensure SemVer'd docker images are being pulled - run: | - EXITCODE=0 - files=$(find . 
-name '*.wdl') - for file in $files; do - while IFS= read -r line; do - tag=$(echo "$line" | awk -F ':' '{print substr($3, 1, length($3)-1)}') - if ! [[ $tag =~ ^(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(-(0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(\.(0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*)?(\+[0-9a-zA-Z-]+(\.[0-9a-zA-Z-]+)*)?$ ]]; then - >&2 echo "All Docker containers must be using an official SemVer tagged image" - >&2 echo "Offending line: $line" - >&2 echo "In file: $file" - EXITCODE=1 - fi - done < <(awk '/docker: .*stjudecloud/ || /docker: .*stjude/' < "$file") + + bad_lines=$(echo "$import_lines" | awk '/http/ && (/main/ || /master/)') || true + if [ -n "$bad_lines" ]; then + >&2 echo "Imports from external repos must use a tagged release!" + >&2 echo "The following lines are bad:" + >&2 echo "$bad_lines" + >&2 echo "" + EXITCODE=1 + fi + done exit $EXITCODE diff --git a/.github/workflows/miniwdl-check.yml b/.github/workflows/miniwdl-check.yml index 71af5a2..ff51b75 100644 --- a/.github/workflows/miniwdl-check.yml +++ b/.github/workflows/miniwdl-check.yml @@ -6,11 +6,11 @@ jobs: miniwdl_check: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python - uses: actions/setup-python@v1 + uses: actions/setup-python@v4 with: - python-version: '3.6' + python-version: '3.10' - name: Install miniwdl run: | python -m pip install --upgrade pip @@ -20,10 +20,7 @@ jobs: EXITCODE=0 echo "Checking WDL files using \`miniwdl check\`." files=$(find . 
-name '*.wdl') - for file in $files; do - sed -i 's,https://raw.githubusercontent.com/stjude/xenocp/'"$(echo ${GITHUB_REF#refs/heads/})"','"$(pwd)"',g' "$file" - sed -i 's,https://raw.githubusercontent.com/stjude/xenocp/master,'"$(pwd)"',g' "$file" - done + for file in $files; do echo " [***] $file [***]" miniwdl check "$file" diff --git a/.github/workflows/release-check.yml b/.github/workflows/release-check.yml new file mode 100644 index 0000000..d836a76 --- /dev/null +++ b/.github/workflows/release-check.yml @@ -0,0 +1,32 @@ +name: release-check + +on: + push: + branches: + - release + pull_request: + branches: + - release + +jobs: + docker_pull_check: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Ensure SemVer'd docker images are being pulled + run: | + EXITCODE=0 + files=$(find . -name '*.wdl') + for file in $files; do + while IFS= read -r line; do + tag=$(echo "$line" | awk -F ':' '{print substr($3, 1, length($3)-1)}') + # bash [[ =~ ]] uses POSIX ERE, which has no \d class; digits are spelled [0-9] so multi-digit versions (e.g. 10.2.3) are accepted. + if ! [[ $tag =~ ^(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)(-(0|[1-9][0-9]*|[0-9]*[a-zA-Z-][0-9a-zA-Z-]*)(\.(0|[1-9][0-9]*|[0-9]*[a-zA-Z-][0-9a-zA-Z-]*))*)?(\+[0-9a-zA-Z-]+(\.[0-9a-zA-Z-]+)*)?$ ]]; then + >&2 echo "All Docker containers must be using an official SemVer tagged image" + >&2 echo "Offending line: $line" + >&2 echo "In file: $file" + EXITCODE=1 + fi + done < <(awk '/docker: .*stjudecloud/ || /docker: .*stjude/' < "$file") + done + exit $EXITCODE diff --git a/Dockerfile b/Dockerfile index c42d6ac..53c4712 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,6 @@ -FROM ubuntu:18.04 as builder +FROM ubuntu:20.04 as builder + +ARG DEBIAN_FRONTEND=noninteractive RUN apt-get update \ && apt-get --yes install \ @@ -73,7 +75,7 @@ RUN cd /tmp/xenocp \ && gradle installDist \ && cp -r build/install/xenocp /opt -FROM ubuntu:18.04 +FROM ubuntu:20.04 RUN apt-get update \ && apt-get --yes install --no-install-recommends \ @@ -82,6 +84,7 @@ RUN apt-get update \ openjdk-11-jre-headless \ python3 \ python3-distutils \ + 
python-is-python3 \ file \ && rm -rf /var/lib/apt/lists/* @@ -98,4 +101,4 @@ COPY --from=builder /opt/xenocp/bin/* /usr/local/bin/ COPY cwl /opt/xenocp/cwl -ENTRYPOINT ["cwl-runner", "--parallel", "--outdir", "results", "/opt/xenocp/cwl/xenocp.cwl"] +ENTRYPOINT ["cwl-runner", "--parallel", "--outdir", "results", "--no-container", "/opt/xenocp/cwl/xenocp.cwl"] diff --git a/RELEASE.md b/RELEASE.md new file mode 100644 index 0000000..b6f74b8 --- /dev/null +++ b/RELEASE.md @@ -0,0 +1,5 @@ +# Release + + * [ ] Update version in `dx_app/dxapp.json`. + * [ ] Update `wdl/tools/xenocp.wdl` with version. + * [ ] Update `wdl/workflows/xenocp.wdl` with version. diff --git a/bin/picard b/bin/picard new file mode 100755 index 0000000..fea6036 --- /dev/null +++ b/bin/picard @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +java -jar /opt/picard/lib/picard*.jar "$@" diff --git a/cwl/bwa_mem_onlymapped.cwl b/cwl/bwa_mem_onlymapped.cwl index 71f3a69..1e9db9d 100644 --- a/cwl/bwa_mem_onlymapped.cwl +++ b/cwl/bwa_mem_onlymapped.cwl @@ -12,6 +12,11 @@ hints: tweak_sam: specs: ["java.sh org.stjude.compbio.sam.TweakSam"] +requirements: + ResourceRequirement: + ramMin: 15000 + coresMin: 1 + inputs: ref_db_prefix: type: string diff --git a/cwl/cat.cwl b/cwl/cat.cwl new file mode 100644 index 0000000..87dd395 --- /dev/null +++ b/cwl/cat.cwl @@ -0,0 +1,32 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: CommandLineTool +doc: | + Merge a set of files into file using the cat utility. + +requirements: + - class: InlineJavascriptRequirement + +baseCommand: cat + +inputs: + output_file: + type: string + doc: | + Name of final file. + + input_files: + type: File[] + inputBinding: + position: 4 + doc: | + Array of files to merge. 
+ +stdout: $(inputs.output_file) + +outputs: + combined_file: + type: File + outputBinding: + glob: $(inputs.output_file) \ No newline at end of file diff --git a/cwl/xenocp.cwl b/cwl/xenocp.cwl index 618c553..b741354 100755 --- a/cwl/xenocp.cwl +++ b/cwl/xenocp.cwl @@ -10,7 +10,7 @@ requirements: - class: SubworkflowFeatureRequirement inputs: - bam: + bam: type: File label: Aligned sequences in BAM format secondaryFiles: @@ -19,7 +19,7 @@ inputs: type: string label: contamination genome reference db prefix aligner: - type: + type: type: enum symbols: ["bwa aln", "bwa mem", "star"] name: aligner @@ -39,7 +39,7 @@ inputs: output_extension: type: string? default: bam - n_threads: + n_threads: type: int? default: 1 @@ -51,31 +51,27 @@ outputs: type: File outputSource: finish/flagstat contam_list: - type: - type: array - items: File - outputSource: contamination/contam_list + type: File + outputSource: merge_contam_list/combined_file output_tie_bam: - type: - type: array - items: File - outputSource: contamination/output_tie_bam + type: File + outputSource: merge_tie_bam/final_bam steps: # Step01: extract chromosome information from input bam split: run: extract.cwl - in: + in: bam: bam out: [split_bams, unmapped] - + # Step02: extract mapped reads and convert to fastq mapped-fastq: run: view_awk_picard.cwl in: - input_bam: + input_bam: source: split/split_bams - linkMerge: merge_flattened + linkMerge: merge_flattened output_fastq: valueFrom: $(inputs.input_bam.nameroot).fastq # here "inputs" refers to inputs in view_awk_picard.cwl scatter: [input_bam] @@ -114,7 +110,7 @@ steps: ResourceRequirement: ramMin: 4800 coresMin: 1 - + # Step03c: map extracted reads to the contamination genome with STAR mapping-star: run: star_onlymapped.cwl @@ -137,25 +133,25 @@ steps: run: create_contam_lists.cwl in: input_bam: - source: split/split_bams - output_contam_list: + source: split/split_bams + output_contam_list: valueFrom: $(inputs.input_bam.nameroot).contam.txt tie_bam: 
valueFrom: $(inputs.input_bam.nameroot).tie.bam - contam_bams: + contam_bams: source: [mapping-bwa-aln/bam, mapping-bwa-mem/bam, mapping-star/bam] linkMerge: merge_flattened pickValue: all_non_null scatter: [input_bam, contam_bams] scatterMethod: dotproduct out: [contam_list, output_tie_bam] - - # Step05: clean the original bam by setting the contamination reads to be unmapped + + # Step05a: clean the original bam by setting the contamination reads to be unmapped cleanse: run: tweak_sam.cwl in: - input_bam: - source: split/split_bams + input_bam: + source: split/split_bams output_bam: valueFrom: $(inputs.input_bam.nameroot).cleaned.bam unmap_reads: contamination/contam_list @@ -163,11 +159,24 @@ steps: scatterMethod: dotproduct out: [cleaned_bam] - # Step06: merge split bams, index and mark duplicates + # Step05b: sort tie BAMs prior to merge + sort-bams: + run: bio-cwl-tools:picard/picard_SortSam.cwl + in: + alignments: contamination/output_tie_bam + sort_order: + valueFrom: $("coordinate") + validation_stringency: + valueFrom: $("SILENT") + scatter: [alignments] + scatterMethod: dotproduct + out: [sorted_alignments] + + # Step06a: merge split bams, index and mark duplicates finish: run: merge_markdup_index.cwl in: - input_bams: + input_bams: source: [cleanse/cleaned_bam, split/unmapped] linkMerge: merge_flattened output_bam: @@ -175,8 +184,33 @@ steps: valueFrom: ${return self.nameroot + ".xenocp.bam"} n_threads: n_threads out: [final_bam, flagstat] - - # Step07: QC the merged bam + + # Step06b: merge the coordinate-sorted tie bams from sort-bams (not the cleaned bams) and index + merge_tie_bam: + run: merge_markdup_index.cwl + in: + input_bams: + source: [sort-bams/sorted_alignments] + linkMerge: merge_flattened + output_bam: + source: bam + valueFrom: ${return self.nameroot + ".xenocp.tie.bam"} + n_threads: n_threads + skip_dup: + valueFrom: $(true) + out: [final_bam, flagstat] + + # Step07: Combine contam lists + merge_contam_list: + run: cat.cwl + in: + input_files: contamination/contam_list + output_file: + 
source: bam + valueFrom: ${return self.nameroot + ".contam.txt"} + out: [combined_file] + + # Step08: QC the merged bam finalqc: run: qc_bam.cwl in: @@ -201,6 +235,7 @@ doc: | $namespaces: s: http://schema.org/ + bio-cwl-tools: https://raw.githubusercontent.com/common-workflow-library/bio-cwl-tools/release/ $schemas: - https://schema.org/version/latest/schemaorg-current-https.rdf diff --git a/dependencies/lib/java/xenocp-dependencies.jar b/dependencies/lib/java/xenocp-dependencies.jar index 8ba7897..c6b43e5 100755 Binary files a/dependencies/lib/java/xenocp-dependencies.jar and b/dependencies/lib/java/xenocp-dependencies.jar differ diff --git a/dx_app/dxapp.json b/dx_app/dxapp.json index ee9edbb..0d3e5b1 100644 --- a/dx_app/dxapp.json +++ b/dx_app/dxapp.json @@ -3,7 +3,7 @@ "title": "XenoCP (St. Jude)", "summary": "Xenograft sample cleaning pipeline", "dxapi": "1.0.0", - "version": "3.1.4", + "version": "4.0.0-alpha", "openSource": true, "developers": ["user-lding", "user-mrusch", "user-athrashe"], "authorizedUsers": [ @@ -46,7 +46,7 @@ "name": "ref_name", "label": "Host Genome", "class": "string", - "choices": ["GRCm38", "MGSCv37", "Custom, specified as input"], + "choices": ["GRCm39", "GRCm38", "MGSCv37", "Custom, specified as input"], "optional": false, "default": "GRCm38", "help": "" @@ -118,7 +118,10 @@ ], "billTo": "org-stjude_cloud", "access": { - "allProjects": "VIEW" + "allProjects": "VIEW", + "network": [ + "*" + ] }, "runSpec": { "timeoutPolicy": { diff --git a/dx_app/src/stjude_xenocp.sh b/dx_app/src/stjude_xenocp.sh index e95055e..0b095f4 100644 --- a/dx_app/src/stjude_xenocp.sh +++ b/dx_app/src/stjude_xenocp.sh @@ -70,7 +70,7 @@ main() { dx download -o $local_reference_dir -r project-F5444K89PZxXjBqVJ3Pp79B4:/global/reference/Mus_musculus/$ref_name/BWA mv $local_reference_dir/BWA/* $local_reference_dir/ else - dx download -o $local_reference_dir -r project-FzJ7yx89Q0f0pBj5P2j1g0vB:/$ref_name/STAR + dx download -o $local_reference_dir -r 
project-F5444K89PZxXjBqVJ3Pp79B4:/global/reference/Mus_musculus/$ref_name/STAR reference_prefix="STAR" fi else @@ -112,13 +112,6 @@ output_extension: $output_extension aligner: $aligner EOF - echo " [*] Loading container image ..." - image_tarfile_path=/stjude/xenocp-docker.tar - if [ -e $image_tarfile_path.gz ] - then gunzip $image_tarfile_path.gz - fi - docker load -i $image_tarfile_path - echo "=== Execution ===" # Don't make assumptions about the tag that was used when the image was @@ -133,9 +126,10 @@ EOF --mount type=bind,source=$local_data_dir,target=$container_data_dir,readonly \ --mount type=bind,source=$local_reference_dir,target=$container_reference_dir,readonly \ --mount type=bind,source=$local_output_dir,target=$container_output_dir \ - $image_id \ + ghcr.io/stjude/xenocp:3.1.4 \ $container_data_dir/inputs.yml + echo "=== Wrap Up ===" echo " [*] Uploading outputs ..." mv -v $local_output_dir/*.xenocp.bam* $local_output_dir/output_bam diff --git a/wdl/tools/xenocp.wdl b/wdl/tools/xenocp.wdl index d1e2232..fdbde52 100644 --- a/wdl/tools/xenocp.wdl +++ b/wdl/tools/xenocp.wdl @@ -35,7 +35,7 @@ task get_chroms { memory: memory_gb + " GB" disk: disk_size + " GB" cpu: ncpu - docker: 'stjude/xenocp:3.1.4' + docker: 'ghcr.io/stjude/xenocp:4.0.0-alpha' maxRetries: max_retries } @@ -59,7 +59,7 @@ task extract_mismatch { input { File input_bam File input_bai - Int memory_gb = 1 + Int memory_gb = 2 Int? disk_size_gb Int max_retries = 1 } @@ -78,7 +78,7 @@ task extract_mismatch { memory: memory_gb + " GB" disk: disk_size + " GB" cpu: 1 - docker: 'stjude/xenocp:3.1.4' + docker: 'ghcr.io/stjude/xenocp:4.0.0-alpha' maxRetries: max_retries } @@ -103,7 +103,7 @@ task extract_by_chrom { File input_bam File input_bai String chromosome - Int memory_gb = 1 + Int memory_gb = 8 Int? 
disk_size_gb Int max_retries = 1 } @@ -122,7 +122,7 @@ task extract_by_chrom { memory: memory_gb + " GB" disk: disk_size + " GB" cpu: 1 - docker: 'stjude/xenocp:3.1.4' + docker: 'ghcr.io/stjude/xenocp:4.0.0-alpha' maxRetries: max_retries } @@ -175,7 +175,7 @@ task extract_unmapped { memory: memory_gb + " GB" disk: disk_size + " GB" cpu: ncpu - docker: 'stjude/xenocp:3.1.4' + docker: 'ghcr.io/stjude/xenocp:4.0.0-alpha' maxRetries: max_retries } @@ -217,7 +217,7 @@ task mapped_fastq { memory: memory_gb + " GB" disk: disk_size + " GB" cpu: 1 - docker: 'stjude/xenocp:3.1.4' + docker: 'ghcr.io/stjude/xenocp:4.0.0-alpha' maxRetries: max_retries } @@ -240,13 +240,13 @@ task mapped_fastq { task create_contam_list { input { File input_bam - String output_contam_list = basename(input_bam, ".bam") + "contam.txt" + String output_contam_list = basename(input_bam, ".bam") + ".contam.txt" String tie_bam = basename(input_bam, ".bam") + ".tie.bam" File contam_bam String stringency = "SILENT" Int? disk_size_gb Int max_retries = 1 - Int memory_gb = 1 + Int memory_gb = 2 } Float input_bam_size = size(input_bam, "GiB") @@ -262,7 +262,7 @@ task create_contam_list { memory: memory_gb + " GB" disk: disk_size + " GB" cpu: 1 - docker: 'stjude/xenocp:3.1.4' + docker: 'ghcr.io/stjude/xenocp:4.0.0-alpha' maxRetries: max_retries } @@ -295,7 +295,7 @@ task cleanse { String stringency = "SILENT" Int? 
disk_size_gb Int max_retries = 1 - Int memory_gb = 1 + Int memory_gb = 7 } Float input_bam_size = size(input_bam, "GiB") @@ -311,7 +311,7 @@ task cleanse { memory: memory_gb + " GB" disk: disk_size + " GB" cpu: 1 - docker: 'stjude/xenocp:3.1.4' + docker: 'ghcr.io/stjude/xenocp:4.0.0-alpha' maxRetries: max_retries } @@ -361,7 +361,7 @@ task merge_markdup_index { memory: memory_gb + " GB" disk: disk_size + " GB" cpu: 1 - docker: 'stjude/xenocp:3.1.4' + docker: 'ghcr.io/stjude/xenocp:4.0.0-alpha' maxRetries: max_retries } @@ -409,7 +409,7 @@ task qc { memory: memory_gb + " GB" disk: disk_size + " GB" cpu: ncpu - docker: 'stjude/xenocp:3.1.4' + docker: 'ghcr.io/stjude/xenocp:4.0.0-alpha' maxRetries: max_retries } @@ -426,3 +426,44 @@ task qc { input_bam: "Input BAM file" } } + +task combine_files { + input { + Array[File] input_files + String output_file + Int? disk_size_gb + Int max_retries = 1 + Int memory_gb = 1 + } + + Float input_file_size = size(input_files, "GiB") + Int disk_size = select_first([disk_size_gb, ceil(input_file_size * 2)]) + + command <<< + set -euo pipefail + + cat ~{sep=" " input_files} > ~{output_file} + >>> + + runtime { + memory: memory_gb + " GB" + disk: disk_size + " GB" + cpu: 1 + docker: 'ghcr.io/stjude/xenocp:4.0.0-alpha' + maxRetries: max_retries + } + + output { + File final_file = output_file + } + + meta { + author: "Andrew Thrasher" + email: "andrew.thrasher@stjude.org" + description: "This WDL tool takes a set of files and combines them with the cat utility." 
+ } + + parameter_meta { + input_files: "Input files to combine with cat" + } +} diff --git a/wdl/workflows/xenocp.wdl b/wdl/workflows/xenocp.wdl index cc27a6b..ae02ddf 100644 --- a/wdl/workflows/xenocp.wdl +++ b/wdl/workflows/xenocp.wdl @@ -27,10 +27,10 @@ version 1.0 -import "https://raw.githubusercontent.com/stjude/xenocp/3.1.4/wdl/tools/xenocp.wdl" as xenocp_tools -import "https://raw.githubusercontent.com/stjudecloud/workflows/rnaseq-standard/v3.0.0/tools/bwa.wdl" -import "https://raw.githubusercontent.com/stjudecloud/workflows/rnaseq-standard/v3.0.0/tools/star.wdl" -import "https://raw.githubusercontent.com/stjudecloud/workflows/rnaseq-standard/v3.0.0/tools/picard.wdl" +import "../tools/xenocp.wdl" as xenocp_tools +import "https://raw.githubusercontent.com/stjudecloud/workflows/rnaseq-standard/v4.0.0-alpha/tools/bwa.wdl" +import "https://raw.githubusercontent.com/stjudecloud/workflows/rnaseq-standard/v4.0.0-alpha/tools/star.wdl" +import "https://raw.githubusercontent.com/stjudecloud/workflows/rnaseq-standard/v4.0.0-alpha/tools/picard.wdl" workflow xenocp { input { @@ -49,8 +49,9 @@ workflow xenocp { aligner: "Which aligner to use to map reads to the host genome to detect contamination: [bwa aln, bwa mem, star]" skip_duplicate_marking: "If true, duplicate marking will be skipped when the cleaned BAMs are merged" } - - String name = basename(input_bam, ".bam") + ".xenocp.bam" + + String name = basename(input_bam, ".bam") + String output_name = name + ".xenocp.bam" call xenocp_tools.get_chroms { input: input_bam=input_bam, ncpu=n_threads } call xenocp_tools.extract_mismatch as mismatch { input: input_bam=input_bam, input_bai=input_bai } @@ -71,7 +72,7 @@ workflow xenocp { } if (aligner == "bwa mem") { scatter (fastq in mapped_fastq.fastq){ - call bwa.bwa_mem as bwa_mem_align { input: fastq=fastq, bwadb_tar_gz=reference_tar_gz, ncpu=n_threads } + call bwa.bwa_mem as bwa_mem_align { input: fastq=fastq, bwadb_tar_gz=reference_tar_gz, ncpu=n_threads, memory_gb=15 } 
} } if (aligner == "star") { @@ -92,16 +93,24 @@ workflow xenocp { call xenocp_tools.cleanse { input: input_bam=pair.left, unmap_reads=pair.right, stringency=validation_stringency } } - call xenocp_tools.merge_markdup_index { input: input_bams=flatten([cleanse.cleaned_bam, [unmapped.unmapped_bam]]), output_bam=name, skip_dup=skip_duplicate_marking } + call xenocp_tools.merge_markdup_index as final_bam { input: input_bams=flatten([cleanse.cleaned_bam, [unmapped.unmapped_bam]]), output_bam=output_name, skip_dup=skip_duplicate_marking } + + call xenocp_tools.qc { input: input_bam=final_bam.final_bam, input_bai=final_bam.final_bai, flagstat=final_bam.flagstat } + + scatter(bam in create_contam_list.output_tie_bam){ + call picard.sort as tie_sort { input: bam=bam, sort_order="coordinate" } + } + + call xenocp_tools.merge_markdup_index as combine_tie_bam { input: input_bams=tie_sort.sorted_bam, output_bam=name+".tie.bam", skip_dup=true } - call xenocp_tools.qc { input: input_bam=merge_markdup_index.final_bam, input_bai=merge_markdup_index.final_bai, flagstat=merge_markdup_index.flagstat } + call xenocp_tools.combine_files { input: input_files=create_contam_list.contam_list, output_file=name+".contam.txt" } output { - File bam = merge_markdup_index.final_bam - File bam_index = merge_markdup_index.final_bai - File bam_md5 = merge_markdup_index.final_md5 - File flagstat = merge_markdup_index.flagstat - Array[File] contam_list = create_contam_list.contam_list - Array[File] tie_bam = create_contam_list.output_tie_bam + File bam = final_bam.final_bam + File bam_index = final_bam.final_bai + File bam_md5 = final_bam.final_md5 + File flagstat = final_bam.flagstat + File contam_list = combine_files.final_file + File tie_bam = combine_tie_bam.final_bam } }