From 75a640b567f001aa6a5163878cc00cd579d2d60a Mon Sep 17 00:00:00 2001
From: Andrew Thrasher <adthrasher@gmail.com>
Date: Mon, 25 Nov 2024 15:03:11 -0500
Subject: [PATCH] chore: migrate scrnaseq to external repo (#188)

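* chore: migrate scrnaseq to external repo

Remove the Cell Ranger Docker image, the `cellranger` WDL tool with its
tests and test inputs, and the `scrnaseq` workflows from this repository.
Drop the `cellranger`/`scrnaseq` exclusions from the docker-build, pytest,
sprocket-check, and sprocket-lint CI workflows, since nothing is left for
them to skip.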
---
 .github/workflows/docker-build.yaml          |   2 +-
 .github/workflows/pytest.yaml                |   6 +-
 .github/workflows/sprocket-check.yaml        |   2 +-
 .github/workflows/sprocket-lint.yaml         |   4 +-
 docker/cellranger/1.1.1/Dockerfile           |  25 ---
 tests/tools/input/pbmc_1k_v3.tar.gz          |   3 -
 tests/tools/input_json/cellranger_count.json |   5 -
 tests/tools/test_cellranger.yaml             |  38 ----
 tools/cellranger.wdl                         | 189 -------------------
 workflows/scrnaseq/10x-bam-to-fastqs.wdl     |  99 ----------
 workflows/scrnaseq/scrnaseq-standard.wdl     | 105 -----------
 11 files changed, 7 insertions(+), 471 deletions(-)
 delete mode 100755 docker/cellranger/1.1.1/Dockerfile
 delete mode 100755 tests/tools/input/pbmc_1k_v3.tar.gz
 delete mode 100644 tests/tools/input_json/cellranger_count.json
 delete mode 100644 tests/tools/test_cellranger.yaml
 delete mode 100755 tools/cellranger.wdl
 delete mode 100755 workflows/scrnaseq/10x-bam-to-fastqs.wdl
 delete mode 100755 workflows/scrnaseq/scrnaseq-standard.wdl

diff --git a/.github/workflows/docker-build.yaml b/.github/workflows/docker-build.yaml
index 80425dce9..b081c1615 100644
--- a/.github/workflows/docker-build.yaml
+++ b/.github/workflows/docker-build.yaml
@@ -18,7 +18,7 @@ jobs:
           fetch-depth: 0
       - name: set matrix
         id: set-matrix
-        run: echo "images=$(find docker -maxdepth 2 -mindepth 2 -not -path "*/cellranger/*" | jq --raw-input --slurp --compact-output 'split("\n")[:-1]')" >> $GITHUB_OUTPUT
+        run: echo "images=$(find docker -maxdepth 2 -mindepth 2 | jq --raw-input --slurp --compact-output 'split("\n")[:-1]')" >> "$GITHUB_OUTPUT"
 
   build-images:
     needs: list-images
diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml
index 915481d93..03e7827c2 100644
--- a/.github/workflows/pytest.yaml
+++ b/.github/workflows/pytest.yaml
@@ -12,7 +12,7 @@ jobs:
           uses: actions/checkout@v4
         - name: set tags
           id: set-tags
-          run: echo "tags=$(find tests -name '*.yaml' -exec yq --output-format yaml '.[].tags[] ' {} \;| sort | uniq | grep -vE 'deprecated|miniwdl|cellranger' | jq -ncR '[inputs]')" >> $GITHUB_OUTPUT
+          run: echo "tags=$(find tests -name '*.yaml' -exec yq --output-format yaml '.[].tags[] ' {} \;| sort | uniq | grep -vE 'deprecated|miniwdl' | jq -ncR '[inputs]')" >> "$GITHUB_OUTPUT"
   pytest_check:
     needs: list-tags
     runs-on: ubuntu-latest
@@ -33,7 +33,7 @@ jobs:
         pip install -r requirements-dev.txt
     - name: filter tests
       run: |
-        find tests -name '*.yaml' -exec yq --output-format yaml -i 'del(.[] | select(.tags[] | test("reference|deprecated|cellranger") ) )' {} \;
+        find tests -name '*.yaml' -exec yq --output-format yaml -i 'del(.[] | select(.tags[] | test("reference|deprecated") ) )' {} \;
     - name: Run pytest-workflow
       run: |
-        pytest --git-aware --basetemp /home/runner/work/pytest --symlink --tag ${{ matrix.tag }}
\ No newline at end of file
+        pytest --git-aware --basetemp /home/runner/work/pytest --symlink --tag ${{ matrix.tag }}
diff --git a/.github/workflows/sprocket-check.yaml b/.github/workflows/sprocket-check.yaml
index 1e7a72db7..6ed862cd2 100644
--- a/.github/workflows/sprocket-check.yaml
+++ b/.github/workflows/sprocket-check.yaml
@@ -10,4 +10,4 @@ jobs:
         - name: Run sprocket
           uses: stjude-rust-labs/sprocket-action@main
           with:
-            exclude-patterns: template,scrnaseq,cellranger
\ No newline at end of file
+            exclude-patterns: template
diff --git a/.github/workflows/sprocket-lint.yaml b/.github/workflows/sprocket-lint.yaml
index 80ff79fdb..60d0867e5 100644
--- a/.github/workflows/sprocket-lint.yaml
+++ b/.github/workflows/sprocket-lint.yaml
@@ -11,7 +11,7 @@ jobs:
           uses: stjude-rust-labs/sprocket-action@main
           with:
             lint: true
-            exclude-patterns: template,scrnaseq,cellranger
+            exclude-patterns: template
             deny-warnings: true
             deny-notes: true
-            except: TrailingComma,ContainerValue
\ No newline at end of file
+            except: TrailingComma,ContainerValue
diff --git a/docker/cellranger/1.1.1/Dockerfile b/docker/cellranger/1.1.1/Dockerfile
deleted file mode 100755
index a03cf692e..000000000
--- a/docker/cellranger/1.1.1/Dockerfile
+++ /dev/null
@@ -1,25 +0,0 @@
-# Supply a valid download link and md5sum from "https://support.10xgenomics.com/single-cell-gene-expression/software/downloads/latest"
-
-FROM ubuntu:20.04
-
-ARG CELLRANGER_URL
-ARG CELLRANGER_MD5
-
-RUN apt-get update \
-    && apt-get upgrade -y \
-    && apt-get install curl -y \
-    && rm -r /var/lib/apt/lists/*
-
-WORKDIR /opt
-
-RUN curl -o cellranger.tar.gz \
-    ${CELLRANGER_URL} \
-    && echo "${CELLRANGER_MD5}  cellranger.tar.gz" > cellranger.tar.gz.md5 \
-    && md5sum -c cellranger.tar.gz.md5 \
-    && tar -xzvf cellranger.tar.gz \
-    && mv cellranger-* cellranger \
-    && rm cellranger.tar.gz*
-
-ENV PATH "/opt/cellranger:$PATH"
-
-ENTRYPOINT [ "cellranger" ]
diff --git a/tests/tools/input/pbmc_1k_v3.tar.gz b/tests/tools/input/pbmc_1k_v3.tar.gz
deleted file mode 100755
index 972318ae3..000000000
--- a/tests/tools/input/pbmc_1k_v3.tar.gz
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7461ad873a32dc1f55931c0c36280623913368b07babf4ff97e2a71c3d918453
-size 1468996
diff --git a/tests/tools/input_json/cellranger_count.json b/tests/tools/input_json/cellranger_count.json
deleted file mode 100644
index 2793d73c9..000000000
--- a/tests/tools/input_json/cellranger_count.json
+++ /dev/null
@@ -1,5 +0,0 @@
-{
-    "fastqs_tar_gz": "tests/tools/input/pbmc_1k_v3.tar.gz",
-    "transcriptome_tar_gz": "tests/tools/input/GRCh38.tar.gz",
-    "id": "pbmc_1k_v3"
-}
\ No newline at end of file
diff --git a/tests/tools/test_cellranger.yaml b/tests/tools/test_cellranger.yaml
deleted file mode 100644
index 421047ddf..000000000
--- a/tests/tools/test_cellranger.yaml
+++ /dev/null
@@ -1,38 +0,0 @@
-- name: cellranger_count
-  tags:
-    - miniwdl
-    - cellranger
-  command: >-
-    miniwdl run --verbose -d test-output/. --task count -i tests/tools/input_json/cellranger_count.json tools/cellranger.wdl
-  files:
-    - path: test-output/out/bam/possorted_genome_bam.bam
-    - path: test-output/out/bam_index/possorted_genome_bam.bam.bai
-    - path: test-output/out/qc/metrics_summary.csv
-    - path: test-output/out/barcodes/barcodes.tsv.gz
-    - path: test-output/out/features/features.tsv.gz
-    - path: test-output/out/matrix/matrix.mtx.gz
-    - path: test-output/out/filtered_gene_h5/filtered_feature_bc_matrix.h5
-    - path: test-output/out/raw_gene_h5/raw_feature_bc_matrix.h5
-    - path: test-output/out/raw_barcodes/barcodes.tsv.gz
-    - path: test-output/out/raw_features/features.tsv.gz
-    - path: test-output/out/raw_matrix/matrix.mtx.gz
-    - path: test-output/out/mol_info_h5/molecule_info.h5
-    - path: test-output/out/web_summary/web_summary.html
-    - path: test-output/out/cloupe/cloupe.cloupe
-
-- name: cellranger_bamtofastq
-  tags:
-    - miniwdl
-    - cellranger
-  command: >-
-    miniwdl run --verbose -d test-output/. --task bamtofastq tools/cellranger.wdl bam="tests/tools/input/possorted_genome_bam.bam"
-  files:
-    - path: test-output/out/fastqs/0/bamtofastq_S1_L001_R1_001.fastq.gz
-    - path: test-output/out/fastqs/1/bamtofastq_S1_L001_R2_001.fastq.gz
-    - path: test-output/out/fastqs/2/bamtofastq_S1_L002_R1_001.fastq.gz
-    - path: test-output/out/fastqs/3/bamtofastq_S1_L002_R2_001.fastq.gz
-    - path: test-output/out/fastqs_archive/archive.tar.gz
-    - path: test-output/out/read_one_fastq_gz/0/bamtofastq_S1_L001_R1_001.fastq.gz
-    - path: test-output/out/read_one_fastq_gz/1/bamtofastq_S1_L002_R1_001.fastq.gz
-    - path: test-output/out/read_two_fastq_gz/0/bamtofastq_S1_L001_R2_001.fastq.gz
-    - path: test-output/out/read_two_fastq_gz/1/bamtofastq_S1_L002_R2_001.fastq.gz
diff --git a/tools/cellranger.wdl b/tools/cellranger.wdl
deleted file mode 100755
index f97c66577..000000000
--- a/tools/cellranger.wdl
+++ /dev/null
@@ -1,189 +0,0 @@
-## Cell Ranger
-##
-## This WDL file wrap the [10x Genomics Cell Ranger](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/what-is-cell-ranger) tool.
-## Cell Ranger is a tool for handling scRNA-Seq data.
-#@ except: LineWidth
-
-version 1.1
-
-task count {
-    meta {
-        description: "This WDL task runs Cell Ranger count to generate an aligned BAM and feature counts from scRNA-Seq data."
-        outputs: {
-            bam: "Aligned BAM file",
-            bam_index: "BAM index file",
-            qc: "Quality control metrics in CSV format",
-            barcodes: "Barcodes in gzipped TSV format",
-            features: "Filtered features in gzipped TSV format",
-            matrix: "Filtered matrix of features",
-            filtered_gene_h5: "Filtered gene matrix in H5 format",
-            raw_gene_h5: "Raw gene matrix in H5 format",
-            raw_barcodes: "Raw barcodes in gzipped TSV format",
-            raw_features: "Raw features in gzipped TSV format",
-            raw_matrix: "Raw matrix of features",
-            mol_info_h5: "Molecule information in H5 format",
-            web_summary: "HTML summary of the run",
-            cloupe: "Cloupe file for visualization",
-        }
-    }
-
-    parameter_meta {
-        fastqs_tar_gz: "Path to the FASTQ folder archive in .tar.gz format"
-        transcriptome_tar_gz: "Path to Cell Ranger-compatible transcriptome reference in .tar.gz format"
-        id: "A unique run ID"
-        use_all_cores: "Use all cores? Recommended for cloud environments."
-        ncpu: "Number of cores to allocate for task"
-        memory_gb: "RAM to allocate for task, specified in GB"
-        modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB."
-    }
-
-    input {
-        File fastqs_tar_gz
-        File transcriptome_tar_gz
-        String id
-        Boolean use_all_cores = false
-        Int ncpu = 1
-        Int memory_gb = 16
-        Int modify_disk_size_gb = 0
-    }
-
-    Float fastq_size = size(fastqs_tar_gz, "GiB")
-    Float transcriptome_size = size(transcriptome_tar_gz, "GiB")
-    Int disk_size_gb = (
-        ceil((fastq_size + transcriptome_size) * 2) + 10 + modify_disk_size_gb
-    )
-
-    command <<<
-        set -euo pipefail
-
-        n_cores=~{ncpu}
-        if ~{use_all_cores}; then
-            n_cores=$(nproc)
-        fi
-
-        mkdir transcriptome_dir
-        tar -xzf ~{transcriptome_tar_gz} \
-            -C transcriptome_dir \
-            --strip-components 1 \
-            --no-same-owner
-
-        mkdir fastqs
-        tar -xzf ~{fastqs_tar_gz} -C fastqs --no-same-owner
-
-        files=(fastqs/*.fastq.gz)
-        # sample parameter to cellranger count must match
-        # the sample prefix contained in the FASTQ file.
-        # So we infer it here by manipulating the file name.
-        # expected sample name extension comes from:
-        # https://support.illumina.com/content/dam/illumina-support/documents/documentation/software_documentation/bcl2fastq/bcl2fastq2-v2-20-software-guide-15051736-03.pdf
-        sample_id="$(basename "${files[0]}" | sed -E 's/_S[1-9]_L[0-9]{3}_[I,R][1,2]_001.fastq.gz$//')"
-
-        cellranger count \
-            --id ~{id} \
-            --transcriptome transcriptome_dir \
-            --fastqs fastqs \
-            --sample "${sample_id}" \
-            --jobmode local \
-            --localcores "$n_cores" \
-            --localmem ~{memory_gb} \
-            --disable-ui
-    >>>
-
-    output {
-        File bam = glob("*/outs/possorted_genome_bam.bam")[0]
-        File bam_index = glob("*/outs/possorted_genome_bam.bam.bai")[0]
-        File qc = glob("*/outs/metrics_summary.csv")[0]
-        File barcodes = glob("*/outs/filtered_feature_bc_matrix/barcodes.tsv.gz")[0]
-        File features = glob("*/outs/filtered_feature_bc_matrix/features.tsv.gz")[0]
-        File matrix = glob("*/outs/filtered_feature_bc_matrix/matrix.mtx.gz")[0]
-        File filtered_gene_h5 = glob("*/outs/filtered_feature_bc_matrix.h5")[0]
-        File raw_gene_h5 = glob("*/outs/raw_feature_bc_matrix.h5")[0]
-        File raw_barcodes = glob("*/outs/raw_feature_bc_matrix/barcodes.tsv.gz")[0]
-        File raw_features = glob("*/outs/raw_feature_bc_matrix/features.tsv.gz")[0]
-        File raw_matrix = glob("*/outs/raw_feature_bc_matrix/matrix.mtx.gz")[0]
-        File mol_info_h5 = glob("*/outs/molecule_info.h5")[0]
-        File web_summary = glob("*/outs/web_summary.html")[0]
-        File cloupe = glob("*/outs/cloupe.cloupe" )[0]
-    }
-
-    runtime {
-        cpu: ncpu
-        memory: "~{memory_gb} GB"
-        disks: "~{disk_size_gb} GB"
-        container: "ghcr.io/stjudecloud/cellranger:1.1.1"
-        maxRetries: 1
-    }
-}
-
-task bamtofastq {
-    meta {
-        description: "This WDL task runs the 10x bamtofastq tool to convert Cell Ranger generated BAM files back to FASTQ files"
-        outputs: {
-            fastqs: "FASTQ files",
-            fastqs_archive: "FASTQ files in a tarball",
-            read_one_fastq_gz: "Read 1 FASTQ files",
-            read_two_fastq_gz: "Read 2 FASTQ files",
-        }
-    }
-
-    parameter_meta {
-        bam: "Input BAM to convert to Cell Ranger compatible fastqs"
-        cellranger11: "Convert a BAM produced by Cell Ranger 1.0-1.1"
-        longranger20: "Convert a BAM produced by Longranger 2.0"
-        gemcode: "Convert a BAM produced from GemCode data (Longranger 1.0 - 1.3)"
-        use_all_cores: "Use all cores? Recommended for cloud environments."
-        ncpu: "Number of cores to allocate for task"
-        memory_gb: "RAM to allocate for task, specified in GB"
-        modify_disk_size_gb: "Add to or subtract from dynamic disk space allocation. Default disk size is determined by the size of the inputs. Specified in GB."
-    }
-
-    input {
-        File bam
-        Boolean cellranger11 = false
-        Boolean longranger20 = false
-        Boolean gemcode = false
-        Boolean use_all_cores = false
-        Int ncpu = 1
-        Int memory_gb = 40
-        Int modify_disk_size_gb = 0
-    }
-
-    Float bam_size = size(bam, "GiB")
-    Int disk_size_gb = ceil(bam_size * 2) + 10 + modify_disk_size_gb
-
-    String data_arg = (
-        if (cellranger11) then "--cr11"
-        else if (longranger20) then "--lr10"
-        else if (gemcode) then "--gemcode"
-        else ""
-    )
-
-    command <<<
-        set -euo pipefail
-
-        n_cores=~{ncpu}
-        if ~{use_all_cores}; then
-            n_cores=$(nproc)
-        fi
-
-        cellranger bamtofastq --nthreads "$n_cores" ~{data_arg} ~{bam} fastqs
-
-        cd fastqs/*/
-        tar -czf archive.tar.gz ./*.fastq.gz
-    >>>
-
-    output {
-        Array[File] fastqs = glob("fastqs/*/*fastq.gz")
-        File fastqs_archive = glob("fastqs/*/*.tar.gz")[0]
-        Array[File] read_one_fastq_gz = glob("fastqs/*/*R1*.fastq.gz")
-        Array[File] read_two_fastq_gz = glob("fastqs/*/*R2*.fastq.gz")
-    }
-
-    runtime {
-        cpu: ncpu
-        memory: "~{memory_gb} GB"
-        disks: "~{disk_size_gb} GB"
-        container: "ghcr.io/stjudecloud/cellranger:1.1.1"
-        maxRetries: 1
-    }
-}
diff --git a/workflows/scrnaseq/10x-bam-to-fastqs.wdl b/workflows/scrnaseq/10x-bam-to-fastqs.wdl
deleted file mode 100755
index d57f116ba..000000000
--- a/workflows/scrnaseq/10x-bam-to-fastqs.wdl
+++ /dev/null
@@ -1,99 +0,0 @@
-version 1.1
-
-import "../../tools/cellranger.wdl"
-import "../../tools/fq.wdl"
-import "../../tools/samtools.wdl"
-
-workflow cell_ranger_bam_to_fastqs {
-    meta {
-        description: "Convert a 10x Genomics BAM file to FASTQs."
-        allowNestedInputs: true
-        outputs: {
-            fastqs: "FASTQ files with reads.",
-            fastqs_archive: "Compressed archive of FASTQ files.",
-            read1s: "Gzipped read 1 FASTQ files.",
-            read2s: "Gzipped read 2 FASTQ files.",
-        }
-    }
-
-    parameter_meta {
-        bam: "BAM file to split into FASTQs."
-        cellranger11: "Convert a BAM produced by Cell Ranger 1.0-1.1"
-        longranger20: "Convert a BAM produced by Longranger 2.0"
-        gemcode: "Convert a BAM produced from GemCode data (Longranger 1.0 - 1.3)"
-        use_all_cores: "Use all cores for multi-core steps?"
-    }
-
-    input {
-        File bam
-        Boolean cellranger11 = false
-        Boolean longranger20 = false
-        Boolean gemcode = false
-        Boolean use_all_cores = false
-    }
-
-    call samtools.quickcheck { input: bam }
-    call cellranger.bamtofastq { input:
-        bam,
-        cellranger11,
-        longranger20,
-        gemcode,
-        use_all_cores,
-    }
-    scatter (reads in zip(bamtofastq.read_one_fastq_gz, bamtofastq.read_two_fastq_gz)) {
-        call fq.fqlint { input:
-            read_one_fastq = reads.left,
-            read_two_fastq = reads.right,
-        }
-    }
-
-    output {
-        Array[File] fastqs = bamtofastq.fastqs
-        File fastqs_archive = bamtofastq.fastqs_archive
-        Array[File] read1s = bamtofastq.read_one_fastq_gz
-        Array[File] read2s = bamtofastq.read_two_fastq_gz
-    }
-}
-
-task parse_input {
-    meta {
-        description: "Parse 10x-bam-to-fastqs workflow inputs and validate"
-        outputs: {
-            input_check: "String indicating if input checks passed."
-        }
-    }
-
-    parameter_meta {
-        cellranger11: "Convert a BAM produced by Cell Ranger 1.0-1.1"
-        longranger20: "Convert a BAM produced by Longranger 2.0"
-        gemcode: "Convert a BAM produced from GemCode data (Longranger 1.0 - 1.3)"
-    }
-
-    input {
-        Boolean cellranger11
-        Boolean longranger20
-        Boolean gemcode
-    }
-
-    Int exclusive_arg = (if cellranger11 then 1 else 0)
-        + (if longranger20 then 1 else 0)
-        + (if gemcode then 1 else 0)
-
-    command <<<
-        if [ "~{exclusive_arg}" -gt 1 ]; then
-            >&2 echo "Only one of cellranger11, longranger20, or gemcode can be set"
-            exit 1
-        fi
-    >>>
-
-    output {
-        String input_check = "passed"
-    }
-
-    runtime {
-        memory: "4 GB"
-        disks: "10 GB"
-        container: "ghcr.io/stjudecloud/util:1.3.0"
-        maxRetries: 1
-    }
-}
diff --git a/workflows/scrnaseq/scrnaseq-standard.wdl b/workflows/scrnaseq/scrnaseq-standard.wdl
deleted file mode 100755
index 1e09fa69e..000000000
--- a/workflows/scrnaseq/scrnaseq-standard.wdl
+++ /dev/null
@@ -1,105 +0,0 @@
-version 1.1
-
-import "../../tools/cellranger.wdl"
-import "../../tools/md5sum.wdl"
-import "../../tools/ngsderive.wdl"
-import "../../tools/picard.wdl"
-import "../../tools/samtools.wdl"
-import "./10x-bam-to-fastqs.wdl" as bam_to_fastqs
-
-workflow scrnaseq_standard {
-    meta {
-        description: "Align 10x Genomics FASTQ files to a reference genome and perform quantification."
-        allowNestedInputs: true
-        outputs: {
-            harmonized_bam: "Aligned BAM file",
-            bam_checksum: "Checksum of aligned BAM file",
-            bam_index: "Index of aligned BAM file",
-            qc: "Quality control metrics",
-            barcodes: "Barcode information",
-            features: "Feature information",
-            matrix: "Gene expression matrix",
-            filtered_gene_h5: "Filtered gene expression matrix",
-            raw_gene_h5: "Raw gene expression matrix",
-            raw_barcodes: "Raw barcode information",
-            raw_features: "Raw feature information",
-            raw_matrix: "Raw gene expression matrix",
-            mol_info_h5: "Molecule information",
-            web_summary: "HTML summary",
-            inferred_strandedness: "Inferred strandedness",
-        }
-    }
-
-    parameter_meta {
-        bam: "Input BAM format file to quality check"
-        gtf: "Gzipped GTF feature file"
-        transcriptome_tar_gz: "Database of reference files for Cell Ranger. Can be downloaded from 10x Genomics."
-        prefix: "Prefix for output files"
-        validate_input: "Ensure input BAM is well-formed before beginning harmonization?"
-        use_all_cores: "Use all cores for multi-core steps?"
-        subsample_n_reads: "Only process a random sampling of `n` reads. <=`0` for processing entire input BAM."
-    }
-
-    input {
-        File bam
-        File gtf
-        File transcriptome_tar_gz
-        String prefix = basename(bam, ".bam")
-        Boolean validate_input = true
-        Boolean use_all_cores = false
-        Int subsample_n_reads = -1
-    }
-
-    if (validate_input) {
-        call picard.validate_bam as validate_input_bam { input:
-            bam,
-        }
-    }
-
-    if (subsample_n_reads > 0) {
-        call samtools.subsample { input:
-            bam,
-            desired_reads = subsample_n_reads,
-            use_all_cores,
-        }
-    }
-    File selected_bam = select_first([subsample.sampled_bam, bam])
-
-    call bam_to_fastqs.cell_ranger_bam_to_fastqs { input:
-        bam = selected_bam,
-        use_all_cores,
-    }
-
-    call cellranger.count { input:
-        fastqs_tar_gz = cell_ranger_bam_to_fastqs.fastqs_archive,
-        transcriptome_tar_gz,
-        id = prefix,
-        use_all_cores,
-    }
-    call picard.validate_bam { input: bam = count.bam }
-    call ngsderive.strandedness { input:
-        bam = count.bam,
-        bam_index = count.bam_index,
-        gene_model = gtf,
-    }
-
-    call md5sum.compute_checksum { input: file = count.bam }
-
-    output {
-        File harmonized_bam = count.bam
-        File bam_checksum = compute_checksum.md5sum
-        File bam_index = count.bam_index
-        File qc = count.qc
-        File barcodes = count.barcodes
-        File features = count.features
-        File matrix = count.matrix
-        File filtered_gene_h5 = count.filtered_gene_h5
-        File raw_gene_h5 = count.raw_gene_h5
-        File raw_barcodes = count.raw_barcodes
-        File raw_features = count.raw_features
-        File raw_matrix = count.raw_matrix
-        File mol_info_h5 = count.mol_info_h5
-        File web_summary = count.web_summary
-        File inferred_strandedness = strandedness.strandedness_file
-    }
-}