Skip to content

Commit

Permalink
Add Dorado basecalling workflow and update dockstore.yml
Browse files Browse the repository at this point in the history
  • Loading branch information
fraser-combe committed Sep 30, 2024
1 parent 2a8562c commit a31ee2e
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 0 deletions.
5 changes: 5 additions & 0 deletions .dockstore.yml
Original file line number Diff line number Diff line change
Expand Up @@ -281,5 +281,10 @@ workflows:
# Existing entry: registers the Snippy Streamline FASTA WDL descriptor under the
# name Snippy_Streamline_FASTA_PHB, with an empty JSON as its test parameter file.
- name: Snippy_Streamline_FASTA_PHB
subclass: WDL
primaryDescriptorPath: /workflows/phylogenetics/wf_snippy_streamline_fasta.wdl
testParameterFiles:
- /tests/inputs/empty.json
# New entry: registers the Dorado basecalling WDL descriptor under the name
# Dorado_Basecalling_PHB, reusing the same empty test parameter file.
# NOTE(review): the descriptor added in this commit defines only a `task`
# (dorado_basecall) and no `workflow` block -- confirm Dockstore accepts a
# primary descriptor without a workflow, or add a wrapping workflow.
- name: Dorado_Basecalling_PHB
subclass: WDL
primaryDescriptorPath: /workflows/utilities/wf_dorado_basecalling.wdl
testParameterFiles:
- /tests/inputs/empty.json
68 changes: 68 additions & 0 deletions workflows/utilities/wf_dorado_basecalling.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
version 1.0

task dorado_basecall {

  meta {
    description: "This task performs Dorado basecalling on POD5 files."
  }

  parameter_meta {
    input_files: "POD5 files to basecall; all of them are staged into one directory and basecalled in a single dorado run."
    dorado_model: "Model argument passed verbatim to `dorado basecaller` (for example a path to a model directory inside the container)."
    output_prefix: "Basename of the resulting FASTQ; the task output is output/<output_prefix>.fastq."
    docker: "Container image that provides the dorado executable."
  }

  input {
    Array[File] input_files
    String dorado_model
    String output_prefix
    String docker = "us-docker.pkg.dev/general-theiagen/staphb/dorado:0.8.0"
  }

  command <<<
    set -e
    # Unmatched globs expand to nothing instead of the literal pattern, so the
    # array-length checks below are reliable and never trip `set -e`.
    shopt -s nullglob

    mkdir -p pod5_input output

    # Stage the inputs in a dedicated directory. The paths are read line by line
    # from a file written by write_lines(), so paths containing whitespace are
    # not word-split by the shell.
    while IFS= read -r pod5_path; do
      [[ -n "$pod5_path" ]] || continue
      cp "$pod5_path" pod5_input/ || { echo "Error copying $pod5_path" >&2; exit 1; }
    done < "~{write_lines(input_files)}"

    # Fail with an explicit message when nothing usable was staged. (The former
    # `INPUT_FILES=$(ls *.pod5)` aborted under `set -e` before any message
    # could be printed.)
    pod5_files=(pod5_input/*.pod5)
    if (( ${#pod5_files[@]} == 0 )); then
      echo "No POD5 files found" >&2
      exit 1
    fi

    # Run Dorado basecaller using GPU.
    # dorado takes a single data argument (a file or a directory), so the
    # staging directory is passed rather than a list of individual files.
    dorado basecaller \
      "~{dorado_model}" \
      pod5_input/ \
      --device cuda:all \
      --emit-fastq \
      --output-dir output

    # Collect the FASTQ file(s) dorado wrote and expose them under one
    # predictable name. Concatenating also covers the case where more than one
    # FASTQ is produced, which a plain `mv a b c` would fail on.
    fastq_files=(output/*.fastq)
    if (( ${#fastq_files[@]} == 0 )); then
      echo "Error: No FASTQ output generated" >&2
      exit 1
    fi

    # Merge outside of output/ first so the final name can never collide with
    # one of the files still being read.
    cat "${fastq_files[@]}" > "~{output_prefix}.merged.fastq.tmp"
    rm -f "${fastq_files[@]}"
    mv "~{output_prefix}.merged.fastq.tmp" "output/~{output_prefix}.fastq"
  >>>

  output {
    File basecalled_fastq = "output/~{output_prefix}.fastq"
  }

  runtime {
    docker: "~{docker}"
    cpu: 8
    memory: "32GB"
    gpuCount: 1
  }
}

# To run this task locally with miniwdl (this file defines only the
# `dorado_basecall` task), you can override the default Docker image:
# miniwdl run /home/fraser_combe_theiagen_com/workflows/dorado_basecalling.wdl \
# input_files=/home/fraser_combe_theiagen_com/workflows/dna_r10.4.1_e8.2_260bps-FLO_PRO114-SQK_NBD114_96_260-4000.pod5 \
# dorado_model=/dorado_models/MODEL_NAME@VERSION \
# output_prefix=sample_output \
# docker=us-docker.pkg.dev/general-theiagen/staphb/dorado:new_version \
# --debug --verbose
# (Replace MODEL_NAME@VERSION with the Dorado model directory available in the image.)

0 comments on commit a31ee2e

Please sign in to comment.