Skip to content

Commit

Permalink
update workflow: remove output prefix and run array through dorado
Browse files Browse the repository at this point in the history
  • Loading branch information
fraser-combe committed Oct 9, 2024
1 parent ebdee28 commit a20bb58
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 41 deletions.
49 changes: 11 additions & 38 deletions tasks/basecalling/task_dorado_basecall.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ task basecall {
input {
Array[File] input_files
String dorado_model
String output_prefix
String docker = "us-docker.pkg.dev/general-theiagen/staphb/dorado:0.8.0"
}

Expand All @@ -20,50 +19,23 @@ task basecall {
echo "Input files: ${input_files_array[@]}"
echo "Dorado model: ~{dorado_model}"

# Loop over each input file
for file in ${input_files_array[@]}; do
base_name=$(basename $file .pod5)

# Extract barcode (assuming 'barcodeXX' is part of the base name)
barcode=$(echo $base_name | grep -o 'barcode[0-9]\+')

# Create a directory for the current barcode inside the fastq folder
barcode_dir="${output_base}${barcode}"
mkdir -p ${barcode_dir}/fastqs
mkdir -p ${barcode_dir}/logs

echo "Processing file $file with base name $base_name"

# Run Dorado basecaller and store outputs in the barcode-specific directories
dorado basecaller \
/dorado_models/~{dorado_model} \
"$file" \
--device cuda:all \
--emit-fastq \
--output-dir ${barcode_dir}/fastqs > ${barcode_dir}/logs/${base_name}_basecaller.log 2>&1 || { echo "Dorado basecaller failed for $file" >&2; exit 1; }

# Rename each FASTQ file with base name and an index
generated_fastqs=(${barcode_dir}/fastqs/*.fastq)
if [[ ${#generated_fastqs[@]} -gt 0 ]]; then
for idx in ${!generated_fastqs[@]}; do
mv "${generated_fastqs[$idx]}" "${barcode_dir}/fastqs/basecalled_${base_name}_part${idx}.fastq"
done
echo "FASTQ files generated for $base_name"
else
echo "Error: No FASTQ generated for $file" >&2
exit 1
fi
done
# Run Dorado basecaller on all input files
dorado basecaller \
/dorado_models/~{dorado_model} \
${input_files_array[@]} \
--device cuda:all \
--emit-fastq \
--output-dir ${output_base} > ${output_base}/basecall.log 2>&1 || { echo "Dorado basecaller failed" >&2; exit 1; }

# Log the final directory structure for debugging
echo "Final output directory structure:"
ls -lh $output_base
>>>

output {
# Output all the FASTQ files and logs in their respective folders
Array[File] basecalled_fastqs = glob("output/fastq/barcode*/fastqs/*.fastq")
Array[File] logs = glob("output/fastq/barcode*/logs/*.log")
# Output all the FASTQ files and logs in the output folder
Array[File] basecalled_fastqs = glob("output/fastq/barcode*/**/*.fastq")
Array[File] logs = glob("output/fastq/basecall.log")
}

runtime {
Expand All @@ -75,3 +47,4 @@ task basecall {
maxRetries: 3
}
}

4 changes: 1 addition & 3 deletions workflows/utilities/wf_dorado_basecalling.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,12 @@ workflow dorado_basecalling_workflow {
input {
Array[File] input_files
String dorado_model
String output_prefix
}

call basecall_task.basecall {
input:
input_files = input_files,
dorado_model = dorado_model,
output_prefix = output_prefix
dorado_model = dorado_model
}

output {
Expand Down

0 comments on commit a20bb58

Please sign in to comment.