Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update RBA workflows to run on Tahoma #33

Merged
merged 4 commits into from
Mar 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 8 additions & 57 deletions ReadbasedAnalysis.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@ workflow ReadbasedAnalysis {
String db_gottcha2 = "/refdata/gottcha2/RefSeq-r223/gottcha_db.BAVFPt.species.fna"
String db_kraken2 = "/refdata/kraken2/"
String db_centrifuge = "/refdata/centrifuge/p_compressed"
Int cpu = 8
String input_file
Int cpu = 8
File input_file
String proj
String prefix = sub(proj, ":", "_")
Boolean? paired = false
Boolean? long_read = false
Boolean paired = false
Boolean long_read = false
String bbtools_container = "microbiomedata/bbtools:38.96"
String docker = "microbiomedata/nmdc_taxa_profilers:1.0.8"
}
Expand Down Expand Up @@ -69,18 +69,13 @@ workflow ReadbasedAnalysis {
db_centrifuge = db_centrifuge,
docker = docker,
gottcha2_info = profilerGottcha2.info,
gottcha2_report_tsv = profilerGottcha2.report_tsv,
centrifuge_report_tsv = profilerCentrifuge.report_tsv,
centrifuge_info = profilerCentrifuge.info,
kraken2_report_tsv = profilerKraken2.report_tsv,
kraken2_info = profilerKraken2.info,
}

call finish_reads {
input:
proj=proj,
start=stage.start,
input_file=stage.read_in,
container="microbiomedata/workflowmeta:1.1.1",
gottcha2_report_tsv=profilerGottcha2.report_tsv,
gottcha2_full_tsv=profilerGottcha2.full_tsv,
Expand All @@ -105,7 +100,7 @@ workflow ReadbasedAnalysis {
File final_kraken2_report_tsv = finish_reads.kr_report_tsv
File final_kraken2_krona_html = finish_reads.kr_krona_html
File info_file = finish_reads.rb_info_file
String? info = make_info_file.profiler_info_text
String info = make_info_file.profiler_info_text
}

meta {
Expand All @@ -119,9 +114,9 @@ workflow ReadbasedAnalysis {
task stage {
input {
String container
String input_file
File input_file
Boolean? paired = false
String? memory = "4G"
String memory = "4G"
String target = "staged.fastq.gz"
String output1 = "input.left.fastq.gz"
String output2 = "input.right.fastq.gz"
Expand Down Expand Up @@ -160,12 +155,10 @@ task stage {

task finish_reads {
input {
String input_file
String container
String proj
String prefix=sub(proj, ":", "_")
String start
File prof_info_file
File prof_info_file
File? gottcha2_report_tsv
File? gottcha2_full_tsv
File? gottcha2_krona_html
Expand Down Expand Up @@ -231,45 +224,6 @@ task finish_reads {
}
}


# Copies the report, classification and Krona files produced by the three
# profilers (GOTTCHA2, Centrifuge, Kraken2) into per-tool subdirectories of
# `outdir`: <outdir>/gottcha2, <outdir>/centrifuge and <outdir>/kraken2.
#
# Inputs:
#   outdir      - destination directory; the per-tool subdirectories are
#                 created beneath it if they do not exist.
#   gottcha2_*, centrifuge_*, kraken2_* - files copied into the matching
#                 subdirectory.
#   container   - image used for the task's `docker` runtime attribute.
#
# Output:
#   fastq_files - paths matching "<outdir>/*.fastq*".
#                 NOTE(review): the command below only copies the tsv/html
#                 inputs into subdirectories and never writes a fastq file
#                 directly into outdir, so this glob presumably matches
#                 nothing unless outdir already holds such files - confirm
#                 against callers before relying on it.
task make_outputs{
    input {
        String outdir
        File gottcha2_report_tsv
        File gottcha2_full_tsv
        File gottcha2_krona_html
        File centrifuge_classification_tsv
        File centrifuge_report_tsv
        File centrifuge_krona_html
        File kraken2_classification_tsv
        File kraken2_report_tsv
        File kraken2_krona_html
        String container
    }
    # Every interpolated path is double-quoted so that directories or
    # localized file names containing spaces or glob characters are passed to
    # mkdir/cp as a single argument. Strict mode uses the canonical
    # `-euo pipefail` ordering so `-o` sits directly next to its argument.
    command<<<

        set -euo pipefail
        mkdir -p "~{outdir}/gottcha2"
        cp "~{gottcha2_report_tsv}" "~{gottcha2_full_tsv}" "~{gottcha2_krona_html}" \
            "~{outdir}/gottcha2"
        mkdir -p "~{outdir}/centrifuge"
        cp "~{centrifuge_classification_tsv}" "~{centrifuge_report_tsv}" "~{centrifuge_krona_html}" \
            "~{outdir}/centrifuge"
        mkdir -p "~{outdir}/kraken2"
        cp "~{kraken2_classification_tsv}" "~{kraken2_report_tsv}" "~{kraken2_krona_html}" \
            "~{outdir}/kraken2"
    >>>
    runtime {
        docker: container
        memory: "1 GiB"
        cpu: 1
    }
    output{
        Array[String] fastq_files = glob("~{outdir}/*.fastq*")
    }
}


task make_info_file {
input {
Boolean enabled_tools_gottcha2
Expand All @@ -279,11 +233,8 @@ task make_info_file {
String db_kraken2
String db_centrifuge
String docker
File? gottcha2_report_tsv
File? gottcha2_info
File? centrifuge_report_tsv
File? centrifuge_info
File? kraken2_report_tsv
File? kraken2_info
String info_filename = "profiler.info"
}
Expand Down
12 changes: 6 additions & 6 deletions ReadbasedAnalysisTasks.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ task profilerGottcha2 {
Array[File] READS
String DB
String PREFIX
String? RELABD_COL = "ROLLUP_DOC"
Boolean? LONG_READ = false
String RELABD_COL = "ROLLUP_DOC"
Boolean LONG_READ = false
String DOCKER
Int? CPU = 4
Int CPU = 4
}
command <<<
set -euo pipefail
Expand Down Expand Up @@ -53,7 +53,7 @@ task profilerCentrifuge {
Array[File] READS
String DB
String PREFIX
Int? CPU = 4
Int CPU = 4
String DOCKER
}
command <<<
Expand Down Expand Up @@ -97,8 +97,8 @@ task profilerKraken2 {
Array[File] READS
String DB
String PREFIX
Boolean? PAIRED = false
Int? CPU = 4
Boolean PAIRED = false
Int CPU = 4
String DOCKER
}

Expand Down
4 changes: 2 additions & 2 deletions ReadbasedAnalysis_inputs.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"ReadbasedAnalysis.input_file": "https://nmdc-edge.org/projects/KUYAOFKQW2mZJFBc/output/ReadsQC/SRR7877884-int-0.1/SRR7877884-int-0.1.anqdpht.fastq.gz",
"ReadbasedAnalysis.input_file": "/pscratch/sd/n/nmdcda/jaws_outputs/vli/98943/65dc6aa5-3fdb-4822-ba3c-0ca9ad58ef48/call-LongReadsQC/LongReadsQC/7fedfc56-3c0e-4456-9d54-b9755960fd5e/call-finish_rqc/execution/nmdc_xxxxxxx_filtered.fastq.gz",
"ReadbasedAnalysis.paired": false,
"ReadbasedAnalysis.long_read": false,
"ReadbasedAnalysis.long_read": true,
"ReadbasedAnalysis.prefix": "TEST",
"ReadbasedAnalysis.cpu": 8,
"ReadbasedAnalysis.proj": "TEST"
Expand Down
2 changes: 1 addition & 1 deletion version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
v1.0.9
v1.0.10