
Updates to complete skeleton of pipeline
DLBPointon committed Aug 9, 2024
1 parent 2470082 commit 56760f8
Showing 5 changed files with 56 additions and 74 deletions.
assets/test.yaml: 2 additions & 2 deletions
@@ -1,6 +1,6 @@
assembly_id: Oscheius_DF5033
- reference_hap1: /nfs/users/nfs_d/dp24/sanger-tol-ear/test.fa
- reference_hap2: /nfs/users/nfs_d/dp24/sanger-tol-ear/test.fa
+ reference_hap1: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest_V2/assembly/pyoelii_tiny_testfile_with_adapters.fa
+ reference_hap2: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest_V2/assembly/pyoelii_tiny_testfile_with_adapters.fa
longread:
type: hifi
dir: /lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/genomic_data/nxOscSpes1/pacbio/fasta/
conf/modules.config: 8 additions & 0 deletions
@@ -30,4 +30,12 @@ process {
ext.prefix = { "${meta.id}_sorted"}
}

+ withName: SANGER_TOL_BTK {
+     ext.args = "--blastx_outext 'txt'"
+     ext.executor = "bsub -Is -tty -e test.e -o test.log -n 2 -q oversubscribed -M1400 -R'select[mem>1400] rusage[mem=1400] span[hosts=1]'"
+     ext.profiles = "singularity,sanger"
+     ext.get_versions = "lsid | head -n1 | cut -d ',' -f 1"
+     ext.version = "draft_assemblies"
+ }

}
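For orientation, the new ext values above are read inside the process through task.ext, mirroring the pattern used by modules/local/sanger_tol_btk.nf below. A condensed sketch of that pattern (illustrative only, not the full module; inputs and outputs omitted):

process SANGER_TOL_BTK {

    script:
    // Each value falls back to a default when the config does not set it.
    def args             = task.ext.args     ?: ""                  // extra flags for the nested run
    def executor         = task.ext.executor ?: ""                  // e.g. the bsub wrapper above
    def profiles         = task.ext.profiles ?: ""                  // e.g. "singularity,sanger"
    def pipeline_version = task.ext.version  ?: "draft_assemblies"  // branch/tag of sanger-tol/blobtoolkit
    """
    $executor 'nextflow run sanger-tol/blobtoolkit -r $pipeline_version -profile $profiles $args'
    """
}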
modules/local/sanger_tol_btk.nf: 28 additions & 32 deletions
@@ -17,44 +17,41 @@ process SANGER_TOL_BTK {
val gca_accession

output:
- tuple val(meta), path("${meta.id}_btk_out/blobtoolkit/draft"), emit: dataset
- path("${meta.id}_btk_out/blobtoolkit/plots"), emit: plots
- path("${meta.id}_btk_out/blobtoolkit/draft/summary.json.gz"), emit: summary_json
- path("${meta.id}_btk_out/busco"), emit: busco_data
- path("${meta.id}_btk_out/multiqc"), emit: multiqc_report
- path("blobtoolkit_pipeline_info"), emit: pipeline_info
- path "versions.yml", emit: versions
+ tuple val(meta), path("${meta.id}_btk_out/blobtoolkit/REFERENCE"), emit: dataset
+ path("${meta.id}_btk_out/blobtoolkit/plots"), emit: plots
+ path("${meta.id}_btk_out/blobtoolkit/REFERENCE/summary.json.gz"), emit: summary_json
+ path("${meta.id}_btk_out/busco"), emit: busco_data
+ path("${meta.id}_btk_out/multiqc"), emit: multiqc_report
+ path("blobtoolkit_pipeline_info"), emit: pipeline_info
+ path "versions.yml", emit: versions

script:
def prefix = task.ext.prefix ?: "${meta.id}"
def args = task.ext.args ?: ""
def executor = task.ext.executor ?: ""
def profiles = task.ext.profiles ?: ""
def get_version = task.ext.version_data ?: "UNKNOWN - SETTING NOT SET"
def btk_config = btk_config_file ? "-c $btk_config_file" : ""
- def pipeline_version = task.ext.version ?: "main"
+ def pipeline_version = task.ext.version ?: "draft_assemblies"
// YAML used to avoid the use of GCA accession number
// https://github.com/sanger-tol/blobtoolkit/issues/77

// Seems to be an issue where a nested pipeline can't see the files in the same directory
// Running realpath gets around this but the files copied into the folder are
- // now just wasted space.
+ // now just wasted space. Should be fixed by using Mahesh's method of nesting, but
+ // this is proving a bit complicated with BTK

// outdir should be an arg

// --accession draft \\

- // blastx and blastp use the same database hence the StageAs
+ // blastx and blastp can use the same database hence the StageAs


"""
$executor 'nextflow run sanger-tol/blobtoolkit \\
-r $pipeline_version \\
-profile $profiles \\
--input "\$(realpath $samplesheet_csv)" \\
- --outdir ${prefix}_btk_out \\
- --fasta "\$(realpath REFERENCE.fa)" \\
- --yaml "\$(realpath BTK.yaml)" \\
+ --outdir ${meta.id}_btk_out \\
+ --fasta ./REFERENCE.fa \\
--busco_lineages $busco_lineages \\
--taxon $taxon \\
--taxdump "\$(realpath $tax_dump)" \\
@@ -64,7 +61,7 @@ process SANGER_TOL_BTK {
$btk_config \\
$args'
- mv ${prefix}_btk_out/pipeline_info blobtoolkit_pipeline_info
+ mv ${meta.id}_btk_out/pipeline_info blobtoolkit_pipeline_info
cat <<-END_VERSIONS > versions.yml
"${task.process}":
@@ -75,27 +72,26 @@
"""

stub:
- def prefix = task.ext.prefix ?: "${meta.id}"
- def pipeline_version = task.ext.version ?: "main"
+ def pipeline_version = task.ext.version ?: "draft_assemblies"

"""
- mkdir -p ${prefix}_btk_out/blobtoolkit/$gca_accession
- touch ${prefix}_btk_out/blobtoolkit/$gca_accession/test.json.gz
+ mkdir -p ${meta.id}_btk_out/blobtoolkit/${meta.id}_out
+ touch ${meta.id}_btk_out/blobtoolkit/${meta.id}_out/test.json.gz
- mkdir ${prefix}_btk_out/blobtoolkit/plots
- touch ${prefix}_btk_out/blobtoolkit/plots/test.png
+ mkdir ${meta.id}_btk_out/blobtoolkit/plots
+ touch ${meta.id}_btk_out/blobtoolkit/plots/test.png
- mkdir ${prefix}_btk_out/busco
- touch ${prefix}_btk_out/busco/test.batch_summary.txt
- touch ${prefix}_btk_out/busco/test.fasta.txt
- touch ${prefix}_btk_out/busco/test.json
+ mkdir ${meta.id}_btk_out/busco
+ touch ${meta.id}_btk_out/busco/test.batch_summary.txt
+ touch ${meta.id}_btk_out/busco/test.fasta.txt
+ touch ${meta.id}_btk_out/busco/test.json
- mkdir ${prefix}_btk_out/multiqc
- mkdir ${prefix}_btk_out/multiqc/multiqc_data
- mkdir ${prefix}_btk_out/multiqc/multiqc_plots
- touch ${prefix}_btk_out/multiqc/multiqc_report.html
+ mkdir ${meta.id}_btk_out/multiqc
+ mkdir ${meta.id}_btk_out/multiqc/multiqc_data
+ mkdir ${meta.id}_btk_out/multiqc/multiqc_plots
+ touch ${meta.id}_btk_out/multiqc/multiqc_report.html
- mv ${prefix}_btk_out/pipeline_info blobtoolkit_pipeline_info
+ mv ${meta.id}_btk_out/pipeline_info blobtoolkit_pipeline_info
cat <<-END_VERSIONS > versions.yml
"${task.process}":
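For clarity, once the ext settings from conf/modules.config are substituted in, the job submitted by the script block above looks roughly like the following. The samplesheet path, lineage, taxon ID and taxdump path are placeholders, quoting is simplified, and the flags hidden inside the collapsed hunk (plus any optional -c config) are omitted:

# Illustrative only: approximate resolved form of the nested BlobToolKit run (placeholder values).
bsub -Is -tty -e test.e -o test.log -n 2 -q oversubscribed -M1400 -R'select[mem>1400] rusage[mem=1400] span[hosts=1]' \
    'nextflow run sanger-tol/blobtoolkit \
        -r draft_assemblies \
        -profile singularity,sanger \
        --input /abs/path/to/Oscheius_DF5033_samplesheet.csv \
        --outdir Oscheius_DF5033_btk_out \
        --fasta ./REFERENCE.fa \
        --busco_lineages <lineages_from_yaml> \
        --taxon <taxid_from_yaml> \
        --taxdump /abs/path/to/taxdump \
        --blastx_outext txt'     # from ext.args; quoting simplified here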
subworkflows/local/yaml_input.nf: 3 additions & 3 deletions
@@ -86,14 +86,14 @@ workflow YAML_INPUT {
//
// LOGIC: Building BlobToolKit specific channels
//
- btk_nt_database = Channel.of([inputs.assembly_id], inputs.btk.nt_database)
+ btk_nt_database = Channel.of(inputs.btk.nt_database)
btk_nt_database_prefix = Channel.of(inputs.btk.nt_database_prefix)
- btk_nt_diamond_database = Channel.of(inputs.btk.diamond_nt_database_path)
+ btk_nt_diamond_database = Channel.of(inputs.btk.diamond_nr_database_path)
btk_un_diamond_database = Channel.of(inputs.btk.diamond_uniprot_database_path)
btk_ncbi_taxonomy_path = Channel.of(inputs.btk.ncbi_taxonomy_path)
btk_ncbi_lineage_path = Channel.of(inputs.btk.ncbi_rankedlineage_path)
btk_yaml = Channel.of(inputs.btk.btk_yaml)
- btk_taxid = Channel.of([inputs.assembly_id], inputs.btk.taxid)
+ btk_taxid = Channel.of(inputs.btk.taxid)
btk_gca_accession = Channel.of(inputs.btk.gca_accession)
busco_lineages = Channel.of(inputs.btk.lineages)

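The two [inputs.assembly_id] removals above change what the channels actually carry: Channel.of() emits one item per argument, so the old calls produced two items (a single-element list holding the assembly id, then the value), while the new calls emit just the value. A minimal standalone sketch with placeholder values:

workflow {
    // Stand-in for the parsed YAML; values are placeholders.
    def inputs = [assembly_id: 'Oscheius_DF5033', btk: [nt_database: '/path/to/nt/']]

    // Old form: two items per channel, the wrapped assembly id first.
    Channel.of([inputs.assembly_id], inputs.btk.nt_database).view { "old: $it" }
    // old: [Oscheius_DF5033]
    // old: /path/to/nt/

    // New form: a single item, the value itself.
    Channel.of(inputs.btk.nt_database).view { "new: $it" }
    // new: /path/to/nt/
}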
workflows/ear.nf: 15 additions & 37 deletions
@@ -134,8 +134,6 @@ workflow EAR {
)
ch_versions = ch_versions.mix(SE_MAPPING.out.versions)

- SE_MAPPING.out.mapped_bam.view()

ch_align_bam
.mix( SE_MAPPING.out.mapped_bam )
.set { merged_bam }
@@ -156,8 +154,6 @@
.set { merged_bam }
}

- merged_bam.view()

//
// MODULE: SORT MAPPED BAM
//
@@ -172,56 +168,38 @@
//
YAML_INPUT.out.sample_id
.combine(merged_bam)
- .map{ sample_id, pacbio_path ->
+ .map{ sample_id, pacbio_meta, pacbio_path ->
tuple( [id: sample_id],
pacbio_path
)
}
- .set { samplesheet_input }
+ .set { mapped_bam }


GENERATE_SAMPLESHEET(
- samplesheet_input
+ mapped_bam
)
ch_versions = ch_versions.mix( GENERATE_SAMPLESHEET.out.versions )

//
// MODULE: Run Sanger-ToL/BlobToolKit
// - This was built using: https://github.com/mahesh-panchal/nf-cascade
//

- // BLOBTOOLKIT(
- //     "sanger-tol/blobtoolkit",
- //     [
- //         "-r 0.5.0",
- //         "--input",
- //         GENERATE_SAMPLESHEET.out.csv,
- //         "--fasta",
- //         reference,
- //         "--yaml",
- //         btk_yaml,
- //         "-taxon",
- //         btk_taxon,
- //         "--taxdump",
- //         btk_taxdump,
- //         "--blastp",
- //         btk_blastp,
- //         "--blastn",
- //         btk_blastn,
- //         "--blastx",
- //         btk_uniprot,
- //         "-profile singularity,sanger"
- //     ].join(" ").trim(), // workflow opts
- //     Channel.value([]),//readWithDefault( params.demo.params_file, Channel.value([]) ), // params file
- //     Channel.value([]),//readWithDefault( params.demo.input, Channel.value([]) ), // samplesheet
- //     Channel.value([])//readWithDefault( params.demo.add_config, Channel.value([]) ), // custom config
- // )
+ YAML_INPUT.out.reference_hap1.view{ it -> "Reference: $it"}
+ mapped_bam.view{ it -> "samplesheet: $it"}
+ GENERATE_SAMPLESHEET.out.csv.view{ it -> "samplesheetcsv: $it"}
+ YAML_INPUT.out.btk_un_diamond_database.view{ it -> "un diamond: $it"}
+ YAML_INPUT.out.btk_nt_database.view{ it -> "nt diamond: $it"}
+ YAML_INPUT.out.btk_ncbi_taxonomy_path.view{ it -> "Taxdump: $it"}
+ YAML_INPUT.out.btk_yaml.view{ it -> "btk_yaml: $it"}
+ YAML_INPUT.out.busco_lineages.view{ it -> "lineages: $it"}
+ YAML_INPUT.out.btk_taxid.view{ it -> "TAXID: $it"}

SANGER_TOL_BTK (
YAML_INPUT.out.reference_hap1,
- samplesheet_input,
+ mapped_bam,
GENERATE_SAMPLESHEET.out.csv,
YAML_INPUT.out.btk_un_diamond_database,
YAML_INPUT.out.btk_nt_diamond_database,
YAML_INPUT.out.btk_nt_database,
YAML_INPUT.out.btk_un_diamond_database,
[],
YAML_INPUT.out.btk_ncbi_taxonomy_path,
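The extra pacbio_meta argument in the reworked .map above reflects the shape of merged_bam: the mapping subworkflows appear to emit [ meta, bam ] tuples, so combining the bare sample id with that channel yields three elements per item (the tuple shape is an assumption inferred from the three-argument closure). A small standalone sketch with dummy values:

workflow {
    // Assumed channel shapes, mirroring sample_id.combine(merged_bam) above.
    ch_sample_id  = Channel.of('Oscheius_DF5033')
    ch_merged_bam = Channel.of( [ [id: 'Oscheius_DF5033'], file('mapped.bam') ] )

    ch_sample_id
        .combine(ch_merged_bam)                        // -> [ sample_id, pacbio_meta, pacbio_path ]
        .map { sample_id, pacbio_meta, pacbio_path ->
            tuple([id: sample_id], pacbio_path)        // rebuild a minimal meta map, keep the BAM
        }
        .set { mapped_bam }

    mapped_bam.view()                                  // -> [ [id:Oscheius_DF5033], .../mapped.bam ]
}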
