From 9ba6b02bbcb322ff4265cc51fca0ee5d8420b929 Mon Sep 17 00:00:00 2001 From: Adam Talbot <12817534+adamrtalbot@users.noreply.github.com> Date: Wed, 7 Aug 2024 09:19:54 +0100 Subject: [PATCH] Replace bedtools sort with unix sort in BEDTOOLS_GENOMECOV (#6063) * Replace bedtools sort with unix sort in BEDTOOLS_GENOMECOV `bedtools sort` uses a large amount of CPUs and memory, but when using it here it doesn't require the additional genome based features of `bedtools`. Replacing it should speed up the process and make it many times more efficient. * add args2 for for customisation of GNU sort command Allows customisation of GNU * quoting for args2 * Use LC_ALL and default options for performance and consistency * Handle null memory value * Remove tags.yml --- modules/nf-core/bedtools/genomecov/main.nf | 11 +++++++---- modules/nf-core/bedtools/genomecov/tests/tags.yml | 2 -- 2 files changed, 7 insertions(+), 6 deletions(-) delete mode 100644 modules/nf-core/bedtools/genomecov/tests/tags.yml diff --git a/modules/nf-core/bedtools/genomecov/main.nf b/modules/nf-core/bedtools/genomecov/main.nf index 954e8913d3b..8403c530380 100644 --- a/modules/nf-core/bedtools/genomecov/main.nf +++ b/modules/nf-core/bedtools/genomecov/main.nf @@ -4,8 +4,8 @@ process BEDTOOLS_GENOMECOV { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' : - 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }" + 'oras://community.wave.seqera.io/library/bedtools_coreutils:ba273c06a3909a15': + 'community.wave.seqera.io/library/bedtools_coreutils:a623c13f66d5262b' }" input: tuple val(meta), path(intervals), val(scale) @@ -21,13 +21,16 @@ process BEDTOOLS_GENOMECOV { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def args = task.ext.args ?: '' def args_list = args.tokenize() args += (scale > 0 && scale != 1) ? " -scale $scale" : "" if (!args_list.contains('-bg') && (scale > 0 && scale != 1)) { args += " -bg" } - def sort_cmd = sort ? '| bedtools sort' : '' + // Sorts output file by chromosome and position using additional options for performance and consistency + // See https://www.biostars.org/p/66927/ for further details + def buffer = task.memory ? "--buffer-size=${task.memory.toGiga().intdiv(2)}G" : '' + def sort_cmd = sort ? "| LC_ALL=C sort --parallel=$task.cpus $buffer -k1,1 -k2,2n" : '' def prefix = task.ext.prefix ?: "${meta.id}" if (intervals.name =~ /\.bam/) { diff --git a/modules/nf-core/bedtools/genomecov/tests/tags.yml b/modules/nf-core/bedtools/genomecov/tests/tags.yml deleted file mode 100644 index 55fce47800f..00000000000 --- a/modules/nf-core/bedtools/genomecov/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -bedtools/genomecov: - - "modules/nf-core/bedtools/genomecov/**"