Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

new module : jvarkit/vcffilterjdk #6621

Merged
merged 24 commits into from
Sep 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
c088a57
vcffilterdjdk
lindenb Sep 10, 2024
979ab53
update params
lindenb Sep 10, 2024
6ed3f43
update params
lindenb Sep 10, 2024
a6af30a
Merge branch 'master' into pl_jvarkit_vcffilterjdk
lindenb Sep 10, 2024
202e632
oppsss tag and TODO
lindenb Sep 10, 2024
e7e6aca
Merge branch 'pl_jvarkit_vcffilterjdk' of https://github.com/lindenb/…
lindenb Sep 10, 2024
ca16e60
target/region
lindenb Sep 12, 2024
19dbc29
Merge branch 'master' into pl_jvarkit_vcffilterjdk
lindenb Sep 12, 2024
139f36a
Merge branch 'master' into pl_jvarkit_vcffilterjdk
lindenb Sep 13, 2024
9650802
answers to review
lindenb Sep 13, 2024
5fd36e8
f...g space
lindenb Sep 13, 2024
320a657
Merge branch 'pl_jvarkit_vcffilterjdk' of https://github.com/lindenb/…
lindenb Sep 13, 2024
4d4745b
fix conda problem https://nfcore.slack.com/archives/CJRH30T6V/p172623…
lindenb Sep 13, 2024
2fe0b8f
Merge branch 'master' into pl_jvarkit_vcffilterjdk
jfy133 Sep 16, 2024
7aa618a
Merge branch 'master' into pl_jvarkit_vcffilterjdk
lindenb Sep 17, 2024
6a81fc6
add test+bed
lindenb Sep 17, 2024
e128738
reset polyx
lindenb Sep 17, 2024
02431f5
prevent test exception md5sum for empty file
lindenb Sep 17, 2024
2450755
Merge branch 'master' into pl_jvarkit_vcffilterjdk
lindenb Sep 17, 2024
62ecb77
update main.nf.test
lindenb Sep 19, 2024
2878799
Merge branch 'master' into pl_jvarkit_vcffilterjdk
lindenb Sep 20, 2024
ba8aea1
update meta.yml
lindenb Sep 20, 2024
a1ef479
remove suggestion
lindenb Sep 20, 2024
c3176e8
Merge branch 'pl_jvarkit_vcffilterjdk' of https://github.com/lindenb/…
lindenb Sep 20, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions modules/nf-core/jvarkit/vcffilterjdk/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
channels:
  - conda-forge
  - bioconda
dependencies:
  # Every dependency is pinned as <channel>::<package>=<version> (double colon).
  - "bioconda::jvarkit=2024.08.25"
  - "bioconda::bcftools=1.20"
88 changes: 88 additions & 0 deletions modules/nf-core/jvarkit/vcffilterjdk/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
// Filter a VCF/BCF with jvarkit vcffilterjdk. The command is a three-stage pipe:
//   1. bcftools view  - reads the input, optionally restricted to `regions_file` (ext.args1)
//   2. vcffilterjdk   - applies the user code from `code` and/or ext.args2
//   3. bcftools view  - writes the result in the format requested via ext.args3
process JVARKIT_VCFFILTERJDK {
    tag "$meta.id"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/jvarkit:2024.08.25--hdfd78af_1':
        'biocontainers/jvarkit:2024.08.25--hdfd78af_1' }"

    input:
    // vcf: input VCF/BCF; tbi: optional index; regions_file: optional regions/targets file
    tuple val(meta), path(vcf), path(tbi), path(regions_file)
    // fasta, fai and dict are staged into the work directory but are not referenced on the command line below
    tuple val(meta2), path(fasta)
    tuple val(meta3), path(fai)
    tuple val(meta4), path(dict)
    // optional file holding the user code (passed as --script)
    tuple val(meta5), path(code)
    // optional pedigree (passed as --pedigree)
    tuple val(meta6), path(pedigree)

    output:
    tuple val(meta), path("*.${extension}"), emit: vcf
    tuple val(meta), path("*.tbi")         , emit: tbi, optional: true
    tuple val(meta), path("*.csi")         , emit: csi, optional: true
    path "versions.yml"                    , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args1         = task.ext.args1 ?: ''
    def args2         = task.ext.args2 ?: ''
    def args3         = task.ext.args3 ?: ''
    def prefix        = task.ext.prefix ?: "${meta.id}"
    def script_file   = code ? "--script \"${code}\"" : ""
    def pedigree_file = pedigree ? " --pedigree \"${pedigree}\" " : ""
    // With an index bcftools can jump to the regions (--regions-file); without one it has to stream (--targets-file)
    def regions_cmd   = regions_file ? (tbi ? " --regions-file" : " --targets-file") + " \"${regions_file}\" " : ""

    // Deliberately not declared with `def`: the output declaration above uses "*.${extension}"
    extension = getVcfExtension(args3); /* custom function, see below */

    // JVM heap size. `task.memory` is null when no memory is configured for the process, so fall back to
    // a fixed value instead of failing; otherwise keep some head-room below the task allocation because
    // the JVM's non-heap memory and the two bcftools processes of the pipe share it.
    def avail_mem = 3072
    if (!task.memory) {
        log.info '[jvarkit vcffilterjdk] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
    } else {
        avail_mem = (task.memory.mega*0.8).intValue()
    }

    if ("$vcf" == "${prefix}.${extension}") error "Input and output names are the same, set prefix in module configuration to disambiguate!"
    """
    mkdir -p TMP

    bcftools view \\
        -O v \\
        ${regions_cmd} \\
        ${args1} \\
        "${vcf}" |\\
    jvarkit -Xmx${avail_mem}M -XX:-UsePerfData -Djava.io.tmpdir=TMP vcffilterjdk \\
        ${pedigree_file} \\
        ${script_file} \\
        ${args2} |\\
    bcftools view \\
        --output "${prefix}.${extension}" \\
        ${args3}

    rm -rf TMP

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
        jvarkit: \$(jvarkit -v)
    END_VERSIONS
    """

    stub:
    def args3 = task.ext.args3 ?: ''
    // Not declared with `def`, for the same reason as in the script block
    extension = getVcfExtension(args3); /* custom function, see below */
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch "${prefix}.${extension}"

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
        jvarkit: \$(jvarkit -v)
    END_VERSIONS
    """
}



// Custom function to get the output file extension from the bcftools arguments.
//
// Looks for the bcftools output-type option in `args` and maps its letter to an extension:
//   b -> "bcf.gz", u -> "bcf", z -> "vcf.gz", v -> "vcf"
// All usual spellings are recognised: "-Oz", "-O z", "--output-type z" and "--output-type=z".
// When several types appear, the first match in the order b, u, z, v wins.
// Returns "vcf" when no output type is given (or `args` is null/empty).
String getVcfExtension(String args) {
    def text = args ?: ""
    // Insertion order of the map literal is the order in which the types are tested
    def extensions = [b: "bcf.gz", u: "bcf", z: "vcf.gz", v: "vcf"]
    def hit = extensions.find { letter, ext ->
        (text =~ ("(?:--output-type(?:\\s+|=)|-O\\s*)" + letter)).find()
    }
    return hit ? hit.value : "vcf"
}
118 changes: 118 additions & 0 deletions modules/nf-core/jvarkit/vcffilterjdk/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
name: "jvarkit_vcffilterjdk"
description: Filtering VCF with dynamically-compiled java expressions
keywords:
- vcf
- bcf
- filter
jfy133 marked this conversation as resolved.
Show resolved Hide resolved
- variant
- java
- script
tools:
- "jvarkit":
description: "Java utilities for Bioinformatics."
homepage: "https://github.com/lindenb/jvarkit"
documentation: "https://jvarkit.readthedocs.io/"
tool_dev_url: "https://github.com/lindenb/jvarkit"
doi: "10.1093/bioinformatics/btx734"
licence: ["MIT License"]
jfy133 marked this conversation as resolved.
Show resolved Hide resolved
args_id: "$args2"

- "bcftools":
description: |
View, subset and filter VCF or BCF files by position and filtering expression. Convert between VCF and BCF
homepage: "http://samtools.github.io/bcftools/bcftools.html"
documentation: "http://www.htslib.org/doc/bcftools.html"
doi: "10.1093/bioinformatics/btp352"
licence: ["MIT"]
args_id: ["$args1", "$args3"]
input:
- meta:
type: map
description: |
Groovy Map containing VCF information
jfy133 marked this conversation as resolved.
Show resolved Hide resolved
e.g. [ id:'test_reference' ]
- vcf:
type: file
description: Input VCF/BCF file
pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}"
- tbi:
type: file
description: Optional VCF/BCF index file
pattern: "*.{tbi,csi}"
- regions_file:
type: file
description: Optional. Restrict to regions listed in a file
pattern: "*.{bed,bed.gz,txt,tsv}"
- meta2:
type: map
description: |
Groovy Map containing fasta information
e.g. [ id:'test_reference' ]
- fasta:
type: file
description: Fasta reference file
pattern: "*.fasta"
- meta3:
type: map
description: |
Groovy Map containing fasta.fai information
e.g. [ id:'test_reference' ]
- fai:
type: file
description: Fasta file index
pattern: "*.fasta.fai"
- meta4:
type: map
description: |
Groovy Map containing fasta.dict information
e.g. [ id:'test_reference' ]
- dict:
type: file
description: GATK sequence dictionary
pattern: "*.dict"
- meta5:
type: map
description: |
Groovy Map containing code information
e.g. [ id:'test_reference' ]
- code:
type: file
description: File containing custom user code. May be empty if the script is provided via `task.ext.args2`.
pattern: "*.{code,script,txt,tsv,java,js}"
- meta6:
type: map
description: |
Groovy Map containing pedigree information
e.g. [ id:'test_reference' ]
- pedigree:
type: file
description: Optional jvarkit pedigree.
pattern: "*.{tsv,ped,pedigree}"
output:
- meta:
type: map
description: |
Groovy Map containing VCF information
e.g. [ id:'test', single_end:false ]
- vcf:
type: file
description: VCF filtered output file
pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}"
- csi:
type: file
description: Default VCF file index
pattern: "*.csi"
- tbi:
type: file
description: Alternative VCF file index
pattern: "*.tbi"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@lindenb"
maintainers:
- "@lindenb"
119 changes: 119 additions & 0 deletions modules/nf-core/jvarkit/vcffilterjdk/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
// nf-core modules test jvarkit/vcffilterjdk
nextflow_process {

name "Test Process JVARKIT_VCFFILTERJDK"
script "../main.nf"
process "JVARKIT_VCFFILTERJDK"
config "./nextflow.config"


tag "modules"
tag "modules_nfcore"
tag "jvarkit"
tag "jvarkit/vcffilterjdk"

// Plain VCF input without index, regions file, script file or pedigree.
// The filtering expression comes from ext.args2 in ./nextflow.config.
// All test files are resolved from params.modules_testdata_base_path.
test("sarscov2 - vcf") {

    when {
        process {
            """
            input[0] = [
                [id:"vcf_test"],
                file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true),
                [],
                []
            ]
            input[1] = [ [:] , file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ]
            input[2] = [ [:] , file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ]
            input[3] = [ [:] , file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.dict', checkIfExists: true) ]
            input[4] = [ [] , []]
            input[5] = [ [] , []]
            """
        }
    }

    then {
        assertAll(
            { assert process.success },
            // Snapshot the checksum of the variant records and the versions file
            { assert snapshot(
                path(process.out.vcf[0][1]).vcf.variantsMD5,
                process.out.versions
                ).match()
            }
        )
    }

}



// VCF input restricted by a BED file. No index is supplied.
// All test files are resolved from params.modules_testdata_base_path.
test("sarscov2 - vcf+bed") {

    when {
        process {
            """
            input[0] = [
                [id:"vcf_test"],
                file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true),
                [],
                file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true)
            ]
            input[1] = [ [:] , file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ]
            input[2] = [ [:] , file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ]
            input[3] = [ [:] , file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.dict', checkIfExists: true) ]
            input[4] = [ [] , []]
            input[5] = [ [] , []]
            """
        }
    }

    then {
        assertAll(
            { assert process.success },
            // The BED file of the test dataset does not overlap any variant, so the output VCF has no
            // records and `.vcf.variantsMD5` fails with an md5sum error for an empty file.
            // Only the existence of the output is checked here.
            { assert file(process.out.vcf[0][1]).exists() },
            { assert snapshot(process.out.versions).match()
            }
        )
    }
}




// Stub run: only checks the files created by the stub block of the module.
// All test files are resolved from params.modules_testdata_base_path.
test("sarscov2 - vcf - stub") {

    options "-stub"

    when {
        process {
            """
            input[0] = [
                [id:"vcf_test"],
                file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true),
                [],
                []
            ]
            input[1] = [ [:] , file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ]
            input[2] = [ [:] , file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ]
            input[3] = [ [:] , file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.dict', checkIfExists: true) ]
            input[4] = [ [] , []]
            input[5] = [ [] , []]
            """
        }
    }

    then {
        assertAll(
            { assert process.success },
            // NOTE(review): snapshotting the whole `process.out` was suggested during review but was
            // reported to fail with a snapshot error, so the VCF path and the versions channel are
            // snapshotted individually.
            { assert snapshot(
                path(process.out.vcf[0][1]),
                process.out.versions
                ).match()
            }
        )
    }

}


}
45 changes: 45 additions & 0 deletions modules/nf-core/jvarkit/vcffilterjdk/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
{
"sarscov2 - vcf": {
"content": [
"335cdc0f8c403378e1e9d75c41c3736f",
jfy133 marked this conversation as resolved.
Show resolved Hide resolved
[
"versions.yml:md5,3601751995727e2ee7102d8ef18e5304"
]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-09-03T14:00:13.118369362"
},


"sarscov2 - vcf+bed": {
"content": [
[
"versions.yml:md5,3601751995727e2ee7102d8ef18e5304"
]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-09-03T14:00:13.118369362"
},


"sarscov2 - vcf - stub": {
"content": [
"vcf_test.vcf:md5,d41d8cd98f00b204e9800998ecf8427e",
[
"versions.yml:md5,3601751995727e2ee7102d8ef18e5304"
]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-09-03T14:00:13.118369362"
}

}
5 changes: 5 additions & 0 deletions modules/nf-core/jvarkit/vcffilterjdk/tests/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// nf-test configuration for the JVARKIT_VCFFILTERJDK module tests.
process {
withName: JVARKIT_VCFFILTERJDK {
// ext.args2 is the argument string the module inserts into the `jvarkit vcffilterjdk` command.
// This inline expression evaluates to true only for variants whose start position is odd.
ext.args2=" --expression 'return variant.getStart()%2==1;' "
}
}
2 changes: 2 additions & 0 deletions modules/nf-core/jvarkit/vcffilterjdk/tests/tags.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
jvarkit/vcffilterjdk:
- "modules/nf-core/jvarkit/vcffilterjdk/**"
Loading