Skip to content
This repository has been archived by the owner on Aug 20, 2024. It is now read-only.

Commit

Permalink
Merge pull request #125 from awgymer/add-json-support
Browse files Browse the repository at this point in the history
Add json samplesheet support
  • Loading branch information
awgymer committed Dec 14, 2023
2 parents 26f5631 + eb88ebb commit bcb92d8
Show file tree
Hide file tree
Showing 20 changed files with 270 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"format": "file-path",
"mimetype": "text/csv",
"schema": "assets/schema_input.json",
"pattern": "^\\S+\\.(csv|tsv|yaml)$",
"pattern": "^\\S+\\.(csv|tsv|yaml|json)$",
"description": "Path to comma-separated file containing information about the samples in the experiment.",
"help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/testpipeline/usage#samplesheet-input).",
"fa_icon": "fas fa-file-csv"
Expand Down
2 changes: 1 addition & 1 deletion examples/fromSamplesheetMeta/pipeline/nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"format": "file-path",
"mimetype": "text/csv",
"schema": "assets/schema_input.json",
"pattern": "^\\S+\\.(csv|tsv|yaml)$",
"pattern": "^\\S+\\.(csv|tsv|yaml|json)$",
"description": "Path to comma-separated file containing information about the samples in the experiment.",
"help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/testpipeline/usage#samplesheet-input).",
"fa_icon": "fas fa-file-csv"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"format": "file-path",
"mimetype": "text/csv",
"schema": "assets/schema_input.json",
"pattern": "^\\S+\\.(csv|tsv|yaml)$",
"pattern": "^\\S+\\.(csv|tsv|yaml|json)$",
"description": "Path to comma-separated file containing information about the samples in the experiment.",
"help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/testpipeline/usage#samplesheet-input).",
"fa_icon": "fas fa-file-csv"
Expand Down
2 changes: 1 addition & 1 deletion examples/paramsHelp/pipeline/nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"format": "file-path",
"mimetype": "text/csv",
"schema": "assets/schema_input.json",
"pattern": "^\\S+\\.(csv|tsv|yaml)$",
"pattern": "^\\S+\\.(csv|tsv|yaml|json)$",
"description": "Path to comma-separated file containing information about the samples in the experiment.",
"help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/testpipeline/usage#samplesheet-input).",
"fa_icon": "fas fa-file-csv"
Expand Down
2 changes: 1 addition & 1 deletion examples/paramsSummaryLog/pipeline/nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"format": "file-path",
"mimetype": "text/csv",
"schema": "assets/schema_input.json",
"pattern": "^\\S+\\.(csv|tsv|yaml)$",
"pattern": "^\\S+\\.(csv|tsv|yaml|json)$",
"description": "Path to comma-separated file containing information about the samples in the experiment.",
"help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/testpipeline/usage#samplesheet-input).",
"fa_icon": "fas fa-file-csv"
Expand Down
2 changes: 1 addition & 1 deletion examples/paramsSummaryMap/pipeline/nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"format": "file-path",
"mimetype": "text/csv",
"schema": "assets/schema_input.json",
"pattern": "^\\S+\\.(csv|tsv|yaml)$",
"pattern": "^\\S+\\.(csv|tsv|yaml|json)$",
"description": "Path to comma-separated file containing information about the samples in the experiment.",
"help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/testpipeline/usage#samplesheet-input).",
"fa_icon": "fas fa-file-csv"
Expand Down
2 changes: 1 addition & 1 deletion examples/validateParameters/log.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@ ERROR ~ ERROR: Validation of pipeline parameters failed!
-- Check '.nextflow.log' file for details
The following invalid input values have been detected:

* --input: string [samplesheet.txt] does not match pattern ^\S+\.(csv|tsv|yaml)$ (samplesheet.txt)
* --input: string [samplesheet.txt] does not match pattern ^\S+\.(csv|tsv|yaml|json)$ (samplesheet.txt)
* --input: the file 'samplesheet.txt' does not exist (samplesheet.txt)
2 changes: 1 addition & 1 deletion examples/validateParameters/pipeline/nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"format": "file-path",
"mimetype": "text/csv",
"schema": "assets/schema_input.json",
"pattern": "^\\S+\\.(csv|tsv|yaml)$",
"pattern": "^\\S+\\.(csv|tsv|yaml|json)$",
"description": "Path to comma-separated file containing information about the samples in the experiment.",
"help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/testpipeline/usage#samplesheet-input).",
"fa_icon": "fas fa-file-csv"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"format": "file-path",
"mimetype": "text/csv",
"schema": "assets/schema_input.json",
"pattern": "^\\S+\\.(csv|tsv|yaml)$",
"pattern": "^\\S+\\.(csv|tsv|yaml|json)$",
"description": "Path to comma-separated file containing information about the samples in the experiment.",
"help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/testpipeline/usage#samplesheet-input).",
"fa_icon": "fas fa-file-csv"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"format": "file-path",
"mimetype": "text/csv",
"schema": "assets/schema_input.json",
"pattern": "^\\S+\\.(csv|tsv|yaml)$",
"pattern": "^\\S+\\.(csv|tsv|yaml|json)$",
"description": "Path to comma-separated file containing information about the samples in the experiment.",
"help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/testpipeline/usage#samplesheet-input).",
"fa_icon": "fas fa-file-csv"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,14 @@ class SamplesheetConverter {
return ["empty": it] as Map
}
}
else if(fileType == "json"){
samplesheetList = new JsonSlurper().parseText(samplesheetFile.text).collect {
if(containsHeader) {
return it as Map
}
return ["empty": it] as Map
}
}
else {
Path fileSamplesheet = Nextflow.file(samplesheetFile) as Path
samplesheetList = fileSamplesheet.splitCsv(header:containsHeader ?: ["empty"], strip:true, sep:delimiter, quote:'\"')
Expand Down Expand Up @@ -215,7 +223,7 @@ class SamplesheetConverter {
Path samplesheetFile
) {
def String extension = samplesheetFile.getExtension()
if (extension in ["csv", "tsv", "yml", "yaml"]) {
if (extension in ["csv", "tsv", "yml", "yaml", "json"]) {
return extension == "yml" ? "yaml" : extension
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,14 @@ class SchemaValidator extends PluginExtensionPoint {
return ["empty": it] as Map
}
}
else if(fileType == "json"){
fileContent = new JsonSlurper().parseText(samplesheetFile.text).collect {
if(containsHeader) {
return it as Map
}
return ["empty": it] as Map
}
}
else {
fileContent = samplesheetFile.splitCsv(header:containsHeader ?: ["empty"], strip:true, sep:delimiter, quote:'\"')
}
Expand Down Expand Up @@ -437,6 +445,14 @@ class SchemaValidator extends PluginExtensionPoint {
return ["empty": it] as Map
}
}
else if(fileType == "json"){
fileContent = new JsonSlurper().parseText(file_path.text).collect {
if(containsHeader) {
return it as Map
}
return ["empty": it] as Map
}
}
else {
fileContent = file_path.splitCsv(header:containsHeader ?: ["empty"], strip:true, sep:delimiter, quote:'\"')
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ class PluginExtensionMethodsTest extends Dsl2Spec{
schema_dest.delete()
}

def 'should validate a schema csv' () {
def 'should validate a schema - CSV' () {
given:
def schema = Path.of('src/testResources/nextflow_schema.json').toAbsolutePath().toString()
def SCRIPT_TEXT = """
Expand All @@ -148,7 +148,7 @@ class PluginExtensionMethodsTest extends Dsl2Spec{
!stdout
}

def 'should validate a schema tsv' () {
def 'should validate a schema - TSV' () {
given:
def schema = Path.of('src/testResources/nextflow_schema.json').toAbsolutePath().toString()
def SCRIPT_TEXT = """
Expand All @@ -171,7 +171,7 @@ class PluginExtensionMethodsTest extends Dsl2Spec{
!stdout
}

def 'should validate a schema yaml' () {
def 'should validate a schema - YAML' () {
given:
def schema = Path.of('src/testResources/nextflow_schema.json').toAbsolutePath().toString()
def SCRIPT_TEXT = """
Expand All @@ -194,7 +194,88 @@ class PluginExtensionMethodsTest extends Dsl2Spec{
!stdout
}

def 'should validate a schema yaml with failures' () {
// Feature: running validateParameters with a JSON samplesheet as params.input
// must neither throw nor produce validator warnings / '* --' bullet lines.
def 'should validate a schema - JSON' () {
given:
// Absolute path of the parameter schema that is interpolated into the script below.
def schema = Path.of('src/testResources/nextflow_schema.json').toAbsolutePath().toString()
def SCRIPT_TEXT = """
params.input = 'src/testResources/correct.json'
params.outdir = 'src/testResources/testDir'
include { validateParameters } from 'plugin/nf-validation'
validateParameters(parameters_schema: '$schema')
"""

when:
// NOTE(review): dsl_eval and capture are defined outside this view; presumably they
// run the script and expose the output it produced - confirm against the base spec.
dsl_eval(SCRIPT_TEXT)
// Keep only the lines that are SchemaValidator warnings or start with '* --';
// every other line maps to null and is dropped by findResults.
def stdout = capture
.toString()
.readLines()
.findResults {it.contains('WARN nextflow.validation.SchemaValidator') || it.startsWith('* --') ? it : null }

then:
noExceptionThrown()
// An empty list is falsy in Groovy, so this asserts that no such line was captured.
!stdout
}

// Feature: validating an invalid CSV samplesheet must raise a SchemaValidationException
// whose message lists the expected per-entry problems.
def 'should validate a schema with failures - CSV' () {
given:
// Absolute path of the parameter schema that is interpolated into the script below.
def schema = Path.of('src/testResources/nextflow_schema_with_samplesheet.json').toAbsolutePath().toString()
def SCRIPT_TEXT = """
params.input = 'src/testResources/wrong.csv'
params.outdir = 'src/testResources/testDir'
include { validateParameters } from 'plugin/nf-validation'
validateParameters(parameters_schema: '$schema')
"""

when:
// NOTE(review): dsl_eval and capture are defined outside this view; presumably they
// run the script and expose the output it produced - confirm against the base spec.
dsl_eval(SCRIPT_TEXT)
// Keep only the lines that are SchemaValidator warnings or start with '* --';
// every other line maps to null and is dropped by findResults.
def stdout = capture
.toString()
.readLines()
.findResults {it.contains('WARN nextflow.validation.SchemaValidator') || it.startsWith('* --') ? it : null }

then:
def error = thrown(SchemaValidationException)
// The exception message is checked line by line; index 1 is not asserted.
def errorMessages = error.message.readLines()
// "\033[0;31m" is an ANSI colour escape sequence prefixed to the header line.
errorMessages[0] == "\033[0;31mThe following errors have been detected:"
errorMessages[2] == "* -- Entry 1: Missing required value: sample"
errorMessages[3] == "* -- Entry 1 - strandedness: Strandedness must be provided and be one of 'forward', 'reverse' or 'unstranded' (weird)"
errorMessages[4] == "* -- Entry 1 - fastq_2: FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz' (test1_fastq2.fasta)"
errorMessages[5] == "* -- Entry 2 - sample: Sample name must be provided and cannot contain spaces (test 2)"
// The errors are expected in the exception only: no matching line in the captured output.
!stdout
}

// Feature: validating an invalid TSV samplesheet must raise a SchemaValidationException
// whose message lists the expected per-entry problems.
def 'should validate a schema with failures - TSV' () {
given:
// Absolute path of the parameter schema that is interpolated into the script below.
def schema = Path.of('src/testResources/nextflow_schema_with_samplesheet.json').toAbsolutePath().toString()
def SCRIPT_TEXT = """
params.input = 'src/testResources/wrong.tsv'
params.outdir = 'src/testResources/testDir'
include { validateParameters } from 'plugin/nf-validation'
validateParameters(parameters_schema: '$schema')
"""

when:
// NOTE(review): dsl_eval and capture are defined outside this view; presumably they
// run the script and expose the output it produced - confirm against the base spec.
dsl_eval(SCRIPT_TEXT)
// Keep only the lines that are SchemaValidator warnings or start with '* --';
// every other line maps to null and is dropped by findResults.
def stdout = capture
.toString()
.readLines()
.findResults {it.contains('WARN nextflow.validation.SchemaValidator') || it.startsWith('* --') ? it : null }

then:
def error = thrown(SchemaValidationException)
// The exception message is checked line by line; index 1 is not asserted.
def errorMessages = error.message.readLines()
// "\033[0;31m" is an ANSI colour escape sequence prefixed to the header line.
errorMessages[0] == "\033[0;31mThe following errors have been detected:"
errorMessages[2] == "* -- Entry 1: Missing required value: sample"
errorMessages[3] == "* -- Entry 1 - strandedness: Strandedness must be provided and be one of 'forward', 'reverse' or 'unstranded' (weird)"
errorMessages[4] == "* -- Entry 1 - fastq_2: FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz' (test1_fastq2.fasta)"
errorMessages[5] == "* -- Entry 2 - sample: Sample name must be provided and cannot contain spaces (test 2)"
// The errors are expected in the exception only: no matching line in the captured output.
!stdout
}

def 'should validate a schema with failures - YAML' () {
given:
def schema = Path.of('src/testResources/nextflow_schema_with_samplesheet.json').toAbsolutePath().toString()
def SCRIPT_TEXT = """
Expand Down Expand Up @@ -223,6 +304,35 @@ class PluginExtensionMethodsTest extends Dsl2Spec{
!stdout
}

// Feature: validating an invalid JSON samplesheet must raise a SchemaValidationException
// whose message lists the same per-entry problems asserted for the other formats.
def 'should validate a schema with failures - JSON' () {
given:
// Absolute path of the parameter schema that is interpolated into the script below.
def schema = Path.of('src/testResources/nextflow_schema_with_samplesheet.json').toAbsolutePath().toString()
def SCRIPT_TEXT = """
params.input = 'src/testResources/wrong.json'
params.outdir = 'src/testResources/testDir'
include { validateParameters } from 'plugin/nf-validation'
validateParameters(parameters_schema: '$schema')
"""

when:
// NOTE(review): dsl_eval and capture are defined outside this view; presumably they
// run the script and expose the output it produced - confirm against the base spec.
dsl_eval(SCRIPT_TEXT)
// Keep only the lines that are SchemaValidator warnings or start with '* --';
// every other line maps to null and is dropped by findResults.
def stdout = capture
.toString()
.readLines()
.findResults {it.contains('WARN nextflow.validation.SchemaValidator') || it.startsWith('* --') ? it : null }

then:
def error = thrown(SchemaValidationException)
// The exception message is checked line by line; index 1 is not asserted.
def errorMessages = error.message.readLines()
// "\033[0;31m" is an ANSI colour escape sequence prefixed to the header line.
errorMessages[0] == "\033[0;31mThe following errors have been detected:"
errorMessages[2] == "* -- Entry 1: Missing required value: sample"
errorMessages[3] == "* -- Entry 1 - strandedness: Strandedness must be provided and be one of 'forward', 'reverse' or 'unstranded' (weird)"
errorMessages[4] == "* -- Entry 1 - fastq_2: FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz' (test1_fastq2.fasta)"
errorMessages[5] == "* -- Entry 2 - sample: Sample name must be provided and cannot contain spaces (test 2)"
// The errors are expected in the exception only: no matching line in the captured output.
!stdout
}

def 'should find unexpected params' () {
given:
def schema = Path.of('src/testResources/nextflow_schema.json').toAbsolutePath().toString()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,33 @@ class SamplesheetConverterTest extends Dsl2Spec{
stdout.contains("[[string1:extraField, string2:extraField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25, false, ${this.getRootString()}/src/testResources/test.txt, ${this.getRootString()}/src/testResources/testDir, ${this.getRootString()}/src/testResources/testDir, unique3, 1, itDoesExist]" as String)
}

// Feature: Channel.fromSamplesheet must convert a valid JSON samplesheet into the
// four expected channel entries, as printed by .view().
def 'should work fine - JSON' () {
given:
// Single-quoted triple string: no interpolation happens inside the script text.
def SCRIPT_TEXT = '''
include { fromSamplesheet } from 'plugin/nf-validation'
params.input = 'src/testResources/correct.json'
workflow {
Channel.fromSamplesheet("input", parameters_schema:"src/testResources/nextflow_schema_with_samplesheet_converter.json").view()
}
'''

when:
// NOTE(review): dsl_eval and capture are defined outside this view; presumably they
// run the script and expose the output it produced - confirm against the base spec.
dsl_eval(SCRIPT_TEXT)
// Keep only lines starting with '[[' (the printed entries whose first element is a list/map).
def stdout = capture
.toString()
.readLines()
.findResults {it.startsWith('[[') ? it : null }

then:
noExceptionThrown()
// stdout is a list of lines, so contains() checks for a whole-line match.
// Interpolated expectations are GStrings and are cast with 'as String' so they
// compare equal to the captured java.lang.String lines.
stdout.contains("[[string1:fullField, string2:fullField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25.12, false, ${this.getRootString()}/src/testResources/test.txt, ${this.getRootString()}/src/testResources/testDir, ${this.getRootString()}/src/testResources/test.txt, unique1, 1, itDoesExist]" as String)
stdout.contains("[[string1:value, string2:value, integer1:0, integer2:0, boolean1:true, boolean2:true], string1, 25.08, false, [], [], [], [], [], itDoesExist]")
stdout.contains("[[string1:dependentRequired, string2:dependentRequired, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25, false, [], [], [], unique2, 1, itDoesExist]")
stdout.contains("[[string1:extraField, string2:extraField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25, false, ${this.getRootString()}/src/testResources/test.txt, ${this.getRootString()}/src/testResources/testDir, ${this.getRootString()}/src/testResources/testDir, unique3, 1, itDoesExist]" as String)
}

def 'no header - CSV' () {
given:
def SCRIPT_TEXT = '''
Expand Down Expand Up @@ -214,6 +241,32 @@ class SamplesheetConverterTest extends Dsl2Spec{
stdout.contains("[test_2]")
}

// Feature: Channel.fromSamplesheet must handle a JSON samplesheet used with the
// "no header" schema, emitting one single-value entry per sample.
def 'no header - JSON' () {
given:
// Single-quoted triple string: no interpolation happens inside the script text.
def SCRIPT_TEXT = '''
include { fromSamplesheet } from 'plugin/nf-validation'
params.input = 'src/testResources/no_header.json'
workflow {
Channel.fromSamplesheet("input", parameters_schema:"src/testResources/nextflow_schema_with_samplesheet_no_header.json").view()
}
'''

when:
// NOTE(review): dsl_eval and capture are defined outside this view; presumably they
// run the script and expose the output it produced - confirm against the base spec.
dsl_eval(SCRIPT_TEXT)
// Keep only lines starting with '[' (the entries printed by .view()).
def stdout = capture
.toString()
.readLines()
.findResults {it.startsWith('[') ? it : null }

then:
noExceptionThrown()
// stdout is a list of lines, so contains() checks for a whole-line match.
stdout.contains("[test_1]")
stdout.contains("[test_2]")
}


def 'extra field' () {
given:
def SCRIPT_TEXT = '''
Expand Down
Loading

0 comments on commit bcb92d8

Please sign in to comment.