This repository has been archived by the owner on Aug 20, 2024. It is now read-only.

Commit

Merge pull request #128 from awgymer/add-array-support
Add array support for samplesheets
awgymer committed Jan 30, 2024
2 parents dac66fa + 8bc3d3a commit 981c126
Showing 11 changed files with 537 additions and 40 deletions.
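
For context: this change lets a samplesheet field hold a list of values whenever the corresponding schema property is declared with "type": "array". As the diff below shows, this only applies to YAML and JSON samplesheets; combining an array-typed schema property with a CSV or TSV samplesheet now raises a validation error. A minimal sketch of such a schema property and samplesheet entry (the field names and formats here are illustrative, not taken from this commit):

A hypothetical schema property:

    "fastqs": {
        "type": "array",
        "items": { "type": "string", "format": "file-path", "exists": true }
    }

and a matching entry in a hypothetical YAML samplesheet:

    - sample: sample1
      fastqs:
        - data/sample1_R1.fastq.gz
        - data/sample1_R2.fastq.gz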
@@ -97,18 +97,19 @@ class SamplesheetConverter {
def Boolean headerCheck = true
this.rows = []
resetCount()

def List outputs = samplesheetList.collect { Map<String,String> fullRow ->
increaseCount()

Map<String,String> row = fullRow.findAll { it.value != "" }
Map<String,Object> row = fullRow.findAll { it.value != "" }
def Set rowKeys = containsHeader ? row.keySet() : ["empty"].toSet()
def String yamlInfo = fileType == "yaml" ? " for entry ${this.getCount()}." : ""
def String entryInfo = fileType in ["yaml", "json"] ? " for entry ${this.getCount()}." : ""

// Check the header (CSV/TSV) or present fields (YAML)
if(headerCheck) {
def unexpectedFields = containsHeader ? rowKeys - allFields : []
if(unexpectedFields.size() > 0) {
this.warnings << "The samplesheet contains following unchecked field(s): ${unexpectedFields}${yamlInfo}".toString()
this.warnings << "The samplesheet contains following unchecked field(s): ${unexpectedFields}${entryInfo}".toString()
}

if(fileType != 'yaml'){
@@ -128,7 +129,7 @@ class SamplesheetConverter {

for( Map.Entry<String, Map> field : schemaFields ){
def String key = containsHeader ? field.key : "empty"
def String input = row[key]
def Object input = row[key]

// Check if the field is deprecated
if(field['value']['deprecated']){
@@ -159,7 +160,7 @@ class SamplesheetConverter {
if(input in booleanUniques[key] && input){
this.errors << addSample("The '${key}' value needs to be unique. '${input}' was found at least twice in the samplesheet.".toString())
}
booleanUniques[key].add(input)
booleanUniques[key].add(input as String)
}
else if(unique && uniqueIsList) {
def Map<String,String> newMap = (Map) row.subMap((List) [key] + (List) unique)
@@ -176,20 +177,20 @@ class SamplesheetConverter {
def List<String> metaNames = field['value']['meta'] as List<String>
if(metaNames) {
for(name : metaNames) {
meta[name] = (input != '' && input) ?
castToType(input, field) :
field['value']['default'] != null ?
castToType(field['value']['default'] as String, field) :
meta[name] = (input != '' && input != null) ?
castToNFType(input, field) :
field['value']['default'] != null ?
castToNFType(field['value']['default'], field) :
null
}
}
else {
def inputFile = (input != '' && input) ?
castToType(input, field) :
field['value']['default'] != null ?
castToType(field['value']['default'] as String, field) :
def inputVal = (input != '' && input != null) ?
castToNFType(input, field) :
field['value']['default'] != null ?
castToNFType(field['value']['default'], field) :
[]
output.add(inputFile)
output.add(inputVal)
}
}
// Add meta to the output when a meta field has been created
@@ -253,26 +254,36 @@ class SamplesheetConverter {
}

// Function to transform an input field from the samplesheet to its desired type
private static castToType(
String input,
private static castToNFType(
Object input,
Map.Entry<String, Map> field
) {
def String type = field['value']['type']
def String key = field.key

// Recursively call this function for each item in the array if the field is an array-type
// The returned values are collected into a single array
if (type == "array") {
def Map.Entry<String, Map> subfield = (Map.Entry<String, Map>) Map.entry(field.key, field['value']['items'])
log.debug "subfield = $subfield"
def ArrayList result = input.collect{ castToNFType(it, subfield) } as ArrayList
return result
}

def String inputStr = input as String
// Convert string values
if(type == "string" || !type) {
def String result = input as String
def String result = inputStr as String

// Check and convert to the desired format
def String format = field['value']['format']
if(format) {
if(format == "file-path-pattern") {
def ArrayList inputFiles = Nextflow.file(input) as ArrayList
def ArrayList inputFiles = Nextflow.file(inputStr) as ArrayList
return inputFiles
}
if(format.contains("path")) {
def Path inputFile = Nextflow.file(input) as Path
def Path inputFile = Nextflow.file(inputStr) as Path
return inputFile
}
}
@@ -285,36 +296,36 @@ class SamplesheetConverter {
// Convert number values
else if(type == "number") {
try {
def int result = input as int
def int result = inputStr as int
return result
}
catch (NumberFormatException e) {
log.debug("Could not convert ${input} to an integer. Trying to convert to a float.")
}

try {
def float result = input as float
def float result = inputStr as float
return result
}
catch (NumberFormatException e) {
log.debug("Could not convert ${input} to a float. Trying to convert to a double.")
log.debug("Could not convert ${inputStr} to a float. Trying to convert to a double.")
}

def double result = input as double
def double result = inputStr as double
return result
}

// Convert integer values
else if(type == "integer") {

def int result = input as int
def int result = inputStr as int
return result
}

// Convert boolean values
else if(type == "boolean") {

if(input.toLowerCase() == "true") {
if(inputStr.toLowerCase() == "true") {
return true
}
return false
@@ -184,23 +184,29 @@ class SchemaValidator extends PluginExtensionPoint {
def String fileType = SamplesheetConverter.getFileType(samplesheetFile)
def String delimiter = fileType == "csv" ? "," : fileType == "tsv" ? "\t" : null
def List<Map<String,String>> fileContent
def List<Map<String,String>> fileContentCasted
def Boolean s3PathCheck = params.validationS3PathCheck ? params.validationS3PathCheck : false
def Map types = variableTypes(schemaFile.toString(), baseDir)
if (types.find{ it.value == "array" } as Boolean && fileType in ["csv", "tsv"]){
def msg = "Using \"type\": \"array\" in schema with a \".$fileType\" samplesheet is not supported\n"
log.error("ERROR: Validation of pipeline parameters failed!")
throw new SchemaValidationException(msg, [])
}
def Boolean containsHeader = !(types.keySet().size() == 1 && types.keySet()[0] == "")

if(!containsHeader){
types = ["empty": types[""]]
}
if(fileType == "yaml"){
fileContent = new Yaml().load((samplesheetFile.text)).collect {
fileContentCasted = new Yaml().load((samplesheetFile.text)).collect {
if(containsHeader) {
return it as Map
}
return ["empty": it] as Map
}
}
else if(fileType == "json"){
fileContent = new JsonSlurper().parseText(samplesheetFile.text).collect {
fileContentCasted = new JsonSlurper().parseText(samplesheetFile.text).collect {
if(containsHeader) {
return it as Map
}
@@ -209,8 +215,8 @@ class SchemaValidator extends PluginExtensionPoint {
}
else {
fileContent = samplesheetFile.splitCsv(header:containsHeader ?: ["empty"], strip:true, sep:delimiter, quote:'\"')
fileContentCasted = castToType(fileContent, types)
}
def List<Map<String,String>> fileContentCasted = castToType(fileContent, types)
if (validateFile(false, samplesheetFile.toString(), fileContentCasted, schemaFile.toString(), baseDir, s3PathCheck)) {
log.debug "Validation passed: '$samplesheetFile' with '$schemaFile'"
}
@@ -430,23 +436,29 @@ class SchemaValidator extends PluginExtensionPoint {
def String fileType = SamplesheetConverter.getFileType(file_path)
def String delimiter = fileType == "csv" ? "," : fileType == "tsv" ? "\t" : null
def List<Map<String,String>> fileContent
def List<Map<String,String>> fileContentCasted
def Map types = variableTypes(schema_name, baseDir)
if (types.find{ it.value == "array" } as Boolean && fileType in ["csv", "tsv"]){
def msg = "${colors.red}Using {\"type\": \"array\"} in schema with a \".$fileType\" samplesheet is not supported${colors.reset}\n"
log.error("ERROR: Validation of pipeline parameters failed!")
throw new SchemaValidationException(msg, [])
}
def Boolean containsHeader = !(types.keySet().size() == 1 && types.keySet()[0] == "")

if(!containsHeader){
types = ["empty": types[""]]
}

if(fileType == "yaml"){
fileContent = new Yaml().load(file_path.text).collect {
fileContentCasted = new Yaml().load(file_path.text).collect {
if(containsHeader) {
return it as Map
}
return ["empty": it] as Map
}
}
else if(fileType == "json"){
fileContent = new JsonSlurper().parseText(file_path.text).collect {
fileContentCasted = new JsonSlurper().parseText(file_path.text).collect {
if(containsHeader) {
return it as Map
}
@@ -455,8 +467,8 @@ class SchemaValidator extends PluginExtensionPoint {
}
else {
fileContent = file_path.splitCsv(header:containsHeader ?: ["empty"], strip:true, sep:delimiter, quote:'\"')
fileContentCasted = castToType(fileContent, types)
}
def List<Map<String,String>> fileContentCasted = castToType(fileContent, types)
if (validateFile(useMonochromeLogs, key, fileContentCasted, schema_name, baseDir, s3PathCheck)) {
log.debug "Validation passed: '$key': '$file_path' with '$schema_name'"
}
@@ -554,6 +566,8 @@ class SchemaValidator extends PluginExtensionPoint {
Boolean monochrome_logs, String paramName, Object fileContent, String schemaFilename, String baseDir, Boolean s3PathCheck = false

) {
// declare this once for the method
def colors = logColours(monochrome_logs)

// Load the schema
def String schema_string = Files.readString( Path.of(getSchemaPath(baseDir, schemaFilename)) )
@@ -591,7 +605,10 @@ class SchemaValidator extends PluginExtensionPoint {
pathsToCheck.each { String fieldName ->
for (int i=0; i < arrayJSON.size(); i++) {
def JSONObject entry = arrayJSON.getJSONObject(i)
if ( entry.has(fieldName) ) {
if ( entry.has(fieldName) && entry[fieldName] instanceof JSONArray ) {
entry[fieldName].collect{ pathExists(it.toString(), " Entry ${(i+1).toString()} - ${fieldName.toString()}", s3PathCheck) }
}
else if ( entry.has(fieldName) ) {
pathExists(entry[fieldName].toString(), " Entry ${(i+1).toString()} - ${fieldName.toString()}", s3PathCheck)
}
}
@@ -607,13 +624,11 @@ class SchemaValidator extends PluginExtensionPoint {
validator.performValidation(schema, arrayJSON);
if (this.hasErrors()) {
// Needed for custom errors such as pathExists() errors
def colors = logColours(monochrome_logs)
def msg = "${colors.red}The following errors have been detected:\n\n" + this.getErrors().join('\n').trim() + "\n${colors.reset}\n"
log.error("ERROR: Validation of '$paramName' file failed!")
throw new SchemaValidationException(msg, this.getErrors())
}
} catch (ValidationException e) {
def colors = logColours(monochrome_logs)
JSONObject exceptionJSON = (JSONObject) e.toJSON()
JSONObject objectJSON = new JSONObject();
objectJSON.put("objects",arrayJSON);
@@ -651,7 +666,10 @@ class SchemaValidator extends PluginExtensionPoint {
def Map properties = (Map) group.value['properties']
for (p in properties) {
def String key = (String) p.key
def Map property = properties[key] as Map
def Map<String,Object> property = properties[key] as Map
if(property.containsKey('items')){
property = property.items as Map
}
if (property.containsKey('exists') && property.containsKey('format')) {
if (property['exists'] && (property['format'] == 'file-path' || property['format'] == 'directory-path' || property['format'] == 'path') ) {
exists.push(key)
@@ -918,4 +918,28 @@ class PluginExtensionMethodsTest extends Dsl2Spec{
error.message == '''The following errors have been detected:\n\n* -- Entry 1: Missing required value: sample\n* -- Entry 2: Missing required value: sample\n\n'''
!stdout
}
}

def 'should fail because of arrays with csv' () {
given:
def schema = Path.of('src/testResources/nextflow_schema_with_samplesheet_converter_arrays.json').toAbsolutePath().toString()
def SCRIPT_TEXT = """
params.monochrome_logs = true
params.input = 'src/testResources/correct.csv'
include { validateParameters } from 'plugin/nf-validation'
validateParameters(parameters_schema: '$schema', monochrome_logs: params.monochrome_logs)
"""

when:
dsl_eval(SCRIPT_TEXT)
def stdout = capture
.toString()
.readLines()
.findResults {it.contains('WARN nextflow.validation.SchemaValidator') || it.startsWith('* --') ? it : null }

then:
def error = thrown(SchemaValidationException)
error.message == '''Using {"type": "array"} in schema with a ".csv" samplesheet is not supported\n'''
!stdout
}
}
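
For completeness, a rough sketch of how a pipeline might consume the converted values through the plugin's fromSamplesheet channel factory, assuming the hypothetical schema above (the param and field names are illustrative):

    include { fromSamplesheet } from 'plugin/nf-validation'

    workflow {
        // each channel item holds the converted fields of one samplesheet entry;
        // the array-typed 'fastqs' field arrives as a Groovy list of Path objects
        Channel.fromSamplesheet('input').view()
    }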