Merge branch 'master' into feat/format-filepathpattern

nextflow-io · Oct 18, 2023 · 83a6b5c · 83a6b5c
2 parents 90983d0 + 4884fae
commit 83a6b5c
Show file tree

Hide file tree

Showing 10 changed files with 160 additions and 8 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,6 +1,10 @@
 # nextflow-io/nf-validation: Changelog
 
-# Version 1.1.0
+# Version 1.1.0 - Miso
+
+## Features
+
+- Add support for samplesheets with no header ([#115](https://github.com/nextflow-io/nf-validation/pull/115))
 
 ## Bug fixes
 
@@ -11,7 +15,7 @@
 
 - Added `file-path-pattern` format to check every file fetched using a glob pattern. Using a glob is now also possible in the samplesheet and will create a list of all files found using that glob pattern. ([#118](https://github.com/nextflow-io/nf-validation/pull/118))
 
-# Version 1.0.0
+# Version 1.0.0 - Tonkotsu
 
 The nf-validation plugin is now in production use across many pipelines and has (we hope) now reached a point of relative stability. The bump to major version v1.0.0 signifies that it is suitable for use in production pipelines.
 

diff --git a/README.md b/README.md
@@ -60,6 +60,10 @@ ch_input = Channel.fromSamplesheet("input")
 - Java 11 or later
 - <https://github.com/everit-org/json-schema>
 
+## Slack channel
+
+There is a dedicated [nf-validation Slack channel](https://nfcore.slack.com/archives/C056RQB10LU) in the [Nextflow Slack workspace](https://nextflow.slack.com).
+
 ## Credits
 
 This plugin was written based on code initially written within the nf-core community,

diff --git a/docs/samplesheets/examples.md b/docs/samplesheets/examples.md
@@ -44,6 +44,47 @@ tuple val(meta), path(fastq_1), path(fastq_2), path(bed)
 
 It may be necessary to manipulate this channel to fit your process inputs. For more documentation, check out the [Nextflow operator docs](https://www.nextflow.io/docs/latest/operator.html), however here are some common use cases with `.fromSamplesheet()`.
 
+## Using a samplesheet with no headers
+
+Sometimes the pipeline samplesheet only has one possible input. In this case it doesn't make sense to have a header in the samplesheet. To support a samplesheet without a header, create a samplesheet schema whose only property key is an empty string:
+
+```json
+{
+  "$schema": "http://json-schema.org/draft-07/schema",
+  "description": "Schema for the file provided with params.input",
+  "type": "array",
+  "items": {
+    "type": "object",
+    "properties": {
+      "": {
+        "type": "string"
+      }
+    }
+  }
+}
+```
+
+When using samplesheets like this CSV file:
+
+```csv
+test_1
+test_2
+```
+
+or this YAML file:
+
+```yaml
+- test_1
+- test_2
+```
+
+The output of `.fromSamplesheet()` will look like this:
+
+```bash
+[test_1]
+[test_2]
+```
+
 ## Changing the structure of channel items
 
 Each item in the channel will be a flat tuple, but some processes will use multiple files as a list in their input channel, this is common in nf-core modules. For example, consider the following input declaration in a process, where FASTQ could be > 1 file:

diff --git a/plugins/nf-validation/src/main/nextflow/validation/SamplesheetConverter.groovy b/plugins/nf-validation/src/main/nextflow/validation/SamplesheetConverter.groovy
@@ -64,17 +64,23 @@ class SamplesheetConverter {
         def Map<String, Map<String, String>> schemaFields = (Map) schemaMap["items"]["properties"]
         def Set<String> allFields = schemaFields.keySet()
         def List<String> requiredFields = (List) schemaMap["items"]["required"]
+        def Boolean containsHeader = !(allFields.size() == 1 && allFields[0] == "")
 
         def String fileType = getFileType(samplesheetFile)
         def String delimiter = fileType == "csv" ? "," : fileType == "tsv" ? "\t" : null
         def List<Map<String,String>> samplesheetList
 
         if(fileType == "yaml"){
-            samplesheetList = new Yaml().load((samplesheetFile.text))
+            samplesheetList = new Yaml().load((samplesheetFile.text)).collect {
+                if(containsHeader) {
+                    return it as Map
+                }
+                return ["empty": it] as Map
+            }
         }
         else {
             Path fileSamplesheet = Nextflow.file(samplesheetFile) as Path
-            samplesheetList = fileSamplesheet.splitCsv(header:true, strip:true, sep:delimiter, quote:'"')
+            samplesheetList = fileSamplesheet.splitCsv(header:containsHeader ?: ["empty"], strip:true, sep:delimiter, quote:'"')
         }
 
         // Field checks + returning the channels
@@ -83,17 +89,16 @@ class SamplesheetConverter {
         def Boolean headerCheck = true
         this.rows = []
         resetCount()
-
         def List outputs = samplesheetList.collect { Map<String,String> fullRow ->
             increaseCount()
 
             Map<String,String> row = fullRow.findAll { it.value != "" }
-            def Set rowKeys = row.keySet()
+            def Set rowKeys = containsHeader ? row.keySet() : ["empty"].toSet()
             def String yamlInfo = fileType == "yaml" ? " for entry ${this.getCount()}." : ""
 
             // Check the header (CSV/TSV) or present fields (YAML)
             if(headerCheck) {
-                def unexpectedFields = rowKeys - allFields
+                def unexpectedFields = containsHeader ? rowKeys - allFields : []
                 if(unexpectedFields.size() > 0) {
                     this.warnings << "The samplesheet contains following unchecked field(s): ${unexpectedFields}${yamlInfo}".toString()
                 }
@@ -114,7 +119,7 @@ class SamplesheetConverter {
             def ArrayList output = []
 
             for( Map.Entry<String, Map> field : schemaFields ){
-                def String key = field.key
+                def String key = containsHeader ? field.key : "empty"
                 def String input = row[key]
 
                 // Check if the field is deprecated

diff --git a/plugins/nf-validation/src/test/nextflow/validation/SamplesheetConverterTest.groovy b/plugins/nf-validation/src/test/nextflow/validation/SamplesheetConverterTest.groovy
@@ -137,6 +137,56 @@ class SamplesheetConverterTest extends Dsl2Spec{
         stdout.contains("[[string1:extraField, string2:extraField, integer1:10, integer2:10, boolean1:true, boolean2:true], string1, 25, false, ${this.getRootString()}/src/testResources/test.txt, ${this.getRootString()}/src/testResources/testDir, ${this.getRootString()}/src/testResources/testDir, unique3, 1, itDoesExist]" as String)
     }
 
+    def 'no header - CSV' () {
+        given:
+        def SCRIPT_TEXT = '''
+            include { fromSamplesheet } from 'plugin/nf-validation'
+
+            params.input = 'src/testResources/no_header.csv'
+
+            workflow {
+                Channel.fromSamplesheet("input", parameters_schema:"src/testResources/nextflow_schema_with_samplesheet_no_header.json").view()
+            }
+        '''
+
+        when:
+        dsl_eval(SCRIPT_TEXT)
+        def stdout = capture
+                .toString()
+                .readLines()
+                .findResults {it.startsWith('[') ? it : null }
+
+        then:
+        noExceptionThrown()
+        stdout.contains("[test_1]")
+        stdout.contains("[test_2]")
+    }
+
+    def 'no header - YAML' () {
+        given:
+        def SCRIPT_TEXT = '''
+            include { fromSamplesheet } from 'plugin/nf-validation'
+
+            params.input = 'src/testResources/no_header.yaml'
+
+            workflow {
+                Channel.fromSamplesheet("input", parameters_schema:"src/testResources/nextflow_schema_with_samplesheet_no_header.json").view()
+            }
+        '''
+
+        when:
+        dsl_eval(SCRIPT_TEXT)
+        def stdout = capture
+                .toString()
+                .readLines()
+                .findResults {it.startsWith('[') ? it : null }
+
+        then:
+        noExceptionThrown()
+        stdout.contains("[test_1]")
+        stdout.contains("[test_2]")
+    }
+
     def 'extra field' () {
         given:
         def SCRIPT_TEXT = '''

diff --git a/plugins/nf-validation/src/testResources/nextflow_schema_with_samplesheet_no_header.json b/plugins/nf-validation/src/testResources/nextflow_schema_with_samplesheet_no_header.json
@@ -0,0 +1,28 @@
+{
+    "$schema": "http://json-schema.org/draft-07/schema",
+    "$id": "https://raw.githubusercontent.com/nf-core/testpipeline/master/nextflow_schema.json",
+    "title": "nf-core/testpipeline pipeline parameters",
+    "description": "this is a test",
+    "type": "object",
+    "definitions": {
+        "input_output_options": {
+            "title": "Input/output options",
+            "type": "object",
+            "fa_icon": "fas fa-terminal",
+            "description": "Define where the pipeline should find input data and save output data.",
+            "required": ["input"],
+            "properties": {
+                "input": {
+                    "type": "string",
+                    "format": "file-path",
+                    "mimetype": "text/csv",
+                    "pattern": "^\\S+\\.csv$",
+                    "schema": "src/testResources/no_header_schema.json",
+                    "description": "Path to comma-separated file containing information about the samples in the experiment.",
+                    "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/testpipeline/usage#samplesheet-input).",
+                    "fa_icon": "fas fa-file-csv"
+                }
+            }
+        }
+    }
+}
diff --git a/plugins/nf-validation/src/testResources/no_header.csv b/plugins/nf-validation/src/testResources/no_header.csv
@@ -0,0 +1,2 @@
+test_1
+test_2
diff --git a/plugins/nf-validation/src/testResources/no_header.yaml b/plugins/nf-validation/src/testResources/no_header.yaml
@@ -0,0 +1,2 @@
+- test_1
+- test_2
diff --git a/plugins/nf-validation/src/testResources/no_header_schema.json b/plugins/nf-validation/src/testResources/no_header_schema.json
@@ -0,0 +1,14 @@
+{
+    "$schema": "http://json-schema.org/draft-07/schema",
+    "description": "Schema for the file provided with params.input",
+    "type": "array",
+    "items": {
+        "type": "object",
+        "properties": {
+            "": {
+                "type": "string"
+            }
+        }
+    }
+}
+
diff --git a/plugins/nf-validation/src/testResources/samplesheet_no_header.csv b/plugins/nf-validation/src/testResources/samplesheet_no_header.csv
@@ -0,0 +1,2 @@
+test_1
+test_2