Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow Nf-Core dataset registration without providing a runId file #374

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ final class NfCorePipelineResult {

private SampleIds sampleIds

// The RunId is only generated if the result was generated by an NF-Tower instance; the field stays null otherwise
private RunId runId

private PipelineInformationFolder pipelineInformationFolder
Expand All @@ -53,7 +54,7 @@ final class NfCorePipelineResult {

private List<DataFolder> processFolders

NfCorePipelineResult(PipelineInformationFolder pipelineInformationFolder, QualityControlFolder qualityControlFolder, List<DataFolder> processFolders, RunId runId, SampleIds sampleIds) {
NfCorePipelineResult(PipelineInformationFolder pipelineInformationFolder, QualityControlFolder qualityControlFolder, List<DataFolder> processFolders, RunId runId, SampleIds sampleIds) {
Objects.requireNonNull(pipelineInformationFolder, "Please provide a PipelineInformation folder.")
Objects.requireNonNull(qualityControlFolder, "Please provide a QualityControl folder")
Objects.requireNonNull(processFolders, "Please provide a List of process folders")
Expand All @@ -67,6 +68,17 @@ final class NfCorePipelineResult {
this.sampleIds = sampleIds
}

/**
 * Creates a pipeline result for output that does not provide a run id file.
 * This constructor never assigns the {@code runId} field.
 *
 * @param pipelineInformationFolder the pipeline information folder, must not be null
 * @param qualityControlFolder the quality control folder, must not be null
 * @param processFolders the process folders, must not be null
 * @param sampleIds the sample ids file, must not be null
 * @throws NullPointerException if any of the arguments is null
 */
NfCorePipelineResult(PipelineInformationFolder pipelineInformationFolder, QualityControlFolder qualityControlFolder, List<DataFolder> processFolders, SampleIds sampleIds) {
    // Validate everything before touching any field so a failed check leaves no partial state
    Objects.requireNonNull(pipelineInformationFolder, "Please provide a PipelineInformation folder.")
    Objects.requireNonNull(qualityControlFolder, "Please provide a QualityControl folder")
    Objects.requireNonNull(processFolders, "Please provide a List of process folders")
    Objects.requireNonNull(sampleIds, "Please provide a sampleIds file")

    this.pipelineInformationFolder = pipelineInformationFolder
    this.qualityControlFolder = qualityControlFolder
    this.processFolders = processFolders
    this.sampleIds = sampleIds
}

/**
* Static factory method that creates a new NfCorePipelineResult instance from the bioinformatic pipeline output.
* See this <a href="https://github.com/qbicsoftware/data-model-lib/blob/master/src/test/resources/examples/resultset/valid-resultset-example.json">example</a>
Expand All @@ -80,14 +92,13 @@ final class NfCorePipelineResult {

//Check if all required folders are in root directory
Objects.requireNonNull(bioinformaticPipelineOutput.get("pipelineInformation"), "The root folder must contain a PipelineInformation folder.")
Objects.requireNonNull(bioinformaticPipelineOutput.get("qualityControl"),"The root folder must contain a QualityControl folder.")
Objects.requireNonNull(bioinformaticPipelineOutput.get("qualityControl"), "The root folder must contain a QualityControl folder.")
Objects.requireNonNull(bioinformaticPipelineOutput.get("processFolders"), "The root folder must contain at least one process folder.")
//Check if all required files are in the pipeline_info directory
Map pipelineInfoMap = bioinformaticPipelineOutput["pipelineInformation"] as Map
Objects.requireNonNull(pipelineInfoMap.get("softwareVersions"), "The pipeline_info folder must contain a softwareVersions.yml file.")
Objects.requireNonNull(pipelineInfoMap.get("executionReport"), "The pipeline_info folder must contain a executionReport.html file.")
//Check if all required files are in root directory
Objects.requireNonNull(bioinformaticPipelineOutput.get("runId"), "The root folder must contain a run_id.txt file.")
Objects.requireNonNull(bioinformaticPipelineOutput.get("sampleIds"), "The root folder must contain an sample_ids.txt file.")

//Parse all folders in the root directory
Expand All @@ -108,12 +119,16 @@ final class NfCorePipelineResult {
pipelineInformation.softwareVersions = softwareVersions as SoftwareVersions
pipelineInformation.executionReport = executionReport as ExecutionReport

//Parse all files in the root directory
DataFile runId = parseFile(bioinformaticPipelineOutput.get("runId") as Map) as RunId
//Parse all mandatory files in the root directory
DataFile sampleIds = parseFile(bioinformaticPipelineOutput.get("sampleIds") as Map) as SampleIds

//Create new NfCorePipelineResult with parsed information
return new NfCorePipelineResult(pipelineInformation, qualityControl, processFolders, runId, sampleIds)
// Parse optional Files in the root directory and generate NfCorePipelineResult accordingly
if (bioinformaticPipelineOutput.get("runId") != null) {
DataFile runId = parseFile(bioinformaticPipelineOutput.get("runId") as Map) as RunId
return new NfCorePipelineResult(pipelineInformation, qualityControl, processFolders, runId, sampleIds)
} else {
return new NfCorePipelineResult(pipelineInformation, qualityControl, processFolders, sampleIds)
}
}

/**
Expand Down Expand Up @@ -165,11 +180,12 @@ final class NfCorePipelineResult {
/*
* Helper method that creates a DataFile instance from a map
*/

private static DataFile parseFile(Map fileTree) throws IllegalArgumentException {
String name = fileTree.get("name")
String fileType = fileTree.get("fileType")
String path = fileTree.get("path")

for (String nfCoreFileType : nfCoreFileTypes) {
Class<?> c = Class.forName(nfCoreFileType)
Method method = c.getDeclaredMethod("create", String.class, String.class)
Expand All @@ -183,15 +199,15 @@ final class NfCorePipelineResult {
}
}
// We have to check for files of unknown type since this Parser will encounter variable file output dependent on the pipeline
if(!fileType)
{
throw new IllegalArgumentException("File $name with path $path is of unknown nfcore file type.")
if (!fileType) {
throw new IllegalArgumentException("File $name with path $path is of unknown nfcore file type.")
}
}

/*
* Helper method that creates a DataFolder instance from a map
*/

private static DataFolder parseFolder(Map fileTree) throws IllegalArgumentException {

def name = fileTree.get("name") as String
Expand All @@ -215,25 +231,27 @@ final class NfCorePipelineResult {
* Helper method that tries to create a DataFolder instance
* based on the DataFolder's different static factory create methods.
*/

/*
 * Invokes the given static factory method once with (name, relativePath, children).
 *
 * @param method static factory method, invoked reflectively with a null receiver
 * @param name name passed to the factory
 * @param relativePath relative path passed to the factory
 * @param children already parsed children passed to the factory
 * @return an Optional holding the created DataFolder, or an empty Optional when the
 *         factory method itself threw an exception
 */
private static Optional<DataFolder> tryToCreateDataFolder(Method method,
                                                          String name,
                                                          String relativePath,
                                                          List children) {
    try {
        // We only have named Folders
        def dataFolder = method.invoke(null, name, relativePath, children) as DataFolder
        return Optional.of(dataFolder)
    } catch (InvocationTargetException ignored) {
        // Deliberately ignored: the factory rejected this input, which is reported as
        // "no folder" (presumably so the caller can try another factory method - verify against caller)
        return Optional.empty()
    }
}

/*
* Helper method that parses the children of a folder.
*/

private static List parseChildren(List<Map> children) {
def parsedChildren = []
children.each { Map unknownChild ->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,6 @@
"pipelineInformation",
"qualityControl",
"processFolders",
"sampleIds",
"runId"
"sampleIds"
]
}
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ class NfCorePipelineResultSpec extends Specification {
@Shared
Map missingQualityControlDataStructure

@Shared
Map validDataStructureWithoutRunId

def setupSpec() {
InputStream validStream = Thread.currentThread().getContextClassLoader().getResourceAsStream("examples/resultset/valid-resultset-example.json")
validDataStructure = (Map) new JsonSlurper().parse(validStream)
Expand All @@ -55,6 +58,10 @@ class NfCorePipelineResultSpec extends Specification {
InputStream missingQualityControlStream = Thread.currentThread().getContextClassLoader().getResourceAsStream("examples/resultset/missing-quality-control-resultset-example.json")
missingQualityControlDataStructure = (Map) new JsonSlurper().parse(missingQualityControlStream)
missingQualityControlStream.close()

InputStream validStreamWithoutRunId = Thread.currentThread().getContextClassLoader().getResourceAsStream("examples/resultset/valid-resultset-no-run_id-example.json")
validDataStructureWithoutRunId = (Map) new JsonSlurper().parse(validStreamWithoutRunId)
validStreamWithoutRunId.close()
}

def "Create NfCorePipelineOutput from Map successfully"() {
Expand Down Expand Up @@ -122,4 +129,24 @@ class NfCorePipelineResultSpec extends Specification {
thrown(NullPointerException)
}

def "Create NfCorePipelineOutput from Map without RunId successfully"() {
    given: "the shared result set structure loaded for the no-run-id case"
    final Map resultSetWithoutRunId = validDataStructureWithoutRunId

    when: "a pipeline result is created from that structure"
    final NfCorePipelineResult pipelineResult = NfCorePipelineResult.createFrom(resultSetWithoutRunId)

    then: "all mandatory parts are parsed"
    pipelineResult.getSampleIds().name == "sample_ids.txt"
    pipelineResult.getProcessFolders().get(0).name == "salmon"
    pipelineResult.getQualityControlFolder().name == "multiqc"

    and: "the pipeline information folder exposes both mandatory files"
    final PipelineInformationFolder pipelineInfo = pipelineResult.getPipelineInformation()
    pipelineInfo.getSoftwareVersions().name == "software_versions.yml"
    pipelineInfo.getExecutionReport().name == "execution_report.html"

    and: "no run id has been set"
    pipelineResult.runId == null
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
{
"pipelineInformation": {
"name": "pipeline_info",
"path": "./pipeline_info",
"children": [],
"softwareVersions": {
"name": "software_versions.yml",
"fileType": "yml",
"path": "./pipeline_info/software_versions.yml"
},
"executionReport": {
"name": "execution_report.html",
"fileType": "html",
"path": "./pipeline_info/execution_report.html"
}
},
"qualityControl": {
"name": "multiqc",
"path": "./multiqc",
"children": [
{
"name": "star_salmon",
"path": "./multiqc/star_salmon",
"children": [
{
"name": "multiqc_report.html",
"path": "./multiqc/star_salmon/multiqc_report.html",
"fileType": "html"
}
]
}
]
},
"processFolders": [
{
"name": "salmon",
"path": "./salmon",
"children": [
{
"name": "salmon.merged.gene_tpm.tsv",
"fileType": "tsv",
"path": "./salmon/salmon.merged.gene_tpm.tsv"
}
]
}
],
"sampleIds": {
"name": "sample_ids.txt",
"fileType": "txt",
"path": "./sample_ids.txt"
}
}
Loading