Skip to content

Commit

Permalink
Provide Data Files and Folders for Bam and Pod5 registration
Browse files Browse the repository at this point in the history
  • Loading branch information
Steffengreiner committed Oct 24, 2024
1 parent 8c76c22 commit d43dd4a
Show file tree
Hide file tree
Showing 5 changed files with 140 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@ final class OxfordNanoporeExperiment implements ExperimentFolder {
FAST5_FILE(FQDN_FILES + ".Fast5File"),
FASTQ_FILE(FQDN_FILES + ".FastQFile"),
FASTQ_ZIPPED_FILE(FQDN_FILES + ".FastQZippedFile"),
BAM_FILE(FQDN_FILES + ".BamFile"),
POD5_FILE(FQDN_FILES + ".Pod5File"),
FINAL_SUMMARY_LOG(FQDN_FILES + ".FinalSummaryLog"),
MUX_SCAN_DATA_LOG(FQDN_FILES + ".MuxScanDataLog"),
Expand Down Expand Up @@ -308,6 +309,8 @@ final class OxfordNanoporeExperiment implements ExperimentFolder {
POD5_PASS_FOLDER(FQDN_FOLDERS + ".Pod5PassFolder"),
POD5_FAIL_FOLDER(FQDN_FOLDERS + ".Pod5FailFolder"),
POD5_SKIP_FOLDER(FQDN_FOLDERS + ".Pod5SkipFolder"),
BAM_PASS_FOLDER(FQDN_FOLDERS + ".BamPassFolder"),
BAM_FAIL_FOLDER(FQDN_FOLDERS + ".BamFailFolder"),
OTHER_REPORTS_FOLDER(FQDN_FOLDERS + ".OtherReportsFolder"),
BASECALLING_FOLDER(FQDN_FOLDERS + ".BasecallingFolder"),

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,12 @@ final class OxfordNanoporeMeasurement {
case Pod5SkipFolder:
folders["pod5skip"] = element as Pod5SkipFolder
break
case BamPassFolder:
folders["bampass"] = element as BamPassFolder
break
case BamFailFolder:
folders["bamfail"] = element as BamFailFolder
break
case DataFile:
logFilesCollection.add(element as DataFile)
break
Expand All @@ -118,10 +124,13 @@ final class OxfordNanoporeMeasurement {
if (areFast5FoldersInMeasurement() && areFastQFoldersInMeasurement()) {
isValid = true
}
//// We need to ensure that pod5_skip and fast5_skip information is provided if dorado basecaller was used
//// We need to ensure that pod5_pass and pod5_fail information is provided if dorado basecaller was used
if (arePod5FoldersInMeasurement()) {
isValid = true
}
if (areBamFoldersInMeasurement()) {
isValid = true
}
if (isValid == false) {
throw new IllegalStateException("No valid data is contained in measurement")
}
Expand All @@ -137,7 +146,11 @@ final class OxfordNanoporeMeasurement {
}
// Condition three: Require a Pod5 pass or Pod5 fail folder in the measurement
private boolean arePod5FoldersInMeasurement() {
return isDataFolderInMeasurement("fast5skip") || isDataFolderInMeasurement("pod5skip")
return isDataFolderInMeasurement("pod5pass") || isDataFolderInMeasurement("pod5fail")
}

/**
 * Reports whether the measurement contains bam data.
 *
 * @return true if isDataFolderInMeasurement accepts the "bampass" folder or,
 * failing that, the "bamfail" folder; false otherwise
 */
private boolean areBamFoldersInMeasurement() {
    // Check the pass folder first; the fail folder is only consulted when it is absent.
    if (isDataFolderInMeasurement("bampass")) {
        return true
    }
    return isDataFolderInMeasurement("bamfail")
}

private boolean isDataFolderInMeasurement(String string) {
Expand All @@ -159,6 +172,10 @@ final class OxfordNanoporeMeasurement {
* "fast5pass": DataFolder
* "fastqfail": DataFolder
* "fastqpass": DataFolder
* "bamfail": DataFolder
* "bampass": DataFolder
* "pod5fail": DataFolder
* "pod5pass": DataFolder
* "Other sample code": // In case of pooled samples
* ...
* @return nested Map with sample codes and data folders
Expand Down Expand Up @@ -310,25 +327,18 @@ final class OxfordNanoporeMeasurement {
private Map<String, Map<String, DataFolder>> prepareRawData(String sampleId) {
final def result = new HashMap()
final def dataFolders = [
"fast5fail" : (folders.get("fast5fail") as DataFolder),
"fast5pass" : (folders.get("fast5pass") as DataFolder),
"fastqpass" : (folders.get("fastqpass") as DataFolder),
"fastqfail" : (folders.get("fastqfail") as DataFolder)
"fast5fail": (folders.get("fast5fail") as DataFolder),
"fast5pass": (folders.get("fast5pass") as DataFolder),
"fast5skip": (folders.get("fast5skip") as DataFolder),
"fastqpass": (folders.get("fastqpass") as DataFolder),
"fastqfail": (folders.get("fastqfail") as DataFolder),
"pod5pass": (folders.get("pod5pass") as DataFolder),
"pod5fail": (folders.get("pod5fail") as DataFolder),
"pod5skip": (folders.get("pod5skip") as DataFolder),
"bampass": (folders.get("bampass") as DataFolder),
"bamfail": (folders.get("bamfail") as DataFolder)
]
if (hasBasecallingData) dataFolders.put("basecalling", (folders.get("basecalling") as DataFolder))
//Only add dorado based minimal required datafolders if present
if (folders.get("fast5skip") != null) {
dataFolders.put("fast5skip", (folders.get("fast5skip") as DataFolder))
}
if (folders.get("pod5skip") != null) {
dataFolders.put("pod5skip", (folders.get("pod5skip") as DataFolder))
}
if (folders.get("pod5fail") != null) {
dataFolders.put("pod5fail", (folders.get("pod5fail") as DataFolder))
}
if (folders.get("pod5pass") != null) {
dataFolders.put("pod5pass", (folders.get("pod5pass") as DataFolder))
}
result.put(sampleId, dataFolders)
return result
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package life.qbic.datamodel.datasets.datastructure.files.nanopore

import life.qbic.datamodel.datasets.datastructure.files.DataFile

/**
 * A specialisation of a DataFile, represents an Oxford Nanopore bam file.
 *
 * Instances are obtained through {@link #create(String, String)}. The file name
 * is checked against NAME_SCHEMA during construction, i.e. it has to carry the
 * ".bam" extension.
 */
class BamFile extends DataFile {

    final private static String FILE_TYPE = "bam"

    final private static String NAME_SCHEMA = /.*\.bam$/

    /**
     * Creates the file object and validates its name.
     *
     * @param name The file name
     * @param relativePath The relative path of the file
     * @throws IllegalArgumentException if the name does not match NAME_SCHEMA
     */
    protected BamFile(String name, String relativePath) {
        super(name, relativePath, FILE_TYPE)
        validateName()
    }

    /**
     * Creates a new instance of a BamFile object
     *
     * @param name The file name
     * @param relativePath The relative path of the file
     * @return A new instance of a BamFile object
     * @throws IllegalArgumentException if the name does not match NAME_SCHEMA
     */
    static BamFile create(String name, String relativePath) {
        return new BamFile(name, relativePath)
    }

    /**
     * Rejects names that do not match NAME_SCHEMA.
     *
     * The find operator (=~) is evaluated via Groovy truth, so the check passes
     * as soon as the pattern is found in the name.
     */
    private void validateName() {
        if (!(this.name =~ NAME_SCHEMA)) {
            // Message previously referred to the "summary" schema (copy-paste slip).
            throw new IllegalArgumentException("Name must match the Nanopore Bam file schema!")
        }
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package life.qbic.datamodel.datasets.datastructure.folders.nanopore

import life.qbic.datamodel.datasets.datastructure.folders.DataFolder

/**
 * A DataFolder specialisation for the Oxford Nanopore bam_fail directory.
 *
 * The folder name is validated against NAME_SCHEMA when the folder is created
 * with a name. Its children are expected to be bam files or nested folders;
 * this class itself does not inspect them.
 */
class BamFailFolder extends DataFolder {

    private static final String NAME_SCHEMA = /bam_fail/

    protected BamFailFolder() {}

    /**
     * Builds the folder and checks its name.
     *
     * @param name The folder name
     * @param relativePath The relative path of the folder
     * @param children A list with child elements of the folder
     * @throws IllegalArgumentException if the name does not match NAME_SCHEMA
     */
    protected BamFailFolder(String name, String relativePath, List<?> children) {
        super(name, relativePath, children)
        validateName()
    }

    /**
     * Factory method for BamFailFolder objects.
     *
     * @param name The folder name
     * @param relativePath The relative path of the folder
     * @param children A list with child elements of the folder
     * @return A new instance of a BamFailFolder object
     * @throws IllegalArgumentException if the name does not match NAME_SCHEMA
     */
    static BamFailFolder create(String name, String relativePath, List<?> children) {
        return new BamFailFolder(name, relativePath, children)
    }

    /**
     * Accepts the name when NAME_SCHEMA is found in it (Groovy find operator),
     * otherwise raises an IllegalArgumentException.
     */
    private void validateName() {
        if (this.name =~ NAME_SCHEMA) {
            return
        }
        throw new IllegalArgumentException("Name must match the Nanopore BamFail directory schema!")
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package life.qbic.datamodel.datasets.datastructure.folders.nanopore

import life.qbic.datamodel.datasets.datastructure.folders.DataFolder

/**
 * A DataFolder specialisation for the Oxford Nanopore bam_pass directory.
 *
 * The folder name is validated against NAME_SCHEMA when the folder is created
 * with a name. Its children are expected to be bam files or nested folders;
 * this class itself does not inspect them.
 */
class BamPassFolder extends DataFolder {

    private static final String NAME_SCHEMA = /bam_pass/

    protected BamPassFolder() {}

    /**
     * Builds the folder and checks its name.
     *
     * @param name The folder name
     * @param relativePath The relative path of the folder
     * @param children A list with child elements of the folder
     * @throws IllegalArgumentException if the name does not match NAME_SCHEMA
     */
    protected BamPassFolder(String name, String relativePath, List<?> children) {
        super(name, relativePath, children)
        validateName()
    }

    /**
     * Factory method for BamPassFolder objects.
     *
     * @param name The folder name
     * @param relativePath The relative path of the folder
     * @param children A list with child elements of the folder
     * @return A new instance of a BamPassFolder object
     * @throws IllegalArgumentException if the name does not match NAME_SCHEMA
     */
    static BamPassFolder create(String name, String relativePath, List<?> children) {
        return new BamPassFolder(name, relativePath, children)
    }

    /**
     * Accepts the name when NAME_SCHEMA is found in it (Groovy find operator),
     * otherwise raises an IllegalArgumentException.
     */
    private void validateName() {
        if (this.name =~ NAME_SCHEMA) {
            return
        }
        throw new IllegalArgumentException("Name must match the Nanopore BamPass directory schema!")
    }
}

0 comments on commit d43dd4a

Please sign in to comment.