From d43dd4a5e656b9cbd3d392c4f19d346097c1a669 Mon Sep 17 00:00:00 2001 From: Steffengreiner Date: Thu, 24 Oct 2024 17:51:19 +0200 Subject: [PATCH] Provide Data Files and Folders for Bam and Pod5 registration --- .../datasets/OxfordNanoporeExperiment.groovy | 3 ++ .../datasets/OxfordNanoporeMeasurement.groovy | 48 +++++++++++-------- .../files/nanopore/BamFile.groovy | 30 ++++++++++++ .../folders/nanopore/BamFailFolder.groovy | 39 +++++++++++++++ .../folders/nanopore/BamPassFolder.groovy | 39 +++++++++++++++ 5 files changed, 140 insertions(+), 19 deletions(-) create mode 100644 src/main/groovy/life/qbic/datamodel/datasets/datastructure/files/nanopore/BamFile.groovy create mode 100644 src/main/groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/BamFailFolder.groovy create mode 100644 src/main/groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/BamPassFolder.groovy diff --git a/src/main/groovy/life/qbic/datamodel/datasets/OxfordNanoporeExperiment.groovy b/src/main/groovy/life/qbic/datamodel/datasets/OxfordNanoporeExperiment.groovy index a8179982b..5183fb5ad 100644 --- a/src/main/groovy/life/qbic/datamodel/datasets/OxfordNanoporeExperiment.groovy +++ b/src/main/groovy/life/qbic/datamodel/datasets/OxfordNanoporeExperiment.groovy @@ -244,6 +244,7 @@ final class OxfordNanoporeExperiment implements ExperimentFolder { FAST5_FILE(FQDN_FILES + ".Fast5File"), FASTQ_FILE(FQDN_FILES + ".FastQFile"), FASTQ_ZIPPED_FILE(FQDN_FILES + ".FastQZippedFile"), + BAM_FILE(FQDN_FILES + ".BamFile"), POD5_FILE(FQDN_FILES + ".Pod5File"), FINAL_SUMMARY_LOG(FQDN_FILES + ".FinalSummaryLog"), MUX_SCAN_DATA_LOG(FQDN_FILES + ".MuxScanDataLog"), @@ -308,6 +309,8 @@ final class OxfordNanoporeExperiment implements ExperimentFolder { POD5_PASS_FOLDER(FQDN_FOLDERS + ".Pod5PassFolder"), POD5_FAIL_FOLDER(FQDN_FOLDERS + ".Pod5FailFolder"), POD5_SKIP_FOLDER(FQDN_FOLDERS + ".Pod5SkipFolder"), + BAM_PASS_FOLDER(FQDN_FOLDERS + ".BamPassFolder"), + BAM_FAIL_FOLDER(FQDN_FOLDERS + 
".BamFailFolder"), OTHER_REPORTS_FOLDER(FQDN_FOLDERS + ".OtherReportsFolder"), BASECALLING_FOLDER(FQDN_FOLDERS + ".BasecallingFolder"), diff --git a/src/main/groovy/life/qbic/datamodel/datasets/OxfordNanoporeMeasurement.groovy b/src/main/groovy/life/qbic/datamodel/datasets/OxfordNanoporeMeasurement.groovy index 463075248..df002c9ea 100644 --- a/src/main/groovy/life/qbic/datamodel/datasets/OxfordNanoporeMeasurement.groovy +++ b/src/main/groovy/life/qbic/datamodel/datasets/OxfordNanoporeMeasurement.groovy @@ -102,6 +102,12 @@ final class OxfordNanoporeMeasurement { case Pod5SkipFolder: folders["pod5skip"] = element as Pod5SkipFolder break + case BamPassFolder: + folders["bampass"] = element as BamPassFolder + break + case BamFailFolder: + folders["bamfail"] = element as BamFailFolder + break case DataFile: logFilesCollection.add(element as DataFile) break @@ -118,10 +124,13 @@ final class OxfordNanoporeMeasurement { if (areFast5FoldersInMeasurement() && areFastQFoldersInMeasurement()) { isValid = true } - //// We need to ensure that pod5_skip and fast5_skip information is provided if dorado basecaller was used + //// We need to ensure that pod5_pass and pod5_fail information is provided if dorado basecaller was used if (arePod5FoldersInMeasurement()) { isValid = true } + if (areBamFoldersInMeasurement()) { + isValid = true + } if (isValid == false) { throw new IllegalStateException("No valid data is contained in measurement") } @@ -137,7 +146,11 @@ final class OxfordNanoporeMeasurement { } - // Condition three: Don't allow empty Pod5 skip and fast5 skip folder + // Condition three: At least one of the pod5 pass or pod5 fail folders must be in the measurement private boolean arePod5FoldersInMeasurement() { - return isDataFolderInMeasurement("fast5skip") || isDataFolderInMeasurement("pod5skip") + return isDataFolderInMeasurement("pod5pass") || isDataFolderInMeasurement("pod5fail") + } + + private boolean areBamFoldersInMeasurement() { + return isDataFolderInMeasurement("bampass") || isDataFolderInMeasurement("bamfail") } private boolean isDataFolderInMeasurement(String 
string) { @@ -159,6 +172,10 @@ final class OxfordNanoporeMeasurement { * "fast5pass": DataFolder * "fastqfail": DataFolder * "fastqpass": DataFolder + * "bamfail": DataFolder + * "bampass": DataFolder + * "pod5fail": DataFolder + * "pod5pass": DataFolder * "Other sample code": // In case of pooled samples * ... * @return nested Map with sample codes and data folders @@ -310,25 +327,18 @@ final class OxfordNanoporeMeasurement { private Map> prepareRawData(String sampleId) { final def result = new HashMap() final def dataFolders = [ - "fast5fail" : (folders.get("fast5fail") as DataFolder), - "fast5pass" : (folders.get("fast5pass") as DataFolder), - "fastqpass" : (folders.get("fastqpass") as DataFolder), - "fastqfail" : (folders.get("fastqfail") as DataFolder) + "fast5fail": (folders.get("fast5fail") as DataFolder), + "fast5pass": (folders.get("fast5pass") as DataFolder), + "fast5skip": (folders.get("fast5skip") as DataFolder), + "fastqpass": (folders.get("fastqpass") as DataFolder), + "fastqfail": (folders.get("fastqfail") as DataFolder), + "pod5pass": (folders.get("pod5pass") as DataFolder), + "pod5fail": (folders.get("pod5fail") as DataFolder), + "pod5skip": (folders.get("pod5skip") as DataFolder), + "bampass": (folders.get("bampass") as DataFolder), + "bamfail": (folders.get("bamfail") as DataFolder) ] if (hasBasecallingData) dataFolders.put("basecalling", (folders.get("basecalling") as DataFolder)) - //Only add dorado based minimal required datafolders if present - if (folders.get("fast5skip") != null) { - dataFolders.put("fast5skip", (folders.get("fast5skip") as DataFolder)) - } - if (folders.get("pod5skip") != null) { - dataFolders.put("pod5skip", (folders.get("pod5skip") as DataFolder)) - } - if (folders.get("pod5fail") != null) { - dataFolders.put("pod5fail", (folders.get("pod5fail") as DataFolder)) - } - if (folders.get("pod5pass") != null) { - dataFolders.put("pod5pass", (folders.get("pod5pass") as DataFolder)) - } result.put(sampleId, dataFolders) return 
result } diff --git a/src/main/groovy/life/qbic/datamodel/datasets/datastructure/files/nanopore/BamFile.groovy b/src/main/groovy/life/qbic/datamodel/datasets/datastructure/files/nanopore/BamFile.groovy new file mode 100644 index 000000000..49e82ff79 --- /dev/null +++ b/src/main/groovy/life/qbic/datamodel/datasets/datastructure/files/nanopore/BamFile.groovy @@ -0,0 +1,30 @@ +package life.qbic.datamodel.datasets.datastructure.files.nanopore + +import life.qbic.datamodel.datasets.datastructure.files.DataFile + +/** + * A specialisation of a DataFile, represents an Oxford Nanopore bam file + * The file name is validated on construction: it must end with .bam, otherwise an IllegalArgumentException is thrown + */ +class BamFile extends DataFile { + + final private static String FILE_TYPE = "bam" + + final private static String NAME_SCHEMA = /.*\.bam$/ + + protected BamFile(String name, String relativePath) { + super(name, relativePath, FILE_TYPE) + validateName() + } + + static BamFile create(String name, String relativePath) { + return new BamFile(name, relativePath) + } + + private void validateName() { + if (!(this.name =~ NAME_SCHEMA)) { + throw new IllegalArgumentException("Name must match the Nanopore Bam file schema!") + } + } + +} diff --git a/src/main/groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/BamFailFolder.groovy b/src/main/groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/BamFailFolder.groovy new file mode 100644 index 000000000..a5adce73e --- /dev/null +++ b/src/main/groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/BamFailFolder.groovy @@ -0,0 +1,39 @@ +package life.qbic.datamodel.datasets.datastructure.folders.nanopore + +import life.qbic.datamodel.datasets.datastructure.folders.DataFolder + +/** + * A special case of a DataFolder, its name is always bam_fail. 
+ * + * Its children field is expected to hold the child elements passed in on creation (presumably BamFile entries - TODO confirm) + * NOTE(review): validateName() uses the find operator (=~), so any name that merely contains bam_fail is accepted - confirm whether an exact match (==~) is intended + */ +class BamFailFolder extends DataFolder { + + final private static String NAME_SCHEMA = /bam_fail/ + + protected BamFailFolder() {} + + protected BamFailFolder(String name, String relativePath, List children) { + super(name, relativePath, children) + validateName() + } + + /** + * Creates a new instance of a BamFailFolder object + * + * @param name The folder name + * @param relativePath The relative path of the folder + * @param children A list with child elements of the folder + * @return A new instance of a BamFailFolder object + */ + static BamFailFolder create(String name, String relativePath, List children) { + new BamFailFolder(name, relativePath, children) + } + + private void validateName() { + if (!(this.name =~ NAME_SCHEMA)) { + throw new IllegalArgumentException("Name must match the Nanopore BamFail directory schema!") + } + } +} diff --git a/src/main/groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/BamPassFolder.groovy b/src/main/groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/BamPassFolder.groovy new file mode 100644 index 000000000..e41136a30 --- /dev/null +++ b/src/main/groovy/life/qbic/datamodel/datasets/datastructure/folders/nanopore/BamPassFolder.groovy @@ -0,0 +1,39 @@ +package life.qbic.datamodel.datasets.datastructure.folders.nanopore + +import life.qbic.datamodel.datasets.datastructure.folders.DataFolder + +/** + * A special case of a DataFolder, its name is always bam_pass. 
+ * + * Its children field is expected to hold the child elements passed in on creation (presumably BamFile entries - TODO confirm) + * NOTE(review): validateName() uses the find operator (=~), so any name that merely contains bam_pass is accepted - confirm whether an exact match (==~) is intended + */ +class BamPassFolder extends DataFolder { + + final private static String NAME_SCHEMA = /bam_pass/ + + protected BamPassFolder() {} + + protected BamPassFolder(String name, String relativePath, List children) { + super(name, relativePath, children) + validateName() + } + + /** + * Creates a new instance of a BamPassFolder object + * + * @param name The folder name + * @param relativePath The relative path of the folder + * @param children A list with child elements of the folder + * @return A new instance of a BamPassFolder object + */ + static BamPassFolder create(String name, String relativePath, List children) { + new BamPassFolder(name, relativePath, children) + } + + private void validateName() { + if (!(this.name =~ NAME_SCHEMA)) { + throw new IllegalArgumentException("Name must match the Nanopore BamPass directory schema!") + } + } +}