From aba60afa850d11c64bf2c1b0a5de2db61cb48b86 Mon Sep 17 00:00:00 2001 From: wow-such-code Date: Thu, 9 Nov 2023 15:59:23 +0100 Subject: [PATCH] allow maxquant data containing the 'combined' subfolder (#129) * re-enable maxquant data containing the combined subfolder Co-authored-by: Steffengreiner --- .../life/qbic/utils/MaxQuantParser.groovy | 14 +++++++-- .../life/qbic/utils/MaxQuantParserSpec.groovy | 31 +++++++++++++++++++ .../validates2/QABCD_sample_ids.txt | 0 .../validates2/combined/txt/allPeptides.txt | 0 .../validates2/combined/txt/evidence.txt | 0 .../validates2/combined/txt/parameters.txt | 0 .../validates2/combined/txt/peptides.txt | 0 .../validates2/combined/txt/proteinGroups.txt | 0 .../maxquant-run-output/validates2/mqpar.xml | 0 9 files changed, 42 insertions(+), 3 deletions(-) create mode 100644 src/test/resources/dummyFileSystem/maxquant-run-output/validates2/QABCD_sample_ids.txt create mode 100644 src/test/resources/dummyFileSystem/maxquant-run-output/validates2/combined/txt/allPeptides.txt create mode 100644 src/test/resources/dummyFileSystem/maxquant-run-output/validates2/combined/txt/evidence.txt create mode 100644 src/test/resources/dummyFileSystem/maxquant-run-output/validates2/combined/txt/parameters.txt create mode 100644 src/test/resources/dummyFileSystem/maxquant-run-output/validates2/combined/txt/peptides.txt create mode 100644 src/test/resources/dummyFileSystem/maxquant-run-output/validates2/combined/txt/proteinGroups.txt create mode 100644 src/test/resources/dummyFileSystem/maxquant-run-output/validates2/mqpar.xml diff --git a/src/main/groovy/life/qbic/utils/MaxQuantParser.groovy b/src/main/groovy/life/qbic/utils/MaxQuantParser.groovy index aed1088f..edc427bf 100644 --- a/src/main/groovy/life/qbic/utils/MaxQuantParser.groovy +++ b/src/main/groovy/life/qbic/utils/MaxQuantParser.groovy @@ -118,7 +118,7 @@ class MaxQuantParser implements DatasetParser { rootChildren.each { currentChild -> if (currentChild.containsKey("children")) { 
//folder - parseTxtFolder(map) + parseSubFolders(map) } else if (currentChild.containsKey("fileType")) { //file String name = currentChild.get("name") @@ -132,7 +132,7 @@ class MaxQuantParser implements DatasetParser { } /** - * Method which adapts the parsed content of the txt directory in place to the expected file structure. + * Method which adapts the parsed content of the txt directory in place to the expected file structure. The txt directory may reside inside an optional 'combined' directory. * @see {valid datastructure example} * * After parsing, the files of the txt directory are contained in the children property of the root directory. @@ -140,10 +140,18 @@ class MaxQuantParser implements DatasetParser { * @param maxQuantInformation a nested map representing the parsed fileTree structure * @since 1.9.0 */ - private static void parseTxtFolder(Map maxQuantInformation) { + private static void parseSubFolders(Map maxQuantInformation) { List rootFolderInformation = maxQuantInformation.get("children") as List def txtFolderInformation rootFolderInformation.findAll { map -> + if (map.get("name") == "combined") { + def combinedFolderInformation = map.get("children") as List + combinedFolderInformation.findAll() { innerMap -> + if (innerMap.get("name") == "txt") { + txtFolderInformation = innerMap.get("children") as List + } + } + } if (map.get("name") == "txt") { txtFolderInformation = map.get("children") as List } diff --git a/src/test/groovy/life/qbic/utils/MaxQuantParserSpec.groovy b/src/test/groovy/life/qbic/utils/MaxQuantParserSpec.groovy index c2bfe703..7e0bb8d3 100644 --- a/src/test/groovy/life/qbic/utils/MaxQuantParserSpec.groovy +++ b/src/test/groovy/life/qbic/utils/MaxQuantParserSpec.groovy @@ -52,6 +52,37 @@ class MaxQuantParserSpec extends Specification { assert maxQuantRunResult.proteinGroups.getName()== "proteinGroups.txt" } + def "parsing the old file structure with combined folder returns a maxQuantRunResult object"() { + given: "A valid maxQuant run 
output data structure" + def pathToDirectory = Paths.get(exampleDirectoriesRoot, "validates2") + when: "we parse this valid structure" + MaxQuantRunResult maxQuantRunResult = maxQuantParser.parseFrom(pathToDirectory) + then: "we expect no exception should be thrown" + assert maxQuantRunResult instanceof MaxQuantRunResult + //Root files can be parsed + assert maxQuantRunResult.runParameters.getRelativePath() == "./mqpar.xml" + assert maxQuantRunResult.runParameters.getName()== "mqpar.xml" + + assert maxQuantRunResult.sampleIds.getRelativePath() == "./QABCD_sample_ids.txt" + assert maxQuantRunResult.sampleIds.getName()== "QABCD_sample_ids.txt" + + //Files in ./combined/txt/ can be parsed + assert maxQuantRunResult.allPeptides.getRelativePath() == "./combined/txt/allPeptides.txt" + assert maxQuantRunResult.allPeptides.getName()== "allPeptides.txt" + + assert maxQuantRunResult.evidence.getRelativePath() == "./combined/txt/evidence.txt" + assert maxQuantRunResult.evidence.getName()== "evidence.txt" + + assert maxQuantRunResult.parameters.getRelativePath() == "./combined/txt/parameters.txt" + assert maxQuantRunResult.parameters.getName()== "parameters.txt" + + assert maxQuantRunResult.peptides.getRelativePath() == "./combined/txt/peptides.txt" + assert maxQuantRunResult.peptides.getName()== "peptides.txt" + + assert maxQuantRunResult.proteinGroups.getRelativePath() == "./combined/txt/proteinGroups.txt" + assert maxQuantRunResult.proteinGroups.getName()== "proteinGroups.txt" + } + def "parsing an invalid file structure throws DatasetValidationException"() { given: def pathToDirectory = Paths.get(exampleDirectoriesRoot, "fails/missing_txt_directory") diff --git a/src/test/resources/dummyFileSystem/maxquant-run-output/validates2/QABCD_sample_ids.txt b/src/test/resources/dummyFileSystem/maxquant-run-output/validates2/QABCD_sample_ids.txt new file mode 100644 index 00000000..e69de29b diff --git 
a/src/test/resources/dummyFileSystem/maxquant-run-output/validates2/combined/txt/allPeptides.txt b/src/test/resources/dummyFileSystem/maxquant-run-output/validates2/combined/txt/allPeptides.txt new file mode 100644 index 00000000..e69de29b diff --git a/src/test/resources/dummyFileSystem/maxquant-run-output/validates2/combined/txt/evidence.txt b/src/test/resources/dummyFileSystem/maxquant-run-output/validates2/combined/txt/evidence.txt new file mode 100644 index 00000000..e69de29b diff --git a/src/test/resources/dummyFileSystem/maxquant-run-output/validates2/combined/txt/parameters.txt b/src/test/resources/dummyFileSystem/maxquant-run-output/validates2/combined/txt/parameters.txt new file mode 100644 index 00000000..e69de29b diff --git a/src/test/resources/dummyFileSystem/maxquant-run-output/validates2/combined/txt/peptides.txt b/src/test/resources/dummyFileSystem/maxquant-run-output/validates2/combined/txt/peptides.txt new file mode 100644 index 00000000..e69de29b diff --git a/src/test/resources/dummyFileSystem/maxquant-run-output/validates2/combined/txt/proteinGroups.txt b/src/test/resources/dummyFileSystem/maxquant-run-output/validates2/combined/txt/proteinGroups.txt new file mode 100644 index 00000000..e69de29b diff --git a/src/test/resources/dummyFileSystem/maxquant-run-output/validates2/mqpar.xml b/src/test/resources/dummyFileSystem/maxquant-run-output/validates2/mqpar.xml new file mode 100644 index 00000000..e69de29b