diff --git a/project-management/src/main/java/life/qbic/projectmanagement/application/measurement/MeasurementService.java b/project-management/src/main/java/life/qbic/projectmanagement/application/measurement/MeasurementService.java
index 2e0d29fc1..8850df4ac 100644
--- a/project-management/src/main/java/life/qbic/projectmanagement/application/measurement/MeasurementService.java
+++ b/project-management/src/main/java/life/qbic/projectmanagement/application/measurement/MeasurementService.java
@@ -554,7 +554,7 @@ private int readInjectionVolume(String value) throws NumberFormatException {
if (value.isBlank()) {
return -1;
}
- return Integer.parseInt(value);
+ return (int) Double.parseDouble(value);
}
/**
diff --git a/project-management/src/main/java/life/qbic/projectmanagement/domain/model/sample/SampleCode.java b/project-management/src/main/java/life/qbic/projectmanagement/domain/model/sample/SampleCode.java
index 0042fb8c0..8de5cbfdb 100644
--- a/project-management/src/main/java/life/qbic/projectmanagement/domain/model/sample/SampleCode.java
+++ b/project-management/src/main/java/life/qbic/projectmanagement/domain/model/sample/SampleCode.java
@@ -19,7 +19,7 @@ protected SampleCode() {
// needed for JPA
}
- private SampleCode(String code) {
+ private SampleCode(String code) throws IllegalArgumentException{
Objects.requireNonNull(code, "Sample code must not be null");
if (code.isBlank()) {
throw new IllegalArgumentException("Sample code must not be blank");
@@ -27,7 +27,7 @@ private SampleCode(String code) {
this.code = code;
}
- public static SampleCode create(String code) {
+ public static SampleCode create(String code) throws IllegalArgumentException {
return new SampleCode(code);
}
diff --git a/user-interface/src/main/bundles/dev.bundle b/user-interface/src/main/bundles/dev.bundle
index 72f81be38..3f4d38347 100644
Binary files a/user-interface/src/main/bundles/dev.bundle and b/user-interface/src/main/bundles/dev.bundle differ
diff --git a/user-interface/src/main/java/life/qbic/datamanager/parser/MeasurementMetadataConverter.java b/user-interface/src/main/java/life/qbic/datamanager/parser/MeasurementMetadataConverter.java
new file mode 100644
index 000000000..bc7e3ecae
--- /dev/null
+++ b/user-interface/src/main/java/life/qbic/datamanager/parser/MeasurementMetadataConverter.java
@@ -0,0 +1,52 @@
+package life.qbic.datamanager.parser;
+
+import java.util.List;
+import life.qbic.projectmanagement.application.measurement.MeasurementMetadata;
+
+/**
+ * Measurement Metadata Converter
+ *
+ * Measurement metadata converters enable the client to process a {@link ParsingResult} object and
+ * convert it into known implementations of the {@link MeasurementMetadata} interface.
+ *
+ * @since 1.4.0
+ */
+public interface MeasurementMetadataConverter {
+
+ /**
+ * Takes an instance of {@link ParsingResult} and tries to convert it to known implementations of
+ * the {@link MeasurementMetadata} interface.
+ *
+ * Currently supported implementations are:
+ *
+ *
+ * - NGS Measurement Metadata {@link life.qbic.projectmanagement.application.measurement.NGSMeasurementMetadata}
+ * - Proteomics Measurement Metadata {@link life.qbic.projectmanagement.application.measurement.ProteomicsMeasurementMetadata}
+ *
+ *
+ * @param parsingResult the parsing result to take as input for the conversion.
+ * @param ignoreMeasurementId whether to ignore the measurement identifier or not
+ * @return a list of converted implementations of {@link MeasurementMetadata}.
+ * @throws UnknownMetadataTypeException if no matching implementation of
+ * {@link MeasurementMetadata} can be associated from the
+ * provided {@link ParsingResult#keys()}.
+ * @since 1.4.0
+ */
+ List<? extends MeasurementMetadata> convert(ParsingResult parsingResult,
+ boolean ignoreMeasurementId)
+ throws UnknownMetadataTypeException;
+
+ /** Signals that the provided properties cannot be matched to any known metadata type. */
+ class UnknownMetadataTypeException extends RuntimeException {
+ public UnknownMetadataTypeException(String message) {
+ super(message);
+ }
+ }
+
+ /** Signals that sample identifier metadata needed for a conversion is missing. */
+ class MissingSampleIdException extends RuntimeException {
+ public MissingSampleIdException(String message) {
+ super(message);
+ }
+ }
+}
diff --git a/user-interface/src/main/java/life/qbic/datamanager/parser/MetadataConverter.java b/user-interface/src/main/java/life/qbic/datamanager/parser/MetadataConverter.java
new file mode 100644
index 000000000..d69933cd5
--- /dev/null
+++ b/user-interface/src/main/java/life/qbic/datamanager/parser/MetadataConverter.java
@@ -0,0 +1,369 @@
+package life.qbic.datamanager.parser;
+
+import static life.qbic.datamanager.parser.MetadataConverter.ProteomicsMeasurementProperty.COMMENT;
+import static life.qbic.datamanager.parser.MetadataConverter.ProteomicsMeasurementProperty.CYCLE;
+import static life.qbic.datamanager.parser.MetadataConverter.ProteomicsMeasurementProperty.DIGESTION_ENZYME;
+import static life.qbic.datamanager.parser.MetadataConverter.ProteomicsMeasurementProperty.DIGESTION_METHOD;
+import static life.qbic.datamanager.parser.MetadataConverter.ProteomicsMeasurementProperty.ENRICHMENT_METHOD;
+import static life.qbic.datamanager.parser.MetadataConverter.ProteomicsMeasurementProperty.FACILITY;
+import static life.qbic.datamanager.parser.MetadataConverter.ProteomicsMeasurementProperty.INJECTION_VOLUME;
+import static life.qbic.datamanager.parser.MetadataConverter.ProteomicsMeasurementProperty.INSTRUMENT;
+import static life.qbic.datamanager.parser.MetadataConverter.ProteomicsMeasurementProperty.LABEL;
+import static life.qbic.datamanager.parser.MetadataConverter.ProteomicsMeasurementProperty.LABELING_TYPE;
+import static life.qbic.datamanager.parser.MetadataConverter.ProteomicsMeasurementProperty.LCMS_METHOD;
+import static life.qbic.datamanager.parser.MetadataConverter.ProteomicsMeasurementProperty.LC_COLUMN;
+import static life.qbic.datamanager.parser.MetadataConverter.ProteomicsMeasurementProperty.MEASUREMENT_ID;
+import static life.qbic.datamanager.parser.MetadataConverter.ProteomicsMeasurementProperty.ORGANISATION_ID;
+import static life.qbic.datamanager.parser.MetadataConverter.ProteomicsMeasurementProperty.QBIC_SAMPLE_ID;
+import static life.qbic.datamanager.parser.MetadataConverter.ProteomicsMeasurementProperty.SAMPLE_POOL_GROUP;
+import static life.qbic.logging.service.LoggerFactory.logger;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.Set;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import life.qbic.datamanager.parser.ParsingResult.Row;
+import life.qbic.logging.api.Logger;
+import life.qbic.projectmanagement.application.measurement.Labeling;
+import life.qbic.projectmanagement.application.measurement.MeasurementMetadata;
+import life.qbic.projectmanagement.application.measurement.NGSMeasurementMetadata;
+import life.qbic.projectmanagement.application.measurement.ProteomicsMeasurementMetadata;
+import life.qbic.projectmanagement.domain.model.sample.SampleCode;
+
+/**
+ * Metadata Converter
+ *
+ * Enables clients to convert {@link ParsingResult} objects into lists of known metadata
+ * properties.
+ *
+ * Currently supported metadata properties cover:
+ *
+ *
+ * - Proteomics Measurement {@link ProteomicsMeasurementProperty}
+ * - NGS Measurement {@link NGSMeasurementProperty}
+ *
+ *
+ * @since 1.4.0
+ */
+public class MetadataConverter implements MeasurementMetadataConverter {
+
+ private static final Logger log = logger(MetadataConverter.class);
+
+ private MetadataConverter() {
+ }
+
+ public static MetadataConverter measurementConverter() {
+ return new MetadataConverter();
+ }
+
+ /**
+ * Generates a hit map, storing the number of matches of a defined set of String values (hit
+ * values), in a target of interest collection of String values.
+ *
+ * The resulting map will contain the number of occurrences of every value in the hit values
+ * collection found in the target collection to investigate.
+ *
+ * @param target the collection of interest to search in
+ * @param hitValues a set of distinct values, that should be represented in the hit result map
+ * @return a hit result map, containing the number of occurrences of every hit value in the target
+ * String collection (0, if no target was found for a value).
+ * @since 1.4.0
+ */
+ private static Map countHits(Collection target, Set hitValues,
+ String... ignoredProperties) {
+ Map hits = new HashMap<>();
+ for (String t : hitValues) {
+ hits.put(t, 0);
+ }
+ for (String s : target) {
+ if (hitValues.contains(s)) {
+ var currentHit = hits.get(s);
+ hits.put(s, ++currentHit);
+ }
+ }
+ for (String ignoredProperty : ignoredProperties) {
+ if (hits.containsKey((ignoredProperty))) {
+ hits.remove(ignoredProperty);
+ }
+ }
+ return hits;
+ }
+
+ static String sanitizeValue(String value) {
+ return value.trim().toLowerCase();
+ }
+
+ @Override
+ public List convert(ParsingResult parsingResult, boolean ignoreMeasurementId)
+ throws UnknownMetadataTypeException, MissingSampleIdException {
+ Objects.requireNonNull(parsingResult);
+ var properties = parsingResult.keys().keySet();
+ if (looksLikeNgsMeasurement(properties, ignoreMeasurementId)) {
+ return tryConversion(this::convertNGSMeasurement, parsingResult);
+ } else if (looksLikeProteomicsMeasurement(properties, ignoreMeasurementId)) {
+ return tryConversion(this::convertProteomicsMeasurement, parsingResult);
+ } else {
+ throw new UnknownMetadataTypeException(
+ "Unknown metadata type: cannot match properties to any known metadata type. Provided [%s]".formatted(
+ String.join(", ", properties)));
+ }
+ }
+
+ private List tryConversion(
+ Function> converter, ParsingResult parsingResult) {
+ try {
+ return converter.apply(parsingResult);
+ } catch (IllegalArgumentException e) {
+ throw new MissingSampleIdException("Missing sample ID metadata");
+ }
+ }
+
+ private List convertProteomicsMeasurement(ParsingResult parsingResult) {
+ var result = new ArrayList();
+ var keyIndices = parsingResult.keys();
+ for (ParsingResult.Row row : parsingResult.rows()) {
+ // we use -1 as default index if a property cannot be accessed, thus ending up in an empty String
+ var pxpMetaDatum = new ProteomicsMeasurementMetadata(
+ safeListAccess(row.values(), keyIndices.getOrDefault(MEASUREMENT_ID.propertyName(), -1),
+ ""),
+ SampleCode.create(
+ safeListAccess(row.values(),
+ keyIndices.getOrDefault(QBIC_SAMPLE_ID.propertyName(), -1),
+ "")),
+ safeListAccess(row.values(), keyIndices.getOrDefault(ORGANISATION_ID.propertyName(), -1),
+ ""),
+ safeListAccess(row.values(), keyIndices.getOrDefault(INSTRUMENT.propertyName(), -1), ""),
+ safeListAccess(row.values(),
+ keyIndices.getOrDefault(SAMPLE_POOL_GROUP.propertyName(), -1),
+ ""),
+ safeListAccess(row.values(), keyIndices.getOrDefault(FACILITY.propertyName(), -1), ""),
+ safeListAccess(row.values(), keyIndices.getOrDefault(CYCLE.propertyName(), -1), ""),
+ safeListAccess(row.values(), keyIndices.getOrDefault(DIGESTION_ENZYME.propertyName(), -1),
+ ""),
+ safeListAccess(row.values(), keyIndices.getOrDefault(DIGESTION_METHOD.propertyName(), -1),
+ ""),
+ safeListAccess(row.values(),
+ keyIndices.getOrDefault(ENRICHMENT_METHOD.propertyName(), -1),
+ ""),
+ safeListAccess(row.values(), keyIndices.getOrDefault(INJECTION_VOLUME.propertyName(), -1),
+ ""),
+ safeListAccess(row.values(), keyIndices.getOrDefault(LC_COLUMN.propertyName(), -1), ""),
+ safeListAccess(row.values(), keyIndices.getOrDefault(LCMS_METHOD.propertyName(), -1), ""),
+ new Labeling(
+ safeListAccess(row.values(),
+ keyIndices.getOrDefault(LABELING_TYPE.propertyName(), -1),
+ ""),
+ safeListAccess(row.values(), keyIndices.getOrDefault(LABEL.propertyName(), -1), "")),
+ safeListAccess(row.values(), keyIndices.getOrDefault(COMMENT.propertyName(), -1), "")
+ );
+ result.add(pxpMetaDatum);
+ }
+ return result;
+ }
+
+ private String safeListAccess(List list, Integer index, String defaultValue) {
+ if (index >= list.size() || index < 0) {
+ return defaultValue;
+ }
+ return list.get(index);
+ }
+
+ private List convertNGSMeasurement(ParsingResult parsingResult) {
+ var result = new ArrayList();
+ var keyIndices = parsingResult.keys();
+ for (Row row : parsingResult.rows()) {
+ var ngsMeasurementMetadata = new NGSMeasurementMetadata(
+ safeListAccess(row.values(), keyIndices.getOrDefault(MEASUREMENT_ID.propertyName(), -1),
+ ""),
+ List.of(SampleCode.create(
+ safeListAccess(row.values(),
+ keyIndices.getOrDefault(QBIC_SAMPLE_ID.propertyName(), -1),
+ ""))),
+ safeListAccess(row.values(), keyIndices.getOrDefault(ORGANISATION_ID.propertyName(), -1),
+ ""),
+ safeListAccess(row.values(), keyIndices.getOrDefault(INSTRUMENT.propertyName(), -1), ""),
+ safeListAccess(row.values(), keyIndices.getOrDefault(FACILITY.propertyName(), -1), ""),
+ safeListAccess(row.values(), keyIndices.getOrDefault(
+ NGSMeasurementProperty.SEQUENCING_READ_TYPE.propertyName(), -1), ""),
+ safeListAccess(row.values(),
+ keyIndices.getOrDefault(NGSMeasurementProperty.LIBRARY_KIT.propertyName(), -1), ""),
+ safeListAccess(row.values(),
+ keyIndices.getOrDefault(NGSMeasurementProperty.FLOW_CELL.propertyName(), -1),
+ ""),
+ safeListAccess(row.values(), keyIndices.getOrDefault(
+ NGSMeasurementProperty.SEQUENCING_RUN_PROTOCOL.propertyName(), -1),
+ ""),
+ safeListAccess(row.values(),
+ keyIndices.getOrDefault(SAMPLE_POOL_GROUP.propertyName(), -1),
+ ""),
+ safeListAccess(row.values(),
+ keyIndices.getOrDefault(NGSMeasurementProperty.INDEX_I7.propertyName(), -1),
+ ""),
+ safeListAccess(row.values(),
+ keyIndices.getOrDefault(NGSMeasurementProperty.INDEX_I5.propertyName(), -1),
+ ""),
+ safeListAccess(row.values(), keyIndices.getOrDefault(COMMENT.propertyName(), -1), "")
+ );
+ result.add(ngsMeasurementMetadata);
+ }
+ return result;
+ }
+
+ private boolean looksLikeNgsMeasurement(Collection properties, boolean ignoreID) {
+ var formattedProperties = properties.stream().map(String::toLowerCase)
+ .collect(Collectors.toList());
+ Map hitMap;
+ if (ignoreID) {
+ formattedProperties.remove(MEASUREMENT_ID.propertyName());
+ hitMap = countHits(formattedProperties,
+ Arrays.stream(NGSMeasurementProperty.values())
+ .map(NGSMeasurementProperty::propertyName).collect(
+ Collectors.toSet()), MEASUREMENT_ID.propertyName());
+ } else {
+ hitMap = countHits(formattedProperties,
+ Arrays.stream(NGSMeasurementProperty.values())
+ .map(NGSMeasurementProperty::propertyName).collect(
+ Collectors.toSet()));
+ }
+ var missingProperties = new ArrayList<>();
+ for (Entry entry : hitMap.entrySet()) {
+ if (entry.getValue() == 0) {
+ missingProperties.add(entry.getKey());
+ }
+ }
+ if (missingProperties.isEmpty()) {
+ return true;
+ } else {
+ log.debug("Missing properties for NGS measurement: %s".formatted(missingProperties));
+ }
+ return false;
+ }
+
+ private boolean looksLikeProteomicsMeasurement(Collection properties, boolean ignoreID) {
+ var formattedProperties = properties.stream().map(String::toLowerCase)
+ .collect(Collectors.toList());
+ Map hitMap;
+ if (ignoreID) {
+ formattedProperties.remove(MEASUREMENT_ID.propertyName());
+ hitMap = countHits(formattedProperties,
+ Arrays.stream(ProteomicsMeasurementProperty.values())
+ .map(ProteomicsMeasurementProperty::propertyName).collect(
+ Collectors.toSet()), MEASUREMENT_ID.propertyName());
+ } else {
+ hitMap = countHits(formattedProperties,
+ Arrays.stream(ProteomicsMeasurementProperty.values())
+ .map(ProteomicsMeasurementProperty::propertyName).collect(
+ Collectors.toSet()));
+ }
+ var missingProperties = new ArrayList<>();
+ for (Entry entry : hitMap.entrySet()) {
+ if (entry.getValue() == 0) {
+ missingProperties.add(entry.getKey());
+ }
+ }
+ if (missingProperties.isEmpty()) {
+ return true;
+ } else {
+ log.debug("Missing properties for proteomics measurement: %s".formatted(missingProperties));
+ }
+ return false;
+ }
+
+
+ enum ProteomicsMeasurementProperty {
+ MEASUREMENT_ID("measurement id"),
+ QBIC_SAMPLE_ID("qbic sample id"),
+ SAMPLE_POOL_GROUP("sample pool group"),
+ ORGANISATION_ID("organisation id"),
+ FACILITY("facility"),
+ INSTRUMENT("instrument"),
+ CYCLE("cycle/fraction name"),
+ DIGESTION_METHOD("digestion method"),
+ DIGESTION_ENZYME("digestion enzyme"),
+ ENRICHMENT_METHOD("enrichment method"),
+ INJECTION_VOLUME("injection volume (ul)"),
+ LC_COLUMN("lc column"),
+ LCMS_METHOD("lcms method"),
+ LABELING_TYPE("labeling type"),
+ LABEL("label"),
+ COMMENT("comment");
+
+ private final String name;
+
+ ProteomicsMeasurementProperty(String value) {
+ this.name = value;
+ }
+
+ static Optional fromString(String value) {
+ var sanitizedValue = sanitizeValue(value);
+ return Arrays.stream(ProteomicsMeasurementProperty.values())
+ .filter(property -> property.propertyName().equals(sanitizedValue)).findFirst();
+ }
+
+ static boolean valueMatchesAnyProperty(String value) {
+ // compare with propertyName(): Enum#name() is the constant identifier, e.g. MEASUREMENT_ID
+ return Arrays.stream(ProteomicsMeasurementProperty.values())
+ .map(ProteomicsMeasurementProperty::propertyName)
+ .anyMatch(sanitizeValue(value)::equalsIgnoreCase);
+ }
+
+ public String propertyName() {
+ return name;
+ }
+
+ }
+
+ enum NGSMeasurementProperty {
+ MEASUREMENT_ID("measurement id"),
+ ORGANISATION_ID("organisation id"),
+ SAMPLE_POOL_GROUP("sample pool group"),
+ FACILITY("facility"),
+ INSTRUMENT("instrument"),
+ SEQUENCING_READ_TYPE("sequencing read type"),
+ LIBRARY_KIT("library kit"),
+ FLOW_CELL("flow cell"),
+ SEQUENCING_RUN_PROTOCOL("sequencing run protocol"),
+ INDEX_I7("index i7"),
+ INDEX_I5("index i5"),
+ COMMENT("comment");
+
+ private final String name;
+
+ NGSMeasurementProperty(String value) {
+ this.name = value;
+ }
+
+ /**
+ * Tries to convert an input property value to a known {@link NGSMeasurementProperty}.
+ *
+ * Leading and trailing whitespace as well as letter case will be ignored.
+ *
+ * @param value the presumed value to convert to a known {@link NGSMeasurementProperty}
+ * @return the matching property, or {@link Optional#empty()}.
+ * @since 1.4.0
+ */
+ static Optional fromStringTrimmed(String value) {
+ var sanitizedValue = sanitizeValue(value);
+ return Arrays.stream(NGSMeasurementProperty.values())
+ .filter(property -> property.propertyName().equalsIgnoreCase(sanitizedValue)).findFirst();
+ }
+
+ static boolean valueMatchesAnyProperty(String value) {
+ // compare with propertyName(): Enum#name() is the constant identifier, e.g. INDEX_I7
+ return Arrays.stream(NGSMeasurementProperty.values()).map(NGSMeasurementProperty::propertyName)
+ .anyMatch(sanitizeValue(value)::equalsIgnoreCase);
+ }
+
+ String propertyName() {
+ return name;
+ }
+ }
+}
diff --git a/user-interface/src/main/java/life/qbic/datamanager/parser/MetadataParser.java b/user-interface/src/main/java/life/qbic/datamanager/parser/MetadataParser.java
new file mode 100644
index 000000000..add4a6453
--- /dev/null
+++ b/user-interface/src/main/java/life/qbic/datamanager/parser/MetadataParser.java
@@ -0,0 +1,35 @@
+package life.qbic.datamanager.parser;
+
+import java.io.InputStream;
+
+/**
+ * Metadata Parser
+ *
+ * A generic interface to hide the details of input file formats to parse, and enables clients and
+ * downstream consumers to work with an intermediate abstraction of the content, which is contained
+ * in the {@link ParsingResult} object.
+ *
+ * @since 1.4.0
+ */
+public interface MetadataParser {
+
+ ParsingResult parse(InputStream inputStream);
+
+ class UnknownPropertiesException extends RuntimeException {
+
+ public UnknownPropertiesException(String message) {
+ super(message);
+ }
+ }
+
+ class ParsingException extends RuntimeException {
+
+ public ParsingException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
+ public ParsingException(String message) {
+ super(message);
+ }
+ }
+}
diff --git a/user-interface/src/main/java/life/qbic/datamanager/parser/ParsingResult.java b/user-interface/src/main/java/life/qbic/datamanager/parser/ParsingResult.java
new file mode 100644
index 000000000..95281917d
--- /dev/null
+++ b/user-interface/src/main/java/life/qbic/datamanager/parser/ParsingResult.java
@@ -0,0 +1,91 @@
+package life.qbic.datamanager.parser;
+
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Stream;
+
+/**
+ * Parsing Result
+ *
+ * A parsing result represents a two dimensional grid of information.
+ *
+ * A grid consists of values, organised as a list of rows. Every row is a list of String values.
+ *
+ * Its main feature is a property list (=> 'keys') with its String value and position in every row.
+ *
+ * Example for a grid:
+ *
+ *
+ *
+ * key 'A' - position 0 |
+ * key 'B' - position 1 |
+ * key 'C' - position 2 |
+ *
+ *
+ * Value A1 |
+ * Value B1 |
+ * Value C1 |
+ *
+ *
+ * Value A2 |
+ * Value B2 |
+ * Value C2 |
+ *
+ *
+ * ... |
+ * ... |
+ * ... |
+ *
+ *
+ *
+ * So the resulting stored positions of every key in a row can be accessed via {@link #keys()} and would look like:
+ *
+ *
+ * - A - 0
+ * - B - 1
+ * - C - 2
+ *
+ *
+ * and iterating through the rows would look like:
+ *
+ *
+ * - Value A1, Value B1, Value C1
+ * - Value A2, Value B2, Value C2
+ * - ...
+ *
+ *
+ * @since 1.4.0
+ */
+public record ParsingResult(Map keys, List rows) {
+
+ public ParsingResult(Map keys, List rows) {
+ this.keys = Map.copyOf(keys);
+ this.rows = List.copyOf(rows);
+ }
+
+ public Stream rowsStream() {
+ return rows.stream();
+ }
+
+ public Iterator iterator() {
+ return rows.iterator();
+ }
+
+ public List getRow(int rowIndex) {
+ if (rowIndex < 0 || rowIndex >= rows.size()) {
+ throw new IndexOutOfBoundsException(
+ "Row index out of bounds: %s but size is %s".formatted(rowIndex, rows.size()));
+ }
+ return rows.get(rowIndex).values;
+ }
+
+ public record Row(List values) {
+
+ public Row(List values) {
+ this.values = List.copyOf(values);
+ }
+
+ }
+
+}
diff --git a/user-interface/src/main/java/life/qbic/datamanager/parser/Sanitizer.java b/user-interface/src/main/java/life/qbic/datamanager/parser/Sanitizer.java
new file mode 100644
index 000000000..f518a7c41
--- /dev/null
+++ b/user-interface/src/main/java/life/qbic/datamanager/parser/Sanitizer.java
@@ -0,0 +1,47 @@
+package life.qbic.datamanager.parser;
+
+import java.util.Arrays;
+import java.util.Objects;
+
+/**
+ * Sanitizer
+ *
+ * Cleans String literals according to encoding requirements for parsing.
+ *
+ * @since 1.4.0
+ */
+public class Sanitizer {
+
+ private static final String ASTERISK = "*";
+
+ /**
+ * Removes all `*` (asterisk) symbols, executes {@link String#trim()} and a locale-independent
+ * {@link String#toLowerCase(java.util.Locale)} on a given input String.
+ *
+ * @param value the String value to be sanitized
+ * @return the sanitized value
+ * @since 1.4.0
+ */
+ public static String headerEncoder(String value) {
+ Objects.requireNonNull(value);
+ return value.replace(ASTERISK, "").trim().toLowerCase(java.util.Locale.ROOT);
+ }
+
+ /**
+ * Investigates an array for information and can be used to e.g. filter out blank arrays.
+ *
+ * An array is considered to contain information, if at least one element is not {@code null}
+ * and NOT {@link String#isBlank()} (which also covers empty Strings).
+ *
+ * If the array contains only null, empty or blank values, the function returns {@code false}.
+ *
+ * @param array the array to investigate
+ * @return {@code true}, if at least one value is not blank or empty, else returns
+ * {@code false}
+ * @since 1.4.0
+ */
+ public static boolean containsInformation(String[] array) {
+ return Arrays.stream(array).anyMatch(value -> value != null && !value.isBlank());
+ }
+
+}
diff --git a/user-interface/src/main/java/life/qbic/datamanager/parser/tsv/TSVParser.java b/user-interface/src/main/java/life/qbic/datamanager/parser/tsv/TSVParser.java
new file mode 100644
index 000000000..43e818e6a
--- /dev/null
+++ b/user-interface/src/main/java/life/qbic/datamanager/parser/tsv/TSVParser.java
@@ -0,0 +1,95 @@
+package life.qbic.datamanager.parser.tsv;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map.Entry;
+import java.util.Optional;
+import life.qbic.datamanager.parser.MetadataParser;
+import life.qbic.datamanager.parser.ParsingResult;
+import life.qbic.datamanager.parser.ParsingResult.Row;
+import life.qbic.datamanager.parser.Sanitizer;
+
+/**
+ * TSV Parser
+ *
+ * Tab-seperated value format support for the {@link MetadataParser} interface.
+ *
+ * Support for UTF-16 encoding available.
+ *
+ * This implementation always considers the first line as the header, and will use its information
+ * to create the {@link ParsingResult#keys()} in the returned {@link ParsingResult} object
+ * instance.
+ *
+ * @since 1.4.0
+ */
+public class TSVParser implements MetadataParser {
+
+ private static final String VALUE_SEPARATOR = "\t";
+
+ private TSVParser() {
+
+ }
+
+ public static TSVParser create() {
+ return new TSVParser();
+ }
+
+ /**
+ * Prevents nasty {@link IndexOutOfBoundsException} and supports a more fluent API and cleaner
+ * code through the usage of Java's {@link Optional}.
+ *
+ * @param array the array to access an element from
+ * @param index the index of the element in the array to access
+ * @return the array element at position of the index wrapped in {@link Optional}, or
+ * {@link Optional#empty}, if the index is out of bounds.
+ * @since 1.4.0
+ */
+ private static Optional safeAccess(String[] array, Integer index) {
+ if (index >= array.length || index < 0) {
+ return Optional.empty();
+ }
+ return Optional.of(array[index]);
+ }
+
+ @Override
+ public ParsingResult parse(InputStream inputStream) {
+ List<String> content;
+ try (BufferedReader reader = new BufferedReader(
+ new InputStreamReader(inputStream, StandardCharsets.UTF_16))) {
+ content = reader.lines().toList();
+ } catch (IOException | java.io.UncheckedIOException e) { // lines() wraps read errors
+ throw new ParsingException("Cannot read from input stream", e);
+ }
+ if (content.isEmpty()) {
+ throw new ParsingException("No content provided!");
+ }
+ var propertyToIndex = new HashMap<String, Integer>();
+ // the first line is the header: map every sanitized column name to its position
+ var header = content.get(0).split(VALUE_SEPARATOR);
+ for (int i = 0; i < header.length; i++) {
+ propertyToIndex.put(Sanitizer.headerEncoder(header[i]), i);
+ }
+ // all remaining lines are data rows; rows without any information are dropped
+ var values = content.subList(1, content.size());
+ List<Row> rows = new ArrayList<>();
+ for (String row : values) {
+ var rowContent = row.split(VALUE_SEPARATOR);
+ String[] rowData = new String[header.length];
+ for (int column = 0; column < header.length; column++) {
+ // fill every column, so duplicated header names never leave a null cell behind
+ rowData[column] = safeAccess(rowContent, column).orElse("");
+ }
+ if (Sanitizer.containsInformation(rowData)) {
+ rows.add(new Row(Arrays.stream(rowData).toList()));
+ }
+ }
+ return new ParsingResult(propertyToIndex, rows);
+ }
+}
diff --git a/user-interface/src/main/java/life/qbic/datamanager/parser/xlsx/XLSXParser.java b/user-interface/src/main/java/life/qbic/datamanager/parser/xlsx/XLSXParser.java
new file mode 100644
index 000000000..6ec69b393
--- /dev/null
+++ b/user-interface/src/main/java/life/qbic/datamanager/parser/xlsx/XLSXParser.java
@@ -0,0 +1,120 @@
+package life.qbic.datamanager.parser.xlsx;
+
+import static java.util.Objects.isNull;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Optional;
+import life.qbic.datamanager.parser.MetadataParser;
+import life.qbic.datamanager.parser.ParsingResult;
+import life.qbic.datamanager.parser.Sanitizer;
+import org.apache.poi.ss.usermodel.Cell;
+import org.apache.poi.ss.usermodel.Row;
+import org.apache.poi.xssf.usermodel.XSSFRow;
+import org.apache.poi.xssf.usermodel.XSSFSheet;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+
+/**
+ * XLSX parser implementation of the {@link MetadataParser} interface.
+ *
+ * Parses information from content following the XLSX format specification:
+ *
+ *
+ * https://learn.microsoft.com/en-us/openspecs/office_standards/ms-xlsx/2c5dee00-eff2-4b22-92b6-0738acd4475e
+ *
+ * @since 1.4.0
+ */
+public class XLSXParser implements MetadataParser {
+
+ private XLSXParser() {
+ }
+
+ public static XLSXParser create() {
+ return new XLSXParser();
+ }
+
+ /**
+ * Reads the cell value as String. If the cell is `null`, or one of the following types is
+ * present, the function will return an empty String:
+ *
+ *
+ * - _NONE
+ * - ERROR
+ * - FORMULA
+ * - BLANK
+ *
+ * BOOLEAN and NUMERIC cells are returned in their String representation.
+ *
+ * @param cell the cell to extract the value from
+ * @return the cell value in String representation
+ * @since 1.4.0
+ */
+ private static String readCellAsString(Cell cell) {
+ if (cell == null) {
+ return "";
+ }
+ return switch (cell.getCellType()) {
+ case _NONE, ERROR, FORMULA, BLANK -> "";
+ case BOOLEAN -> Boolean.toString(cell.getBooleanCellValue());
+ case NUMERIC -> String.valueOf(cell.getNumericCellValue());
+ case STRING -> cell.getStringCellValue();
+ };
+ }
+
+ @Override
+ public ParsingResult parse(InputStream inputStream) {
+ try (XSSFWorkbook workbook = new XSSFWorkbook(inputStream)) {
+ return parse(workbook);
+ } catch (IOException e) {
+ throw new ParsingException("Parsing failed", e);
+ }
+ }
+
+ private ParsingResult parse(XSSFWorkbook workbook) {
+ XSSFSheet metadataSheet = workbook.getSheetAt(0);
+ XSSFRow headerRow = Optional.ofNullable(metadataSheet.getRow(0))
+ .orElseThrow(() -> new ParsingException("No header row found"));
+
+ // Header cells can be sparse or carry duplicate names, so the number of distinct keys may be
+ // smaller than the highest column index. Size every data row by the physical width of the
+ // header row instead; getLastCellNum() is the last cell index plus one, or -1 without cells.
+ int columnCount = Math.max(headerRow.getLastCellNum(), 0);
+ List<ParsingResult.Row> rows = new ArrayList<>();
+ Map<String, Integer> propertyToIndex = new HashMap<>();
+ Iterator<Cell> cellIterator = headerRow.cellIterator();
+ // only call next() after hasNext() has been checked on the cell iterator!
+ //It will not return null but the same cell over and over if hasNext is not checked.
+
+ Cell cell;
+ while (cellIterator.hasNext()) {
+ cell = cellIterator.next();
+ var cellValue = Sanitizer.headerEncoder(readCellAsString(cell));
+ propertyToIndex.put(cellValue, cell.getColumnIndex());
+ }
+
+ Iterator<Row> rowIterator = metadataSheet.rowIterator();
+ Row row;
+ rowIterator.next(); // skip the first entry, since it contains the header
+
+ while (rowIterator.hasNext()) {
+ row = rowIterator.next();
+ String[] rowData = new String[columnCount];
+ for (int columnIndex = 0; columnIndex < columnCount; columnIndex++) {
+ rowData[columnIndex] = readCellAsString(row.getCell(columnIndex));
+ }
+ if (Sanitizer.containsInformation(rowData)) {
+ rows.add(new ParsingResult.Row(Arrays.stream(rowData).toList()));
+ }
+ }
+
+ return new ParsingResult(propertyToIndex, rows);
+ }
+}
diff --git a/user-interface/src/main/java/life/qbic/datamanager/views/projects/project/measurements/MeasurementMetadataUploadDialog.java b/user-interface/src/main/java/life/qbic/datamanager/views/projects/project/measurements/MeasurementMetadataUploadDialog.java
index d21f73dc5..93a2b600d 100644
--- a/user-interface/src/main/java/life/qbic/datamanager/views/projects/project/measurements/MeasurementMetadataUploadDialog.java
+++ b/user-interface/src/main/java/life/qbic/datamanager/views/projects/project/measurements/MeasurementMetadataUploadDialog.java
@@ -20,41 +20,35 @@
import com.vaadin.flow.dom.DomEvent;
import com.vaadin.flow.shared.Registration;
import elemental.json.JsonObject;
-import java.io.BufferedReader;
import java.io.InputStream;
-import java.io.InputStreamReader;
import java.io.Serial;
-import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
-import java.util.HashMap;
import java.util.List;
-import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentLinkedDeque;
-import java.util.stream.IntStream;
-import life.qbic.application.commons.Result;
+import life.qbic.datamanager.parser.MeasurementMetadataConverter.MissingSampleIdException;
+import life.qbic.datamanager.parser.MeasurementMetadataConverter.UnknownMetadataTypeException;
+import life.qbic.datamanager.parser.MetadataConverter;
+import life.qbic.datamanager.parser.ParsingResult;
+import life.qbic.datamanager.parser.tsv.TSVParser;
+import life.qbic.datamanager.parser.xlsx.XLSXParser;
import life.qbic.datamanager.views.CancelConfirmationNotificationDialog;
import life.qbic.datamanager.views.general.InfoBox;
import life.qbic.datamanager.views.general.WizardDialogWindow;
import life.qbic.datamanager.views.notifications.ErrorMessage;
import life.qbic.datamanager.views.notifications.StyledNotification;
import life.qbic.datamanager.views.projects.EditableMultiFileMemoryBuffer;
-import life.qbic.projectmanagement.application.measurement.Labeling;
import life.qbic.projectmanagement.application.measurement.MeasurementMetadata;
import life.qbic.projectmanagement.application.measurement.NGSMeasurementMetadata;
import life.qbic.projectmanagement.application.measurement.ProteomicsMeasurementMetadata;
-import life.qbic.projectmanagement.application.measurement.validation.MeasurementNGSValidator.NGS_PROPERTY;
-import life.qbic.projectmanagement.application.measurement.validation.MeasurementProteomicsValidator.PROTEOMICS_PROPERTY;
import life.qbic.projectmanagement.application.measurement.validation.MeasurementValidationService;
import life.qbic.projectmanagement.application.measurement.validation.ValidationResult;
import life.qbic.projectmanagement.domain.model.experiment.Experiment;
import life.qbic.projectmanagement.domain.model.project.ProjectId;
-import life.qbic.projectmanagement.domain.model.sample.SampleCode;
import org.springframework.util.StringUtils;
@@ -95,7 +89,8 @@ public MeasurementMetadataUploadDialog(MeasurementValidationService measurementV
this.measurementMetadataUploads = new ArrayList<>();
this.measurementFileItems = new ArrayList<>();
Upload upload = new Upload(uploadBuffer);
- upload.setAcceptedFileTypes("text/tab-separated-values", "text/plain");
+ upload.setAcceptedFileTypes("text/tab-separated-values", "text/plain",
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
upload.setMaxFileSize(MAX_FILE_SIZE_BYTES);
setModeBasedLabels();
uploadItemsDisplay = new UploadItemsDisplay(upload);
@@ -117,156 +112,6 @@ public MeasurementMetadataUploadDialog(MeasurementValidationService measurementV
}
- private static List parseHeaderContent(String header) {
- return Arrays.stream(header.replace("*", "").strip().split("\t")).map(String::strip).toList();
- }
-
- private static Map propertyColumnMap(List properties) {
- var propertyIterator = properties.listIterator();
- Map map = new HashMap<>();
- int index;
- while ((index = propertyIterator.nextIndex()) < properties.size()) {
- map.put(propertyIterator.next().toLowerCase(), index);
- }
- return map;
- }
-
- private static MetadataContent read(InputStream inputStream) {
- var content = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_16)).lines().toList();
-
- return new MetadataContent(content.isEmpty() ? null : content.get(0),
- content.size() > 1 ? content.subList(1, content.size()) : new ArrayList<>());
- }
-
- private static boolean isRowNotEmpty(String row) {
- return row.split("\t").length > 0;
- }
-
- private static Result generateNGSRequest(
- String row, Map columns) {
- var columnValues = row.split("\t"); // tab separated values
- // we consider an empty row as a reason to warn, not to fail
- if (columnValues.length == 0) {
- return Result.fromValue(null);
- }
-
- Integer measurementIdIndex = columns.getOrDefault(MeasurementProperty.MEASUREMENT_ID.label(),
- -1);
- Integer sampleCodeColumnIndex = columns.get(NGS_PROPERTY.QBIC_SAMPLE_ID.label());
- Integer organisationColumnIndex = columns.get(NGS_PROPERTY.ORGANISATION_ID.label());
- Integer instrumentColumnIndex = columns.get(NGS_PROPERTY.INSTRUMENT.label());
- Integer facilityIndex = columns.get(NGS_PROPERTY.FACILITY.label());
- Integer readTypeIndex = columns.get(NGS_PROPERTY.SEQUENCING_READ_TYPE.label());
- Integer libraryKitIndex = columns.get(NGS_PROPERTY.LIBRARY_KIT.label());
- Integer flowCellIndex = columns.get(NGS_PROPERTY.FLOW_CELL.label());
- Integer runProtocolIndex = columns.get(NGS_PROPERTY.SEQUENCING_RUN_PROTOCOL.label());
- Integer samplePoolIndex = columns.get(NGS_PROPERTY.SAMPLE_POOL_GROUP.label());
- Integer indexI7Index = columns.get(NGS_PROPERTY.INDEX_I7.label());
- Integer indexI5Index = columns.get(NGS_PROPERTY.INDEX_I5.label());
- Integer commentIndex = columns.get(NGS_PROPERTY.COMMENT.label());
-
- int maxPropertyIndex = IntStream.of(sampleCodeColumnIndex,
- organisationColumnIndex,
- instrumentColumnIndex)
- .max().orElseThrow();
- if (columns.size() <= maxPropertyIndex) {
- return Result.fromError("Not enough columns provided for row: %s".formatted(row));
- }
-
- String measurementId = safeArrayAccess(columnValues, measurementIdIndex).orElse("");
- List sampleCodes = List.of(
- SampleCode.create(safeArrayAccess(columnValues, sampleCodeColumnIndex).orElse("")));
-
- String organisationRoRId = safeArrayAccess(columnValues, organisationColumnIndex).orElse("");
- String instrumentCURIE = safeArrayAccess(columnValues, instrumentColumnIndex).orElse("");
- String facility = safeArrayAccess(columnValues, facilityIndex).orElse("");
- String readType = safeArrayAccess(columnValues, readTypeIndex).orElse("");
- String libraryKit = safeArrayAccess(columnValues, libraryKitIndex).orElse("");
- String flowCell = safeArrayAccess(columnValues, flowCellIndex).orElse("");
- String runProtocol = safeArrayAccess(columnValues, runProtocolIndex).orElse("");
- String samplePool = safeArrayAccess(columnValues, samplePoolIndex).orElse("");
- String indexI7 = safeArrayAccess(columnValues, indexI7Index).orElse("");
- String indexI5 = safeArrayAccess(columnValues, indexI5Index).orElse("");
- String comment = safeArrayAccess(columnValues, commentIndex).orElse("");
- NGSMeasurementMetadata metadata = new NGSMeasurementMetadata(measurementId, sampleCodes,
- organisationRoRId, instrumentCURIE, facility, readType,
- libraryKit, flowCell, runProtocol, samplePool, indexI7, indexI5, comment);
- return Result.fromValue(metadata);
- }
-
- private static Result generatePxPRequest(
- String row, Map columns) {
- var columnValues = row.split("\t"); // tab separated values
- // we consider an empty row as a reason to warn, not to fail
- if (columnValues.length == 0) {
- return Result.fromValue(null);
- }
-
- Integer measurementIdIndex = columns.getOrDefault(MeasurementProperty.MEASUREMENT_ID.label(),
- -1);
- Integer sampleCodeColumnIndex = columns.get(PROTEOMICS_PROPERTY.QBIC_SAMPLE_ID.label());
- Integer organisationColumnIndex = columns.get(PROTEOMICS_PROPERTY.ORGANISATION_ID.label());
- Integer instrumentColumnIndex = columns.get(PROTEOMICS_PROPERTY.INSTRUMENT.label());
- Integer samplePoolGroupIndex = columns.get(PROTEOMICS_PROPERTY.SAMPLE_POOL_GROUP.label());
- Integer facilityIndex = columns.get(PROTEOMICS_PROPERTY.FACILITY.label());
- Integer fractionNameIndex = columns.get(PROTEOMICS_PROPERTY.CYCLE_FRACTION_NAME.label());
- Integer digestionEnzymeIndex = columns.get(PROTEOMICS_PROPERTY.DIGESTION_ENZYME.label());
- Integer digestionMethodIndex = columns.get(PROTEOMICS_PROPERTY.DIGESTION_METHOD.label());
- Integer enrichmentMethodIndex = columns.get(PROTEOMICS_PROPERTY.ENRICHMENT_METHOD.label());
- Integer injectionVolumeIndex = columns.get(PROTEOMICS_PROPERTY.INJECTION_VOLUME.label());
- Integer lcColumnIndex = columns.get(PROTEOMICS_PROPERTY.LC_COLUMN.label());
- Integer lcmsMethodIndex = columns.get(PROTEOMICS_PROPERTY.LCMS_METHOD.label());
- Integer labelingTypeIndex = columns.get(PROTEOMICS_PROPERTY.LABELING_TYPE.label());
- Integer labelIndex = columns.get(PROTEOMICS_PROPERTY.LABEL.label());
- Integer noteIndex = columns.get(PROTEOMICS_PROPERTY.COMMENT.label());
-
- int maxPropertyIndex = IntStream.of(sampleCodeColumnIndex,
- organisationColumnIndex,
- instrumentColumnIndex)
- .max().orElseThrow();
- if (columns.size() <= maxPropertyIndex) {
- return Result.fromError("Not enough columns provided for row: %s".formatted(row));
- }
-
- String measurementId = safeArrayAccess(columnValues, measurementIdIndex).orElse("");
- SampleCode sampleCode = SampleCode.create(
- safeArrayAccess(columnValues, sampleCodeColumnIndex).orElse(""));
- String organisationRoRId = safeArrayAccess(columnValues, organisationColumnIndex).orElse("");
- String instrumentCURIE = safeArrayAccess(columnValues, instrumentColumnIndex).orElse("");
- String samplePoolGroup = safeArrayAccess(columnValues, samplePoolGroupIndex).orElse("");
- String facility = safeArrayAccess(columnValues, facilityIndex).orElse("");
- String fractionName = safeArrayAccess(columnValues, fractionNameIndex).orElse("");
- String digestionEnzyme = safeArrayAccess(columnValues, digestionEnzymeIndex).orElse("");
- String digestionMethod = safeArrayAccess(columnValues, digestionMethodIndex).orElse("");
- String enrichmentMethod = safeArrayAccess(columnValues, enrichmentMethodIndex).orElse("");
- String injectionVolume = safeArrayAccess(columnValues, injectionVolumeIndex).orElse("");
- String lcColumn = safeArrayAccess(columnValues, lcColumnIndex).orElse("");
- String lcmsMethod = safeArrayAccess(columnValues, lcmsMethodIndex).orElse("");
- String labelingType = safeArrayAccess(columnValues, labelingTypeIndex).orElse("");
- String label = safeArrayAccess(columnValues, labelIndex).orElse("");
- String note = safeArrayAccess(columnValues, noteIndex).orElse("");
-
- ProteomicsMeasurementMetadata metadata = new ProteomicsMeasurementMetadata(measurementId,
- sampleCode,
- organisationRoRId, instrumentCURIE, samplePoolGroup, facility, fractionName,
- digestionEnzyme,
- digestionMethod, enrichmentMethod, injectionVolume, lcColumn, lcmsMethod,
- new Labeling(labelingType, label), note);
- return Result.fromValue(metadata);
- }
-
- private static List parseSampleCode(String sampleCodeEntry) {
- return Arrays.stream(sampleCodeEntry.split(",")).map(SampleCode::create).toList();
- }
-
- private static Optional safeArrayAccess(String[] array, int index) {
- try {
- return Optional.of(array[index]);
- } catch (ArrayIndexOutOfBoundsException e) {
- return Optional.empty();
- }
- }
-
private void setModeBasedLabels() {
switch (mode) {
case ADD -> {
@@ -315,19 +160,53 @@ private void onUploadFailed(FailedEvent failedEvent) {
showErrorNotification("File upload was interrupted", failedEvent.getReason().getMessage());
}
+ /**
+  * Validates converted measurement metadata and wraps the outcome in a report.
+  * <p>
+  * A {@code null} or empty list is not validated at all; the returned report then carries the
+  * failure message "The metadata sheet seems to be empty". Otherwise the type of the first
+  * entry alone decides which validation runs: NGS if it is an {@link NGSMeasurementMetadata},
+  * proteomics in every other case.
+  *
+  * @param metadata the converted metadata entries, may be {@code null} or empty
+  * @return the validation report for the provided entries
+  */
+ @SuppressWarnings("unchecked") // only the first entry is type-checked before the list is cast
+ private MeasurementValidationReport validate(List<? extends MeasurementMetadata> metadata) {
+   if (metadata == null || metadata.isEmpty()) {
+     return new MeasurementValidationReport(0,
+         ValidationResult.withFailures(0, List.of("The metadata sheet seems to be empty")));
+   }
+   if (metadata.get(0) instanceof NGSMeasurementMetadata) {
+     return validateNGS((List<NGSMeasurementMetadata>) metadata);
+   }
+   // NOTE(review): assumes every entry is proteomics metadata here; the elements are not checked
+   return validatePxP((List<ProteomicsMeasurementMetadata>) metadata);
+ }
+
+ /**
+  * Reads the given stream with a newly created {@link XLSXParser}.
+  *
+  * @param inputStream the content to parse
+  * @return the result returned by the parser
+  */
+ private ParsingResult parseXLSX(InputStream inputStream) {
+   var xlsxParser = XLSXParser.create();
+   return xlsxParser.parse(inputStream);
+ }
+
+ /**
+  * Reads the given stream with a newly created {@link TSVParser}.
+  *
+  * @param inputStream the content to parse
+  * @return the result returned by the parser
+  */
+ private ParsingResult parseTSV(InputStream inputStream) {
+   var tsvParser = TSVParser.create();
+   return tsvParser.parse(inputStream);
+ }
+
private void onUploadSucceeded(SucceededEvent succeededEvent) {
- MetadataContent content = read(
- uploadBuffer.inputStream(succeededEvent.getFileName()).orElseThrow());
- var contentHeader = content.theHeader()
- .orElseThrow(() -> new RuntimeException("No header row found"));
- var domain = measurementValidationService.inferDomainByPropertyTypes(
- parseHeaderContent(contentHeader))
- .orElseThrow(() -> new RuntimeException(
- "Header row could not be recognized, Please provide a valid template file"));
- var validationReport = switch (domain) {
- case PROTEOMICS -> validatePxP(content);
- case NGS -> validateNGS(content);
- };
+ var fileName = succeededEvent.getFileName();
+ ParsingResult parsingResult;
+ if (fileName.endsWith(".xlsx")) {
+ parsingResult = parseXLSX(uploadBuffer.inputStream(fileName).orElseThrow());
+ } else if (fileName.endsWith(".tsv") || fileName.endsWith(".txt")) {
+ parsingResult = parseTSV(uploadBuffer.inputStream(fileName).orElseThrow());
+ } else {
+ displayError(succeededEvent.getFileName(),
+ "Unsupported file type. Please make sure to upload a TSV or XLSX file.");
+ return;
+ }
+ List result;
+ try {
+ result = MetadataConverter.measurementConverter()
+ .convert(parsingResult, mode.equals(MODE.ADD));
+ } catch (
+ UnknownMetadataTypeException e) { // we want to display this in the dialog, not via the notification system
+ displayError(succeededEvent.getFileName(),
+ "Unknown metadata file content. Please make sure to include all metadata properties, even the optional ones");
+ return;
+ } catch (MissingSampleIdException e) {
+ displayError(succeededEvent.getFileName(), "Looks like at least one sample id is missing.");
+ return;
+ }
+
+ var validationReport = validate(result);
+
MeasurementFileItem measurementFileItem = new MeasurementFileItem(succeededEvent.getFileName(),
validationReport);
//We don't want to upload any invalid measurements in spreadsheet
@@ -336,16 +215,22 @@ private void onUploadSucceeded(SucceededEvent succeededEvent) {
succeededEvent.getFileName(), Collections.emptyList());
addFile(measurementFileItem, metadataUpload);
} else {
- var measurementMetadata = switch (domain) {
- case PROTEOMICS -> generatePxPMetadata(content);
- case NGS -> generateNGSMetadata(content);
- };
MeasurementMetadataUpload metadataUpload = new MeasurementMetadataUpload(
- succeededEvent.getFileName(), measurementMetadata);
+ succeededEvent.getFileName(), result);
addFile(measurementFileItem, metadataUpload);
}
}
+ /**
+  * Registers a file as failed so the problem shows up in the dialog itself.
+  * <p>
+  * Builds an upload without any metadata and a file item whose validation report holds
+  * {@code reason} as its only failure, then hands both to {@code addFile}.
+  *
+  * @param fileName the name of the file the error belongs to
+  * @param reason   the failure message to attach to the file
+  */
+ private void displayError(String fileName, String reason) {
+   MeasurementMetadataUpload emptyUpload = new MeasurementMetadataUpload<>(
+       fileName, Collections.emptyList());
+   var failure = ValidationResult.withFailures(1, List.of(reason));
+   var failureReport = new MeasurementValidationReport(1, failure);
+   MeasurementFileItem failedFileItem = new MeasurementFileItem(fileName, failureReport);
+   addFile(failedFileItem, emptyUpload);
+ }
+
private void addFile(MeasurementFileItem measurementFileItem,
MeasurementMetadataUpload metadataUpload) {
measurementMetadataUploads.add(metadataUpload);
@@ -353,58 +238,13 @@ private void addFile(MeasurementFileItem measurementFileItem,
showFile(measurementFileItem);
}
- private List generateNGSMetadata(
- MetadataContent content) {
- var propertyColumnMap = propertyColumnMap(parseHeaderContent(content.header()));
-
- var results = content.rows().stream()
- .map(row -> generateNGSRequest(row, propertyColumnMap))
- .toList();
- if (results.stream().anyMatch(Result::isError)) {
- return new ArrayList<>();
- }
- return results.stream()
- .filter(Result::isValue)
- .map(Result::getValue)
- .filter(Objects::nonNull)
- .toList();
- }
-
- private List generatePxPMetadata(
- MetadataContent content) {
- var propertyColumnMap = propertyColumnMap(parseHeaderContent(content.header()));
-
- var results = content.rows().stream()
- .map(row -> generatePxPRequest(row, propertyColumnMap))
- .toList();
- if (results.stream().anyMatch(Result::isError)) {
- return new ArrayList<>();
- }
- return results.stream()
- .filter(Result::isValue)
- .map(Result::getValue)
- .filter(Objects::nonNull)
- .toList();
- }
-
- private MeasurementValidationReport validateNGS(MetadataContent content) {
+ private MeasurementValidationReport validateNGS(List content) {
var validationResult = ValidationResult.successful(0);
- var propertyColumnMap = propertyColumnMap(parseHeaderContent(content.header()));
- // we check if there are any rows provided or if we have only rows with empty content
- if (content.rows().isEmpty() || content.rows().stream()
- .noneMatch(MeasurementMetadataUploadDialog::isRowNotEmpty)) {
- validationResult = validationResult.combine(
- ValidationResult.withFailures(0,
- List.of("The metadata sheet seems to be empty")));
- return new MeasurementValidationReport(0, validationResult);
- }
ConcurrentLinkedDeque concurrentLinkedDeque = new ConcurrentLinkedDeque<>();
List> tasks = new ArrayList<>();
- for (String row : content.rows().stream()
- .filter(MeasurementMetadataUploadDialog::isRowNotEmpty).toList()) {
- tasks.add(validateNGSRow(propertyColumnMap, row).thenAccept(concurrentLinkedDeque::add));
+ for (NGSMeasurementMetadata metaDatum : content) {
+ tasks.add(validateNGSMetaDatum(metaDatum).thenAccept(concurrentLinkedDeque::add));
}
-
CompletableFuture.allOf(tasks.toArray(new CompletableFuture[0])).join();
return new MeasurementValidationReport(concurrentLinkedDeque.size(),
@@ -412,26 +252,14 @@ private MeasurementValidationReport validateNGS(MetadataContent content) {
validationResult, ValidationResult::combine));
}
- private MeasurementValidationReport validatePxP(MetadataContent content) {
+ private MeasurementValidationReport validatePxP(List content) {
var validationResult = ValidationResult.successful(0);
- var propertyColumnMap = propertyColumnMap(parseHeaderContent(content.header()));
- // we check if there are any rows provided or if we have only rows with empty content
- if (content.rows().isEmpty() || content.rows().stream()
- .noneMatch(MeasurementMetadataUploadDialog::isRowNotEmpty)) {
- validationResult = validationResult.combine(
- ValidationResult.withFailures(0,
- List.of("The metadata sheet seems to be empty")));
- return new MeasurementValidationReport(0, validationResult);
- }
-
ConcurrentLinkedDeque concurrentLinkedDeque = new ConcurrentLinkedDeque<>();
List> tasks = new ArrayList<>();
- for (String row : content.rows().stream()
- .filter(MeasurementMetadataUploadDialog::isRowNotEmpty).toList()) {
- tasks.add(validatePxPRow(propertyColumnMap, row).thenAccept(concurrentLinkedDeque::add));
+ for (ProteomicsMeasurementMetadata metaDatum : content) {
+ tasks.add(validatePxpMetaDatum(metaDatum).thenAccept(concurrentLinkedDeque::add));
}
-
CompletableFuture.allOf(tasks.toArray(new CompletableFuture[0])).join();
return new MeasurementValidationReport(concurrentLinkedDeque.size(),
@@ -439,150 +267,20 @@ private MeasurementValidationReport validatePxP(MetadataContent content) {
validationResult, ValidationResult::combine));
}
- private CompletableFuture validateNGSRow(Map propertyColumnMap,
- String row) {
- var validationResult = ValidationResult.successful(0);
- var metaDataValues = row.split("\t"); // tab separated values
- // we consider an empty row as a reason to warn, not to fail
- if (metaDataValues.length == 0) {
- validationResult.combine(
- ValidationResult.successful(1, List.of("Empty row provided.")));
- return CompletableFuture.supplyAsync(() -> validationResult);
- }
- if (metaDataValues.length != propertyColumnMap.keySet().size()) {
- validationResult.combine(ValidationResult.withFailures(1, List.of("")));
- }
- var measurementIdIndex = propertyColumnMap.getOrDefault(
- MeasurementProperty.MEASUREMENT_ID.label(), -1);
- var sampleCodeColumnIndex = propertyColumnMap.get(
- NGS_PROPERTY.QBIC_SAMPLE_ID.label());
- var organisationsColumnIndex = propertyColumnMap.get(
- NGS_PROPERTY.ORGANISATION_ID.label());
- var facilityIndex = propertyColumnMap.get(NGS_PROPERTY.FACILITY.label());
- var instrumentColumnIndex = propertyColumnMap.get(
- NGS_PROPERTY.INSTRUMENT.label());
- var sequencingReadTypeIndex = propertyColumnMap.get(
- NGS_PROPERTY.SEQUENCING_READ_TYPE.label());
- var libraryKitIndex = propertyColumnMap.get(
- NGS_PROPERTY.LIBRARY_KIT.label());
- var flowCellIndex = propertyColumnMap.get(
- NGS_PROPERTY.FLOW_CELL.label());
- var sequencingRunProtocolIndex = propertyColumnMap.get(
- NGS_PROPERTY.SEQUENCING_RUN_PROTOCOL.label());
- var samplePoolIndex = propertyColumnMap.get(
- NGS_PROPERTY.SAMPLE_POOL_GROUP.label());
- var indexI7Index = propertyColumnMap.get(
- NGS_PROPERTY.INDEX_I7.label());
- var indexI5Index = propertyColumnMap.get(
- NGS_PROPERTY.INDEX_I5.label());
- Integer commentIndex = propertyColumnMap.get(NGS_PROPERTY.COMMENT.label());
- int maxPropertyIndex = IntStream.of(sampleCodeColumnIndex, organisationsColumnIndex,
- instrumentColumnIndex).max().orElseThrow();
- if (propertyColumnMap.size() <= maxPropertyIndex) {
- return CompletableFuture.supplyAsync(
- () -> validationResult.combine(ValidationResult.withFailures(1,
- List.of("Not enough columns provided for row: \"%s\"".formatted(row)))));
- }
- var measurementId = safeArrayAccess(metaDataValues, measurementIdIndex).orElse("");
- var sampleCodes = SampleCode.create(
- safeArrayAccess(metaDataValues, sampleCodeColumnIndex).orElse(""));
- var organisationRoRId = safeArrayAccess(metaDataValues, organisationsColumnIndex).orElse("");
- var instrumentCURIE = safeArrayAccess(metaDataValues, instrumentColumnIndex).orElse("");
- var facility = safeArrayAccess(metaDataValues, facilityIndex).orElse("");
- var sequencingReadType = safeArrayAccess(metaDataValues, sequencingReadTypeIndex).orElse("");
- var libraryKit = safeArrayAccess(metaDataValues, libraryKitIndex).orElse("");
- var flowCell = safeArrayAccess(metaDataValues, flowCellIndex).orElse("");
- var sequencingRunProtocol = safeArrayAccess(metaDataValues, sequencingRunProtocolIndex).orElse(
- "");
- var samplePoolGroup = safeArrayAccess(metaDataValues, samplePoolIndex).orElse("");
- var indexI7 = safeArrayAccess(metaDataValues, indexI7Index).orElse("");
- var indexI5 = safeArrayAccess(metaDataValues, indexI5Index).orElse("");
- var comment = safeArrayAccess(metaDataValues, commentIndex).orElse("");
-
- var metadata = new NGSMeasurementMetadata(measurementId, List.of(sampleCodes),
- organisationRoRId, instrumentCURIE, facility, sequencingReadType,
- libraryKit, flowCell, sequencingRunProtocol, samplePoolGroup, indexI7, indexI5, comment);
+ private CompletableFuture validateNGSMetaDatum(
+ NGSMeasurementMetadata metaDatum) {
var measurementNGSValidationExecutor = new MeasurementNGSValidationExecutor(
measurementValidationService);
return generateModeDependentValidationResult(
- measurementNGSValidationExecutor, metadata);
+ measurementNGSValidationExecutor, metaDatum);
}
- private CompletableFuture validatePxPRow(Map propertyColumnMap,
- String row) {
- var validationResult = ValidationResult.successful(0);
- var metaDataValues = row.split("\t"); // tab separated values
- // we consider an empty row as a reason to warn, not to fail
- if (metaDataValues.length == 0) {
- validationResult.combine(
- ValidationResult.successful(1, List.of("Empty row provided.")));
- return CompletableFuture.supplyAsync(() -> validationResult);
- }
- if (metaDataValues.length != propertyColumnMap.keySet().size()) {
- validationResult.combine(ValidationResult.withFailures(1, List.of("")));
- }
-
- var measurementIdIndex = propertyColumnMap.getOrDefault(
- MeasurementProperty.MEASUREMENT_ID.label(), -1);
- var sampleCodeColumnIndex = propertyColumnMap.get(
- PROTEOMICS_PROPERTY.QBIC_SAMPLE_ID.label());
- var organisationsColumnIndex = propertyColumnMap.get(
- PROTEOMICS_PROPERTY.ORGANISATION_ID.label());
- var instrumentColumnIndex = propertyColumnMap.get(
- PROTEOMICS_PROPERTY.INSTRUMENT.label());
- var samplePoolGroupIndex = propertyColumnMap.get(
- PROTEOMICS_PROPERTY.SAMPLE_POOL_GROUP.label());
- var facilityIndex = propertyColumnMap.get(PROTEOMICS_PROPERTY.FACILITY.label());
- var fractionNameIndex = propertyColumnMap.get(PROTEOMICS_PROPERTY.CYCLE_FRACTION_NAME.label());
- var digestionEnzymeIndex = propertyColumnMap.get(PROTEOMICS_PROPERTY.DIGESTION_ENZYME.label());
- var digestionMethodIndex = propertyColumnMap.get(PROTEOMICS_PROPERTY.DIGESTION_METHOD.label());
- Integer enrichmentMethodIndex = propertyColumnMap.get(
- PROTEOMICS_PROPERTY.ENRICHMENT_METHOD.label());
- Integer injectionVolumeIndex = propertyColumnMap.get(
- PROTEOMICS_PROPERTY.INJECTION_VOLUME.label());
- Integer lcColumnIndex = propertyColumnMap.get(PROTEOMICS_PROPERTY.LC_COLUMN.label());
- Integer lcmsMethodIndex = propertyColumnMap.get(PROTEOMICS_PROPERTY.LCMS_METHOD.label());
- Integer labelingTypeIndex = propertyColumnMap.get(PROTEOMICS_PROPERTY.LABELING_TYPE.label());
- Integer labelIndex = propertyColumnMap.get(PROTEOMICS_PROPERTY.LABEL.label());
- Integer noteIndex = propertyColumnMap.get(PROTEOMICS_PROPERTY.COMMENT.label());
-
- int maxPropertyIndex = IntStream.of(sampleCodeColumnIndex, organisationsColumnIndex,
- instrumentColumnIndex).max().orElseThrow();
- if (propertyColumnMap.size() <= maxPropertyIndex) {
- return CompletableFuture.completedFuture(
- validationResult.combine(ValidationResult.withFailures(1,
- List.of("Not enough columns provided for row: \"%s\"".formatted(row)))));
- }
-
- var measurementId = safeArrayAccess(metaDataValues, measurementIdIndex).orElse("");
- var sampleCode = SampleCode.create(
- safeArrayAccess(metaDataValues, sampleCodeColumnIndex).orElse(""));
- var organisationRoRId = safeArrayAccess(metaDataValues, organisationsColumnIndex).orElse("");
- var instrumentCURIE = safeArrayAccess(metaDataValues, instrumentColumnIndex).orElse("");
- var samplePoolGroup = safeArrayAccess(metaDataValues, samplePoolGroupIndex).orElse("");
- var facility = safeArrayAccess(metaDataValues, facilityIndex).orElse("");
- var fractionName = safeArrayAccess(metaDataValues, fractionNameIndex).orElse("");
- var digestionEnzyme = safeArrayAccess(metaDataValues, digestionEnzymeIndex).orElse("");
- var digestionMethod = safeArrayAccess(metaDataValues, digestionMethodIndex).orElse("");
- var enrichmentMethod = safeArrayAccess(metaDataValues, enrichmentMethodIndex).orElse("");
- var injectionVolume = safeArrayAccess(metaDataValues, injectionVolumeIndex).orElse("");
- var lcColumn = safeArrayAccess(metaDataValues, lcColumnIndex).orElse("");
- var lcmsMethod = safeArrayAccess(metaDataValues, lcmsMethodIndex).orElse("");
- var labelingType = safeArrayAccess(metaDataValues, labelingTypeIndex).orElse("");
- var label = safeArrayAccess(metaDataValues, labelIndex).orElse("");
-
- var note = safeArrayAccess(metaDataValues, noteIndex).orElse("");
-
- var metadata = new ProteomicsMeasurementMetadata(measurementId, sampleCode,
- organisationRoRId, instrumentCURIE, samplePoolGroup, facility, fractionName,
- digestionEnzyme,
- digestionMethod, enrichmentMethod, injectionVolume, lcColumn, lcmsMethod,
- new Labeling(labelingType, label), note);
- var measurementProteomicsValidationExecutor = new MeasurementProteomicsValidationExecutor(
+ private CompletableFuture validatePxpMetaDatum(
+ ProteomicsMeasurementMetadata metaDatum) {
+ MeasurementValidationExecutor proteomicsValidationExecutor = new MeasurementProteomicsValidationExecutor(
measurementValidationService);
- var finalValidationResult = generateModeDependentValidationResult(
- measurementProteomicsValidationExecutor, metadata);
- return finalValidationResult;
+ return generateModeDependentValidationResult(
+ proteomicsValidationExecutor, metaDatum);
}
private CompletableFuture generateModeDependentValidationResult(
@@ -828,12 +526,12 @@ public UploadItemsDisplay(Upload upload) {
uploadSectionTitle.addClassName("section-title");
var saveYourFileInfo = new InfoBox().setInfoText(
- "Please save your excel file as UTF-16 Unicode Text (*.txt) before uploading.")
+ "When uploading a tab-separated file, please save your Excel file as UTF-16 Unicode Text (*.txt) before uploading.")
.setClosable(false);
var restrictions = new Div();
restrictions.addClassName("restrictions");
- restrictions.add(new Span("Supported file formats: .txt, .tsv"));
+ restrictions.add(new Span("Supported file formats: .txt, .tsv, .xlsx"));
restrictions.add(
"Maximum file size: %s MB".formatted(MAX_FILE_SIZE_BYTES / Math.pow(1024, 2)));
diff --git a/user-interface/src/main/java/life/qbic/datamanager/views/projects/project/measurements/MeasurementProteomicsValidationExecutor.java b/user-interface/src/main/java/life/qbic/datamanager/views/projects/project/measurements/MeasurementProteomicsValidationExecutor.java
index f796d76f4..2d95b27ff 100644
--- a/user-interface/src/main/java/life/qbic/datamanager/views/projects/project/measurements/MeasurementProteomicsValidationExecutor.java
+++ b/user-interface/src/main/java/life/qbic/datamanager/views/projects/project/measurements/MeasurementProteomicsValidationExecutor.java
@@ -6,6 +6,7 @@
import life.qbic.projectmanagement.application.measurement.validation.MeasurementValidationService;
import life.qbic.projectmanagement.application.measurement.validation.ValidationResult;
import life.qbic.projectmanagement.domain.model.project.ProjectId;
+import org.apache.poi.ss.formula.functions.T;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.security.access.prepost.PreAuthorize;
import org.springframework.stereotype.Component;
diff --git a/user-interface/src/main/java/life/qbic/datamanager/views/projects/project/measurements/MeasurementValidationExecutor.java b/user-interface/src/main/java/life/qbic/datamanager/views/projects/project/measurements/MeasurementValidationExecutor.java
index 5696f33b9..ff17bab52 100644
--- a/user-interface/src/main/java/life/qbic/datamanager/views/projects/project/measurements/MeasurementValidationExecutor.java
+++ b/user-interface/src/main/java/life/qbic/datamanager/views/projects/project/measurements/MeasurementValidationExecutor.java
@@ -1,6 +1,7 @@
package life.qbic.datamanager.views.projects.project.measurements;
import java.util.concurrent.CompletableFuture;
+import life.qbic.projectmanagement.application.measurement.MeasurementMetadata;
import life.qbic.projectmanagement.application.measurement.validation.MeasurementValidationService;
import life.qbic.projectmanagement.application.measurement.validation.ValidationResult;
import life.qbic.projectmanagement.domain.model.project.ProjectId;
@@ -16,12 +17,12 @@
*
* @since 1.0.0
*/
-public interface MeasurementValidationExecutor {
+public interface MeasurementValidationExecutor {
- CompletableFuture validateRegistration(MeasurementMetadata metadata,
+ CompletableFuture validateRegistration(T metadata,
ProjectId projectId);
- CompletableFuture validateUpdate(MeasurementMetadata metadata,
+ CompletableFuture validateUpdate(T metadata,
ProjectId projectId);
}
diff --git a/user-interface/src/main/resources/messages.properties b/user-interface/src/main/resources/messages.properties
index 4c1044caa..9f616b1fd 100644
--- a/user-interface/src/main/resources/messages.properties
+++ b/user-interface/src/main/resources/messages.properties
@@ -12,3 +12,4 @@ NO_SPECIMEN_DEFINED=No specimens were defined -> At least one specimen has to be
NO_ANALYTE_DEFINED=No analytes were defined -> At least one analyte has to be selected during project creation
DATA_ATTACHED_TO_SAMPLES=Samples could not be deleted -> At least one of the samples has data attached.
SERVICE_FAILED=A service failed -> Unfortunately a service failed, please try again later or feel free to contact us at support@qbic.zendesk.com if the issue persists.
+UNKNOWN_METADATA=Unknown metadata type -> Please make sure that all property names are provided.
|