diff --git a/application-commons/src/main/java/life/qbic/application/commons/ApplicationException.java b/application-commons/src/main/java/life/qbic/application/commons/ApplicationException.java index 49c4bdd01..28e152716 100644 --- a/application-commons/src/main/java/life/qbic/application/commons/ApplicationException.java +++ b/application-commons/src/main/java/life/qbic/application/commons/ApplicationException.java @@ -150,7 +150,8 @@ public enum ErrorCode { NO_ANALYTE_DEFINED, DATA_ATTACHED_TO_SAMPLES, SAMPLES_ATTACHED_TO_EXPERIMENT, - SERVICE_FAILED; + SERVICE_FAILED, + UNKNOWN_METADATA; @Override public String toString() { diff --git a/logging/pom.xml b/logging/pom.xml index bbbdc631a..ec3fae572 100644 --- a/logging/pom.xml +++ b/logging/pom.xml @@ -15,12 +15,12 @@ org.slf4j slf4j-api - 2.0.13 + 2.0.16 org.slf4j slf4j-simple - 2.0.13 + 2.0.16 diff --git a/project-management/src/main/java/life/qbic/projectmanagement/application/measurement/MeasurementService.java b/project-management/src/main/java/life/qbic/projectmanagement/application/measurement/MeasurementService.java index 2e0d29fc1..8850df4ac 100644 --- a/project-management/src/main/java/life/qbic/projectmanagement/application/measurement/MeasurementService.java +++ b/project-management/src/main/java/life/qbic/projectmanagement/application/measurement/MeasurementService.java @@ -554,7 +554,7 @@ private int readInjectionVolume(String value) throws NumberFormatException { if (value.isBlank()) { return -1; } - return Integer.parseInt(value); + return (int) Double.parseDouble(value); } /** diff --git a/project-management/src/main/java/life/qbic/projectmanagement/domain/model/sample/SampleCode.java b/project-management/src/main/java/life/qbic/projectmanagement/domain/model/sample/SampleCode.java index 0042fb8c0..8de5cbfdb 100644 --- a/project-management/src/main/java/life/qbic/projectmanagement/domain/model/sample/SampleCode.java +++ 
b/project-management/src/main/java/life/qbic/projectmanagement/domain/model/sample/SampleCode.java @@ -19,7 +19,7 @@ protected SampleCode() { // needed for JPA } - private SampleCode(String code) { + private SampleCode(String code) throws IllegalArgumentException{ Objects.requireNonNull(code, "Sample code must not be null"); if (code.isBlank()) { throw new IllegalArgumentException("Sample code must not be blank"); @@ -27,7 +27,7 @@ private SampleCode(String code) { this.code = code; } - public static SampleCode create(String code) { + public static SampleCode create(String code) throws IllegalArgumentException { return new SampleCode(code); } diff --git a/user-interface/src/main/bundles/dev.bundle b/user-interface/src/main/bundles/dev.bundle index 72f81be38..3f4d38347 100644 Binary files a/user-interface/src/main/bundles/dev.bundle and b/user-interface/src/main/bundles/dev.bundle differ diff --git a/user-interface/src/main/java/life/qbic/datamanager/parser/MeasurementMetadataConverter.java b/user-interface/src/main/java/life/qbic/datamanager/parser/MeasurementMetadataConverter.java new file mode 100644 index 000000000..bc7e3ecae --- /dev/null +++ b/user-interface/src/main/java/life/qbic/datamanager/parser/MeasurementMetadataConverter.java @@ -0,0 +1,52 @@ +package life.qbic.datamanager.parser; + +import java.util.List; +import life.qbic.projectmanagement.application.measurement.MeasurementMetadata; + +/** + * Measurement Metadata Converter + *

+ * Measurement metadata converters enable the client to process a {@link ParsingResult} object and + * convert it into known implementations of the {@link MeasurementMetadata} interface. + * + * @since 1.4.0 + */ +public interface MeasurementMetadataConverter { + + /** + * Takes an instance of {@link ParsingResult} and tries to convert it to known implementations of + * the {@link MeasurementMetadata} interface. + *

+ * Currently supported implementations are: + * + *

+ * + * @param parsingResult the parsing result to take as input for the conversion. + * @param ignoreMeasurementId whether to ignore the measurement identifier or not + * @return a list of converted implementations of {@link MeasurementMetadata}. + * @throws UnknownMetadataTypeException if no matching implementation of + * {@link MeasurementMetadata} can be associated from the + * provided {@link ParsingResult#keys()}. + * @since 1.4.0 + */ + List convert(ParsingResult parsingResult, + boolean ignoreMeasurementId) + throws UnknownMetadataTypeException; + + class UnknownMetadataTypeException extends RuntimeException { + + public UnknownMetadataTypeException(String message) { + super(message); + } + } + + class MissingSampleIdException extends RuntimeException { + public MissingSampleIdException(String message) { + super(message); + } + } + +} diff --git a/user-interface/src/main/java/life/qbic/datamanager/parser/MetadataConverter.java b/user-interface/src/main/java/life/qbic/datamanager/parser/MetadataConverter.java new file mode 100644 index 000000000..d69933cd5 --- /dev/null +++ b/user-interface/src/main/java/life/qbic/datamanager/parser/MetadataConverter.java @@ -0,0 +1,369 @@ +package life.qbic.datamanager.parser; + +import static life.qbic.datamanager.parser.MetadataConverter.ProteomicsMeasurementProperty.COMMENT; +import static life.qbic.datamanager.parser.MetadataConverter.ProteomicsMeasurementProperty.CYCLE; +import static life.qbic.datamanager.parser.MetadataConverter.ProteomicsMeasurementProperty.DIGESTION_ENZYME; +import static life.qbic.datamanager.parser.MetadataConverter.ProteomicsMeasurementProperty.DIGESTION_METHOD; +import static life.qbic.datamanager.parser.MetadataConverter.ProteomicsMeasurementProperty.ENRICHMENT_METHOD; +import static life.qbic.datamanager.parser.MetadataConverter.ProteomicsMeasurementProperty.FACILITY; +import static life.qbic.datamanager.parser.MetadataConverter.ProteomicsMeasurementProperty.INJECTION_VOLUME; +import static 
life.qbic.datamanager.parser.MetadataConverter.ProteomicsMeasurementProperty.INSTRUMENT; +import static life.qbic.datamanager.parser.MetadataConverter.ProteomicsMeasurementProperty.LABEL; +import static life.qbic.datamanager.parser.MetadataConverter.ProteomicsMeasurementProperty.LABELING_TYPE; +import static life.qbic.datamanager.parser.MetadataConverter.ProteomicsMeasurementProperty.LCMS_METHOD; +import static life.qbic.datamanager.parser.MetadataConverter.ProteomicsMeasurementProperty.LC_COLUMN; +import static life.qbic.datamanager.parser.MetadataConverter.ProteomicsMeasurementProperty.MEASUREMENT_ID; +import static life.qbic.datamanager.parser.MetadataConverter.ProteomicsMeasurementProperty.ORGANISATION_ID; +import static life.qbic.datamanager.parser.MetadataConverter.ProteomicsMeasurementProperty.QBIC_SAMPLE_ID; +import static life.qbic.datamanager.parser.MetadataConverter.ProteomicsMeasurementProperty.SAMPLE_POOL_GROUP; +import static life.qbic.logging.service.LoggerFactory.logger; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.function.Function; +import java.util.stream.Collectors; +import life.qbic.datamanager.parser.ParsingResult.Row; +import life.qbic.logging.api.Logger; +import life.qbic.projectmanagement.application.measurement.Labeling; +import life.qbic.projectmanagement.application.measurement.MeasurementMetadata; +import life.qbic.projectmanagement.application.measurement.NGSMeasurementMetadata; +import life.qbic.projectmanagement.application.measurement.ProteomicsMeasurementMetadata; +import life.qbic.projectmanagement.domain.model.sample.SampleCode; + +/** + * Metadata Converter + * + *

Enables clients to convert {@link ParsingResult} objects into lists of known metadata + * properties.

+ *

+ * Currently supported metadata properties cover: + * + *

+ * + * @since 1.4.0 + */ +public class MetadataConverter implements MeasurementMetadataConverter { + + private static final Logger log = logger(MetadataConverter.class); + + private MetadataConverter() { + } + + public static MetadataConverter measurementConverter() { + return new MetadataConverter(); + } + + /** + * Generates a hit map, storing the number of matches of a defined set of String values (hit + * values), in a target of interest collection of String values. + *

+ * The resulting map will contain the number of occurrences of every value in the hit values + * collection found in the target collection to investigate. + * + * @param target the collection of interest to search in + * @param hitValues a set of distinct values, that should be represented in the hit result map + * @return a hit result map, containing the number of occurrences of every hit value in the target + * String collection (0, if no target was found for a value). + * @since 1.4.0 + */ + private static Map countHits(Collection target, Set hitValues, + String... ignoredProperties) { + Map hits = new HashMap<>(); + for (String t : hitValues) { + hits.put(t, 0); + } + for (String s : target) { + if (hitValues.contains(s)) { + var currentHit = hits.get(s); + hits.put(s, ++currentHit); + } + } + for (String ignoredProperty : ignoredProperties) { + if (hits.containsKey((ignoredProperty))) { + hits.remove(ignoredProperty); + } + } + return hits; + } + + static String sanitizeValue(String value) { + return value.trim().toLowerCase(); + } + + @Override + public List convert(ParsingResult parsingResult, boolean ignoreMeasurementId) + throws UnknownMetadataTypeException, MissingSampleIdException { + Objects.requireNonNull(parsingResult); + var properties = parsingResult.keys().keySet(); + if (looksLikeNgsMeasurement(properties, ignoreMeasurementId)) { + return tryConversion(this::convertNGSMeasurement, parsingResult); + } else if (looksLikeProteomicsMeasurement(properties, ignoreMeasurementId)) { + return tryConversion(this::convertProteomicsMeasurement, parsingResult); + } else { + throw new UnknownMetadataTypeException( + "Unknown metadata type: cannot match properties to any known metadata type. 
Provided [%s]".formatted( + String.join(", ", properties))); + } + } + + /** + * Applies the given converter to the parsing result and translates any + * {@link IllegalArgumentException} raised during conversion into a + * {@link MissingSampleIdException}, keeping the original exception as its cause. + * + * @param converter the conversion function to apply + * @param parsingResult the parsing result to convert + * @return the converted measurement metadata + * @throws MissingSampleIdException if the conversion fails with an {@link IllegalArgumentException} + */ + private List tryConversion( + Function> converter, ParsingResult parsingResult) { + try { + return converter.apply(parsingResult); + } catch (IllegalArgumentException e) { + // MissingSampleIdException only offers a message constructor, so the cause is attached via initCause + var missingSampleId = new MissingSampleIdException("Missing sample ID metadata"); + missingSampleId.initCause(e); + throw missingSampleId; + } + } + + private List convertProteomicsMeasurement(ParsingResult parsingResult) { + var result = new ArrayList(); + var keyIndices = parsingResult.keys(); + for (ParsingResult.Row row : parsingResult.rows()) { + // we use -1 as default value if a property cannot be accessed, thus ending up in an empty String + var pxpMetaDatum = new ProteomicsMeasurementMetadata( + safeListAccess(row.values(), keyIndices.getOrDefault(MEASUREMENT_ID.propertyName(), -1), + ""), + SampleCode.create( + safeListAccess(row.values(), + keyIndices.getOrDefault(QBIC_SAMPLE_ID.propertyName(), -1), + "")), + safeListAccess(row.values(), keyIndices.getOrDefault(ORGANISATION_ID.propertyName(), -1), + ""), + safeListAccess(row.values(), keyIndices.getOrDefault(INSTRUMENT.propertyName(), -1), ""), + safeListAccess(row.values(), + keyIndices.getOrDefault(SAMPLE_POOL_GROUP.propertyName(), -1), + ""), + safeListAccess(row.values(), keyIndices.getOrDefault(FACILITY.propertyName(), -1), ""), + safeListAccess(row.values(), keyIndices.getOrDefault(CYCLE.propertyName(), -1), ""), + safeListAccess(row.values(), keyIndices.getOrDefault(DIGESTION_ENZYME.propertyName(), -1), + ""), + safeListAccess(row.values(), keyIndices.getOrDefault(DIGESTION_METHOD.propertyName(), -1), + ""), + safeListAccess(row.values(), + keyIndices.getOrDefault(ENRICHMENT_METHOD.propertyName(), -1), + ""), + safeListAccess(row.values(), keyIndices.getOrDefault(INJECTION_VOLUME.propertyName(), -1), + ""), + safeListAccess(row.values(), keyIndices.getOrDefault(LC_COLUMN.propertyName(), -1), ""), + safeListAccess(row.values(), keyIndices.getOrDefault(LCMS_METHOD.propertyName(), -1), ""), + new Labeling( + 
safeListAccess(row.values(), + keyIndices.getOrDefault(LABELING_TYPE.propertyName(), -1), + ""), + safeListAccess(row.values(), keyIndices.getOrDefault(LABEL.propertyName(), -1), "")), + safeListAccess(row.values(), keyIndices.getOrDefault(COMMENT.propertyName(), -1), "") + ); + result.add(pxpMetaDatum); + } + return result; + } + + private String safeListAccess(List list, Integer index, String defaultValue) { + if (index >= list.size() || index < 0) { + return defaultValue; + } + return list.get(index); + } + + private List convertNGSMeasurement(ParsingResult parsingResult) { + var result = new ArrayList(); + var keyIndices = parsingResult.keys(); + for (Row row : parsingResult.rows()) { + var ngsMeasurementMetadata = new NGSMeasurementMetadata( + safeListAccess(row.values(), keyIndices.getOrDefault(MEASUREMENT_ID.propertyName(), -1), + ""), + List.of(SampleCode.create( + safeListAccess(row.values(), + keyIndices.getOrDefault(QBIC_SAMPLE_ID.propertyName(), -1), + ""))), + safeListAccess(row.values(), keyIndices.getOrDefault(ORGANISATION_ID.propertyName(), -1), + ""), + safeListAccess(row.values(), keyIndices.getOrDefault(INSTRUMENT.propertyName(), -1), ""), + safeListAccess(row.values(), keyIndices.getOrDefault(FACILITY.propertyName(), -1), ""), + safeListAccess(row.values(), keyIndices.getOrDefault( + NGSMeasurementProperty.SEQUENCING_READ_TYPE.propertyName(), -1), ""), + safeListAccess(row.values(), + keyIndices.getOrDefault(NGSMeasurementProperty.LIBRARY_KIT.propertyName(), -1), ""), + safeListAccess(row.values(), + keyIndices.getOrDefault(NGSMeasurementProperty.FLOW_CELL.propertyName(), -1), + ""), + safeListAccess(row.values(), keyIndices.getOrDefault( + NGSMeasurementProperty.SEQUENCING_RUN_PROTOCOL.propertyName(), -1), + ""), + safeListAccess(row.values(), + keyIndices.getOrDefault(SAMPLE_POOL_GROUP.propertyName(), -1), + ""), + safeListAccess(row.values(), + keyIndices.getOrDefault(NGSMeasurementProperty.INDEX_I7.propertyName(), -1), + ""), + 
safeListAccess(row.values(), + keyIndices.getOrDefault(NGSMeasurementProperty.INDEX_I5.propertyName(), -1), + ""), + safeListAccess(row.values(), keyIndices.getOrDefault(COMMENT.propertyName(), -1), "") + ); + result.add(ngsMeasurementMetadata); + } + return result; + } + + private boolean looksLikeNgsMeasurement(Collection properties, boolean ignoreID) { + var formattedProperties = properties.stream().map(String::toLowerCase) + .collect(Collectors.toList()); + Map hitMap; + if (ignoreID) { + formattedProperties.remove(MEASUREMENT_ID.propertyName()); + hitMap = countHits(formattedProperties, + Arrays.stream(NGSMeasurementProperty.values()) + .map(NGSMeasurementProperty::propertyName).collect( + Collectors.toSet()), MEASUREMENT_ID.propertyName()); + } else { + hitMap = countHits(formattedProperties, + Arrays.stream(NGSMeasurementProperty.values()) + .map(NGSMeasurementProperty::propertyName).collect( + Collectors.toSet())); + } + var missingProperties = new ArrayList<>(); + for (Entry entry : hitMap.entrySet()) { + if (entry.getValue() == 0) { + missingProperties.add(entry.getKey()); + } + } + if (missingProperties.isEmpty()) { + return true; + } else { + log.debug("Missing properties for NGS measurement: %s".formatted(missingProperties)); + } + return false; + } + + private boolean looksLikeProteomicsMeasurement(Collection properties, boolean ignoreID) { + var formattedProperties = properties.stream().map(String::toLowerCase) + .collect(Collectors.toList()); + Map hitMap; + if (ignoreID) { + formattedProperties.remove(MEASUREMENT_ID.propertyName()); + hitMap = countHits(formattedProperties, + Arrays.stream(ProteomicsMeasurementProperty.values()) + .map(ProteomicsMeasurementProperty::propertyName).collect( + Collectors.toSet()), MEASUREMENT_ID.propertyName()); + } else { + hitMap = countHits(formattedProperties, + Arrays.stream(ProteomicsMeasurementProperty.values()) + .map(ProteomicsMeasurementProperty::propertyName).collect( + Collectors.toSet())); + } + var 
missingProperties = new ArrayList<>(); + for (Entry entry : hitMap.entrySet()) { + if (entry.getValue() == 0) { + missingProperties.add(entry.getKey()); + } + } + if (missingProperties.isEmpty()) { + return true; + } else { + log.debug("Missing properties for proteomics measurement: %s".formatted(missingProperties)); + } + return false; + } + + + enum ProteomicsMeasurementProperty { + MEASUREMENT_ID("measurement id"), + QBIC_SAMPLE_ID("qbic sample id"), + SAMPLE_POOL_GROUP("sample pool group"), + ORGANISATION_ID("organisation id"), + FACILITY("facility"), + INSTRUMENT("instrument"), + CYCLE("cycle/fraction name"), + DIGESTION_METHOD("digestion method"), + DIGESTION_ENZYME("digestion enzyme"), + ENRICHMENT_METHOD("enrichment method"), + INJECTION_VOLUME("injection volume (ul)"), + LC_COLUMN("lc column"), + LCMS_METHOD("lcms method"), + LABELING_TYPE("labeling type"), + LABEL("label"), + COMMENT("comment"); + + private final String name; + + ProteomicsMeasurementProperty(String value) { + this.name = value; + } + + static Optional fromString(String value) { + var sanitizedValue = sanitizeValue(value); + return Arrays.stream(ProteomicsMeasurementProperty.values()) + .filter(property -> property.propertyName().equals(sanitizedValue)).findFirst(); + } + + /** + * Checks whether the sanitized input equals the {@link #propertyName()} of any known property, + * ignoring letter case. + * + * @param value the value to look up, must not be null + * @return true, if a property with that name exists, else false + */ + static boolean valueMatchesAnyProperty(String value) { + var sanitizedValue = sanitizeValue(value); + // compare against the property name: Enum#name() yields the constant identifier (e.g. MEASUREMENT_ID) + return Arrays.stream(ProteomicsMeasurementProperty.values()) + .map(ProteomicsMeasurementProperty::propertyName) + .anyMatch(sanitizedValue::equalsIgnoreCase); + } + + public String propertyName() { + return name; + } + + } + + enum NGSMeasurementProperty { + MEASUREMENT_ID("measurement id"), + ORGANISATION_ID("organisation id"), + SAMPLE_POOL_GROUP("sample pool group"), + FACILITY("facility"), + INSTRUMENT("instrument"), + SEQUENCING_READ_TYPE("sequencing read type"), + LIBRARY_KIT("library kit"), + FLOW_CELL("flow cell"), + SEQUENCING_RUN_PROTOCOL("sequencing run protocol"), + INDEX_I7("index i7"), + INDEX_I5("index i5"), + 
COMMENT("comment"); + + private final String name; + + NGSMeasurementProperty(String value) { + this.name = value; + } + + /** + * Tries to convert an input property value to a known {@link NGSMeasurementProperty}. + *

+ * Leading and trailing whitespace as well as letter case will be ignored. + * + * @param value the presumed value to convert to a known {@link NGSMeasurementProperty} + * @return the matching property, or {@link Optional#empty()}. + * @since 1.4.0 + */ + static Optional fromStringTrimmed(String value) { + var sanitizedValue = sanitizeValue(value); + return Arrays.stream(NGSMeasurementProperty.values()) + .filter(property -> property.propertyName().equalsIgnoreCase(sanitizedValue)).findFirst(); + } + + /** + * Checks whether the sanitized input equals the {@link #propertyName()} of any known property, + * ignoring letter case. + * + * @param value the value to look up, must not be null + * @return true, if a property with that name exists, else false + */ + static boolean valueMatchesAnyProperty(String value) { + var sanitizedValue = sanitizeValue(value); + // compare against the property name: Enum#name() yields the constant identifier (e.g. MEASUREMENT_ID) + return Arrays.stream(NGSMeasurementProperty.values()).map(NGSMeasurementProperty::propertyName) + .anyMatch(sanitizedValue::equalsIgnoreCase); + } + + String propertyName() { + return name; + } + } +} diff --git a/user-interface/src/main/java/life/qbic/datamanager/parser/MetadataParser.java b/user-interface/src/main/java/life/qbic/datamanager/parser/MetadataParser.java new file mode 100644 index 000000000..add4a6453 --- /dev/null +++ b/user-interface/src/main/java/life/qbic/datamanager/parser/MetadataParser.java @@ -0,0 +1,35 @@ +package life.qbic.datamanager.parser; + +import java.io.InputStream; + +/** + * Metadata Parser + *

+ * A generic interface to hide the details of input file formats to parse, and enables clients and + * downstream consumers to work with an intermediate abstraction of the content, which is contained + * in the {@link ParsingResult} object. + * + * @since 1.4.0 + */ +public interface MetadataParser { + + ParsingResult parse(InputStream inputStream); + + class UnknownPropertiesException extends RuntimeException { + + public UnknownPropertiesException(String message) { + super(message); + } + } + + class ParsingException extends RuntimeException { + + public ParsingException(String message, Throwable cause) { + super(message, cause); + } + + public ParsingException(String message) { + super(message); + } + } +} diff --git a/user-interface/src/main/java/life/qbic/datamanager/parser/ParsingResult.java b/user-interface/src/main/java/life/qbic/datamanager/parser/ParsingResult.java new file mode 100644 index 000000000..95281917d --- /dev/null +++ b/user-interface/src/main/java/life/qbic/datamanager/parser/ParsingResult.java @@ -0,0 +1,91 @@ +package life.qbic.datamanager.parser; + +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.stream.Stream; + +/** + * Parsing Result + *

+ * A parsing result represents a two dimensional grid of information. + *

+ * A grid consists of values, organised as a list of rows. Every row is a list of String values. + *

+ * Its main feature is a property list (=> 'keys') with its String value and position in every row. + *

+ * Example for a grid: + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
key 'A' - position 0key 'B' - position 1key 'C' - position 2
Value A1Value B1Value C1
Value A2Value B2Value C2
.........
+ *

+ * So the resulting stored positions of every key in a row can be accessed via {@link #keys()} and would look like: + * + *

+ *

+ * and iterating through the rows would look like: + * + *

+ * + * @since 1.4.0 + */ +public record ParsingResult(Map keys, List rows) { + + public ParsingResult(Map keys, List rows) { + this.keys = Map.copyOf(keys); + this.rows = List.copyOf(rows); + } + + public Stream rowsStream() { + return rows.stream(); + } + + public Iterator iterator() { + return rows.iterator(); + } + + public List getRow(int rowIndex) { + if (rowIndex < 0 || rowIndex >= rows.size()) { + throw new IndexOutOfBoundsException( + "Row index out of bounds: %s but size is %s".formatted(rowIndex, rows.size())); + } + return rows.get(rowIndex).values; + } + + public record Row(List values) { + + public Row(List values) { + this.values = List.copyOf(values); + } + + } + +} diff --git a/user-interface/src/main/java/life/qbic/datamanager/parser/Sanitizer.java b/user-interface/src/main/java/life/qbic/datamanager/parser/Sanitizer.java new file mode 100644 index 000000000..f518a7c41 --- /dev/null +++ b/user-interface/src/main/java/life/qbic/datamanager/parser/Sanitizer.java @@ -0,0 +1,47 @@ +package life.qbic.datamanager.parser; + +import java.util.Arrays; +import java.util.Objects; + +/** + * Sanitizer + * + *

Cleans String literals according to encoding requirements for parsing.

+ * + * @since 1.4.0 + */ +public class Sanitizer { + + private static final String ASTERIX = "\\*"; + + /** + * Removes all available `*` (asterisk) symbols, executes {@link String#trim()} and a + * locale-independent {@link String#toLowerCase(java.util.Locale)} on a given input String. + * + * @param value the String value to be sanitized, must not be null + * @return the sanitized value + * @throws NullPointerException if the value is null + * @since 1.4.0 + */ + public static String headerEncoder(String value) { + Objects.requireNonNull(value); + // Locale.ROOT keeps the header keys stable regardless of the JVM default locale (e.g. Turkish dotless i) + return value.replaceAll(ASTERIX, "").trim().toLowerCase(java.util.Locale.ROOT); + } + + /** + * Investigates an array for information and can be used to e.g. filter out blank arrays. + *

+ * An array is considered to contain information, if at least one element contains a value that is + * NOT {@link String#isEmpty()} and NOT {@link String#isBlank()}. + *

+ * If the array contains only empty or blank values, the function returns false. + * + * @param array the array to investigate + * @return true, if at least one value is not blank or empty, else returns + * false + * @since 1.4.0 + */ + public static boolean containsInformation(String[] array) { + return !Arrays.stream(array).allMatch(value -> value.isEmpty() || value.isBlank()); + } + +} diff --git a/user-interface/src/main/java/life/qbic/datamanager/parser/tsv/TSVParser.java b/user-interface/src/main/java/life/qbic/datamanager/parser/tsv/TSVParser.java new file mode 100644 index 000000000..43e818e6a --- /dev/null +++ b/user-interface/src/main/java/life/qbic/datamanager/parser/tsv/TSVParser.java @@ -0,0 +1,95 @@ +package life.qbic.datamanager.parser.tsv; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map.Entry; +import java.util.Optional; +import life.qbic.datamanager.parser.MetadataParser; +import life.qbic.datamanager.parser.ParsingResult; +import life.qbic.datamanager.parser.ParsingResult.Row; +import life.qbic.datamanager.parser.Sanitizer; + +/** + * TSV Parser + *

+ * Tab-separated value format support for the {@link MetadataParser} interface. + *

+ * Support for UTF-16 encoding available. + *

+ * This implementation always considers the first line as the header, and will use its information + * to create the {@link ParsingResult#keys()} in the returned {@link ParsingResult} object + * instance. + * + * @since 1.4.0 + */ +public class TSVParser implements MetadataParser { + + private static final String VALUE_SEPARATOR = "\t"; + + private TSVParser() { + + } + + public static TSVParser create() { + return new TSVParser(); + } + + /** + * Prevents nasty {@link IndexOutOfBoundsException} and supports a more fluent API and cleaner + * code through the usage of Java's {@link Optional}. + * + * @param array the array to access an element from + * @param index the index of the element in the array to access + * @return the array element at position of the index wrapped in {@link Optional}, or + * {@link Optional#empty}, if the index is out of bounds. + * @since 1.4.0 + */ + private static Optional safeAccess(String[] array, Integer index) { + if (index >= array.length || index < 0) { + return Optional.empty(); + } + return Optional.of(array[index]); + } + + /** + * Reads the UTF-16 encoded stream line by line, takes the first line as header and converts all + * following lines that contain information into rows of the returned {@link ParsingResult}. + * + * @param inputStream the stream to read the tab-separated content from + * @return the parsed header keys with their column index and the parsed rows + * @throws ParsingException if the stream cannot be read or contains no content + */ + @Override + public ParsingResult parse(InputStream inputStream) { + List content; + try (BufferedReader reader = new BufferedReader( + new InputStreamReader(inputStream, StandardCharsets.UTF_16))) { + content = reader.lines().toList(); + } catch (IOException | java.io.UncheckedIOException e) { + // BufferedReader#lines reports read failures as UncheckedIOException, close() as IOException + throw new ParsingException("Cannot read from input stream", e); + } + if (content.isEmpty()) { + throw new ParsingException("No content provided!"); + } + var propertyToIndex = new HashMap(); + + var header = content.get(0).split(VALUE_SEPARATOR); + for (int i = 0; i < header.length; i++) { + propertyToIndex.put(Sanitizer.headerEncoder(header[i]), i); + } + + var values = content.subList(1, content.size()); + List rows = new ArrayList<>(); + for (String row : values) { + var rowContent = row.split(VALUE_SEPARATOR); + String[] rowData = new String[header.length]; + // columns whose header collapsed into another key are never assigned below; keep them empty, not null + Arrays.fill(rowData, ""); + for (Entry propertyEntry : propertyToIndex.entrySet()) { + 
rowData[propertyEntry.getValue()] = safeAccess(rowContent, propertyEntry.getValue()).orElse( + ""); + } + if (Sanitizer.containsInformation(rowData)) { + rows.add(new Row(Arrays.stream(rowData).toList())); + } + } + return new ParsingResult(propertyToIndex, rows); + } +} diff --git a/user-interface/src/main/java/life/qbic/datamanager/parser/xlsx/XLSXParser.java b/user-interface/src/main/java/life/qbic/datamanager/parser/xlsx/XLSXParser.java new file mode 100644 index 000000000..6ec69b393 --- /dev/null +++ b/user-interface/src/main/java/life/qbic/datamanager/parser/xlsx/XLSXParser.java @@ -0,0 +1,120 @@ +package life.qbic.datamanager.parser.xlsx; + +import static java.util.Objects.isNull; + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Optional; +import life.qbic.datamanager.parser.MetadataParser; +import life.qbic.datamanager.parser.ParsingResult; +import life.qbic.datamanager.parser.Sanitizer; +import org.apache.poi.ss.usermodel.Cell; +import org.apache.poi.ss.usermodel.Row; +import org.apache.poi.xssf.usermodel.XSSFRow; +import org.apache.poi.xssf.usermodel.XSSFSheet; +import org.apache.poi.xssf.usermodel.XSSFWorkbook; + +/** + * XLSX parser implementation of the {@link MetadataParser} interface. + *

+ * Parses information from content following the XLSX format specification: + *

+ * + * https://learn.microsoft.com/en-us/openspecs/office_standards/ms-xlsx/2c5dee00-eff2-4b22-92b6-0738acd4475e + * + * @since 1.4.0 + */ +public class XLSXParser implements MetadataParser { + + private XLSXParser() { + } + + public static XLSXParser create() { + return new XLSXParser(); + } + + /** + * Reads the cell value as String. If the cell is `null`, or one of the following types is + * present, the function will return an empty String: + * + *

    + *
  • _NONE
  • + *
  • ERROR
  • + *
  • BOOLEAN
  • + *
  • FORMULA
  • + *
  • BLANK
  • + *
+ * + * @param cell the cell to extract the value from + * @return the cell value in String representation + * @since 1.4.0 + */ + private static String readCellAsString(Cell cell) { + if (cell == null) { + return ""; + } + return switch (cell.getCellType()) { + case _NONE, ERROR, FORMULA, BLANK -> ""; + case BOOLEAN -> Boolean.toString(cell.getBooleanCellValue()); + case NUMERIC -> String.valueOf(cell.getNumericCellValue()); + case STRING -> cell.getStringCellValue(); + }; + } + + @Override + public ParsingResult parse(InputStream inputStream) { + try (XSSFWorkbook workbook = new XSSFWorkbook(inputStream)) { + return parse(workbook); + } catch (IOException e) { + throw new ParsingException("Parsing failed", e); + } + } + + private ParsingResult parse(XSSFWorkbook workbook) { + XSSFSheet metadataSheet = workbook.getSheetAt(0); + XSSFRow headerRow = Optional.ofNullable(metadataSheet.getRow(0)) + .orElseThrow(() -> new ParsingException("No header row found")); + + if (isNull(headerRow)) { + throw new MetadataParser.UnknownPropertiesException( + "No properties have been found: did you provide a header row?"); + } + List rows = new ArrayList<>(); + Map propertyToIndex = new HashMap<>(); + Iterator cellIterator = headerRow.cellIterator(); + //do not use while loop with the cell iterator! + //It will not return null but the same cell over and over if hasNext is not checked. 
+ + Cell cell; + while (cellIterator.hasNext()) { + cell = cellIterator.next(); + var cellValue = Sanitizer.headerEncoder(readCellAsString(cell)); + propertyToIndex.put(cellValue, cell.getColumnIndex()); + } + + Iterator rowIterator = metadataSheet.rowIterator(); + Row row; + rowIterator.next(); // skip the first entry, since it contains the header + + // Size every row by the physical width of the header row, not by the number of distinct keys: + // skipped or duplicate header cells make the largest column index exceed propertyToIndex.size(). + int columnCount = Math.max(headerRow.getLastCellNum(), 0); + + while (rowIterator.hasNext()) { + row = rowIterator.next(); + String[] rowData = new String[columnCount]; + // columns without a key stay empty instead of null, which Sanitizer and List.copyOf would reject + Arrays.fill(rowData, ""); + for (Entry columnEntry : propertyToIndex.entrySet()) { + rowData[columnEntry.getValue()] = readCellAsString(row.getCell(columnEntry.getValue())); + } + if (Sanitizer.containsInformation(rowData)) { + rows.add(new ParsingResult.Row(Arrays.stream(rowData).toList())); + } + } + + return new ParsingResult(propertyToIndex, rows); + } +} diff --git a/user-interface/src/main/java/life/qbic/datamanager/views/projects/project/measurements/MeasurementMetadataUploadDialog.java b/user-interface/src/main/java/life/qbic/datamanager/views/projects/project/measurements/MeasurementMetadataUploadDialog.java index d21f73dc5..93a2b600d 100644 --- a/user-interface/src/main/java/life/qbic/datamanager/views/projects/project/measurements/MeasurementMetadataUploadDialog.java +++ b/user-interface/src/main/java/life/qbic/datamanager/views/projects/project/measurements/MeasurementMetadataUploadDialog.java @@ -20,41 +20,35 @@ import com.vaadin.flow.dom.DomEvent; import com.vaadin.flow.shared.Registration; import elemental.json.JsonObject; -import java.io.BufferedReader; import java.io.InputStream; -import java.io.InputStreamReader; import java.io.Serial; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; import java.util.Collections; -import java.util.HashMap; import java.util.List; -import java.util.Map; import java.util.Objects; import java.util.Optional; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentLinkedDeque; 
-import java.util.stream.IntStream; -import life.qbic.application.commons.Result; +import life.qbic.datamanager.parser.MeasurementMetadataConverter.MissingSampleIdException; +import life.qbic.datamanager.parser.MeasurementMetadataConverter.UnknownMetadataTypeException; +import life.qbic.datamanager.parser.MetadataConverter; +import life.qbic.datamanager.parser.ParsingResult; +import life.qbic.datamanager.parser.tsv.TSVParser; +import life.qbic.datamanager.parser.xlsx.XLSXParser; import life.qbic.datamanager.views.CancelConfirmationNotificationDialog; import life.qbic.datamanager.views.general.InfoBox; import life.qbic.datamanager.views.general.WizardDialogWindow; import life.qbic.datamanager.views.notifications.ErrorMessage; import life.qbic.datamanager.views.notifications.StyledNotification; import life.qbic.datamanager.views.projects.EditableMultiFileMemoryBuffer; -import life.qbic.projectmanagement.application.measurement.Labeling; import life.qbic.projectmanagement.application.measurement.MeasurementMetadata; import life.qbic.projectmanagement.application.measurement.NGSMeasurementMetadata; import life.qbic.projectmanagement.application.measurement.ProteomicsMeasurementMetadata; -import life.qbic.projectmanagement.application.measurement.validation.MeasurementNGSValidator.NGS_PROPERTY; -import life.qbic.projectmanagement.application.measurement.validation.MeasurementProteomicsValidator.PROTEOMICS_PROPERTY; import life.qbic.projectmanagement.application.measurement.validation.MeasurementValidationService; import life.qbic.projectmanagement.application.measurement.validation.ValidationResult; import life.qbic.projectmanagement.domain.model.experiment.Experiment; import life.qbic.projectmanagement.domain.model.project.ProjectId; -import life.qbic.projectmanagement.domain.model.sample.SampleCode; import org.springframework.util.StringUtils; @@ -95,7 +89,8 @@ public MeasurementMetadataUploadDialog(MeasurementValidationService measurementV 
this.measurementMetadataUploads = new ArrayList<>(); this.measurementFileItems = new ArrayList<>(); Upload upload = new Upload(uploadBuffer); - upload.setAcceptedFileTypes("text/tab-separated-values", "text/plain"); + upload.setAcceptedFileTypes("text/tab-separated-values", "text/plain", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); upload.setMaxFileSize(MAX_FILE_SIZE_BYTES); setModeBasedLabels(); uploadItemsDisplay = new UploadItemsDisplay(upload); @@ -117,156 +112,6 @@ public MeasurementMetadataUploadDialog(MeasurementValidationService measurementV } - private static List parseHeaderContent(String header) { - return Arrays.stream(header.replace("*", "").strip().split("\t")).map(String::strip).toList(); - } - - private static Map propertyColumnMap(List properties) { - var propertyIterator = properties.listIterator(); - Map map = new HashMap<>(); - int index; - while ((index = propertyIterator.nextIndex()) < properties.size()) { - map.put(propertyIterator.next().toLowerCase(), index); - } - return map; - } - - private static MetadataContent read(InputStream inputStream) { - var content = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_16)).lines().toList(); - - return new MetadataContent(content.isEmpty() ? null : content.get(0), - content.size() > 1 ? 
content.subList(1, content.size()) : new ArrayList<>()); - } - - private static boolean isRowNotEmpty(String row) { - return row.split("\t").length > 0; - } - - private static Result generateNGSRequest( - String row, Map columns) { - var columnValues = row.split("\t"); // tab separated values - // we consider an empty row as a reason to warn, not to fail - if (columnValues.length == 0) { - return Result.fromValue(null); - } - - Integer measurementIdIndex = columns.getOrDefault(MeasurementProperty.MEASUREMENT_ID.label(), - -1); - Integer sampleCodeColumnIndex = columns.get(NGS_PROPERTY.QBIC_SAMPLE_ID.label()); - Integer organisationColumnIndex = columns.get(NGS_PROPERTY.ORGANISATION_ID.label()); - Integer instrumentColumnIndex = columns.get(NGS_PROPERTY.INSTRUMENT.label()); - Integer facilityIndex = columns.get(NGS_PROPERTY.FACILITY.label()); - Integer readTypeIndex = columns.get(NGS_PROPERTY.SEQUENCING_READ_TYPE.label()); - Integer libraryKitIndex = columns.get(NGS_PROPERTY.LIBRARY_KIT.label()); - Integer flowCellIndex = columns.get(NGS_PROPERTY.FLOW_CELL.label()); - Integer runProtocolIndex = columns.get(NGS_PROPERTY.SEQUENCING_RUN_PROTOCOL.label()); - Integer samplePoolIndex = columns.get(NGS_PROPERTY.SAMPLE_POOL_GROUP.label()); - Integer indexI7Index = columns.get(NGS_PROPERTY.INDEX_I7.label()); - Integer indexI5Index = columns.get(NGS_PROPERTY.INDEX_I5.label()); - Integer commentIndex = columns.get(NGS_PROPERTY.COMMENT.label()); - - int maxPropertyIndex = IntStream.of(sampleCodeColumnIndex, - organisationColumnIndex, - instrumentColumnIndex) - .max().orElseThrow(); - if (columns.size() <= maxPropertyIndex) { - return Result.fromError("Not enough columns provided for row: %s".formatted(row)); - } - - String measurementId = safeArrayAccess(columnValues, measurementIdIndex).orElse(""); - List sampleCodes = List.of( - SampleCode.create(safeArrayAccess(columnValues, sampleCodeColumnIndex).orElse(""))); - - String organisationRoRId = safeArrayAccess(columnValues, 
organisationColumnIndex).orElse(""); - String instrumentCURIE = safeArrayAccess(columnValues, instrumentColumnIndex).orElse(""); - String facility = safeArrayAccess(columnValues, facilityIndex).orElse(""); - String readType = safeArrayAccess(columnValues, readTypeIndex).orElse(""); - String libraryKit = safeArrayAccess(columnValues, libraryKitIndex).orElse(""); - String flowCell = safeArrayAccess(columnValues, flowCellIndex).orElse(""); - String runProtocol = safeArrayAccess(columnValues, runProtocolIndex).orElse(""); - String samplePool = safeArrayAccess(columnValues, samplePoolIndex).orElse(""); - String indexI7 = safeArrayAccess(columnValues, indexI7Index).orElse(""); - String indexI5 = safeArrayAccess(columnValues, indexI5Index).orElse(""); - String comment = safeArrayAccess(columnValues, commentIndex).orElse(""); - NGSMeasurementMetadata metadata = new NGSMeasurementMetadata(measurementId, sampleCodes, - organisationRoRId, instrumentCURIE, facility, readType, - libraryKit, flowCell, runProtocol, samplePool, indexI7, indexI5, comment); - return Result.fromValue(metadata); - } - - private static Result generatePxPRequest( - String row, Map columns) { - var columnValues = row.split("\t"); // tab separated values - // we consider an empty row as a reason to warn, not to fail - if (columnValues.length == 0) { - return Result.fromValue(null); - } - - Integer measurementIdIndex = columns.getOrDefault(MeasurementProperty.MEASUREMENT_ID.label(), - -1); - Integer sampleCodeColumnIndex = columns.get(PROTEOMICS_PROPERTY.QBIC_SAMPLE_ID.label()); - Integer organisationColumnIndex = columns.get(PROTEOMICS_PROPERTY.ORGANISATION_ID.label()); - Integer instrumentColumnIndex = columns.get(PROTEOMICS_PROPERTY.INSTRUMENT.label()); - Integer samplePoolGroupIndex = columns.get(PROTEOMICS_PROPERTY.SAMPLE_POOL_GROUP.label()); - Integer facilityIndex = columns.get(PROTEOMICS_PROPERTY.FACILITY.label()); - Integer fractionNameIndex = 
columns.get(PROTEOMICS_PROPERTY.CYCLE_FRACTION_NAME.label()); - Integer digestionEnzymeIndex = columns.get(PROTEOMICS_PROPERTY.DIGESTION_ENZYME.label()); - Integer digestionMethodIndex = columns.get(PROTEOMICS_PROPERTY.DIGESTION_METHOD.label()); - Integer enrichmentMethodIndex = columns.get(PROTEOMICS_PROPERTY.ENRICHMENT_METHOD.label()); - Integer injectionVolumeIndex = columns.get(PROTEOMICS_PROPERTY.INJECTION_VOLUME.label()); - Integer lcColumnIndex = columns.get(PROTEOMICS_PROPERTY.LC_COLUMN.label()); - Integer lcmsMethodIndex = columns.get(PROTEOMICS_PROPERTY.LCMS_METHOD.label()); - Integer labelingTypeIndex = columns.get(PROTEOMICS_PROPERTY.LABELING_TYPE.label()); - Integer labelIndex = columns.get(PROTEOMICS_PROPERTY.LABEL.label()); - Integer noteIndex = columns.get(PROTEOMICS_PROPERTY.COMMENT.label()); - - int maxPropertyIndex = IntStream.of(sampleCodeColumnIndex, - organisationColumnIndex, - instrumentColumnIndex) - .max().orElseThrow(); - if (columns.size() <= maxPropertyIndex) { - return Result.fromError("Not enough columns provided for row: %s".formatted(row)); - } - - String measurementId = safeArrayAccess(columnValues, measurementIdIndex).orElse(""); - SampleCode sampleCode = SampleCode.create( - safeArrayAccess(columnValues, sampleCodeColumnIndex).orElse("")); - String organisationRoRId = safeArrayAccess(columnValues, organisationColumnIndex).orElse(""); - String instrumentCURIE = safeArrayAccess(columnValues, instrumentColumnIndex).orElse(""); - String samplePoolGroup = safeArrayAccess(columnValues, samplePoolGroupIndex).orElse(""); - String facility = safeArrayAccess(columnValues, facilityIndex).orElse(""); - String fractionName = safeArrayAccess(columnValues, fractionNameIndex).orElse(""); - String digestionEnzyme = safeArrayAccess(columnValues, digestionEnzymeIndex).orElse(""); - String digestionMethod = safeArrayAccess(columnValues, digestionMethodIndex).orElse(""); - String enrichmentMethod = safeArrayAccess(columnValues, 
enrichmentMethodIndex).orElse(""); - String injectionVolume = safeArrayAccess(columnValues, injectionVolumeIndex).orElse(""); - String lcColumn = safeArrayAccess(columnValues, lcColumnIndex).orElse(""); - String lcmsMethod = safeArrayAccess(columnValues, lcmsMethodIndex).orElse(""); - String labelingType = safeArrayAccess(columnValues, labelingTypeIndex).orElse(""); - String label = safeArrayAccess(columnValues, labelIndex).orElse(""); - String note = safeArrayAccess(columnValues, noteIndex).orElse(""); - - ProteomicsMeasurementMetadata metadata = new ProteomicsMeasurementMetadata(measurementId, - sampleCode, - organisationRoRId, instrumentCURIE, samplePoolGroup, facility, fractionName, - digestionEnzyme, - digestionMethod, enrichmentMethod, injectionVolume, lcColumn, lcmsMethod, - new Labeling(labelingType, label), note); - return Result.fromValue(metadata); - } - - private static List parseSampleCode(String sampleCodeEntry) { - return Arrays.stream(sampleCodeEntry.split(",")).map(SampleCode::create).toList(); - } - - private static Optional safeArrayAccess(String[] array, int index) { - try { - return Optional.of(array[index]); - } catch (ArrayIndexOutOfBoundsException e) { - return Optional.empty(); - } - } - private void setModeBasedLabels() { switch (mode) { case ADD -> { @@ -315,19 +160,53 @@ private void onUploadFailed(FailedEvent failedEvent) { showErrorNotification("File upload was interrupted", failedEvent.getReason().getMessage()); } + private MeasurementValidationReport validate(List metadata) { + if (metadata == null || metadata.isEmpty()) { + return new MeasurementValidationReport(0, + ValidationResult.withFailures(0, List.of("The metadata sheet seems to be empty"))); + } + if (metadata.get(0) instanceof NGSMeasurementMetadata) { + return validateNGS((List) metadata); + } + return validatePxP((List) metadata); + } + + private ParsingResult parseXLSX(InputStream inputStream) { + return XLSXParser.create().parse(inputStream); + } + + private 
ParsingResult parseTSV(InputStream inputStream) { + return TSVParser.create().parse(inputStream); + } + private void onUploadSucceeded(SucceededEvent succeededEvent) { - MetadataContent content = read( - uploadBuffer.inputStream(succeededEvent.getFileName()).orElseThrow()); - var contentHeader = content.theHeader() - .orElseThrow(() -> new RuntimeException("No header row found")); - var domain = measurementValidationService.inferDomainByPropertyTypes( - parseHeaderContent(contentHeader)) - .orElseThrow(() -> new RuntimeException( - "Header row could not be recognized, Please provide a valid template file")); - var validationReport = switch (domain) { - case PROTEOMICS -> validatePxP(content); - case NGS -> validateNGS(content); - }; + var fileName = succeededEvent.getFileName(); + ParsingResult parsingResult; + if (fileName.endsWith(".xlsx")) { + parsingResult = parseXLSX(uploadBuffer.inputStream(fileName).orElseThrow()); + } else if (fileName.endsWith(".tsv") || fileName.endsWith(".txt")) { + parsingResult = parseTSV(uploadBuffer.inputStream(fileName).orElseThrow()); + } else { + displayError(succeededEvent.getFileName(), + "Unsupported file type. Please make sure to upload a TSV or XLSX file."); + return; + } + List result; + try { + result = MetadataConverter.measurementConverter() + .convert(parsingResult, mode.equals(MODE.ADD)); + } catch ( + UnknownMetadataTypeException e) { // we want to display this in the dialog, not via the notification system + displayError(succeededEvent.getFileName(), + "Unknown metadata file content. 
Please make sure to include all metadata properties, even the optional ones"); + return; + } catch (MissingSampleIdException e) { + displayError(succeededEvent.getFileName(), "Looks like at least one sample id is missing."); + return; + } + + var validationReport = validate(result); + MeasurementFileItem measurementFileItem = new MeasurementFileItem(succeededEvent.getFileName(), validationReport); //We don't want to upload any invalid measurements in spreadsheet @@ -336,16 +215,22 @@ private void onUploadSucceeded(SucceededEvent succeededEvent) { succeededEvent.getFileName(), Collections.emptyList()); addFile(measurementFileItem, metadataUpload); } else { - var measurementMetadata = switch (domain) { - case PROTEOMICS -> generatePxPMetadata(content); - case NGS -> generateNGSMetadata(content); - }; MeasurementMetadataUpload metadataUpload = new MeasurementMetadataUpload( - succeededEvent.getFileName(), measurementMetadata); + succeededEvent.getFileName(), result); addFile(measurementFileItem, metadataUpload); } } + private void displayError(String fileName, String reason) { + MeasurementMetadataUpload metadataUpload = new MeasurementMetadataUpload<>( + fileName, Collections.emptyList()); + MeasurementFileItem measurementFileItem = new MeasurementFileItem( + fileName, + new MeasurementValidationReport(1, ValidationResult.withFailures(1, List.of( + reason)))); + addFile(measurementFileItem, metadataUpload); + } + private void addFile(MeasurementFileItem measurementFileItem, MeasurementMetadataUpload metadataUpload) { measurementMetadataUploads.add(metadataUpload); @@ -353,58 +238,13 @@ private void addFile(MeasurementFileItem measurementFileItem, showFile(measurementFileItem); } - private List generateNGSMetadata( - MetadataContent content) { - var propertyColumnMap = propertyColumnMap(parseHeaderContent(content.header())); - - var results = content.rows().stream() - .map(row -> generateNGSRequest(row, propertyColumnMap)) - .toList(); - if 
(results.stream().anyMatch(Result::isError)) { - return new ArrayList<>(); - } - return results.stream() - .filter(Result::isValue) - .map(Result::getValue) - .filter(Objects::nonNull) - .toList(); - } - - private List generatePxPMetadata( - MetadataContent content) { - var propertyColumnMap = propertyColumnMap(parseHeaderContent(content.header())); - - var results = content.rows().stream() - .map(row -> generatePxPRequest(row, propertyColumnMap)) - .toList(); - if (results.stream().anyMatch(Result::isError)) { - return new ArrayList<>(); - } - return results.stream() - .filter(Result::isValue) - .map(Result::getValue) - .filter(Objects::nonNull) - .toList(); - } - - private MeasurementValidationReport validateNGS(MetadataContent content) { + private MeasurementValidationReport validateNGS(List content) { var validationResult = ValidationResult.successful(0); - var propertyColumnMap = propertyColumnMap(parseHeaderContent(content.header())); - // we check if there are any rows provided or if we have only rows with empty content - if (content.rows().isEmpty() || content.rows().stream() - .noneMatch(MeasurementMetadataUploadDialog::isRowNotEmpty)) { - validationResult = validationResult.combine( - ValidationResult.withFailures(0, - List.of("The metadata sheet seems to be empty"))); - return new MeasurementValidationReport(0, validationResult); - } ConcurrentLinkedDeque concurrentLinkedDeque = new ConcurrentLinkedDeque<>(); List> tasks = new ArrayList<>(); - for (String row : content.rows().stream() - .filter(MeasurementMetadataUploadDialog::isRowNotEmpty).toList()) { - tasks.add(validateNGSRow(propertyColumnMap, row).thenAccept(concurrentLinkedDeque::add)); + for (NGSMeasurementMetadata metaDatum : content) { + tasks.add(validateNGSMetaDatum(metaDatum).thenAccept(concurrentLinkedDeque::add)); } - CompletableFuture.allOf(tasks.toArray(new CompletableFuture[0])).join(); return new MeasurementValidationReport(concurrentLinkedDeque.size(), @@ -412,26 +252,14 @@ private 
MeasurementValidationReport validateNGS(MetadataContent content) { validationResult, ValidationResult::combine)); } - private MeasurementValidationReport validatePxP(MetadataContent content) { + private MeasurementValidationReport validatePxP(List content) { var validationResult = ValidationResult.successful(0); - var propertyColumnMap = propertyColumnMap(parseHeaderContent(content.header())); - // we check if there are any rows provided or if we have only rows with empty content - if (content.rows().isEmpty() || content.rows().stream() - .noneMatch(MeasurementMetadataUploadDialog::isRowNotEmpty)) { - validationResult = validationResult.combine( - ValidationResult.withFailures(0, - List.of("The metadata sheet seems to be empty"))); - return new MeasurementValidationReport(0, validationResult); - } - ConcurrentLinkedDeque concurrentLinkedDeque = new ConcurrentLinkedDeque<>(); List> tasks = new ArrayList<>(); - for (String row : content.rows().stream() - .filter(MeasurementMetadataUploadDialog::isRowNotEmpty).toList()) { - tasks.add(validatePxPRow(propertyColumnMap, row).thenAccept(concurrentLinkedDeque::add)); + for (ProteomicsMeasurementMetadata metaDatum : content) { + tasks.add(validatePxpMetaDatum(metaDatum).thenAccept(concurrentLinkedDeque::add)); } - CompletableFuture.allOf(tasks.toArray(new CompletableFuture[0])).join(); return new MeasurementValidationReport(concurrentLinkedDeque.size(), @@ -439,150 +267,20 @@ private MeasurementValidationReport validatePxP(MetadataContent content) { validationResult, ValidationResult::combine)); } - private CompletableFuture validateNGSRow(Map propertyColumnMap, - String row) { - var validationResult = ValidationResult.successful(0); - var metaDataValues = row.split("\t"); // tab separated values - // we consider an empty row as a reason to warn, not to fail - if (metaDataValues.length == 0) { - validationResult.combine( - ValidationResult.successful(1, List.of("Empty row provided."))); - return 
CompletableFuture.supplyAsync(() -> validationResult); - } - if (metaDataValues.length != propertyColumnMap.keySet().size()) { - validationResult.combine(ValidationResult.withFailures(1, List.of(""))); - } - var measurementIdIndex = propertyColumnMap.getOrDefault( - MeasurementProperty.MEASUREMENT_ID.label(), -1); - var sampleCodeColumnIndex = propertyColumnMap.get( - NGS_PROPERTY.QBIC_SAMPLE_ID.label()); - var organisationsColumnIndex = propertyColumnMap.get( - NGS_PROPERTY.ORGANISATION_ID.label()); - var facilityIndex = propertyColumnMap.get(NGS_PROPERTY.FACILITY.label()); - var instrumentColumnIndex = propertyColumnMap.get( - NGS_PROPERTY.INSTRUMENT.label()); - var sequencingReadTypeIndex = propertyColumnMap.get( - NGS_PROPERTY.SEQUENCING_READ_TYPE.label()); - var libraryKitIndex = propertyColumnMap.get( - NGS_PROPERTY.LIBRARY_KIT.label()); - var flowCellIndex = propertyColumnMap.get( - NGS_PROPERTY.FLOW_CELL.label()); - var sequencingRunProtocolIndex = propertyColumnMap.get( - NGS_PROPERTY.SEQUENCING_RUN_PROTOCOL.label()); - var samplePoolIndex = propertyColumnMap.get( - NGS_PROPERTY.SAMPLE_POOL_GROUP.label()); - var indexI7Index = propertyColumnMap.get( - NGS_PROPERTY.INDEX_I7.label()); - var indexI5Index = propertyColumnMap.get( - NGS_PROPERTY.INDEX_I5.label()); - Integer commentIndex = propertyColumnMap.get(NGS_PROPERTY.COMMENT.label()); - int maxPropertyIndex = IntStream.of(sampleCodeColumnIndex, organisationsColumnIndex, - instrumentColumnIndex).max().orElseThrow(); - if (propertyColumnMap.size() <= maxPropertyIndex) { - return CompletableFuture.supplyAsync( - () -> validationResult.combine(ValidationResult.withFailures(1, - List.of("Not enough columns provided for row: \"%s\"".formatted(row))))); - } - var measurementId = safeArrayAccess(metaDataValues, measurementIdIndex).orElse(""); - var sampleCodes = SampleCode.create( - safeArrayAccess(metaDataValues, sampleCodeColumnIndex).orElse("")); - var organisationRoRId = safeArrayAccess(metaDataValues, 
organisationsColumnIndex).orElse(""); - var instrumentCURIE = safeArrayAccess(metaDataValues, instrumentColumnIndex).orElse(""); - var facility = safeArrayAccess(metaDataValues, facilityIndex).orElse(""); - var sequencingReadType = safeArrayAccess(metaDataValues, sequencingReadTypeIndex).orElse(""); - var libraryKit = safeArrayAccess(metaDataValues, libraryKitIndex).orElse(""); - var flowCell = safeArrayAccess(metaDataValues, flowCellIndex).orElse(""); - var sequencingRunProtocol = safeArrayAccess(metaDataValues, sequencingRunProtocolIndex).orElse( - ""); - var samplePoolGroup = safeArrayAccess(metaDataValues, samplePoolIndex).orElse(""); - var indexI7 = safeArrayAccess(metaDataValues, indexI7Index).orElse(""); - var indexI5 = safeArrayAccess(metaDataValues, indexI5Index).orElse(""); - var comment = safeArrayAccess(metaDataValues, commentIndex).orElse(""); - - var metadata = new NGSMeasurementMetadata(measurementId, List.of(sampleCodes), - organisationRoRId, instrumentCURIE, facility, sequencingReadType, - libraryKit, flowCell, sequencingRunProtocol, samplePoolGroup, indexI7, indexI5, comment); + private CompletableFuture validateNGSMetaDatum( + NGSMeasurementMetadata metaDatum) { var measurementNGSValidationExecutor = new MeasurementNGSValidationExecutor( measurementValidationService); return generateModeDependentValidationResult( - measurementNGSValidationExecutor, metadata); + measurementNGSValidationExecutor, metaDatum); } - private CompletableFuture validatePxPRow(Map propertyColumnMap, - String row) { - var validationResult = ValidationResult.successful(0); - var metaDataValues = row.split("\t"); // tab separated values - // we consider an empty row as a reason to warn, not to fail - if (metaDataValues.length == 0) { - validationResult.combine( - ValidationResult.successful(1, List.of("Empty row provided."))); - return CompletableFuture.supplyAsync(() -> validationResult); - } - if (metaDataValues.length != propertyColumnMap.keySet().size()) { - 
validationResult.combine(ValidationResult.withFailures(1, List.of(""))); - } - - var measurementIdIndex = propertyColumnMap.getOrDefault( - MeasurementProperty.MEASUREMENT_ID.label(), -1); - var sampleCodeColumnIndex = propertyColumnMap.get( - PROTEOMICS_PROPERTY.QBIC_SAMPLE_ID.label()); - var organisationsColumnIndex = propertyColumnMap.get( - PROTEOMICS_PROPERTY.ORGANISATION_ID.label()); - var instrumentColumnIndex = propertyColumnMap.get( - PROTEOMICS_PROPERTY.INSTRUMENT.label()); - var samplePoolGroupIndex = propertyColumnMap.get( - PROTEOMICS_PROPERTY.SAMPLE_POOL_GROUP.label()); - var facilityIndex = propertyColumnMap.get(PROTEOMICS_PROPERTY.FACILITY.label()); - var fractionNameIndex = propertyColumnMap.get(PROTEOMICS_PROPERTY.CYCLE_FRACTION_NAME.label()); - var digestionEnzymeIndex = propertyColumnMap.get(PROTEOMICS_PROPERTY.DIGESTION_ENZYME.label()); - var digestionMethodIndex = propertyColumnMap.get(PROTEOMICS_PROPERTY.DIGESTION_METHOD.label()); - Integer enrichmentMethodIndex = propertyColumnMap.get( - PROTEOMICS_PROPERTY.ENRICHMENT_METHOD.label()); - Integer injectionVolumeIndex = propertyColumnMap.get( - PROTEOMICS_PROPERTY.INJECTION_VOLUME.label()); - Integer lcColumnIndex = propertyColumnMap.get(PROTEOMICS_PROPERTY.LC_COLUMN.label()); - Integer lcmsMethodIndex = propertyColumnMap.get(PROTEOMICS_PROPERTY.LCMS_METHOD.label()); - Integer labelingTypeIndex = propertyColumnMap.get(PROTEOMICS_PROPERTY.LABELING_TYPE.label()); - Integer labelIndex = propertyColumnMap.get(PROTEOMICS_PROPERTY.LABEL.label()); - Integer noteIndex = propertyColumnMap.get(PROTEOMICS_PROPERTY.COMMENT.label()); - - int maxPropertyIndex = IntStream.of(sampleCodeColumnIndex, organisationsColumnIndex, - instrumentColumnIndex).max().orElseThrow(); - if (propertyColumnMap.size() <= maxPropertyIndex) { - return CompletableFuture.completedFuture( - validationResult.combine(ValidationResult.withFailures(1, - List.of("Not enough columns provided for row: \"%s\"".formatted(row))))); - } - - var 
measurementId = safeArrayAccess(metaDataValues, measurementIdIndex).orElse(""); - var sampleCode = SampleCode.create( - safeArrayAccess(metaDataValues, sampleCodeColumnIndex).orElse("")); - var organisationRoRId = safeArrayAccess(metaDataValues, organisationsColumnIndex).orElse(""); - var instrumentCURIE = safeArrayAccess(metaDataValues, instrumentColumnIndex).orElse(""); - var samplePoolGroup = safeArrayAccess(metaDataValues, samplePoolGroupIndex).orElse(""); - var facility = safeArrayAccess(metaDataValues, facilityIndex).orElse(""); - var fractionName = safeArrayAccess(metaDataValues, fractionNameIndex).orElse(""); - var digestionEnzyme = safeArrayAccess(metaDataValues, digestionEnzymeIndex).orElse(""); - var digestionMethod = safeArrayAccess(metaDataValues, digestionMethodIndex).orElse(""); - var enrichmentMethod = safeArrayAccess(metaDataValues, enrichmentMethodIndex).orElse(""); - var injectionVolume = safeArrayAccess(metaDataValues, injectionVolumeIndex).orElse(""); - var lcColumn = safeArrayAccess(metaDataValues, lcColumnIndex).orElse(""); - var lcmsMethod = safeArrayAccess(metaDataValues, lcmsMethodIndex).orElse(""); - var labelingType = safeArrayAccess(metaDataValues, labelingTypeIndex).orElse(""); - var label = safeArrayAccess(metaDataValues, labelIndex).orElse(""); - - var note = safeArrayAccess(metaDataValues, noteIndex).orElse(""); - - var metadata = new ProteomicsMeasurementMetadata(measurementId, sampleCode, - organisationRoRId, instrumentCURIE, samplePoolGroup, facility, fractionName, - digestionEnzyme, - digestionMethod, enrichmentMethod, injectionVolume, lcColumn, lcmsMethod, - new Labeling(labelingType, label), note); - var measurementProteomicsValidationExecutor = new MeasurementProteomicsValidationExecutor( + private CompletableFuture validatePxpMetaDatum( + ProteomicsMeasurementMetadata metaDatum) { + MeasurementValidationExecutor proteomicsValidationExecutor = new MeasurementProteomicsValidationExecutor( measurementValidationService); - var 
finalValidationResult = generateModeDependentValidationResult( - measurementProteomicsValidationExecutor, metadata); - return finalValidationResult; + return generateModeDependentValidationResult( + proteomicsValidationExecutor, metaDatum); } private CompletableFuture generateModeDependentValidationResult( @@ -828,12 +526,12 @@ public UploadItemsDisplay(Upload upload) { uploadSectionTitle.addClassName("section-title"); var saveYourFileInfo = new InfoBox().setInfoText( - "Please save your excel file as UTF-16 Unicode Text (*.txt) before uploading.") + "When uploading a tab-separated file, please save your Excel file as UTF-16 Unicode Text (*.txt) before uploading.") .setClosable(false); var restrictions = new Div(); restrictions.addClassName("restrictions"); - restrictions.add(new Span("Supported file formats: .txt, .tsv")); + restrictions.add(new Span("Supported file formats: .txt, .tsv, .xlsx")); restrictions.add( "Maximum file size: %s MB".formatted(MAX_FILE_SIZE_BYTES / Math.pow(1024, 2))); diff --git a/user-interface/src/main/java/life/qbic/datamanager/views/projects/project/measurements/MeasurementProteomicsValidationExecutor.java b/user-interface/src/main/java/life/qbic/datamanager/views/projects/project/measurements/MeasurementProteomicsValidationExecutor.java index f796d76f4..2d95b27ff 100644 --- a/user-interface/src/main/java/life/qbic/datamanager/views/projects/project/measurements/MeasurementProteomicsValidationExecutor.java +++ b/user-interface/src/main/java/life/qbic/datamanager/views/projects/project/measurements/MeasurementProteomicsValidationExecutor.java @@ -6,6 +6,7 @@ import life.qbic.projectmanagement.application.measurement.validation.MeasurementValidationService; import life.qbic.projectmanagement.application.measurement.validation.ValidationResult; import life.qbic.projectmanagement.domain.model.project.ProjectId; +import org.apache.poi.ss.formula.functions.T; import org.springframework.beans.factory.annotation.Autowired; import 
org.springframework.security.access.prepost.PreAuthorize; import org.springframework.stereotype.Component; diff --git a/user-interface/src/main/java/life/qbic/datamanager/views/projects/project/measurements/MeasurementValidationExecutor.java b/user-interface/src/main/java/life/qbic/datamanager/views/projects/project/measurements/MeasurementValidationExecutor.java index 5696f33b9..ff17bab52 100644 --- a/user-interface/src/main/java/life/qbic/datamanager/views/projects/project/measurements/MeasurementValidationExecutor.java +++ b/user-interface/src/main/java/life/qbic/datamanager/views/projects/project/measurements/MeasurementValidationExecutor.java @@ -1,6 +1,7 @@ package life.qbic.datamanager.views.projects.project.measurements; import java.util.concurrent.CompletableFuture; +import life.qbic.projectmanagement.application.measurement.MeasurementMetadata; import life.qbic.projectmanagement.application.measurement.validation.MeasurementValidationService; import life.qbic.projectmanagement.application.measurement.validation.ValidationResult; import life.qbic.projectmanagement.domain.model.project.ProjectId; @@ -16,12 +17,12 @@ * * @since 1.0.0 */ -public interface MeasurementValidationExecutor { +public interface MeasurementValidationExecutor { - CompletableFuture validateRegistration(MeasurementMetadata metadata, + CompletableFuture validateRegistration(T metadata, ProjectId projectId); - CompletableFuture validateUpdate(MeasurementMetadata metadata, + CompletableFuture validateUpdate(T metadata, ProjectId projectId); } diff --git a/user-interface/src/main/resources/messages.properties b/user-interface/src/main/resources/messages.properties index 4c1044caa..9f616b1fd 100644 --- a/user-interface/src/main/resources/messages.properties +++ b/user-interface/src/main/resources/messages.properties @@ -12,3 +12,4 @@ NO_SPECIMEN_DEFINED=No specimens were defined -> At least one specimen has to be NO_ANALYTE_DEFINED=No analytes were defined -> At least one analyte has to be 
selected during project creation DATA_ATTACHED_TO_SAMPLES=Samples could not be deleted -> At least one of the samples has data attached. SERVICE_FAILED=A service failed -> Unfortunately a service failed, please try again later or feel free to contact us at support@qbic.zendesk.com if the issue persists. +UNKNOWN_METADATA=Unknown metadata type -> Please make sure that all property names are provided.