Skip to content

Commit f29d2ef

Browse files
authored
Enable thread amount config and pattern validation (#8)
The number of threads per process is now configurable via environment variables, and the format of the QBiC measurement ID is now validated in the registration request to fail early.
1 parent 7944a41 commit f29d2ef

File tree

9 files changed

+60
-51
lines changed

9 files changed

+60
-51
lines changed

README.md

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -157,12 +157,14 @@ perform checksum validation. Feel free to use it as template for subsequent proc
157157

158158
### Evaluation
159159

160-
Last but not least, this step looks for any present QBiC measurement ID in the dataset name. If none
161-
is given, the registration cannot be executed.
160+
Last but not least, this step validates the QBiC measurement ID via a [configurable](#evaluation-step-config) regex pattern.
162161

163-
In this case the process moves the task directory into the user's home error folder. After the user
164-
has
165-
provided a valid QBiC measurement id, they can move the dataset into registration again.
162+
In case of invalid measurement ID formats, the process moves the task directory into the user's home error folder.
163+
After the user has provided a valid QBiC measurement ID, they can move the dataset into registration again.
164+
165+
In case of a successful ID validation, the dataset will be moved to the configured destination folder.
166+
If multiple destination folders are provided in the [configuration](#evaluation-step-config), the assignment of the next target directory is based
167+
on a round-robin approach, to balance any downstream task load (e.g. openBIS dropbox registration).
166168

167169
## Configuration
168170

@@ -218,7 +220,8 @@ finished tasks are moved to after successful operation.
218220
#----------------
219221
# Settings for the registration worker threads
220222
#----------------
221-
registration.threads=2
223+
registration.threads=${REGISTRATION_THREADS:2}
224+
registration.metadata.filename=metadata.txt
222225
registration.working.dir=${WORKING_DIR:}
223226
registration.target.dir=${PROCESSING_DIR:}
224227
```
@@ -233,7 +236,7 @@ finished tasks are moved to after successful operation.
233236
# Settings for the 1. processing step
234237
# Proper packaging and provenance data, some simple checks
235238
#------------------------------------
236-
processing.threads=2
239+
processing.threads=${PROCESSING_THREADS:2}
237240
processing.working.dir=${PROCESSING_DIR}
238241
processing.target.dir=${EVALUATION_DIR}
239242
```
@@ -248,7 +251,7 @@ finished tasks are moved to after successful operation.
248251
# Setting for the 2. processing step:
249252
# Measurement ID evaluation
250253
# ---------------------------------
251-
evaluations.threads=2
254+
evaluation.threads=${EVALUATION_THREADS:2}
252255
evaluation.working.dir=${EVALUATION_DIR}
253256
# Define one or more target directories here
254257
# Example single target dir:

src/main/java/life/qbic/data/processing/AppConfig.java

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -52,20 +52,17 @@ RegistrationConfiguration registrationConfiguration(
5252

5353
@Bean
5454
EvaluationWorkersConfig evaluationWorkersConfig(
55-
@Value("${evaluations.threads}") int amountOfWorkers,
55+
@Value("${evaluation.threads}") int amountOfWorkers,
5656
@Value("${evaluation.working.dir}") String workingDirectory,
57-
@Value("${evaluation.target.dirs}") String[] targetDirectory,
58-
@Value("${evaluation.measurement-id.pattern}") String measurementIdPattern) {
59-
return new EvaluationWorkersConfig(amountOfWorkers, workingDirectory,
60-
measurementIdPattern, Arrays.stream(targetDirectory).toList());
57+
@Value("${evaluation.target.dirs}") String[] targetDirectory) {
58+
return new EvaluationWorkersConfig(amountOfWorkers, workingDirectory, Arrays.stream(targetDirectory).toList());
6159
}
6260

6361
@Bean
6462
EvaluationConfiguration evaluationConfiguration(EvaluationWorkersConfig evaluationWorkersConfig,
6563
GlobalConfig globalConfig) {
6664
return new EvaluationConfiguration(evaluationWorkersConfig.workingDirectory().toString(),
67-
evaluationWorkersConfig.targetDirectories(),
68-
evaluationWorkersConfig.measurementIdPattern().toString(), globalConfig);
65+
evaluationWorkersConfig.targetDirectories(), globalConfig);
6966
}
7067

7168
@Bean
@@ -86,8 +83,9 @@ ProcessingConfiguration processingConfiguration(ProcessingWorkersConfig processi
8683
@Bean
8784
GlobalConfig globalConfig(
8885
@Value("${users.error.directory.name}") String usersErrorDirectoryName,
89-
@Value("${users.registration.directory.name}") String usersRegistrationDirectoryName) {
90-
return new GlobalConfig(usersErrorDirectoryName, usersRegistrationDirectoryName);
86+
@Value("${users.registration.directory.name}") String usersRegistrationDirectoryName,
87+
@Value("${qbic.measurement-id.pattern}") String measurementIdPattern) {
88+
return new GlobalConfig(usersErrorDirectoryName, usersRegistrationDirectoryName, measurementIdPattern);
9189
}
9290

9391
}

src/main/java/life/qbic/data/processing/GlobalConfig.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,22 +2,29 @@
22

33
import java.nio.file.Path;
44
import java.nio.file.Paths;
5+
import java.util.regex.Pattern;
56

67
public class GlobalConfig {
78

89
private final Path usersErrorDirectoryName;
910

1011
private final Path usersDirectoryRegistrationName;
1112

12-
public GlobalConfig(String usersErrorDirectoryName, String usersRegistrationDirectoryName) {
13+
private final Pattern qbicMeasurementIdPattern;
14+
15+
public GlobalConfig(String usersErrorDirectoryName, String usersRegistrationDirectoryName, String qbicMeasurementIdPattern) {
1316
if (usersErrorDirectoryName == null || usersErrorDirectoryName.isBlank()) {
1417
throw new IllegalArgumentException("usersErrorDirectoryName cannot be null or empty");
1518
}
1619
if (usersRegistrationDirectoryName == null || usersRegistrationDirectoryName.isBlank()) {
1720
throw new IllegalArgumentException("usersRegistrationDirectoryName cannot be null or empty");
1821
}
22+
if (qbicMeasurementIdPattern == null || qbicMeasurementIdPattern.isBlank()) {
23+
throw new IllegalArgumentException("qbicMeasurementIdPattern cannot be null or empty");
24+
}
1925
this.usersErrorDirectoryName = Paths.get(usersErrorDirectoryName);
2026
this.usersDirectoryRegistrationName = Paths.get(usersRegistrationDirectoryName);
27+
this.qbicMeasurementIdPattern = Pattern.compile(qbicMeasurementIdPattern);
2128
}
2229

2330
public Path usersErrorDirectory() {
@@ -28,4 +35,8 @@ public Path usersDirectoryRegistration() {
2835
return this.usersDirectoryRegistrationName;
2936
}
3037

38+
public Pattern qbicMeasurementIdPattern() {
39+
return this.qbicMeasurementIdPattern;
40+
}
41+
3142
}

src/main/java/life/qbic/data/processing/config/EvaluationWorkersConfig.java

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,8 @@ public class EvaluationWorkersConfig {
1010
private final int threads;
1111
private final Path workingDirectory;
1212
private final Collection<Path> targetDirectories;
13-
private final Pattern measurementIdPattern;
1413

15-
public EvaluationWorkersConfig(int threads, String workingDirectory, String measurementIdPattern,
14+
public EvaluationWorkersConfig(int threads, String workingDirectory,
1615
Collection<String> targetDirectories) {
1716
if (threads < 1) {
1817
throw new IllegalArgumentException(
@@ -32,10 +31,6 @@ public EvaluationWorkersConfig(int threads, String workingDirectory, String meas
3231
throw new IllegalArgumentException(
3332
"Evaluation target directory '%s' does not exist".formatted(path));
3433
});
35-
if (measurementIdPattern.isBlank()) {
36-
throw new IllegalArgumentException("Measurement id pattern cannot be blank");
37-
}
38-
this.measurementIdPattern = Pattern.compile(measurementIdPattern);
3934
}
4035

4136
public int threads() {
@@ -49,8 +44,4 @@ public Path workingDirectory() {
4944
public Collection<Path> targetDirectories() {
5045
return targetDirectories;
5146
}
52-
53-
public Pattern measurementIdPattern() {
54-
return measurementIdPattern;
55-
}
5647
}

src/main/java/life/qbic/data/processing/evaluation/EvaluationConfiguration.java

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,10 @@ public class EvaluationConfiguration {
1818

1919
private final Path workingDirectory;
2020
private final Collection<Path> targetDirectories;
21-
private final Pattern measurementIdPattern;
2221
private final Path usersErrorDirectory;
2322
private final RoundRobinDraw<Path> targetDirectoriesRoundRobinDraw;
2423

2524
public EvaluationConfiguration(String workingDirectory, Collection<Path> targetDirectories,
26-
String measurementIdPattern,
2725
GlobalConfig globalConfig) {
2826
this.workingDirectory = Paths.get(workingDirectory);
2927
if (!this.workingDirectory.toFile().exists()) {
@@ -35,11 +33,7 @@ public EvaluationConfiguration(String workingDirectory, Collection<Path> targetD
3533
"Evaluation target directory '%s' does not exist".formatted(path));
3634
});
3735
this.targetDirectoriesRoundRobinDraw = RoundRobinDraw.create(targetDirectories);
38-
if (measurementIdPattern.isBlank()) {
39-
throw new IllegalArgumentException("Measurement id pattern cannot be blank");
40-
}
4136
this.usersErrorDirectory = globalConfig.usersErrorDirectory();
42-
this.measurementIdPattern = Pattern.compile(measurementIdPattern);
4337
}
4438

4539
public Path workingDirectory() {
@@ -50,10 +44,6 @@ public RoundRobinDraw<Path> targetDirectories() {
5044
return targetDirectoriesRoundRobinDraw;
5145
}
5246

53-
public Pattern measurementIdPattern() {
54-
return measurementIdPattern;
55-
}
56-
5747
public Path usersErrorDirectory() {
5848
return usersErrorDirectory;
5949
}

src/main/java/life/qbic/data/processing/evaluation/EvaluationRequest.java

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -50,17 +50,15 @@ public class EvaluationRequest extends Thread {
5050
private final AtomicBoolean active = new AtomicBoolean(false);
5151
private final AtomicBoolean terminated = new AtomicBoolean(false);
5252
private final Path workingDirectory;
53-
private final Pattern measurementIdPattern;
5453
private final Path usersErrorDirectory;
5554
private final RoundRobinDraw<Path> targetDirectories;
5655
private Path assignedTargetDirectory;
5756

5857
public EvaluationRequest(Path workingDirectory, RoundRobinDraw<Path> targetDirectories,
59-
Pattern measurementIdPattern, Path usersErrorDirectory) {
58+
Path usersErrorDirectory) {
6059
this.setName(THREAD_NAME.formatted(nextThreadNumber()));
6160
this.workingDirectory = workingDirectory;
6261
this.targetDirectories = targetDirectories;
63-
this.measurementIdPattern = measurementIdPattern;
6462
if (!workingDirectory.resolve(INTERVENTION_DIRECTORY).toFile().mkdir()
6563
&& !workingDirectory.resolve(
6664
INTERVENTION_DIRECTORY).toFile().exists()) {
@@ -73,7 +71,6 @@ public EvaluationRequest(Path workingDirectory, RoundRobinDraw<Path> targetDirec
7371

7472
public EvaluationRequest(EvaluationConfiguration evaluationConfiguration) {
7573
this(evaluationConfiguration.workingDirectory(), evaluationConfiguration.targetDirectories(),
76-
evaluationConfiguration.measurementIdPattern(),
7774
evaluationConfiguration.usersErrorDirectory());
7875
}
7976

@@ -185,10 +182,6 @@ private boolean createMarkerFile(Path targetDirectory, String name) throws IOExc
185182
return targetDirectory.resolve(markerFileName).toFile().createNewFile();
186183
}
187184

188-
private Optional<File> findDataset(File taskDir) {
189-
return Arrays.stream(taskDir.listFiles()).filter(File::isDirectory).findFirst();
190-
}
191-
192185
private void moveToSystemIntervention(File taskDir, String reason) {
193186
try {
194187
var errorFile = taskDir.toPath().resolve("error.txt").toFile();

src/main/java/life/qbic/data/processing/registration/ErrorCode.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,5 @@
99
*/
1010
public enum ErrorCode {
1111
METADATA_FILE_NOT_FOUND,
12-
INCOMPLETE_METADATA, FILE_NOT_FOUND, MISSING_FILE_ENTRY, IO_EXCEPTION
12+
INCOMPLETE_METADATA, FILE_NOT_FOUND, MISSING_FILE_ENTRY, INVALID_MEASUREMENT_ID_FORMAT, IO_EXCEPTION
1313
}

src/main/java/life/qbic/data/processing/registration/ProcessRegistrationRequest.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import java.util.Optional;
1818
import java.util.UUID;
1919
import java.util.concurrent.atomic.AtomicBoolean;
20+
import java.util.regex.Pattern;
2021
import java.util.stream.Collectors;
2122
import life.qbic.data.processing.ConcurrentRegistrationQueue;
2223
import life.qbic.data.processing.GlobalConfig;
@@ -53,6 +54,7 @@ public class ProcessRegistrationRequest extends Thread {
5354
private final Path targetDirectory;
5455
private final String metadataFileName;
5556
private final Path userErrorDirectory;
57+
private final Pattern measurementIdPattern;
5658
private AtomicBoolean active = new AtomicBoolean(false);
5759

5860
public ProcessRegistrationRequest(@NonNull ConcurrentRegistrationQueue registrationQueue,
@@ -63,6 +65,7 @@ public ProcessRegistrationRequest(@NonNull ConcurrentRegistrationQueue registrat
6365
this.targetDirectory = configuration.targetDirectory();
6466
this.metadataFileName = configuration.metadataFileName();
6567
this.userErrorDirectory = globalConfig.usersErrorDirectory();
68+
this.measurementIdPattern = globalConfig.qbicMeasurementIdPattern();
6669
}
6770

6871
private static int nextThreadNumber() {
@@ -221,6 +224,8 @@ public void run() {
221224
var registrationMetadata = findAndParseMetadata(workingTargetDir);
222225
validateFileEntries(registrationMetadata, workingTargetDir);
223226

227+
validateMeasurementIds(registrationMetadata);
228+
224229
var aggregatedFilesByMeasurementId = registrationMetadata.stream().collect(
225230
Collectors.groupingBy(RegistrationMetadata::measurementId));
226231

@@ -244,6 +249,24 @@ public void run() {
244249
}
245250
}
246251

252+
private void validateMeasurementIds(List<RegistrationMetadata> registrationMetadata)
253+
throws ValidationException {
254+
registrationMetadata.stream().map(RegistrationMetadata::measurementId)
255+
.filter(this::isMeasurementIdInvalid).findAny().ifPresent(invalidEntry -> {
256+
throw new ValidationException(
257+
"Invalid measurement ID format found: %s".formatted(invalidEntry),
258+
ErrorCode.INVALID_MEASUREMENT_ID_FORMAT);
259+
});
260+
}
261+
262+
private boolean isMeasurementIdInvalid(String measurementId) {
263+
return !isMeasurementIdValid(measurementId);
264+
}
265+
266+
private boolean isMeasurementIdValid(String measurementId) {
267+
return measurementIdPattern.matcher(measurementId).matches();
268+
}
269+
247270
private void processAll(Map<String, List<RegistrationMetadata>> aggregatedFilesByMeasurementId,
248271
Path workingTargetDir, RegistrationRequest request) throws IOException {
249272
for (String measurementId : aggregatedFilesByMeasurementId.keySet()) {

src/main/resources/application.properties

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ users.error.directory.name=error
1313
# Needs to be present in the users' home folders
1414
# e.g. /home/<user1>/registration
1515
users.registration.directory.name=registration
16+
qbic.measurement-id.pattern=^(MS|NGS)Q[A-Z0-9]{4}[0-9]{3}[A-Z0-9]{2}-[0-9]*
1617

1718
#--------------------------------------
1819
# Settings for the data scanning thread
@@ -30,7 +31,7 @@ scanner.interval=1000
3031
#----------------
3132
# Settings for the registration worker threads
3233
#----------------
33-
registration.threads=2
34+
registration.threads=${REGISTRATION_THREADS:2}
3435
registration.metadata.filename=metadata.txt
3536
registration.working.dir=${WORKING_DIR:}
3637
registration.target.dir=${PROCESSING_DIR:}
@@ -39,23 +40,22 @@ registration.target.dir=${PROCESSING_DIR:}
3940
# Settings for the 1. processing step
4041
# Proper packaging and provenance data, some simple checks
4142
#------------------------------------
42-
processing.threads=2
43+
processing.threads=${PROCESSING_THREADS:2}
4344
processing.working.dir=${PROCESSING_DIR}
4445
processing.target.dir=${EVALUATION_DIR}
4546

4647
#----------------------------------
4748
# Setting for the 2. processing step:
4849
# Measurement ID evaluation
4950
# ---------------------------------
50-
evaluations.threads=2
51+
evaluation.threads=${EVALUATION_THREADS:2}
5152
evaluation.working.dir=${EVALUATION_DIR}
5253
# Define one or more target directories here
5354
# Example single target dir:
5455
# evaluation.target.dirs=/my/example/target/dir
5556
# Example multiple target dir:
5657
# evaluation.target.dirs=/my/example/target/dir1,/my/example/target/dir2,/my/example/target/dir3
5758
evaluation.target.dirs=${OPENBIS_ETL_DIRS}
58-
evaluation.measurement-id.pattern=^(MS|NGS)Q[A-Z0-9]{4}[0-9]{3}[A-Z0-9]{2}-[0-9]*
5959

6060
# ----------------
6161
# Logging settings

0 commit comments

Comments
 (0)