diff --git a/tests/integration-tests/tests-base/pom.xml b/tests/integration-tests/tests-base/pom.xml
index ebad13e..279977a 100644
--- a/tests/integration-tests/tests-base/pom.xml
+++ b/tests/integration-tests/tests-base/pom.xml
@@ -17,6 +17,12 @@
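             <groupId>com.tngtech.archunit</groupId>
             <artifactId>archunit-junit5</artifactId>
         </dependency>
+        <dependency>
+            <groupId>io.github.ardoco</groupId>
+            <artifactId>metrics</artifactId>
+            <version>0.1.1-SNAPSHOT</version>
+            <scope>compile</scope>
+        </dependency>
         <dependency>
             <groupId>io.github.ardoco.core</groupId>
             <artifactId>common</artifactId>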
diff --git a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/TestUtil.java b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/TestUtil.java
index ba2a76c..7adba91 100644
--- a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/TestUtil.java
+++ b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/TestUtil.java
@@ -3,17 +3,14 @@
import java.util.Locale;
import java.util.Set;
-import java.util.stream.Collectors;
import org.eclipse.collections.api.collection.ImmutableCollection;
-import org.eclipse.collections.api.factory.Lists;
-import org.eclipse.collections.api.list.ImmutableList;
import org.slf4j.Logger;
import edu.kit.kastel.mcse.ardoco.core.api.output.ArDoCoResult;
import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResults;
import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.ExpectedResults;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.ResultMatrix;
+import edu.kit.kastel.mcse.ardoco.metrics.ClassificationMetricsCalculator;
/**
* This utility class provides methods for running the tests, especially regarding the evaluations.
@@ -37,20 +34,18 @@ public static EvaluationResults compareTLR(ArDoCoResult arDoCoResult, Imm
Set distinctTraceLinks = new java.util.LinkedHashSet<>(results.castToCollection());
Set distinctGoldStandard = new java.util.LinkedHashSet<>(goldStandard.castToCollection());
- // True Positives are the trace links that are contained on both lists
- Set truePositives = distinctTraceLinks.stream().filter(distinctGoldStandard::contains).collect(Collectors.toSet());
- ImmutableList truePositivesList = Lists.immutable.ofAll(truePositives);
+ int sentences = arDoCoResult.getText().getSentences().size();
+ int modelElements = 0;
+ for (var model : arDoCoResult.getModelIds()) {
+ modelElements += arDoCoResult.getModelState(model).getInstances().size();
+ }
- // False Positives are the trace links that are only contained in the result set
- Set falsePositives = distinctTraceLinks.stream().filter(tl -> !distinctGoldStandard.contains(tl)).collect(Collectors.toSet());
- ImmutableList falsePositivesList = Lists.immutable.ofAll(falsePositives);
+ int confusionMatrixSum = sentences * modelElements;
- // False Negatives are the trace links that are only contained in the gold standard
- Set falseNegatives = distinctGoldStandard.stream().filter(tl -> !distinctTraceLinks.contains(tl)).collect(Collectors.toSet());
- ImmutableList falseNegativesList = Lists.immutable.ofAll(falseNegatives);
+ var calculator = ClassificationMetricsCalculator.getInstance();
- int trueNegatives = TestUtil.calculateTrueNegativesForTLR(arDoCoResult, truePositives.size(), falsePositives.size(), falseNegatives.size());
- return EvaluationResults.createEvaluationResults(new ResultMatrix<>(truePositivesList, trueNegatives, falsePositivesList, falseNegativesList));
+ var classification = calculator.calculateMetrics(distinctTraceLinks, distinctGoldStandard, confusionMatrixSum);
+ return new EvaluationResults<>(classification);
}
/**
@@ -67,57 +62,11 @@ public static EvaluationResults compareInconsistencies(ArDoCoResult arDoC
Set distinctTraceLinks = new java.util.LinkedHashSet<>(results.castToCollection());
Set distinctGoldStandard = new java.util.LinkedHashSet<>(goldStandard.castToCollection());
- // True Positives are the trace links that are contained on both lists
- Set truePositives = distinctTraceLinks.stream().filter(distinctGoldStandard::contains).collect(Collectors.toSet());
- ImmutableList truePositivesList = Lists.immutable.ofAll(truePositives);
-
- // False Positives are the trace links that are only contained in the result set
- Set falsePositives = distinctTraceLinks.stream().filter(tl -> !distinctGoldStandard.contains(tl)).collect(Collectors.toSet());
- ImmutableList falsePositivesList = Lists.immutable.ofAll(falsePositives);
-
- // False Negatives are the trace links that are only contained in the gold standard
- Set falseNegatives = distinctGoldStandard.stream().filter(tl -> !distinctTraceLinks.contains(tl)).collect(Collectors.toSet());
- ImmutableList falseNegativesList = Lists.immutable.ofAll(falseNegatives);
-
- int trueNegatives = TestUtil.calculateTrueNegativesForInconsistencies(arDoCoResult, truePositives.size(), falsePositives.size(), falseNegatives.size());
- return EvaluationResults.createEvaluationResults(new ResultMatrix<>(truePositivesList, trueNegatives, falsePositivesList, falseNegativesList));
- }
-
- /**
- * Calculates the number of true negatives based on the given {@link ArDoCoResult} and the calculated {@link EvaluationResults evaluation results}. Uses the
- * total sum of all entries in the confusion matrix and then substracts the true positives, false positives, and false negatives.
- *
- * @param arDoCoResult the output of ArDoCo
- * @param truePositives nr of true positives
- * @param falsePositives nr of false positives
- * @param falseNegatives nr of false negatives
- * @return the number of true negatives
- */
- public static int calculateTrueNegativesForTLR(ArDoCoResult arDoCoResult, int truePositives, int falsePositives, int falseNegatives) {
- int sentences = arDoCoResult.getText().getSentences().size();
- int modelElements = 0;
- for (var model : arDoCoResult.getModelIds()) {
- modelElements += arDoCoResult.getModelState(model).getInstances().size();
- }
-
- int confusionMatrixSum = sentences * modelElements;
- return confusionMatrixSum - (truePositives + falsePositives + falseNegatives);
- }
-
- /**
- * Calculates the number of true negatives based on the given {@link ArDoCoResult} and the calculated {@link EvaluationResults evaluation results}. Uses the
- * total sum of all sentences in the {@link ArDoCoResult} and then substracts the true positives, false positives, and false negatives.
- *
- * @param arDoCoResult the output of ArDoCo
- * @param truePositives nr of true positives
- * @param falsePositives nr of false positives
- * @param falseNegatives nr of false negatives
- * @return the number of true negatives
- */
- public static int calculateTrueNegativesForInconsistencies(ArDoCoResult arDoCoResult, int truePositives, int falsePositives, int falseNegatives) {
- int numberOfSentences = arDoCoResult.getText().getSentences().size();
- return numberOfSentences - (truePositives + falsePositives + falseNegatives);
+ int confusionMatrixSum = arDoCoResult.getText().getSentences().size();
+ var calculator = ClassificationMetricsCalculator.getInstance();
+ var classification = calculator.calculateMetrics(distinctTraceLinks, distinctGoldStandard, confusionMatrixSum);
+ return new EvaluationResults<>(classification);
}
/**
@@ -168,19 +117,6 @@ public static void logExplicitResults(Logger logger, String name, EvaluationResu
logger.info(logString);
}
- /**
- * Log the provided {@link EvaluationResults} using the provided logger and name. Additionally, provided the expected results.
- *
- * @param logger Logger to use
- * @param name Name to show in the output
- * @param results the results
- * @param expectedResults the expected results
- */
- public static void logResultsWithExpected(Logger logger, String name, EvaluationResults> results, ExpectedResults expectedResults) {
- var infoString = String.format(Locale.ENGLISH, "%n%s:%n%s", name, results.getResultStringWithExpected(expectedResults));
- logger.info(infoString);
- }
-
public static void logExtendedResultsWithExpected(Logger logger, Object testClass, String name, EvaluationResults> results,
ExpectedResults expectedResults) {
var infoString = String.format(Locale.ENGLISH, """
diff --git a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/architecture/DeterministicArDoCoTest.java b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/architecture/DeterministicArDoCoTest.java
index ac86729..6dfab30 100644
--- a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/architecture/DeterministicArDoCoTest.java
+++ b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/architecture/DeterministicArDoCoTest.java
@@ -50,7 +50,7 @@ public class DeterministicArDoCoTest {
@ArchTest
public static final ArchRule forbidUnorderedSetsAndMaps = noClasses().that()
- .resideOutsideOfPackages("..tests..")
+ .resideOutsideOfPackages("..tests..", "..metrics..")
.and(areNotDirectlyAnnotatedWith(Deterministic.class))
.should()
.accessClassesThat(areForbiddenClasses())
diff --git a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/CodeProject.java b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/CodeProject.java
index 31255a6..bab51bb 100644
--- a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/CodeProject.java
+++ b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/CodeProject.java
@@ -5,6 +5,7 @@
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;
@@ -159,8 +160,7 @@ public String getCodeModelDirectory() {
loadCodeModelFromResourcesIfNeeded();
return getTemporaryCodeLocation().getAbsolutePath();
} catch (IOException e) {
- logger.error(e.getMessage(), e);
- return null;
+ throw new UncheckedIOException(e);
}
}
diff --git a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/EvaluationMetrics.java b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/EvaluationMetrics.java
deleted file mode 100644
index cf9c0a2..0000000
--- a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/EvaluationMetrics.java
+++ /dev/null
@@ -1,212 +0,0 @@
-/* Licensed under MIT 2022-2024. */
-package edu.kit.kastel.mcse.ardoco.core.tests.eval;
-
-import java.math.BigDecimal;
-import java.math.MathContext;
-
-public class EvaluationMetrics {
- private EvaluationMetrics() throws IllegalAccessException {
- throw new IllegalAccessException();
- }
-
- /**
- * Checks the provided recall. Returns 1.0 if it is NaN, because this means that there was no missing
- * classification.
- *
- * @param recall the precision
- * @return 1.0 if recall is NaN, else the original value
- */
- public static double checkAndRepairRecall(double recall) {
- if (Double.isNaN(recall)) {
- return 1.0;
- }
- return recall;
- }
-
- /**
- * Calculates the recall for the given True Positives (TPs) and False Negatives (FNs). If TP+NP=0, then returns 1.0
- * because there was no missing element.
- *
- * @param truePositives number of TPs
- * @param falseNegatives number of FNs
- * @return the Recall; 1.0 iff TP+NP=0
- */
- public static double calculateRecall(int truePositives, int falseNegatives) {
- double denominator = (truePositives + falseNegatives);
- var recall = 1.0 * truePositives / denominator;
- return checkAndRepairRecall(recall);
- }
-
- /**
- * Checks the provided precision. Returns 1.0 if it is NaN, because this means that there was no wrong
- * classification.
- *
- * @param precision the precision
- * @return 1.0 if precision is NaN, else the original value
- */
- public static double checkAndRepairPrecision(double precision) {
- if (Double.isNaN(precision)) {
- return 1.0;
- }
- return precision;
- }
-
- /**
- * Calculates the precision for the given True Positives (TPs) and False Positives (FPs). If TP+FP=0, then returns
- * 1.0 because there was no wrong classification.
- *
- * @param truePositives number of TPs
- * @param falsePositives number of FPs
- * @return the Precision; 1.0 iff TP+FP=0
- */
- public static double calculatePrecision(int truePositives, int falsePositives) {
- double denominator = (truePositives + falsePositives);
- var precision = 1.0 * truePositives / denominator;
- return checkAndRepairPrecision(precision);
- }
-
- /**
- * Checks the provided F1-score. Iff it is NaN, returns 0.0, otherwise returns the original value
- *
- * @param f1 the f1-score to check
- * @return Iff score is NaN, returns 0.0, otherwise returns the original value
- */
- public static double checkAndRepairF1(double f1) {
- if (Double.isNaN(f1)) {
- return 0.0;
- }
- return f1;
- }
-
- /**
- * Calculates the F1-score using the provided precision and recall. If precision+recall=0, returns 0.0.
- *
- * @param precision the precision
- * @param recall the recall
- * @return the F1-Score; 0.0 iff precision+recall=0
- */
- public static double calculateF1(double precision, double recall) {
- var f1 = 2 * precision * recall / (precision + recall);
- return checkAndRepairF1(f1);
- }
-
- /**
- * Calculates the F1-score using the provided True Positives (TPs), False Positives (FPs), and False Negatives
- * (FNs). If intermediate calculation shows that precision+recall=0, returns 0.0.
- *
- * @param truePositives number of TPs
- * @param falsePositives number of FPs
- * @param falseNegatives number of FNs
- * @return the F1-score. See also {@link #calculateF1(double, double)}
- */
- public static double calculateF1(int truePositives, int falsePositives, int falseNegatives) {
- var precision = calculatePrecision(truePositives, falsePositives);
- var recall = calculateRecall(truePositives, falseNegatives);
- return calculateF1(precision, recall);
- }
-
- /**
- * Calculates the accuracy based on the true positives, false positives, false negatives, and true negatives.
- *
- * @see Wikipedia: Accuracy and Precision
- * @return the accuracy
- */
- public static double calculateAccuracy(int truePositives, int falsePositives, int falseNegatives, int trueNegatives) {
- double numerator = truePositives + trueNegatives;
- double denominator = truePositives + falsePositives + falseNegatives + trueNegatives;
- return numerator / denominator;
- }
-
- /**
- * Returns the Phi Coefficient (also known as mean square contingency coefficient (MCC)) based on the true positives, false positives, false negatives, and
- * true negatives.
- * The return value lies between -1 and +1. -1 show perfect disagreement, +1 shows perfect agreement and 0 indicates no relationship.
- * Therefore, good values should be close to +1.
- *
- * @see Wikipedia: Phi coefficient
- *
- * @return the value for Phi Coefficient (or MCC)
- */
- public static double calculatePhiCoefficient(int truePositives, int falsePositives, int falseNegatives, int trueNegatives) {
- var tp = BigDecimal.valueOf(truePositives);
- var fp = BigDecimal.valueOf(falsePositives);
- var fn = BigDecimal.valueOf(falseNegatives);
- var tn = BigDecimal.valueOf(trueNegatives);
-
- var num = tp.multiply(tn).subtract((fp.multiply(fn)));
-
- var a = tp.add(fp);
- var b = tp.add(fn);
- var c = tn.add(fp);
- var d = tn.add(fn);
- if (a.equals(BigDecimal.ZERO) || b.equals(BigDecimal.ZERO) || c.equals(BigDecimal.ZERO) || d.equals(BigDecimal.ZERO)) {
- return 0d;
- }
-
- var productOfSumsInDenominator = a.multiply(b).multiply(c).multiply(d);
- var denominator = productOfSumsInDenominator.sqrt(MathContext.DECIMAL128);
-
- return num.divide(denominator, MathContext.DECIMAL128).doubleValue();
- }
-
- /**
- * Calculates the maximum possible value of the phi coefficient given the four values of the confusion matrix (TP, FP, FN, TN).
- *
- * @see Paper about PhiMax by Ferguson (1941)
- * @see Paper about Phi/PhiMax by Davenport et al. (1991)
- * @param truePositives number of true positives
- * @param falsePositives number of false positives
- * @param falseNegatives number of false negatives
- * @param trueNegatives number of true negatives
- * @return The maximum possible value of phi.
- */
- public static double calculatePhiCoefficientMax(int truePositives, int falsePositives, int falseNegatives, int trueNegatives) {
- var tp = BigDecimal.valueOf(truePositives);
- var fp = BigDecimal.valueOf(falsePositives);
- var fn = BigDecimal.valueOf(falseNegatives);
- var tn = BigDecimal.valueOf(trueNegatives);
-
- var test = fn.add(tp).compareTo(fp.add(tp)) >= 0;
- var nominator = (fp.add(tn)).multiply(tp.add(fp)).sqrt(MathContext.DECIMAL128);
- var denominator = (fn.add(tn)).multiply(tp.add(fn)).sqrt(MathContext.DECIMAL128);
- if (test) {
- // standard case
- return nominator.divide(denominator, MathContext.DECIMAL128).doubleValue();
- } else {
- // if test is not true, you have to swap nominator and denominator as then you have to mirror the confusion matrix (,i.e., swap TP and TN)
- return denominator.divide(nominator, MathContext.DECIMAL128).doubleValue();
- }
- }
-
- /**
- * Calculates the normalized phi correlation coefficient value that is phi divided by its maximum possible value.
- *
- * @see Paper about Phi/PhiMax
- * @param truePositives number of true positives
- * @param falsePositives number of false positives
- * @param falseNegatives number of false negatives
- * @param trueNegatives number of true negatives
- * @return The value of Phi/PhiMax
- */
- public static double calculatePhiOverPhiMax(int truePositives, int falsePositives, int falseNegatives, int trueNegatives) {
- var phi = calculatePhiCoefficient(truePositives, falsePositives, falseNegatives, trueNegatives);
- var phiMax = calculatePhiCoefficientMax(truePositives, falsePositives, falseNegatives, trueNegatives);
- return phi / phiMax;
- }
-
- /**
- * Calculates the specificity, also known as selectivity or true negative rate, based on the number of true negatives and false positives.
- *
- * @see Wikipedia: Sensitivity and specificity
- * @param trueNegatives the number of true negatives
- * @param falsePositives the number of false positives
- * @return the specificity
- */
- public static double calculateSpecificity(int trueNegatives, int falsePositives) {
- double specificity = trueNegatives / ((double) trueNegatives + falsePositives);
- if (Double.isNaN(specificity)) {
- return 1.0;
- }
- return specificity;
- }
-}
diff --git a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/HistoricProject.java b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/HistoricProject.java
new file mode 100644
index 0000000..b28cc51
--- /dev/null
+++ b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/HistoricProject.java
@@ -0,0 +1,255 @@
+/* Licensed under MIT 2021-2024. */
+package edu.kit.kastel.mcse.ardoco.core.tests.eval;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.List;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+import org.eclipse.collections.api.factory.Lists;
+import org.eclipse.collections.api.list.ImmutableList;
+import org.eclipse.collections.api.list.MutableList;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import edu.kit.kastel.mcse.ardoco.core.api.models.ArchitectureModelType;
+import edu.kit.kastel.mcse.ardoco.core.api.models.arcotl.ArchitectureModel;
+import edu.kit.kastel.mcse.ardoco.core.execution.ConfigurationHelper;
+import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.ExpectedResults;
+
+/**
+ * This enum captures the historic versions of the case studies that are used for evaluation in the integration tests.
+ * Every constant bundles the resource names of one project (model, text, gold standards, additional configuration)
+ * with the expected results for trace link recovery and for inconsistency detection.
+ * <p>
+ * NOTE(review): the six numbers of each {@link ExpectedResults} are presumably precision, recall, F1, accuracy, phi
+ * coefficient, and specificity -- confirm against {@link ExpectedResults}.
+ */
+public enum HistoricProject implements GoldStandardProject {
+    TEASTORE_HISTORICAL( //
+            "TS-H", //
+            "/benchmark/teastore/model_2020/pcm/teastore.repository", //
+            "/benchmark/teastore/text_2018/teastore_2018_AB.txt", //
+            "/benchmark/teastore/goldstandards/goldstandard_sad_2018-sam_2020_AB.csv", //
+            "/configurations/ts/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt
+            "/benchmark/teastore/goldstandards/goldstandard_sad_2018-sam_2020_AB_UME.csv", //
+            new ExpectedResults(.999, .740, .850, .984, .853, .999), //
+            new ExpectedResults(.082, .983, .153, .332, .147, .291) //
+    ), //
+    TEAMMATES_HISTORICAL( //
+            "TM-H", //
+            "/benchmark/teammates/model_2021/pcm/teammates.repository", //
+            "/benchmark/teammates/text_2015/teammates_2015.txt", //
+            "/benchmark/teammates/goldstandards/goldstandard_sad_2015-sam_2021.csv", //
+            "/configurations/tm/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt
+            "/benchmark/teammates/goldstandards/goldstandard_sad_2015-sam_2021_UME.csv", //
+            new ExpectedResults(.524, .695, .597, .970, .589, .979), //
+            new ExpectedResults(.161, .695, .262, .878, .295, .884) //
+    ), //
+    BIGBLUEBUTTON_HISTORICAL( //
+            "BBB-H", "/benchmark/bigbluebutton/model_2021/pcm/bbb.repository", //
+            "/benchmark/bigbluebutton/text_2015/bigbluebutton_2015.txt", //
+            "/benchmark/bigbluebutton/goldstandards/goldstandard_sad_2015-sam_2021.csv", //
+            "/configurations/bbb/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt
+            "/benchmark/bigbluebutton/goldstandards/goldstandard_sad_2015-sam_2021_UME.csv", //
+            new ExpectedResults(.807, .617, .699, .978, .695, .993), //
+            new ExpectedResults(.048, .176, .076, .829, .018, .857) //
+    ), //
+
+    JABREF_HISTORICAL( //
+            "JR-H", "/benchmark/jabref/model_2021/pcm/jabref.repository", //
+            "/benchmark/jabref/text_2016/jabref_2016.txt", //
+            "/benchmark/jabref/goldstandards/goldstandard_sad_2016-sam_2021.csv", //
+            "/configurations/jabref/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt
+            "/benchmark/jabref/goldstandards/goldstandard_sad_2016-sam_2021_UME.csv", //
+            new ExpectedResults(.817, .999, .899, .966, .886, .960), //
+            new ExpectedResults(.083, .111, .095, .683, -0.094, .784) //
+    );
+
+    private static final Logger logger = LoggerFactory.getLogger(HistoricProject.class);
+
+    private final String alias;
+    private final String model;
+    private final String textFile;
+    private final String configurationsFile;
+    private final String goldStandardTraceabilityLinkRecovery;
+    private final String goldStandardMissingTextForModelElement;
+    private final ExpectedResults expectedTraceLinkResults;
+    private final ExpectedResults expectedInconsistencyResults;
+    private final SortedSet<String> resourceNames;
+
+    /**
+     * Creates a historic project from the names of its resources and its expected results.
+     *
+     * @param alias                                  the alias of the project
+     * @param model                                  resource name of the PCM architecture model
+     * @param textFile                               resource name of the text
+     * @param goldStandardTraceabilityLinkRecovery   resource name of the gold standard for trace link recovery
+     * @param configurationsFile                     resource name of the additional configurations
+     * @param goldStandardMissingTextForModelElement resource name of the gold standard for missing text for model elements
+     * @param expectedTraceLinkResults               expected results for trace link recovery
+     * @param expectedInconsistencyResults           expected results for inconsistency detection
+     */
+    HistoricProject(String alias, String model, String textFile, String goldStandardTraceabilityLinkRecovery, String configurationsFile,
+            String goldStandardMissingTextForModelElement, ExpectedResults expectedTraceLinkResults, ExpectedResults expectedInconsistencyResults) {
+        this.alias = alias;
+        this.model = model;
+        this.textFile = textFile;
+        this.configurationsFile = configurationsFile;
+        this.goldStandardTraceabilityLinkRecovery = goldStandardTraceabilityLinkRecovery;
+        this.goldStandardMissingTextForModelElement = goldStandardMissingTextForModelElement;
+        this.expectedTraceLinkResults = expectedTraceLinkResults;
+        this.expectedInconsistencyResults = expectedInconsistencyResults;
+        this.resourceNames = new TreeSet<>(List.of(model, textFile, goldStandardTraceabilityLinkRecovery, configurationsFile,
+                goldStandardMissingTextForModelElement));
+    }
+
+    @Override
+    public String getAlias() {
+        return alias;
+    }
+
+    @Override
+    public File getModelFile() {
+        return ProjectHelper.loadFileFromResources(model);
+    }
+
+    @Override
+    public String getModelResourceName() {
+        return model;
+    }
+
+    @Override
+    public File getModelFile(ArchitectureModelType modelType) {
+        // The resource name per model type is defined in one place only: getModelResourceName(ArchitectureModelType).
+        return ProjectHelper.loadFileFromResources(getModelResourceName(modelType));
+    }
+
+    @Override
+    public String getModelResourceName(ArchitectureModelType modelType) {
+        return switch (modelType) {
+            case PCM -> model;
+            // The UML resource name is derived from the PCM resource name by swapping directory and file extension.
+            case UML -> model.replace("/pcm/", "/uml/").replace(".repository", ".uml");
+        };
+    }
+
+    @Override
+    public File getTextFile() {
+        return ProjectHelper.loadFileFromResources(textFile);
+    }
+
+    @Override
+    public String getTextResourceName() {
+        return textFile;
+    }
+
+    // NOTE(review): the type arguments of this SortedMap are not visible here; declare them as in GoldStandardProject.
+    @Override
+    public SortedMap getAdditionalConfigurations() {
+        return ConfigurationHelper.loadAdditionalConfigs(getAdditionalConfigurationsFile());
+    }
+
+    @Override
+    public File getAdditionalConfigurationsFile() {
+        return ProjectHelper.loadFileFromResources(this.configurationsFile);
+    }
+
+    @Override
+    public String getAdditionalConfigurationsResourceName() {
+        return configurationsFile;
+    }
+
+    @Override
+    public File getTlrGoldStandardFile() {
+        return ProjectHelper.loadFileFromResources(goldStandardTraceabilityLinkRecovery);
+    }
+
+    @Override
+    public String getTlrGoldStandardResourceName() {
+        return goldStandardTraceabilityLinkRecovery;
+    }
+
+    /**
+     * Reads the gold standard for trace link recovery. The first line is dropped (presumably the CSV header -- confirm) and
+     * blank lines are removed. If the file cannot be read or is empty, an empty list is returned.
+     */
+    @Override
+    public ImmutableList<String> getTlrGoldStandard() {
+        List<String> goldLinks = readLines(getTlrGoldStandardFile());
+        // Guard: removeFirst() throws NoSuchElementException on an empty list, e.g., after a failed read.
+        if (!goldLinks.isEmpty()) {
+            goldLinks.removeFirst();
+        }
+        goldLinks.removeIf(String::isBlank);
+        return Lists.immutable.ofAll(goldLinks);
+    }
+
+    @Override
+    public GoldStandard getTlrGoldStandard(ArchitectureModel architectureModel) {
+        return new GoldStandard(getTlrGoldStandardFile(), architectureModel);
+    }
+
+    /**
+     * Reads the gold standard for missing text for model elements. The line {@code missingModelElementID} and blank lines
+     * are removed. If the file cannot be read, an empty list is returned.
+     */
+    @Override
+    public MutableList<String> getMissingTextForModelElementGoldStandard() {
+        MutableList<String> goldLinks = readLines(getMissingTextForModelElementGoldStandardFile());
+        goldLinks.remove("missingModelElementID");
+        goldLinks.removeIf(String::isBlank);
+        return goldLinks;
+    }
+
+    @Override
+    public File getMissingTextForModelElementGoldStandardFile() {
+        return ProjectHelper.loadFileFromResources(goldStandardMissingTextForModelElement);
+    }
+
+    @Override
+    public String getMissingTextForModelElementGoldStandardResourceName() {
+        return goldStandardMissingTextForModelElement;
+    }
+
+    @Override
+    public ExpectedResults getExpectedTraceLinkResults() {
+        return expectedTraceLinkResults;
+    }
+
+    @Override
+    public ExpectedResults getExpectedInconsistencyResults() {
+        return expectedInconsistencyResults;
+    }
+
+    @Override
+    public String getProjectName() {
+        return this.name();
+    }
+
+    @Override
+    public SortedSet<String> getResourceNames() {
+        return new TreeSet<>(resourceNames);
+    }
+
+    /**
+     * Reads all lines of the given file into a new mutable list.
+     *
+     * @param file the file to read
+     * @return the lines of the file; an empty list if reading fails (the error is logged)
+     */
+    private static MutableList<String> readLines(File file) {
+        var path = Paths.get(file.toURI());
+        try {
+            // Files.readAllLines does not specify whether the returned list is modifiable, therefore copy it.
+            return Lists.mutable.ofAll(Files.readAllLines(path));
+        } catch (IOException e) {
+            logger.error(e.getMessage(), e);
+            return Lists.mutable.empty();
+        }
+    }
+}
diff --git a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/Project.java b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/Project.java
index b2929bd..d6d6b27 100644
--- a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/Project.java
+++ b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/Project.java
@@ -33,7 +33,7 @@ public enum Project implements GoldStandardProject {
"/configurations/ms/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt
"/benchmark/mediastore/goldstandards/goldstandard_sad_2016-sam_2016_UME.csv", //
new ExpectedResults(.999, .620, .765, .978, .778, .999), //
- new ExpectedResults(.212, .792, .328, .702, .227, .690) //
+ new ExpectedResults(.127, .793, .220, .685, .227, .679) //
), //
TEASTORE( //
"TS", //
@@ -43,17 +43,7 @@ public enum Project implements GoldStandardProject {
"/configurations/ts/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt
"/benchmark/teastore/goldstandards/goldstandard_sad_2020-sam_2020_UME.csv", //
new ExpectedResults(.999, .740, .850, .984, .853, .999), //
- new ExpectedResults(.962, .703, .784, .957, .808, .994) //
- ), //
- TEASTORE_HISTORICAL( //
- "TS-H", //
- "/benchmark/teastore/model_2020/pcm/teastore.repository", //
- "/benchmark/teastore/text_2018/teastore_2018_AB.txt", //
- "/benchmark/teastore/goldstandards/goldstandard_sad_2018-sam_2020_AB.csv", //
- "/configurations/ts/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt
- "/benchmark/teastore/goldstandards/goldstandard_sad_2018-sam_2020_AB_UME.csv", //
- new ExpectedResults(.999, .740, .850, .984, .853, .999), //
- new ExpectedResults(.163, .982, .278, .376, .146, .289) //
+ new ExpectedResults(.950, .703, .808, .980, .808, .998) //
), //
TEAMMATES( //
"TM", //
@@ -63,17 +53,7 @@ public enum Project implements GoldStandardProject {
"/configurations/tm/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt
"/benchmark/teammates/goldstandards/goldstandard_sad_2021-sam_2021_UME.csv", //
new ExpectedResults(.555, .882, .681, .965, .688, .975), //
- new ExpectedResults(.175, .745, .279, .851, .287, .851) //
- ), //
- TEAMMATES_HISTORICAL( //
- "TM-H", //
- "/benchmark/teammates/model_2021/pcm/teammates.repository", //
- "/benchmark/teammates/text_2015/teammates_2015.txt", //
- "/benchmark/teammates/goldstandards/goldstandard_sad_2015-sam_2021.csv", //
- "/configurations/tm/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt
- "/benchmark/teammates/goldstandards/goldstandard_sad_2015-sam_2021_UME.csv", //
- new ExpectedResults(.524, .695, .597, .970, .589, .979), //
- new ExpectedResults(.168, .629, .263, .863, .260, .870) //
+ new ExpectedResults(.147, .745, .245, .852, .287, .856) //
), //
BIGBLUEBUTTON( //
"BBB", "/benchmark/bigbluebutton/model_2021/pcm/bbb.repository", //
@@ -82,16 +62,7 @@ public enum Project implements GoldStandardProject {
"/configurations/bbb/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt
"/benchmark/bigbluebutton/goldstandards/goldstandard_sad_2021-sam_2021_UME.csv", //
new ExpectedResults(.875, .826, .850, .985, .835, .985), //
- new ExpectedResults(.887, .461, .429, .956, .534, .984) //
- ), //
- BIGBLUEBUTTON_HISTORICAL( //
- "BBB-H", "/benchmark/bigbluebutton/model_2021/pcm/bbb.repository", //
- "/benchmark/bigbluebutton/text_2015/bigbluebutton_2015.txt", //
- "/benchmark/bigbluebutton/goldstandards/goldstandard_sad_2015-sam_2021.csv", //
- "/configurations/bbb/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt
- "/benchmark/bigbluebutton/goldstandards/goldstandard_sad_2015-sam_2021_UME.csv", //
- new ExpectedResults(.807, .617, .699, .978, .695, .993), //
- new ExpectedResults(.085, .175, .111, .813, .018, .869) //
+ new ExpectedResults(.666, .461, .545, .960, .535, .988) //
), //
JABREF( //
"JR", "/benchmark/jabref/model_2021/pcm/jabref.repository", //
@@ -100,16 +71,7 @@ public enum Project implements GoldStandardProject {
"/configurations/jabref/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt
"/benchmark/jabref/goldstandards/goldstandard_sad_2021-sam_2021_UME.csv", //
new ExpectedResults(.899, .999, .946, .973, .932, .966), //
- new ExpectedResults(1.0, .443, .443, .845, .616, 1.0) //
- ), //
- JABREF_HISTORICAL( //
- "JR-H", "/benchmark/jabref/model_2021/pcm/jabref.repository", //
- "/benchmark/jabref/text_2016/jabref_2016.txt", //
- "/benchmark/jabref/goldstandards/goldstandard_sad_2016-sam_2021.csv", //
- "/configurations/jabref/filterlists_all.txt", // options: filterlists_none.txt, filterlists_onlyCommon.txt, filterlists_all.txt
- "/benchmark/jabref/goldstandards/goldstandard_sad_2016-sam_2021_UME.csv", //
- new ExpectedResults(.817, .999, .899, .966, .886, .960), //
- new ExpectedResults(.110, .110, .110, .366, -.249, .475) //
+ new ExpectedResults(1.0, .444, .615, .871, .617, 1.0) //
);
private static final Logger logger = LoggerFactory.getLogger(Project.class);
@@ -179,11 +141,7 @@ public String getTextResourceName() {
return textFile;
}
- /**
- * Return the map of additional configuration options
- *
- * @return the map of additional configuration options
- */
+ @Override
public SortedMap getAdditionalConfigurations() {
return ConfigurationHelper.loadAdditionalConfigs(getAdditionalConfigurationsFile());
}
@@ -217,7 +175,7 @@ public ImmutableList getTlrGoldStandard() {
} catch (IOException e) {
logger.error(e.getMessage(), e);
}
- goldLinks.remove(0);
+ goldLinks.removeFirst();
goldLinks.removeIf(String::isBlank);
return Lists.immutable.ofAll(goldLinks);
}
diff --git a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/ProjectHelper.java b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/ProjectHelper.java
index d2e23c5..45a5465 100644
--- a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/ProjectHelper.java
+++ b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/ProjectHelper.java
@@ -5,11 +5,9 @@
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.io.UncheckedIOException;
import java.util.concurrent.atomic.AtomicBoolean;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
/**
* Helper class for {@link GoldStandardProject} implementations.
*/
@@ -18,7 +16,6 @@ public class ProjectHelper {
* If set to false. The CodeProject will place the codeModel.acm file from the benchmark to the project directory.
*/
public static final AtomicBoolean ANALYZE_CODE_DIRECTLY = new AtomicBoolean(false);
- private static final Logger logger = LoggerFactory.getLogger(ProjectHelper.class);
private ProjectHelper() {
throw new IllegalAccessError();
@@ -33,7 +30,7 @@ private ProjectHelper() {
public static File loadFileFromResources(String resource) {
InputStream is = ProjectHelper.class.getResourceAsStream(resource);
if (is == null)
- return null;
+ throw new IllegalArgumentException("Resource not found: " + resource);
try {
File temporaryFile = File.createTempFile("ArDoCo", ".tmp");
temporaryFile.deleteOnExit();
@@ -44,8 +41,7 @@ public static File loadFileFromResources(String resource) {
}
return temporaryFile;
} catch (IOException e) {
- logger.error(e.getMessage(), e);
- return null;
+ throw new UncheckedIOException(e);
}
}
}
diff --git a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/EvaluationResultVector.java b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/EvaluationResultVector.java
deleted file mode 100644
index 39a360b..0000000
--- a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/EvaluationResultVector.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/* Licensed under MIT 2023-2024. */
-package edu.kit.kastel.mcse.ardoco.core.tests.eval.results;
-
-import org.eclipse.collections.api.factory.Lists;
-
-/**
- * used to form the average of several {@link EvaluationResults}
- *
- * @param type of the {@link EvaluationResults}
- */
-public class EvaluationResultVector {
- private double precision = 0.0;
- private double recall = 0.0;
- private double f1 = 0.0;
- private double accuracy = 0.0;
- private double phiCoefficient = 0.0;
- private double specificity = 0.0;
- private double phiCoefficientMax = 0.0;
- private double phiOverPhiMax = 0.0;
-
- public void add(EvaluationResults results) {
- precision += results.precision();
- recall += results.recall();
- f1 += results.f1();
- accuracy += results.accuracy();
- specificity += results.specificity();
- phiCoefficient += results.phiCoefficient();
- phiCoefficientMax += results.phiCoefficientMax();
- phiOverPhiMax += results.phiOverPhiMax();
- }
-
- public void scale(double scale) {
- precision /= scale;
- recall /= scale;
- f1 /= scale;
- accuracy /= scale;
- specificity /= scale;
- phiCoefficient /= scale;
- phiCoefficientMax /= scale;
- phiOverPhiMax /= scale;
- }
-
- public void addWeighted(EvaluationResults results, int weight) {
- precision += results.precision() * weight;
- recall += results.recall() * weight;
- f1 += results.f1() * weight;
- accuracy += results.accuracy() * weight;
- specificity += results.specificity() * weight;
- phiCoefficient += results.phiCoefficient() * weight;
- phiCoefficientMax += results.phiCoefficientMax() * weight;
- phiOverPhiMax += results.phiOverPhiMax() * weight;
- }
-
- public EvaluationResults toEvaluationResults() {
- return new EvaluationResults<>(precision, recall, f1, Lists.immutable.empty(), 0, Lists.immutable.empty(), Lists.immutable.empty(), accuracy,
- phiCoefficient, specificity, phiCoefficientMax, phiOverPhiMax);
- }
-}
diff --git a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/EvaluationResults.java b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/EvaluationResults.java
index 9332bb8..0db56b3 100644
--- a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/EvaluationResults.java
+++ b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/EvaluationResults.java
@@ -1,30 +1,29 @@
/* Licensed under MIT 2023-2024. */
package edu.kit.kastel.mcse.ardoco.core.tests.eval.results;
+import java.util.List;
import java.util.Locale;
import org.eclipse.collections.api.factory.Lists;
import org.eclipse.collections.api.list.ImmutableList;
import org.eclipse.collections.api.list.MutableList;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.EvaluationMetrics;
+import edu.kit.kastel.mcse.ardoco.metrics.result.SingleClassificationResult;
-public record EvaluationResults(double precision, double recall, double f1, ImmutableList truePositives, int trueNegatives,
- ImmutableList falseNegatives, ImmutableList falsePositives, double accuracy, double phiCoefficient, double specificity,
- double phiCoefficientMax, double phiOverPhiMax) {
+public record EvaluationResults(SingleClassificationResult classificationResult) {
public String toRow() {
return String.format(Locale.ENGLISH, """
%4s & %4s & %4s & %4s & %4s & %4s & %4s
- %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f""", "P", "R", "F1", "Acc", "Spec", "Phi", "PhiN", precision, recall, f1, accuracy,
- specificity, phiCoefficient, phiOverPhiMax);
+ %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f""", "P", "R", "F1", "Acc", "Spec", "Phi", "PhiN", precision(), recall(), f1(), accuracy(),
+ specificity(), phiCoefficient(), phiOverPhiMax());
}
public String toRow(String headerKey, String headerVal) {
return String.format(Locale.ENGLISH, """
%10s & %4s & %4s & %4s & %4s & %4s & %4s & %4s
- %10s & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f""", headerKey, "P", "R", "F1", "Acc", "Spec", "Phi", "PhiN", headerVal, precision,
- recall, f1, accuracy, specificity, phiCoefficient, phiOverPhiMax);
+ %10s & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f & %4.2f""", headerKey, "P", "R", "F1", "Acc", "Spec", "Phi", "PhiN", headerVal,
+ precision(), recall(), f1(), accuracy(), specificity(), phiCoefficient(), phiOverPhiMax());
}
@Override
@@ -37,7 +36,7 @@ public String toString() {
\tSpecificity:%6.2f
\tPhi Coef.:%8.2f
\tPhi/PhiMax:%7.2f (Phi Max: %.2f)
- %s""", precision, recall, f1, accuracy, specificity, phiCoefficient, phiOverPhiMax, phiCoefficientMax, toRow());
+ %s""", precision(), recall(), f1(), accuracy(), specificity(), phiCoefficient(), phiOverPhiMax(), phiCoefficientMax(), toRow());
}
public String getResultStringWithExpected(ExpectedResults expectedResults) {
@@ -45,7 +44,7 @@ public String getResultStringWithExpected(ExpectedResults expectedResults) {
\tPrecision:%8.2f (min. expected: %.2f)
\tRecall:%11.2f (min. expected: %.2f)
\tF1:%15.2f (min. expected: %.2f)
- %s""", precision, expectedResults.precision(), recall, expectedResults.recall(), f1, expectedResults.f1(), toRow());
+ %s""", precision(), expectedResults.precision(), recall(), expectedResults.recall(), f1(), expectedResults.f1(), toRow());
}
public String getExtendedResultStringWithExpected(ExpectedResults expectedResults) {
@@ -53,16 +52,17 @@ public String getExtendedResultStringWithExpected(ExpectedResults expectedResult
outputBuilder.append(String.format(Locale.ENGLISH, """
\tPrecision:%8.2f (min. expected: %.2f)
\tRecall:%11.2f (min. expected: %.2f)
- \tF1:%15.2f (min. expected: %.2f)""", precision, expectedResults.precision(), recall, expectedResults.recall(), f1, expectedResults.f1()));
+ \tF1:%15.2f (min. expected: %.2f)""", precision(), expectedResults.precision(), recall(), expectedResults.recall(), f1(), expectedResults
+ .f1()));
outputBuilder.append(String.format(Locale.ENGLISH, """
\tAccuracy:%9.2f (min. expected: %.2f)
- \tSpecificity:%6.2f (min. expected: %.2f)""", accuracy, expectedResults.accuracy(), specificity, expectedResults.specificity()));
+ \tSpecificity:%6.2f (min. expected: %.2f)""", accuracy(), expectedResults.accuracy(), specificity(), expectedResults.specificity()));
outputBuilder.append(String.format(Locale.ENGLISH, """
\tPhi Coef.:%8.2f (min. expected: %.2f)
\tPhi/PhiMax:%7.2f (Phi Max: %.2f)
- %s""", phiCoefficient, expectedResults.phiCoefficient(), phiOverPhiMax, phiCoefficientMax, toRow()));
+ %s""", phiCoefficient(), expectedResults.phiCoefficient(), phiOverPhiMax(), phiCoefficientMax(), toRow()));
return outputBuilder.toString();
}
@@ -73,64 +73,62 @@ public String getExplicitResultString() {
\tTN:%15d
\tFN:%15d
\tP:%16d
- \tN:%16d""", truePositives.size(), falsePositives.size(), trueNegatives, falseNegatives.size(), truePositives.size() + falseNegatives.size(),
- trueNegatives + falsePositives.size());
- }
-
- /**
- * returns the weight (truePos + falseNeg)
- *
- * @return the weight
- */
- public int getWeight() {
- return this.truePositives().size() + this.falseNegatives().size();
+ \tN:%16d""", truePositives().size(), falsePositives().size(), trueNegatives(), falseNegatives().size(), truePositives()
+ .size() + falseNegatives().size(), trueNegatives() + falsePositives().size());
}
public ImmutableList getFound() {
MutableList found = Lists.mutable.empty();
- found.addAll(truePositives.castToCollection());
- found.addAll(falsePositives.castToCollection());
+ found.addAll(classificationResult.getTruePositives());
+ found.addAll(classificationResult.getFalsePositives());
return found.toImmutable();
}
- /**
- * creates new {@link EvaluationResults} from a {@link ResultMatrix}
- *
- * @param matrix the {@link ResultMatrix}
- * @return new {@link EvaluationResults}
- */
- public static EvaluationResults createEvaluationResults(ResultMatrix matrix) {
- int nrTruePos = matrix.truePositives().size();
- int nrTrueNeg = matrix.trueNegatives();
- int nrFalsePos = matrix.falsePositives().size();
- int nrFalseNeg = matrix.falseNegatives().size();
-
- double precision = EvaluationMetrics.calculatePrecision(nrTruePos, nrFalsePos);
- double recall = EvaluationMetrics.calculateRecall(nrTruePos, nrFalseNeg);
- double f1 = EvaluationMetrics.calculateF1(precision, recall);
-
- double accuracy = 0;
- double phiCoefficient = 0;
- double specificity = 0;
- double phiCoefficientMax = 0;
- double phiOverPhiMax = 0;
-
- if (nrTruePos + nrFalsePos + nrFalseNeg + nrTrueNeg != 0) {
- accuracy = EvaluationMetrics.calculateAccuracy(nrTruePos, nrFalsePos, nrFalseNeg, nrTrueNeg);
- }
- phiCoefficient = EvaluationMetrics.calculatePhiCoefficient(nrTruePos, nrFalsePos, nrFalseNeg, nrTrueNeg);
- if (nrTrueNeg + nrFalsePos != 0) {
- specificity = EvaluationMetrics.calculateSpecificity(nrTrueNeg, nrFalsePos);
- }
- if ((nrFalseNeg + nrTrueNeg) * (nrTruePos + nrFalseNeg) != 0) {
- phiCoefficientMax = EvaluationMetrics.calculatePhiCoefficientMax(nrTruePos, nrFalsePos, nrFalseNeg, nrTrueNeg);
- }
- if (phiCoefficientMax != 0) {
- phiOverPhiMax = EvaluationMetrics.calculatePhiOverPhiMax(nrTruePos, nrFalsePos, nrFalseNeg, nrTrueNeg);
- }
-
- return new EvaluationResults<>(precision, recall, f1, matrix.truePositives(), matrix.trueNegatives(), matrix.falseNegatives(), matrix.falsePositives(),
- accuracy, phiCoefficient, specificity, phiCoefficientMax, phiOverPhiMax);
+ public double precision() {
+ return classificationResult.getPrecision();
+ }
+
+ public double recall() {
+ return classificationResult.getRecall();
+ }
+
+ public double f1() {
+ return classificationResult.getF1();
+ }
+
+ public double accuracy() {
+ return classificationResult.getAccuracy();
+ }
+
+ public double specificity() {
+ return classificationResult.getSpecificity();
+ }
+
+ public double phiCoefficient() {
+ return classificationResult.getPhiCoefficient();
}
+ public double phiOverPhiMax() {
+ return classificationResult.getPhiOverPhiMax();
+ }
+
+ public double phiCoefficientMax() {
+ return classificationResult.getPhiCoefficientMax();
+ }
+
+ public List truePositives() {
+ return classificationResult.getTruePositives().stream().toList();
+ }
+
+ public List falsePositives() {
+ return classificationResult.getFalsePositives().stream().toList();
+ }
+
+ public List falseNegatives() {
+ return classificationResult.getFalseNegatives().stream().toList();
+ }
+
+ public int trueNegatives() {
+ return classificationResult.getTrueNegatives();
+ }
}
diff --git a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/ResultMatrix.java b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/ResultMatrix.java
deleted file mode 100644
index 70dcab7..0000000
--- a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/ResultMatrix.java
+++ /dev/null
@@ -1,15 +0,0 @@
-/* Licensed under MIT 2023-2024. */
-package edu.kit.kastel.mcse.ardoco.core.tests.eval.results;
-
-import org.eclipse.collections.api.list.ImmutableList;
-
-/**
- * reprensents the results in the form of a matrix
- *
- * @param truePositives the true positives
- * @param trueNegatives the true negatives
- * @param falsePositives the false positives
- * @param falseNegatives the false negatives
- */
-public record ResultMatrix(ImmutableList truePositives, int trueNegatives, ImmutableList falsePositives, ImmutableList falseNegatives) {
-}
diff --git a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/calculator/ResultCalculatorUtil.java b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/calculator/ResultCalculatorUtil.java
index 6ba099e..91dc84d 100644
--- a/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/calculator/ResultCalculatorUtil.java
+++ b/tests/integration-tests/tests-base/src/main/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/results/calculator/ResultCalculatorUtil.java
@@ -1,95 +1,73 @@
/* Licensed under MIT 2023-2024. */
package edu.kit.kastel.mcse.ardoco.core.tests.eval.results.calculator;
-import org.eclipse.collections.api.factory.Lists;
+import java.util.List;
+
+import org.eclipse.collections.api.factory.Sets;
import org.eclipse.collections.api.list.ImmutableList;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.EvaluationMetrics;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResultVector;
import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResults;
+import edu.kit.kastel.mcse.ardoco.metrics.ClassificationMetricsCalculator;
+import edu.kit.kastel.mcse.ardoco.metrics.result.AggregatedClassificationResult;
+import edu.kit.kastel.mcse.ardoco.metrics.result.AggregationType;
+import edu.kit.kastel.mcse.ardoco.metrics.result.SingleClassificationResult;
/**
* This utility class provides methods to form the average of several {@link EvaluationResults}
*/
public final class ResultCalculatorUtil {
+ private static final Logger logger = LoggerFactory.getLogger(ResultCalculatorUtil.class);
private ResultCalculatorUtil() {
throw new IllegalAccessError();
}
- public static EvaluationResults calculateAverageResults(ImmutableList> results) {
- int norm = results.size();
- EvaluationResultVector vector = new EvaluationResultVector<>();
-
- for (var result : results) {
- var weight = result.getWeight();
- if (weight <= 0) {
- norm--;
- continue;
- }
- vector.add(result);
- }
+ public static EvaluationResults calculateMacroAverageResults(ImmutableList> results) {
+ var averages = getAverages(results);
+ if (averages == null)
+ return null;
- vector.scale(norm);
- return vector.toEvaluationResults();
+ var macroAverage = averages.stream().filter(it -> it.getType() == AggregationType.MACRO_AVERAGE).findFirst().orElseThrow();
+ return evaluationResults(macroAverage);
}
public static EvaluationResults calculateWeightedAverageResults(ImmutableList> results) {
- double weight = 0.0;
- double precision = .0;
- double recall = 0.0;
- double f1 = 0.0;
- double accuracy = 0.0;
- double specificity = 0.0;
- double phi = 0.0;
- double phiMax = 0.0;
- double phiOverPhiMax = 0.0;
- int truePositives = 0;
- int trueNegatives = 0;
- int falsePositives = 0;
- int falseNegatives = 0;
-
- for (var result : results) {
- double localWeight = result.getWeight();
- weight += localWeight;
-
- precision += localWeight * result.precision();
- recall += localWeight * result.recall();
- f1 += localWeight * result.f1();
-
- accuracy += localWeight * result.accuracy();
- specificity += localWeight * result.specificity();
- phi += localWeight * result.phiCoefficient();
- phiMax += localWeight * result.phiCoefficientMax();
- phiOverPhiMax += localWeight * result.phiOverPhiMax();
-
- truePositives += result.truePositives().size();
- falseNegatives += result.falseNegatives().size();
- falsePositives += result.falsePositives().size();
- trueNegatives += result.trueNegatives();
+ var averages = getAverages(results);
+ if (averages == null)
+ return null;
- }
+ var weightedAverage = averages.stream().filter(it -> it.getType() == AggregationType.WEIGHTED_AVERAGE).findFirst().orElseThrow();
+ return evaluationResults(weightedAverage);
+ }
- precision = precision / weight;
- recall = recall / weight;
- f1 = f1 / weight;
- accuracy = accuracy / weight;
- specificity = specificity / weight;
+ public static EvaluationResults calculateMicroAverageResults(ImmutableList> results) {
+ var averages = getAverages(results);
+ if (averages == null)
+ return null;
- if (truePositives > 0) {
- phi = EvaluationMetrics.calculatePhiCoefficient(truePositives, falsePositives, falseNegatives, trueNegatives);
- phiMax = EvaluationMetrics.calculatePhiCoefficientMax(truePositives, falsePositives, falseNegatives, trueNegatives);
- phiOverPhiMax = EvaluationMetrics.calculatePhiOverPhiMax(truePositives, falsePositives, falseNegatives, trueNegatives);
+ var microAverage = averages.stream().filter(it -> it.getType() == AggregationType.MICRO_AVERAGE).findFirst().orElseThrow();
+ return evaluationResults(microAverage);
+ }
- return new EvaluationResults<>(precision, recall, f1, Lists.immutable.empty(), 0, Lists.immutable.empty(), Lists.immutable.empty(), accuracy, phi,
- specificity, phiMax, phiOverPhiMax);
+ private static EvaluationResults evaluationResults(AggregatedClassificationResult average) {
+ // Shared by the macro, weighted and micro paths. NOTE(review): the null 4th argument looks like the true-negative count; trueNegatives() returns int and may NPE on it - confirm.
+ var averageAsSingle = new SingleClassificationResult(Sets.mutable.empty(), Sets.mutable.empty(), Sets.mutable.empty(), null, average
+ .getPrecision(), average.getRecall(), average.getF1(), average.getAccuracy(), average.getSpecificity(), average.getPhiCoefficient(), average
+ .getPhiCoefficientMax(), average.getPhiOverPhiMax());
+ return new EvaluationResults<>(averageAsSingle);
+ }
+
+ private static List getAverages(ImmutableList> results) {
+ if (results.isEmpty()) {
+ throw new IllegalArgumentException("No results to calculate average from");
}
- phi = phi / weight;
- phiMax /= weight;
- phiOverPhiMax /= weight;
- return new EvaluationResults<>(precision, recall, f1, Lists.immutable.empty(), 0, Lists.immutable.empty(), Lists.immutable.empty(), accuracy, phi,
- specificity, phiMax, phiOverPhiMax);
+ var calculator = ClassificationMetricsCalculator.getInstance();
+ var classifications = results.stream().map(EvaluationResults::classificationResult).toList();
+ return calculator.calculateAverages(classifications, null);
}
+
}
diff --git a/tests/integration-tests/tests-base/src/test/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/EvaluationMetricsTest.java b/tests/integration-tests/tests-base/src/test/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/EvaluationMetricsTest.java
deleted file mode 100644
index bd522c8..0000000
--- a/tests/integration-tests/tests-base/src/test/java/edu/kit/kastel/mcse/ardoco/core/tests/eval/EvaluationMetricsTest.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/* Licensed under MIT 2022-2024. */
-package edu.kit.kastel.mcse.ardoco.core.tests.eval;
-
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.Test;
-
-class EvaluationMetricsTest {
-
- @Test
- void calculatePrecisionTest() {
- Assertions.assertAll( //
- () -> Assertions.assertEquals(.5, EvaluationMetrics.calculatePrecision(10, 10), 1e-3), //
- () -> Assertions.assertEquals(.857, EvaluationMetrics.calculatePrecision(6, 1), 1e-3), //
- () -> Assertions.assertEquals(.154, EvaluationMetrics.calculatePrecision(10, 55), 1e-3), //
- () -> Assertions.assertEquals(.905, EvaluationMetrics.calculatePrecision(210, 22), 1e-3) //
- );
- }
-
- @Test
- void calculateRecallTest() {
- Assertions.assertAll( //
- () -> Assertions.assertEquals(.5, EvaluationMetrics.calculateRecall(10, 10), 1e-3), //
- () -> Assertions.assertEquals(.75, EvaluationMetrics.calculateRecall(6, 2), 1e-3), //
- () -> Assertions.assertEquals(.154, EvaluationMetrics.calculateRecall(10, 55), 1e-3), //
- () -> Assertions.assertEquals(.871, EvaluationMetrics.calculateRecall(210, 31), 1e-3) //
- );
- }
-
- @Test
- void calculateF1FromPrecisionRecallTest() {
- Assertions.assertAll( //
- () -> Assertions.assertEquals(1.0, EvaluationMetrics.calculateF1(1., 1.), 1e-2), //
- () -> Assertions.assertEquals(0.0, EvaluationMetrics.calculateF1(0., 1.), 1e-2), //
- () -> Assertions.assertEquals(0.0, EvaluationMetrics.calculateF1(1., 0.), 1e-2), //
- () -> Assertions.assertEquals(0.18, EvaluationMetrics.calculateF1(.9, .1), 1e-2), //
- () -> Assertions.assertEquals(0.48, EvaluationMetrics.calculateF1(.6, .4), 1e-2), //
- () -> Assertions.assertEquals(0.42, EvaluationMetrics.calculateF1(.3, .7), 1e-2), //
- () -> Assertions.assertEquals(0.9, EvaluationMetrics.calculateF1(.9, .9), 1e-2), //
- () -> Assertions.assertEquals(0.48, EvaluationMetrics.calculateF1(.4, .6), 1e-2) //
- );
- }
-
- @Test
- void calculateF1Test() {
- Assertions.assertAll( //
- () -> Assertions.assertEquals(.5, EvaluationMetrics.calculateF1(10, 10, 10), 1e-3), //
- () -> Assertions.assertEquals(.8, EvaluationMetrics.calculateF1(6, 1, 2), 1e-3), //
- () -> Assertions.assertEquals(.154, EvaluationMetrics.calculateF1(10, 55, 55), 1e-3), //
- () -> Assertions.assertEquals(.888, EvaluationMetrics.calculateF1(210, 22, 31), 1e-3) //
- );
- }
-
- @Test
- void calculateAccuracyTest() {
- Assertions.assertAll( //
- () -> Assertions.assertEquals(.5, EvaluationMetrics.calculateAccuracy(10, 10, 10, 10), 1e-3), //
- () -> Assertions.assertEquals(.75, EvaluationMetrics.calculateAccuracy(6, 1, 2, 3), 1e-3), //
- () -> Assertions.assertEquals(.214, EvaluationMetrics.calculateAccuracy(10, 55, 55, 20), 1e-3), //
- () -> Assertions.assertEquals(.967, EvaluationMetrics.calculateAccuracy(210, 22, 31, 1337), 1e-3) //
- );
- }
-
- @Test
- void calculatePhiCoefficientTest() {
- Assertions.assertAll( //
- () -> Assertions.assertEquals(.0, EvaluationMetrics.calculatePhiCoefficient(10, 10, 10, 10), 1e-3), //
- () -> Assertions.assertEquals(.478, EvaluationMetrics.calculatePhiCoefficient(6, 1, 2, 3), 1e-3), //
- () -> Assertions.assertEquals(-.579, EvaluationMetrics.calculatePhiCoefficient(10, 55, 55, 20), 1e-3), //
- () -> Assertions.assertEquals(.869, EvaluationMetrics.calculatePhiCoefficient(210, 22, 31, 1337), 1e-3), //
- () -> Assertions.assertEquals(.0, EvaluationMetrics.calculatePhiCoefficient(0, 0, 11, 11), 1e-3), //
- () -> Assertions.assertEquals(.0, EvaluationMetrics.calculatePhiCoefficient(11, 0, 11, 0), 1e-3) //
- );
- }
-
- @Test
- void calculateSpecificityTest() {
- Assertions.assertAll( //
- () -> Assertions.assertEquals(.5, EvaluationMetrics.calculateSpecificity(1, 1), 1e-3), //
- () -> Assertions.assertEquals(.76, EvaluationMetrics.calculateSpecificity(1337, 420), 1e-3), //
- () -> Assertions.assertEquals(.0, EvaluationMetrics.calculateSpecificity(0, 20), 1e-3), //
- () -> Assertions.assertEquals(1., EvaluationMetrics.calculateSpecificity(20, 0), 1e-3), //
- () -> Assertions.assertEquals(1., EvaluationMetrics.calculateSpecificity(0, 0), 1e-3), //
- () -> Assertions.assertEquals(.375, EvaluationMetrics.calculateSpecificity(3, 5), 1e-3) //
- );
- }
-
-}
diff --git a/tests/integration-tests/tests-inconsistency/src/test/java/edu/kit/kastel/mcse/ardoco/id/tests/integration/InconsistencyDetectionEvaluationIT.java b/tests/integration-tests/tests-inconsistency/src/test/java/edu/kit/kastel/mcse/ardoco/id/tests/integration/InconsistencyDetectionEvaluationIT.java
index 3295007..c3f57b8 100644
--- a/tests/integration-tests/tests-inconsistency/src/test/java/edu/kit/kastel/mcse/ardoco/id/tests/integration/InconsistencyDetectionEvaluationIT.java
+++ b/tests/integration-tests/tests-inconsistency/src/test/java/edu/kit/kastel/mcse/ardoco/id/tests/integration/InconsistencyDetectionEvaluationIT.java
@@ -2,10 +2,8 @@
package edu.kit.kastel.mcse.ardoco.id.tests.integration;
import java.io.IOException;
-import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
-import java.nio.file.StandardOpenOption;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
@@ -23,7 +21,6 @@
import org.junit.jupiter.api.DisplayName;
import org.junit.jupiter.api.MethodOrderer;
import org.junit.jupiter.api.Order;
-import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestMethodOrder;
import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
import org.junit.jupiter.params.ParameterizedTest;
@@ -31,7 +28,6 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import edu.kit.kastel.mcse.ardoco.core.api.inconsistency.InconsistentSentence;
import edu.kit.kastel.mcse.ardoco.core.api.inconsistency.ModelInconsistency;
import edu.kit.kastel.mcse.ardoco.core.api.models.ModelElement;
import edu.kit.kastel.mcse.ardoco.core.api.models.arcotl.ArchitectureModel;
@@ -43,7 +39,6 @@
import edu.kit.kastel.mcse.ardoco.core.tests.eval.Project;
import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResults;
import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.ExpectedResults;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.ResultMatrix;
import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.calculator.ResultCalculatorUtil;
import edu.kit.kastel.mcse.ardoco.id.tests.integration.inconsistencyhelper.HoldBackRunResultsProducer;
import edu.kit.kastel.mcse.ardoco.id.types.MissingModelInstanceInconsistency;
@@ -57,30 +52,13 @@
* are the spots of inconsistency then. We run this multiple times so each element was held back once.
*/
@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
-public class InconsistencyDetectionEvaluationIT {
+class InconsistencyDetectionEvaluationIT {
private static final Logger logger = LoggerFactory.getLogger(InconsistencyDetectionEvaluationIT.class);
private static final String OUTPUT = "target/testout";
public static final String DIRECTORY_NAME = "ardoco_eval_id";
- /**
- * missing models in model
- */
- private static final MutableList> OVERALL_MME_RESULTS = Lists.mutable.empty(); //
- private static final MutableList> OVERALL_MME_RESULTS_BASELINE = Lists.mutable.empty();
-
- /**
- * undocumented models
- */
- private static final MutableList> OVERALL_UME_RESULTS = Lists.mutable.empty();
-
- private static final Map, ExpectedResults>> MME_RESULTS = new LinkedHashMap<>();
- private static final Map> MME_RESULTS_BASELINE = new LinkedHashMap<>();
- private static final Map> UME_RESULTS = new LinkedHashMap<>();
-
private static final String LINE_SEPARATOR = System.lineSeparator();
- private static boolean ranBaseline = false;
- private static final Map> inconsistentSentencesPerProject = new LinkedHashMap<>();
private static final Map arDoCoResults = new LinkedHashMap<>();
/**
@@ -94,18 +72,9 @@ public class InconsistencyDetectionEvaluationIT {
*/
@DisplayName("Evaluating MME-Inconsistency Detection")
@ParameterizedTest(name = "Evaluating MME-Inconsistency for {0}")
- @EnumSource(value = Project.class, mode = EnumSource.Mode.MATCH_NONE, names = "^.*HISTORICAL$")
+ @EnumSource(Project.class)
@Order(1)
- protected void missingModelElementInconsistencyIT(GoldStandardProject goldStandardProject) {
- runMissingModelElementInconsistencyEval(goldStandardProject, goldStandardProject.getExpectedInconsistencyResults());
- }
-
- @EnabledIfEnvironmentVariable(named = "testHistoric", matches = ".*")
- @DisplayName("Evaluating MME-Inconsistency Detection (Historic)")
- @ParameterizedTest(name = "Evaluating MME-Inconsistency for {0}")
- @EnumSource(value = Project.class, mode = EnumSource.Mode.MATCH_ALL, names = "^.*HISTORICAL$")
- @Order(2)
- protected void missingModelElementInconsistencyHistoricIT(GoldStandardProject goldStandardProject) {
+ void missingModelElementInconsistencyIT(GoldStandardProject goldStandardProject) {
runMissingModelElementInconsistencyEval(goldStandardProject, goldStandardProject.getExpectedInconsistencyResults());
}
@@ -115,11 +84,8 @@ protected void runMissingModelElementInconsistencyEval(GoldStandardProject goldS
var results = calculateEvaluationResults(goldStandardProject, runs);
- OVERALL_MME_RESULTS.addAll(results);
-
- EvaluationResults weightedResults = ResultCalculatorUtil.calculateWeightedAverageResults(results.toImmutable());
+ EvaluationResults weightedResults = ResultCalculatorUtil.calculateMicroAverageResults(results.toImmutable());
- MME_RESULTS.put(goldStandardProject, Tuples.pair(weightedResults, expectedInconsistencyResults));
logResultsMissingModelInconsistency(goldStandardProject, weightedResults, expectedInconsistencyResults);
checkResults(weightedResults, expectedInconsistencyResults);
@@ -135,24 +101,14 @@ protected void runMissingModelElementInconsistencyEval(GoldStandardProject goldS
@EnabledIfEnvironmentVariable(named = "testBaseline", matches = ".*")
@DisplayName("Evaluating MME-Inconsistency Detection Baseline")
@ParameterizedTest(name = "Evaluating Baseline for {0}")
- @EnumSource(value = Project.class, mode = EnumSource.Mode.MATCH_NONE, names = "^.*HISTORICAL$")
+ @EnumSource(Project.class)
@Order(5)
- protected void missingModelElementInconsistencyBaselineIT(GoldStandardProject goldStandardProject) {
- runMissingModelElementInconsistencyBaselineEval(goldStandardProject);
- }
-
- @EnabledIfEnvironmentVariable(named = "testBaseline", matches = ".*")
- @DisplayName("Evaluating MME-Inconsistency Detection Baseline (Historical)")
- @ParameterizedTest(name = "Evaluating Baseline for {0}")
- @EnumSource(value = Project.class, mode = EnumSource.Mode.MATCH_ALL, names = "^.*HISTORICAL$")
- @Order(6)
- protected void missingModelElementInconsistencyBaselineHistoricIT(GoldStandardProject goldStandardProject) {
+ void missingModelElementInconsistencyBaselineIT(GoldStandardProject goldStandardProject) {
runMissingModelElementInconsistencyBaselineEval(goldStandardProject);
}
protected void runMissingModelElementInconsistencyBaselineEval(GoldStandardProject goldStandardProject) {
logger.info("Start evaluation of MME-inconsistency baseline for {}", goldStandardProject.getProjectName());
- ranBaseline = true;
HoldBackRunResultsProducer holdBackRunResultsProducer = new HoldBackRunResultsProducer();
Map runs = holdBackRunResultsProducer.produceHoldBackRunResults(goldStandardProject, true);
@@ -160,10 +116,8 @@ protected void runMissingModelElementInconsistencyBaselineEval(GoldStandardProje
Assertions.assertTrue(runs != null && !runs.isEmpty());
var results = calculateEvaluationResults(goldStandardProject, runs);
- OVERALL_MME_RESULTS_BASELINE.addAll(results);
var weightedResults = ResultCalculatorUtil.calculateWeightedAverageResults(results.toImmutable());
- MME_RESULTS_BASELINE.put(goldStandardProject, weightedResults);
if (logger.isInfoEnabled()) {
String name = goldStandardProject.getProjectName() + " missing model inconsistency";
@@ -178,18 +132,9 @@ protected void runMissingModelElementInconsistencyBaselineEval(GoldStandardProje
*/
@DisplayName("Evaluate Inconsistency Analyses For MissingTextForModelElementInconsistencies")
@ParameterizedTest(name = "Evaluating UME-inconsistency for {0}")
- @EnumSource(value = Project.class, mode = EnumSource.Mode.MATCH_NONE, names = "^.*HISTORICAL$")
+ @EnumSource(Project.class)
@Order(10)
- protected void missingTextInconsistencyIT(GoldStandardProject goldStandardProject) {
- runMissingTextInconsistencyEval(goldStandardProject);
- }
-
- @EnabledIfEnvironmentVariable(named = "testHistoric", matches = ".*")
- @DisplayName("Evaluate Inconsistency Analyses For MissingTextForModelElementInconsistencies " + "(Historical)")
- @ParameterizedTest(name = "Evaluating UME-inconsistency for {0}")
- @EnumSource(value = Project.class, mode = EnumSource.Mode.MATCH_ALL, names = "^.*HISTORICAL$")
- @Order(11)
- protected void missingTextInconsistencyHistoricIT(GoldStandardProject goldStandardProject) {
+ void missingTextInconsistencyIT(GoldStandardProject goldStandardProject) {
runMissingTextInconsistencyEval(goldStandardProject);
}
@@ -205,9 +150,6 @@ private void runMissingTextInconsistencyEval(GoldStandardProject goldStandardPro
var inconsistentModelElements = projectResults.getAllModelInconsistencies().collect(ModelInconsistency::getModelInstanceUid).toList();
var results = TestUtil.compareInconsistencies(projectResults, inconsistentModelElements.toImmutable(), expectedInconsistentModelElements.toImmutable());
- OVERALL_UME_RESULTS.add(results);
- UME_RESULTS.put(goldStandardProject, results);
-
String name = goldStandardProject.getProjectName() + " missing text inconsistency";
TestUtil.logExplicitResults(logger, name, results);
writeOutResults(goldStandardProject, results);
@@ -228,77 +170,6 @@ protected HoldBackRunResultsProducer getHoldBackRunResultsProducer() {
return new HoldBackRunResultsProducer();
}
- @EnabledIfEnvironmentVariable(named = "overallResults", matches = ".*")
- @Test
- @Order(999)
- void overAllResultsIT() {
- var weightedResults = ResultCalculatorUtil.calculateWeightedAverageResults(OVERALL_MME_RESULTS.toImmutable());
- var macroResults = ResultCalculatorUtil.calculateAverageResults(OVERALL_MME_RESULTS.toImmutable());
-
- Assertions.assertNotNull(weightedResults);
- Assertions.assertNotNull(macroResults);
-
- var weightedUMEResults = ResultCalculatorUtil.calculateWeightedAverageResults(OVERALL_UME_RESULTS.toImmutable());
- var macroUMEResults = ResultCalculatorUtil.calculateAverageResults(OVERALL_UME_RESULTS.toImmutable());
-
- Assertions.assertNotNull(weightedUMEResults);
- Assertions.assertNotNull(macroUMEResults);
-
- if (logger.isInfoEnabled()) {
- var mmeBaselineMacro = ResultCalculatorUtil.calculateAverageResults(OVERALL_MME_RESULTS_BASELINE.toImmutable());
- var mmeBaselineWeighted = ResultCalculatorUtil.calculateWeightedAverageResults(OVERALL_MME_RESULTS_BASELINE.toImmutable());
- ;
-
- var mmeOverallWeightedName = "MME Overall Weighted";
- TestUtil.logResults(logger, mmeOverallWeightedName, weightedResults);
-
- var mmeOverallMacroName = "MME Overall Macro";
- TestUtil.logResults(logger, mmeOverallMacroName, macroResults);
-
- var mmeBaselineOverallWeightedName = "MME BASELINE Overall Weighted";
- var mmeBaselineOverallMacroName = "MME BASELINE Overall Macro";
- if (ranBaseline) {
- TestUtil.logResults(logger, mmeBaselineOverallWeightedName, mmeBaselineWeighted);
- TestUtil.logResults(logger, mmeBaselineOverallMacroName, mmeBaselineMacro);
- }
-
- var umeOverallWeightedName = "Undoc. Model Element Overall Weighted";
- TestUtil.logResults(logger, umeOverallWeightedName, weightedUMEResults);
- var umeOverallMacroName = "Undoc. Model Element Overall Macro";
- TestUtil.logResults(logger, umeOverallMacroName, macroUMEResults);
-
- logger.info("MME");
- for (var entry : MME_RESULTS.entrySet()) {
- TestUtil.logExtendedResultsAsRow(logger, "Proj", entry.getKey().getAlias(), entry.getValue().getOne());
- }
- TestUtil.logExtendedResultsAsRow(logger, "-", "Macro", macroResults);
- TestUtil.logExtendedResultsAsRow(logger, "-", "Weighted", weightedResults);
-
- if (ranBaseline) {
- logger.info("MME Baseline");
- for (var entry : MME_RESULTS_BASELINE.entrySet()) {
- TestUtil.logExtendedResultsAsRow(logger, "Proj", entry.getKey().getAlias(), entry.getValue());
- }
- TestUtil.logExtendedResultsAsRow(logger, "-", "Macro", mmeBaselineMacro);
- TestUtil.logExtendedResultsAsRow(logger, "-", "Weighted", mmeBaselineWeighted);
- }
-
- logger.info("UME");
- for (var entry : UME_RESULTS.entrySet()) {
- TestUtil.logExtendedResultsAsRow(logger, "Proj", entry.getKey().getAlias(), entry.getValue());
- }
- TestUtil.logExtendedResultsAsRow(logger, "-", "Macro", macroUMEResults);
- TestUtil.logExtendedResultsAsRow(logger, "-", "Weighted", weightedUMEResults);
- }
-
- try {
- writeOutput(weightedResults, macroResults);
- writeOverallOutputMissingTextInconsistency(weightedUMEResults, macroUMEResults);
- } catch (IOException e) {
- logger.error(e.getMessage(), e.getCause());
- }
- }
-
private MutableList> calculateEvaluationResults(GoldStandardProject goldStandardProject, Map runs) {
Map> results = Maps.mutable.empty();
@@ -310,15 +181,14 @@ private MutableList> calculateEvaluationResults(GoldSt
if (runEvalResults != null) {
results.put(modelInstance, runEvalResults);
} else {
- // for the base case, instead of calculating results, save the found inconsistencies.
- inconsistentSentencesPerProject.put(goldStandardProject, arDoCoResult.getInconsistentSentences());
+ logger.error("Evaluation results for {} are null.", modelInstance);
}
}
return Lists.mutable.ofAll(results.values());
}
private EvaluationResults evaluateRun(GoldStandardProject goldStandardProject, ModelElement removedElement, ArDoCoResult arDoCoResult) {
- var modelId = arDoCoResult.getModelIds().get(0);
+ var modelId = arDoCoResult.getModelIds().getFirst();
ImmutableList inconsistencies = arDoCoResult.getInconsistenciesOfTypeForModel(modelId,
MissingModelInstanceInconsistency.class);
@@ -436,49 +306,6 @@ private static Pair createOutput(GoldStandardProje
return Tuples.pair(outputBuilder, detailedOutputBuilder);
}
- private static void writeOutput(EvaluationResults weightedResults, EvaluationResults macroResults) throws IOException {
- var evalDir = Path.of(OUTPUT).resolve(DIRECTORY_NAME);
- Files.createDirectories(evalDir);
- var outputFile = evalDir.resolve("base_results.md");
-
- var outputBuilder = new StringBuilder("# Inconsistency Detection").append(LINE_SEPARATOR);
-
- var resultString = TestUtil.createResultLogString("Overall Weighted", weightedResults);
- outputBuilder.append(resultString).append(LINE_SEPARATOR);
- resultString = TestUtil.createResultLogString("Overall Macro", macroResults);
- outputBuilder.append(resultString).append(LINE_SEPARATOR);
- outputBuilder.append(LINE_SEPARATOR);
-
- for (var entry : inconsistentSentencesPerProject.entrySet()) {
- var goldStandardProject = entry.getKey();
- outputBuilder.append("## ").append(goldStandardProject.getProjectName());
- outputBuilder.append(LINE_SEPARATOR);
- var inconsistentSentences = entry.getValue();
- for (var inconsistentSentence : inconsistentSentences) {
- outputBuilder.append(inconsistentSentence.getInfoString());
- outputBuilder.append(LINE_SEPARATOR);
- }
- }
-
- Files.writeString(outputFile, outputBuilder.toString(), StandardCharsets.UTF_8, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
- }
-
- //FIXME Something is wrong with this.
- private static void writeOverallOutputMissingTextInconsistency(EvaluationResults weightedResults, EvaluationResults macroResults)
- throws IOException {
- var evalDir = Path.of(OUTPUT).resolve(DIRECTORY_NAME);
- Files.createDirectories(evalDir);
- var outputFile = evalDir.resolve("_MissingTextInconsistency_Overall_Results.md");
-
- var outputBuilder = new StringBuilder("# Inconsistency Detection - Missing Text For Model " + "Element").append(LINE_SEPARATOR);
-
- var resultString = TestUtil.createResultLogString("Overall Weighted", weightedResults);
- outputBuilder.append(resultString).append(LINE_SEPARATOR);
- resultString = TestUtil.createResultLogString("Overall Macro", macroResults);
- outputBuilder.append(resultString).append(LINE_SEPARATOR);
- outputBuilder.append(LINE_SEPARATOR);
- }
-
private static String getOverallResultsString(MutableList> results) {
StringBuilder outputBuilder = new StringBuilder();
outputBuilder.append("###").append(LINE_SEPARATOR);
@@ -529,18 +356,15 @@ private static Pair>, StringBuilder> inspe
private static void inspectRun(StringBuilder outputBuilder, StringBuilder detailedOutputBuilder, MutableList> allResults,
ArDoCoResult arDoCoResult, EvaluationResults result) {
- var truePositives = result.truePositives().toList();
+ var truePositives = result.truePositives();
appendResults(truePositives, detailedOutputBuilder, "True Positives", arDoCoResult, outputBuilder);
- var falsePositives = result.falsePositives().toList();
+ var falsePositives = result.falsePositives();
appendResults(falsePositives, detailedOutputBuilder, "False Positives", arDoCoResult, outputBuilder);
- var falseNegatives = result.falseNegatives().toList();
+ var falseNegatives = result.falseNegatives();
appendResults(falseNegatives, detailedOutputBuilder, "False Negatives", arDoCoResult, outputBuilder);
-
- var results = EvaluationResults.createEvaluationResults(new ResultMatrix<>(truePositives.toImmutable(), 0, falsePositives.toImmutable(), falseNegatives
- .toImmutable()));
- allResults.add(results);
+ allResults.add(result);
}
private static void appendResults(List resultList, StringBuilder detailedOutputBuilder, String type, ArDoCoResult arDoCoResult,
@@ -588,7 +412,7 @@ private static String listToString(List> truePositives) {
}
private static ImmutableList getInitialInconsistencies(ArDoCoResult arDoCoResult) {
- var id = arDoCoResult.getModelIds().get(0);
+ var id = arDoCoResult.getModelIds().getFirst();
return arDoCoResult.getInconsistenciesOfTypeForModel(id, MissingModelInstanceInconsistency.class);
}
}
diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/SadSamTraceabilityLinkRecoveryEvaluation.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/SadSamTraceabilityLinkRecoveryEvaluation.java
index a18a52b..6af59c3 100644
--- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/SadSamTraceabilityLinkRecoveryEvaluation.java
+++ b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/SadSamTraceabilityLinkRecoveryEvaluation.java
@@ -1,63 +1,36 @@
/* Licensed under MIT 2021-2024. */
package edu.kit.kastel.mcse.ardoco.tlr.tests.integration;
-import static edu.kit.kastel.mcse.ardoco.tlr.tests.integration.TraceLinkEvaluationIT.DATA_MAP;
import static edu.kit.kastel.mcse.ardoco.tlr.tests.integration.TraceLinkEvaluationIT.OUTPUT;
-import static edu.kit.kastel.mcse.ardoco.tlr.tests.integration.TraceLinkEvaluationIT.PROJECT_RESULTS;
-import static edu.kit.kastel.mcse.ardoco.tlr.tests.integration.TraceLinkEvaluationIT.RESULTS;
import java.io.File;
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.util.stream.Stream;
-import org.eclipse.collections.api.collection.ImmutableCollection;
import org.eclipse.collections.api.factory.Lists;
import org.eclipse.collections.api.list.ImmutableList;
-import org.eclipse.collections.api.list.MutableList;
-import org.eclipse.collections.impl.tuple.Tuples;
-import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.MethodOrderer;
import org.junit.jupiter.api.TestMethodOrder;
-import edu.kit.kastel.mcse.ardoco.core.api.PreprocessingData;
import edu.kit.kastel.mcse.ardoco.core.api.models.ArchitectureModelType;
-import edu.kit.kastel.mcse.ardoco.core.api.models.ModelInstance;
-import edu.kit.kastel.mcse.ardoco.core.api.models.ModelStates;
import edu.kit.kastel.mcse.ardoco.core.api.output.ArDoCoResult;
-import edu.kit.kastel.mcse.ardoco.core.api.text.Sentence;
-import edu.kit.kastel.mcse.ardoco.core.common.util.FilePrinter;
import edu.kit.kastel.mcse.ardoco.core.common.util.TraceLinkUtilities;
-import edu.kit.kastel.mcse.ardoco.core.data.DataRepository;
import edu.kit.kastel.mcse.ardoco.core.execution.ConfigurationHelper;
import edu.kit.kastel.mcse.ardoco.core.execution.runner.ArDoCoRunner;
-import edu.kit.kastel.mcse.ardoco.core.tests.TestUtil;
import edu.kit.kastel.mcse.ardoco.core.tests.eval.GoldStandardProject;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResults;
import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.ExpectedResults;
import edu.kit.kastel.mcse.ardoco.tlr.execution.ArDoCoForSadSamTraceabilityLinkRecovery;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.TLRUtil;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLGoldStandardFile;
/**
* Integration test that evaluates the traceability link recovery capabilities of ArDoCo.
*/
@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
public class SadSamTraceabilityLinkRecoveryEvaluation extends TraceabilityLinkRecoveryEvaluation {
+
@Override
protected boolean resultHasRequiredData(ArDoCoResult arDoCoResult) {
var traceLinks = arDoCoResult.getAllTraceLinks();
return !traceLinks.isEmpty();
}
- @Override
- protected ArDoCoResult runTraceLinkEvaluation(T project) {
- var result = super.runTraceLinkEvaluation(project);
- DATA_MAP.put(project, result);
- return result;
- }
-
@Override
protected ArDoCoRunner getAndSetupRunner(T project) {
var additionalConfigsMap = ConfigurationHelper.loadAdditionalConfigs(project.getAdditionalConfigurationsFile());
@@ -104,27 +77,6 @@ protected int getConfusionMatrixSum(ArDoCoResult arDoCoResult) {
return sentences * modelElements;
}
- @Override
- protected EvaluationResults calculateEvaluationResults(ArDoCoResult arDoCoResult, ImmutableCollection goldStandard) {
- var results = super.calculateEvaluationResults(arDoCoResult, goldStandard);
- PROJECT_RESULTS.add(results);
- return results;
- }
-
- public ArDoCoResult getArDoCoResult(T project) {
- String name = project.getProjectName();
- var inputModel = project.getModelFile();
- var inputText = project.getTextFile();
-
- var arDoCoResult = DATA_MAP.get(project);
- if (arDoCoResult == null) {
- File additionalConfigurations = project.getAdditionalConfigurationsFile();
- arDoCoResult = getArDoCoResult(name, inputText, inputModel, ArchitectureModelType.PCM, additionalConfigurations);
- DATA_MAP.put(project, arDoCoResult);
- }
- return arDoCoResult;
- }
-
protected ArDoCoResult getArDoCoResult(String name, File inputText, File inputModel, ArchitectureModelType architectureModelType,
File additionalConfigurations) {
var additionalConfigsMap = ConfigurationHelper.loadAdditionalConfigs(additionalConfigurations);
@@ -134,132 +86,4 @@ protected ArDoCoResult getArDoCoResult(String name, File inputText, File inputMo
runner.setUp(inputText, inputModel, architectureModelType, additionalConfigsMap, outputDir);
return runner.run();
}
-
- /**
- * calculate {@link EvaluationResults} and compare to {@link ExpectedResults}
- *
- * @param project the result's project
- * @param arDoCoResult the result
- */
- public static void checkResults(GoldStandardProject project, ArDoCoResult arDoCoResult) {
-
- var modelIds = arDoCoResult.getModelIds();
- var modelId = modelIds.stream().findFirst().orElseThrow();
-
- var goldStandard = project.getTlrGoldStandard();
- EvaluationResults results = calculateResults(goldStandard, arDoCoResult, modelId);
-
- ExpectedResults expectedResults = project.getExpectedTraceLinkResults();
-
- logAndSaveProjectResult(project, arDoCoResult, results, expectedResults);
-
- compareResultWithExpected(results, expectedResults);
-
- }
-
- private static void logAndSaveProjectResult(GoldStandardProject project, ArDoCoResult arDoCoResult, EvaluationResults results,
- ExpectedResults expectedResults) {
- if (logger.isInfoEnabled()) {
- String projectName = project.getProjectName();
- TestUtil.logExtendedResultsWithExpected(logger, SadSamTraceabilityLinkRecoveryEvaluation.class, projectName, results, expectedResults);
-
- var data = arDoCoResult.dataRepository();
- printDetailedDebug(results, data);
- try {
- RESULTS.add(Tuples.pair(project, TestUtil.compareTLR(DATA_MAP.get(project), TLRUtil.getTraceLinks(data), TLGoldStandardFile.loadLinks(project)
- .toImmutable())));
- DATA_MAP.put(project, arDoCoResult);
- PROJECT_RESULTS.add(results);
- } catch (IOException e) {
- // failing to save project results is irrelevant for test success
- logger.warn("Failed to load file for gold standard", e);
- }
- }
- }
-
- private static void compareResultWithExpected(EvaluationResults results, ExpectedResults expectedResults) {
- Assertions.assertAll(//
- () -> Assertions.assertTrue(results.precision() >= expectedResults.precision(), "Precision " + results
- .precision() + " is below the expected minimum value " + expectedResults.precision()), //
- () -> Assertions.assertTrue(results.recall() >= expectedResults.recall(), "Recall " + results
- .recall() + " is below the expected minimum value " + expectedResults.recall()), //
- () -> Assertions.assertTrue(results.f1() >= expectedResults.f1(), "F1 " + results
- .f1() + " is below the expected minimum value " + expectedResults.f1()));
- Assertions.assertAll(//
- () -> Assertions.assertTrue(results.accuracy() >= expectedResults.accuracy(), "Accuracy " + results
- .accuracy() + " is below the expected minimum value " + expectedResults.accuracy()), //
- () -> Assertions.assertTrue(results.phiCoefficient() >= expectedResults.phiCoefficient(), "Phi coefficient " + results
- .phiCoefficient() + " is below the expected minimum value " + expectedResults.phiCoefficient()));
- }
-
- public static void writeDetailedOutput(GoldStandardProject project, ArDoCoResult arDoCoResult) {
- String name = project.getProjectName();
- var path = Path.of(OUTPUT).resolve(name);
- try {
- Files.createDirectories(path);
- } catch (IOException e) {
- logger.warn("Could not create directories.", e);
- }
- FilePrinter.printResultsInFiles(path, name, arDoCoResult);
- }
-
- private static EvaluationResults calculateResults(ImmutableList goldStandard, ArDoCoResult arDoCoResult, String modelId) {
- var traceLinks = arDoCoResult.getTraceLinksForModelAsStrings(modelId);
- logger.info("Found {} trace links", traceLinks.size());
-
- return TestUtil.compareTLR(arDoCoResult, traceLinks, goldStandard);
- }
-
- private static void printDetailedDebug(EvaluationResults results, DataRepository data) {
- var falseNegatives = results.falseNegatives().stream().map(Object::toString);
- var falsePositives = results.falsePositives().stream().map(Object::toString);
-
- var sentences = data.getData(PreprocessingData.ID, PreprocessingData.class).orElseThrow().getText().getSentences();
- var modelStates = data.getData(ModelStates.ID, ModelStates.class).orElseThrow();
-
- for (String modelId : modelStates.modelIds()) {
- var instances = modelStates.getModelExtractionState(modelId).getInstances();
-
- var falseNegativeOutput = createOutputStrings(falseNegatives, sentences, instances);
- var falsePositivesOutput = createOutputStrings(falsePositives, sentences, instances);
-
- logger.debug("Model: \n{}", modelId);
- if (!falseNegativeOutput.isEmpty()) {
- logger.debug("False negatives:\n{}", String.join("\n", falseNegativeOutput));
- }
- if (!falsePositivesOutput.isEmpty()) {
- logger.debug("False positives:\n{}", String.join("\n", falsePositivesOutput));
- }
- }
-
- }
-
- private static MutableList createOutputStrings(Stream traceLinkStrings, ImmutableList sentences,
- ImmutableList instances) {
- var outputList = Lists.mutable.empty();
- for (var traceLinkString : traceLinkStrings.toList()) {
- var parts = traceLinkString.split(",", -1);
- if (parts.length < 2) {
- continue;
- }
- var id = parts[0];
-
- var modelElement = instances.detect(instance -> instance.getUid().equals(id));
-
- var sentence = parts[1];
-
- var sentenceNo = -1;
- try {
- sentenceNo = Integer.parseInt(sentence);
- } catch (NumberFormatException e) {
- logger.debug("Having problems retrieving sentence, so skipping line: {}", traceLinkString);
- continue;
- }
- var sentenceText = sentences.get(sentenceNo - 1);
-
- outputList.add(String.format("%-20s - %s (%s)", modelElement.getFullName(), sentenceText.getText(), traceLinkString));
- }
- return outputList;
- }
-
}
diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/SamCodeTraceabilityLinkRecoveryEvaluation.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/SamCodeTraceabilityLinkRecoveryEvaluation.java
index 07093be..ae136f1 100644
--- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/SamCodeTraceabilityLinkRecoveryEvaluation.java
+++ b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/SamCodeTraceabilityLinkRecoveryEvaluation.java
@@ -51,8 +51,7 @@ protected ImmutableList createTraceLinkStringList(ArDoCoResult arDoCoRes
@Override
protected ImmutableList getGoldStandard(CodeProject codeProject) {
- ImmutableList samCodeGoldStandard = codeProject.getSamCodeGoldStandard();
- return samCodeGoldStandard;
+ return codeProject.getSamCodeGoldStandard();
}
@Override
diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/TraceLinkEvaluationIT.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/TraceLinkEvaluationIT.java
index 244e246..25c165b 100644
--- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/TraceLinkEvaluationIT.java
+++ b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/TraceLinkEvaluationIT.java
@@ -3,63 +3,30 @@
import static edu.kit.kastel.mcse.ardoco.core.tests.eval.ProjectHelper.ANALYZE_CODE_DIRECTLY;
-import java.io.File;
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.LinkedHashMap;
+import java.util.Arrays;
import java.util.List;
-import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.function.Predicate;
-import org.eclipse.collections.api.factory.Lists;
-import org.eclipse.collections.api.list.MutableList;
-import org.eclipse.collections.api.tuple.Pair;
import org.junit.jupiter.api.*;
import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.EnumSource;
import org.junit.jupiter.params.provider.MethodSource;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import edu.kit.kastel.mcse.ardoco.core.api.models.ArchitectureModelType;
-import edu.kit.kastel.mcse.ardoco.core.api.models.tracelinks.SadSamTraceLink;
import edu.kit.kastel.mcse.ardoco.core.api.output.ArDoCoResult;
import edu.kit.kastel.mcse.ardoco.core.common.RepositoryHandler;
-import edu.kit.kastel.mcse.ardoco.core.common.util.DataRepositoryHelper;
-import edu.kit.kastel.mcse.ardoco.core.execution.ArDoCo;
-import edu.kit.kastel.mcse.ardoco.core.tests.TestUtil;
import edu.kit.kastel.mcse.ardoco.core.tests.eval.CodeProject;
import edu.kit.kastel.mcse.ardoco.core.tests.eval.GoldStandardProject;
import edu.kit.kastel.mcse.ardoco.core.tests.eval.Project;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResults;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.calculator.ResultCalculatorUtil;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.TestLink;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLDiffFile;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLLogFile;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLModelFile;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLPreviousFile;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLSentenceFile;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLSummaryFile;
@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
-public class TraceLinkEvaluationIT {
-
- protected static final Logger logger = LoggerFactory.getLogger(TraceLinkEvaluationIT.class);
+class TraceLinkEvaluationIT {
protected static final String OUTPUT = "target/testout-tlr-it";
protected static final String LOGGING_ARDOCO_CORE = "org.slf4j.simpleLogger.log.edu.kit.kastel.mcse.ardoco.core";
protected static AtomicBoolean analyzeCodeDirectly = ANALYZE_CODE_DIRECTLY;
- protected static final List>> RESULTS = new ArrayList<>();
- protected static final MutableList> PROJECT_RESULTS = Lists.mutable.empty();
- protected static final Map DATA_MAP = new LinkedHashMap<>();
-
@BeforeAll
static void beforeAll() {
System.setProperty(LOGGING_ARDOCO_CORE, "info");
@@ -67,9 +34,6 @@ static void beforeAll() {
@AfterAll
static void afterAll() {
- logOverallResultsForSadSamTlr();
- writeOutputForSadSamTlr();
-
System.setProperty(LOGGING_ARDOCO_CORE, "error");
}
@@ -77,66 +41,12 @@ private static void cleanUpCodeRepository(CodeProject codeProject) {
RepositoryHandler.removeRepository(codeProject.getCodeLocation());
}
- private static void logOverallResultsForSadSamTlr() {
- if (logger.isInfoEnabled()) {
- var name = "Overall Weighted";
- var results = ResultCalculatorUtil.calculateWeightedAverageResults(PROJECT_RESULTS.toImmutable());
- TestUtil.logResults(logger, name, results);
-
- name = "Overall Macro";
- results = ResultCalculatorUtil.calculateAverageResults(PROJECT_RESULTS.toImmutable());
- TestUtil.logResults(logger, name, results);
- }
- }
-
- private static void writeOutputForSadSamTlr() {
- var evalDir = Path.of(OUTPUT).resolve("ardoco_eval_tl");
- try {
- Files.createDirectories(evalDir);
-
- TLSummaryFile.save(evalDir.resolve("summary.txt"), RESULTS, DATA_MAP);
- TLModelFile.save(evalDir.resolve("models.txt"), DATA_MAP);
- TLSentenceFile.save(evalDir.resolve("sentences.txt"), DATA_MAP);
- TLLogFile.append(evalDir.resolve("log.txt"), RESULTS);
- TLPreviousFile.save(evalDir.resolve("previous.csv"), RESULTS, logger); // save before loading
- TLDiffFile.save(evalDir.resolve("diff.txt"), RESULTS, TLPreviousFile.load(evalDir.resolve("previous.csv"), DATA_MAP), DATA_MAP);
- } catch (IOException e) {
- logger.error("Failed to write output.", e);
- }
- }
-
- private static List getHistoricalProjects() {
- return filterForHistoricalProjects(List.of(Project.values()));
- }
-
- private static List getNonHistoricalCodeProjects() {
- return filterForNonHistoricalProjects(List.of(CodeProject.values()));
- }
-
- private static > List filterForHistoricalProjects(Collection unfilteredProjects) {
- return filterForProjects(unfilteredProjects, p -> p.name().endsWith("HISTORICAL"));
- }
-
- private static > List filterForNonHistoricalProjects(Collection unfilteredProjects) {
- return filterForProjects(unfilteredProjects, p -> !p.name().endsWith("HISTORICAL"));
- }
-
- private static > List filterForProjects(Collection unfilteredProjects, Predicate filter) {
- List projects = new ArrayList<>();
- for (var project : unfilteredProjects) {
- if (filter.test(project)) {
- projects.add(project);
- }
- }
- return projects;
- }
-
@EnabledIfEnvironmentVariable(named = "testCodeFull", matches = ".*")
@DisplayName("Evaluate SAD-SAM-Code TLR (Full)")
@ParameterizedTest(name = "{0}")
- @MethodSource("getNonHistoricalCodeProjects")
+ @EnumSource(CodeProject.class)
@Order(1)
- protected void evaluateSadSamCodeTlrFullIT(CodeProject project) {
+ void evaluateSadSamCodeTlrFullIT(CodeProject project) {
analyzeCodeDirectly.set(true);
if (analyzeCodeDirectly.get())
cleanUpCodeRepository(project);
@@ -149,9 +59,9 @@ protected void evaluateSadSamCodeTlrFullIT(CodeProject project) {
@EnabledIfEnvironmentVariable(named = "testCodeFull", matches = ".*")
@DisplayName("Evaluate SAM-Code TLR (Full)")
@ParameterizedTest(name = "{0}")
- @EnumSource(value = CodeProject.class, mode = EnumSource.Mode.MATCH_NONE, names = "^.*HISTORICAL$")
+ @EnumSource(value = CodeProject.class)
@Order(2)
- protected void evaluateSamCodeTlrFullIT(CodeProject project) {
+ void evaluateSamCodeTlrFullIT(CodeProject project) {
analyzeCodeDirectly.set(true);
if (analyzeCodeDirectly.get())
cleanUpCodeRepository(project);
@@ -163,9 +73,9 @@ protected void evaluateSamCodeTlrFullIT(CodeProject project) {
@DisplayName("Evaluate SAD-SAM-Code TLR")
@ParameterizedTest(name = "{0}")
- @MethodSource("getNonHistoricalCodeProjects")
+ @EnumSource(CodeProject.class)
@Order(9)
- protected void evaluateSadSamCodeTlrIT(CodeProject codeProject) {
+ void evaluateSadSamCodeTlrIT(CodeProject codeProject) {
analyzeCodeDirectly.set(false);
if (analyzeCodeDirectly.get())
cleanUpCodeRepository(codeProject);
@@ -179,9 +89,9 @@ protected void evaluateSadSamCodeTlrIT(CodeProject codeProject) {
@DisplayName("Evaluate SAM-Code TLR")
@ParameterizedTest(name = "{0}")
- @MethodSource("getNonHistoricalCodeProjects")
+ @EnumSource(CodeProject.class)
@Order(10)
- protected void evaluateSamCodeTlrIT(CodeProject project) {
+ void evaluateSamCodeTlrIT(CodeProject project) {
analyzeCodeDirectly.set(false);
if (analyzeCodeDirectly.get())
cleanUpCodeRepository(project);
@@ -193,68 +103,15 @@ protected void evaluateSamCodeTlrIT(CodeProject project) {
@DisplayName("Evaluate SAD-SAM TLR")
@ParameterizedTest(name = "{0}")
- @MethodSource("getNonHistoricalCodeProjects")
+ @MethodSource("getProjects")
@Order(20)
- protected void evaluateSadSamTlrIT(T project) {
+ void evaluateSadSamTlrIT(T project) {
var evaluation = new SadSamTraceabilityLinkRecoveryEvaluation<>();
- var results = evaluation.runTraceLinkEvaluation(project);
- Assertions.assertNotNull(results);
- }
-
- @EnabledIfEnvironmentVariable(named = "testHistoric", matches = ".*")
- @DisplayName("Evaluate TLR (Historical)")
- @ParameterizedTest(name = "{0}")
- @MethodSource("getHistoricalProjects")
- @Order(21)
- protected void evaluateSadSamTlrHistoricalIT(T project) {
- var evaluation = new SadSamTraceabilityLinkRecoveryEvaluation<>();
- ArDoCoResult arDoCoResult = evaluation.getArDoCoResult(project);
+ var arDoCoResult = evaluation.runTraceLinkEvaluation(project);
Assertions.assertNotNull(arDoCoResult);
-
- SadSamTraceabilityLinkRecoveryEvaluation.checkResults(project, arDoCoResult);
- SadSamTraceabilityLinkRecoveryEvaluation.writeDetailedOutput(project, arDoCoResult);
}
- /**
- * Test if the results from executing ArDoCo with UML are the same as with PCM
- *
- * @param project the project, provided by the EnumSource
- */
- @Disabled("Only enable this for local tests.")
- @DisplayName("Compare TLR for UML/PCM")
- @ParameterizedTest(name = "{0}")
- @EnumSource(value = Project.class)
- @Order(29)
- protected void compareSadSamTlRForPcmAndUmlIT(Project project) {
- String name = project.name();
- var inputText = project.getTextFile();
-
- var evaluation = new SadSamTraceabilityLinkRecoveryEvaluation<>();
-
- var ardocoRunForPCM = evaluation.getArDoCoResult(project);
- Assertions.assertNotNull(ardocoRunForPCM);
-
- var arDoCo = ArDoCo.getInstance(name);
- var preprocessingData = ardocoRunForPCM.getPreprocessingData();
- DataRepositoryHelper.putPreprocessingData(arDoCo.getDataRepository(), preprocessingData);
-
- File umlModelFile = project.getModelFile(ArchitectureModelType.UML);
- File additionalConfigurations = project.getAdditionalConfigurationsFile();
- var ardocoRunForUML = evaluation.getArDoCoResult(name, inputText, umlModelFile, ArchitectureModelType.UML, additionalConfigurations);
- Assertions.assertNotNull(ardocoRunForUML);
-
- var pcmTLs = ardocoRunForPCM.getAllTraceLinks()
- .toList()
- .sortThisBy(SadSamTraceLink::getModelElementUid)
- .sortThisByInt(SadSamTraceLink::getSentenceNumber);
- var umlTLs = ardocoRunForUML.getAllTraceLinks()
- .toList()
- .sortThisBy(SadSamTraceLink::getModelElementUid)
- .sortThisByInt(SadSamTraceLink::getSentenceNumber);
-
- Assertions.assertAll( //
- () -> Assertions.assertEquals(pcmTLs.size(), umlTLs.size()), //
- () -> Assertions.assertIterableEquals(pcmTLs, umlTLs) //
- );
+ private static List extends GoldStandardProject> getProjects() {
+ return Arrays.asList(Project.values());
}
}
diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/TraceLinkEvaluationSadCodeDirectIT.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/TraceLinkEvaluationSadCodeDirectIT.java
index 6944b61..522c963 100644
--- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/TraceLinkEvaluationSadCodeDirectIT.java
+++ b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/TraceLinkEvaluationSadCodeDirectIT.java
@@ -3,51 +3,29 @@
import static edu.kit.kastel.mcse.ardoco.core.tests.eval.ProjectHelper.ANALYZE_CODE_DIRECTLY;
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
import java.util.ArrayList;
-import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
-import java.util.function.Predicate;
-import org.eclipse.collections.api.factory.Lists;
-import org.eclipse.collections.api.list.MutableList;
import org.eclipse.collections.api.tuple.Pair;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.DisplayName;
import org.junit.jupiter.params.ParameterizedTest;
-import org.junit.jupiter.params.provider.MethodSource;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
+import org.junit.jupiter.params.provider.EnumSource;
import edu.kit.kastel.mcse.ardoco.core.api.output.ArDoCoResult;
-import edu.kit.kastel.mcse.ardoco.core.tests.TestUtil;
import edu.kit.kastel.mcse.ardoco.core.tests.eval.CodeProject;
import edu.kit.kastel.mcse.ardoco.core.tests.eval.GoldStandardProject;
import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResults;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.calculator.ResultCalculatorUtil;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.TestLink;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLDiffFile;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLLogFile;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLModelFile;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLPreviousFile;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLSentenceFile;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files.TLSummaryFile;
+import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.ModelElementSentenceLink;
class TraceLinkEvaluationSadCodeDirectIT {
- protected static final Logger logger = LoggerFactory.getLogger(TraceLinkEvaluationIT.class);
-
- protected static final String OUTPUT = "target/testout";
-
protected static final String LOGGING_ARDOCO_CORE = "org.slf4j.simpleLogger.log.edu.kit.kastel.mcse.ardoco.core";
- protected static final List>> RESULTS = new ArrayList<>();
- protected static final MutableList> PROJECT_RESULTS = Lists.mutable.empty();
+ protected static final List>> RESULTS = new ArrayList<>();
protected static final Map DATA_MAP = new LinkedHashMap<>();
@BeforeAll
@@ -57,64 +35,16 @@ static void beforeAll() {
@AfterAll
static void afterAll() {
- logOverallResultsForSadSamTlr();
- writeOutputForSadSamTlr();
System.setProperty(LOGGING_ARDOCO_CORE, "error");
}
@DisplayName("Evaluate SAD-Code TLR")
@ParameterizedTest(name = "{0}")
- @MethodSource("getNonHistoricalCodeProjects")
+ @EnumSource(CodeProject.class)
void evaluateSadCodeTlrIT(CodeProject project) {
ANALYZE_CODE_DIRECTLY.set(false);
var evaluation = new SadCodeTraceabilityLinkRecoveryEvaluation();
ArDoCoResult results = evaluation.runTraceLinkEvaluation(project);
Assertions.assertNotNull(results);
}
-
- private static List getNonHistoricalCodeProjects() {
- return filterForNonHistoricalProjects(List.of(CodeProject.values()));
- }
-
- private static > List filterForNonHistoricalProjects(Collection unfilteredProjects) {
- return filterForProjects(unfilteredProjects, p -> !p.name().endsWith("HISTORICAL"));
- }
-
- private static > List filterForProjects(Collection unfilteredProjects, Predicate filter) {
- List projects = new ArrayList<>();
- for (var project : unfilteredProjects) {
- if (filter.test(project)) {
- projects.add(project);
- }
- }
- return projects;
- }
-
- private static void logOverallResultsForSadSamTlr() {
- if (logger.isInfoEnabled()) {
- var name = "Overall Weighted";
- var results = ResultCalculatorUtil.calculateWeightedAverageResults(PROJECT_RESULTS.toImmutable());
- TestUtil.logResults(logger, name, results);
-
- name = "Overall Macro";
- results = ResultCalculatorUtil.calculateAverageResults(PROJECT_RESULTS.toImmutable());
- TestUtil.logResults(logger, name, results);
- }
- }
-
- private static void writeOutputForSadSamTlr() {
- var evalDir = Path.of(OUTPUT).resolve("ardoco_eval_tl");
- try {
- Files.createDirectories(evalDir);
-
- TLSummaryFile.save(evalDir.resolve("summary.txt"), RESULTS, DATA_MAP);
- TLModelFile.save(evalDir.resolve("models.txt"), DATA_MAP);
- TLSentenceFile.save(evalDir.resolve("sentences.txt"), DATA_MAP);
- TLLogFile.append(evalDir.resolve("log.txt"), RESULTS);
- TLPreviousFile.save(evalDir.resolve("previous.csv"), RESULTS, logger); // save before loading
- TLDiffFile.save(evalDir.resolve("diff.txt"), RESULTS, TLPreviousFile.load(evalDir.resolve("previous.csv"), DATA_MAP), DATA_MAP);
- } catch (IOException e) {
- logger.error("Failed to write output.", e);
- }
- }
}
diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/TraceabilityLinkRecoveryEvaluation.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/TraceabilityLinkRecoveryEvaluation.java
index b9e36f7..81658a0 100644
--- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/TraceabilityLinkRecoveryEvaluation.java
+++ b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/TraceabilityLinkRecoveryEvaluation.java
@@ -9,7 +9,6 @@
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.Set;
-import java.util.stream.Collectors;
import org.eclipse.collections.api.collection.ImmutableCollection;
import org.eclipse.collections.api.factory.Lists;
@@ -31,7 +30,7 @@
import edu.kit.kastel.mcse.ardoco.core.tests.eval.GoldStandardProject;
import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResults;
import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.ExpectedResults;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.ResultMatrix;
+import edu.kit.kastel.mcse.ardoco.metrics.ClassificationMetricsCalculator;
public abstract class TraceabilityLinkRecoveryEvaluation {
protected static final Logger logger = LoggerFactory.getLogger(TraceabilityLinkRecoveryEvaluation.class);
@@ -40,9 +39,11 @@ public abstract class TraceabilityLinkRecoveryEvaluation resultMap = new LinkedHashMap<>();
- protected ArDoCoResult runTraceLinkEvaluation(T project) {
+ protected final ArDoCoResult runTraceLinkEvaluation(T project) {
ArDoCoResult result = resultMap.get(project);
if (result == null || !resultHasRequiredData(result)) {
ArDoCoRunner runner = getAndSetupRunner(project);
@@ -160,44 +161,16 @@ protected EvaluationResults calculateEvaluationResults(ArDoCoResult arDo
Set distinctTraceLinks = new LinkedHashSet<>(results.castToCollection());
Set distinctGoldStandard = new LinkedHashSet<>(goldStandard.castToCollection());
+ int confusionMatrixSum = getConfusionMatrixSum(arDoCoResult);
+
+ var calculator = ClassificationMetricsCalculator.getInstance();
+ var classification = calculator.calculateMetrics(distinctTraceLinks, distinctGoldStandard, confusionMatrixSum);
+ return new EvaluationResults<>(classification);
- // True Positives are the trace links that are contained on both lists
- Set truePositives = distinctTraceLinks.stream()
- .filter(tl -> isTraceLinkContainedInGoldStandard(tl, distinctGoldStandard))
- .collect(Collectors.toSet());
- ImmutableList truePositivesList = Lists.immutable.ofAll(truePositives);
-
- // False Positives are the trace links that are only contained in the result set
- Set falsePositives = distinctTraceLinks.stream()
- .filter(tl -> !isTraceLinkContainedInGoldStandard(tl, distinctGoldStandard))
- .collect(Collectors.toSet());
- ImmutableList falsePositivesList = Lists.immutable.ofAll(falsePositives);
-
- // False Negatives are the trace links that are only contained in the gold standard
- Set falseNegatives = distinctGoldStandard.stream()
- .filter(gstl -> !isGoldStandardTraceLinkContainedInTraceLinks(gstl, distinctTraceLinks))
- .collect(Collectors.toSet());
- ImmutableList falseNegativesList = Lists.immutable.ofAll(falseNegatives);
-
- int trueNegatives = getConfusionMatrixSum(arDoCoResult) - truePositives.size() - falsePositives.size() - falseNegatives.size();
- return EvaluationResults.createEvaluationResults(new ResultMatrix<>(truePositivesList, trueNegatives, falsePositivesList, falseNegativesList));
}
protected abstract ImmutableList createTraceLinkStringList(ArDoCoResult arDoCoResult);
protected abstract int getConfusionMatrixSum(ArDoCoResult arDoCoResult);
- private static boolean areTraceLinksMatching(String goldStandardTraceLink, String traceLink) {
- traceLink = traceLink.strip();
- goldStandardTraceLink = goldStandardTraceLink.strip();
- return (goldStandardTraceLink.equals(traceLink));
- }
-
- private static boolean isTraceLinkContainedInGoldStandard(String traceLink, Set goldStandard) {
- return goldStandard.stream().anyMatch(goldStandardTraceLink -> areTraceLinksMatching(goldStandardTraceLink, traceLink));
- }
-
- private static boolean isGoldStandardTraceLinkContainedInTraceLinks(String goldStandardTraceLink, Set traceLinks) {
- return traceLinks.stream().anyMatch(traceLink -> areTraceLinksMatching(goldStandardTraceLink, traceLink));
- }
}
diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/TestLink.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/ModelElementSentenceLink.java
similarity index 51%
rename from tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/TestLink.java
rename to tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/ModelElementSentenceLink.java
index 42bb6c3..04ed6d4 100644
--- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/TestLink.java
+++ b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/ModelElementSentenceLink.java
@@ -8,15 +8,15 @@
/**
* Represents a simple trace link by the id of the model and number of the sentence involved.
*/
-public record TestLink(String modelId, int sentenceNr) implements Comparable {
+public record ModelElementSentenceLink(String modelElementId, int sentenceNumber) implements Comparable {
- public TestLink(SadSamTraceLink traceLink) {
+ public ModelElementSentenceLink(SadSamTraceLink traceLink) {
this(traceLink.getModelElementUid(), traceLink.getSentenceNumber());
}
@Override
- public int compareTo(TestLink o) {
- return Comparator.comparing(TestLink::modelId).thenComparing(TestLink::sentenceNr).compare(this, o);
+ public int compareTo(ModelElementSentenceLink o) {
+ return Comparator.comparing(ModelElementSentenceLink::modelElementId).thenComparing(ModelElementSentenceLink::sentenceNumber).compare(this, o);
}
}
diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/TLRUtil.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/TLRUtil.java
index cf5ef6b..e21bf0d 100644
--- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/TLRUtil.java
+++ b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/TLRUtil.java
@@ -28,8 +28,8 @@ private TLRUtil() {
* @param data the {@link EvaluationResults}
* @return the trace links
*/
- public static ImmutableList getTraceLinks(DataRepository data) {
- var traceLinks = Lists.mutable.empty();
+ public static ImmutableList getTraceLinks(DataRepository data) {
+ var traceLinks = Lists.mutable.empty();
var connectionStates = data.getData(ConnectionStates.ID, ConnectionStates.class).orElseThrow();
var modelStates = data.getData(ModelStates.ID, ModelStates.class).orElseThrow();
@@ -40,7 +40,7 @@ public static ImmutableList getTraceLinks(DataRepository data) {
.map(connectionStates::getConnectionState)
.toList();
for (var connectionState : connectionStatesList) {
- traceLinks.addAll(connectionState.getTraceLinks().stream().map(TestLink::new).toList());
+ traceLinks.addAll(connectionState.getTraceLinks().stream().map(ModelElementSentenceLink::new).toList());
}
return traceLinks.toImmutable();
}
diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLDiffFile.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLDiffFile.java
deleted file mode 100644
index 5489385..0000000
--- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLDiffFile.java
+++ /dev/null
@@ -1,140 +0,0 @@
-/* Licensed under MIT 2022-2024. */
-package edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files;
-
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.StandardOpenOption;
-import java.text.DecimalFormat;
-import java.util.Collection;
-import java.util.Comparator;
-import java.util.List;
-import java.util.Map;
-
-import org.eclipse.collections.api.tuple.Pair;
-
-import edu.kit.kastel.mcse.ardoco.core.api.output.ArDoCoResult;
-import edu.kit.kastel.mcse.ardoco.core.common.util.CommonUtilities;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.GoldStandardProject;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResults;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.TestLink;
-
-/**
- * This is a helper class to write out a diff-file for the evaluation results of TLR.
- */
-public class TLDiffFile {
-
- private static final DecimalFormat NUMBER_FORMAT = new DecimalFormat("+##0.00%;-##0.00%");
- private static final String LINE_SEPARATOR = System.lineSeparator();
-
- private TLDiffFile() {
- throw new IllegalAccessError("This constructor should not be called!");
- }
-
- /**
- * Writes out the differences of new and old results.
- *
- * @param targetFile file to write into
- * @param newProjectResults new results
- * @param oldProjectResults old results
- * @param dataMap the mapping of Project to ArDoCoResult of the new run
- * @throws IOException if writing fails
- */
- public static void save(Path targetFile, Collection>> newProjectResults,
- Collection>> oldProjectResults, Map dataMap)
- throws IOException {
- // Assumption: Both collections contain the same projects
-
- newProjectResults = newProjectResults.stream().sorted(Comparator.comparing(x -> x.getOne().getProjectName())).toList();
- oldProjectResults = oldProjectResults.stream().sorted(Comparator.comparing(x -> x.getOne().getProjectName())).toList();
-
- var builder = new StringBuilder();
-
- builder.append("Time of evaluation: `").append(CommonUtilities.getCurrentTimeAsString()).append("`");
- builder.append(LINE_SEPARATOR);
-
- var newResults = newProjectResults.stream().map(Pair::getTwo).toList();
- var oldResults = newProjectResults.stream().map(Pair::getTwo).toList();
-
- // Append average differences in precision, recall, f1
- var oldAvgPrecision = oldResults.stream().mapToDouble(EvaluationResults::precision).average().orElse(Double.NaN);
- var oldAvgRecall = oldResults.stream().mapToDouble(EvaluationResults::recall).average().orElse(Double.NaN);
- var oldAvgF1 = oldResults.stream().mapToDouble(EvaluationResults::f1).average().orElse(Double.NaN);
- var newAvgPrecision = newResults.stream().mapToDouble(EvaluationResults::precision).average().orElse(Double.NaN);
- var newAvgRecall = newResults.stream().mapToDouble(EvaluationResults::recall).average().orElse(Double.NaN);
- var newAvgF1 = newResults.stream().mapToDouble(EvaluationResults::f1).average().orElse(Double.NaN);
-
- builder.append("Ø ");
- builder.append(NUMBER_FORMAT.format(newAvgPrecision - oldAvgPrecision)).append(" Precision, ");
- builder.append(NUMBER_FORMAT.format(newAvgRecall - oldAvgRecall)).append(" Recall, ");
- builder.append(NUMBER_FORMAT.format(newAvgF1 - oldAvgF1)).append(" F1");
- builder.append(LINE_SEPARATOR).append(LINE_SEPARATOR);
-
- // Append project specific details
- for (Pair> oldProjectResult : oldProjectResults) {
- var project = oldProjectResult.getOne();
- var newResultOptional = newProjectResults.stream().filter(r -> r.getOne().equals(project)).findAny();
- if (newResultOptional.isEmpty()) {
- continue;
- }
- var newResult = newResultOptional.get().getTwo();
- var data = dataMap.get(project);
-
- builder.append("# ").append(project.getProjectName());
- builder.append(LINE_SEPARATOR).append(LINE_SEPARATOR);
-
- var oldResult = oldProjectResult.getTwo();
- builder.append(NUMBER_FORMAT.format(newResult.precision() - oldResult.precision())).append(" Precision, ");
- builder.append(NUMBER_FORMAT.format(newResult.recall() - oldResult.recall())).append(" Recall, ");
- builder.append(NUMBER_FORMAT.format(newResult.f1() - oldResult.f1())).append(" F1");
- builder.append(LINE_SEPARATOR).append(LINE_SEPARATOR);
-
- var newTruePositives = findNewLinks(oldResult.truePositives().castToList(), newResult.truePositives().castToList());
- appendList(builder, "New true positives", newTruePositives, data);
-
- var newFalsePositives = findNewLinks(oldResult.falsePositives().castToList(), newResult.falsePositives().castToList());
- appendList(builder, "New false positives", newFalsePositives, data);
-
- var newFalseNegatives = findNewLinks(oldResult.falseNegatives().castToList(), newResult.falseNegatives().castToList());
- appendList(builder, "New false negatives", newFalseNegatives, data);
-
- var lostFalsePositives = findMissingLinks(oldResult.falsePositives().castToList(), newResult.falsePositives().castToList());
- appendList(builder, "False positives that are now true negatives", lostFalsePositives, data);
-
- builder.append(LINE_SEPARATOR);
- }
-
- Files.writeString(targetFile, builder.toString(), StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
- }
-
- private static List findNewLinks(List oldLinks, List newLinks) {
- return newLinks.stream().filter(link -> !oldLinks.contains(link)).toList();
- }
-
- private static List findMissingLinks(List oldLinks, List newLinks) {
- return oldLinks.stream().filter(link -> !newLinks.contains(link)).toList();
- }
-
- private static void appendList(StringBuilder builder, String description, List links, ArDoCoResult arDoCoResult) {
- var text = arDoCoResult.getText();
- if (links.isEmpty()) {
- return;
- }
-
- builder.append(description).append(":");
- builder.append(LINE_SEPARATOR);
-
- for (TestLink link : links) {
- for (var modelId : arDoCoResult.getModelIds()) {
- var dataModel = arDoCoResult.getModelState(modelId);
- var line = TLSummaryFile.format(link, text, dataModel);
- if (line != null && !line.isBlank()) {
- builder.append("- ").append(line).append(LINE_SEPARATOR);
- }
- }
- }
-
- builder.append(LINE_SEPARATOR);
- }
-
-}
diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLGoldStandardFile.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLGoldStandardFile.java
deleted file mode 100644
index 626fd7b..0000000
--- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLGoldStandardFile.java
+++ /dev/null
@@ -1,35 +0,0 @@
-/* Licensed under MIT 2022-2024. */
-package edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files;
-
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.util.List;
-
-import org.eclipse.collections.api.factory.Lists;
-import org.eclipse.collections.api.list.MutableList;
-
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.GoldStandardProject;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.TestLink;
-
-public class TLGoldStandardFile {
-
- private TLGoldStandardFile() {
- // no instantiation
- throw new IllegalAccessError("No instantiation allowed");
- }
-
- public static MutableList loadLinks(GoldStandardProject goldStandardProject) throws IOException {
- Path path = goldStandardProject.getTlrGoldStandardFile().toPath();
- List lines = Files.readAllLines(path);
-
- return Lists.mutable.ofAll(lines.stream()
- .skip(1) // skip csv header
- .map(line -> line.split(",")) // modelElementId,sentenceNr
- .map(array -> new TestLink(array[0], Integer.parseInt(array[1])))
- .map(link -> new TestLink(link.modelId(), link.sentenceNr() - 1))
- // ^ goldstandard sentences start with 1 while ISentences are zero indexed
- .toList());
- }
-
-}
diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLLogFile.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLLogFile.java
deleted file mode 100644
index 7ef9dd0..0000000
--- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLLogFile.java
+++ /dev/null
@@ -1,68 +0,0 @@
-/* Licensed under MIT 2022-2024. */
-package edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files;
-
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.StandardOpenOption;
-import java.text.DecimalFormat;
-import java.util.ArrayList;
-import java.util.Comparator;
-import java.util.List;
-
-import org.eclipse.collections.api.tuple.Pair;
-
-import edu.kit.kastel.mcse.ardoco.core.common.util.CommonUtilities;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.GoldStandardProject;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResults;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.TestLink;
-
-/**
- * This helper-class offer functionality to write out a log of the results for TLR.
- */
-public class TLLogFile {
- private static final String LINE_SEPARATOR = System.lineSeparator();
- private static final DecimalFormat NUMBER_FORMAT = new DecimalFormat("##0.00%");
-
- private TLLogFile() {
- throw new IllegalAccessError("This constructor should not be called!");
- }
-
- /**
- * Appends the given results to the given file.
- *
- * @param targetFile file to append to
- * @param projectResults the results to write out
- * @throws IOException if writing to file system fails
- */
- public static void append(Path targetFile, List>> projectResults) throws IOException {
- List> results = projectResults.stream().map(Pair::getTwo).toList();
- var builder = new StringBuilder();
-
- builder.append("- `").append(CommonUtilities.getCurrentTimeAsString()).append("` ");
-
- // calc average
- double avgPrecision = results.stream().mapToDouble(EvaluationResults::precision).average().orElse(Double.NaN);
- double avgRecall = results.stream().mapToDouble(EvaluationResults::recall).average().orElse(Double.NaN);
- double avgF1 = results.stream().mapToDouble(EvaluationResults::f1).average().orElse(Double.NaN);
-
- builder.append(String.format("[`Ø` %s %s %s]", NUMBER_FORMAT.format(avgPrecision), NUMBER_FORMAT.format(avgRecall), NUMBER_FORMAT.format(avgF1)));
-
- var sortedResults = new ArrayList<>(projectResults);
- sortedResults.sort(Comparator.comparing(x -> x.getOne().getProjectName()));
- for (Pair> projectResult : sortedResults) {
- String alias = projectResult.getOne().getAlias();
- EvaluationResults result = projectResult.getTwo();
- String precision = NUMBER_FORMAT.format(result.precision());
- String recall = NUMBER_FORMAT.format(result.recall());
- String F1 = NUMBER_FORMAT.format(result.f1());
-
- builder.append(String.format(" [`%s` %s %s %s]", alias, precision, recall, F1));
- }
-
- builder.append(LINE_SEPARATOR);
-
- Files.writeString(targetFile, builder.toString(), StandardOpenOption.CREATE, StandardOpenOption.APPEND);
- }
-
-}
diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLModelFile.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLModelFile.java
deleted file mode 100644
index 0afdc95..0000000
--- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLModelFile.java
+++ /dev/null
@@ -1,68 +0,0 @@
-/* Licensed under MIT 2022-2024. */
-package edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files;
-
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.StandardOpenOption;
-import java.util.Map;
-
-import edu.kit.kastel.mcse.ardoco.core.api.models.ModelInstance;
-import edu.kit.kastel.mcse.ardoco.core.api.output.ArDoCoResult;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.GoldStandardProject;
-
-/**
- * This helper-class offers functionality to write out information about the models as seen by ArDoCo after evaluation of TLR.
- */
-public class TLModelFile {
-
- private static final String LINE_SEPARATOR = System.lineSeparator();
-
- private TLModelFile() {
- throw new IllegalAccessError("This constructor should not be called!");
- }
-
- /**
- * Writes out information about models to the target file.
- *
- * @param targetFile the file to write to
- * @param dataMap the data map to extract model information for each project
- * @throws IOException if writing to file system fails
- */
- public static void save(Path targetFile, Map dataMap) throws IOException {
- var projects = dataMap.keySet().stream().sorted().toList();
- var builder = new StringBuilder();
-
- for (GoldStandardProject project : projects) {
- var projectData = dataMap.get(project);
-
- builder.append("# ").append(project.getProjectName());
- builder.append(LINE_SEPARATOR).append(LINE_SEPARATOR);
-
- for (var modelId : projectData.getModelIds()) {
- var models = projectData.getModelState(modelId).getInstances();
- builder.append("## ModelId: ").append(modelId);
- builder.append(LINE_SEPARATOR);
- for (ModelInstance model : models) {
- builder.append("- [")
- .append(model.getUid())
- .append("]: \"")
- .append(model.getFullName())
- .append("\" (")
- .append(model.getFullType())
- .append(") (")
- .append(String.join(", ", model.getNameParts()))
- .append(") (")
- .append(String.join(", ", model.getTypeParts()))
- .append(")")
- .append(LINE_SEPARATOR);
- }
- }
-
- builder.append(LINE_SEPARATOR).append(LINE_SEPARATOR);
- }
-
- Files.writeString(targetFile, builder.toString(), StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
- }
-
-}
diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLPreviousFile.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLPreviousFile.java
deleted file mode 100644
index 52c2802..0000000
--- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLPreviousFile.java
+++ /dev/null
@@ -1,112 +0,0 @@
-/* Licensed under MIT 2022-2024. */
-package edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files;
-
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.StandardOpenOption;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Comparator;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-
-import org.eclipse.collections.api.factory.Lists;
-import org.eclipse.collections.api.tuple.Pair;
-import org.eclipse.collections.impl.tuple.Tuples;
-import org.slf4j.Logger;
-
-import edu.kit.kastel.mcse.ardoco.core.api.output.ArDoCoResult;
-import edu.kit.kastel.mcse.ardoco.core.tests.TestUtil;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.GoldStandardProject;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.Project;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResults;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.TestLink;
-
-/**
- * This is a helper class to load and write out the results of the previous evaluation run for TLR results.
- */
-public class TLPreviousFile {
- private static final String LINE_SEPARATOR = System.lineSeparator();
-
- private TLPreviousFile() {
- throw new IllegalAccessError("This constructor should not be called!");
- }
-
- /**
- * Loads the previous results
- *
- * @param sourceFile file to load from
- * @return the previous results
- * @throws IOException if file access fails
- */
- public static Collection>> load(Path sourceFile,
- final Map DATA_MAP) throws IOException {
- List lines = Files.readAllLines(sourceFile);
- Map> foundLinkMap = new LinkedHashMap<>();
- List>> results = new ArrayList<>();
-
- for (String line : lines) {
- var parts = line.split(",", -1);
- Project project = Project.valueOf(parts[0]);
- String modelId = parts[1];
- int sentenceNr = Integer.parseInt(parts[2]);
-
- var testLink = new TestLink(modelId, sentenceNr);
-
- if (!foundLinkMap.containsKey(project)) {
- foundLinkMap.put(project, new ArrayList<>());
- }
-
- foundLinkMap.get(project).add(testLink);
- }
-
- for (Project project : foundLinkMap.keySet()) {
- var correctLinks = TLGoldStandardFile.loadLinks(project);
- var foundLinks = foundLinkMap.get(project);
-
- ArDoCoResult arDoCoResult = DATA_MAP.get(project);
- if (arDoCoResult != null) {
- results.add(Tuples.pair(project, TestUtil.compareTLR(arDoCoResult, Lists.immutable.ofAll(foundLinks), correctLinks.toImmutable())));
- }
- }
-
- return results;
- }
-
- /**
- * Saves the given results to the given file.
- *
- * @param targetFile file to save to
- * @param projectResults results to save
- * @throws IOException if writing to file system fails
- */
- public static void save(Path targetFile, Collection>> projectResults, Logger logger)
- throws IOException {
- if (Files.exists(targetFile)) {
- logger.warn("File with the results of the previous evaluation run already exists.");
- return; // do not overwrite
- }
-
- var sortedResults = new ArrayList<>(projectResults);
- sortedResults.sort(Comparator.comparing(x -> x.getOne().getProjectName()));
-
- var builder = new StringBuilder();
-
- for (Pair> projectResult : sortedResults) {
- EvaluationResults result = projectResult.getTwo();
- for (TestLink foundLink : result.getFound()) {
- builder.append(projectResult.getOne().getProjectName());
- builder.append(',');
- builder.append(foundLink.modelId());
- builder.append(',');
- builder.append(foundLink.sentenceNr());
- builder.append(LINE_SEPARATOR);
- }
- }
-
- Files.writeString(targetFile, builder.toString(), StandardOpenOption.CREATE);
- }
-
-}
diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLSentenceFile.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLSentenceFile.java
deleted file mode 100644
index 0e24ada..0000000
--- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLSentenceFile.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/* Licensed under MIT 2022-2024. */
-package edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files;
-
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.StandardOpenOption;
-import java.util.Map;
-
-import org.eclipse.collections.api.list.ImmutableList;
-
-import edu.kit.kastel.mcse.ardoco.core.api.output.ArDoCoResult;
-import edu.kit.kastel.mcse.ardoco.core.api.text.Sentence;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.GoldStandardProject;
-
-/**
- * This helper-class offers functionality to write out the sentences as seen by ArDoCo after the evaluation runs for TLR are done.
- */
-public class TLSentenceFile {
- private static final String LINE_SEPARATOR = System.lineSeparator();
-
- private TLSentenceFile() {
- throw new IllegalAccessError("This constructor should not be called!");
- }
-
- /**
- * Write out the sentences from the given data map to the target file
- *
- * @param targetFile file to write to
- * @param dataMap data to extract the sentences from
- * @throws IOException if writing to file system fails
- */
- public static void save(Path targetFile, Map dataMap) throws IOException {
- var projects = dataMap.keySet().stream().sorted().toList();
- var builder = new StringBuilder();
-
- for (GoldStandardProject project : projects) {
- ImmutableList sentences = dataMap.get(project).getText().getSentences();
-
- builder.append("# ").append(project.getProjectName());
- builder.append(LINE_SEPARATOR).append(LINE_SEPARATOR);
-
- for (Sentence sentence : sentences) {
- builder.append("- [").append(sentence.getSentenceNumber()).append("]: ").append(sentence.getText()).append(LINE_SEPARATOR);
- }
-
- builder.append(LINE_SEPARATOR).append(LINE_SEPARATOR);
- }
-
- Files.writeString(targetFile, builder.toString(), StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
- }
-
-}
diff --git a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLSummaryFile.java b/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLSummaryFile.java
deleted file mode 100644
index 7d9e650..0000000
--- a/tests/integration-tests/tests-tlr/src/test/java/edu/kit/kastel/mcse/ardoco/tlr/tests/integration/tlrhelper/files/TLSummaryFile.java
+++ /dev/null
@@ -1,147 +0,0 @@
-/* Licensed under MIT 2022-2024. */
-package edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.files;
-
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.StandardOpenOption;
-import java.text.DecimalFormat;
-import java.util.Collection;
-import java.util.List;
-import java.util.Map;
-
-import org.eclipse.collections.api.factory.Lists;
-import org.eclipse.collections.api.tuple.Pair;
-
-import edu.kit.kastel.mcse.ardoco.core.api.models.LegacyModelExtractionState;
-import edu.kit.kastel.mcse.ardoco.core.api.output.ArDoCoResult;
-import edu.kit.kastel.mcse.ardoco.core.api.text.Text;
-import edu.kit.kastel.mcse.ardoco.core.common.util.CommonUtilities;
-import edu.kit.kastel.mcse.ardoco.core.tests.TestUtil;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.GoldStandardProject;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.EvaluationResults;
-import edu.kit.kastel.mcse.ardoco.core.tests.eval.results.calculator.ResultCalculatorUtil;
-import edu.kit.kastel.mcse.ardoco.tlr.tests.integration.tlrhelper.TestLink;
-
-/**
- * This helper class offers functionality to write out a summary of the TLR evaluation runs for all projects.
- */
-public class TLSummaryFile {
- private static final DecimalFormat NUMBER_FORMAT = new DecimalFormat("##0.00%");
- private static final String LINE_SEPARATOR = System.lineSeparator();
-
- private TLSummaryFile() {
- throw new IllegalAccessError("This constructor should not be called!");
- }
-
- /**
- * Writes a summary of the given results, data etc. to the given file.
- *
- * @param targetFile file to write to
- * @param results results of the runs
- * @param dataMap the outcomes (data) of the runs
- * @throws IOException if writing to file system fails
- */
- public static void save(Path targetFile, Collection>> results,
- Map dataMap) throws IOException {
- var sortedResults = results.stream().sorted().toList();
- var builder = new StringBuilder();
-
- builder.append("Time of evaluation: `").append(CommonUtilities.getCurrentTimeAsString()).append("`");
- builder.append(LINE_SEPARATOR);
-
- appendOverallResults(sortedResults, builder);
-
- for (var result : sortedResults) {
- appendProjectResultSummary(dataMap, builder, result);
- builder.append(LINE_SEPARATOR);
- }
-
- Files.writeString(targetFile, builder.toString(), StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
- }
-
- private static void appendProjectResultSummary(Map dataMap, StringBuilder builder,
- Pair> projectResult) {
- var data = dataMap.get(projectResult.getOne());
- var text = data.getText();
-
- var result = projectResult.getTwo();
-
- var precision = NUMBER_FORMAT.format(result.precision());
- var recall = NUMBER_FORMAT.format(result.recall());
- var f1Score = NUMBER_FORMAT.format(result.f1());
- var truePosCount = result.truePositives().size();
- var falsePositives = result.falsePositives();
- var falsePosCount = falsePositives.size();
- var falseNegatives = result.falseNegatives();
- var falseNegCount = falseNegatives.size();
-
- builder.append("# ").append(projectResult.getOne().getProjectName());
- builder.append(LINE_SEPARATOR).append(LINE_SEPARATOR);
-
- builder.append("Summary:").append(LINE_SEPARATOR);
- builder.append(String.format("- %s Precision / %s Recall / %s F1", precision, recall, f1Score));
- builder.append(LINE_SEPARATOR);
- builder.append(String.format("- %s True Positives / %s False Positives / %s False Negatives", truePosCount, falsePosCount, falseNegCount));
- builder.append(LINE_SEPARATOR).append(LINE_SEPARATOR);
-
- if (!falsePositives.isEmpty()) {
- var falsePositivesOutput = createFalseLinksOutput("False Positives", falsePositives.castToList(), data, text);
- builder.append(falsePositivesOutput);
- }
-
- if (!falseNegatives.isEmpty()) {
- var falseNegativesOutput = createFalseLinksOutput("False Negatives", falseNegatives.castToList(), data, text);
- builder.append(falseNegativesOutput);
- }
- }
-
- private static void appendOverallResults(List>> projectResults, StringBuilder builder) {
- var results = Lists.mutable.ofAll(projectResults.stream().map(Pair::getTwo).toList());
- var weightedResults = ResultCalculatorUtil.calculateWeightedAverageResults(results.toImmutable());
- var macroResults = ResultCalculatorUtil.calculateAverageResults(results.toImmutable());
- var resultString = TestUtil.createResultLogString("Overall Weighted", weightedResults);
- builder.append(resultString).append(LINE_SEPARATOR);
- resultString = TestUtil.createResultLogString("Overall Macro", macroResults);
- builder.append(resultString).append(LINE_SEPARATOR).append(LINE_SEPARATOR);
- }
-
- private static String createFalseLinksOutput(String type, List falseLinks, ArDoCoResult data, Text text) {
- var builder = new StringBuilder();
- builder.append(type).append(":").append(LINE_SEPARATOR);
-
- for (TestLink falseLink : falseLinks) {
- builder.append(createFalseLinkOutput(data, text, falseLink));
- }
-
- builder.append(LINE_SEPARATOR);
- return builder.toString();
- }
-
- private static String createFalseLinkOutput(ArDoCoResult data, Text text, TestLink falseLink) {
- var builder = new StringBuilder();
- for (var modelId : data.getModelIds()) {
- var dataModel = data.getModelState(modelId);
- var line = format(falseLink, text, dataModel);
- if (line != null && !line.isBlank()) {
- builder.append("- ").append(line).append(LINE_SEPARATOR);
- }
- }
- return builder.toString();
- }
-
- static String format(TestLink link, Text text, LegacyModelExtractionState modelState) {
- var model = modelState.getInstances().stream().filter(m -> m.getUid().equals(link.modelId())).findAny().orElse(null);
- var sentence = text.getSentences().stream().filter(s -> s.getSentenceNumber() == link.sentenceNr()).findAny().orElse(null);
-
- if (model == null && sentence == null) {
- return null;
- }
-
- var modelStr = model == null ? link.modelId() : "\"" + model.getFullName() + "\"";
- var sentenceStr = sentence == null ? String.valueOf(link.sentenceNr()) : "\"" + sentence.getText() + "\"";
-
- return String.format("%s ⇔ %s [%s,%s]", modelStr, sentenceStr, link.modelId(), link.sentenceNr());
- }
-
-}