From 9ef0d5d0f7f484530fcffc11762bcf84622f048c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Kir=C3=A1ly?= Date: Sat, 12 Oct 2024 22:21:24 +0200 Subject: [PATCH 1/3] Some catalogue specific elements do not work properly #14 --- .../marc/cli/parameters/CommonParameters.java | 4 +- .../marc/cli/ClassificationAnalysisTest.java | 3 +- .../metadataqa/marc/cli/ValidatorCliTest.java | 41 +++++++++++++++++++ 3 files changed, 46 insertions(+), 2 deletions(-) diff --git a/src/main/java/de/gwdg/metadataqa/marc/cli/parameters/CommonParameters.java b/src/main/java/de/gwdg/metadataqa/marc/cli/parameters/CommonParameters.java index 532930faf..b38c9a7bc 100644 --- a/src/main/java/de/gwdg/metadataqa/marc/cli/parameters/CommonParameters.java +++ b/src/main/java/de/gwdg/metadataqa/marc/cli/parameters/CommonParameters.java @@ -47,7 +47,9 @@ public class CommonParameters implements Serializable { protected boolean lineSeparated = false; protected boolean trimId = false; private String outputDir = DEFAULT_OUTPUT_DIR; + @JsonIgnore protected RecordIgnorator recordIgnorator; + protected String ignorableRecords; protected RecordFilter recordFilter; protected IgnorableFields ignorableFields = new IgnorableFields(); protected InputStream stream = null; @@ -194,7 +196,7 @@ private void readDefaultEncoding() { } private void readIgnorableRecords() { - String ignorableRecords = cmd.hasOption("ignorableRecords") ? cmd.getOptionValue("ignorableRecords") : ""; + ignorableRecords = cmd.hasOption("ignorableRecords") ? 
cmd.getOptionValue("ignorableRecords") : ""; setRecordIgnorator(ignorableRecords); } diff --git a/src/test/java/de/gwdg/metadataqa/marc/cli/ClassificationAnalysisTest.java b/src/test/java/de/gwdg/metadataqa/marc/cli/ClassificationAnalysisTest.java index d08e17521..4ca05d00e 100644 --- a/src/test/java/de/gwdg/metadataqa/marc/cli/ClassificationAnalysisTest.java +++ b/src/test/java/de/gwdg/metadataqa/marc/cli/ClassificationAnalysisTest.java @@ -7,6 +7,7 @@ import de.gwdg.metadataqa.marc.cli.utils.RecordIterator; import de.gwdg.metadataqa.marc.utils.ReadMarc; import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.StringUtils; import org.junit.Before; import org.junit.Test; import org.marc4j.marc.Record; @@ -158,7 +159,7 @@ public void marcxml() throws IOException { "--collectCollocations", "--marcxml", "--outputDir", outputDir, - inputFile = TestUtils.getPath("marcxml/marcxml.xml") + TestUtils.getPath("marcxml/marcxml.xml") }; ClassificationAnalysis.main(args); diff --git a/src/test/java/de/gwdg/metadataqa/marc/cli/ValidatorCliTest.java b/src/test/java/de/gwdg/metadataqa/marc/cli/ValidatorCliTest.java index 61ac0617b..c25827da5 100644 --- a/src/test/java/de/gwdg/metadataqa/marc/cli/ValidatorCliTest.java +++ b/src/test/java/de/gwdg/metadataqa/marc/cli/ValidatorCliTest.java @@ -12,6 +12,8 @@ import java.util.Arrays; import java.util.List; import java.util.regex.Pattern; +import java.util.stream.Collectors; +import java.util.stream.Stream; import static junit.framework.TestCase.assertTrue; import static org.junit.Assert.assertEquals; @@ -419,6 +421,45 @@ public void validate_whenUnimarc() throws Exception { assertEquals("1", lines.get(1).trim()); } + @Test + public void validate_whenHbz() throws Exception { + clearOutput(outputDir, outputFiles); + + ValidatorCli processor = new ValidatorCli(new String[]{ + "--schemaType", "MARC21", + "--marcVersion", "HBZ", + "--marcxml", + "--outputDir", outputDir, + "--fixAlma", + "--ignorableRecords", "DEL$a=Y", + 
"--ignorableFields", "964,940,941,942,944,945,946,947,948,949,950,951,952,955,956,957,958,959,966,967,970,971,972,973,974,975,976,977,978,978,979", + "--details", + "--trimId", + "--summary", + // "--format", "csv", + // "--defaultRecordType", "BOOKS", + // "--detailsFileName", "issue-details.csv", + // "--summaryFileName", "issue-summary.csv", + TestUtils.getPath("marcxml/990082522550206441_missing_validation_custom_subfield_9_core_710.xml"), + TestUtils.getPath("marcxml/990171082050206441_missing_validation_custom_ind2_9_core_246.xml"), + TestUtils.getPath("marcxml/991000922029706482_missing_subfield_validation_t_in_customfield_GKT.xml"), + }); + + RecordIterator iterator = new RecordIterator(processor); + iterator.setProcessWithErrors(true); + iterator.start(); + + List<String> lines = getFileLines("issue-summary.csv"); + System.err.println(StringUtils.join(lines, "\n")); + assertEquals(3, lines.size()); + List<String> undefinedFields = lines.stream() + .filter(line -> line.contains("undefined field")) + .collect(Collectors.toList()); + assertEquals(0, undefinedFields.size()); + // Pattern pattern = Pattern.compile("^\\d+,952,\\d+,\\d+,undefined field"); + // assertTrue(pattern.matcher(undefinedFields.get(0)).find()); + } + private List getFileLines(String outputFile) throws IOException { File output = new File(outputDir, outputFile); assertTrue(outputFile + " should exist", output.exists()); From 672a0c680447bf91d24cbec07211eadeca632445 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Kir=C3=A1ly?= Date: Fri, 18 Oct 2024 16:59:44 +0200 Subject: [PATCH 2/3] Some catalogue specific elements do not work properly #14 --- .../java/de/gwdg/metadataqa/marc/cli/ValidatorCliTest.java | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/test/java/de/gwdg/metadataqa/marc/cli/ValidatorCliTest.java b/src/test/java/de/gwdg/metadataqa/marc/cli/ValidatorCliTest.java index c25827da5..7d8c410b6 100644 --- a/src/test/java/de/gwdg/metadataqa/marc/cli/ValidatorCliTest.java +++ 
b/src/test/java/de/gwdg/metadataqa/marc/cli/ValidatorCliTest.java @@ -13,7 +13,6 @@ import java.util.List; import java.util.regex.Pattern; import java.util.stream.Collectors; -import java.util.stream.Stream; import static junit.framework.TestCase.assertTrue; import static org.junit.Assert.assertEquals; @@ -436,10 +435,6 @@ public void validate_whenHbz() throws Exception { "--details", "--trimId", "--summary", - // "--format", "csv", - // "--defaultRecordType", "BOOKS", - // "--detailsFileName", "issue-details.csv", - // "--summaryFileName", "issue-summary.csv", TestUtils.getPath("marcxml/990082522550206441_missing_validation_custom_subfield_9_core_710.xml"), TestUtils.getPath("marcxml/990171082050206441_missing_validation_custom_ind2_9_core_246.xml"), TestUtils.getPath("marcxml/991000922029706482_missing_subfield_validation_t_in_customfield_GKT.xml"), From 4571253cfc6beeff247e8d4153725db1c9cb2b05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Kir=C3=A1ly?= Date: Fri, 18 Oct 2024 17:04:29 +0200 Subject: [PATCH 3/3] Some catalogue specific elements do not work properly #14 --- src/test/java/de/gwdg/metadataqa/marc/cli/CompletenessTest.java | 1 - src/test/java/de/gwdg/metadataqa/marc/cli/ValidatorCliTest.java | 1 - 2 files changed, 2 deletions(-) diff --git a/src/test/java/de/gwdg/metadataqa/marc/cli/CompletenessTest.java b/src/test/java/de/gwdg/metadataqa/marc/cli/CompletenessTest.java index c5eb858a1..4d2fb8189 100644 --- a/src/test/java/de/gwdg/metadataqa/marc/cli/CompletenessTest.java +++ b/src/test/java/de/gwdg/metadataqa/marc/cli/CompletenessTest.java @@ -321,7 +321,6 @@ public void completeness_pica_groupBy_file() throws Exception { assertTrue(line.contains("\"trimId\":false,")); assertTrue(line.contains("\"outputDir\":\"/")); assertTrue(line.contains("/qa-catalogue/src/test/resources/output\",")); - assertTrue(line.contains("\"recordIgnorator\":{\"criteria\":[],\"booleanCriteria\":null,\"empty\":true},")); 
assertTrue(line.contains("\"recordFilter\":{\"criteria\":[],\"booleanCriteria\":null,\"empty\":true},")); assertTrue(line.contains("\"ignorableFields\":{\"fields\":null,\"empty\":true},")); assertTrue(line.contains("\"stream\":null,")); diff --git a/src/test/java/de/gwdg/metadataqa/marc/cli/ValidatorCliTest.java b/src/test/java/de/gwdg/metadataqa/marc/cli/ValidatorCliTest.java index 7d8c410b6..5fe5eeb46 100644 --- a/src/test/java/de/gwdg/metadataqa/marc/cli/ValidatorCliTest.java +++ b/src/test/java/de/gwdg/metadataqa/marc/cli/ValidatorCliTest.java @@ -228,7 +228,6 @@ public void validate_pica_groupBy() throws Exception { assertTrue(line.contains("\"trimId\":true,")); assertTrue(line.contains("\"outputDir\":\"")); assertTrue(line.contains("qa-catalogue/src/test/resources/output\",")); - assertTrue(line.contains("\"recordIgnorator\":{\"criteria\":[],\"booleanCriteria\":null,\"empty\":true},")); assertTrue(line.contains("\"recordFilter\":{\"criteria\":[],\"booleanCriteria\":null,\"empty\":true},")); assertTrue(line.contains("\"ignorableFields\":{\"fields\":null,\"empty\":true},")); assertTrue(line.contains("\"stream\":null,"));