From 67252f9ade50acbb1f38edcd1487dc4d97592ce5 Mon Sep 17 00:00:00 2001
From: Bart Hanssens
Date: Tue, 9 Jul 2024 23:28:44 +0100
Subject: [PATCH] GH-5058: additional parser code (WIP)

---
 .../rio/csvw/parsers/CellParserBoolean.java   | 72 +++++++++++++++++++
 .../rio/csvw/parsers/CellParserDate.java      |  7 +--
 .../rio/csvw/parsers/CellParserFactory.java   |  2 +
 .../rio/csvw/CSVWMetadataFinderTest.java      |  2 -
 .../src/test/resources/painters-metadata.json |  5 ++-
 core/rio/csvw/src/test/resources/painters.csv |  6 +--
 6 files changed, 82 insertions(+), 12 deletions(-)
 create mode 100644 core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/parsers/CellParserBoolean.java

diff --git a/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/parsers/CellParserBoolean.java b/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/parsers/CellParserBoolean.java
new file mode 100644
index 0000000000..41a4094a90
--- /dev/null
+++ b/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/parsers/CellParserBoolean.java
@@ -0,0 +1,72 @@
+/*******************************************************************************
+ * Copyright (c) 2024 Eclipse RDF4J contributors.
+ *
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Distribution License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *******************************************************************************/
+package org.eclipse.rdf4j.rio.csvw.parsers;
+
+import java.util.Set;
+
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.model.Namespace;
+import org.eclipse.rdf4j.model.Value;
+import org.eclipse.rdf4j.model.util.Values;
+
+/**
+ * Parses cells of a boolean column into boolean literals.
+ *
+ * The format lists the cell text for true and for false, separated by a pipe, for example <code>Yes|No</code>.
+ * Without a format, <code>true</code> and <code>1</code> are read as true.
+ *
+ * @author Bart Hanssens
+ */
+public class CellParserBoolean extends CellParser {
+	// Defaults keep parse() usable when the datatype is a plain "boolean" and setFormat() is never called
+	private String valueTrue = "true";
+	private String valueFalse = "false";
+	private boolean customFormat = false;
+
+	/**
+	 * Set the cell values representing true and false.
+	 *
+	 * @param format value for true and value for false, separated by a pipe
+	 * @throws IllegalArgumentException if format is null or does not contain both values
+	 */
+	@Override
+	public void setFormat(String format) {
+		if (format == null) {
+			throw new IllegalArgumentException("Boolean format must not be null");
+		}
+		String[] values = format.split("\\|");
+		// split() drops trailing empty parts, so "Yes" and "Yes|" both end up here
+		if (values.length < 2) {
+			throw new IllegalArgumentException("Boolean format must be 'true|false' style, got: " + format);
+		}
+		valueTrue = values[0];
+		valueFalse = values[1];
+		customFormat = true;
+	}
+
+	/**
+	 * Get the boolean value from a cell.
+	 *
+	 * @param cell cell content, an empty or null cell falls back to the default value if there is one
+	 * @return literal "true" if the cell matches the value for true, literal "false" for anything else
+	 */
+	@Override
+	public Value parse(String cell) {
+		String s = cell;
+		if ((s == null || s.isEmpty()) && (defaultValue != null)) {
+			s = defaultValue;
+		}
+		// "1" is only accepted as true when no custom format replaced the XSD lexical forms
+		boolean isTrue = valueTrue.equals(s) || (!customFormat && "1".equals(s));
+		return Values.literal(isTrue ? "true" : "false", dataType);
+	}
+
+}
diff --git a/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/parsers/CellParserDate.java b/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/parsers/CellParserDate.java
index 92d760c444..ca17db5c3e 100644
--- a/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/parsers/CellParserDate.java
+++ b/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/parsers/CellParserDate.java
@@ -37,12 +37,7 @@ public void setFormat(String format) {
 		formatter = DateTimeFormatter.ofPattern(format);
 	}
 
-	/**
-	 * Get the value from a cell
-	 *
-	 * @param cell
-	 * @return
-	 */
+	@Override
 	public Value parse(String cell) {
 		String s = cell;
 		if ((s == null || s.isEmpty()) && (defaultValue != null)) {
diff --git a/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/parsers/CellParserFactory.java b/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/parsers/CellParserFactory.java
index a49429db6e..4f4dc6bed7 100644
--- a/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/parsers/CellParserFactory.java
+++ 
b/core/rio/csvw/src/main/java/org/eclipse/rdf4j/rio/csvw/parsers/CellParserFactory.java @@ -29,6 +29,8 @@ public static CellParser create(IRI datatype) { if (datatype.equals(XSD.DATE.getIri())) { p = new CellParserDate(); + } else if (datatype.equals(XSD.BOOLEAN.getIri())) { + p = new CellParserBoolean(); } else { p = new CellParser(); } diff --git a/core/rio/csvw/src/test/java/org/eclipse/rdf4j/rio/csvw/CSVWMetadataFinderTest.java b/core/rio/csvw/src/test/java/org/eclipse/rdf4j/rio/csvw/CSVWMetadataFinderTest.java index 800373b56d..b487f5d771 100644 --- a/core/rio/csvw/src/test/java/org/eclipse/rdf4j/rio/csvw/CSVWMetadataFinderTest.java +++ b/core/rio/csvw/src/test/java/org/eclipse/rdf4j/rio/csvw/CSVWMetadataFinderTest.java @@ -30,8 +30,6 @@ */ @ExtendWith(MockServerExtension.class) public class CSVWMetadataFinderTest extends AbstractTest { - private MockServerClient client; - @BeforeEach public void init(MockServerClient client) throws IOException { this.client = client; diff --git a/core/rio/csvw/src/test/resources/painters-metadata.json b/core/rio/csvw/src/test/resources/painters-metadata.json index 6be69bb45e..8df163f8d5 100644 --- a/core/rio/csvw/src/test/resources/painters-metadata.json +++ b/core/rio/csvw/src/test/resources/painters-metadata.json @@ -22,7 +22,10 @@ "format": "d/M/yyyy" } }, { "name": "maried", - "datatype": "boolean" }, + "datatype": { + "base": "boolean", + "format": "Yes|No" + } }, { "name": "languages", "separator": " " } ], diff --git a/core/rio/csvw/src/test/resources/painters.csv b/core/rio/csvw/src/test/resources/painters.csv index 270053434b..4c3a87048c 100644 --- a/core/rio/csvw/src/test/resources/painters.csv +++ b/core/rio/csvw/src/test/resources/painters.csv @@ -1,5 +1,5 @@ "wikidata_id","first_name","last_name,country_id","country_name_nl","country_name_en","date_of_birth","married","languages" -"Q5582","Vincent","van Gogh","Q29999","Nederland","The Netherlands","30/3/1853","false","dutch french" 
-"Q164712","Paul,Delvaux","Q31","België","Belgium","23/9/1897","true",french" -"Q46408","Georgia","O'Keeffe","Q30","Verenigde Staten","United States","15/11/1887","true","english" +"Q5582","Vincent","van Gogh","Q29999","Nederland","The Netherlands","30/3/1853","No","dutch french" +"Q164712","Paul","Delvaux","Q31","België","Belgium","23/9/1897","Yes","french" +"Q46408","Georgia","O'Keeffe","Q30","Verenigde Staten","United States","15/11/1887","Yes","english"