Skip to content

Commit

Permalink
eclipse-rdf4jGH-5058: added metadatafinder code (WIP)
Browse files Browse the repository at this point in the history
  • Loading branch information
barthanssens committed Jul 8, 2024
1 parent 8c3942d commit 67e1965
Show file tree
Hide file tree
Showing 12 changed files with 241 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,24 @@ public class CSVW {
// Classes

// Properties
/** csvw:basee */
/** csvw:base */
public static final IRI BASE;

/** csvw:columns */
public static final IRI COLUMNS;

/** csvw:datatype */
public static final IRI DATATYPE;

/** csvw:default */
public static final IRI DEFAULT;

/** csvw:dialect */
public static final IRI DIALECT;

/** csvw:header */
public static final IRI HEADER;

/** csvw:lang */
public static final IRI LANG;

Expand All @@ -57,6 +66,12 @@ public class CSVW {
/** csvw:tableSchema */
public static final IRI TABLE_SCHEMA;

/** csvw:tables */
public static final IRI TABLES;

/** csvw:titles */
public static final IRI TITLES;

/** csvw:url */
public static final IRI URL;

Expand All @@ -65,11 +80,16 @@ public class CSVW {

// Each property constant is created via Vocabularies.createIRI from NAMESPACE and the property's local name.
static {
BASE = Vocabularies.createIRI(NAMESPACE, "base");
COLUMNS = Vocabularies.createIRI(NAMESPACE, "columns");
DATATYPE = Vocabularies.createIRI(NAMESPACE, "datatype");
DEFAULT = Vocabularies.createIRI(NAMESPACE, "default");
DIALECT = Vocabularies.createIRI(NAMESPACE, "dialect");
HEADER = Vocabularies.createIRI(NAMESPACE, "header");
LANG = Vocabularies.createIRI(NAMESPACE, "lang");
PROPERTY_URL = Vocabularies.createIRI(NAMESPACE, "propertyUrl");
TABLE_SCHEMA = Vocabularies.createIRI(NAMESPACE, "tableSchema");
TABLES = Vocabularies.createIRI(NAMESPACE, "tables");
TITLES = Vocabularies.createIRI(NAMESPACE, "titles");
URL = Vocabularies.createIRI(NAMESPACE, "url");
VALUE_URL = Vocabularies.createIRI(NAMESPACE, "valueUrl");
}
Expand Down
6 changes: 6 additions & 0 deletions core/rio/csvw/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -58,5 +58,11 @@
<groupId>com.opencsv</groupId>
<artifactId>opencsv</artifactId>
</dependency>
<dependency>
<groupId>org.mock-server</groupId>
<artifactId>mockserver-junit-jupiter-no-dependencies</artifactId>
<version>5.14.0</version>
<scope>test</scope>
</dependency>
</dependencies>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
/*******************************************************************************
* Copyright (c) 2024 Eclipse RDF4J contributors.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
package org.eclipse.rdf4j.rio.csvw;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Find metadata info for a given CSV file, using various methods.
 * <p>
 * Every finder reads the metadata document completely into memory and returns it as an in-memory stream, or returns
 * {@code null} when no metadata could be retrieved.
 *
 * @author Bart Hanssens
 */
public class CSVWMetadataFinder {
	private static final Logger LOGGER = LoggerFactory.getLogger(CSVWMetadataFinder.class);
	private static final String WELL_KNOWN = "/.well-known/csvm";
	private static final String METADATA_JSON = "-metadata.json";
	private static final String CSV = ".csv";
	// the {url} or {+url} variable inside a location template
	private static final Pattern URL_VARIABLE = Pattern.compile("\\{\\+?url\\}");

	/**
	 * Find by adding metadata.json as file extension: a trailing {@code .csv} is removed from the location (if
	 * present) and {@code -metadata.json} is appended.
	 *
	 * @param csvFile location of the CSV file
	 * @return in-memory stream with the metadata, or null if it could not be opened
	 */
	public static InputStream findByExtension(URI csvFile) {
		String s = csvFile.toString();
		if (s.endsWith(CSV)) {
			s = s.substring(0, s.length() - CSV.length());
		}
		return open(URI.create(s + METADATA_JSON));
	}

	/**
	 * Try reading the well-known location.
	 * <p>
	 * The well-known file is read line by line. In each non-blank line the first <code>{url}</code> or
	 * <code>{+url}</code> variable is replaced by the location of the CSV file, and the result is resolved against
	 * that location. The first candidate that can be opened is returned.
	 *
	 * @param csvFile location of the CSV file
	 * @return in-memory stream with the metadata, or null if none of the candidates could be opened
	 */
	public static InputStream findByWellKnown(URI csvFile) {
		URI wellKnown = csvFile.resolve(WELL_KNOWN);

		try (InputStream is = wellKnown.toURL().openStream();
				BufferedReader r = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) {
			String line;
			// advancing inside the loop condition guarantees that skipped lines cannot stall the loop
			while ((line = r.readLine()) != null) {
				String template = line.strip();
				if (template.isEmpty()) {
					continue;
				}
				// quote the replacement: '$' and '\' in the CSV location must be taken literally
				String expanded = URL_VARIABLE.matcher(template)
						.replaceFirst(Matcher.quoteReplacement(csvFile.toString()));

				URI metaURI = resolveLocation(csvFile, expanded);
				if (metaURI == null) {
					continue;
				}
				InputStream meta = open(metaURI);
				if (meta != null) {
					return meta;
				}
			}
		} catch (IOException | IllegalArgumentException e) {
			// IllegalArgumentException: the well-known location is not an absolute URI
			LOGGER.info("Could not open {}", wellKnown);
		}
		return null;
	}

	/**
	 * Turn an expanded location template into a URI, relative to the CSV file.
	 *
	 * @param csvFile  location of the CSV file
	 * @param expanded non-empty template in which the url variable has already been replaced
	 * @return resolved URI, or null if the expanded template is not a valid URI reference
	 */
	private static URI resolveLocation(URI csvFile, String expanded) {
		try {
			if (expanded.charAt(0) == '?') {
				// a query-only reference is appended to the CSV location itself instead of being resolved,
				// because resolving it may replace the last path segment (the CSV file name)
				return URI.create(csvFile.toString() + expanded);
			}
			// covers absolute URIs (returned unchanged), absolute paths and relative paths
			return csvFile.resolve(expanded);
		} catch (IllegalArgumentException iae) {
			LOGGER.debug("Invalid metadata location {}", expanded);
			return null;
		}
	}

	/**
	 * Read the complete content of a location into memory.
	 *
	 * @param metaURI location to read
	 * @return in-memory stream with the content, or null if the location could not be opened
	 */
	private static InputStream open(URI metaURI) {
		try (InputStream meta = metaURI.toURL().openStream()) {
			return new ByteArrayInputStream(meta.readAllBytes());
		} catch (IOException | IllegalArgumentException e) {
			// IllegalArgumentException: the URI is not absolute, so it cannot be converted to a URL
			LOGGER.debug("Could not open {}", metaURI);
			return null;
		}
	}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,31 +13,30 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.ArrayList;
import java.util.List;

import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Model;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.impl.LinkedHashModel;
import org.eclipse.rdf4j.model.util.Models;
import org.eclipse.rdf4j.model.base.CoreDatatype.XSD;
import org.eclipse.rdf4j.model.util.RDFCollections;
import org.eclipse.rdf4j.model.vocabulary.CSVW;
import org.eclipse.rdf4j.rio.ParserConfig;
import org.eclipse.rdf4j.rio.RDFFormat;
import org.eclipse.rdf4j.rio.RDFHandlerException;
import org.eclipse.rdf4j.rio.RDFParseException;
import org.eclipse.rdf4j.rio.Rio;
import org.eclipse.rdf4j.rio.csvw.parsers.CellParserFactory;
import org.eclipse.rdf4j.rio.csvw.parsers.Parser;
import org.eclipse.rdf4j.rio.helpers.AbstractRDFParser;
import org.eclipse.rdf4j.rio.helpers.JSONLDSettings;

/**
* Basic (experimental) CSV on the Web Parser
*
* @author Bart Hanssens
* @see <a href="https://w3c.github.io/csvw/primer/">CSV on the Web Primer</a>
*
* @since 5.1.0
*/
Expand All @@ -51,20 +50,16 @@ public RDFFormat getRDFFormat() {
@Override
public synchronized void parse(InputStream in, String baseURI)
throws IOException, RDFParseException, RDFHandlerException {

clear();

Model metadata = parseMetadata(in, null, baseURI);
System.err.println(metadata);

Iterable<Statement> statements = metadata.getStatements(null, CSVW.TABLE_SCHEMA, null);
for (Statement s : statements) {
Value obj = s.getObject();
Model cols = RDFCollections.getCollection(metadata, (Resource) obj, new LinkedHashModel());
metadata.getStatements((Resource) obj, null, null).forEach(a -> {
System.err.println(a);
}
);

Parser p = new Parser();
Iterable<Statement> tables = metadata.getStatements(null, CSVW.TABLE_SCHEMA, null);
for (Statement table : tables) {
getCellParsers(metadata, table.getObject());
}

clear();
}

Expand All @@ -91,12 +86,40 @@ private Model parseMetadata(InputStream in, Reader reader, String baseURI) throw

if (in != null) {
metadata = Rio.parse(in, null, RDFFormat.JSONLD, cfg);
System.err.println(metadata);
}

// if (reader != null) {
// return Rio.parse(reader, baseURI, RDFFormat.JSONLD, cfg);
// }
return metadata;
}

/**
 * Create one cell parser per column of a table (schema).
 * <p>
 * Only the first {@code csvw:columns} statement of the table is used. The parsers are returned in the order in
 * which the columns appear in the RDF collection.
 *
 * @param metadata model containing the parsed metadata
 * @param table    subject of the {@code csvw:columns} statement
 * @return list of parsers in column order, empty when no columns could be found
 */
private List<Parser> getCellParsers(Model metadata, Value table) {
	List<Parser> parsers = new ArrayList<>();

	if (!(table instanceof Resource)) {
		// a literal cannot be the subject of a statement, so there is nothing to look up
		return parsers;
	}

	for (Statement s : metadata.getStatements((Resource) table, CSVW.COLUMNS, null)) {
		if (s.getObject() instanceof Resource) {
			// the columns must be retrieved in the exact same order as they appear in the JSON metadata file,
			// especially when the CSV does not have a header row
			List<Value> cols = RDFCollections.asValues(metadata, (Resource) s.getObject(), new ArrayList<Value>());
			for (Value col : cols) {
				Parser p = new Parser();
				p.setDataType(getDataType(metadata, col));
				parsers.add(p);
			}
		}
		// only the first statement is taken into account
		break;
	}
	return parsers;
}

/**
* Get the datatype of a column.
* <p>
* Placeholder implementation: both arguments are currently ignored and the IRI of {@code XSD.STRING} is always
* returned.
*
* @param metadata metadata model (currently unused)
* @param col      column (currently unused)
* @return the result of {@code XSD.STRING.getIri()}
*/
private IRI getDataType(Model metadata, Value col) {
return XSD.STRING.getIri();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
<head></head>
<body>
Parser for CSV on the Web. The parser adheres to the
<a href="https://w3c.github.io/csvw/syntax/">editor's draft of 02 November 2022</a>.
<a href="https://w3c.github.io/csvw/syntax/">editor's draft of 02 November 2022</a>.

See also the CSV on the Web <a href="https://csvw.org/">starting page</a>
and <a href="https://w3c.github.io/csvw/primer/">Primer</a>.
</body>
</html>
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
*******************************************************************************/
package org.eclipse.rdf4j.rio.csvw.parsers;

import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.util.Literals;
import org.eclipse.rdf4j.model.util.Values;
Expand All @@ -20,13 +21,21 @@
* @author Bart.Hanssens
*/
public class Parser {
private IRI dataType;
private String defaultValue;
private boolean isRequired;
private String format;
private String propertyUrl;
private String valueUrl;
private String separator;

/**
* Set the datatype, stored in the {@code dataType} field of this parser.
*
* @param dataType the datatype IRI to set
*/
public void setDataType(IRI dataType) {
this.dataType = dataType;
}

/**
* @param defaultValue the defaultValue to set
*/
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*******************************************************************************
* Copyright (c) 2024 Eclipse RDF4J contributors.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/

package org.eclipse.rdf4j.rio.csvw;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.mockserver.model.HttpRequest.request;
import static org.mockserver.model.HttpResponse.response;

import java.io.IOException;
import java.net.URI;

import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockserver.client.MockServerClient;
import org.mockserver.junit.jupiter.MockServerExtension;

/**
 * Tests for {@link CSVWMetadataFinder}, run against a mock HTTP server.
 *
 * @author Bart.Hanssens
 */
@ExtendWith(MockServerExtension.class)
public class CSVWMetadataFinderTest {
	private MockServerClient client;

	/**
	 * Decode a stream as UTF-8, so the result does not depend on the platform default charset.
	 *
	 * @param in stream to read completely
	 * @return content of the stream
	 * @throws IOException when the stream cannot be read
	 */
	private static String asString(java.io.InputStream in) throws IOException {
		return new String(in.readAllBytes(), java.nio.charset.StandardCharsets.UTF_8);
	}

	/**
	 * Read a test resource from the root of the classpath.
	 *
	 * @param file name of the resource
	 * @return content of the resource
	 * @throws IOException when the resource does not exist or cannot be read
	 */
	private String getFile(String file) throws IOException {
		try (var in = CSVWMetadataFinderTest.class.getResourceAsStream("/" + file)) {
			if (in == null) {
				// fail with the resource name instead of a bare NullPointerException
				throw new IOException("Test resource not found: " + file);
			}
			return asString(in);
		}
	}

	/**
	 * Let the mock server answer GET requests for a path with the content of a test resource.
	 *
	 * @param path     request path
	 * @param resource name of the test resource used as response body
	 * @throws IOException when the resource cannot be read
	 */
	private void serve(String path, String resource) throws IOException {
		client.when(request().withMethod("GET").withPath(path))
				.respond(response().withBody(getFile(resource)));
	}

	@BeforeEach
	public void init(MockServerClient client) throws IOException {
		this.client = client;
		serve("/downloads/painters.csv", "painters.csv");
		serve("/.well-known/csvm", "well-known-csvm");
		serve("/downloads/painters.csvm", "painters-metadata.json");
	}

	@Test
	public void testWellKnownLocation() throws IOException {
		String base = "http://localhost:" + client.getPort() + "/";
		URI uri = URI.create(base + "downloads/painters.csv");

		String expected = getFile("painters-metadata.json");
		try (var meta = CSVWMetadataFinder.findByWellKnown(uri)) {
			if (meta == null) {
				throw new AssertionError("No metadata found via well-known location for " + uri);
			}
			assertEquals(expected, asString(meta));
		}
	}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@
*******************************************************************************/
package org.eclipse.rdf4j.rio.csvw;

import static org.junit.jupiter.api.Assertions.assertTrue;

import java.io.FileInputStream;
import java.io.IOException;

Expand All @@ -27,6 +25,6 @@ public class CSVWParserTest {
public void testCSVWParser() throws IOException {
CSVWParser parser = new CSVWParser();
parser.getParserConfig().set(BasicWriterSettings.BASE_DIRECTIVE, true);
parser.parse(new FileInputStream("src/test/resources/org/eclipse/rdf4j/rio/csvw/painters-metadata.json"));
parser.parse(new FileInputStream("src/test/resources/painters-metadata.json"));
}
}
1 change: 1 addition & 0 deletions core/rio/csvw/src/test/resources/well-known-csvm
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{+url}m

0 comments on commit 67e1965

Please sign in to comment.