diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 000000000..5d637a7fb --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,18 @@ +name: Publish package to GitHub Packages +on: + push: + branches: + - '*-rc*' +jobs: + publish: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-java@v1 + with: + java-version: 1.8 + - name: Publish package + run: ./gradlew publish + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + diff --git a/build.gradle b/build.gradle index d0b730a63..f6191a68e 100644 --- a/build.gradle +++ b/build.gradle @@ -47,7 +47,8 @@ editorconfig { '**/*.bzip2', '**/*.gzip', '**/*.xz', - 'gradlew*' + 'gradlew*', + '**/bin' ] } @@ -56,6 +57,7 @@ subprojects { apply plugin: 'maven' apply plugin: 'jacoco' apply plugin: 'checkstyle' + apply plugin: 'maven-publish' check.dependsOn(editorconfigCheck) check.dependsOn(javadoc) @@ -94,6 +96,7 @@ subprojects { options { addBooleanOption 'Xwerror', true } + exclude '**/harvester2/**' } artifacts { @@ -241,9 +244,43 @@ gradle.projectsEvaluated { } } + //'upload' vs. 
'publish, see + //https://medium.com/dot-debug/deploying-artifacts-to-maven-using-gradle-b669acc1b6f8' + /* task publish { dependsOn tasks.uploadArchives } + */ + + publishing { + publications { + mavenArtifacts(MavenPublication) { + from components.java + afterEvaluate { + groupId = project.group + artifactId = project.name + } + } + } + + repositories { + maven { + name = 'localEmbedded' + // run generated gradle task `./gradlew + // publishMavenArtifactsPublicationToLocalEmbeddedRepository` + // to publish all subprojects into the same local embedded repo: + url = "file://${rootDir}/build/repo" + } + maven { + name = "GitHubPackages" + url = "https://maven.pkg.github.com/metafacture/metafacture-core" + credentials { + username = System.getenv("GITHUB_ACTOR") + password = System.getenv("GITHUB_TOKEN") + } + } + } + } } } @@ -315,6 +352,10 @@ def getSnapshotVersion() { logger.lifecycle('Release branch found') return "${extractVersionFromBranch(grgit.branch.current().name)}-SNAPSHOT" } + if (grgit.branch.current().name.contains('-rc')) { + logger.lifecycle('Release candidate branch found') + return "${grgit.branch.current().name}" + } logger.lifecycle('Feature branch found') return "feature-${grgit.branch.current().name}-SNAPSHOT" } diff --git a/config/checkstyle/checkstyle.xml b/config/checkstyle/checkstyle.xml index b31045320..06f410f10 100644 --- a/config/checkstyle/checkstyle.xml +++ b/config/checkstyle/checkstyle.xml @@ -6,7 +6,7 @@ - + diff --git a/metafacture-biblio/build.gradle b/metafacture-biblio/build.gradle index dabffc8e5..3f27c9d99 100644 --- a/metafacture-biblio/build.gradle +++ b/metafacture-biblio/build.gradle @@ -21,13 +21,13 @@ dependencies { api project(':metafacture-framework') implementation project(':metafacture-commons') implementation project(':metafacture-flowcontrol') - implementation 'org.dspace:oclc-harvester2:0.1.12' implementation ('xalan:xalan:2.7.0') { exclude group: 'xalan', module: 'serializer' exclude group: 'xercesImpl', module: 
'xercesImpl' exclude group: 'xml-apis', module: 'xml-apis' } implementation 'log4j:log4j:1.2.17' + implementation 'org.slf4j:slf4j-api:1.7.7' testImplementation 'junit:junit:4.12' testImplementation 'org.mockito:mockito-core:2.5.5' } diff --git a/metafacture-biblio/src/main/java/org/metafacture/biblio/OaiPmhOpener.java b/metafacture-biblio/src/main/java/org/metafacture/biblio/OaiPmhOpener.java index fd98f3470..d9fa8c424 100644 --- a/metafacture-biblio/src/main/java/org/metafacture/biblio/OaiPmhOpener.java +++ b/metafacture-biblio/src/main/java/org/metafacture/biblio/OaiPmhOpener.java @@ -1,4 +1,4 @@ -/* Copyright 2013 Pascal Christoph. +/* Copyright 2013, 2022 Pascal Christoph and others. * Licensed under the Eclipse Public License 1.0 */ package org.metafacture.biblio; @@ -10,7 +10,7 @@ import org.metafacture.framework.annotations.Out; import org.metafacture.framework.helpers.DefaultObjectPipe; -import ORG.oclc.oai.harvester2.app.RawWrite; +import org.oclc.oai.harvester2.app.RawWrite; import org.xml.sax.SAXException; import java.io.ByteArrayInputStream; @@ -19,7 +19,7 @@ import java.io.InputStreamReader; import java.io.Reader; import javax.xml.parsers.ParserConfigurationException; -import javax.xml.transform.TransformerException; +import javax.xml.xpath.XPathException; /** * Opens an OAI-PMH stream and passes a reader to the receiver. 
@@ -111,10 +111,10 @@ public void process(final String baseUrl) { catch (final SAXException e) { e.printStackTrace(); } - catch (final TransformerException e) { + catch (final NoSuchFieldException e) { e.printStackTrace(); } - catch (final NoSuchFieldException e) { + catch (final XPathException e) { e.printStackTrace(); } try { diff --git a/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/app/RawWrite.java b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/app/RawWrite.java new file mode 100644 index 000000000..6cbce310d --- /dev/null +++ b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/app/RawWrite.java @@ -0,0 +1,170 @@ +/** + * Copyright 2006 OCLC, Online Computer Library Center Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. You may obtain a copy of the + * License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and limitations under the License. 
+ */ +package org.oclc.oai.harvester2.app; + +import java.io.*; +import java.lang.NoSuchFieldException; +import java.util.ArrayList; +import java.util.Date; +import java.util.List; +import java.util.HashMap; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.xpath.XPathException; +import javax.xml.xpath.XPathExpressionException; +import org.oclc.oai.harvester2.verb.*; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; +import org.xml.sax.SAXException; + +public class RawWrite { + + public static void main(String[] args) { + try { + System.out.println(new Date()); + + HashMap options = getOptions(args); + List rootArgs = (List) options.get("rootArgs"); + String baseURL = null; + if (rootArgs.size() > 0) { + baseURL = (String) rootArgs.get(0); + } else { + throw new IllegalArgumentException(); + } + + OutputStream out = System.out; + String outFileName = (String) options.get("-out"); + String from = (String) options.get("-from"); + String until = (String) options.get("-until"); + String metadataPrefix = (String) options.get("-metadataPrefix"); + if (metadataPrefix == null) metadataPrefix = "oai_dc"; + String resumptionToken = (String) options.get("-resumptionToken"); + String setSpec = (String) options.get("-setSpec"); + + if (resumptionToken != null) { + if (outFileName != null) + out = new FileOutputStream(outFileName, true); + run(baseURL, resumptionToken, out); + } else { + if (outFileName != null) + out = new FileOutputStream(outFileName); + run(baseURL, from, until, metadataPrefix, setSpec, out); + } + + if (out != System.out) out.close(); + System.out.println(new Date()); + } catch (IllegalArgumentException e) { + System.err.println("RawWrite <-from date> <-until date> <-metadataPrefix prefix> <-setSpec setName> <-resumptionToken token> <-out fileName> baseURL"); + } catch (Exception e) { + e.printStackTrace(); + System.exit(-1); + } + } + + public static void run(String baseURL, String resumptionToken, + OutputStream out) + throws 
IOException, ParserConfigurationException, SAXException, XPathExpressionException, + NoSuchFieldException { + ListRecords listRecords = new ListRecords(baseURL, resumptionToken); + while (listRecords != null) { + NodeList errors = listRecords.getErrors(); + if (errors != null && errors.getLength() > 0) { + System.out.println("Found errors"); + int length = errors.getLength(); + for (int i = 0; i < length; ++i) { + Node item = errors.item(i); + System.out.println(item); + } + System.out.println("Error record: " + listRecords.toString()); + break; + } +// System.out.println(listRecords); + out.write(listRecords.toString().getBytes("UTF-8")); + out.write("\n".getBytes("UTF-8")); + resumptionToken = listRecords.getResumptionToken(); + System.out.println("resumptionToken: " + resumptionToken); + if (resumptionToken == null || resumptionToken.length() == 0) { + listRecords = null; + } else { + listRecords = new ListRecords(baseURL, resumptionToken); + } + } + out.write("\n".getBytes("UTF-8")); + } + + public static void run(String baseURL, String from, String until, + String metadataPrefix, String setSpec, + OutputStream out) + throws IOException, ParserConfigurationException, SAXException, XPathException, + NoSuchFieldException { + out.write("\n".getBytes("UTF-8")); + out.write("\n".getBytes("UTF-8")); + out.write(new Identify(baseURL).toString().getBytes("UTF-8")); + out.write("\n".getBytes("UTF-8")); + out.write(new ListMetadataFormats(baseURL).toString().getBytes("UTF-8")); + out.write("\n".getBytes("UTF-8")); + ListSets listSets = new ListSets(baseURL); + while (listSets != null) { + out.write(listSets.toString().getBytes("UTF-8")); + out.write("\n".getBytes("UTF-8")); + String resumptionToken = listSets.getResumptionToken(); + System.out.println("resumptionToken: " + resumptionToken); + if (resumptionToken == null || resumptionToken.length() == 0) { + listSets = null; + } else { + listSets = new ListSets(baseURL, resumptionToken); + } + } + ListRecords listRecords 
= new ListRecords(baseURL, from, until, setSpec, + metadataPrefix); + while (listRecords != null) { + NodeList errors = listRecords.getErrors(); + if (errors != null && errors.getLength() > 0) { + System.out.println("Found errors"); + int length = errors.getLength(); + for (int i = 0; i < length; ++i) { + Node item = errors.item(i); + System.out.println(item); + } + System.out.println("Error record: " + listRecords.toString()); + break; + } +// System.out.println(listRecords); + out.write(listRecords.toString().getBytes("UTF-8")); + out.write("\n".getBytes("UTF-8")); + String resumptionToken = listRecords.getResumptionToken(); + System.out.println("resumptionToken: " + resumptionToken); + if (resumptionToken == null || resumptionToken.length() == 0) { + listRecords = null; + } else { + listRecords = new ListRecords(baseURL, resumptionToken); + } + } + out.write("\n".getBytes("UTF-8")); + } + + private static HashMap getOptions(String[] args) { + HashMap options = new HashMap<>(); + ArrayList rootArgs = new ArrayList<>(); + options.put("rootArgs", rootArgs); + + for (int i = 0; i < args.length; ++i) { + if (args[i].charAt(0) != '-') { + rootArgs.add(args[i]); + } else if (i + 1 < args.length) { + options.put(args[i], args[++i]); + } else { + throw new IllegalArgumentException(); + } + } + return options; + } +} diff --git a/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/GetRecord.java b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/GetRecord.java new file mode 100644 index 000000000..5055acee6 --- /dev/null +++ b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/GetRecord.java @@ -0,0 +1,80 @@ +/** + * Copyright 2006 OCLC, Online Computer Library Center Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the + * License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and limitations under the License. + */ +package org.oclc.oai.harvester2.verb; + +import java.io.IOException; +import java.net.MalformedURLException; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.xpath.XPathExpressionException; +import org.xml.sax.SAXException; + +/** + * This class represents an GetRecord response on either the server or on the client + * + * @author Jeffrey A. Young, OCLC Online Computer Library Center + */ +public class GetRecord extends HarvesterVerb { + + /** + * Mock object constructor (for unit testing purposes) + */ + public GetRecord() { + super(); + } + + /** + * Client-side GetRecord verb constructor + * + * @param baseURL the baseURL of the server to be queried + * @param identifier + * @param metadataPrefix + * @exception MalformedURLException the baseURL is bad + * @exception SAXException the xml response is bad + * @exception IOException an I/O error occurred + * @throws ParserConfigurationException + * @throws XPathExpressionException + */ + public GetRecord(String baseURL, String identifier, String metadataPrefix) + throws IOException, ParserConfigurationException, SAXException, XPathExpressionException { + super(getRequestURL(baseURL, identifier, metadataPrefix)); + } + + /** + * Get the oai:identifier from the oai:header + * + * @return the oai:identifier as a String + * @throws XPathExpressionException + * @throws NoSuchFieldException + */ + public String getIdentifier() throws XPathExpressionException, NoSuchFieldException { + if (SCHEMA_LOCATION_V2_0.equals(getSchemaLocation())) { + return 
getSingleString("/oai20:OAI-PMH/oai20:GetRecord/oai20:record/oai20:header/oai20:identifier"); + } else if (SCHEMA_LOCATION_V1_1_GET_RECORD.equals(getSchemaLocation())) { + return getSingleString("/oai11_GetRecord:GetRecord/oai11_GetRecord:record/oai11_GetRecord:header/oai11_GetRecord:identifier"); + } else { + throw new NoSuchFieldException(getSchemaLocation()); + } + } + + /** + * Construct the query portion of the http request + * + * @return a String containing the query portion of the http request + */ + private static String getRequestURL(String baseURL, String identifier, String metadataPrefix) { + StringBuilder requestURL = new StringBuilder(baseURL); + requestURL.append("?verb=GetRecord"); + requestURL.append("&identifier=").append(identifier); + requestURL.append("&metadataPrefix=").append(metadataPrefix); + return requestURL.toString(); + } +} diff --git a/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/HarvesterVerb.java b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/HarvesterVerb.java new file mode 100644 index 000000000..c99c4ab4d --- /dev/null +++ b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/HarvesterVerb.java @@ -0,0 +1,287 @@ +/** + * Copyright 2006 OCLC, Online Computer Library Center Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. You may obtain a copy of the + * License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and limitations under the License. 
+ */ +package org.oclc.oai.harvester2.verb; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.io.StringWriter; +import java.net.HttpURLConnection; +import java.net.URL; +import java.util.Date; +import java.util.zip.GZIPInputStream; +import java.util.zip.InflaterInputStream; +import java.util.zip.ZipInputStream; + +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.transform.OutputKeys; +import javax.xml.transform.Result; +import javax.xml.transform.Source; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerException; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamResult; +import javax.xml.xpath.XPath; +import javax.xml.xpath.XPathConstants; +import javax.xml.xpath.XPathExpressionException; +import javax.xml.xpath.XPathFactory; + +//import org.apache.xpath.XPathAPI; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.w3c.dom.Document; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; + +/** + * HarvesterVerb is the parent class for each of the OAI verbs. + * + * @author Jefffrey A. 
Young, OCLC Online Computer Library Center + */ +public abstract class HarvesterVerb { + + private static final Logger log = LoggerFactory.getLogger(HarvesterVerb.class); + + /* Primary OAI namespaces */ + public static final String SCHEMA_LOCATION_V2_0 = "http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"; + public static final String SCHEMA_LOCATION_V1_1_GET_RECORD = "http://www.openarchives.org/OAI/1.1/OAI_GetRecord http://www.openarchives.org/OAI/1.1/OAI_GetRecord.xsd"; + public static final String SCHEMA_LOCATION_V1_1_IDENTIFY = "http://www.openarchives.org/OAI/1.1/OAI_Identify http://www.openarchives.org/OAI/1.1/OAI_Identify.xsd"; + public static final String SCHEMA_LOCATION_V1_1_LIST_IDENTIFIERS = "http://www.openarchives.org/OAI/1.1/OAI_ListIdentifiers http://www.openarchives.org/OAI/1.1/OAI_ListIdentifiers.xsd"; + public static final String SCHEMA_LOCATION_V1_1_LIST_METADATA_FORMATS = "http://www.openarchives.org/OAI/1.1/OAI_ListMetadataFormats http://www.openarchives.org/OAI/1.1/OAI_ListMetadataFormats.xsd"; + public static final String SCHEMA_LOCATION_V1_1_LIST_RECORDS = "http://www.openarchives.org/OAI/1.1/OAI_ListRecords http://www.openarchives.org/OAI/1.1/OAI_ListRecords.xsd"; + public static final String SCHEMA_LOCATION_V1_1_LIST_SETS = "http://www.openarchives.org/OAI/1.1/OAI_ListSets http://www.openarchives.org/OAI/1.1/OAI_ListSets.xsd"; + private Document doc = null; + private String schemaLocation = null; + private String requestURL = null; + + private static final ThreadLocal builderFactory = new ThreadLocal() { + @Override + public DocumentBuilderFactory initialValue() { + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + factory.setNamespaceAware(true); + return factory; + } + }; + private static final ThreadLocal transformerFactory = new ThreadLocal() { + @Override + public TransformerFactory initialValue() { + return TransformerFactory.newInstance(); + } + }; + + private static final 
ThreadLocal xpath = new ThreadLocal() { + @Override + public XPath initialValue() { + XPathFactory fucktory = XPathFactory.newInstance(); + XPath xpath = fucktory.newXPath(); + try { + xpath.setNamespaceContext(new OAINamespaceContext(builderFactory.get())); + } catch (ParserConfigurationException e) { + log.error("Cannot set namespace context", e); + } + return xpath; + } + }; + + /** + * Get the OAI response as a DOM object + * + * @return the DOM for the OAI response + */ + public Document getDocument() { + return doc; + } + + /** + * Get the xsi:schemaLocation for the OAI response + * + * @return the xsi:schemaLocation value + */ + public String getSchemaLocation() { + return schemaLocation; + } + + /** + * Get the OAI errors + * + * @return a NodeList of /oai:OAI-PMH/oai:error elements + * @throws XPathExpressionException + */ + public NodeList getErrors() throws XPathExpressionException { + if (SCHEMA_LOCATION_V2_0.equals(getSchemaLocation())) { + return getNodeList("/oai20:OAI-PMH/oai20:error"); + } else { + return null; + } + } + + /** + * Get the OAI request URL for this response + * + * @return the OAI request URL as a String + */ + public String getRequestURL() { + return requestURL; + } + + /** + * Mock object creator (for unit testing purposes) + */ + public HarvesterVerb() { + } + + /** + * Performs the OAI request + * + * @param requestURL + * @throws IOException + * @throws ParserConfigurationException + * @throws SAXException + * @throws XPathExpressionException + */ + public HarvesterVerb(String requestURL) throws IOException, + ParserConfigurationException, SAXException, XPathExpressionException { + harvest(requestURL); + } + + /** + * Preforms the OAI request + * + * @param requestURL + * @throws IOException + * @throws ParserConfigurationException + * @throws SAXException + * @throws XPathExpressionException + */ + private void harvest(String requestURL) throws IOException, + ParserConfigurationException, SAXException, XPathExpressionException 
{ + this.requestURL = requestURL; + log.debug("requestURL=" + requestURL); + InputStream in; + URL url = new URL(requestURL); + HttpURLConnection con = null; + int responseCode; + do { + con = (HttpURLConnection) url.openConnection(); + con.setRequestProperty("User-Agent", "OAIHarvester/2.0"); + con.setRequestProperty("Accept-Encoding", + "compress, gzip, identify"); + try { + responseCode = con.getResponseCode(); + log.debug("responseCode=" + responseCode); + } catch (FileNotFoundException e) { + // assume it's a 503 response + log.info(requestURL, e); + responseCode = HttpURLConnection.HTTP_UNAVAILABLE; + } + + if (responseCode == HttpURLConnection.HTTP_UNAVAILABLE) { + long retrySeconds = con.getHeaderFieldInt("Retry-After", -1); + if (retrySeconds == -1) { + long now = (new Date()).getTime(); + long retryDate = con.getHeaderFieldDate("Retry-After", now); + retrySeconds = retryDate - now; + } + if (retrySeconds == 0) { // Apparently, it's a bad URL + throw new FileNotFoundException("Bad URL?"); + } + System.err.println("Server response: Retry-After=" + retrySeconds); + if (retrySeconds > 0) { + try { + Thread.sleep(retrySeconds * 1000); + } catch (InterruptedException ex) { + log.error("Error while sleeping", ex); + } + } + } + } while (responseCode == HttpURLConnection.HTTP_UNAVAILABLE); + String contentEncoding = con.getHeaderField("Content-Encoding"); + log.debug("contentEncoding=" + contentEncoding); + if ("compress".equals(contentEncoding)) { + ZipInputStream zis = new ZipInputStream(con.getInputStream()); + zis.getNextEntry(); + in = zis; + } else if ("gzip".equals(contentEncoding)) { + in = new GZIPInputStream(con.getInputStream()); + } else if ("deflate".equals(contentEncoding)) { + in = new InflaterInputStream(con.getInputStream()); + } else { + in = con.getInputStream(); + } + + InputSource data = new InputSource(in); + + doc = builderFactory.get().newDocumentBuilder().parse(data); + + // The URIs in xsi:schemaLocation are separated by (any kind + // 
of) white space. Normalize it to a single space. + String schemaLoc = getSingleString("/*/@xsi:schemaLocation"); + this.schemaLocation = schemaLoc.trim().replaceAll("\\s+", " "); + } + + /** + * Get the String value for the given XPath location in the response DOM + * + * @param xpath + * @return a String containing the value of the XPath location. + * @throws XPathExpressionException + */ + public String getSingleString(String xpath) throws XPathExpressionException { + return getSingleString(getDocument(), xpath); +// return XPathAPI.eval(getDocument(), xpath, namespaceElement).str(); +// String str = null; +// Node node = XPathAPI.selectSingleNode(getDocument(), xpath, +// namespaceElement); +// if (node != null) { +// XObject xObject = XPathAPI.eval(node, "string()"); +// str = xObject.str(); +// } +// return str; + } + + public String getSingleString(Node node, String expression) throws XPathExpressionException { + return xpath.get().evaluate(expression, node); + } + + /** + * Get a NodeList containing the nodes in the response DOM for the specified xpath + * + * @param expression + * @return the NodeList for the xpath into the response DOM + * @throws XPathExpressionException + */ + public NodeList getNodeList(String expression) throws XPathExpressionException { + return (NodeList) xpath.get().evaluate(expression, getDocument(), XPathConstants.NODESET); + } + + @Override + public String toString() { + // Element docEl = getDocument().getDocumentElement(); + // return docEl.toString(); + Source input = new DOMSource(getDocument()); + StringWriter sw = new StringWriter(); + Result output = new StreamResult(sw); + try { + Transformer idTransformer = transformerFactory.get().newTransformer(); + idTransformer.setOutputProperty( + OutputKeys.OMIT_XML_DECLARATION, "yes"); + idTransformer.transform(input, output); + return sw.toString(); + } catch (TransformerException e) { + return e.getMessage(); + } + } +} diff --git 
a/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/Identify.java b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/Identify.java new file mode 100644 index 000000000..83071738f --- /dev/null +++ b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/Identify.java @@ -0,0 +1,77 @@ +/** + * Copyright 2006 OCLC, Online Computer Library Center Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. You may obtain a copy of the + * License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and limitations under the License. + */ +package org.oclc.oai.harvester2.verb; + +import java.io.IOException; +import java.net.MalformedURLException; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.xpath.XPathExpressionException; +import org.xml.sax.SAXException; + +/** + * This class represents an Identify response on either the server or on the client + * + * @author Jeffrey A. 
Young, OCLC Online Computer Library Center + */ +public class Identify extends HarvesterVerb { + + /** + * Mock object constructor (for unit testing purposes) + */ + public Identify() { + super(); + } + + /** + * Client-side Identify verb constructor + * + * @param baseURL the baseURL of the server to be queried + * @exception MalformedURLException the baseURL is bad + * @exception IOException an I/O error occurred + * @throws ParserConfigurationException + * @throws XPathExpressionException + * @throws SAXException + */ + public Identify(String baseURL) throws IOException, ParserConfigurationException, SAXException, XPathExpressionException { + super(getRequestURL(baseURL)); + } + + /** + * Get the oai:protocolVersion value from the Identify response + * + * @return the oai:protocolVersion value + * @throws XPathExpressionException + * @throws NoSuchFieldException + */ + public String getProtocolVersion() + throws XPathExpressionException, NoSuchFieldException { + if (SCHEMA_LOCATION_V2_0.equals(getSchemaLocation())) { + return getSingleString("/oai20:OAI-PMH/oai20:Identify/oai20:protocolVersion"); + } else if (SCHEMA_LOCATION_V1_1_IDENTIFY.equals(getSchemaLocation())) { + return getSingleString("/oai11_Identify:Identify/oai11_Identify:protocolVersion"); + } else { + throw new NoSuchFieldException(getSchemaLocation()); + } + } + + /** + * generate the Identify request URL for the specified baseURL + * + * @param baseURL + * @return the requestURL + */ + private static String getRequestURL(String baseURL) { + StringBuilder requestURL = new StringBuilder(baseURL); + requestURL.append("?verb=Identify"); + return requestURL.toString(); + } +} diff --git a/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/ListIdentifiers.java b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/ListIdentifiers.java new file mode 100644 index 000000000..6a0c340c5 --- /dev/null +++ b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/ListIdentifiers.java 
@@ -0,0 +1,121 @@ +/** + * Copyright 2006 OCLC, Online Computer Library Center Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. You may obtain a copy of the + * License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and limitations under the License. + */ +package org.oclc.oai.harvester2.verb; + +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.net.MalformedURLException; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; + +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.xpath.XPathExpressionException; +import org.xml.sax.SAXException; + +/** + * This class represents an ListIdentifiers response on either the server or on the client + * + * @author Jeffrey A. 
Young, OCLC Online Computer Library Center
+ */
+public class ListIdentifiers extends HarvesterVerb {
+
+    /**
+     * Mock object constructor (for unit testing purposes); it only delegates to the
+     * no-argument {@link HarvesterVerb} constructor.
+     */
+    public ListIdentifiers() {
+        super();
+    }
+
+    /**
+     * Client-side ListIdentifiers verb constructor.
+     *
+     * <p>All argument values are URL-encoded (UTF-8) before they are placed in the query string.
+     *
+     * @param baseURL the baseURL of the server to be queried
+     * @param from value of the {@code from} argument; left out of the request when {@code null}
+     * @param until value of the {@code until} argument; left out of the request when {@code null}
+     * @param set value of the {@code set} argument; left out of the request when {@code null}
+     * @param metadataPrefix value of the {@code metadataPrefix} argument; always sent
+     * @throws MalformedURLException the baseURL is bad
+     * @throws SAXException the xml response is bad
+     * @throws IOException an I/O error occurred
+     * @throws XPathExpressionException propagated from the {@link HarvesterVerb} constructor
+     * @throws ParserConfigurationException propagated from the {@link HarvesterVerb} constructor
+     */
+    public ListIdentifiers(String baseURL, String from, String until, String set, String metadataPrefix)
+            throws IOException, ParserConfigurationException, SAXException, XPathExpressionException {
+        super(getRequestURL(baseURL, from, until, set, metadataPrefix));
+    }
+
+    /**
+     * Client-side ListIdentifiers verb constructor (resumptionToken version)
+     *
+     * @param baseURL the baseURL of the server to be queried
+     * @param resumptionToken the token to send; it is URL-encoded (UTF-8) for the request
+     * @throws IOException propagated from the {@link HarvesterVerb} constructor or from encoding
+     *         the token
+     * @throws ParserConfigurationException propagated from the {@link HarvesterVerb} constructor
+     * @throws SAXException propagated from the {@link HarvesterVerb} constructor
+     * @throws XPathExpressionException propagated from the {@link HarvesterVerb} constructor
+     */
+    public ListIdentifiers(String baseURL, String resumptionToken)
+            throws IOException, ParserConfigurationException, SAXException,
+            XPathExpressionException {
+        super(getRequestURL(baseURL, resumptionToken));
+    }
+
+    /**
+     * Get the oai:resumptionToken from the response
+     *
+     * @return the oai:resumptionToken value
+     * @throws XPathExpressionException if evaluating the XPath expression fails
+     * @throws NoSuchFieldException if the schema location matches neither known schema; the
+     *         message is the schema location that was found
+     */
+    public String getResumptionToken()
+            throws XPathExpressionException, NoSuchFieldException {
+        // NOTE(review): ListRecords matches the schema location with contains(); confirm
+        // whether exact equality is really intended here.
+        final String schemaLocation = getSchemaLocation();
+        if (SCHEMA_LOCATION_V2_0.equals(schemaLocation)) {
+            return getSingleString("/oai20:OAI-PMH/oai20:ListIdentifiers/oai20:resumptionToken");
+        } else if (SCHEMA_LOCATION_V1_1_LIST_IDENTIFIERS.equals(schemaLocation)) {
+            return getSingleString("/oai11_ListIdentifiers:ListIdentifiers/oai11_ListIdentifiers:resumptionToken");
+        } else {
+            throw new NoSuchFieldException(schemaLocation);
+        }
+    }
+
+    /**
+     * Construct the http request for a selective ListIdentifiers harvest.
+     *
+     * <p>Argument values are URL-encoded: OAI-PMH requires special characters in GET requests
+     * (for instance the {@code :} in UTC timestamps) to be escaped.
+     *
+     * @return a String containing the baseURL followed by the query portion of the http request
+     * @throws UnsupportedEncodingException if the UTF-8 encoding is not supported
+     */
+    private static String getRequestURL(String baseURL, String from, String until, String set, String metadataPrefix)
+            throws UnsupportedEncodingException {
+        StringBuilder requestURL = new StringBuilder(baseURL);
+        requestURL.append("?verb=ListIdentifiers");
+        appendArgument(requestURL, "from", from);
+        appendArgument(requestURL, "until", until);
+        appendArgument(requestURL, "set", set);
+        // String.valueOf keeps the previous behaviour of always sending metadataPrefix,
+        // even when the caller passed null.
+        appendArgument(requestURL, "metadataPrefix", String.valueOf(metadataPrefix));
+        return requestURL.toString();
+    }
+
+    /**
+     * Append {@code &name=value} to the request, URL-encoding the value; does nothing when the
+     * value is {@code null}.
+     */
+    private static void appendArgument(StringBuilder requestURL, String name, String value)
+            throws UnsupportedEncodingException {
+        if (value != null) {
+            requestURL.append('&').append(name).append('=')
+                    .append(URLEncoder.encode(value, StandardCharsets.UTF_8.name()));
+        }
+    }
+
+    /**
+     * Construct the http request (resumptionToken version)
+     *
+     * @param baseURL the baseURL of the server to be queried
+     * @param resumptionToken the token to send; URL-encoded (UTF-8) before it is appended
+     * @return the baseURL followed by the query portion of the http request
+     * @throws UnsupportedEncodingException if the UTF-8 encoding is not supported
+     */
+    private static String getRequestURL(String baseURL,
+            String resumptionToken) throws UnsupportedEncodingException {
+        StringBuilder requestURL = new StringBuilder(baseURL);
+        requestURL.append("?verb=ListIdentifiers");
+        requestURL.append("&resumptionToken=").append(URLEncoder.encode(resumptionToken, StandardCharsets.UTF_8.name()));
+        return requestURL.toString();
+    }
+}
diff --git a/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/ListMetadataFormats.java b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/ListMetadataFormats.java
new file mode 100644
index 000000000..0facd07e9
--- /dev/null
+++ b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/ListMetadataFormats.java
@@ -0,0 +1,77 @@
+/**
+ * Copyright 2006 OCLC, Online Computer Library Center Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the
+ * License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is
+ * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and limitations under the License.
+ */
+package org.oclc.oai.harvester2.verb;
+
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.net.MalformedURLException;
+import java.net.URLEncoder;
+import java.nio.charset.StandardCharsets;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.xpath.XPathExpressionException;
+import org.xml.sax.SAXException;
+
+/**
+ * This class represents a ListMetadataFormats response on either the server or on the client
+ *
+ * @author Jeffrey A. Young, OCLC Online Computer Library Center
+ */
+public class ListMetadataFormats extends HarvesterVerb {
+
+    /**
+     * Mock object constructor (for unit testing purposes); it only delegates to the
+     * no-argument {@link HarvesterVerb} constructor.
+     */
+    public ListMetadataFormats() {
+        super();
+    }
+
+    /**
+     * Client-side ListMetadataFormats verb constructor; equivalent to calling the identifier
+     * version with a {@code null} identifier.
+     *
+     * @param baseURL the baseURL of the server to be queried
+     * @throws MalformedURLException the baseURL is bad
+     * @throws javax.xml.parsers.ParserConfigurationException propagated from the
+     *         {@link HarvesterVerb} constructor
+     * @throws SAXException the xml response is bad
+     * @throws javax.xml.xpath.XPathExpressionException propagated from the
+     *         {@link HarvesterVerb} constructor
+     * @throws IOException an I/O error occurred
+     */
+    public ListMetadataFormats(String baseURL) throws IOException, ParserConfigurationException,
+            SAXException, XPathExpressionException {
+        this(baseURL, null);
+    }
+
+    /**
+     * Client-side ListMetadataFormats verb constructor (identifier version)
+     *
+     * @param baseURL the baseURL of the server to be queried
+     * @param identifier value of the {@code identifier} argument; left out of the request when
+     *        {@code null}, otherwise URL-encoded (UTF-8)
+     * @throws IOException propagated from the {@link HarvesterVerb} constructor or from encoding
+     *         the identifier
+     * @throws ParserConfigurationException propagated from the {@link HarvesterVerb} constructor
+     * @throws SAXException propagated from the {@link HarvesterVerb} constructor
+     * @throws XPathExpressionException propagated from the {@link HarvesterVerb} constructor
+     */
+    public ListMetadataFormats(String baseURL, String identifier)
+            throws IOException, ParserConfigurationException, SAXException,
+            XPathExpressionException {
+        super(getRequestURL(baseURL, identifier));
+    }
+
+    /**
+     * Construct the http request for ListMetadataFormats.
+     *
+     * <p>The identifier is URL-encoded: OAI identifiers are URIs and typically contain
+     * {@code :} and {@code /}, which OAI-PMH requires to be escaped in GET requests.
+     *
+     * @return a String containing the baseURL followed by the query portion of the http request
+     * @throws UnsupportedEncodingException if the UTF-8 encoding is not supported
+     */
+    private static String getRequestURL(String baseURL, String identifier)
+            throws UnsupportedEncodingException {
+        StringBuilder requestURL = new StringBuilder(baseURL);
+        requestURL.append("?verb=ListMetadataFormats");
+        if (identifier != null) {
+            requestURL.append("&identifier=")
+                    .append(URLEncoder.encode(identifier, StandardCharsets.UTF_8.name()));
+        }
+        return requestURL.toString();
+    }
+}
diff --git a/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/ListRecords.java b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/ListRecords.java
new file mode 100644
index 000000000..6be0bcfb3
--- /dev/null
+++ b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/ListRecords.java
@@ -0,0 +1,120 @@
+/**
+ * Copyright 2006 OCLC, Online Computer Library Center Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License. You may obtain a copy of the
+ * License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is
+ * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and limitations under the License.
+ */
+package org.oclc.oai.harvester2.verb;
+
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.net.MalformedURLException;
+import java.net.URLEncoder;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.xpath.XPathExpressionException;
+import org.xml.sax.SAXException;
+
+/**
+ * This class represents an ListRecords response on either the server or on the client
+ *
+ * @author Jeffrey A.
Young, OCLC Online Computer Library Center
+ */
+public class ListRecords extends HarvesterVerb {
+
+    /**
+     * Mock object constructor (for unit testing purposes); it only delegates to the
+     * no-argument {@link HarvesterVerb} constructor.
+     */
+    public ListRecords() {
+        super();
+    }
+
+    /**
+     * Client-side ListRecords verb constructor.
+     *
+     * <p>All argument values are URL-encoded (UTF-8) before they are placed in the query string.
+     *
+     * @param baseURL the baseURL of the server to be queried
+     * @param from value of the {@code from} argument; left out of the request when {@code null}
+     * @param until value of the {@code until} argument; left out of the request when {@code null}
+     * @param set value of the {@code set} argument; left out of the request when {@code null}
+     * @param metadataPrefix value of the {@code metadataPrefix} argument; always sent
+     * @throws MalformedURLException the baseURL is bad
+     * @throws SAXException the xml response is bad
+     * @throws IOException an I/O error occurred
+     * @throws javax.xml.parsers.ParserConfigurationException propagated from the
+     *         {@link HarvesterVerb} constructor
+     * @throws javax.xml.xpath.XPathExpressionException propagated from the
+     *         {@link HarvesterVerb} constructor
+     */
+    public ListRecords(String baseURL, String from, String until, String set, String metadataPrefix)
+            throws IOException, ParserConfigurationException, SAXException, XPathExpressionException {
+        super(getRequestURL(baseURL, from, until, set, metadataPrefix));
+    }
+
+    /**
+     * Client-side ListRecords verb constructor (resumptionToken version)
+     *
+     * @param baseURL the baseURL of the server to be queried
+     * @param resumptionToken the token to send; it is URL-encoded (UTF-8) for the request
+     * @throws IOException propagated from the {@link HarvesterVerb} constructor or from encoding
+     *         the token
+     * @throws ParserConfigurationException propagated from the {@link HarvesterVerb} constructor
+     * @throws SAXException propagated from the {@link HarvesterVerb} constructor
+     * @throws XPathExpressionException propagated from the {@link HarvesterVerb} constructor
+     */
+    public ListRecords(String baseURL, String resumptionToken)
+            throws IOException, ParserConfigurationException, SAXException, XPathExpressionException {
+        super(getRequestURL(baseURL, resumptionToken));
+    }
+
+    /**
+     * Get the oai:resumptionToken from the response.
+     *
+     * <p>The XPath used is chosen by checking which known schema location is contained in the
+     * schema location of the response.
+     *
+     * @return the oai:resumptionToken value
+     * @throws XPathExpressionException if evaluating the XPath expression fails
+     * @throws NoSuchFieldException if the schema location contains neither known schema; the
+     *         message is the schema location that was found
+     */
+    public String getResumptionToken()
+            throws XPathExpressionException, NoSuchFieldException {
+        String schemaLocation = getSchemaLocation();
+        if (schemaLocation.contains(SCHEMA_LOCATION_V2_0)) {
+            return getSingleString("/oai20:OAI-PMH/oai20:ListRecords/oai20:resumptionToken");
+        } else if (schemaLocation.contains(SCHEMA_LOCATION_V1_1_LIST_RECORDS)) {
+            return getSingleString("/oai11_ListRecords:ListRecords/oai11_ListRecords:resumptionToken");
+        } else {
+            throw new NoSuchFieldException(schemaLocation);
+        }
+    }
+
+    /**
+     * Construct the http request for a selective ListRecords harvest.
+     *
+     * <p>Argument values are URL-encoded: OAI-PMH requires special characters in GET requests
+     * (for instance the {@code :} in UTC timestamps) to be escaped.
+     *
+     * @return a String containing the baseURL followed by the query portion of the http request
+     * @throws UnsupportedEncodingException if the UTF-8 encoding is not supported
+     */
+    private static String getRequestURL(String baseURL, String from,
+            String until, String set,
+            String metadataPrefix) throws UnsupportedEncodingException {
+        StringBuilder requestURL = new StringBuilder(baseURL);
+        requestURL.append("?verb=ListRecords");
+        appendArgument(requestURL, "from", from);
+        appendArgument(requestURL, "until", until);
+        appendArgument(requestURL, "set", set);
+        // String.valueOf keeps the previous behaviour of always sending metadataPrefix,
+        // even when the caller passed null.
+        appendArgument(requestURL, "metadataPrefix", String.valueOf(metadataPrefix));
+        return requestURL.toString();
+    }
+
+    /**
+     * Append {@code &name=value} to the request, URL-encoding the value; does nothing when the
+     * value is {@code null}.
+     */
+    private static void appendArgument(StringBuilder requestURL, String name, String value)
+            throws UnsupportedEncodingException {
+        if (value != null) {
+            requestURL.append('&').append(name).append('=')
+                    .append(URLEncoder.encode(value, "UTF-8"));
+        }
+    }
+
+    /**
+     * Construct the http request (resumptionToken version)
+     *
+     * @param baseURL the baseURL of the server to be queried
+     * @param resumptionToken the token to send; URL-encoded (UTF-8) before it is appended
+     * @return the baseURL followed by the query portion of the http request
+     * @throws UnsupportedEncodingException if the UTF-8 encoding is not supported
+     */
+    private static String getRequestURL(String baseURL, String resumptionToken) throws UnsupportedEncodingException {
+        StringBuilder requestURL = new StringBuilder(baseURL);
+        requestURL.append("?verb=ListRecords");
+        requestURL.append("&resumptionToken=").append(URLEncoder.encode(resumptionToken, "UTF-8"));
+        return requestURL.toString();
+    }
+}
diff --git a/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/ListSets.java b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/ListSets.java
new file mode 100644
index 000000000..f4bbff4f4
--- /dev/null
+++ b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/ListSets.java
@@ -0,0 +1,111 @@
+/**
+ * Copyright 2006 OCLC, Online Computer Library Center Licensed under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the
+ * License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is
+ * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and limitations under the License.
+ */
+package org.oclc.oai.harvester2.verb;
+
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.net.MalformedURLException;
+import java.net.URLEncoder;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.xpath.XPathExpressionException;
+import org.xml.sax.SAXException;
+
+/**
+ * This class represents a ListSets response on either the server or on the client
+ *
+ * @author Jeffrey A. Young, OCLC Online Computer Library Center
+ */
+public class ListSets extends HarvesterVerb {
+
+    /**
+     * Mock object constructor (for unit testing purposes); it only delegates to the
+     * no-argument {@link HarvesterVerb} constructor.
+     */
+    public ListSets() {
+        super();
+    }
+
+    /**
+     * Client-side ListSets verb constructor; requests {@code baseURL} followed by
+     * {@code ?verb=ListSets}.
+     *
+     * @param baseURL the baseURL of the server to be queried
+     * @exception MalformedURLException the baseURL is bad
+     * @exception IOException an I/O error occurred
+     * @throws ParserConfigurationException propagated from the {@link HarvesterVerb} constructor
+     * @throws SAXException propagated from the {@link HarvesterVerb} constructor
+     * @throws XPathExpressionException propagated from the {@link HarvesterVerb} constructor
+     */
+    public ListSets(String baseURL) throws IOException, ParserConfigurationException,
+            SAXException, XPathExpressionException {
+        super(getRequestURL(baseURL));
+    }
+
+    /**
+     * Client-side ListSets verb constructor (resumptionToken version)
+     *
+     * @param baseURL the baseURL of the server to be queried
+     * @param resumptionToken the token to send; it is URL-encoded (UTF-8) for the request
+     * @throws IOException propagated from the {@link HarvesterVerb} constructor or from encoding
+     *         the token
+     * @throws ParserConfigurationException propagated from the {@link HarvesterVerb} constructor
+     * @throws SAXException propagated from the {@link HarvesterVerb} constructor
+     * @throws XPathExpressionException propagated from the {@link HarvesterVerb} constructor
+     */
+    public ListSets(String baseURL, String resumptionToken)
+            throws IOException, ParserConfigurationException, SAXException,
+            XPathExpressionException {
+        super(getRequestURL(baseURL, resumptionToken));
+    }
+
+    /**
+     * Get the oai:resumptionToken from the response.
+     *
+     * <p>The XPath used depends on the schema location of the response: one expression for
+     * OAI-PMH 2.0 and one for the OAI 1.1 ListSets schema.
+     *
+     * @return the oai:resumptionToken as a String
+     * @throws XPathExpressionException if evaluating the XPath expression fails
+     * @throws NoSuchFieldException if the schema location matches neither known schema; the
+     *         message is the schema location that was found
+     */
+    public String getResumptionToken()
+            throws XPathExpressionException, NoSuchFieldException {
+        final String schemaLocation = getSchemaLocation();
+        if (SCHEMA_LOCATION_V2_0.equals(schemaLocation)) {
+            return getSingleString("/oai20:OAI-PMH/oai20:ListSets/oai20:resumptionToken");
+        }
+        if (SCHEMA_LOCATION_V1_1_LIST_SETS.equals(schemaLocation)) {
+            return getSingleString("/oai11_ListSets:ListSets/oai11_ListSets:resumptionToken");
+        }
+        throw new NoSuchFieldException(schemaLocation);
+    }
+
+    /**
+     * Generate a ListSets request for the given baseURL
+     *
+     * @param baseURL the baseURL of the server to be queried
+     * @return {@code baseURL} with {@code ?verb=ListSets} appended
+     */
+    private static String getRequestURL(String baseURL) {
+        return new StringBuilder(baseURL)
+                .append("?verb=ListSets")
+                .toString();
+    }
+
+    /**
+     * Generate a ListSets request for the given baseURL and resumptionToken
+     *
+     * @param baseURL the baseURL of the server to be queried
+     * @param resumptionToken the token to send; URL-encoded (UTF-8) before it is appended
+     * @return the baseURL followed by the verb and the encoded resumptionToken argument
+     * @throws UnsupportedEncodingException if the UTF-8 encoding is not supported
+     */
+    private static String getRequestURL(String baseURL,
+            String resumptionToken) throws UnsupportedEncodingException {
+        return new StringBuilder(baseURL)
+                .append("?verb=ListSets")
+                .append("&resumptionToken=")
+                .append(URLEncoder.encode(resumptionToken, "UTF-8"))
+                .toString();
+    }
+}
diff --git a/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/OAINamespaceContext.java b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/OAINamespaceContext.java
new file mode 100644
index 000000000..ec4033f91
--- /dev/null
+++ b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/OAINamespaceContext.java
@@ -0,0 +1,61 @@
+/*
+ * To change this license header, choose License Headers in Project Properties.
+ * To change this template file, choose Tools | Templates
+ * and open the template in the editor.
+ */
+package org.oclc.oai.harvester2.verb;
+
+import java.util.Collections;
+import java.util.Iterator;
+import javax.xml.namespace.NamespaceContext;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import org.w3c.dom.DOMImplementation;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+
+/**
+ * Namespace context that resolves the prefixes used in the harvester's XPath expressions
+ * ({@code harvester}, {@code xsi}, {@code oai20} and the {@code oai11_*} prefixes).
+ *
+ * <p>The bindings are stored as namespace declarations on an otherwise unused DOM element;
+ * lookups are delegated to that element.
+ *
+ * @author Michal Hlavac (michal_hlavac@datalan.sk)
+ */
+public class OAINamespaceContext implements NamespaceContext {
+
+    /** Namespace of {@code xmlns:*} attributes, i.e. of namespace declarations. */
+    private static final String XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/";
+
+    /** Element that carries one namespace declaration per supported prefix. */
+    private final Element namespaceElement;
+
+    /**
+     * Creates the context and its internal holder element.
+     *
+     * @param factory factory used to obtain the DOM implementation that builds the holder element
+     * @throws ParserConfigurationException if the factory cannot create a document builder
+     */
+    public OAINamespaceContext(DocumentBuilderFactory factory) throws ParserConfigurationException {
+        this.namespaceElement = buildNamespaceElement(factory);
+    }
+
+    /**
+     * Returns the namespace URI bound to the prefix, as reported by
+     * {@link Element#lookupNamespaceURI(String)} on the holder element.
+     *
+     * @param prefix the prefix to look up
+     * @return the result of the DOM lookup on the holder element
+     */
+    @Override
+    public String getNamespaceURI(String prefix) {
+        // Deliberately unchanged: the DOM lookup result is passed through as is, so callers
+        // see the same outcome for unknown prefixes as before.
+        return namespaceElement.lookupNamespaceURI(prefix);
+    }
+
+    /**
+     * Returns the prefix bound to the namespace URI, as reported by
+     * {@link Element#lookupPrefix(String)} on the holder element.
+     *
+     * @param namespaceURI the namespace URI to look up
+     * @return the bound prefix, or {@code null} if the URI is not declared on the holder element
+     * @throws IllegalArgumentException if {@code namespaceURI} is {@code null}
+     */
+    @Override
+    public String getPrefix(String namespaceURI) {
+        if (namespaceURI == null) {
+            throw new IllegalArgumentException("namespaceURI must not be null");
+        }
+        return namespaceElement.lookupPrefix(namespaceURI);
+    }
+
+    /**
+     * Returns the prefixes bound to the namespace URI. Each URI is declared with exactly one
+     * prefix on the holder element, so the iterator yields at most one value.
+     *
+     * @param namespaceURI the namespace URI to look up
+     * @return a read-only iterator over the bound prefix; empty (never {@code null}) if the URI
+     *         is not declared
+     * @throws IllegalArgumentException if {@code namespaceURI} is {@code null}
+     */
+    @Override
+    public Iterator<String> getPrefixes(String namespaceURI) {
+        final String prefix = getPrefix(namespaceURI);
+        if (prefix == null) {
+            return Collections.<String>emptyIterator();
+        }
+        return Collections.singleton(prefix).iterator();
+    }
+
+    /**
+     * Builds the holder element and declares every supported prefix on it.
+     */
+    private static Element buildNamespaceElement(DocumentBuilderFactory factory) throws ParserConfigurationException {
+        DOMImplementation impl = factory.newDocumentBuilder().getDOMImplementation();
+        Document namespaceHolder = impl.createDocument(
+                "http://www.oclc.org/research/software/oai/harvester",
+                "harvester:namespaceHolder", null);
+
+        Element el = namespaceHolder.getDocumentElement();
+        declare(el, "harvester", "http://www.oclc.org/research/software/oai/harvester");
+        declare(el, "xsi", "http://www.w3.org/2001/XMLSchema-instance");
+        declare(el, "oai20", "http://www.openarchives.org/OAI/2.0/");
+        declare(el, "oai11_GetRecord", "http://www.openarchives.org/OAI/1.1/OAI_GetRecord");
+        declare(el, "oai11_Identify", "http://www.openarchives.org/OAI/1.1/OAI_Identify");
+        declare(el, "oai11_ListIdentifiers", "http://www.openarchives.org/OAI/1.1/OAI_ListIdentifiers");
+        declare(el, "oai11_ListMetadataFormats", "http://www.openarchives.org/OAI/1.1/OAI_ListMetadataFormats");
+        declare(el, "oai11_ListRecords", "http://www.openarchives.org/OAI/1.1/OAI_ListRecords");
+        declare(el, "oai11_ListSets", "http://www.openarchives.org/OAI/1.1/OAI_ListSets");
+        return el;
+    }
+
+    /**
+     * Adds the namespace declaration {@code xmlns:prefix="uri"} to the element.
+     */
+    private static void declare(Element el, String prefix, String uri) {
+        el.setAttributeNS(XMLNS_NAMESPACE, "xmlns:" + prefix, uri);
+    }
+}
diff --git a/metafacture-runner/build.gradle b/metafacture-runner/build.gradle
index 55eac9bad..53cbbe0b3 100644
--- a/metafacture-runner/build.gradle
+++ b/metafacture-runner/build.gradle
@@ -67,7 +67,7 @@ dependencies {
     // class loader which was used to load the classes of the slf4j-api. Until
     // a solution is found for this problem, the binding need to be placed on the
     // class path:
-    runtimeOnly 'org.slf4j:slf4j-log4j12:1.7.21'
+    runtime 'org.slf4j:slf4j-log4j12:1.7.21'
     // The following dependencies are placed in the "provided" scope to prevent
     // them from being included in the class path but still have them available