From d8f3afdf99ddabb10ea4fec5998ce8b6b3f790f7 Mon Sep 17 00:00:00 2001 From: Fabian Steeg Date: Wed, 10 Mar 2021 13:39:23 +0100 Subject: [PATCH 1/9] Switch OAI-PMH harvester library Using https://github.com/hbz/oai-harvester2 via jitpack.io See https://github.com/metafacture/metafacture-core/issues/360 --- build.gradle | 1 + metafacture-biblio/build.gradle | 2 +- .../main/java/org/metafacture/biblio/OaiPmhOpener.java | 9 ++++----- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/build.gradle b/build.gradle index 3a2e4d071..6647ecb75 100644 --- a/build.gradle +++ b/build.gradle @@ -81,6 +81,7 @@ subprojects { repositories { mavenLocal() mavenCentral() + maven { url 'https://jitpack.io' } } } diff --git a/metafacture-biblio/build.gradle b/metafacture-biblio/build.gradle index 7faf64733..1236c9b2a 100644 --- a/metafacture-biblio/build.gradle +++ b/metafacture-biblio/build.gradle @@ -21,7 +21,7 @@ dependencies { api project(':metafacture-framework') implementation project(':metafacture-commons') implementation project(':metafacture-flowcontrol') - implementation 'org.dspace:oclc-harvester2:0.1.12' + implementation 'com.github.hbz:oai-harvester2:master-SNAPSHOT' implementation ('xalan:xalan:2.7.0') { exclude group: 'xalan', module: 'serializer' exclude group: 'xercesImpl', module: 'xercesImpl' diff --git a/metafacture-biblio/src/main/java/org/metafacture/biblio/OaiPmhOpener.java b/metafacture-biblio/src/main/java/org/metafacture/biblio/OaiPmhOpener.java index cfec369c2..664b3de6e 100644 --- a/metafacture-biblio/src/main/java/org/metafacture/biblio/OaiPmhOpener.java +++ b/metafacture-biblio/src/main/java/org/metafacture/biblio/OaiPmhOpener.java @@ -10,7 +10,7 @@ import java.io.Reader; import javax.xml.parsers.ParserConfigurationException; -import javax.xml.transform.TransformerException; +import javax.xml.xpath.XPathException; import org.metafacture.framework.MetafactureException; import org.metafacture.framework.ObjectReceiver; @@ -18,10 +18,9 @@ import org.metafacture.framework.annotations.In; import org.metafacture.framework.annotations.Out; import org.metafacture.framework.helpers.DefaultObjectPipe; +import org.oclc.oai.harvester2.app.RawWrite; import org.xml.sax.SAXException; -import ORG.oclc.oai.harvester2.app.RawWrite; - /** * Opens an OAI-PMH stream and passes a reader to the receiver. * @@ -111,10 +110,10 @@ public void process(final String baseUrl) { e.printStackTrace(); } catch (SAXException e) { e.printStackTrace(); - } catch (TransformerException e) { - e.printStackTrace(); } catch (NoSuchFieldException e) { e.printStackTrace(); + } catch (XPathException e) { + e.printStackTrace(); } try { getReceiver().process( From e2c76436eab2efc906e7a1f8b05bce7a68ddaff2 Mon Sep 17 00:00:00 2001 From: Fabian Steeg Date: Thu, 11 Mar 2021 13:50:50 +0100 Subject: [PATCH 2/9] Revert switch of OAI-PMH harvester library Causes downstream problems due to jitpack.io requirement See https://github.com/metafacture/metafacture-core/issues/360 --- build.gradle | 1 - metafacture-biblio/build.gradle | 2 +- .../main/java/org/metafacture/biblio/OaiPmhOpener.java | 9 +++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/build.gradle b/build.gradle index 6647ecb75..3a2e4d071 100644 --- a/build.gradle +++ b/build.gradle @@ -81,7 +81,6 @@ subprojects { repositories { mavenLocal() mavenCentral() - maven { url 'https://jitpack.io' } } } diff --git a/metafacture-biblio/build.gradle b/metafacture-biblio/build.gradle index 1236c9b2a..7faf64733 100644 --- a/metafacture-biblio/build.gradle +++ b/metafacture-biblio/build.gradle @@ -21,7 +21,7 @@ dependencies { api project(':metafacture-framework') implementation project(':metafacture-commons') implementation project(':metafacture-flowcontrol') - implementation 'com.github.hbz:oai-harvester2:master-SNAPSHOT' + implementation 'org.dspace:oclc-harvester2:0.1.12' implementation ('xalan:xalan:2.7.0') { exclude group: 'xalan', module: 'serializer' exclude group: 'xercesImpl', module: 'xercesImpl' diff --git a/metafacture-biblio/src/main/java/org/metafacture/biblio/OaiPmhOpener.java b/metafacture-biblio/src/main/java/org/metafacture/biblio/OaiPmhOpener.java index 664b3de6e..cfec369c2 100644 --- a/metafacture-biblio/src/main/java/org/metafacture/biblio/OaiPmhOpener.java +++ b/metafacture-biblio/src/main/java/org/metafacture/biblio/OaiPmhOpener.java @@ -10,7 +10,7 @@ import java.io.Reader; import javax.xml.parsers.ParserConfigurationException; -import javax.xml.xpath.XPathException; +import javax.xml.transform.TransformerException; import org.metafacture.framework.MetafactureException; import org.metafacture.framework.ObjectReceiver; @@ -18,9 +18,10 @@ import org.metafacture.framework.annotations.In; import org.metafacture.framework.annotations.Out; import org.metafacture.framework.helpers.DefaultObjectPipe; -import org.oclc.oai.harvester2.app.RawWrite; import org.xml.sax.SAXException; +import ORG.oclc.oai.harvester2.app.RawWrite; + /** * Opens an OAI-PMH stream and passes a reader to the receiver. * @@ -110,9 +111,9 @@ public void process(final String baseUrl) { e.printStackTrace(); } catch (SAXException e) { e.printStackTrace(); - } catch (NoSuchFieldException e) { + } catch (TransformerException e) { e.printStackTrace(); - } catch (XPathException e) { + } catch (NoSuchFieldException e) { e.printStackTrace(); } try { From 96f36a53743a2c9236c85e8c2470881ab87bf25b Mon Sep 17 00:00:00 2001 From: Fabian Steeg Date: Thu, 11 Mar 2021 16:59:57 +0100 Subject: [PATCH 3/9] Add harvester2 package from https://github.com/hbz/oai-harvester2 See https://github.com/metafacture/metafacture-core/issues/360 --- metafacture-biblio/build.gradle | 2 +- .../org/metafacture/biblio/OaiPmhOpener.java | 8 +- .../org/oclc/oai/harvester2/app/RawWrite.java | 170 +++++++++++ .../oclc/oai/harvester2/verb/GetRecord.java | 80 +++++ .../oai/harvester2/verb/HarvesterVerb.java | 287 ++++++++++++++++++ .../oclc/oai/harvester2/verb/Identify.java | 77 +++++ .../oai/harvester2/verb/ListIdentifiers.java | 117 +++++++ .../harvester2/verb/ListMetadataFormats.java | 77 +++++ .../oclc/oai/harvester2/verb/ListRecords.java | 120 ++++++++ .../oclc/oai/harvester2/verb/ListSets.java | 111 +++++++ .../harvester2/verb/OAINamespaceContext.java | 61 ++++ 11 files changed, 1105 insertions(+), 5 deletions(-) create mode 100644 metafacture-biblio/src/main/java/org/oclc/oai/harvester2/app/RawWrite.java create mode 100644 metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/GetRecord.java create mode 100644 metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/HarvesterVerb.java create mode 100644 metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/Identify.java create mode 100644 metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/ListIdentifiers.java create mode 100644 metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/ListMetadataFormats.java create mode 100644 metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/ListRecords.java create mode 100644 metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/ListSets.java create mode 100644 metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/OAINamespaceContext.java diff --git a/metafacture-biblio/build.gradle b/metafacture-biblio/build.gradle index 7faf64733..c34a362fe 100644 --- a/metafacture-biblio/build.gradle +++ b/metafacture-biblio/build.gradle @@ -21,13 +21,13 @@ dependencies { api project(':metafacture-framework') implementation project(':metafacture-commons') implementation project(':metafacture-flowcontrol') - implementation 'org.dspace:oclc-harvester2:0.1.12' implementation ('xalan:xalan:2.7.0') { exclude group: 'xalan', module: 'serializer' exclude group: 'xercesImpl', module: 'xercesImpl' exclude group: 'xml-apis', module: 'xml-apis' } implementation 'log4j:log4j:1.2.12' + implementation 'org.slf4j:slf4j-api:1.7.7' testImplementation 'junit:junit:4.12' testImplementation 'org.mockito:mockito-core:2.5.5' } diff --git a/metafacture-biblio/src/main/java/org/metafacture/biblio/OaiPmhOpener.java b/metafacture-biblio/src/main/java/org/metafacture/biblio/OaiPmhOpener.java index cfec369c2..2ce807079 100644 --- a/metafacture-biblio/src/main/java/org/metafacture/biblio/OaiPmhOpener.java +++ b/metafacture-biblio/src/main/java/org/metafacture/biblio/OaiPmhOpener.java @@ -11,6 +11,7 @@ import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.TransformerException; +import javax.xml.xpath.XPathException; import org.metafacture.framework.MetafactureException; import org.metafacture.framework.ObjectReceiver; @@ -18,10 +19,9 @@ import org.metafacture.framework.annotations.In; import org.metafacture.framework.annotations.Out; import org.metafacture.framework.helpers.DefaultObjectPipe; +import org.oclc.oai.harvester2.app.RawWrite; import org.xml.sax.SAXException; -import ORG.oclc.oai.harvester2.app.RawWrite; - /** * Opens an OAI-PMH stream and passes a reader to the receiver. * @@ -111,10 +111,10 @@ public void process(final String baseUrl) { e.printStackTrace(); } catch (SAXException e) { e.printStackTrace(); - } catch (TransformerException e) { - e.printStackTrace(); } catch (NoSuchFieldException e) { e.printStackTrace(); + } catch (XPathException e) { + e.printStackTrace(); } try { getReceiver().process( diff --git a/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/app/RawWrite.java b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/app/RawWrite.java new file mode 100644 index 000000000..8e4dbe825 --- /dev/null +++ b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/app/RawWrite.java @@ -0,0 +1,170 @@ +/** + * Copyright 2006 OCLC, Online Computer Library Center Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. You may obtain a copy of the + * License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and limitations under the License. + */ +package org.oclc.oai.harvester2.app; + +import java.io.*; +import java.lang.NoSuchFieldException; +import java.util.ArrayList; +import java.util.Date; +import java.util.List; +import java.util.HashMap; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.xpath.XPathException; +import javax.xml.xpath.XPathExpressionException; +import org.oclc.oai.harvester2.verb.*; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; +import org.xml.sax.SAXException; + +public class RawWrite { + + public static void main(String[] args) { + try { + System.out.println(new Date()); + + HashMap options = getOptions(args); + List rootArgs = (List) options.get("rootArgs"); + String baseURL = null; + if (rootArgs.size() > 0) { + baseURL = (String) rootArgs.get(0); + } else { + throw new IllegalArgumentException(); + } + + OutputStream out = System.out; + String outFileName = (String) options.get("-out"); + String from = (String) options.get("-from"); + String until = (String) options.get("-until"); + String metadataPrefix = (String) options.get("-metadataPrefix"); + if (metadataPrefix == null) metadataPrefix = "oai_dc"; + String resumptionToken = (String) options.get("-resumptionToken"); + String setSpec = (String) options.get("-setSpec"); + + if (resumptionToken != null) { + if (outFileName != null) + out = new FileOutputStream(outFileName, true); + run(baseURL, resumptionToken, out); + } else { + if (outFileName != null) + out = new FileOutputStream(outFileName); + run(baseURL, from, until, metadataPrefix, setSpec, out); + } + + if (out != System.out) out.close(); + System.out.println(new Date()); + } catch (IllegalArgumentException e) { + System.err.println("RawWrite <-from date> <-until date> <-metadataPrefix prefix> <-setSpec setName> <-resumptionToken token> <-out fileName> baseURL"); + } catch (Exception e) { + e.printStackTrace(); + System.exit(-1); + } + } + + public static void run(String baseURL, String resumptionToken, + OutputStream out) + throws IOException, ParserConfigurationException, SAXException, XPathExpressionException, + NoSuchFieldException { + ListRecords listRecords = new ListRecords(baseURL, resumptionToken); + while (listRecords != null) { + NodeList errors = listRecords.getErrors(); + if (errors != null && errors.getLength() > 0) { + System.out.println("Found errors"); + int length = errors.getLength(); + for (int i = 0; i < length; ++i) { + Node item = errors.item(i); + System.out.println(item); + } + System.out.println("Error record: " + listRecords.toString()); + break; + } +// System.out.println(listRecords); + out.write(listRecords.toString().getBytes("UTF-8")); + out.write("\n".getBytes("UTF-8")); + resumptionToken = listRecords.getResumptionToken(); + System.out.println("resumptionToken: " + resumptionToken); + if (resumptionToken == null || resumptionToken.length() == 0) { + listRecords = null; + } else { + listRecords = new ListRecords(baseURL, resumptionToken); + } + } + out.write("\n".getBytes("UTF-8")); + } + + public static void run(String baseURL, String from, String until, + String metadataPrefix, String setSpec, + OutputStream out) + throws IOException, ParserConfigurationException, SAXException, XPathException, + NoSuchFieldException { + out.write("\n".getBytes("UTF-8")); + out.write("\n".getBytes("UTF-8")); + out.write(new Identify(baseURL).toString().getBytes("UTF-8")); + out.write("\n".getBytes("UTF-8")); + out.write(new ListMetadataFormats(baseURL).toString().getBytes("UTF-8")); + out.write("\n".getBytes("UTF-8")); + ListSets listSets = new ListSets(baseURL); + while (listSets != null) { + out.write(listSets.toString().getBytes("UTF-8")); + out.write("\n".getBytes("UTF-8")); + String resumptionToken = listSets.getResumptionToken(); + System.out.println("resumptionToken: " + resumptionToken); + if (resumptionToken == null || resumptionToken.length() == 0) { + listSets = null; + } else { + listSets = new ListSets(baseURL, resumptionToken); + } + } + ListRecords listRecords = new ListRecords(baseURL, from, until, setSpec, + metadataPrefix); + while (listRecords != null) { + NodeList errors = listRecords.getErrors(); + if (errors != null && errors.getLength() > 0) { + System.out.println("Found errors"); + int length = errors.getLength(); + for (int i = 0; i < length; ++i) { + Node item = errors.item(i); + System.out.println(item); + } + System.out.println("Error record: " + listRecords.toString()); + break; + } +// System.out.println(listRecords); + out.write(listRecords.toString().getBytes("UTF-8")); + out.write("\n".getBytes("UTF-8")); + String resumptionToken = listRecords.getResumptionToken(); + System.out.println("resumptionToken: " + resumptionToken); + if (resumptionToken == null || resumptionToken.length() == 0) { + listRecords = null; + } else { + listRecords = new ListRecords(baseURL, resumptionToken); + } + } + out.write("\n".getBytes("UTF-8")); + } + + private static HashMap getOptions(String[] args) { + HashMap options = new HashMap(); + ArrayList rootArgs = new ArrayList(); + options.put("rootArgs", rootArgs); + + for (int i = 0; i < args.length; ++i) { + if (args[i].charAt(0) != '-') { + rootArgs.add(args[i]); + } else if (i + 1 < args.length) { + options.put(args[i], args[++i]); + } else { + throw new IllegalArgumentException(); + } + } + return options; + } +} diff --git a/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/GetRecord.java b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/GetRecord.java new file mode 100644 index 000000000..5055acee6 --- /dev/null +++ b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/GetRecord.java @@ -0,0 +1,80 @@ +/** + * Copyright 2006 OCLC, Online Computer Library Center Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. You may obtain a copy of the + * License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and limitations under the License. + */ +package org.oclc.oai.harvester2.verb; + +import java.io.IOException; +import java.net.MalformedURLException; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.xpath.XPathExpressionException; +import org.xml.sax.SAXException; + +/** + * This class represents an GetRecord response on either the server or on the client + * + * @author Jeffrey A. Young, OCLC Online Computer Library Center + */ +public class GetRecord extends HarvesterVerb { + + /** + * Mock object constructor (for unit testing purposes) + */ + public GetRecord() { + super(); + } + + /** + * Client-side GetRecord verb constructor + * + * @param baseURL the baseURL of the server to be queried + * @param identifier + * @param metadataPrefix + * @exception MalformedURLException the baseURL is bad + * @exception SAXException the xml response is bad + * @exception IOException an I/O error occurred + * @throws ParserConfigurationException + * @throws XPathExpressionException + */ + public GetRecord(String baseURL, String identifier, String metadataPrefix) + throws IOException, ParserConfigurationException, SAXException, XPathExpressionException { + super(getRequestURL(baseURL, identifier, metadataPrefix)); + } + + /** + * Get the oai:identifier from the oai:header + * + * @return the oai:identifier as a String + * @throws XPathExpressionException + * @throws NoSuchFieldException + */ + public String getIdentifier() throws XPathExpressionException, NoSuchFieldException { + if (SCHEMA_LOCATION_V2_0.equals(getSchemaLocation())) { + return getSingleString("/oai20:OAI-PMH/oai20:GetRecord/oai20:record/oai20:header/oai20:identifier"); + } else if (SCHEMA_LOCATION_V1_1_GET_RECORD.equals(getSchemaLocation())) { + return getSingleString("/oai11_GetRecord:GetRecord/oai11_GetRecord:record/oai11_GetRecord:header/oai11_GetRecord:identifier"); + } else { + throw new NoSuchFieldException(getSchemaLocation()); + } + } + + /** + * Construct the query portion of the http request + * + * @return a String containing the query portion of the http request + */ + private static String getRequestURL(String baseURL, String identifier, String metadataPrefix) { + StringBuilder requestURL = new StringBuilder(baseURL); + requestURL.append("?verb=GetRecord"); + requestURL.append("&identifier=").append(identifier); + requestURL.append("&metadataPrefix=").append(metadataPrefix); + return requestURL.toString(); + } +} diff --git a/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/HarvesterVerb.java b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/HarvesterVerb.java new file mode 100644 index 000000000..c99c4ab4d --- /dev/null +++ b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/HarvesterVerb.java @@ -0,0 +1,287 @@ +/** + * Copyright 2006 OCLC, Online Computer Library Center Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. You may obtain a copy of the + * License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and limitations under the License. + */ +package org.oclc.oai.harvester2.verb; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.io.StringWriter; +import java.net.HttpURLConnection; +import java.net.URL; +import java.util.Date; +import java.util.zip.GZIPInputStream; +import java.util.zip.InflaterInputStream; +import java.util.zip.ZipInputStream; + +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.transform.OutputKeys; +import javax.xml.transform.Result; +import javax.xml.transform.Source; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerException; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamResult; +import javax.xml.xpath.XPath; +import javax.xml.xpath.XPathConstants; +import javax.xml.xpath.XPathExpressionException; +import javax.xml.xpath.XPathFactory; + +//import org.apache.xpath.XPathAPI; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.w3c.dom.Document; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; + +/** + * HarvesterVerb is the parent class for each of the OAI verbs. + * + * @author Jefffrey A. Young, OCLC Online Computer Library Center + */ +public abstract class HarvesterVerb { + + private static final Logger log = LoggerFactory.getLogger(HarvesterVerb.class); + + /* Primary OAI namespaces */ + public static final String SCHEMA_LOCATION_V2_0 = "http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"; + public static final String SCHEMA_LOCATION_V1_1_GET_RECORD = "http://www.openarchives.org/OAI/1.1/OAI_GetRecord http://www.openarchives.org/OAI/1.1/OAI_GetRecord.xsd"; + public static final String SCHEMA_LOCATION_V1_1_IDENTIFY = "http://www.openarchives.org/OAI/1.1/OAI_Identify http://www.openarchives.org/OAI/1.1/OAI_Identify.xsd"; + public static final String SCHEMA_LOCATION_V1_1_LIST_IDENTIFIERS = "http://www.openarchives.org/OAI/1.1/OAI_ListIdentifiers http://www.openarchives.org/OAI/1.1/OAI_ListIdentifiers.xsd"; + public static final String SCHEMA_LOCATION_V1_1_LIST_METADATA_FORMATS = "http://www.openarchives.org/OAI/1.1/OAI_ListMetadataFormats http://www.openarchives.org/OAI/1.1/OAI_ListMetadataFormats.xsd"; + public static final String SCHEMA_LOCATION_V1_1_LIST_RECORDS = "http://www.openarchives.org/OAI/1.1/OAI_ListRecords http://www.openarchives.org/OAI/1.1/OAI_ListRecords.xsd"; + public static final String SCHEMA_LOCATION_V1_1_LIST_SETS = "http://www.openarchives.org/OAI/1.1/OAI_ListSets http://www.openarchives.org/OAI/1.1/OAI_ListSets.xsd"; + private Document doc = null; + private String schemaLocation = null; + private String requestURL = null; + + private static final ThreadLocal builderFactory = new ThreadLocal() { + @Override + public DocumentBuilderFactory initialValue() { + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + factory.setNamespaceAware(true); + return factory; + } + }; + private static final ThreadLocal transformerFactory = new ThreadLocal() { + @Override + public TransformerFactory initialValue() { + return TransformerFactory.newInstance(); + } + }; + + private static final ThreadLocal xpath = new ThreadLocal() { + @Override + public XPath initialValue() { + XPathFactory fucktory = XPathFactory.newInstance(); + XPath xpath = fucktory.newXPath(); + try { + xpath.setNamespaceContext(new OAINamespaceContext(builderFactory.get())); + } catch (ParserConfigurationException e) { + log.error("Cannot set namespace context", e); + } + return xpath; + } + }; + + /** + * Get the OAI response as a DOM object + * + * @return the DOM for the OAI response + */ + public Document getDocument() { + return doc; + } + + /** + * Get the xsi:schemaLocation for the OAI response + * + * @return the xsi:schemaLocation value + */ + public String getSchemaLocation() { + return schemaLocation; + } + + /** + * Get the OAI errors + * + * @return a NodeList of /oai:OAI-PMH/oai:error elements + * @throws XPathExpressionException + */ + public NodeList getErrors() throws XPathExpressionException { + if (SCHEMA_LOCATION_V2_0.equals(getSchemaLocation())) { + return getNodeList("/oai20:OAI-PMH/oai20:error"); + } else { + return null; + } + } + + /** + * Get the OAI request URL for this response + * + * @return the OAI request URL as a String + */ + public String getRequestURL() { + return requestURL; + } + + /** + * Mock object creator (for unit testing purposes) + */ + public HarvesterVerb() { + } + + /** + * Performs the OAI request + * + * @param requestURL + * @throws IOException + * @throws ParserConfigurationException + * @throws SAXException + * @throws XPathExpressionException + */ + public HarvesterVerb(String requestURL) throws IOException, + ParserConfigurationException, SAXException, XPathExpressionException { + harvest(requestURL); + } + + /** + * Preforms the OAI request + * + * @param requestURL + * @throws IOException + * @throws ParserConfigurationException + * @throws SAXException + * @throws XPathExpressionException + */ + private void harvest(String requestURL) throws IOException, + ParserConfigurationException, SAXException, XPathExpressionException { + this.requestURL = requestURL; + log.debug("requestURL=" + requestURL); + InputStream in; + URL url = new URL(requestURL); + HttpURLConnection con = null; + int responseCode; + do { + con = (HttpURLConnection) url.openConnection(); + con.setRequestProperty("User-Agent", "OAIHarvester/2.0"); + con.setRequestProperty("Accept-Encoding", + "compress, gzip, identify"); + try { + responseCode = con.getResponseCode(); + log.debug("responseCode=" + responseCode); + } catch (FileNotFoundException e) { + // assume it's a 503 response + log.info(requestURL, e); + responseCode = HttpURLConnection.HTTP_UNAVAILABLE; + } + + if (responseCode == HttpURLConnection.HTTP_UNAVAILABLE) { + long retrySeconds = con.getHeaderFieldInt("Retry-After", -1); + if (retrySeconds == -1) { + long now = (new Date()).getTime(); + long retryDate = con.getHeaderFieldDate("Retry-After", now); + retrySeconds = retryDate - now; + } + if (retrySeconds == 0) { // Apparently, it's a bad URL + throw new FileNotFoundException("Bad URL?"); + } + System.err.println("Server response: Retry-After=" + retrySeconds); + if (retrySeconds > 0) { + try { + Thread.sleep(retrySeconds * 1000); + } catch (InterruptedException ex) { + log.error("Error while sleeping", ex); + } + } + } + } while (responseCode == HttpURLConnection.HTTP_UNAVAILABLE); + String contentEncoding = con.getHeaderField("Content-Encoding"); + log.debug("contentEncoding=" + contentEncoding); + if ("compress".equals(contentEncoding)) { + ZipInputStream zis = new ZipInputStream(con.getInputStream()); + zis.getNextEntry(); + in = zis; + } else if ("gzip".equals(contentEncoding)) { + in = new GZIPInputStream(con.getInputStream()); + } else if ("deflate".equals(contentEncoding)) { + in = new InflaterInputStream(con.getInputStream()); + } else { + in = con.getInputStream(); + } + + InputSource data = new InputSource(in); + + doc = builderFactory.get().newDocumentBuilder().parse(data); + + // The URIs in xsi:schemaLocation are separated by (any kind + // of) white space. Normalize it to a single space. + String schemaLoc = getSingleString("/*/@xsi:schemaLocation"); + this.schemaLocation = schemaLoc.trim().replaceAll("\\s+", " "); + } + + /** + * Get the String value for the given XPath location in the response DOM + * + * @param xpath + * @return a String containing the value of the XPath location. + * @throws XPathExpressionException + */ + public String getSingleString(String xpath) throws XPathExpressionException { + return getSingleString(getDocument(), xpath); +// return XPathAPI.eval(getDocument(), xpath, namespaceElement).str(); +// String str = null; +// Node node = XPathAPI.selectSingleNode(getDocument(), xpath, +// namespaceElement); +// if (node != null) { +// XObject xObject = XPathAPI.eval(node, "string()"); +// str = xObject.str(); +// } +// return str; + } + + public String getSingleString(Node node, String expression) throws XPathExpressionException { + return xpath.get().evaluate(expression, node); + } + + /** + * Get a NodeList containing the nodes in the response DOM for the specified xpath + * + * @param expression + * @return the NodeList for the xpath into the response DOM + * @throws XPathExpressionException + */ + public NodeList getNodeList(String expression) throws XPathExpressionException { + return (NodeList) xpath.get().evaluate(expression, getDocument(), XPathConstants.NODESET); + } + + @Override + public String toString() { + // Element docEl = getDocument().getDocumentElement(); + // return docEl.toString(); + Source input = new DOMSource(getDocument()); + StringWriter sw = new StringWriter(); + Result output = new StreamResult(sw); + try { + Transformer idTransformer = transformerFactory.get().newTransformer(); + idTransformer.setOutputProperty( + OutputKeys.OMIT_XML_DECLARATION, "yes"); + idTransformer.transform(input, output); + return sw.toString(); + } catch (TransformerException e) { + return e.getMessage(); + } + } +} diff --git a/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/Identify.java b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/Identify.java new file mode 100644 index 000000000..83071738f --- /dev/null +++ b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/Identify.java @@ -0,0 +1,77 @@ +/** + * Copyright 2006 OCLC, Online Computer Library Center Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. You may obtain a copy of the + * License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and limitations under the License. + */ +package org.oclc.oai.harvester2.verb; + +import java.io.IOException; +import java.net.MalformedURLException; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.xpath.XPathExpressionException; +import org.xml.sax.SAXException; + +/** + * This class represents an Identify response on either the server or on the client + * + * @author Jeffrey A. Young, OCLC Online Computer Library Center + */ +public class Identify extends HarvesterVerb { + + /** + * Mock object constructor (for unit testing purposes) + */ + public Identify() { + super(); + } + + /** + * Client-side Identify verb constructor + * + * @param baseURL the baseURL of the server to be queried + * @exception MalformedURLException the baseURL is bad + * @exception IOException an I/O error occurred + * @throws ParserConfigurationException + * @throws XPathExpressionException + * @throws SAXException + */ + public Identify(String baseURL) throws IOException, ParserConfigurationException, SAXException, XPathExpressionException { + super(getRequestURL(baseURL)); + } + + /** + * Get the oai:protocolVersion value from the Identify response + * + * @return the oai:protocolVersion value + * @throws XPathExpressionException + * @throws NoSuchFieldException + */ + public String getProtocolVersion() + throws XPathExpressionException, NoSuchFieldException { + if (SCHEMA_LOCATION_V2_0.equals(getSchemaLocation())) { + return getSingleString("/oai20:OAI-PMH/oai20:Identify/oai20:protocolVersion"); + } else if (SCHEMA_LOCATION_V1_1_IDENTIFY.equals(getSchemaLocation())) { + return getSingleString("/oai11_Identify:Identify/oai11_Identify:protocolVersion"); + } else { + throw new NoSuchFieldException(getSchemaLocation()); + } + } + + /** + * generate the Identify request URL for the specified baseURL + * + * @param baseURL + * @return the requestURL + */ + private static String getRequestURL(String baseURL) { + StringBuilder requestURL = new StringBuilder(baseURL); + requestURL.append("?verb=Identify"); + return requestURL.toString(); + } +} diff --git a/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/ListIdentifiers.java b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/ListIdentifiers.java new file mode 100644 index 000000000..bfeed92a2 --- /dev/null +++ b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/ListIdentifiers.java @@ -0,0 +1,117 @@ +/** + * Copyright 2006 OCLC, Online Computer Library Center Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. You may obtain a copy of the + * License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and limitations under the License. + */ +package org.oclc.oai.harvester2.verb; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URLEncoder; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.xpath.XPathExpressionException; +import org.xml.sax.SAXException; + +/** + * This class represents an ListIdentifiers response on either the server or on the client + * + * @author Jeffrey A. Young, OCLC Online Computer Library Center + */ +public class ListIdentifiers extends HarvesterVerb { + + /** + * Mock object constructor (for unit testing purposes) + */ + public ListIdentifiers() { + super(); + } + + /** + * Client-side ListIdentifiers verb constructor + * + * @param baseURL the baseURL of the server to be queried + * @param from + * @param until + * @param set + * @param metadataPrefix + * @throws MalformedURLException the baseURL is bad + * @throws SAXException the xml response is bad + * @throws IOException an I/O error occurred + * @throws XPathExpressionException + * @throws ParserConfigurationException + */ + public ListIdentifiers(String baseURL, String from, String until, String set, String metadataPrefix) + throws IOException, ParserConfigurationException, SAXException, XPathExpressionException { + super(getRequestURL(baseURL, from, until, set, metadataPrefix)); + } + + /** + * Client-side ListIdentifiers verb constructor (resumptionToken version) + * + * @param baseURL + * @param resumptionToken + * @throws IOException + * @throws ParserConfigurationException + * @throws SAXException + * @throws XPathExpressionException + */ + public ListIdentifiers(String baseURL, String resumptionToken) + throws IOException, ParserConfigurationException, SAXException, + XPathExpressionException { + super(getRequestURL(baseURL, resumptionToken)); + } + + /** + * Get the oai:resumptionToken from the response + * + * @return the oai:resumptionToken value + * @throws XPathExpressionException + * @throws NoSuchFieldException + */ + public String getResumptionToken() + throws XPathExpressionException, NoSuchFieldException { + if (SCHEMA_LOCATION_V2_0.equals(getSchemaLocation())) { + return getSingleString("/oai20:OAI-PMH/oai20:ListIdentifiers/oai20:resumptionToken"); + } else if (SCHEMA_LOCATION_V1_1_LIST_IDENTIFIERS.equals(getSchemaLocation())) { + return getSingleString("/oai11_ListIdentifiers:ListIdentifiers/oai11_ListIdentifiers:resumptionToken"); + } else { + throw new NoSuchFieldException(getSchemaLocation()); + } + } + + /** + * Construct the query portion of the http request + * + * @return a String containing the query portion of the http request + */ + private static String getRequestURL(String baseURL, String from, String until, String set, String metadataPrefix) { + StringBuilder requestURL = new StringBuilder(baseURL); + requestURL.append("?verb=ListIdentifiers"); + if (from != null) requestURL.append("&from=").append(from); + if (until != null) requestURL.append("&until=").append(until); + if (set != null) requestURL.append("&set=").append(set); + requestURL.append("&metadataPrefix=").append(metadataPrefix); + return requestURL.toString(); + } + + /** + * Construct the query portion of the http request (resumptionToken version) + * + * @param baseURL + * @param resumptionToken + * @return + */ + private static String getRequestURL(String baseURL, + String resumptionToken) { + StringBuilder requestURL = new StringBuilder(baseURL); + requestURL.append("?verb=ListIdentifiers"); + requestURL.append("&resumptionToken=").append(URLEncoder.encode(resumptionToken)); + return requestURL.toString(); + } +} diff --git a/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/ListMetadataFormats.java b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/ListMetadataFormats.java new file mode 100644 index 000000000..dc50ddeda --- /dev/null +++ b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/ListMetadataFormats.java @@ -0,0 +1,77 @@ +/** + * Copyright 2006 OCLC, Online Computer Library Center Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. You may obtain a copy of the + * License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and limitations under the License. + */ +package org.oclc.oai.harvester2.verb; + +import java.io.IOException; +import java.net.MalformedURLException; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.xpath.XPathExpressionException; +import org.xml.sax.SAXException; + +/** + * This class represents an ListMetadataFormats response on either the server or on the client + * + * @author Jeffrey A. Young, OCLC Online Computer Library Center + */ +public class ListMetadataFormats extends HarvesterVerb { + + /** + * Mock object constructor (for unit testing purposes) + */ + public ListMetadataFormats() { + super(); + } + + /** + * Client-side ListMetadataFormats verb constructor + * + * @param baseURL the baseURL of the server to be queried + * @throws MalformedURLException the baseURL is bad + * @throws javax.xml.parsers.ParserConfigurationException + * @throws SAXException the xml response is bad + * @throws javax.xml.xpath.XPathExpressionException + * @throws IOException an I/O error occurred + */ + public ListMetadataFormats(String baseURL) throws IOException, ParserConfigurationException, + SAXException, XPathExpressionException { + this(baseURL, null); + } + + /** + * Client-side ListMetadataFormats verb constructor (identifier version) + * + * @param baseURL + * @param identifier + * @throws IOException + * @throws ParserConfigurationException + * @throws SAXException + * @throws XPathExpressionException + */ + public ListMetadataFormats(String baseURL, String identifier) + throws IOException, ParserConfigurationException, SAXException, + XPathExpressionException { + super(getRequestURL(baseURL, identifier)); + } + + /** + * Construct the query portion of the http request + * + * @return a String containing the query portion of the http request + */ + private static String getRequestURL(String baseURL, String identifier) { + StringBuilder requestURL = new StringBuilder(baseURL); + requestURL.append("?verb=ListMetadataFormats"); + if (identifier != null) + requestURL.append("&identifier=").append(identifier); + return requestURL.toString(); + } +} diff --git a/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/ListRecords.java b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/ListRecords.java new file mode 100644 index 000000000..6be0bcfb3 --- /dev/null +++ b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/ListRecords.java @@ -0,0 +1,120 @@ +/** + * Copyright 2006 OCLC, Online Computer Library Center Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. You may obtain a copy of the + * License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and limitations under the License. + */ +package org.oclc.oai.harvester2.verb; + +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.net.MalformedURLException; +import java.net.URLEncoder; + +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.xpath.XPathExpressionException; +import org.xml.sax.SAXException; + +/** + * This class represents an ListRecords response on either the server or on the client + * + * @author Jeffrey A. Young, OCLC Online Computer Library Center + */ +public class ListRecords extends HarvesterVerb { + + /** + * Mock object constructor (for unit testing purposes) + */ + public ListRecords() { + super(); + } + + /** + * Client-side ListRecords verb constructor + * + * @param baseURL the baseURL of the server to be queried + * @param from + * @param until + * @param set + * @param metadataPrefix + * @throws MalformedURLException the baseURL is bad + * @throws SAXException the xml response is bad + * @throws IOException an I/O error occurred + * @throws javax.xml.parsers.ParserConfigurationException + * @throws javax.xml.xpath.XPathExpressionException + */ + public ListRecords(String baseURL, String from, String until, String set, String metadataPrefix) + throws IOException, ParserConfigurationException, SAXException, XPathExpressionException { + super(getRequestURL(baseURL, from, until, set, metadataPrefix)); + } + + /** + * Client-side ListRecords verb constructor (resumptionToken version) + * + * @param baseURL + * @param resumptionToken + * @throws IOException + * @throws ParserConfigurationException + * @throws SAXException + * @throws XPathExpressionException + */ + public ListRecords(String baseURL, String resumptionToken) + throws IOException, ParserConfigurationException, SAXException, XPathExpressionException { + super(getRequestURL(baseURL, resumptionToken)); + } + + /** + * Get the oai:resumptionToken from the response + * + * @return the oai:resumptionToken value + * @throws XPathExpressionException + * @throws NoSuchFieldException + */ + public String getResumptionToken() + throws XPathExpressionException, NoSuchFieldException { + String schemaLocation = getSchemaLocation(); + if (schemaLocation.contains(SCHEMA_LOCATION_V2_0)) { + return getSingleString("/oai20:OAI-PMH/oai20:ListRecords/oai20:resumptionToken"); + } else if (schemaLocation.contains(SCHEMA_LOCATION_V1_1_LIST_RECORDS)) { + return getSingleString("/oai11_ListRecords:ListRecords/oai11_ListRecords:resumptionToken"); + } else { + throw new NoSuchFieldException(schemaLocation); + } + } + + /** + * Construct the query portion of the http request + * + * @return a String containing the query portion of the http request + */ + private static String getRequestURL(String baseURL, String from, + String until, String set, + String metadataPrefix) { + StringBuilder requestURL = new StringBuilder(baseURL); + requestURL.append("?verb=ListRecords"); + if (from != null) requestURL.append("&from=").append(from); + if (until != null) requestURL.append("&until=").append(until); + if (set != null) requestURL.append("&set=").append(set); + requestURL.append("&metadataPrefix=").append(metadataPrefix); + return requestURL.toString(); + } + + /** + * Construct the query portion of the http request (resumptionToken version) + * + * @param baseURL + * @param resumptionToken + * @return + */ + private static String getRequestURL(String baseURL, String resumptionToken) throws UnsupportedEncodingException { + StringBuilder requestURL = new StringBuilder(baseURL); + requestURL.append("?verb=ListRecords"); + requestURL.append("&resumptionToken=").append(URLEncoder.encode(resumptionToken, "UTF-8")); + return requestURL.toString(); + } +} diff --git a/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/ListSets.java b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/ListSets.java new file mode 100644 index 000000000..f4bbff4f4 --- /dev/null +++ b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/ListSets.java @@ -0,0 +1,111 @@ +/** + * Copyright 2006 OCLC, Online Computer Library Center Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. You may obtain a copy of the + * License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is + * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and limitations under the License. + */ +package org.oclc.oai.harvester2.verb; + +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.net.MalformedURLException; +import java.net.URLEncoder; + +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.xpath.XPathExpressionException; +import org.xml.sax.SAXException; + +/** + * This class represents an ListSets response on either the server or on the client + * + * @author Jeffrey A. Young, OCLC Online Computer Library Center + */ +public class ListSets extends HarvesterVerb { + + /** + * Mock object constructor (for unit testing purposes) + */ + public ListSets() { + super(); + } + + /** + * Client-side ListSets verb constructor + * + * @param baseURL the baseURL of the server to be queried + * @exception MalformedURLException the baseURL is bad + * @exception IOException an I/O error occurred + * @throws ParserConfigurationException + * @throws SAXException + * @throws XPathExpressionException + */ + public ListSets(String baseURL) throws IOException, ParserConfigurationException, + SAXException, XPathExpressionException { + super(getRequestURL(baseURL)); + } + + /** + * @param baseURL + * @param resumptionToken + * @throws IOException + * @throws ParserConfigurationException + * @throws SAXException + * @throws XPathExpressionException + */ + public ListSets(String baseURL, String resumptionToken) + throws IOException, ParserConfigurationException, SAXException, + XPathExpressionException { + super(getRequestURL(baseURL, resumptionToken)); + } + + /** + * Get the oai:resumptionToken from the response + * + * @return the oai:resumptionToken as a String + * @throws XPathExpressionException + * @throws NoSuchFieldException + */ + public String getResumptionToken() + throws XPathExpressionException, NoSuchFieldException { + if (SCHEMA_LOCATION_V2_0.equals(getSchemaLocation())) { + return getSingleString("/oai20:OAI-PMH/oai20:ListSets/oai20:resumptionToken"); + } else if (SCHEMA_LOCATION_V1_1_LIST_SETS.equals(getSchemaLocation())) { + return getSingleString("/oai11_ListSets:ListSets/oai11_ListSets:resumptionToken"); + } else { + throw new NoSuchFieldException(getSchemaLocation()); + } + } + + /** + * Generate a ListSets request for the given baseURL and resumptionToken + * + * @param baseURL + * @param resumptionToken + * @return + * @throws UnsupportedEncodingException + */ + private static String getRequestURL(String baseURL, + String resumptionToken) throws UnsupportedEncodingException { + StringBuilder requestURL = new StringBuilder(baseURL); + requestURL.append("?verb=ListSets"); + requestURL.append("&resumptionToken=").append(URLEncoder.encode(resumptionToken, "UTF-8")); + return requestURL.toString(); + } + + /** + * Generate a ListSets request for the given baseURL + * + * @param baseURL + * @return + */ + private static String getRequestURL(String baseURL) { + StringBuilder requestURL = new StringBuilder(baseURL); + requestURL.append("?verb=ListSets"); + return requestURL.toString(); + } +} diff --git a/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/OAINamespaceContext.java b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/OAINamespaceContext.java new file mode 100644 index 000000000..7ff952c23 --- /dev/null +++ b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/OAINamespaceContext.java @@ -0,0 +1,61 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. + */ +package org.oclc.oai.harvester2.verb; + +import java.util.Iterator; +import javax.xml.namespace.NamespaceContext; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import org.w3c.dom.DOMImplementation; +import org.w3c.dom.Document; +import org.w3c.dom.Element; + +/** + * + * @author Michal Hlavac + */ +public class OAINamespaceContext implements NamespaceContext { + + private final Element namespaceElement; + + public OAINamespaceContext(DocumentBuilderFactory factory) throws ParserConfigurationException { + this.namespaceElement = buildNamespaceElement(factory); + } + + @Override + public String getNamespaceURI(String prefix) { + return namespaceElement.lookupNamespaceURI(prefix); + } + + @Override + public String getPrefix(String namespaceURI) { + return null; + } + + @Override + public Iterator getPrefixes(String namespaceURI) { + return null; + } + + private Element buildNamespaceElement(DocumentBuilderFactory factory) throws ParserConfigurationException { + DOMImplementation impl = factory.newDocumentBuilder().getDOMImplementation(); + Document namespaceHolder = impl.createDocument( + "http://www.oclc.org/research/software/oai/harvester", + "harvester:namespaceHolder", null); + + Element el = namespaceHolder.getDocumentElement(); + el.setAttributeNS("http://www.w3.org/2000/xmlns/", "xmlns:harvester", "http://www.oclc.org/research/software/oai/harvester"); + el.setAttributeNS("http://www.w3.org/2000/xmlns/", "xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance"); + el.setAttributeNS("http://www.w3.org/2000/xmlns/", "xmlns:oai20", "http://www.openarchives.org/OAI/2.0/"); + el.setAttributeNS("http://www.w3.org/2000/xmlns/", "xmlns:oai11_GetRecord", "http://www.openarchives.org/OAI/1.1/OAI_GetRecord"); + el.setAttributeNS("http://www.w3.org/2000/xmlns/", "xmlns:oai11_Identify", "http://www.openarchives.org/OAI/1.1/OAI_Identify"); + el.setAttributeNS("http://www.w3.org/2000/xmlns/", "xmlns:oai11_ListIdentifiers", "http://www.openarchives.org/OAI/1.1/OAI_ListIdentifiers"); + el.setAttributeNS("http://www.w3.org/2000/xmlns/", "xmlns:oai11_ListMetadataFormats", "http://www.openarchives.org/OAI/1.1/OAI_ListMetadataFormats"); + el.setAttributeNS("http://www.w3.org/2000/xmlns/", "xmlns:oai11_ListRecords", "http://www.openarchives.org/OAI/1.1/OAI_ListRecords"); + el.setAttributeNS("http://www.w3.org/2000/xmlns/", "xmlns:oai11_ListSets", "http://www.openarchives.org/OAI/1.1/OAI_ListSets"); + return el; + } +} From b09b4fbba6d330038a5da87870f96038a1eca521 Mon Sep 17 00:00:00 2001 From: Fabian Steeg Date: Thu, 11 Mar 2021 17:20:16 +0100 Subject: [PATCH 4/9] Fix javadoc CI error in harvester2 package See https://github.com/metafacture/metafacture-core/issues/360 --- .../java/org/oclc/oai/harvester2/verb/OAINamespaceContext.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/OAINamespaceContext.java b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/OAINamespaceContext.java index 7ff952c23..8eb8a7437 100644 --- a/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/OAINamespaceContext.java +++ b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/OAINamespaceContext.java @@ -15,7 +15,7 @@ /** * - * @author Michal Hlavac + * @author Michal Hlavac (michal_hlavac@datalan.sk) */ public class OAINamespaceContext implements NamespaceContext { From 8d762ea428d41b26bf4dc64b7cc229d91793b19a Mon Sep 17 00:00:00 2001 From: Fabian Steeg Date: Wed, 17 Mar 2021 10:24:16 +0100 Subject: [PATCH 5/9] Set up publishing to GitHub Packages See: https://github.com/metafacture/metafacture-core/issues/356 https://gitlab.com/oersi/oersi-etl/-/issues/59 --- .github/workflows/publish.yml | 16 ++++++++++++++++ build.gradle | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 .github/workflows/publish.yml diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 000000000..18ec57f82 --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,16 @@ +name: Publish package to GitHub Packages +on: + push +jobs: + publish: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-java@v1 + with: + java-version: 1.8 + - name: Publish package + run: gradle publish + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + diff --git a/build.gradle b/build.gradle index 3a2e4d071..7dbf6feb8 100644 --- a/build.gradle +++ b/build.gradle @@ -42,6 +42,7 @@ subprojects { apply plugin: 'signing' apply plugin: 'maven' apply plugin: 'jacoco' + apply plugin: 'maven-publish' sourceCompatibility = 1.8 targetCompatibility = 1.8 @@ -197,9 +198,43 @@ gradle.projectsEvaluated { } } + //'upload' vs. 'publish, see + //https://medium.com/dot-debug/deploying-artifacts-to-maven-using-gradle-b669acc1b6f8' + /* task publish { dependsOn tasks.uploadArchives } + */ + + publishing { + publications { + mavenArtifacts(MavenPublication) { + from components.java + afterEvaluate { + groupId = project.group + artifactId = project.name + } + } + } + + repositories { + maven { + name = 'localEmbedded' + // run generated gradle task `./gradlew + // publishMavenArtifactsPublicationToLocalEmbeddedRepository` + // to publish all subprojects into the same local embedded repo: + url = "file://${rootDir}/build/repo" + } + maven { + name = "GitHubPackages" + url = "https://maven.pkg.github.com/metafacture/metafacture-core" + credentials { + username = System.getenv("GITHUB_ACTOR") + password = System.getenv("GITHUB_TOKEN") + } + } + } + } } } From 08685dc82528d9d3179e594aae0f68c7a3aafa87 Mon Sep 17 00:00:00 2001 From: Fabian Steeg Date: Thu, 15 Apr 2021 15:45:23 +0200 Subject: [PATCH 6/9] Use Gradle Wrapper in publishing workflow See https://github.com/metafacture/metafacture-core/issues/356 --- .github/workflows/publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 18ec57f82..6fcbe5740 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -10,7 +10,7 @@ jobs: with: java-version: 1.8 - name: Publish package - run: gradle publish + run: ./gradlew publish env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From 6613de93e1ee1a49198ad5f837c68bc992bf0a30 Mon Sep 17 00:00:00 2001 From: Fabian Steeg Date: Thu, 15 Apr 2021 15:22:16 +0200 Subject: [PATCH 7/9] Publish to GitHub packages only when RC branch is pushed Use plain RC branch name as version during build See https://github.com/metafacture/metafacture-core/issues/356 --- .github/workflows/publish.yml | 4 +++- build.gradle | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 6fcbe5740..5d637a7fb 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -1,6 +1,8 @@ name: Publish package to GitHub Packages on: - push + push: + branches: + - '*-rc*' jobs: publish: runs-on: ubuntu-latest diff --git a/build.gradle b/build.gradle index 7dbf6feb8..1b5fb1942 100644 --- a/build.gradle +++ b/build.gradle @@ -307,6 +307,10 @@ def getSnapshotVersion() { logger.lifecycle('Release branch found') return "${extractVersionFromBranch(grgit.branch.current().name)}-SNAPSHOT" } + if (grgit.branch.current().name.contains('-rc')) { + logger.lifecycle('Release candidate branch found') + return "${grgit.branch.current().name}" + } logger.lifecycle('Feature branch found') return "feature-${grgit.branch.current().name}-SNAPSHOT" } From 5054b1bf8c01246831339d22d98dfbdc87a6fe96 Mon Sep 17 00:00:00 2001 From: Fabian Steeg Date: Mon, 29 Aug 2022 14:51:59 +0200 Subject: [PATCH 8/9] Fix compiler warnings, editorconfig and checkstyle violations Set up ignores for bin folders and harvester2 package --- build.gradle | 8 +++++--- config/checkstyle/checkstyle.xml | 2 +- .../org/metafacture/biblio/OaiPmhOpener.java | 20 +++++++++---------- .../org/oclc/oai/harvester2/app/RawWrite.java | 10 +++++----- .../oai/harvester2/verb/ListIdentifiers.java | 8 ++++++-- .../harvester2/verb/ListMetadataFormats.java | 2 +- .../harvester2/verb/OAINamespaceContext.java | 2 +- 7 files changed, 29 insertions(+), 23 deletions(-) diff --git a/build.gradle b/build.gradle index 0e7164613..f6191a68e 100644 --- a/build.gradle +++ b/build.gradle @@ -47,7 +47,8 @@ editorconfig { '**/*.bzip2', '**/*.gzip', '**/*.xz', - 'gradlew*' + 'gradlew*', + '**/bin' ] } @@ -95,6 +96,7 @@ subprojects { options { addBooleanOption 'Xwerror', true } + exclude '**/harvester2/**' } artifacts { @@ -242,7 +244,7 @@ gradle.projectsEvaluated { } } - //'upload' vs. 'publish, see + //'upload' vs. 'publish, see //https://medium.com/dot-debug/deploying-artifacts-to-maven-using-gradle-b669acc1b6f8' /* task publish { @@ -264,7 +266,7 @@ gradle.projectsEvaluated { repositories { maven { name = 'localEmbedded' - // run generated gradle task `./gradlew + // run generated gradle task `./gradlew // publishMavenArtifactsPublicationToLocalEmbeddedRepository` // to publish all subprojects into the same local embedded repo: url = "file://${rootDir}/build/repo" diff --git a/config/checkstyle/checkstyle.xml b/config/checkstyle/checkstyle.xml index b31045320..06f410f10 100644 --- a/config/checkstyle/checkstyle.xml +++ b/config/checkstyle/checkstyle.xml @@ -6,7 +6,7 @@ - + diff --git a/metafacture-biblio/src/main/java/org/metafacture/biblio/OaiPmhOpener.java b/metafacture-biblio/src/main/java/org/metafacture/biblio/OaiPmhOpener.java index 908e6d052..d9fa8c424 100644 --- a/metafacture-biblio/src/main/java/org/metafacture/biblio/OaiPmhOpener.java +++ b/metafacture-biblio/src/main/java/org/metafacture/biblio/OaiPmhOpener.java @@ -3,24 +3,24 @@ package org.metafacture.biblio; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.Reader; - -import javax.xml.parsers.ParserConfigurationException; -import javax.xml.xpath.XPathException; - import org.metafacture.framework.MetafactureException; import org.metafacture.framework.ObjectReceiver; import org.metafacture.framework.annotations.Description; import org.metafacture.framework.annotations.In; import org.metafacture.framework.annotations.Out; import org.metafacture.framework.helpers.DefaultObjectPipe; + import org.oclc.oai.harvester2.app.RawWrite; import org.xml.sax.SAXException; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.Reader; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.xpath.XPathException; + /** * Opens an OAI-PMH stream and passes a reader to the receiver. * @@ -114,7 +114,7 @@ public void process(final String baseUrl) { catch (final NoSuchFieldException e) { e.printStackTrace(); } - catch (XPathException e) { + catch (final XPathException e) { e.printStackTrace(); } try { diff --git a/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/app/RawWrite.java b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/app/RawWrite.java index 8e4dbe825..6cbce310d 100644 --- a/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/app/RawWrite.java +++ b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/app/RawWrite.java @@ -31,8 +31,8 @@ public static void main(String[] args) { try { System.out.println(new Date()); - HashMap options = getOptions(args); - List rootArgs = (List) options.get("rootArgs"); + HashMap options = getOptions(args); + List rootArgs = (List) options.get("rootArgs"); String baseURL = null; if (rootArgs.size() > 0) { baseURL = (String) rootArgs.get(0); @@ -151,9 +151,9 @@ public static void run(String baseURL, String from, String until, out.write("\n".getBytes("UTF-8")); } - private static HashMap getOptions(String[] args) { - HashMap options = new HashMap(); - ArrayList rootArgs = new ArrayList(); + private static HashMap getOptions(String[] args) { + HashMap options = new HashMap<>(); + ArrayList rootArgs = new ArrayList<>(); options.put("rootArgs", rootArgs); for (int i = 0; i < args.length; ++i) { diff --git a/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/ListIdentifiers.java b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/ListIdentifiers.java index bfeed92a2..6a0c340c5 100644 --- a/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/ListIdentifiers.java +++ b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/ListIdentifiers.java @@ -12,8 +12,11 @@ package org.oclc.oai.harvester2.verb; import java.io.IOException; +import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; + import javax.xml.parsers.ParserConfigurationException; import javax.xml.xpath.XPathExpressionException; import org.xml.sax.SAXException; @@ -106,12 +109,13 @@ private static String getRequestURL(String baseURL, String from, String until, S * @param baseURL * @param resumptionToken * @return + * @throws UnsupportedEncodingException */ private static String getRequestURL(String baseURL, - String resumptionToken) { + String resumptionToken) throws UnsupportedEncodingException { StringBuilder requestURL = new StringBuilder(baseURL); requestURL.append("?verb=ListIdentifiers"); - requestURL.append("&resumptionToken=").append(URLEncoder.encode(resumptionToken)); + requestURL.append("&resumptionToken=").append(URLEncoder.encode(resumptionToken, StandardCharsets.UTF_8.name())); return requestURL.toString(); } } diff --git a/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/ListMetadataFormats.java b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/ListMetadataFormats.java index dc50ddeda..0facd07e9 100644 --- a/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/ListMetadataFormats.java +++ b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/ListMetadataFormats.java @@ -41,7 +41,7 @@ public ListMetadataFormats() { * @throws javax.xml.xpath.XPathExpressionException * @throws IOException an I/O error occurred */ - public ListMetadataFormats(String baseURL) throws IOException, ParserConfigurationException, + public ListMetadataFormats(String baseURL) throws IOException, ParserConfigurationException, SAXException, XPathExpressionException { this(baseURL, null); } diff --git a/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/OAINamespaceContext.java b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/OAINamespaceContext.java index 8eb8a7437..ec4033f91 100644 --- a/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/OAINamespaceContext.java +++ b/metafacture-biblio/src/main/java/org/oclc/oai/harvester2/verb/OAINamespaceContext.java @@ -36,7 +36,7 @@ public String getPrefix(String namespaceURI) { } @Override - public Iterator getPrefixes(String namespaceURI) { + public Iterator getPrefixes(String namespaceURI) { return null; } From 6394cf6d2cd06a76806eb9e01ea8c3d99500434b Mon Sep 17 00:00:00 2001 From: Fabian Steeg Date: Fri, 16 Sep 2022 13:28:42 +0200 Subject: [PATCH 9/9] Revert to deprecated `runtime` for slf4j to fix logging issue See https://gitlab.com/oersi/oersi-etl/-/issues/129 --- metafacture-runner/build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metafacture-runner/build.gradle b/metafacture-runner/build.gradle index 55eac9bad..53cbbe0b3 100644 --- a/metafacture-runner/build.gradle +++ b/metafacture-runner/build.gradle @@ -67,7 +67,7 @@ dependencies { // class loader which was used to load the classes of the slf4j-api. Until // a solution is found for this problem, the binding need to be placed on the // class path: - runtimeOnly 'org.slf4j:slf4j-log4j12:1.7.21' + runtime 'org.slf4j:slf4j-log4j12:1.7.21' // The following dependencies are placed in the "provided" scope to prevent // them from being included in the class path but still have them available