Skip to content

Commit 1896c17

Browse files
committed
1 parent 33aeead commit 1896c17

File tree

2 files changed

+81
-58
lines changed

2 files changed

+81
-58
lines changed

metafacture-io/src/main/java/org/metafacture/io/SruOpener.java

Lines changed: 78 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -14,21 +14,13 @@
1414
import org.w3c.dom.Element;
1515
import org.xml.sax.SAXException;
1616

17-
import java.io.ByteArrayInputStream;
18-
import java.io.ByteArrayOutputStream;
19-
import java.io.IOException;
20-
import java.io.InputStream;
21-
import java.io.InputStreamReader;
22-
import java.io.Reader;
17+
import java.io.*;
2318
import java.net.HttpURLConnection;
2419
import java.net.URL;
2520
import javax.xml.parsers.DocumentBuilder;
2621
import javax.xml.parsers.DocumentBuilderFactory;
2722
import javax.xml.parsers.ParserConfigurationException;
28-
import javax.xml.transform.Result;
29-
import javax.xml.transform.Transformer;
30-
import javax.xml.transform.TransformerException;
31-
import javax.xml.transform.TransformerFactory;
23+
import javax.xml.transform.*;
3224
import javax.xml.transform.dom.DOMSource;
3325
import javax.xml.transform.stream.StreamResult;
3426

@@ -61,9 +53,13 @@ public final class SruOpener extends DefaultObjectPipe<String, ObjectReceiver<Re
6153
private int maximumRecords = MAXIMUM_RECORDS;
6254
private int startRecord = START_RECORD;
6355
private int totalRecords = Integer.MAX_VALUE;
56+
int numberOfRecords = Integer.MAX_VALUE;
6457

6558
private boolean stopRetrieving;
59+
private int recordsRetrieved;
6660

61+
private String xmlDeclarationTemplate ="<?xml version=\"%s\" encoding=\"%s\"?>";
62+
private String xmlDeclaration;
6763

6864
/**
6965
* Default constructor
@@ -149,51 +145,83 @@ public void setVersion(final String version) {
149145
@Override
150146
public void process(final String baseUrl) {
151147

152-
try {
148+
StringBuilder srUrl = new StringBuilder(baseUrl);
149+
if (query != null) {
150+
srUrl.append("?query=").append(query).append("&operation=").append(operation).append("&recordSchema=")
151+
.append(recordSchema).append("&version=").append(version);
152+
} else {
153+
throw new IllegalArgumentException("Missing mandatory parameter 'query'");
154+
}
153155

154-
StringBuilder srUrl = new StringBuilder(baseUrl);
155-
if (query != null) {
156-
srUrl.append("?query=").append(query).append("&operation=").append(operation).append("&recordSchema=").append(recordSchema).append("&version=").append(version);
157-
}
158-
else {
159-
throw new IllegalArgumentException("Missing mandatory parameter 'query'");
160-
}
161-
int numberOfRecords = Integer.MAX_VALUE;
162-
TransformerFactory tf = TransformerFactory.newInstance();
163-
Transformer t = tf.newTransformer();
164-
while (!stopRetrieving && (startRecord < numberOfRecords)) {
165-
/* if (totalRecords >0) {
166-
yetToRetrieveRecords = totalRecords - retrievedRecords;
167-
if (yetToRetrieveRecords < maximumRecords) {
168-
maximumRecords = yetToRetrieveRecords;
156+
try {
157+
//get first document and add a starting root tag
158+
Transformer t = TransformerFactory.newInstance().newTransformer();
159+
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(getXmlDocsViaSru(srUrl)));
160+
String line;
161+
StringBuilder stringBuilder = new StringBuilder(1024 * 1024);
162+
boolean rootTagAdded = false;
163+
while ((line = bufferedReader.readLine()) != null) {
164+
if(!rootTagAdded) {
165+
if (line.matches(".*searchRetrieveResponse.*")) {
166+
stringBuilder.append(xmlDeclaration+"\n");
167+
stringBuilder.append("<harvest>\n");
168+
rootTagAdded = true;
169169
}
170-
}*/
171-
ByteArrayInputStream byteArrayInputStream = retrieve(srUrl, startRecord, maximumRecords);
172-
173-
174-
DocumentBuilderFactory factory =DocumentBuilderFactory.newInstance();
175-
DocumentBuilder docBuilder = factory.newDocumentBuilder();
176-
Document xmldoc = docBuilder.parse(byteArrayInputStream);
177-
178-
Element element = (Element)xmldoc.getElementsByTagName("numberOfRecords").item(0);
179-
numberOfRecords=Integer.parseInt(element.getTextContent());
180-
181-
ByteArrayOutputStream os = new ByteArrayOutputStream();
182-
Result result = new StreamResult(os);
183-
t.transform(new DOMSource(xmldoc), result);
184-
ByteArrayInputStream inputStream = new ByteArrayInputStream(os.toByteArray());
185-
186-
getReceiver().process(
187-
new InputStreamReader(inputStream));
188-
tf = TransformerFactory.newInstance();
189-
t = tf.newTransformer();
190-
t.setOutputProperty("omit-xml-declaration", "yes");
191-
startRecord = startRecord + maximumRecords;
170+
}
171+
stringBuilder.append(line+"\n");
172+
}
173+
getReceiver().process(new InputStreamReader(new ByteArrayInputStream(stringBuilder.toString().getBytes())));
174+
while (!stopRetrieving && recordsRetrieved < totalRecords && (startRecord < numberOfRecords)) {
175+
InputStream inputStream = getXmlDocsViaSru(srUrl);
176+
getReceiver().process(new InputStreamReader(inputStream));
192177
}
178+
//close root tag
179+
getReceiver().process(new InputStreamReader(new ByteArrayInputStream("</harvest>\n\n".getBytes())));
193180
}
194-
catch (final IOException | TransformerException | SAXException | ParserConfigurationException e) {
181+
catch (TransformerConfigurationException | IOException e) {
195182
throw new MetafactureException(e);
196183
}
184+
}
185+
186+
private InputStream getXmlDocsViaSru(final StringBuilder srUrl ){
187+
try {
188+
ByteArrayInputStream byteArrayInputStream = retrieve(srUrl, startRecord, maximumRecords);
189+
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
190+
DocumentBuilder docBuilder = factory.newDocumentBuilder();
191+
Document xmldoc = docBuilder.parse(byteArrayInputStream);
192+
193+
/* Element newRoot = xmldoc.createElement("harvest");
194+
newRoot.appendChild(xmldoc.getFirstChild());
195+
xmldoc.appendChild(newRoot);*/
196+
197+
numberOfRecords =
198+
Integer.parseInt(((Element) xmldoc.getElementsByTagName("numberOfRecords").item(0)).getTextContent());
199+
int recordPosition =
200+
Integer.parseInt(((Element) xmldoc.getElementsByTagName("recordPosition").item(0)).getTextContent());
201+
int nextRecordPosition =
202+
Integer.parseInt(((Element) xmldoc.getElementsByTagName("nextRecordPosition").item(0)).getTextContent());
203+
204+
String xmlEncoding = xmldoc.getXmlEncoding();
205+
String xmlVersion = xmldoc.getXmlVersion();
206+
//<?xml version="1.0" encoding="UTF-8"?>
207+
xmlDeclaration=String.format(xmlDeclarationTemplate,xmldoc.getXmlVersion(),xmldoc.getXmlEncoding());
208+
recordsRetrieved = recordsRetrieved + nextRecordPosition - recordPosition;
209+
210+
ByteArrayOutputStream os = new ByteArrayOutputStream();
211+
212+
Result result = new StreamResult(os);
213+
Transformer t = TransformerFactory.newInstance().newTransformer();
214+
t.setOutputProperty("omit-xml-declaration", "yes");
215+
t.transform(new DOMSource(xmldoc), result);
216+
217+
ByteArrayInputStream inputStream = new ByteArrayInputStream(os.toByteArray());
218+
startRecord = startRecord + maximumRecords;
219+
return inputStream;
220+
221+
} catch (final IOException | TransformerException | SAXException | ParserConfigurationException e) {
222+
throw new MetafactureException(e);
223+
}
224+
197225

198226
}
199227

@@ -206,14 +234,9 @@ private ByteArrayInputStream retrieve(StringBuilder srUrl, int startRecord, int
206234
connection.setRequestProperty("User-Agent", userAgent);
207235
}
208236
InputStream inputStream = getInputStream(connection);
237+
209238
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
210239

211-
System.out.println("srUrl="+srUrl);
212-
System.out.println("startRecord="+startRecord);
213-
System.out.println("istream.length="+inputStream.available());
214-
if (inputStream.available() < 768){ // we take it that this is a result without a record
215-
stopRetrieving = true;
216-
}
217240
inputStream.transferTo(outputStream);
218241
return new ByteArrayInputStream(outputStream.toByteArray());
219242
}

metafacture-io/src/test/java/org/metafacture/io/SruOpenerTest.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,9 @@ public void process(final XmlReceiver obj) {
4444
// sruOpener.setQuery("WVN%3D24A05");
4545
sruOpener.setRecordSchema("MARC21plus-xml");
4646
sruOpener.setVersion("1.1");
47-
sruOpener.setStartRecord("3029");
48-
sruOpener.setMaximumRecords("5");
49-
sruOpener.setTotal("6");
47+
sruOpener.setStartRecord("4");
48+
sruOpener.setMaximumRecords("1");
49+
sruOpener.setTotal("2");
5050
// sruOpener.process("https://services.dnb.de/sru/dnb");
5151
sruOpener.process("https://services.dnb.de/sru/zdb");
5252
// sruOpener.process("https://amsquery.stadt-zuerich.ch/sru/");

0 commit comments

Comments
 (0)