14
14
import org .w3c .dom .Element ;
15
15
import org .xml .sax .SAXException ;
16
16
17
- import java .io .ByteArrayInputStream ;
18
- import java .io .ByteArrayOutputStream ;
19
- import java .io .IOException ;
20
- import java .io .InputStream ;
21
- import java .io .InputStreamReader ;
22
- import java .io .Reader ;
17
+ import java .io .*;
23
18
import java .net .HttpURLConnection ;
24
19
import java .net .URL ;
25
20
import javax .xml .parsers .DocumentBuilder ;
26
21
import javax .xml .parsers .DocumentBuilderFactory ;
27
22
import javax .xml .parsers .ParserConfigurationException ;
28
- import javax .xml .transform .Result ;
29
- import javax .xml .transform .Transformer ;
30
- import javax .xml .transform .TransformerException ;
31
- import javax .xml .transform .TransformerFactory ;
23
+ import javax .xml .transform .*;
32
24
import javax .xml .transform .dom .DOMSource ;
33
25
import javax .xml .transform .stream .StreamResult ;
34
26
@@ -61,9 +53,13 @@ public final class SruOpener extends DefaultObjectPipe<String, ObjectReceiver<Re
61
53
private int maximumRecords = MAXIMUM_RECORDS ;
62
54
private int startRecord = START_RECORD ;
63
55
private int totalRecords = Integer .MAX_VALUE ;
56
+ int numberOfRecords = Integer .MAX_VALUE ;
64
57
65
58
private boolean stopRetrieving ;
59
+ private int recordsRetrieved ;
66
60
61
+ private String xmlDeclarationTemplate ="<?xml version=\" %s\" encoding=\" %s\" ?>" ;
62
+ private String xmlDeclaration ;
67
63
68
64
/**
69
65
* Default constructor
@@ -149,51 +145,83 @@ public void setVersion(final String version) {
149
145
@ Override
150
146
public void process (final String baseUrl ) {
151
147
152
- try {
148
+ StringBuilder srUrl = new StringBuilder (baseUrl );
149
+ if (query != null ) {
150
+ srUrl .append ("?query=" ).append (query ).append ("&operation=" ).append (operation ).append ("&recordSchema=" )
151
+ .append (recordSchema ).append ("&version=" ).append (version );
152
+ } else {
153
+ throw new IllegalArgumentException ("Missing mandatory parameter 'query'" );
154
+ }
153
155
154
- StringBuilder srUrl = new StringBuilder (baseUrl );
155
- if (query != null ) {
156
- srUrl .append ("?query=" ).append (query ).append ("&operation=" ).append (operation ).append ("&recordSchema=" ).append (recordSchema ).append ("&version=" ).append (version );
157
- }
158
- else {
159
- throw new IllegalArgumentException ("Missing mandatory parameter 'query'" );
160
- }
161
- int numberOfRecords = Integer .MAX_VALUE ;
162
- TransformerFactory tf = TransformerFactory .newInstance ();
163
- Transformer t = tf .newTransformer ();
164
- while (!stopRetrieving && (startRecord < numberOfRecords )) {
165
- /* if (totalRecords >0) {
166
- yetToRetrieveRecords = totalRecords - retrievedRecords;
167
- if (yetToRetrieveRecords < maximumRecords) {
168
- maximumRecords = yetToRetrieveRecords;
156
+ try {
157
+ //get first document and add a starting root tag
158
+ Transformer t = TransformerFactory .newInstance ().newTransformer ();
159
+ BufferedReader bufferedReader = new BufferedReader (new InputStreamReader (getXmlDocsViaSru (srUrl )));
160
+ String line ;
161
+ StringBuilder stringBuilder = new StringBuilder (1024 * 1024 );
162
+ boolean rootTagAdded = false ;
163
+ while ((line = bufferedReader .readLine ()) != null ) {
164
+ if (!rootTagAdded ) {
165
+ if (line .matches (".*searchRetrieveResponse.*" )) {
166
+ stringBuilder .append (xmlDeclaration +"\n " );
167
+ stringBuilder .append ("<harvest>\n " );
168
+ rootTagAdded = true ;
169
169
}
170
- }*/
171
- ByteArrayInputStream byteArrayInputStream = retrieve (srUrl , startRecord , maximumRecords );
172
-
173
-
174
- DocumentBuilderFactory factory =DocumentBuilderFactory .newInstance ();
175
- DocumentBuilder docBuilder = factory .newDocumentBuilder ();
176
- Document xmldoc = docBuilder .parse (byteArrayInputStream );
177
-
178
- Element element = (Element )xmldoc .getElementsByTagName ("numberOfRecords" ).item (0 );
179
- numberOfRecords =Integer .parseInt (element .getTextContent ());
180
-
181
- ByteArrayOutputStream os = new ByteArrayOutputStream ();
182
- Result result = new StreamResult (os );
183
- t .transform (new DOMSource (xmldoc ), result );
184
- ByteArrayInputStream inputStream = new ByteArrayInputStream (os .toByteArray ());
185
-
186
- getReceiver ().process (
187
- new InputStreamReader (inputStream ));
188
- tf = TransformerFactory .newInstance ();
189
- t = tf .newTransformer ();
190
- t .setOutputProperty ("omit-xml-declaration" , "yes" );
191
- startRecord = startRecord + maximumRecords ;
170
+ }
171
+ stringBuilder .append (line +"\n " );
172
+ }
173
+ getReceiver ().process (new InputStreamReader (new ByteArrayInputStream (stringBuilder .toString ().getBytes ())));
174
+ while (!stopRetrieving && recordsRetrieved < totalRecords && (startRecord < numberOfRecords )) {
175
+ InputStream inputStream = getXmlDocsViaSru (srUrl );
176
+ getReceiver ().process (new InputStreamReader (inputStream ));
192
177
}
178
+ //close root tag
179
+ getReceiver ().process (new InputStreamReader (new ByteArrayInputStream ("</harvest>\n \n " .getBytes ())));
193
180
}
194
- catch (final IOException | TransformerException | SAXException | ParserConfigurationException e ) {
181
+ catch (TransformerConfigurationException | IOException e ) {
195
182
throw new MetafactureException (e );
196
183
}
184
+ }
185
+
186
+ private InputStream getXmlDocsViaSru (final StringBuilder srUrl ){
187
+ try {
188
+ ByteArrayInputStream byteArrayInputStream = retrieve (srUrl , startRecord , maximumRecords );
189
+ DocumentBuilderFactory factory = DocumentBuilderFactory .newInstance ();
190
+ DocumentBuilder docBuilder = factory .newDocumentBuilder ();
191
+ Document xmldoc = docBuilder .parse (byteArrayInputStream );
192
+
193
+ /* Element newRoot = xmldoc.createElement("harvest");
194
+ newRoot.appendChild(xmldoc.getFirstChild());
195
+ xmldoc.appendChild(newRoot);*/
196
+
197
+ numberOfRecords =
198
+ Integer .parseInt (((Element ) xmldoc .getElementsByTagName ("numberOfRecords" ).item (0 )).getTextContent ());
199
+ int recordPosition =
200
+ Integer .parseInt (((Element ) xmldoc .getElementsByTagName ("recordPosition" ).item (0 )).getTextContent ());
201
+ int nextRecordPosition =
202
+ Integer .parseInt (((Element ) xmldoc .getElementsByTagName ("nextRecordPosition" ).item (0 )).getTextContent ());
203
+
204
+ String xmlEncoding = xmldoc .getXmlEncoding ();
205
+ String xmlVersion = xmldoc .getXmlVersion ();
206
+ //<?xml version="1.0" encoding="UTF-8"?>
207
+ xmlDeclaration =String .format (xmlDeclarationTemplate ,xmldoc .getXmlVersion (),xmldoc .getXmlEncoding ());
208
+ recordsRetrieved = recordsRetrieved + nextRecordPosition - recordPosition ;
209
+
210
+ ByteArrayOutputStream os = new ByteArrayOutputStream ();
211
+
212
+ Result result = new StreamResult (os );
213
+ Transformer t = TransformerFactory .newInstance ().newTransformer ();
214
+ t .setOutputProperty ("omit-xml-declaration" , "yes" );
215
+ t .transform (new DOMSource (xmldoc ), result );
216
+
217
+ ByteArrayInputStream inputStream = new ByteArrayInputStream (os .toByteArray ());
218
+ startRecord = startRecord + maximumRecords ;
219
+ return inputStream ;
220
+
221
+ } catch (final IOException | TransformerException | SAXException | ParserConfigurationException e ) {
222
+ throw new MetafactureException (e );
223
+ }
224
+
197
225
198
226
}
199
227
@@ -206,14 +234,9 @@ private ByteArrayInputStream retrieve(StringBuilder srUrl, int startRecord, int
206
234
connection .setRequestProperty ("User-Agent" , userAgent );
207
235
}
208
236
InputStream inputStream = getInputStream (connection );
237
+
209
238
ByteArrayOutputStream outputStream = new ByteArrayOutputStream ();
210
239
211
- System .out .println ("srUrl=" +srUrl );
212
- System .out .println ("startRecord=" +startRecord );
213
- System .out .println ("istream.length=" +inputStream .available ());
214
- if (inputStream .available () < 768 ){ // we take it that this is a result without a record
215
- stopRetrieving = true ;
216
- }
217
240
inputStream .transferTo (outputStream );
218
241
return new ByteArrayInputStream (outputStream .toByteArray ());
219
242
}
0 commit comments