diff --git a/src/main/java/dk/kb/netarchivesuite/solrwayback/memento/TimeMap.java b/src/main/java/dk/kb/netarchivesuite/solrwayback/memento/TimeMap.java index 27e192eb..eaa13f3a 100644 --- a/src/main/java/dk/kb/netarchivesuite/solrwayback/memento/TimeMap.java +++ b/src/main/java/dk/kb/netarchivesuite/solrwayback/memento/TimeMap.java @@ -21,13 +21,15 @@ public class TimeMap { * Defaults to application/link-type. * @return The timemap in the specified format. */ - public static StreamingOutput getTimeMap(URI originalResource, String type, Integer pageNumber) { + public StreamingOutput getTimeMap(URI originalResource, String type, Integer pageNumber) { switch (type){ case "json": return TimeMapAsCdxJSON.getTimeMapAsCdxJson(originalResource); case "spec": - return TimeMapAsJSON.getTimeMapAsSpecJson(originalResource, pageNumber); + TimeMapAsJSON timeMapAsJSON = new TimeMapAsJSON(); + + return timeMapAsJSON.getTimeMapAsSpecJson(originalResource, pageNumber); default: return output -> TimeMapAsLink.getTimeMapAsLinkFormat(originalResource, output, pageNumber); } @@ -120,6 +122,7 @@ static Page getPage(Stream streamOfDocs, int pageNum .limit(PropertiesLoader.MEMENTO_TIMEMAP_PAGESIZE); Page page = new Page<>(pageNumber, numberOfDocsInStream, solrDocs); + log.info("Returning a paged result. This query returns pageNumber: '{}'", pageNumber); return page; } diff --git a/src/main/java/dk/kb/netarchivesuite/solrwayback/memento/TimeMapAsCdxJSON.java b/src/main/java/dk/kb/netarchivesuite/solrwayback/memento/TimeMapAsCdxJSON.java index 6687eb9a..ba4608cd 100644 --- a/src/main/java/dk/kb/netarchivesuite/solrwayback/memento/TimeMapAsCdxJSON.java +++ b/src/main/java/dk/kb/netarchivesuite/solrwayback/memento/TimeMapAsCdxJSON.java @@ -76,16 +76,24 @@ private static StreamingOutput getJSONStreamingOutputCdxFields(Stream hostSurtList = (List) solrDoc.get("host_surt"); + + String hostsurt = hostSurtList.get(hostSurtList.size()-1); + String waybackdate = extractNonNullStringFromSolr(solrDoc, "wayback_date"); + String url = extractNonNullStringFromSolr(solrDoc, "url"); + String contentType = extractNonNullStringFromSolr(solrDoc, "content_type"); + String statusCode = extractNonNullStringFromSolr(solrDoc, "status_code"); + String hash = extractNonNullStringFromSolr(solrDoc, "hash"); + String contentLength = extractNonNullStringFromSolr(solrDoc, "content_length"); try { jsonGenerator.writeStartArray(); // Start entry - jsonGenerator.writeString(hostSurtList.get(hostSurtList.size()-1)); - jsonGenerator.writeString(solrDoc.get("wayback_date").toString()); - jsonGenerator.writeString((String) solrDoc.get("url")); - jsonGenerator.writeString(solrDoc.get("content_type").toString()); - jsonGenerator.writeString(solrDoc.get("status_code").toString()); - jsonGenerator.writeString((String) solrDoc.get("hash")); - jsonGenerator.writeString(solrDoc.get("content_length").toString()); + jsonGenerator.writeString(hostsurt); + jsonGenerator.writeString(waybackdate); + jsonGenerator.writeString(url); + jsonGenerator.writeString(contentType); + jsonGenerator.writeString(statusCode); + jsonGenerator.writeString(hash); + jsonGenerator.writeString(contentLength); jsonGenerator.writeEndArray(); // End entry jsonGenerator.writeRaw("\n"); } catch (IOException e) { @@ -95,6 +103,21 @@ private static SolrDocument addMementoToTimeMapObject(SolrDocument solrDoc, Json return solrDoc; } + /** + * Return a value from the input SolrDocument if it is present otherwise return an empty string. + * @param solrDoc to retrieve value from. + * @param value to retrieve in doc. + * @return the value if present. + */ + private static String extractNonNullStringFromSolr(SolrDocument solrDoc, String value) { + try { + return solrDoc.get(value).toString(); + } catch (NullPointerException e){ + log.debug("A NullPointerException happened when extracting values from SolrDocument. The specific value will be empty in the timemap"); + return ""; + } + } + /** * Write a simple JSON array containing the following values: {@code ["urlkey","timestamp","original","mimetype","statuscode","digest","length"]} diff --git a/src/main/java/dk/kb/netarchivesuite/solrwayback/memento/TimeMapAsJSON.java b/src/main/java/dk/kb/netarchivesuite/solrwayback/memento/TimeMapAsJSON.java index 55cd07f0..05a8c26e 100644 --- a/src/main/java/dk/kb/netarchivesuite/solrwayback/memento/TimeMapAsJSON.java +++ b/src/main/java/dk/kb/netarchivesuite/solrwayback/memento/TimeMapAsJSON.java @@ -29,25 +29,23 @@ public class TimeMapAsJSON { * @param originalResource URI-R to create URI-T from. * @return A json representation of the timemap ready for streaming. */ - static StreamingOutput getTimeMapAsSpecJson(URI originalResource, Integer pageNumber) { + StreamingOutput getTimeMapAsSpecJson(URI originalResource, Integer pageNumber) { MementoMetadata metadata = new MementoMetadata(); long count = getDocStreamAndUpdateDatesForFirstAndLastMemento(originalResource, metadata) .count(); log.info("Original resource has been harvested '{}' times.",count); + Stream mementoStream = getMementoStream(originalResource); + if (count < PropertiesLoader.MEMENTO_TIMEMAP_PAGINGLIMIT){ log.info("Creating timemap of '{}' entries, with dates in range from '{}' to '{}'.", count, metadata.getFirstMemento(), metadata.getLastMemento()); - Stream mementoStream = getMementoStream(originalResource); - return getJSONStreamingOutput(originalResource, metadata, mementoStream); } else { log.info("Creating paged timemaps of '{}' entries, with dates in range from '{}' to '{}'.", count, metadata.getFirstMemento(), metadata.getLastMemento()); - Stream mementoStream = getMementoStream(originalResource); - return getJSONPagedStreamingOutput(originalResource, metadata, mementoStream, count, pageNumber); } } @@ -85,13 +83,13 @@ private static StreamingOutput getJSONStreamingOutput(URI originalResource, Meme * @param count total amount of mementos. * @return A paged JSON timemap ready for streaming. */ - private static StreamingOutput getJSONPagedStreamingOutput(URI originalResource, MementoMetadata metadata, + private StreamingOutput getJSONPagedStreamingOutput(URI originalResource, MementoMetadata metadata, Stream mementoStream, long count, Integer pageNumber) { if (pageNumber == null){ pageNumber = 1; - log.info("Set page number to: " + pageNumber); + log.info("Set page number to: '{}'", pageNumber); } int finalPageNumber = pageNumber; @@ -164,7 +162,7 @@ private static JsonGenerator getStartOfJsonTimeMap(URI originalResource, Memento * @param pageNumber of the page to retrieve. * @param pageOfResults a page containing SolrDocuments that are to be written as a paged timemap. */ - private static void createPagedJsonTimemap(URI originalResource, MementoMetadata metadata, long totalMementosForResource, + private void createPagedJsonTimemap(URI originalResource, MementoMetadata metadata, long totalMementosForResource, int pageNumber, TimeMap.Page pageOfResults, OutputStream os) throws IOException { diff --git a/src/main/java/dk/kb/netarchivesuite/solrwayback/service/SolrWaybackMementoAPI.java b/src/main/java/dk/kb/netarchivesuite/solrwayback/service/SolrWaybackMementoAPI.java index 40c4e552..3c884454 100644 --- a/src/main/java/dk/kb/netarchivesuite/solrwayback/service/SolrWaybackMementoAPI.java +++ b/src/main/java/dk/kb/netarchivesuite/solrwayback/service/SolrWaybackMementoAPI.java @@ -1,6 +1,9 @@ package dk.kb.netarchivesuite.solrwayback.service; import dk.kb.netarchivesuite.solrwayback.memento.DatetimeNegotiation; +import dk.kb.netarchivesuite.solrwayback.memento.TimeMap; +import dk.kb.netarchivesuite.solrwayback.service.exception.NotFoundServiceException; +import dk.kb.netarchivesuite.solrwayback.service.exception.SolrWaybackServiceException; import dk.kb.netarchivesuite.solrwayback.util.PathResolver; import org.apache.http.client.utils.DateUtils; import org.slf4j.Logger; @@ -20,7 +23,6 @@ import java.net.URISyntaxException; import java.util.Date; -import static dk.kb.netarchivesuite.solrwayback.memento.TimeMap.getTimeMap; import static dk.kb.netarchivesuite.solrwayback.util.DateUtils.validateTimestamp; /** @@ -81,7 +83,8 @@ public Response timeMap(@Context UriInfo uriInfo, @Context HttpServletRequest ht mimeTypeForResponse = getMimeTypeForResponse(type, mimeTypeForResponse); URI uri = PathResolver.mementoAPIResolver("/timemap/" + type + "/", uriInfo, url); - StreamingOutput timemap = getTimeMap(uri, type, null); + TimeMap timeMap = new TimeMap(); + StreamingOutput timemap = timeMap.getTimeMap(uri, type, null); String fileType = fileEndingFromAcceptHeader(mimeTypeForResponse); return Response.ok().type(mimeTypeForResponse) @@ -90,6 +93,19 @@ public Response timeMap(@Context UriInfo uriInfo, @Context HttpServletRequest ht .build(); } + /** + * The paged timemap is not implemented for the json timemap, therefore trying to access a paged version throws an exception. + */ + @GET + @Path("timemap/{page:\\d+}/json/{url:.+}") + public Response timeMapPagedJson(@Context UriInfo uriInfo, @Context HttpServletRequest httpRequest, @PathParam("url") String url, @PathParam("page") String page) + throws NotFoundServiceException { + + String message = "Endpoint timemap/json does not support pagination. Either access the full timemap at /services/memento/timemap/json/" + url + " or request a paged " + + "timemap in the link or spec-json format at /services/memento/timemap/" + page + "/spec/" + url; + throw new NotFoundServiceException(message); + } + @GET @Path("timemap/{page:\\d+}/{type}/{url:.+}") public Response timeMapPaged(@Context UriInfo uriInfo, @Context HttpServletRequest httpRequest, @@ -102,12 +118,13 @@ public Response timeMapPaged(@Context UriInfo uriInfo, @Context HttpServletReque mimeTypeForResponse = getMimeTypeForResponse(type, mimeTypeForResponse); URI uri = PathResolver.mementoAPIResolver("/timemap/" + page + "/" + type + "/", uriInfo, url); - StreamingOutput timemap = getTimeMap(uri, type, Integer.valueOf(page)); + TimeMap timeMap = new TimeMap(); + StreamingOutput output = timeMap.getTimeMap(uri, type, Integer.valueOf(page)); String fileType = fileEndingFromAcceptHeader(mimeTypeForResponse); // TODO: Fresh eyes on http headers for timemap return Response.ok().type(mimeTypeForResponse) - .entity(timemap) + .entity(output) .header(HttpHeaders.CONTENT_DISPOSITION, "inline") // Ensure inline displa //.header(HttpHeaders.CONTENT_DISPOSITION, "attachment ; filename = \"timemap"+ fileType + "\"") .build(); diff --git a/src/test/java/dk/kb/netarchivesuite/solrwayback/memento/PagedTimeMapTest.java b/src/test/java/dk/kb/netarchivesuite/solrwayback/memento/PagedTimeMapTest.java index 96c0f6f9..e80cf3b8 100644 --- a/src/test/java/dk/kb/netarchivesuite/solrwayback/memento/PagedTimeMapTest.java +++ b/src/test/java/dk/kb/netarchivesuite/solrwayback/memento/PagedTimeMapTest.java @@ -78,9 +78,10 @@ public void serverAvailable() throws SolrServerException, IOException { @Test public void testTimeMapLinkLastConstruction() throws IOException, URISyntaxException { - StreamingOutput timeMap = TimeMap.getTimeMap(new URI("http://kb.dk/"), "link", 20); + TimeMap timeMap = new TimeMap(); + StreamingOutput output = timeMap.getTimeMap(new URI("http://kb.dk/"), "link", 20); - String timeMapString = convertStreamingTimeMapToString(timeMap); + String timeMapString = convertStreamingTimeMapToString(output); String timeMapStringLocalhost=UnitTestUtils.replaceHostNameWithLocalHost(timeMapString); System.out.println(timeMapStringLocalhost); @@ -89,18 +90,20 @@ public void testTimeMapLinkLastConstruction() throws IOException, URISyntaxExcep @Test public void testTimeMapLinkNoPageConstruction() throws IOException, URISyntaxException { - StreamingOutput timeMap = TimeMap.getTimeMap(new URI("http://kb.dk/"), "link", null); + TimeMap timeMap = new TimeMap(); + StreamingOutput output = timeMap.getTimeMap(new URI("http://kb.dk/"), "link", null); - String timeMapString = convertStreamingTimeMapToString(timeMap); + String timeMapString = convertStreamingTimeMapToString(output); String timeMapStringLocalhost=UnitTestUtils.replaceHostNameWithLocalHost(timeMapString); assertEquals(testPagedTimeMapLinkFirstPage, timeMapStringLocalhost); } @Test public void testTimeMapLinkFirstConstruction() throws IOException, URISyntaxException { - StreamingOutput timeMap = TimeMap.getTimeMap(new URI("http://kb.dk/"), "link", 1); + TimeMap timeMap = new TimeMap(); + StreamingOutput output = timeMap.getTimeMap(new URI("http://kb.dk/"), "link", 1); - String timeMapString = convertStreamingTimeMapToString(timeMap); + String timeMapString = convertStreamingTimeMapToString(output); String timeMapStringLocalhost=UnitTestUtils.replaceHostNameWithLocalHost(timeMapString); assertEquals(testPagedTimeMapLinkFirstPage, timeMapStringLocalhost); } @@ -108,45 +111,50 @@ public void testTimeMapLinkFirstConstruction() throws IOException, URISyntaxExce @Test public void testTimeMapLinkRandomConstruction() throws IOException, URISyntaxException { - StreamingOutput timeMap = TimeMap.getTimeMap(new URI("http://kb.dk/"), "link", 5); + TimeMap timeMap = new TimeMap(); + StreamingOutput output = timeMap.getTimeMap(new URI("http://kb.dk/"), "link", 5); - String timeMapString = convertStreamingTimeMapToString(timeMap); + String timeMapString = convertStreamingTimeMapToString(output); String timeMapStringLocalhost=UnitTestUtils.replaceHostNameWithLocalHost(timeMapString); assertEquals(testPagedTimeMapLinkFifthPage, timeMapStringLocalhost); } @Test public void testPagedTimeMapFirstJSONConstruction() throws IOException, URISyntaxException { - StreamingOutput timeMap = TimeMap.getTimeMap(new URI("http://kb.dk/"), "spec", 1); + TimeMap timeMap = new TimeMap(); + StreamingOutput output = timeMap.getTimeMap(new URI("http://kb.dk/"), "spec", 1); - String timeMapString = convertStreamingTimeMapToString(timeMap); + String timeMapString = convertStreamingTimeMapToString(output); String timeMapStringLocalhost=UnitTestUtils.replaceHostNameWithLocalHost(timeMapString); assertEquals(testPagedTimeMapJSONFirstPage, timeMapStringLocalhost); } @Test public void testPagedTimeMapRandomJSONConstruction() throws IOException, URISyntaxException { - StreamingOutput timeMap = TimeMap.getTimeMap(new URI("http://kb.dk/"), "spec", 12); + TimeMap timeMap = new TimeMap(); + StreamingOutput output = timeMap.getTimeMap(new URI("http://kb.dk/"), "spec", 12); - String timeMapString = convertStreamingTimeMapToString(timeMap); + String timeMapString = convertStreamingTimeMapToString(output); String timeMapStringLocalhost=UnitTestUtils.replaceHostNameWithLocalHost(timeMapString); assertEquals(testPagedTimeMapJSONTwelthPage, timeMapStringLocalhost); } @Test public void testPagedTimeMapLastJSONConstruction() throws IOException, URISyntaxException { - StreamingOutput timeMap = TimeMap.getTimeMap(new URI("http://kb.dk/"), "spec", 20); + TimeMap timeMap = new TimeMap(); + StreamingOutput output = timeMap.getTimeMap(new URI("http://kb.dk/"), "spec", 20); - String timeMapString = convertStreamingTimeMapToString(timeMap); + String timeMapString = convertStreamingTimeMapToString(output); String timeMapStringLocalhost=UnitTestUtils.replaceHostNameWithLocalHost(timeMapString); assertEquals(testPagedTimeMapJSONLastPage, timeMapStringLocalhost); } @Test public void testPagedTimeMapNoPageJSONConstruction() throws IOException, URISyntaxException { - StreamingOutput timeMap = TimeMap.getTimeMap(new URI("http://kb.dk/"), "spec", null); + TimeMap timeMap = new TimeMap(); + StreamingOutput output = timeMap.getTimeMap(new URI("http://kb.dk/"), "spec", null); - String timeMapString = convertStreamingTimeMapToString(timeMap); + String timeMapString = convertStreamingTimeMapToString(output); String timeMapStringLocalhost=UnitTestUtils.replaceHostNameWithLocalHost(timeMapString); assertEquals(testPagedTimeMapJSONFirstPage, timeMapStringLocalhost); } diff --git a/src/test/java/dk/kb/netarchivesuite/solrwayback/memento/TimeMapTest.java b/src/test/java/dk/kb/netarchivesuite/solrwayback/memento/TimeMapTest.java index eccbe94f..d04a1060 100644 --- a/src/test/java/dk/kb/netarchivesuite/solrwayback/memento/TimeMapTest.java +++ b/src/test/java/dk/kb/netarchivesuite/solrwayback/memento/TimeMapTest.java @@ -91,10 +91,11 @@ public void serverAvailable() throws SolrServerException, IOException { public void timeMapLinkConstruction() throws IOException, URISyntaxException { assertEquals(10000, PropertiesLoader.MEMENTO_TIMEMAP_PAGINGLIMIT); // Set very high to disable paging - StreamingOutput timeMap = TimeMap.getTimeMap(new URI("http://kb.dk/"), "link", 0); + TimeMap timeMap = new TimeMap(); + StreamingOutput streamingOutput = timeMap.getTimeMap(new URI("http://kb.dk/"), "link", 0); ByteArrayOutputStream output = new ByteArrayOutputStream(); - timeMap.write(output); + streamingOutput.write(output); String timeMapString = new String(output.toByteArray(), StandardCharsets.UTF_8); String timeMapStringLocalhost=UnitTestUtils.replaceHostNameWithLocalHost(timeMapString); @@ -105,10 +106,11 @@ public void timeMapLinkConstruction() throws IOException, URISyntaxException { public void timeMapJsonConstruction() throws IOException, URISyntaxException { assertEquals(10000, PropertiesLoader.MEMENTO_TIMEMAP_PAGINGLIMIT); // Set very high to disable paging - StreamingOutput timeMap = TimeMap.getTimeMap(new URI("http://kb.dk/"), "spec", 0); + TimeMap timeMap = new TimeMap(); + StreamingOutput streamingOutput = timeMap.getTimeMap(new URI("http://kb.dk/"), "spec", 0); ByteArrayOutputStream output = new ByteArrayOutputStream(); - timeMap.write(output); + streamingOutput.write(output); String timeMapString = new String(output.toByteArray(), StandardCharsets.UTF_8); String timeMapStringLocalhost=UnitTestUtils.replaceHostNameWithLocalHost(timeMapString); assertEquals(testTimeMapJSON, timeMapStringLocalhost);