Skip to content

Commit

Permalink
Merge pull request #468 from netarchivesuite/fix_memento_nullpointer
Browse files Browse the repository at this point in the history
Fix memento nullpointer
  • Loading branch information
thomasegense authored Jan 13, 2025
2 parents ab091c4 + 80458ca commit 9c876f0
Show file tree
Hide file tree
Showing 6 changed files with 92 additions and 41 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,15 @@ public class TimeMap {
* Defaults to application/link-type.
* @return The timemap in the specified format.
*/
public static StreamingOutput getTimeMap(URI originalResource, String type, Integer pageNumber) {
public StreamingOutput getTimeMap(URI originalResource, String type, Integer pageNumber) {

switch (type){
case "json":
return TimeMapAsCdxJSON.getTimeMapAsCdxJson(originalResource);
case "spec":
return TimeMapAsJSON.getTimeMapAsSpecJson(originalResource, pageNumber);
TimeMapAsJSON timeMapAsJSON = new TimeMapAsJSON();

return timeMapAsJSON.getTimeMapAsSpecJson(originalResource, pageNumber);
default:
return output -> TimeMapAsLink.getTimeMapAsLinkFormat(originalResource, output, pageNumber);
}
Expand Down Expand Up @@ -120,6 +122,7 @@ static Page<SolrDocument> getPage(Stream<SolrDocument> streamOfDocs, int pageNum
.limit(PropertiesLoader.MEMENTO_TIMEMAP_PAGESIZE);

Page<SolrDocument> page = new Page<>(pageNumber, numberOfDocsInStream, solrDocs);
log.info("Returning a paged result. This query returns pageNumber: '{}'", pageNumber);

return page;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,16 +76,24 @@ private static StreamingOutput getJSONStreamingOutputCdxFields(Stream<SolrDocume
*/
private static SolrDocument addMementoToTimeMapObject(SolrDocument solrDoc, JsonGenerator jsonGenerator) {
List<String> hostSurtList = (List<String>) solrDoc.get("host_surt");

String hostsurt = hostSurtList.get(hostSurtList.size()-1);
String waybackdate = extractNonNullStringFromSolr(solrDoc, "wayback_date");
String url = extractNonNullStringFromSolr(solrDoc, "url");
String contentType = extractNonNullStringFromSolr(solrDoc, "content_type");
String statusCode = extractNonNullStringFromSolr(solrDoc, "status_code");
String hash = extractNonNullStringFromSolr(solrDoc, "hash");
String contentLength = extractNonNullStringFromSolr(solrDoc, "content_length");

try {
jsonGenerator.writeStartArray(); // Start entry
jsonGenerator.writeString(hostSurtList.get(hostSurtList.size()-1));
jsonGenerator.writeString(solrDoc.get("wayback_date").toString());
jsonGenerator.writeString((String) solrDoc.get("url"));
jsonGenerator.writeString(solrDoc.get("content_type").toString());
jsonGenerator.writeString(solrDoc.get("status_code").toString());
jsonGenerator.writeString((String) solrDoc.get("hash"));
jsonGenerator.writeString(solrDoc.get("content_length").toString());
jsonGenerator.writeString(hostsurt);
jsonGenerator.writeString(waybackdate);
jsonGenerator.writeString(url);
jsonGenerator.writeString(contentType);
jsonGenerator.writeString(statusCode);
jsonGenerator.writeString(hash);
jsonGenerator.writeString(contentLength);
jsonGenerator.writeEndArray(); // End entry
jsonGenerator.writeRaw("\n");
} catch (IOException e) {
Expand All @@ -95,6 +103,21 @@ private static SolrDocument addMementoToTimeMapObject(SolrDocument solrDoc, Json
return solrDoc;
}

/**
 * Return the string form of a field in the input SolrDocument, or an empty string when the
 * field cannot be read.
 * <p>
 * Absence is detected with an explicit null check instead of catching a
 * {@link NullPointerException}, so an unrelated NullPointerException raised while converting
 * the value to a string is no longer silently turned into an empty value.
 *
 * @param solrDoc to retrieve the value from. A {@code null} document is treated as having no fields.
 * @param value   name of the field to retrieve from the document.
 * @return the result of {@code toString()} on the field value if the field is present, otherwise
 *         an empty string.
 */
private static String extractNonNullStringFromSolr(SolrDocument solrDoc, String value) {
    // Keep the original tolerance for a null document: it yields an empty value, not an exception.
    Object fieldValue = solrDoc == null ? null : solrDoc.get(value);

    if (fieldValue == null) {
        log.debug("Field '{}' was missing when extracting values from SolrDocument. " +
                  "The specific value will be empty in the timemap", value);
        return "";
    }

    return fieldValue.toString();
}


/**
* Write a simple JSON array containing the following values: {@code ["urlkey","timestamp","original","mimetype","statuscode","digest","length"]}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,25 +29,23 @@ public class TimeMapAsJSON {
* @param originalResource URI-R to create URI-T from.
* @return A json representation of the timemap ready for streaming.
*/
static StreamingOutput getTimeMapAsSpecJson(URI originalResource, Integer pageNumber) {
StreamingOutput getTimeMapAsSpecJson(URI originalResource, Integer pageNumber) {
MementoMetadata metadata = new MementoMetadata();

long count = getDocStreamAndUpdateDatesForFirstAndLastMemento(originalResource, metadata)
.count();
log.info("Original resource has been harvested '{}' times.",count);
Stream<SolrDocument> mementoStream = getMementoStream(originalResource);

if (count < PropertiesLoader.MEMENTO_TIMEMAP_PAGINGLIMIT){
log.info("Creating timemap of '{}' entries, with dates in range from '{}' to '{}'.",
count, metadata.getFirstMemento(), metadata.getLastMemento());

Stream<SolrDocument> mementoStream = getMementoStream(originalResource);

return getJSONStreamingOutput(originalResource, metadata, mementoStream);
} else {
log.info("Creating paged timemaps of '{}' entries, with dates in range from '{}' to '{}'.",
count, metadata.getFirstMemento(), metadata.getLastMemento());

Stream<SolrDocument> mementoStream = getMementoStream(originalResource);

return getJSONPagedStreamingOutput(originalResource, metadata, mementoStream, count, pageNumber);
}
}
Expand Down Expand Up @@ -85,13 +83,13 @@ private static StreamingOutput getJSONStreamingOutput(URI originalResource, Meme
* @param count total amount of mementos.
* @return A paged JSON timemap ready for streaming.
*/
private static StreamingOutput getJSONPagedStreamingOutput(URI originalResource, MementoMetadata metadata,
private StreamingOutput getJSONPagedStreamingOutput(URI originalResource, MementoMetadata metadata,
Stream<SolrDocument> mementoStream,
long count, Integer pageNumber) {

if (pageNumber == null){
pageNumber = 1;
log.info("Set page number to: " + pageNumber);
log.info("Set page number to: '{}'", pageNumber);
}

int finalPageNumber = pageNumber;
Expand Down Expand Up @@ -164,7 +162,7 @@ private static JsonGenerator getStartOfJsonTimeMap(URI originalResource, Memento
* @param pageNumber of the page to retrieve.
* @param pageOfResults a page containing SolrDocuments that are to be written as a paged timemap.
*/
private static void createPagedJsonTimemap(URI originalResource, MementoMetadata metadata, long totalMementosForResource,
private void createPagedJsonTimemap(URI originalResource, MementoMetadata metadata, long totalMementosForResource,
int pageNumber, TimeMap.Page<SolrDocument> pageOfResults, OutputStream os)
throws IOException {

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package dk.kb.netarchivesuite.solrwayback.service;

import dk.kb.netarchivesuite.solrwayback.memento.DatetimeNegotiation;
import dk.kb.netarchivesuite.solrwayback.memento.TimeMap;
import dk.kb.netarchivesuite.solrwayback.service.exception.NotFoundServiceException;
import dk.kb.netarchivesuite.solrwayback.service.exception.SolrWaybackServiceException;
import dk.kb.netarchivesuite.solrwayback.util.PathResolver;
import org.apache.http.client.utils.DateUtils;
import org.slf4j.Logger;
Expand All @@ -20,7 +23,6 @@
import java.net.URISyntaxException;
import java.util.Date;

import static dk.kb.netarchivesuite.solrwayback.memento.TimeMap.getTimeMap;
import static dk.kb.netarchivesuite.solrwayback.util.DateUtils.validateTimestamp;

/**
Expand Down Expand Up @@ -81,7 +83,8 @@ public Response timeMap(@Context UriInfo uriInfo, @Context HttpServletRequest ht
mimeTypeForResponse = getMimeTypeForResponse(type, mimeTypeForResponse);

URI uri = PathResolver.mementoAPIResolver("/timemap/" + type + "/", uriInfo, url);
StreamingOutput timemap = getTimeMap(uri, type, null);
TimeMap timeMap = new TimeMap();
StreamingOutput timemap = timeMap.getTimeMap(uri, type, null);
String fileType = fileEndingFromAcceptHeader(mimeTypeForResponse);

return Response.ok().type(mimeTypeForResponse)
Expand All @@ -90,6 +93,19 @@ public Response timeMap(@Context UriInfo uriInfo, @Context HttpServletRequest ht
.build();
}

/**
 * Endpoint for requests asking for a paged version of the json timemap.
 * Pagination is not implemented for the json format, so every request matching this path is
 * answered by throwing an exception whose message points to the unpaged json timemap and to
 * the paged spec timemap for the same URL.
 *
 * @param uriInfo     request URI information, not used by this endpoint.
 * @param httpRequest the incoming request, not used by this endpoint.
 * @param url         the URL the timemap was requested for; only used in the exception message.
 * @param page        the requested page number; only used in the exception message.
 * @return never returns normally.
 * @throws NotFoundServiceException always, with a message describing the supported alternatives.
 */
@GET
@Path("timemap/{page:\\d+}/json/{url:.+}")
public Response timeMapPagedJson(@Context UriInfo uriInfo, @Context HttpServletRequest httpRequest, @PathParam("url") String url, @PathParam("page") String page)
        throws NotFoundServiceException {

    // The message is built from the same literals as before, so its text is unchanged.
    throw new NotFoundServiceException(
            "Endpoint timemap/json does not support pagination. Either access the full timemap at /services/memento/timemap/json/" + url
                    + " or request a paged " + "timemap in the link or spec-json format at /services/memento/timemap/" + page + "/spec/" + url);
}

@GET
@Path("timemap/{page:\\d+}/{type}/{url:.+}")
public Response timeMapPaged(@Context UriInfo uriInfo, @Context HttpServletRequest httpRequest,
Expand All @@ -102,12 +118,13 @@ public Response timeMapPaged(@Context UriInfo uriInfo, @Context HttpServletReque
mimeTypeForResponse = getMimeTypeForResponse(type, mimeTypeForResponse);

URI uri = PathResolver.mementoAPIResolver("/timemap/" + page + "/" + type + "/", uriInfo, url);
StreamingOutput timemap = getTimeMap(uri, type, Integer.valueOf(page));
TimeMap timeMap = new TimeMap();
StreamingOutput output = timeMap.getTimeMap(uri, type, Integer.valueOf(page));
String fileType = fileEndingFromAcceptHeader(mimeTypeForResponse);

// TODO: Fresh eyes on http headers for timemap
return Response.ok().type(mimeTypeForResponse)
.entity(timemap)
.entity(output)
.header(HttpHeaders.CONTENT_DISPOSITION, "inline") // Ensure inline display
//.header(HttpHeaders.CONTENT_DISPOSITION, "attachment ; filename = \"timemap"+ fileType + "\"")
.build();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,10 @@ public void serverAvailable() throws SolrServerException, IOException {

@Test
public void testTimeMapLinkLastConstruction() throws IOException, URISyntaxException {
StreamingOutput timeMap = TimeMap.getTimeMap(new URI("http://kb.dk/"), "link", 20);
TimeMap timeMap = new TimeMap();
StreamingOutput output = timeMap.getTimeMap(new URI("http://kb.dk/"), "link", 20);

String timeMapString = convertStreamingTimeMapToString(timeMap);
String timeMapString = convertStreamingTimeMapToString(output);

String timeMapStringLocalhost=UnitTestUtils.replaceHostNameWithLocalHost(timeMapString);
System.out.println(timeMapStringLocalhost);
Expand All @@ -89,64 +90,71 @@ public void testTimeMapLinkLastConstruction() throws IOException, URISyntaxExcep

@Test
public void testTimeMapLinkNoPageConstruction() throws IOException, URISyntaxException {
StreamingOutput timeMap = TimeMap.getTimeMap(new URI("http://kb.dk/"), "link", null);
TimeMap timeMap = new TimeMap();
StreamingOutput output = timeMap.getTimeMap(new URI("http://kb.dk/"), "link", null);

String timeMapString = convertStreamingTimeMapToString(timeMap);
String timeMapString = convertStreamingTimeMapToString(output);
String timeMapStringLocalhost=UnitTestUtils.replaceHostNameWithLocalHost(timeMapString);

assertEquals(testPagedTimeMapLinkFirstPage, timeMapStringLocalhost);
}
@Test
public void testTimeMapLinkFirstConstruction() throws IOException, URISyntaxException {
StreamingOutput timeMap = TimeMap.getTimeMap(new URI("http://kb.dk/"), "link", 1);
TimeMap timeMap = new TimeMap();
StreamingOutput output = timeMap.getTimeMap(new URI("http://kb.dk/"), "link", 1);

String timeMapString = convertStreamingTimeMapToString(timeMap);
String timeMapString = convertStreamingTimeMapToString(output);
String timeMapStringLocalhost=UnitTestUtils.replaceHostNameWithLocalHost(timeMapString);
assertEquals(testPagedTimeMapLinkFirstPage, timeMapStringLocalhost);
}


@Test
public void testTimeMapLinkRandomConstruction() throws IOException, URISyntaxException {
StreamingOutput timeMap = TimeMap.getTimeMap(new URI("http://kb.dk/"), "link", 5);
TimeMap timeMap = new TimeMap();
StreamingOutput output = timeMap.getTimeMap(new URI("http://kb.dk/"), "link", 5);

String timeMapString = convertStreamingTimeMapToString(timeMap);
String timeMapString = convertStreamingTimeMapToString(output);
String timeMapStringLocalhost=UnitTestUtils.replaceHostNameWithLocalHost(timeMapString);
assertEquals(testPagedTimeMapLinkFifthPage, timeMapStringLocalhost);
}

@Test
public void testPagedTimeMapFirstJSONConstruction() throws IOException, URISyntaxException {
StreamingOutput timeMap = TimeMap.getTimeMap(new URI("http://kb.dk/"), "spec", 1);
TimeMap timeMap = new TimeMap();
StreamingOutput output = timeMap.getTimeMap(new URI("http://kb.dk/"), "spec", 1);

String timeMapString = convertStreamingTimeMapToString(timeMap);
String timeMapString = convertStreamingTimeMapToString(output);
String timeMapStringLocalhost=UnitTestUtils.replaceHostNameWithLocalHost(timeMapString);
assertEquals(testPagedTimeMapJSONFirstPage, timeMapStringLocalhost);
}

@Test
public void testPagedTimeMapRandomJSONConstruction() throws IOException, URISyntaxException {
StreamingOutput timeMap = TimeMap.getTimeMap(new URI("http://kb.dk/"), "spec", 12);
TimeMap timeMap = new TimeMap();
StreamingOutput output = timeMap.getTimeMap(new URI("http://kb.dk/"), "spec", 12);

String timeMapString = convertStreamingTimeMapToString(timeMap);
String timeMapString = convertStreamingTimeMapToString(output);
String timeMapStringLocalhost=UnitTestUtils.replaceHostNameWithLocalHost(timeMapString);
assertEquals(testPagedTimeMapJSONTwelthPage, timeMapStringLocalhost);
}

@Test
public void testPagedTimeMapLastJSONConstruction() throws IOException, URISyntaxException {
StreamingOutput timeMap = TimeMap.getTimeMap(new URI("http://kb.dk/"), "spec", 20);
TimeMap timeMap = new TimeMap();
StreamingOutput output = timeMap.getTimeMap(new URI("http://kb.dk/"), "spec", 20);

String timeMapString = convertStreamingTimeMapToString(timeMap);
String timeMapString = convertStreamingTimeMapToString(output);
String timeMapStringLocalhost=UnitTestUtils.replaceHostNameWithLocalHost(timeMapString);
assertEquals(testPagedTimeMapJSONLastPage, timeMapStringLocalhost);
}

@Test
public void testPagedTimeMapNoPageJSONConstruction() throws IOException, URISyntaxException {
StreamingOutput timeMap = TimeMap.getTimeMap(new URI("http://kb.dk/"), "spec", null);
TimeMap timeMap = new TimeMap();
StreamingOutput output = timeMap.getTimeMap(new URI("http://kb.dk/"), "spec", null);

String timeMapString = convertStreamingTimeMapToString(timeMap);
String timeMapString = convertStreamingTimeMapToString(output);
String timeMapStringLocalhost=UnitTestUtils.replaceHostNameWithLocalHost(timeMapString);
assertEquals(testPagedTimeMapJSONFirstPage, timeMapStringLocalhost);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,11 @@ public void serverAvailable() throws SolrServerException, IOException {
public void timeMapLinkConstruction() throws IOException, URISyntaxException {
assertEquals(10000, PropertiesLoader.MEMENTO_TIMEMAP_PAGINGLIMIT);
// Set very high to disable paging
StreamingOutput timeMap = TimeMap.getTimeMap(new URI("http://kb.dk/"), "link", 0);
TimeMap timeMap = new TimeMap();
StreamingOutput streamingOutput = timeMap.getTimeMap(new URI("http://kb.dk/"), "link", 0);

ByteArrayOutputStream output = new ByteArrayOutputStream();
timeMap.write(output);
streamingOutput.write(output);
String timeMapString = new String(output.toByteArray(), StandardCharsets.UTF_8);

String timeMapStringLocalhost=UnitTestUtils.replaceHostNameWithLocalHost(timeMapString);
Expand All @@ -105,10 +106,11 @@ public void timeMapLinkConstruction() throws IOException, URISyntaxException {
public void timeMapJsonConstruction() throws IOException, URISyntaxException {
assertEquals(10000, PropertiesLoader.MEMENTO_TIMEMAP_PAGINGLIMIT);
// Set very high to disable paging
StreamingOutput timeMap = TimeMap.getTimeMap(new URI("http://kb.dk/"), "spec", 0);
TimeMap timeMap = new TimeMap();
StreamingOutput streamingOutput = timeMap.getTimeMap(new URI("http://kb.dk/"), "spec", 0);

ByteArrayOutputStream output = new ByteArrayOutputStream();
timeMap.write(output);
streamingOutput.write(output);
String timeMapString = new String(output.toByteArray(), StandardCharsets.UTF_8);
String timeMapStringLocalhost=UnitTestUtils.replaceHostNameWithLocalHost(timeMapString);
assertEquals(testTimeMapJSON, timeMapStringLocalhost);
Expand Down

0 comments on commit 9c876f0

Please sign in to comment.