From 9948aa8a1361057aeff6941f82e6d5937725c404 Mon Sep 17 00:00:00 2001 From: Emanuel Dima Date: Thu, 24 Jun 2021 14:10:19 +0200 Subject: [PATCH] set content-encoding for text utf8 files; add more tests --- backend/pom.xml | 2 +- .../switchboard/resources/DataResource.java | 139 +++++++++++------- .../switchboard/core/DataStoreTest.java | 1 - .../switchboard/core/MediaLibraryTest.java | 4 +- .../resources/DataResourceTest.java | 102 +++++++++++++ profiler | 2 +- 6 files changed, 192 insertions(+), 58 deletions(-) create mode 100644 backend/src/test/java/eu/clarin/switchboard/resources/DataResourceTest.java diff --git a/backend/pom.xml b/backend/pom.xml index fd05f87d..9e18efc2 100644 --- a/backend/pom.xml +++ b/backend/pom.xml @@ -74,7 +74,7 @@ eu.clarin.switchboard profiler - 1.0.7 + 1.0.8 diff --git a/backend/src/main/java/eu/clarin/switchboard/resources/DataResource.java b/backend/src/main/java/eu/clarin/switchboard/resources/DataResource.java index b8d4991d..e16a9be0 100644 --- a/backend/src/main/java/eu/clarin/switchboard/resources/DataResource.java +++ b/backend/src/main/java/eu/clarin/switchboard/resources/DataResource.java @@ -4,12 +4,13 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.io.ByteStreams; +import com.google.common.io.CharSource; +import com.google.common.io.CharStreams; import eu.clarin.switchboard.core.ArchiveOps; import eu.clarin.switchboard.core.Constants; import eu.clarin.switchboard.core.FileInfo; import eu.clarin.switchboard.core.MediaLibrary; import eu.clarin.switchboard.profiler.api.Profile; -import org.apache.commons.io.IOUtils; import org.glassfish.jersey.media.multipart.FormDataContentDisposition; import org.glassfish.jersey.media.multipart.FormDataParam; import org.slf4j.Logger; @@ -40,7 +41,54 @@ public DataResource(MediaLibrary mediaLibrary) { @GET @Path("/{id}") - public Response getFile(@PathParam("id") String idString, @QueryParam("mediatype") String mediatype) throws Throwable { + public Response httpGetFile(@PathParam("id") String idString, @QueryParam("mediatype") String mediatype) throws Throwable { + return getFile(idString, mediatype); + } + + @PUT + @Path("/{id}") + @Consumes(MediaType.TEXT_PLAIN) + @Produces(MediaType.TEXT_PLAIN + ";charset=utf-8") + public Response httpPutContent(@PathParam("id") String idString, String content) throws Throwable { + return putContent(idString, content); + } + + @GET + @Path("/{id}/info") + @Produces(MediaType.APPLICATION_JSON + ";charset=utf-8") + public Response httpGetFileInfo(@Context HttpServletRequest request, @PathParam("id") String idString) throws Throwable { + return getFileInfo(request.getRequestURI(), idString); + } + + @POST + @Consumes(MediaType.MULTIPART_FORM_DATA) + @Produces(MediaType.APPLICATION_JSON + ";charset=utf-8") + public Response httpPostFile(@Context HttpServletRequest request, + @FormDataParam("file") InputStream inputStream, + @FormDataParam("file") final FormDataContentDisposition contentDispositionHeader, + @FormDataParam("url") String url, + @FormDataParam("mimetype") String mimetype, + @FormDataParam("archiveID") String archiveID, + @FormDataParam("archiveEntryName") String archiveEntryName, + @FormDataParam("profile") String profileString + ) throws Throwable { + if (mimetype != null) { + throw new Exception("mimetype is deprecated, use `profile` form instead of mimetype: " + mimetype); + } + String filename = contentDispositionHeader == null ? null : contentDispositionHeader.getFileName(); + return postFile(request.getRequestURI(), + inputStream, filename, url, archiveID, archiveEntryName, profileString); + } + + @GET + @Path("/{id}/outline") + @Produces(MediaType.APPLICATION_JSON + ";charset=utf-8") + public Response httpGetOutline(@PathParam("id") String idString) + throws Throwable { + return getOutline(idString); + } + + public Response getFile(String idString, String mediatype) throws Throwable { FileInfo fi = getFileInfo(idString); if (fi == null) { return Response.status(Response.Status.NOT_FOUND).build(); @@ -52,21 +100,22 @@ public Response getFile(@PathParam("id") String idString, @QueryParam("mediatype output.flush(); }; - Response.ResponseBuilder builder = Response.ok(fileStream); - if (mediatype != null && !mediatype.isEmpty()) { - builder.type(mediatype); - } else { - builder.type(fi.getProfile().toProfile().getMediaType()); + if (mediatype == null || mediatype.isEmpty()) { + mediatype = fi.getProfile().toProfile().getMediaType(); } - builder.header("content-disposition", "attachment; filename=" + fi.getFilename()); - return builder.build(); + if (MediaType.TEXT_PLAIN.equalsIgnoreCase(mediatype)) { + String isUTF8Feature = fi.getProfile().toProfile().getFeature(Profile.FEATURE_IS_UTF8); + if (Boolean.parseBoolean(isUTF8Feature)) { + mediatype = mediatype + ";charset=utf-8"; + } + } + return Response.ok(fileStream) + .type(mediatype) + .header("content-disposition", "attachment; filename=" + fi.getFilename()) + .build(); } - @PUT - @Path("/{id}") - @Consumes(MediaType.TEXT_PLAIN) - @Produces(MediaType.TEXT_PLAIN + ";charset=utf-8") - public Response putContent(@PathParam("id") String idString, String content) throws Throwable { + public Response putContent(String idString, String content) throws Throwable { FileInfo fi = getFileInfo(idString); if (fi == null) { return Response.status(Response.Status.NOT_FOUND).build(); @@ -81,45 +130,33 @@ public Response putContent(@PathParam("id") String idString, String content) thr return Response.ok(content).type(MediaType.TEXT_PLAIN).build(); } - @GET - @Path("/{id}/info") - @Produces(MediaType.APPLICATION_JSON + ";charset=utf-8") - public Response getFileInfo(@Context HttpServletRequest request, @PathParam("id") String idString) - throws Throwable { + public Response getFileInfo(String requestURI, String idString) throws Throwable { FileInfo fi = getFileInfo(idString); if (fi == null) { return Response.status(Response.Status.NOT_FOUND).build(); } final String trimEnd = "/info"; - String localLink = request.getRequestURI(); + String localLink = requestURI; assert (localLink.endsWith(trimEnd)); localLink = localLink.substring(0, localLink.length() - trimEnd.length()); return fileInfoToResponse(URI.create(localLink), fi); } - @POST - @Consumes(MediaType.MULTIPART_FORM_DATA) - @Produces(MediaType.APPLICATION_JSON + ";charset=utf-8") - public Response postFile(@Context HttpServletRequest request, - @FormDataParam("file") InputStream inputStream, - @FormDataParam("file") final FormDataContentDisposition contentDispositionHeader, - @FormDataParam("url") String url, - @FormDataParam("mimetype") String mimetype, - @FormDataParam("archiveID") String archiveID, - @FormDataParam("archiveEntryName") String archiveEntryName, - @FormDataParam("profile") String profileString + public Response postFile(String requestURI, + InputStream inputStream, + String filename, + String url, + String archiveID, + String archiveEntryName, + String profileString ) throws Throwable { FileInfo fileInfo; - if (contentDispositionHeader != null) { - String filename = contentDispositionHeader.getFileName(); + if (inputStream != null && filename != null) { fileInfo = mediaLibrary.addFile(filename, inputStream, null); } else if (url != null) { - Profile profile = null; - if (mimetype != null && !mimetype.isEmpty()) { - profile = Profile.builder().mediaType(mimetype).build(); - } + Profile profile = readProfile(profileString); fileInfo = mediaLibrary.addByUrl(url, profile); } else if (archiveID != null && !archiveID.isEmpty()) { FileInfo fi = getFileInfo(archiveID); @@ -135,21 +172,20 @@ public Response postFile(@Context HttpServletRequest request, return Response.status(400).entity("Please provide either a file or a url to download in the form").build(); } - URI localLink = UriBuilder.fromPath(request.getRequestURI()) + URI localLink = UriBuilder.fromPath(requestURI) .path(fileInfo.getId().toString()) .build(); return fileInfoToResponse(localLink, fileInfo); } private Profile readProfile(String profileString) { - if (profileString != null && !profileString.isEmpty()) { - Profile.Flat flat; - try { - flat = mapper.readValue(profileString, Profile.Flat.class); - return flat.toProfile(); - } catch (JsonProcessingException xc) { - LOGGER.error("json conversion exception ", xc); - } + if (profileString == null || profileString.isEmpty()) { + return null; + } + try { + return mapper.readValue(profileString, Profile.Flat.class).toProfile(); + } catch (JsonProcessingException xc) { + LOGGER.error("json conversion exception ", xc); } return null; } @@ -169,9 +205,10 @@ static Response fileInfoToResponse(URI localLink, FileInfo fileInfo) { // add the file content File file = fileInfo.getPath().toFile(); try (InputStream fin = new BufferedInputStream(new FileInputStream(file)); - InputStream in = ByteStreams.limit(fin, MAX_INLINE_CONTENT) + InputStream in = ByteStreams.limit(fin, MAX_INLINE_CONTENT); + Reader reader = new InputStreamReader(in, StandardCharsets.UTF_8) ) { - String preview = IOUtils.toString(in, StandardCharsets.UTF_8); + String preview = CharStreams.toString(reader); if (preview != null && !preview.isEmpty()) { ret.put("content", preview); if (file.length() > MAX_INLINE_CONTENT) { @@ -186,11 +223,7 @@ static Response fileInfoToResponse(URI localLink, FileInfo fileInfo) { return Response.ok(ret).build(); } - @GET - @Path("/{id}/outline") - @Produces(MediaType.APPLICATION_JSON + ";charset=utf-8") - public Response getOutline(@Context HttpServletRequest request, @PathParam("id") String idString) - throws Throwable { + public Response getOutline(String idString) throws Throwable { FileInfo fi = getFileInfo(idString); if (fi == null) { return Response.status(Response.Status.NOT_FOUND).build(); diff --git a/backend/src/test/java/eu/clarin/switchboard/core/DataStoreTest.java b/backend/src/test/java/eu/clarin/switchboard/core/DataStoreTest.java index a6a6f40c..653d3750 100644 --- a/backend/src/test/java/eu/clarin/switchboard/core/DataStoreTest.java +++ b/backend/src/test/java/eu/clarin/switchboard/core/DataStoreTest.java @@ -1,7 +1,6 @@ package eu.clarin.switchboard.core; import eu.clarin.switchboard.app.config.DataStoreConfig; -import eu.clarin.switchboard.core.xc.LinkException; import eu.clarin.switchboard.core.xc.StoragePolicyException; import org.junit.Before; import org.junit.Test; diff --git a/backend/src/test/java/eu/clarin/switchboard/core/MediaLibraryTest.java b/backend/src/test/java/eu/clarin/switchboard/core/MediaLibraryTest.java index 9f28a75e..fdfedc04 100644 --- a/backend/src/test/java/eu/clarin/switchboard/core/MediaLibraryTest.java +++ b/backend/src/test/java/eu/clarin/switchboard/core/MediaLibraryTest.java @@ -6,6 +6,7 @@ import eu.clarin.switchboard.core.xc.LinkException; import eu.clarin.switchboard.core.xc.StorageException; import eu.clarin.switchboard.core.xc.StoragePolicyException; +import eu.clarin.switchboard.profiler.DefaultProfiler; import eu.clarin.switchboard.profiler.api.Profile; import eu.clarin.switchboard.profiler.api.Profiler; import eu.clarin.switchboard.profiler.api.ProfilingException; @@ -47,8 +48,7 @@ public void setUp() throws Exception { dataStore = new DataStore(dataStoreRoot, storagePolicy); - TikaConfig tikaConfig = new TikaConfig(this.getClass().getResourceAsStream("/tikaConfig.xml")); - profiler = new TikaProfiler(tikaConfig); + profiler = new DefaultProfiler(); urlResolver = new UrlResolverConfig(3, 3, "seconds", 10); } diff --git a/backend/src/test/java/eu/clarin/switchboard/resources/DataResourceTest.java b/backend/src/test/java/eu/clarin/switchboard/resources/DataResourceTest.java new file mode 100644 index 00000000..6c3942e2 --- /dev/null +++ b/backend/src/test/java/eu/clarin/switchboard/resources/DataResourceTest.java @@ -0,0 +1,102 @@ +package eu.clarin.switchboard.resources; + +import com.google.common.io.ByteStreams; +import eu.clarin.switchboard.app.config.DataStoreConfig; +import eu.clarin.switchboard.app.config.UrlResolverConfig; +import eu.clarin.switchboard.core.DataStore; +import eu.clarin.switchboard.core.DefaultStoragePolicy; +import eu.clarin.switchboard.core.MediaLibrary; +import eu.clarin.switchboard.profiler.DefaultProfiler; +import eu.clarin.switchboard.profiler.api.Profile; +import eu.clarin.switchboard.profiler.api.Profiler; +import org.junit.Before; +import org.junit.Test; + +import javax.ws.rs.core.Response; +import javax.ws.rs.core.StreamingOutput; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.UUID; + +import static org.junit.Assert.*; + +public class DataResourceTest { + DataResource dataResource; + + @Before + public void setUp() throws Exception { + Path dataStoreRoot = Files.createTempDirectory("switchboard-test-"); + String maxSize = "1M"; + String maxFiles = "2"; + String maxLifetime = "4"; + String maxLifetimeUnit = "seconds"; + String cleanupPeriod = "1"; + String cleanupPeriodUnit = "seconds"; + + DataStoreConfig dataStoreConfig = new DataStoreConfig( + dataStoreRoot.toString(), false, maxSize, maxFiles, maxLifetime, maxLifetimeUnit, cleanupPeriod, cleanupPeriodUnit); + + DefaultStoragePolicy storagePolicy = new DefaultStoragePolicy(dataStoreConfig); + storagePolicy.setAllowedMediaTypes(Collections.singleton("text/plain")); + + DataStore dataStore = new DataStore(dataStoreRoot, storagePolicy); + Profiler profiler = new DefaultProfiler(); + UrlResolverConfig urlResolver = new UrlResolverConfig(3, 3, "seconds", 10); + MediaLibrary mediaLibrary = new MediaLibrary(dataStore, profiler, storagePolicy, urlResolver, dataStoreConfig); + dataResource = new DataResource(mediaLibrary); + } + + + @Test + public void getFile() throws Throwable { + InputStream is = new ByteArrayInputStream("first content".getBytes(StandardCharsets.UTF_8)); + + Response postResponse = dataResource.postFile("", is, "filename", null, null, null, null); + String id = ((Map) postResponse.getEntity()).get("id").toString(); + + Response r = dataResource.getFile(id, null); + assertEquals("text/plain;charset=utf-8", r.getHeaderString("content-type")); + } + + @Test + public void getFileInfo() throws Throwable { + String filename = "myfilename"; + InputStream is = new ByteArrayInputStream("first content".getBytes(StandardCharsets.UTF_8)); + + Response postResponse = dataResource.postFile("", is, filename, null, null, null, null); + String id = ((Map) postResponse.getEntity()).get("id").toString(); + + Response r = dataResource.getFileInfo("/info", id); + Map fileinfo = ((Map) r.getEntity()); + + assertEquals(id, fileinfo.get("id").toString()); + assertEquals(filename, fileinfo.get("filename").toString()); + assertTrue((int)fileinfo.get("fileLength") > 0); + assertFalse((boolean)fileinfo.get("selection")); + assertEquals("text/plain", ((Map)fileinfo.get("profile")).get("mediaType")); + } + + @Test + public void putContent() throws Throwable { + String newContent = "new content"; + InputStream is = new ByteArrayInputStream("first content".getBytes(StandardCharsets.UTF_8)); + + Response postResponse = dataResource.postFile("", is, "filename", null, null, null, null); + String id = ((Map) postResponse.getEntity()).get("id").toString(); + + dataResource.putContent(id, newContent); + + Response r = dataResource.getFile(id, null); + StreamingOutput output = (StreamingOutput) r.getEntity(); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + output.write(baos); + assertEquals(newContent, baos.toString()); + } +} \ No newline at end of file diff --git a/profiler b/profiler index 3d506c19..8ada4908 160000 --- a/profiler +++ b/profiler @@ -1 +1 @@ -Subproject commit 3d506c1926dd73efb4559a7a0c920f50009ab8fc +Subproject commit 8ada4908381804e517128ebe2464d7559437a408