Skip to content

Commit

Permalink
Merge pull request #209 from clarin-eric/utf8encoding
Browse files Browse the repository at this point in the history
Set content-encoding for text utf8 files
  • Loading branch information
andmor- authored Jun 24, 2021
2 parents 55b127b + 9948aa8 commit 8298830
Show file tree
Hide file tree
Showing 6 changed files with 192 additions and 58 deletions.
2 changes: 1 addition & 1 deletion backend/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@
<dependency>
<groupId>eu.clarin.switchboard</groupId>
<artifactId>profiler</artifactId>
<version>1.0.7</version>
<version>1.0.8</version>
</dependency>

<dependency>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.io.ByteStreams;
import com.google.common.io.CharSource;
import com.google.common.io.CharStreams;
import eu.clarin.switchboard.core.ArchiveOps;
import eu.clarin.switchboard.core.Constants;
import eu.clarin.switchboard.core.FileInfo;
import eu.clarin.switchboard.core.MediaLibrary;
import eu.clarin.switchboard.profiler.api.Profile;
import org.apache.commons.io.IOUtils;
import org.glassfish.jersey.media.multipart.FormDataContentDisposition;
import org.glassfish.jersey.media.multipart.FormDataParam;
import org.slf4j.Logger;
Expand Down Expand Up @@ -40,7 +41,54 @@ public DataResource(MediaLibrary mediaLibrary) {

@GET
@Path("/{id}")
public Response getFile(@PathParam("id") String idString, @QueryParam("mediatype") String mediatype) throws Throwable {
public Response httpGetFile(@PathParam("id") String idString, @QueryParam("mediatype") String mediatype) throws Throwable {
return getFile(idString, mediatype);
}

/**
 * HTTP binding for {@code PUT /{id}}: accepts a {@code text/plain} request body and
 * forwards it, together with the path id, to {@link #putContent(String, String)}.
 *
 * @param idString the {@code id} path segment, passed through unchanged
 * @param content  the request body as text
 * @return whatever response {@code putContent} produces
 * @throws Throwable anything raised by {@code putContent}
 */
@PUT
@Path("/{id}")
@Consumes(MediaType.TEXT_PLAIN)
@Produces(MediaType.TEXT_PLAIN + ";charset=utf-8")
public Response httpPutContent(
        @PathParam("id") String idString,
        String content) throws Throwable {
    final Response response = putContent(idString, content);
    return response;
}

/**
 * HTTP binding for {@code GET /{id}/info}: extracts the request URI from the servlet
 * request and delegates to {@link #getFileInfo(String, String)}, so that the actual
 * logic does not depend on the servlet API.
 *
 * @param request  the current servlet request; only its request URI is read
 * @param idString the {@code id} path segment, passed through unchanged
 * @return whatever response {@code getFileInfo} produces
 * @throws Throwable anything raised by {@code getFileInfo}
 */
@GET
@Path("/{id}/info")
@Produces(MediaType.APPLICATION_JSON + ";charset=utf-8")
public Response httpGetFileInfo(
        @Context HttpServletRequest request,
        @PathParam("id") String idString) throws Throwable {
    final String requestUri = request.getRequestURI();
    return getFileInfo(requestUri, idString);
}

/**
 * HTTP binding for the multipart {@code POST} endpoint. It unpacks the form fields,
 * rejects the deprecated {@code mimetype} field, and delegates to {@code postFile}
 * with plain Java arguments.
 *
 * @param request                  the current servlet request; only its request URI is read
 * @param inputStream              content of the {@code file} form part, may be {@code null}
 * @param contentDispositionHeader disposition of the {@code file} form part, may be
 *                                 {@code null}; only the file name is read from it
 * @param url                      the {@code url} form field
 * @param mimetype                 the deprecated {@code mimetype} form field; any non-null
 *                                 value causes the request to fail
 * @param archiveID                the {@code archiveID} form field
 * @param archiveEntryName         the {@code archiveEntryName} form field
 * @param profileString            the {@code profile} form field
 * @return whatever response {@code postFile} produces
 * @throws Throwable if {@code mimetype} is supplied, or anything raised by {@code postFile}
 */
@POST
@Consumes(MediaType.MULTIPART_FORM_DATA)
@Produces(MediaType.APPLICATION_JSON + ";charset=utf-8")
public Response httpPostFile(@Context HttpServletRequest request,
                             @FormDataParam("file") InputStream inputStream,
                             @FormDataParam("file") final FormDataContentDisposition contentDispositionHeader,
                             @FormDataParam("url") String url,
                             @FormDataParam("mimetype") String mimetype,
                             @FormDataParam("archiveID") String archiveID,
                             @FormDataParam("archiveEntryName") String archiveEntryName,
                             @FormDataParam("profile") String profileString
) throws Throwable {
    // The deprecated field is rejected first, before anything else is looked at.
    final boolean usesDeprecatedField = mimetype != null;
    if (usesDeprecatedField) {
        throw new Exception("mimetype is deprecated, use `profile` form instead of mimetype: " + mimetype);
    }

    // No `file` part in the form means there is no file name to pass on.
    String filename = null;
    if (contentDispositionHeader != null) {
        filename = contentDispositionHeader.getFileName();
    }

    final String requestUri = request.getRequestURI();
    return postFile(requestUri, inputStream, filename, url, archiveID, archiveEntryName, profileString);
}

/**
 * HTTP binding for {@code GET /{id}/outline}: forwards the path id to
 * {@code getOutline} and returns its response as is.
 *
 * @param idString the {@code id} path segment, passed through unchanged
 * @return whatever response {@code getOutline} produces
 * @throws Throwable anything raised by {@code getOutline}
 */
@GET
@Path("/{id}/outline")
@Produces(MediaType.APPLICATION_JSON + ";charset=utf-8")
public Response httpGetOutline(@PathParam("id") String idString) throws Throwable {
    final Response outline = getOutline(idString);
    return outline;
}

public Response getFile(String idString, String mediatype) throws Throwable {
FileInfo fi = getFileInfo(idString);
if (fi == null) {
return Response.status(Response.Status.NOT_FOUND).build();
Expand All @@ -52,21 +100,22 @@ public Response getFile(@PathParam("id") String idString, @QueryParam("mediatype
output.flush();
};

Response.ResponseBuilder builder = Response.ok(fileStream);
if (mediatype != null && !mediatype.isEmpty()) {
builder.type(mediatype);
} else {
builder.type(fi.getProfile().toProfile().getMediaType());
if (mediatype == null || mediatype.isEmpty()) {
mediatype = fi.getProfile().toProfile().getMediaType();
}
builder.header("content-disposition", "attachment; filename=" + fi.getFilename());
return builder.build();
if (MediaType.TEXT_PLAIN.equalsIgnoreCase(mediatype)) {
String isUTF8Feature = fi.getProfile().toProfile().getFeature(Profile.FEATURE_IS_UTF8);
if (Boolean.parseBoolean(isUTF8Feature)) {
mediatype = mediatype + ";charset=utf-8";
}
}
return Response.ok(fileStream)
.type(mediatype)
.header("content-disposition", "attachment; filename=" + fi.getFilename())
.build();
}

@PUT
@Path("/{id}")
@Consumes(MediaType.TEXT_PLAIN)
@Produces(MediaType.TEXT_PLAIN + ";charset=utf-8")
public Response putContent(@PathParam("id") String idString, String content) throws Throwable {
public Response putContent(String idString, String content) throws Throwable {
FileInfo fi = getFileInfo(idString);
if (fi == null) {
return Response.status(Response.Status.NOT_FOUND).build();
Expand All @@ -81,45 +130,33 @@ public Response putContent(@PathParam("id") String idString, String content) thr
return Response.ok(content).type(MediaType.TEXT_PLAIN).build();
}

@GET
@Path("/{id}/info")
@Produces(MediaType.APPLICATION_JSON + ";charset=utf-8")
public Response getFileInfo(@Context HttpServletRequest request, @PathParam("id") String idString)
throws Throwable {
public Response getFileInfo(String requestURI, String idString) throws Throwable {
FileInfo fi = getFileInfo(idString);
if (fi == null) {
return Response.status(Response.Status.NOT_FOUND).build();
}

final String trimEnd = "/info";
String localLink = request.getRequestURI();
String localLink = requestURI;
assert (localLink.endsWith(trimEnd));
localLink = localLink.substring(0, localLink.length() - trimEnd.length());

return fileInfoToResponse(URI.create(localLink), fi);
}

@POST
@Consumes(MediaType.MULTIPART_FORM_DATA)
@Produces(MediaType.APPLICATION_JSON + ";charset=utf-8")
public Response postFile(@Context HttpServletRequest request,
@FormDataParam("file") InputStream inputStream,
@FormDataParam("file") final FormDataContentDisposition contentDispositionHeader,
@FormDataParam("url") String url,
@FormDataParam("mimetype") String mimetype,
@FormDataParam("archiveID") String archiveID,
@FormDataParam("archiveEntryName") String archiveEntryName,
@FormDataParam("profile") String profileString
public Response postFile(String requestURI,
InputStream inputStream,
String filename,
String url,
String archiveID,
String archiveEntryName,
String profileString
) throws Throwable {
FileInfo fileInfo;
if (contentDispositionHeader != null) {
String filename = contentDispositionHeader.getFileName();
if (inputStream != null && filename != null) {
fileInfo = mediaLibrary.addFile(filename, inputStream, null);
} else if (url != null) {
Profile profile = null;
if (mimetype != null && !mimetype.isEmpty()) {
profile = Profile.builder().mediaType(mimetype).build();
}
Profile profile = readProfile(profileString);
fileInfo = mediaLibrary.addByUrl(url, profile);
} else if (archiveID != null && !archiveID.isEmpty()) {
FileInfo fi = getFileInfo(archiveID);
Expand All @@ -135,21 +172,20 @@ public Response postFile(@Context HttpServletRequest request,
return Response.status(400).entity("Please provide either a file or a url to download in the form").build();
}

URI localLink = UriBuilder.fromPath(request.getRequestURI())
URI localLink = UriBuilder.fromPath(requestURI)
.path(fileInfo.getId().toString())
.build();
return fileInfoToResponse(localLink, fileInfo);
}

private Profile readProfile(String profileString) {
if (profileString != null && !profileString.isEmpty()) {
Profile.Flat flat;
try {
flat = mapper.readValue(profileString, Profile.Flat.class);
return flat.toProfile();
} catch (JsonProcessingException xc) {
LOGGER.error("json conversion exception ", xc);
}
if (profileString == null || profileString.isEmpty()) {
return null;
}
try {
return mapper.readValue(profileString, Profile.Flat.class).toProfile();
} catch (JsonProcessingException xc) {
LOGGER.error("json conversion exception ", xc);
}
return null;
}
Expand All @@ -169,9 +205,10 @@ static Response fileInfoToResponse(URI localLink, FileInfo fileInfo) {
// add the file content
File file = fileInfo.getPath().toFile();
try (InputStream fin = new BufferedInputStream(new FileInputStream(file));
InputStream in = ByteStreams.limit(fin, MAX_INLINE_CONTENT)
InputStream in = ByteStreams.limit(fin, MAX_INLINE_CONTENT);
Reader reader = new InputStreamReader(in, StandardCharsets.UTF_8)
) {
String preview = IOUtils.toString(in, StandardCharsets.UTF_8);
String preview = CharStreams.toString(reader);
if (preview != null && !preview.isEmpty()) {
ret.put("content", preview);
if (file.length() > MAX_INLINE_CONTENT) {
Expand All @@ -186,11 +223,7 @@ static Response fileInfoToResponse(URI localLink, FileInfo fileInfo) {
return Response.ok(ret).build();
}

@GET
@Path("/{id}/outline")
@Produces(MediaType.APPLICATION_JSON + ";charset=utf-8")
public Response getOutline(@Context HttpServletRequest request, @PathParam("id") String idString)
throws Throwable {
public Response getOutline(String idString) throws Throwable {
FileInfo fi = getFileInfo(idString);
if (fi == null) {
return Response.status(Response.Status.NOT_FOUND).build();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package eu.clarin.switchboard.core;

import eu.clarin.switchboard.app.config.DataStoreConfig;
import eu.clarin.switchboard.core.xc.LinkException;
import eu.clarin.switchboard.core.xc.StoragePolicyException;
import org.junit.Before;
import org.junit.Test;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import eu.clarin.switchboard.core.xc.LinkException;
import eu.clarin.switchboard.core.xc.StorageException;
import eu.clarin.switchboard.core.xc.StoragePolicyException;
import eu.clarin.switchboard.profiler.DefaultProfiler;
import eu.clarin.switchboard.profiler.api.Profile;
import eu.clarin.switchboard.profiler.api.Profiler;
import eu.clarin.switchboard.profiler.api.ProfilingException;
Expand Down Expand Up @@ -47,8 +48,7 @@ public void setUp() throws Exception {

dataStore = new DataStore(dataStoreRoot, storagePolicy);

TikaConfig tikaConfig = new TikaConfig(this.getClass().getResourceAsStream("/tikaConfig.xml"));
profiler = new TikaProfiler(tikaConfig);
profiler = new DefaultProfiler();

urlResolver = new UrlResolverConfig(3, 3, "seconds", 10);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
package eu.clarin.switchboard.resources;

import com.google.common.io.ByteStreams;
import eu.clarin.switchboard.app.config.DataStoreConfig;
import eu.clarin.switchboard.app.config.UrlResolverConfig;
import eu.clarin.switchboard.core.DataStore;
import eu.clarin.switchboard.core.DefaultStoragePolicy;
import eu.clarin.switchboard.core.MediaLibrary;
import eu.clarin.switchboard.profiler.DefaultProfiler;
import eu.clarin.switchboard.profiler.api.Profile;
import eu.clarin.switchboard.profiler.api.Profiler;
import org.junit.Before;
import org.junit.Test;

import javax.ws.rs.core.Response;
import javax.ws.rs.core.StreamingOutput;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.UUID;

import static org.junit.Assert.*;

/**
 * Tests for {@link DataResource} that call its plain Java entry points
 * ({@code postFile}, {@code getFile}, {@code getFileInfo}, {@code putContent})
 * directly, without going through an HTTP container.
 */
public class DataResourceTest {
    /** Text uploaded by every test before the behavior under test is exercised. */
    private static final String INITIAL_CONTENT = "first content";

    private DataResource dataResource;

    /**
     * Builds a {@link DataResource} backed by a fresh temporary data store that only
     * allows {@code text/plain} uploads.
     */
    @Before
    public void setUp() throws Exception {
        Path dataStoreRoot = Files.createTempDirectory("switchboard-test-");
        String maxSize = "1M";
        String maxFiles = "2";
        String maxLifetime = "4";
        String maxLifetimeUnit = "seconds";
        String cleanupPeriod = "1";
        String cleanupPeriodUnit = "seconds";

        DataStoreConfig dataStoreConfig = new DataStoreConfig(
                dataStoreRoot.toString(), false, maxSize, maxFiles, maxLifetime, maxLifetimeUnit,
                cleanupPeriod, cleanupPeriodUnit);

        DefaultStoragePolicy storagePolicy = new DefaultStoragePolicy(dataStoreConfig);
        storagePolicy.setAllowedMediaTypes(Collections.singleton("text/plain"));

        DataStore dataStore = new DataStore(dataStoreRoot, storagePolicy);
        Profiler profiler = new DefaultProfiler();
        UrlResolverConfig urlResolver = new UrlResolverConfig(3, 3, "seconds", 10);
        MediaLibrary mediaLibrary = new MediaLibrary(dataStore, profiler, storagePolicy, urlResolver, dataStoreConfig);
        dataResource = new DataResource(mediaLibrary);
    }

    /**
     * Uploads {@code content} (encoded as UTF-8) under {@code filename} through
     * {@code postFile} and returns the {@code id} entry of the response entity.
     */
    private String postTextFile(String filename, String content) throws Throwable {
        InputStream is = new ByteArrayInputStream(content.getBytes(StandardCharsets.UTF_8));
        Response postResponse = dataResource.postFile("", is, filename, null, null, null, null);
        Map<?, ?> entity = (Map<?, ?>) postResponse.getEntity();
        return entity.get("id").toString();
    }

    /** A plain-text upload must be served back with an explicit UTF-8 charset. */
    @Test
    public void getFile() throws Throwable {
        String id = postTextFile("filename", INITIAL_CONTENT);

        Response r = dataResource.getFile(id, null);
        assertEquals("text/plain;charset=utf-8", r.getHeaderString("content-type"));
    }

    /** The info entity must echo the id and filename and report a text/plain profile. */
    @Test
    public void getFileInfo() throws Throwable {
        String filename = "myfilename";
        String id = postTextFile(filename, INITIAL_CONTENT);

        // getFileInfo strips a trailing "/info" from the request URI, so pass one.
        Response r = dataResource.getFileInfo("/info", id);
        Map<?, ?> fileinfo = (Map<?, ?>) r.getEntity();

        assertEquals(id, fileinfo.get("id").toString());
        assertEquals(filename, fileinfo.get("filename").toString());
        assertTrue((Integer) fileinfo.get("fileLength") > 0);
        assertFalse((Boolean) fileinfo.get("selection"));
        assertEquals("text/plain", ((Map<?, ?>) fileinfo.get("profile")).get("mediaType"));
    }

    /** Content replaced via putContent must be what a subsequent getFile streams out. */
    @Test
    public void putContent() throws Throwable {
        String newContent = "new content";
        String id = postTextFile("filename", INITIAL_CONTENT);

        dataResource.putContent(id, newContent);

        Response r = dataResource.getFile(id, null);
        StreamingOutput output = (StreamingOutput) r.getEntity();
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        output.write(baos);
        // Decode explicitly as UTF-8 rather than with the platform default charset, so
        // the comparison does not depend on the JVM's file.encoding setting.
        // NOTE(review): assumes putContent stores text as UTF-8 - confirm.
        assertEquals(newContent, new String(baos.toByteArray(), StandardCharsets.UTF_8));
    }
}

0 comments on commit 8298830

Please sign in to comment.