Skip to content

Commit

Permalink
OAK-10803 Compress in-memory property values
Browse files Browse the repository at this point in the history
  • Loading branch information
Daniel Iancu committed May 15, 2024
1 parent 28f411a commit 63f0330
Show file tree
Hide file tree
Showing 6 changed files with 126 additions and 6 deletions.
6 changes: 6 additions & 0 deletions oak-store-spi/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,12 @@
<artifactId>mockito-core</artifactId>
<scope>test</scope>
</dependency>

<!-- NOTE(review): the Java changes visible in this commit only use Compression.GZIP; confirm this LZ4 dependency is actually needed. -->
<dependency>
<groupId>org.lz4</groupId>
<artifactId>lz4-java</artifactId>
<version>1.8.0</version>
</dependency>
</dependencies>

</project>
Original file line number Diff line number Diff line change
Expand Up @@ -19,28 +19,54 @@
package org.apache.jackrabbit.oak.plugins.memory;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import org.apache.jackrabbit.oak.commons.Compression;
import org.jetbrains.annotations.NotNull;

/**
 * This {@code Blob} implementation is based on an array of bytes. The bytes
 * are kept compressed in memory and are decompressed on demand whenever the
 * content is read through {@link #getNewStream()}.
 */
public class ArrayBasedBlob extends AbstractBlob {

    /** Compression algorithm used for the in-memory representation. */
    private static final Compression COMPRESSION = Compression.GZIP;

    /** The compressed content of this blob. */
    private final byte[] value;

    /** Length in bytes of the original, uncompressed content. */
    private final long valueLength;

    /**
     * Creates a blob from the given bytes. The array is compressed into an
     * internal copy; the caller's array is not retained.
     *
     * @param value the uncompressed content of the blob
     * @throws RuntimeException if the content cannot be compressed
     */
    public ArrayBasedBlob(byte[] value) {
        this.value = compress(value);
        // Remember the uncompressed size: this.value now holds compressed bytes.
        this.valueLength = value.length;
    }

    /**
     * Compresses the given bytes.
     *
     * @param value the bytes to compress
     * @return the compressed bytes
     * @throws RuntimeException if writing to the compressing stream fails
     */
    private static byte[] compress(byte[] value) {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        try (OutputStream compressionOutputStream = COMPRESSION.getOutputStream(out)) {
            compressionOutputStream.write(value);
        } catch (IOException e) {
            throw new RuntimeException("Failed to compress data", e);
        }
        // Read the buffer only after the compressing stream has been closed,
        // so that everything it still had to write is included.
        return out.toByteArray();
    }

    /**
     * Returns a new stream over the uncompressed content of this blob.
     *
     * @return a stream that decompresses the stored bytes while being read
     * @throws RuntimeException if the decompressing stream cannot be created
     */
    @NotNull
    @Override
    public InputStream getNewStream() {
        try {
            return COMPRESSION.getInputStream(new ByteArrayInputStream(this.value));
        } catch (IOException e) {
            throw new RuntimeException("Failed to decompress data", e);
        }
    }

    /**
     * Returns the length of the uncompressed content.
     *
     * @return the number of bytes passed to the constructor
     */
    @Override
    public long length() {
        return this.valueLength;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,56 @@

import org.apache.jackrabbit.oak.api.PropertyState;
import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.commons.Compression;
import org.apache.jackrabbit.oak.plugins.value.Conversions;
import org.apache.jackrabbit.oak.plugins.value.Conversions.Converter;
import org.jetbrains.annotations.NotNull;

import static org.apache.jackrabbit.guava.common.base.Preconditions.checkNotNull;
import static org.apache.jackrabbit.oak.api.Type.STRING;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;

public class StringPropertyState extends SinglePropertyState<String> {
/** The plain value, or {@code null} when the value is held in {@code compressedValue}. */
private final String value;

/** The compressed UTF-8 bytes of the value, or {@code null} when the value is held in {@code value}. */
private final byte[] compressedValue;

/** Compression algorithm used for the in-memory representation; static so instances carry no extra reference. */
private static final Compression COMPRESSION = Compression.GZIP;

/**
 * Creates a string property. The value is stored compressed when that is
 * smaller than its UTF-8 encoding, and as a plain string otherwise.
 *
 * @param name  the name of the property
 * @param value the value of the property, must not be {@code null}
 * @throws RuntimeException if the value cannot be compressed
 */
public StringPropertyState(@NotNull String name, @NotNull String value) {
    super(name);
    checkNotNull(value);
    // Encode once, with an explicit charset, so that the result does not
    // depend on the platform default encoding.
    // NOTE(review): unpaired surrogates cannot be encoded as UTF-8 and are
    // replaced during encoding - confirm such values cannot reach this point.
    byte[] raw = value.getBytes(StandardCharsets.UTF_8);
    byte[] compressed = raw.length > 0 ? compress(raw) : null;
    // Short or poorly compressible strings grow when compressed; keep the
    // plain string in that case instead of wasting memory.
    if (compressed != null && compressed.length < raw.length) {
        this.compressedValue = compressed;
        this.value = null;
    } else {
        this.compressedValue = null;
        this.value = value;
    }
}

/**
 * Compresses the given bytes.
 *
 * @param value the bytes to compress
 * @return the compressed bytes
 * @throws RuntimeException if writing to the compressing stream fails
 */
private static byte[] compress(byte[] value) {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    try (OutputStream compressionOutputStream = COMPRESSION.getOutputStream(out)) {
        compressionOutputStream.write(value);
    } catch (IOException e) {
        throw new RuntimeException("Failed to compress data", e);
    }
    // Read the buffer only after the compressing stream has been closed,
    // so that everything it still had to write is included.
    return out.toByteArray();
}

/**
 * Decompresses bytes produced by {@code compress} and decodes them as UTF-8.
 *
 * @param value the compressed bytes
 * @return the decoded string
 * @throws RuntimeException if the bytes cannot be decompressed
 */
private static String decompress(byte[] value) {
    try (InputStream in = COMPRESSION.getInputStream(new ByteArrayInputStream(value))) {
        return new String(in.readAllBytes(), StandardCharsets.UTF_8);
    } catch (IOException e) {
        throw new RuntimeException("Failed to decompress data", e);
    }
}

/**
Expand All @@ -48,12 +85,12 @@ public static PropertyState stringProperty(

/**
 * Returns the string value of this property. When the value is stored in
 * compressed form it is decompressed on every call; the result is not cached.
 *
 * @return the value passed to the constructor
 */
@Override
public String getValue() {
    return value != null ? value : decompress(this.compressedValue);
}

/**
 * Returns a converter for the string value of this property.
 *
 * @return a converter created from {@link #getValue()}
 */
@Override
public Converter getConverter() {
    // Go through getValue() rather than the field: the field is null
    // whenever the value is held in compressed form.
    return Conversions.convert(getValue());
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package org.apache.jackrabbit.oak.plugins.memory;

import junit.framework.TestCase;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Tests that {@link ArrayBasedBlob} returns exactly the bytes it was created with.
 */
public class ArrayBasedBlobTest extends TestCase {

    /**
     * Reads the sample file, wraps it in a blob and checks that both the
     * streamed content and the reported length match the original bytes.
     */
    public void testGetNewStream() throws IOException {
        byte[] fileContent = Files.readAllBytes(Paths.get("src/test/resources/sample.txt"));
        ArrayBasedBlob arrayBasedBlob = new ArrayBasedBlob(fileContent);

        byte[] roundTripped;
        try (java.io.InputStream in = arrayBasedBlob.getNewStream()) {
            roundTripped = in.readAllBytes();
        }

        // Compare raw bytes: decoding both sides to String with the platform
        // charset could map different bytes to the same replacement character.
        assertTrue(java.util.Arrays.equals(fileContent, roundTripped));
        assertEquals(fileContent.length, arrayBasedBlob.length());
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package org.apache.jackrabbit.oak.plugins.memory;

import junit.framework.TestCase;
import org.junit.Test;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;

/**
 * Tests that {@link StringPropertyState} returns the value it was created with.
 */
public class StringPropertyStateTest extends TestCase {

    /**
     * Uses the text of the sample file, which contains non-ASCII characters,
     * as the property value and checks that it is returned unchanged.
     */
    public void testGetValue() throws IOException {
        String value = new String(
                Files.readAllBytes(Paths.get("src/test/resources/sample.txt")),
                java.nio.charset.StandardCharsets.UTF_8);
        StringPropertyState stringPropertyState = new StringPropertyState("name", value);
        assertEquals(value, stringPropertyState.getValue());
    }

    /**
     * Checks the empty string, which the constructor handles in a separate branch.
     */
    public void testGetValueEmptyString() {
        StringPropertyState stringPropertyState = new StringPropertyState("name", "");
        assertEquals("", stringPropertyState.getValue());
    }

}
10 changes: 10 additions & 0 deletions oak-store-spi/src/test/resources/sample.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
The Lorem ipsum text is derived from sections 1.10.32 and 1.10.33 of Cicero's De finibus bonorum et malorum.[7][8] The physical source may have been the 1914 Loeb Classical Library edition of De finibus, where the Latin text, presented on the left-hand (even) pages, breaks off on page 34 with "Neque porro quisquam est qui do-" and continues on page 36 with "lorem ipsum ...", suggesting that the galley type of that page was mixed up to make the dummy text seen today.[1]

The discovery of the text's origin is attributed to Richard McClintock, a Latin scholar at Hampden–Sydney College. McClintock connected Lorem ipsum to Cicero's writing sometime before 1982 while searching for instances of the Latin word consectetur, which was rarely used in classical literature.[2] McClintock first published his discovery in a 1994 letter to the editor of Before & After magazine,[9] contesting the editor's earlier claim that Lorem ipsum held no meaning.[2]

The relevant section of Cicero as printed in the source is reproduced below with fragments used in Lorem ipsum highlighted. Letters in brackets were added to Lorem ipsum and were not present in the source text:

[32] Sed ut perspiciatis, unde omnis iste natus error sit voluptatem accusantium doloremque laudantium, totam rem aperiam eaque ipsa, quae ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt, explicabo. Nemo enim ipsam voluptatem, quia voluptas sit, aspernatur aut odit aut fugit, sed quia consequuntur magni dolores eos, qui ratione voluptatem sequi nesciunt, neque porro quisquam est, qui dolorem ipsum, quia dolor sit amet consectetur adipisci[ng] velit, sed quia non numquam [do] eius modi tempora inci[di]dunt, ut labore et dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrum[d] exercitationem ullam corporis suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur? [D]Quis autem vel eum i[r]ure reprehenderit, qui in ea voluptate velit esse, quam nihil molestiae consequatur, vel illum, qui dolorem eum fugiat, quo voluptas nulla pariatur?

[33] At vero eos et accusamus et iusto odio dignissimos ducimus, qui blanditiis praesentium voluptatum deleniti atque corrupti, quos dolores et quas molestias excepturi sint, obcaecati cupiditate non provident, similique sunt in culpa, qui officia deserunt mollitia animi, id est laborum et dolorum fuga. Et harum quidem reru[d]um facilis est e[r]t expedita distinctio. Nam libero tempore, cum soluta nobis est eligendi optio, cumque nihil impedit, quo minus id, quod maxime placeat facere possimus, omnis voluptas assumenda est, omnis dolor repellend[a]us. Temporibus autem quibusdam et aut officiis debitis aut rerum necessitatibus saepe eveniet, ut et voluptates repudiandae sint et molestiae non recusandae. Itaque earum rerum hic tenetur a sapiente delectus, ut aut reiciendis voluptatibus maiores alias consequatur aut perferendis doloribus asperiores repellat.

0 comments on commit 63f0330

Please sign in to comment.