From ec58f8cb69d182dc8512e493a0ca0234c747035b Mon Sep 17 00:00:00 2001 From: Daniel Iancu Date: Tue, 14 May 2024 16:50:13 +0300 Subject: [PATCH] OAK-10803 Compress in-memory property values --- oak-store-spi/pom.xml | 6 +++ .../oak/plugins/memory/ArrayBasedBlob.java | 32 ++++++++++++-- .../plugins/memory/StringPropertyState.java | 43 +++++++++++++++++-- .../plugins/memory/ArrayBasedBlobTest.java | 20 +++++++++ .../memory/StringPropertyStateTest.java | 21 +++++++++ oak-store-spi/src/test/resources/sample.txt | 10 +++++ 6 files changed, 126 insertions(+), 6 deletions(-) create mode 100644 oak-store-spi/src/test/java/org/apache/jackrabbit/oak/plugins/memory/ArrayBasedBlobTest.java create mode 100644 oak-store-spi/src/test/java/org/apache/jackrabbit/oak/plugins/memory/StringPropertyStateTest.java create mode 100644 oak-store-spi/src/test/resources/sample.txt diff --git a/oak-store-spi/pom.xml b/oak-store-spi/pom.xml index 026b16e6ec8..e64a7eaa9a6 100644 --- a/oak-store-spi/pom.xml +++ b/oak-store-spi/pom.xml @@ -131,6 +131,12 @@ mockito-core test + + + org.lz4 + lz4-java + 1.8.0 + diff --git a/oak-store-spi/src/main/java/org/apache/jackrabbit/oak/plugins/memory/ArrayBasedBlob.java b/oak-store-spi/src/main/java/org/apache/jackrabbit/oak/plugins/memory/ArrayBasedBlob.java index 57ed263b956..1ef9b726e3a 100644 --- a/oak-store-spi/src/main/java/org/apache/jackrabbit/oak/plugins/memory/ArrayBasedBlob.java +++ b/oak-store-spi/src/main/java/org/apache/jackrabbit/oak/plugins/memory/ArrayBasedBlob.java @@ -19,8 +19,12 @@ package org.apache.jackrabbit.oak.plugins.memory; import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; import java.io.InputStream; +import java.io.OutputStream; +import org.apache.jackrabbit.oak.commons.Compression; import org.jetbrains.annotations.NotNull; /** @@ -28,19 +32,41 @@ */ public class ArrayBasedBlob extends AbstractBlob { private final byte[] value; + private final long valueLength; + + private Compression compression = Compression.GZIP; public ArrayBasedBlob(byte[] value) { - this.value = value; + System.out.println("value = " + value.length); + this.value = compress(value); + System.out.println("value = " + this.value.length); + this.valueLength = value.length; + } + + private byte[] compress(byte[] value) { + try { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + OutputStream compressionOutputStream = compression.getOutputStream(out); + compressionOutputStream.write(value); + compressionOutputStream.close(); + return out.toByteArray(); + } catch (IOException e) { + throw new RuntimeException("Failed to compress data", e); + } } @NotNull @Override public InputStream getNewStream() { - return new ByteArrayInputStream(value); + try { + return compression.getInputStream(new ByteArrayInputStream(this.value)); + } catch (IOException e) { + throw new RuntimeException("Failed to decompress data", e); + } } @Override public long length() { - return value.length; + return this.valueLength; } } diff --git a/oak-store-spi/src/main/java/org/apache/jackrabbit/oak/plugins/memory/StringPropertyState.java b/oak-store-spi/src/main/java/org/apache/jackrabbit/oak/plugins/memory/StringPropertyState.java index 31251c42aa9..f7e0d06896c 100644 --- a/oak-store-spi/src/main/java/org/apache/jackrabbit/oak/plugins/memory/StringPropertyState.java +++ b/oak-store-spi/src/main/java/org/apache/jackrabbit/oak/plugins/memory/StringPropertyState.java @@ -20,6 +20,7 @@ import org.apache.jackrabbit.oak.api.PropertyState; import org.apache.jackrabbit.oak.api.Type; +import org.apache.jackrabbit.oak.commons.Compression; import org.apache.jackrabbit.oak.plugins.value.Conversions; import org.apache.jackrabbit.oak.plugins.value.Conversions.Converter; import org.jetbrains.annotations.NotNull; @@ -27,12 +28,48 @@ import static org.apache.jackrabbit.guava.common.base.Preconditions.checkNotNull; import static org.apache.jackrabbit.oak.api.Type.STRING; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStream; + public class StringPropertyState extends SinglePropertyState { private final String value; + private byte[] compressedValue; + private Compression compression = Compression.GZIP; public StringPropertyState(@NotNull String name, @NotNull String value) { super(name); - this.value = checkNotNull(value); + checkNotNull(value); + int size = value.getBytes().length; + System.out.println("size = " + size); + if (size > 0) {//todo: introduce a threshold + compressedValue = compress(value.getBytes()); + System.out.println("compressedValue = " + compressedValue.length); + this.value = null; + } else { + this.value = value; + } + } + + private byte[] compress(byte[] value) { + try { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + OutputStream compressionOutputStream = compression.getOutputStream(out); + compressionOutputStream.write(value); + compressionOutputStream.close(); + return out.toByteArray(); + } catch (IOException e) { + throw new RuntimeException("Failed to compress data", e); + } + } + + private String decompress(byte[] value) { + try { + return new String(compression.getInputStream(new ByteArrayInputStream(value)).readAllBytes()); + } catch (IOException e) { + throw new RuntimeException("Failed to decompress data", e); + } } /** @@ -48,12 +85,12 @@ public static PropertyState stringProperty( @Override public String getValue() { - return value; + return value != null ? value : decompress(this.compressedValue); } @Override public Converter getConverter() { - return Conversions.convert(value); + return Conversions.convert(getValue()); } @Override diff --git a/oak-store-spi/src/test/java/org/apache/jackrabbit/oak/plugins/memory/ArrayBasedBlobTest.java b/oak-store-spi/src/test/java/org/apache/jackrabbit/oak/plugins/memory/ArrayBasedBlobTest.java new file mode 100644 index 00000000000..b4f6507e5c2 --- /dev/null +++ b/oak-store-spi/src/test/java/org/apache/jackrabbit/oak/plugins/memory/ArrayBasedBlobTest.java @@ -0,0 +1,20 @@ +package org.apache.jackrabbit.oak.plugins.memory; + +import junit.framework.TestCase; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; + +public class ArrayBasedBlobTest extends TestCase { + + + public void testGetNewStream() throws IOException { + byte[] fileContent = Files.readAllBytes(Paths.get("src/test/resources/sample.txt")); + ArrayBasedBlob arrayBasedBlob = new ArrayBasedBlob(fileContent); + String expected = new String(fileContent); + assertEquals(expected, new String(arrayBasedBlob.getNewStream().readAllBytes())); + assertEquals(fileContent.length, arrayBasedBlob.length()); + } + +} \ No newline at end of file diff --git a/oak-store-spi/src/test/java/org/apache/jackrabbit/oak/plugins/memory/StringPropertyStateTest.java b/oak-store-spi/src/test/java/org/apache/jackrabbit/oak/plugins/memory/StringPropertyStateTest.java new file mode 100644 index 00000000000..398545ed172 --- /dev/null +++ b/oak-store-spi/src/test/java/org/apache/jackrabbit/oak/plugins/memory/StringPropertyStateTest.java @@ -0,0 +1,21 @@ +package org.apache.jackrabbit.oak.plugins.memory; + +import junit.framework.TestCase; +import org.junit.Test; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.Arrays; + +public class StringPropertyStateTest extends TestCase { + + + @Test + public void testGetValue() throws IOException { + String value = Arrays.toString(Files.readAllBytes(Paths.get("src/test/resources/sample.txt"))); + StringPropertyState stringPropertyState = new StringPropertyState("name", value); + assertEquals(value, stringPropertyState.getValue()); + } + +} \ No newline at end of file diff --git a/oak-store-spi/src/test/resources/sample.txt b/oak-store-spi/src/test/resources/sample.txt new file mode 100644 index 00000000000..821a1b5cc82 --- /dev/null +++ b/oak-store-spi/src/test/resources/sample.txt @@ -0,0 +1,10 @@ +The Lorem ipsum text is derived from sections 1.10.32 and 1.10.33 of Cicero's De finibus bonorum et malorum.[7][8] The physical source may have been the 1914 Loeb Classical Library edition of De finibus, where the Latin text, presented on the left-hand (even) pages, breaks off on page 34 with "Neque porro quisquam est qui do-" and continues on page 36 with "lorem ipsum ...", suggesting that the galley type of that page was mixed up to make the dummy text seen today.[1] + +The discovery of the text's origin is attributed to Richard McClintock, a Latin scholar at Hampden–Sydney College. McClintock connected Lorem ipsum to Cicero's writing sometime before 1982 while searching for instances of the Latin word consectetur, which was rarely used in classical literature.[2] McClintock first published his discovery in a 1994 letter to the editor of Before & After magazine,[9] contesting the editor's earlier claim that Lorem ipsum held no meaning.[2] + +The relevant section of Cicero as printed in the source is reproduced below with fragments used in Lorem ipsum highlighted. Letters in brackets were added to Lorem ipsum and were not present in the source text: + +[32] Sed ut perspiciatis, unde omnis iste natus error sit voluptatem accusantium doloremque laudantium, totam rem aperiam eaque ipsa, quae ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt, explicabo. Nemo enim ipsam voluptatem, quia voluptas sit, aspernatur aut odit aut fugit, sed quia consequuntur magni dolores eos, qui ratione voluptatem sequi nesciunt, neque porro quisquam est, qui dolorem ipsum, quia dolor sit amet consectetur adipisci[ng] velit, sed quia non numquam [do] eius modi tempora inci[di]dunt, ut labore et dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrum[d] exercitationem ullam corporis suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur? [D]Quis autem vel eum i[r]ure reprehenderit, qui in ea voluptate velit esse, quam nihil molestiae consequatur, vel illum, qui dolorem eum fugiat, quo voluptas nulla pariatur? + +[33] At vero eos et accusamus et iusto odio dignissimos ducimus, qui blanditiis praesentium voluptatum deleniti atque corrupti, quos dolores et quas molestias excepturi sint, obcaecati cupiditate non provident, similique sunt in culpa, qui officia deserunt mollitia animi, id est laborum et dolorum fuga. Et harum quidem reru[d]um facilis est e[r]t expedita distinctio. Nam libero tempore, cum soluta nobis est eligendi optio, cumque nihil impedit, quo minus id, quod maxime placeat facere possimus, omnis voluptas assumenda est, omnis dolor repellend[a]us. Temporibus autem quibusdam et aut officiis debitis aut rerum necessitatibus saepe eveniet, ut et voluptates repudiandae sint et molestiae non recusandae. Itaque earum rerum hic tenetur a sapiente delectus, ut aut reiciendis voluptatibus maiores alias consequatur aut perferendis doloribus asperiores repellat. +