diff --git a/johnzon-core/src/main/java/org/apache/johnzon/core/JsonStreamParserImpl.java b/johnzon-core/src/main/java/org/apache/johnzon/core/JsonStreamParserImpl.java index d9b5338f..dede892d 100644 --- a/johnzon-core/src/main/java/org/apache/johnzon/core/JsonStreamParserImpl.java +++ b/johnzon-core/src/main/java/org/apache/johnzon/core/JsonStreamParserImpl.java @@ -28,6 +28,8 @@ import java.io.Reader; import java.math.BigDecimal; import java.nio.charset.Charset; +import java.util.LinkedList; +import java.util.List; import java.util.NoSuchElementException; //This class represents either the Json tokenizer and the Json parser. @@ -74,6 +76,8 @@ public class JsonStreamParserImpl extends JohnzonJsonParserImpl implements JsonC private char[] fallBackCopyBuffer; private boolean releaseFallBackCopyBufferLength = true; private int fallBackCopyBufferLength; + // when boundaries of fallBackCopyBuffer have been reached + private List<Buffer> previousFallBackCopyBuffers; // location (line, column, offset) // We try to calculate this efficiently so we do not just increment the values per char read @@ -115,6 +119,16 @@ private static final class StructureElement { } } + private static final class Buffer { + private char[] buffer; + private int length; + + public Buffer(char[] buffer, int length) { + this.buffer = buffer; + this.length = length; + } + } + //detect charset according to RFC 4627 public JsonStreamParserImpl(final InputStream inputStream, final int maxStringLength, final BufferStrategy.BufferProvider bufferProvider, final BufferStrategy.BufferProvider valueBuffer, @@ -165,7 +179,7 @@ private JsonStreamParserImpl(final InputStream inputStream, final Reader reader, //append a single char to the value buffer private void appendToCopyBuffer(final char c) { if (fallBackCopyBufferLength >= fallBackCopyBuffer.length - 1) { - doAutoAdjust(1); + createNewFallBackCopyBuffer(); } fallBackCopyBuffer[fallBackCopyBufferLength++] = c; } @@ -180,40 +194,39 @@ private void 
copyCurrentValue() { } if (fallBackCopyBufferLength >= fallBackCopyBuffer.length - length) { // not good at runtime but handled - doAutoAdjust(length); - } else { - System.arraycopy(buffer, startOfValueInBuffer, fallBackCopyBuffer, fallBackCopyBufferLength, length); + createNewFallBackCopyBuffer(); } + + System.arraycopy(buffer, startOfValueInBuffer, fallBackCopyBuffer, fallBackCopyBufferLength, length); fallBackCopyBufferLength += length; } startOfValueInBuffer = endOfValueInBuffer = -1; } - private void doAutoAdjust(final int length) { + // Creates new fallBackCopyBuffer and stores the old instance in previousFallBackCopyBuffers, + // this is much faster than resizing (recreating + copying) fallBackCopyBuffer + private void createNewFallBackCopyBuffer() { if (!autoAdjust) { throw new ArrayIndexOutOfBoundsException("Buffer too small for such a long string"); } - final char[] newArray = new char[fallBackCopyBuffer.length + Math.max(getBufferExtends(fallBackCopyBuffer.length), length)]; - // TODO: log to adjust size once? 
- System.arraycopy(fallBackCopyBuffer, 0, newArray, 0, fallBackCopyBufferLength); - if (startOfValueInBuffer != -1) { - System.arraycopy(buffer, startOfValueInBuffer, newArray, fallBackCopyBufferLength, length); + if (previousFallBackCopyBuffers == null) { + previousFallBackCopyBuffers = new LinkedList<>(); } - if (releaseFallBackCopyBufferLength) { - bufferProvider.release(fallBackCopyBuffer); - releaseFallBackCopyBufferLength = false; - } - fallBackCopyBuffer = newArray; + + previousFallBackCopyBuffers.add(new Buffer(fallBackCopyBuffer, fallBackCopyBufferLength)); + fallBackCopyBuffer = valueProvider.newBuffer(); + fallBackCopyBufferLength = 0; } - /** - * @param currentLength length of the buffer - * @return the amount of bytes the current buffer should get extended with - */ - protected int getBufferExtends(int currentLength) { - return currentLength / 4; + private void releasePreviousFallBackCopyBuffers() { + if (previousFallBackCopyBuffers == null) { + return; + } + + previousFallBackCopyBuffers.forEach(it -> valueProvider.release(it.buffer)); + previousFallBackCopyBuffers = null; } @@ -443,6 +456,7 @@ protected final Event internalNext() { currentIntegralNumber = Integer.MIN_VALUE; } + releasePreviousFallBackCopyBuffers(); if (fallBackCopyBufferLength != 0) { fallBackCopyBufferLength = 0; } @@ -898,6 +912,7 @@ private Event handleLiteral() { @Override public String getString() { if (previousEvent == KEY_NAME || previousEvent == VALUE_STRING || previousEvent == VALUE_NUMBER) { + combinePreviousFallbackBuffersToCurrent(); //if there a content in the value buffer read from them, if not use main buffer return fallBackCopyBufferLength > 0 ? 
new String(fallBackCopyBuffer, 0, fallBackCopyBufferLength) : new String(buffer, @@ -907,6 +922,30 @@ public String getString() { } } + // Combines all old stored fallback buffers into the current fallback buffer again so we have a char[] to easily access + // Releases all previous fallback buffers while doing so + private void combinePreviousFallbackBuffersToCurrent() { + if (previousFallBackCopyBuffers == null) { + return; + } + + int newSize = previousFallBackCopyBuffers.stream().mapToInt(it -> it.length).sum() + fallBackCopyBufferLength; + char[] newBuffer = new char[newSize]; + + int index = 0; + for (Buffer buffer : previousFallBackCopyBuffers) { + System.arraycopy(buffer.buffer, 0, newBuffer, index, buffer.length); + index += buffer.length; + } + + System.arraycopy(fallBackCopyBuffer, 0, newBuffer, index, fallBackCopyBufferLength); + index += fallBackCopyBufferLength; + + releasePreviousFallBackCopyBuffers(); + fallBackCopyBuffer = newBuffer; + fallBackCopyBufferLength = index; + } + @Override public boolean isIntegralNumber() { @@ -929,6 +968,7 @@ public int getInt() { } else if (isCurrentNumberIntegral && currentIntegralNumber != Integer.MIN_VALUE) { return currentIntegralNumber; } else if (isCurrentNumberIntegral) { + combinePreviousFallbackBuffersToCurrent(); //if there a content in the value buffer read from them, if not use main buffer final Integer retVal = fallBackCopyBufferLength > 0 ? parseIntegerFromChars(fallBackCopyBuffer, 0, fallBackCopyBufferLength) : parseIntegerFromChars(buffer, startOfValueInBuffer, endOfValueInBuffer); @@ -949,6 +989,7 @@ public long getLong() { } else if (isCurrentNumberIntegral && currentIntegralNumber != Integer.MIN_VALUE) { return currentIntegralNumber; } else if (isCurrentNumberIntegral) { + combinePreviousFallbackBuffersToCurrent(); //if there a content in the value buffer read from them, if not use main buffer final Long retVal = fallBackCopyBufferLength > 0 ? 
parseLongFromChars(fallBackCopyBuffer, 0, fallBackCopyBufferLength) : parseLongFromChars(buffer, startOfValueInBuffer, endOfValueInBuffer); @@ -984,6 +1025,8 @@ public BigDecimal getBigDecimal() { } else if (isCurrentNumberIntegral && currentIntegralNumber != Integer.MIN_VALUE) { return new BigDecimal(currentIntegralNumber); } + + combinePreviousFallbackBuffersToCurrent(); //if there a content in the value buffer read from them, if not use main buffer return (/*currentBigDecimalNumber = */fallBackCopyBufferLength > 0 ? new BigDecimal(fallBackCopyBuffer, 0, fallBackCopyBufferLength) : new BigDecimal(buffer, startOfValueInBuffer, (endOfValueInBuffer - startOfValueInBuffer))); @@ -1004,6 +1047,7 @@ public void close() { if (releaseFallBackCopyBufferLength) { valueProvider.release(fallBackCopyBuffer); } + releasePreviousFallBackCopyBuffers(); try { in.close(); diff --git a/johnzon-core/src/test/java/org/apache/johnzon/core/HugeStringTest.java b/johnzon-core/src/test/java/org/apache/johnzon/core/HugeStringTest.java new file mode 100644 index 00000000..72ce072a --- /dev/null +++ b/johnzon-core/src/test/java/org/apache/johnzon/core/HugeStringTest.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.johnzon.core; + +import jakarta.json.Json; +import jakarta.json.JsonReader; +import org.junit.Ignore; +import org.junit.Test; + +import java.io.StringReader; + +@Ignore +public class HugeStringTest { + @Test + public void test() { + String json = "{\"data\":\"" + "a".repeat(50 * 1024 * 1024 + 1) + "\"}"; + + // Warmup + for (int i = 0; i < 10; i++) { + try (JsonReader reader = Json.createReader(new StringReader(json))) { + reader.readObject(); + } + } + + long start = System.currentTimeMillis(); + try (JsonReader reader = Json.createReader(new StringReader(json))) { + reader.readObject(); + } + System.err.println("Took " + (System.currentTimeMillis() - start) + "ms"); + } +}