diff --git a/src/main/java/io/usethesource/vallang/IString.java b/src/main/java/io/usethesource/vallang/IString.java index cee3ef89..10dc4744 100644 --- a/src/main/java/io/usethesource/vallang/IString.java +++ b/src/main/java/io/usethesource/vallang/IString.java @@ -13,6 +13,7 @@ package io.usethesource.vallang; import java.io.IOException; +import java.io.Reader; import java.io.Writer; import java.util.PrimitiveIterator.OfInt; @@ -99,6 +100,13 @@ default int getMatchFingerprint() { */ public void write(Writer w) throws IOException; + /** + * Generates a reader that can be used to stream the contents of the string. + * Note: the reader produces Java characters (UTF-16 code units); users are responsible for dealing with surrogate pairs. + * See {@link #iterator()} for a more Unicode-compatible way to iterate over the code points of an IString. + */ + public Reader asReader(); + /** * Build an iterator which generates the Unicode UTF-32 codepoints of the IString one-by-one. * @see Character for more information on Unicode UTF-32 codepoints. 
diff --git a/src/main/java/io/usethesource/vallang/impl/primitive/StringValue.java b/src/main/java/io/usethesource/vallang/impl/primitive/StringValue.java index 942cad30..89c11bc5 100644 --- a/src/main/java/io/usethesource/vallang/impl/primitive/StringValue.java +++ b/src/main/java/io/usethesource/vallang/impl/primitive/StringValue.java @@ -16,6 +16,8 @@ package io.usethesource.vallang.impl.primitive; import java.io.IOException; +import java.io.Reader; +import java.io.StringReader; import java.io.StringWriter; import java.io.Writer; import java.lang.management.ManagementFactory; @@ -25,15 +27,15 @@ import java.time.Duration; import java.time.temporal.ChronoUnit; import java.util.ArrayDeque; +import java.util.Collections; import java.util.Deque; import java.util.Iterator; import java.util.NoSuchElementException; import java.util.PrimitiveIterator; import java.util.PrimitiveIterator.OfInt; - +import java.util.function.Function; import org.checkerframework.checker.nullness.qual.MonotonicNonNull; import org.checkerframework.checker.nullness.qual.Nullable; - import io.usethesource.vallang.IString; import io.usethesource.vallang.IValueFactory; import io.usethesource.vallang.impl.persistent.ValueFactory; @@ -279,6 +281,16 @@ public boolean isNewlineTerminated() { public IString concat(IString other) { return other; } + + @Override + public Reader asReader() { + return Reader.nullReader(); + } + + @Override + public Iterator iterateParts() { + return Collections.emptyIterator(); + } } private static class FullUnicodeString extends AbstractString { @@ -501,6 +513,11 @@ public void write(Writer w) throws IOException { w.write(value); } + @Override + public Reader asReader() { + return new StringReader(value); + } + @Override public void indentedWrite(Writer w, Deque whitespace, boolean indentFirstLine) throws IOException { if (value.isEmpty()) { @@ -579,6 +596,11 @@ public int nextInt() { } }; } + + @Override + public Iterator iterateParts() { + return 
Collections.singleton(CharBuffer.wrap(value)).iterator(); + } } /** @@ -633,6 +655,16 @@ public int nextInt() { } }; } + + @Override + public Reader asReader() { + return new StringReader(value); + } + + @Override + public Iterator iterateParts() { + return Collections.singleton(CharBuffer.wrap(value)).iterator(); + } } /** @@ -808,6 +840,8 @@ default AbstractString rotateRightLeft() { default AbstractString rotateLeftRight() { return (AbstractString) this; } + + Iterator iterateParts(); } private abstract static class AbstractString implements IString, IStringTreeNode, IIndentableString { @@ -963,6 +997,47 @@ protected final int hashCode(int prefixCode) { } abstract boolean hasNonBMPCodePoints(); + + public abstract Iterator iterateParts(); + + @Override + public Reader asReader() { + return new Reader() { + final Iterator parts = iterateParts(); + CharBuffer currentBuffer = CharBuffer.allocate(0); + + private CharBuffer getBuffer() { /* skips exhausted parts; the result is empty only when no parts are left */ + var actualBuffer = currentBuffer; + while (!actualBuffer.hasRemaining()) { + if (!parts.hasNext()) { + return actualBuffer; + } + actualBuffer = currentBuffer = parts.next(); + } + return actualBuffer; + } + + @Override + public int read(char[] cbuf, int off, int len) throws IOException { + if (off < 0 || len < 0 || len > cbuf.length - off) { /* reject any range that does not fit inside cbuf */ + throw new IndexOutOfBoundsException(); + } + var target = CharBuffer.wrap(cbuf, off, len); + while (target.hasRemaining()) { + var actualBuffer = getBuffer(); + if (!actualBuffer.hasRemaining()) { + break; + } + actualBuffer.read(target); + } + return len == 0 ? 0 : /* a zero-length read is not end-of-stream */ target.position() == off ? 
-1 : (len - target.remaining()); + } + + @Override + public void close() throws IOException { + } + }; + } } private static class LazyConcatString extends AbstractString { @@ -1146,6 +1221,8 @@ public void write(Writer w) throws IOException { right.write(w); } + + @Override public void indentedWrite(Writer w, Deque whitespace, boolean indentFirstLine) throws IOException { left.indentedWrite(w, whitespace, indentFirstLine); @@ -1180,38 +1257,68 @@ public AbstractString rotateLeftRight() { @Override public OfInt iterator() { return new OfInt() { - final Deque todo = new ArrayDeque<>(depth); - OfInt currentLeaf = leftmostLeafIterator(todo, LazyConcatString.this); + final InOrderIterator it = new InOrderIterator<>(IStringTreeNode::iterator); @Override public boolean hasNext() { - return currentLeaf.hasNext(); /* || !todo.isEmpty() is unnecessary due to post-condition of nextInt() */ + return it.getActive().hasNext(); } @Override public int nextInt() { - int next = currentLeaf.nextInt(); + return it.getActive().nextInt(); + } + }; + } - if (!currentLeaf.hasNext() && !todo.isEmpty()) { - // now we back track to the previous node we went left from, - // take the right branch and continue with its first leaf: - currentLeaf = leftmostLeafIterator(todo, todo.pop()); - } + @Override + public Iterator iterateParts() { + return new Iterator<> () { + final InOrderIterator> it = new InOrderIterator<>(IStringTreeNode::iterateParts); - assert currentLeaf.hasNext() || todo.isEmpty(); - return next; + @Override + public boolean hasNext() { + return it.getActive().hasNext(); + } + + @Override + public CharBuffer next() { + return it.getActive().next(); } }; } /** - * Static helper function for the iterator() method. + * An in-order traversal of the leaves of the concat tree. 
+ * For every leaf we call the desired iterator, and replace it with the next leaf's iterator once it is consumed. + */ + private class InOrderIterator> { + private final Deque todo; + private final Function getActualIterator; + private T activeIterator; + + InOrderIterator( Function getActualIterator) { + this.getActualIterator = getActualIterator; + todo = new ArrayDeque<>(depth); + activeIterator = getActualIterator.apply(leftmostLeaf(todo, LazyConcatString.this)); + } + + T getActive() { + while (!activeIterator.hasNext() && !todo.isEmpty()) { + activeIterator = getActualIterator.apply(leftmostLeaf(todo, todo.pop())); + } + return activeIterator; + } + + } + /** + * Helper function for the iterator() method. * * It finds the left-most leaf of the tree, and collects * the path of nodes to this leaf as a side-effect in the todo * stack. */ - private static OfInt leftmostLeafIterator(Deque todo, IStringTreeNode start) { + private static IStringTreeNode leftmostLeaf(Deque todo, IStringTreeNode start) { IStringTreeNode cur = start; while (cur.depth() > 1) { @@ -1219,8 +1326,9 @@ private static OfInt leftmostLeafIterator(Deque todo, IStringTre cur = cur.left(); } - return cur.iterator(); + return cur; } + } private static class IndentedString extends AbstractString { @@ -1332,6 +1440,69 @@ public int nextInt() { }; } + @Override + public Iterator iterateParts() { + if (flattened != null) { + return flattened.iterateParts(); + } + var indentBuffer = CharBuffer.wrap(indent.getValue()); + return new Iterator<>() { + final Iterator content = wrapped.iterateParts(); + CharBuffer active = CharBuffer.allocate(0); + boolean indentNext = indentFirstLine; + + @Override + public boolean hasNext() { + return indentNext || content.hasNext() || active.hasRemaining(); + } + + private CharBuffer nextTillNewlineOrEndOfBuffer() { + int start = active.position(); + int end = start + active.remaining(); + int cur = start; + while (cur < end) { + if (active.get(cur) == NEWLINE) { + cur++; 
indentNext = true; + break; + } + cur++; + } + if (cur != end) { + var result = active.duplicate(); + result.limit(cur); + active.position(cur); + return result; + } + else { + // end of the buffer + var result = active; + if (content.hasNext()) { + active = content.next(); + } + else { + // end of the stream + indentNext = false; + active = CharBuffer.allocate(0); + } + return result; + } + } + + @Override + public CharBuffer next() { + if (indentNext) { + indentNext = false; + return indentBuffer.asReadOnlyBuffer(); + } + // okay so no indent to send + // now we should give the next char-buffer till the next newline + return nextTillNewlineOrEndOfBuffer(); + } + + }; + } + @Override public IString reverse() { return applyIndentation().reverse(); diff --git a/src/test/java/io/usethesource/vallang/basic/BasicValueSmokeTest.java b/src/test/java/io/usethesource/vallang/basic/BasicValueSmokeTest.java index 572cea69..bf5ba410 100644 --- a/src/test/java/io/usethesource/vallang/basic/BasicValueSmokeTest.java +++ b/src/test/java/io/usethesource/vallang/basic/BasicValueSmokeTest.java @@ -174,6 +174,27 @@ public void testStringWrite(IValueFactory vf) { } } + @ParameterizedTest @ArgumentsSource(ValueProvider.class) + public void testStringRead(IValueFactory vf) { + Random rnd = new Random(); + + for (int i = 0; i < 1000; i++) { + IString testString = vf.string(RandomUtil.string(rnd, rnd.nextInt(200))); + var result = new StringBuilder(); + try (var r = testString.asReader()) { + char[] buffer = new char[1024]; + int read = 0; + while ((read = r.read(buffer)) > 0) { + result.append(buffer, 0, read); + } + } catch (IOException e) { + fail(e.getMessage()); + } + + assertEqual(testString, vf.string(result.toString())); + } + } + @ParameterizedTest @ArgumentsSource(ValueProvider.class) public void testStringEmptyWrite(IValueFactory vf) { IString testString = vf.string(""); @@ -284,6 +305,7 @@ private void checkIndent(IValueFactory vf, String indent, String newline, boolea 
assertSimilarIteration(indentedDirect, indentedConcatTree); assertEqualLength(indentedDirect, indentedConcatTree); assertEqual(indentedDirect, indentedConcatTree); + assertEqualWriteAndRead(indentedDirect, indentedConcatTree); assertEquals(indentedDirect.hashCode(), indentedConcatTree.hashCode()); // these modify internal structure as a side-effect, so after this we test the above again! @@ -298,6 +320,7 @@ private void checkIndent(IValueFactory vf, String indent, String newline, boolea assertSimilarIteration(vf.string(expected), indentedConcatTree); assertSimilarIteration(indentedDirect, indentedConcatTree); assertEqual(indentedDirect, indentedConcatTree); + assertEqualWriteAndRead(indentedDirect, indentedConcatTree); assertEquals(indentedDirect.hashCode(), indentedConcatTree.hashCode()); // basic tests showing lazy versus eager indentation should have the same semantics: @@ -306,6 +329,7 @@ private void checkIndent(IValueFactory vf, String indent, String newline, boolea assertSimilarIteration(vf.string(expectedTwice), indentedDirectTwice); assertSimilarIteration(vf.string(expectedTwice), indentedConcatTreeTwice); assertEqual(indentedDirectTwice, indentedConcatTreeTwice); + assertEqualWriteAndRead(indentedDirectTwice, indentedConcatTreeTwice); assertSimilarIteration(indentedDirectTwice, indentedConcatTreeTwice); assertEquals(indentedDirectTwice.hashCode(), indentedConcatTreeTwice.hashCode()); @@ -326,10 +350,68 @@ private void checkIndent(IValueFactory vf, String indent, String newline, boolea assertSimilarIteration(vf.string(expectedTwice), indentedDirectTwice); assertSimilarIteration(vf.string(expectedTwice), indentedConcatTreeTwice); assertEqual(indentedDirectTwice, indentedConcatTreeTwice); + assertEqualWriteAndRead(indentedDirectTwice, indentedConcatTreeTwice); assertSimilarIteration(indentedDirectTwice, indentedConcatTreeTwice); assertEquals(indentedDirectTwice.hashCode(), indentedConcatTreeTwice.hashCode()); } + + private String writerToString(IString a) { 
+ try { + var result = new StringWriter(); + a.write(result); + return result.toString(); + } catch (IOException e) { + fail("IString::write failed", e); + return ""; + } + } + + private String readerToString(IString a) { + try (var r = a.asReader()) { + var result = new StringWriter(); + r.transferTo(result); /* read from the reader that try-with-resources opened and will close */ + return result.toString(); + } catch (IOException e) { + fail("IString::asReader failed", e); + return ""; + } + } + + private String readerSlowly(IString a) { + try (var r = a.asReader()) { + var result = new StringBuilder(); + while (true) { /* alternate single-char reads with 3-char array reads */ + int oneChar = r.read(); + if (oneChar == -1) { + break; + } + result.appendCodePoint(oneChar); + var buf = new char[3]; + int read = r.read(buf); + if (read == -1) { + break; + } + result.append(buf, 0, read); + } + return result.toString(); + } catch (IOException e) { + fail("IString::asReader failed", e); + return ""; + } + + } + + + private void assertEqualWriteAndRead(IString one, IString two) { + assertEquals(one.getValue(), writerToString(one), "IString::write should be the same as getValue"); + assertEquals(one.getValue(), readerToString(one), "IString::asReader should be the same as getValue"); + assertEquals(writerToString(one), writerToString(two), "IString::write had different results"); + assertEquals(readerToString(one), readerToString(two), "IString::asReader had different results"); + assertEquals(one.getValue(), readerSlowly(one), "IString::asReader had different results depending on buffer size"); + assertEquals(two.getValue(), readerSlowly(two), "IString::asReader had different results depending on buffer size"); + } + private void assertEqualCharAt(IString one, IString two) { assertEquals(one, two); diff --git a/src/test/java/io/usethesource/vallang/basic/LazyStringOperationsTest.java b/src/test/java/io/usethesource/vallang/basic/LazyStringOperationsTest.java index 02fb70ee..2e269adf 100644 --- a/src/test/java/io/usethesource/vallang/basic/LazyStringOperationsTest.java +++ 
b/src/test/java/io/usethesource/vallang/basic/LazyStringOperationsTest.java @@ -1,9 +1,12 @@ package io.usethesource.vallang.basic; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; import java.io.IOException; +import java.io.Reader; import java.io.StringWriter; import java.util.Random; @@ -91,11 +94,11 @@ public void testEquals(IValueFactory vf) { IString y = vf.string("abcdefgh"); IString z = vf.string("abcdefgi"); - assertTrue(x.hashCode() == y.hashCode()); - assertTrue(x.equals(y)); - assertTrue(y.equals(x)); - assertTrue(!z.equals(x)); - assertTrue(x.substring(0, 0).equals(vf.string(""))); + assertEquals(x.hashCode(), y.hashCode()); + assertEquals(x, y); + assertEquals(y, x); + assertNotEquals(z, x); + assertEquals(x.substring(0, 0), vf.string("")); } finally { StringValue.resetMaxFlatString(); StringValue.resetMaxUnbalance(); @@ -192,8 +195,22 @@ public void testStringReplace(IValueFactory vf) { vf.string("abcdefxygh").concat(str.substring(8))); } + private String fromReader(Reader r) throws IOException { + try { + var result = new StringBuilder(); + char[] buffer = new char[8 * 1024]; + int read = 0; + while ((read = r.read(buffer)) > 0) { + result.append(buffer, 0, read); + } + return result.toString(); + } finally { + r.close(); + } + } + @ParameterizedTest @ArgumentsSource(ValueProvider.class) - public void neverRunOutOfStack(IValueFactory vf) { + public void neverRunOutOfStack(IValueFactory vf) throws IOException { int outofStack = 100000; // first we have to know for sure that we would run out of stack with @see @@ -231,6 +248,8 @@ public void neverRunOutOfStack(IValueFactory vf) { try { new StringWriter().write(v.toString()); // do not remove this, this is the test assertTrue(true); + fromReader(v.asReader()); + assertTrue(true); } catch (StackOverflowError e) { 
fail("the tree balancer should have avoided a stack overflow"); }