-
Notifications
You must be signed in to change notification settings - Fork 13
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implemented first version of the asReader interface on IString #273
Changes from 1 commit
16073c5
21c2bf4
e289490
a7cf69a
206676f
589aa9f
ea2101a
1bc04ad
f107012
9909809
82c9844
b47627c
f70c200
f7d591e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,8 @@ | |
package io.usethesource.vallang.impl.primitive; | ||
|
||
import java.io.IOException; | ||
import java.io.Reader; | ||
import java.io.StringReader; | ||
import java.io.StringWriter; | ||
import java.io.Writer; | ||
import java.lang.management.ManagementFactory; | ||
|
@@ -279,6 +281,11 @@ public boolean isNewlineTerminated() { | |
public IString concat(IString other) { | ||
return other; | ||
} | ||
|
||
@Override | ||
public Reader asReader() { | ||
return Reader.nullReader(); | ||
} | ||
} | ||
|
||
private static class FullUnicodeString extends AbstractString { | ||
|
@@ -501,6 +508,11 @@ public void write(Writer w) throws IOException { | |
w.write(value); | ||
} | ||
|
||
@Override | ||
public Reader asReader() { | ||
return new StringReader(value); | ||
} | ||
|
||
@Override | ||
public void indentedWrite(Writer w, Deque<IString> whitespace, boolean indentFirstLine) throws IOException { | ||
if (value.isEmpty()) { | ||
|
@@ -633,6 +645,11 @@ public int nextInt() { | |
} | ||
}; | ||
} | ||
|
||
@Override | ||
public Reader asReader() { | ||
return new StringReader(value); | ||
} | ||
} | ||
|
||
/** | ||
|
@@ -1146,6 +1163,47 @@ public void write(Writer w) throws IOException { | |
right.write(w); | ||
} | ||
|
||
@Override | ||
public Reader asReader() { | ||
return new Reader() { | ||
private Reader currentReader = left.asReader(); | ||
private boolean readingRight = false; | ||
|
||
private void continueRight() throws IOException { | ||
assert !readingRight; | ||
currentReader.close(); | ||
currentReader = right.asReader(); | ||
readingRight = true; | ||
} | ||
|
||
|
||
@Override | ||
public int read(char[] cbuf, int off, int len) throws IOException { | ||
int result = currentReader.read(cbuf, off, len); | ||
if (result == -1 && !readingRight) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could use some comments to explain that read is allowed to return fewer chars than Len. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah okay, I didn't want to repeat the manual of |
||
continueRight(); | ||
return read(cbuf, off, len); | ||
} | ||
return result; | ||
} | ||
|
||
@Override | ||
public int read() throws IOException { | ||
int result = currentReader.read(); | ||
if (result == -1 && !readingRight) { | ||
continueRight(); | ||
return read(); | ||
} | ||
return result; | ||
} | ||
|
||
@Override | ||
public void close() throws IOException { | ||
currentReader.close(); | ||
} | ||
}; | ||
} | ||
|
||
@Override | ||
public void indentedWrite(Writer w, Deque<IString> whitespace, boolean indentFirstLine) throws IOException { | ||
left.indentedWrite(w, whitespace, indentFirstLine); | ||
|
@@ -1380,6 +1438,72 @@ public void write(Writer w) throws IOException { | |
assert indents.isEmpty(); | ||
} | ||
|
||
@Override | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this is correct, although I see we need to work hard with the surrogate pairs. Maybe If we would always read until There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But before we merge we should find an implemention that can say least stream line by line. I suspect that the current experiment with the int iterator will slow down the stream to the point that it doesn't make sense to stream anymore. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Well, we need to do work with surrogate pairs since every int the iterator returns can be either 1 or 2 chars. I initially had some code that would write 2 chars if there was room, but that just lead to duplication of logic with the |
||
public Reader asReader() { | ||
return new Reader() { | ||
private final OfInt chars = iterator(); | ||
private char endedWithHalfSurrogate = 0; | ||
|
||
@Override | ||
public int read(char[] cbuf, int off, int len) throws IOException { | ||
if (off < 0 || len < 0 || len > (cbuf.length - off)) { | ||
throw new IndexOutOfBoundsException(); | ||
} | ||
if (len == 0) { | ||
return 0; | ||
} | ||
|
||
int pos = off; | ||
if (endedWithHalfSurrogate != 0) { | ||
cbuf[pos++] = endedWithHalfSurrogate; | ||
endedWithHalfSurrogate = 0; | ||
} | ||
int endPos = off + len; | ||
while (pos <= endPos) { | ||
if (!chars.hasNext()) { | ||
break; | ||
} | ||
int nextChar = chars.nextInt(); | ||
if (Character.isBmpCodePoint(nextChar)) { | ||
cbuf[pos++] = (char)nextChar; | ||
} else { | ||
cbuf[pos++] = Character.highSurrogate(nextChar); | ||
char lowSide = Character.lowSurrogate(nextChar); | ||
if (pos <= endPos) { | ||
cbuf[pos++] = lowSide; | ||
} else { | ||
endedWithHalfSurrogate = lowSide; | ||
} | ||
} | ||
} | ||
int written = pos - off; | ||
return written == 0 ? /* EOF */ -1 : written; | ||
} | ||
|
||
@Override | ||
public int read() throws IOException { | ||
if (endedWithHalfSurrogate != 0) { | ||
int result = endedWithHalfSurrogate; | ||
endedWithHalfSurrogate = 0; | ||
return result; | ||
} | ||
if (chars.hasNext()) { | ||
int nextChar = chars.nextInt(); | ||
if (Character.isBmpCodePoint(nextChar)) { | ||
return nextChar; | ||
} else { | ||
endedWithHalfSurrogate = Character.lowSurrogate(nextChar); | ||
return Character.highSurrogate(nextChar); | ||
} | ||
} | ||
return -1; | ||
} | ||
@Override | ||
public void close() throws IOException { | ||
} | ||
}; | ||
} | ||
|
||
@Override | ||
public void indentedWrite(Writer w, Deque<IString> whitespace, boolean indentFirstLine) throws IOException { | ||
if (flattened != null) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Here we allocate a reader at every level in the tree and every node in every level. For the writer this design didn't hold up because of the enormous amounts of concat nodes in some applications. Typically we have at least as many concat nodes as lines in the output.
So let's have a look at this and also use the benchmarks in the main method so we can fine-tune it. Typically a recursion is ok for the concat nodes and maximally log(n) allocations (a spine of reader objects that moves over the tree).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The problem is that we have to keep state, the writer was easier as it's push, while the reader is pull. I tried to do it in a similar way as the writer, but could not in a reasonable timeframe figure it out. We need co-routintes ;)