Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

XALANJ-2617 Fixed serializer for high-surrogate UTF-16 characters #4

Open
wants to merge 3 commits into
base: trunk
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 22 additions & 1 deletion src/org/apache/xml/serializer/ToStream.java
Original file line number Diff line number Diff line change
Expand Up @@ -1594,6 +1594,13 @@ else if (ch == CharInfo.S_LINE_SEPARATOR) {
writer.write("&#8232;");
lastDirtyCharProcessed = i;
}
else if (Encodings.isHighUTF16Surrogate(ch)) {
// As of Java 1.5, we could use Character.isHighSurrogate(ch),
// but this codebase needs to be Java 1.3 compliant (even though that is seriously outdated),
// which is why we settle for Encodings.isHighUTF16Surrogate(ch).
lastDirtyCharProcessed = processDirty(chars, end, i, ch, lastDirtyCharProcessed, true);
i = lastDirtyCharProcessed;
}
else if (m_encodingInfo.isInEncoding(ch)) {
// If the character is in the encoding, and
// not in the normal ASCII range, we also
Expand Down Expand Up @@ -2102,6 +2109,7 @@ public void writeAttrString(
}
string.getChars(0,len, m_attrBuff, 0);
final char[] stringChars = m_attrBuff;
int lastDirtyCharProcessed = -1;

for (int i = 0; i < len; i++)
{
Expand All @@ -2111,7 +2119,7 @@ public void writeAttrString(
// The character is supposed to be replaced by a String
// e.g. '&' --> "&amp;"
// e.g. '<' --> "&lt;"
accumDefaultEscape(writer, ch, i, stringChars, len, false, true);
lastDirtyCharProcessed = accumDefaultEscape(writer, ch, i, stringChars, len, false, true);
}
else {
if (0x0 <= ch && ch <= 0x1F) {
Expand All @@ -2133,17 +2141,21 @@ public void writeAttrString(

case CharInfo.S_HORIZONAL_TAB:
writer.write("&#9;");
lastDirtyCharProcessed = i;
break;
case CharInfo.S_LINEFEED:
writer.write("&#10;");
lastDirtyCharProcessed = i;
break;
case CharInfo.S_CARRIAGERETURN:
writer.write("&#13;");
lastDirtyCharProcessed = i;
break;
default:
writer.write("&#");
writer.write(Integer.toString(ch));
writer.write(';');
lastDirtyCharProcessed = i;
break;

}
Expand All @@ -2152,23 +2164,31 @@ else if (ch < 0x7F) {
// Range 0x20 through 0x7E inclusive
// Normal ASCII chars
writer.write(ch);
lastDirtyCharProcessed = i;
}
else if (ch <= 0x9F){
// Range 0x7F through 0x9F inclusive
// More control characters
writer.write("&#");
writer.write(Integer.toString(ch));
writer.write(';');
lastDirtyCharProcessed = i;
}
else if (ch == CharInfo.S_LINE_SEPARATOR) {
// LINE SEPARATOR
writer.write("&#8232;");
lastDirtyCharProcessed = i;
}
else if (Encodings.isHighUTF16Surrogate(ch)) {
lastDirtyCharProcessed = processDirty(stringChars, len, i, ch, lastDirtyCharProcessed, false);
i = lastDirtyCharProcessed;
}
else if (m_encodingInfo.isInEncoding(ch)) {
// If the character is in the encoding, and
// not in the normal ASCII range, we also
// just write it out
writer.write(ch);
lastDirtyCharProcessed = i;
}
else {
// This is a fallback plan, we should never get here
Expand All @@ -2178,6 +2198,7 @@ else if (m_encodingInfo.isInEncoding(ch)) {
writer.write("&#");
writer.write(Integer.toString(ch));
writer.write(';');
lastDirtyCharProcessed = i;
}

}
Expand Down