From 8a735e58e6804be1e6a125678d1a8d116ad54651 Mon Sep 17 00:00:00 2001
From: peterdm <peter.de.maeyer@gmail.com>
Date: Fri, 14 Sep 2018 21:15:32 +0200
Subject: [PATCH 1/2] XALANJ-2617 Fixed serializer such that it correctly deals
 with high-surrogate UTF-16 characters

---
 src/org/apache/xml/serializer/ToStream.java | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/org/apache/xml/serializer/ToStream.java b/src/org/apache/xml/serializer/ToStream.java
index 1134eb7ee..5c1f74172 100644
--- a/src/org/apache/xml/serializer/ToStream.java
+++ b/src/org/apache/xml/serializer/ToStream.java
@@ -1594,6 +1594,13 @@ else if (ch == CharInfo.S_LINE_SEPARATOR) {
                         writer.write("&#8232;");
                         lastDirtyCharProcessed = i;
                     }
+                    else if (Encodings.isHighUTF16Surrogate(ch)) {
+                        // Character.isHighSurrogate(ch) only exists as of Java 1.5, and this
+                        // codebase must remain Java 1.3 compliant (however outdated that is),
+                        // so we use Encodings.isHighUTF16Surrogate(ch) instead.
+                        lastDirtyCharProcessed = processDirty(chars, end, i, ch, lastDirtyCharProcessed, true);
+                        i = lastDirtyCharProcessed;
+                    }
                     else if (m_encodingInfo.isInEncoding(ch)) {
                         // If the character is in the encoding, and
                         // not in the normal ASCII range, we also

From 0edbce9f3f2708ecc697dab043ba8fd82076a7ad Mon Sep 17 00:00:00 2001
From: peterdm <peter.de.maeyer@gmail.com>
Date: Fri, 19 Oct 2018 21:50:01 +0200
Subject: [PATCH 2/2] XALANJ-2617 Fixed serializer for high-surrogate UTF-16
 characters also for attribute values

---
 src/org/apache/xml/serializer/ToStream.java | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/org/apache/xml/serializer/ToStream.java b/src/org/apache/xml/serializer/ToStream.java
index 5c1f74172..322fef2bb 100644
--- a/src/org/apache/xml/serializer/ToStream.java
+++ b/src/org/apache/xml/serializer/ToStream.java
@@ -2109,6 +2109,7 @@ public void writeAttrString(
         }
         string.getChars(0,len, m_attrBuff, 0);   
         final char[] stringChars = m_attrBuff;
+        int lastDirtyCharProcessed = -1;
 
         for (int i = 0; i < len; i++)
         {
@@ -2118,7 +2119,7 @@ public void writeAttrString(
                 // The character is supposed to be replaced by a String
                 // e.g.   '&'  -->  "&amp;"
                 // e.g.   '<'  -->  "&lt;"
-                accumDefaultEscape(writer, ch, i, stringChars, len, false, true);
+                lastDirtyCharProcessed = accumDefaultEscape(writer, ch, i, stringChars, len, false, true);
             }
             else {
                 if (0x0 <= ch && ch <= 0x1F) {
@@ -2140,17 +2141,21 @@ public void writeAttrString(
 
                     case CharInfo.S_HORIZONAL_TAB:
                         writer.write("&#9;");
+                        lastDirtyCharProcessed = i;
                         break;
                     case CharInfo.S_LINEFEED:
                         writer.write("&#10;");
+                        lastDirtyCharProcessed = i;
                         break;
                     case CharInfo.S_CARRIAGERETURN:
                         writer.write("&#13;");
+                        lastDirtyCharProcessed = i;
                         break;
                     default:
                         writer.write("&#");
                         writer.write(Integer.toString(ch));
                         writer.write(';');
+                        lastDirtyCharProcessed = i;
                         break;
 
                     }
@@ -2159,6 +2164,7 @@ else if (ch < 0x7F) {
                     // Range 0x20 through 0x7E inclusive
                     // Normal ASCII chars
                         writer.write(ch);
+                    lastDirtyCharProcessed = i;
                 }
                 else if (ch <= 0x9F){
                     // Range 0x7F through 0x9F inclusive
@@ -2166,16 +2172,23 @@ else if (ch <= 0x9F){
                     writer.write("&#");
                     writer.write(Integer.toString(ch));
                     writer.write(';');
+                    lastDirtyCharProcessed = i;
                 }
                 else if (ch == CharInfo.S_LINE_SEPARATOR) {
                     // LINE SEPARATOR
                     writer.write("&#8232;");
+                    lastDirtyCharProcessed = i;
+                }
+                else if (Encodings.isHighUTF16Surrogate(ch)) {
+                    lastDirtyCharProcessed = processDirty(stringChars, len, i, ch, lastDirtyCharProcessed, false);
+                    i = lastDirtyCharProcessed;
                 }
                 else if (m_encodingInfo.isInEncoding(ch)) {
                     // If the character is in the encoding, and
                     // not in the normal ASCII range, we also
                     // just write it out
                     writer.write(ch);
+                    lastDirtyCharProcessed = i;
                 }
                 else {
                     // This is a fallback plan, we should never get here
@@ -2185,6 +2198,7 @@ else if (m_encodingInfo.isInEncoding(ch)) {
                     writer.write("&#");
                     writer.write(Integer.toString(ch));
                     writer.write(';');
+                    lastDirtyCharProcessed = i;
                 }
                     
             }