Fix #652

FasterXML · Apr 4, 2021 · a6c2976 · a6c2976
1 parent 4e44508
commit a6c2976
Show file tree

Hide file tree

Showing 8 changed files with 112 additions and 22 deletions.
diff --git a/release-notes/CREDITS-2.x b/release-notes/CREDITS-2.x
@@ -258,11 +258,15 @@ Jonathan Haber (jhaber@github)
   * Contributed #573: More customizable TokenFilter inclusion (using `Tokenfilter.Inclusion`)
   (2.12.0)
 
+Greg Wittel (gwittel@github)
+  * Reported #652: Misleading exception for input source when processing byte buffer
+    with start offset
+  (2.13.0)
+
 Ferenc Csaky (ferenc-csaky@github)
   * Contributed #677: Introduce O(n^1.5) BigDecimal parser implementation
   (2.13.0)
 
 Fabian Meumertzheim (fmeum@github)
   * Reported #692: UTF32Reader ArrayIndexOutOfBoundsException
   (2.13.0)
-
diff --git a/release-notes/VERSION-2.x b/release-notes/VERSION-2.x
@@ -16,6 +16,9 @@ JSON library.
 
 2.13.0 (not yet released)
 
+#652: Misleading exception for input source when processing byte buffer
+  with start offset
+ (reported by Greg W)
 #664: Add `StreamWriteException` type to eventually replace `JsonGenerationException`
 #671: Replace `getCurrentLocation()`/`getTokenLocation()` with
   `currentLocation()`/`currentTokenLocation()` in `JsonParser`

diff --git a/src/main/java/com/fasterxml/jackson/core/JsonLocation.java b/src/main/java/com/fasterxml/jackson/core/JsonLocation.java
@@ -238,35 +238,67 @@ protected StringBuilder _appendSourceDesc(StringBuilder sb)
         // and then, include (part of) contents for selected types
         // (never for binary-format data)
         if (_contentReference.hasTextualContent()) {
-            int len;
-            String charStr = " chars";
-
+            // First, retrieve declared offset+length for content; handle
+            // negative markers (can't do more for general case)
+            int offset, length;
+            offset = _contentReference.contentOffset();
+            if (offset < 0) {
+                offset = 0;
+                length = 0;
+            } else {
+                length = Math.max(0, _contentReference.contentLength());
+            }
+
+            String unitStr = " chars";
+            String trimmed;
+
             if (srcRef instanceof CharSequence) {
-                CharSequence cs = (CharSequence) srcRef;
-                len = cs.length();
-                len -= _append(sb, cs.subSequence(0, Math.min(len, MAX_CONTENT_SNIPPET)).toString());
+                trimmed = _truncate((CharSequence) srcRef, offset, length);
             } else if (srcRef instanceof char[]) {
-                char[] ch = (char[]) srcRef;
-                len = ch.length;
-                len -= _append(sb, new String(ch, 0, Math.min(len, MAX_CONTENT_SNIPPET)));
+                trimmed = _truncate((char[]) srcRef, offset, length);
             } else if (srcRef instanceof byte[]) {
-                byte[] b = (byte[]) srcRef;
-                int maxLen = Math.min(b.length, MAX_CONTENT_SNIPPET);
-                _append(sb, new String(b, 0, maxLen, Charset.forName("UTF-8")));
-                len = b.length - maxLen;
-                charStr = " bytes";
+                trimmed = _truncate((byte[]) srcRef, offset, length);
+                unitStr = " bytes";
             } else {
-                len = 0;
+                trimmed = null;
             }
-            if (len > 0) {
-                sb.append("[truncated ").append(len).append(charStr).append(']');
+            if (trimmed != null) {
+                _append(sb, trimmed);
+                final int truncLen = length - trimmed.length();
+                if (truncLen > 0) {
+                    sb.append("[truncated ").append(truncLen).append(unitStr).append(']');
+                }
             }
         } else {
-
+            // What should we do with binary content?
         }
         return sb;
     }
 
+    // Returns the part of `cs` that begins at `start` and spans at most `length`
+    // chars, clamped to the end of the sequence and to MAX_CONTENT_SNIPPET.
+    private String _truncate(CharSequence cs, int start, int length) {
+        final int total = cs.length();
+        // A start past the end collapses to an empty slice at the end
+        final int from = Math.min(start, total);
+        final int count = Math.min(MAX_CONTENT_SNIPPET, Math.min(length, total - from));
+        return cs.subSequence(from, from + count).toString();
+    }
+
+    // Builds a String from the region of `cs` that begins at `start` and spans at
+    // most `length` chars, clamped to the array end and to MAX_CONTENT_SNIPPET.
+    private String _truncate(char[] cs, int start, int length) {
+        final int total = cs.length;
+        // A start past the end collapses to an empty region at the end
+        final int from = Math.min(start, total);
+        final int count = Math.min(MAX_CONTENT_SNIPPET, Math.min(length, total - from));
+        return String.valueOf(cs, from, count);
+    }
+
+    // Decodes, as UTF-8, the region of `b` that begins at `start` and spans at most
+    // `length` bytes, clamped to the array end and to MAX_CONTENT_SNIPPET.
+    // Note that the limits are applied to the byte count, before decoding.
+    private String _truncate(byte[] b, int start, int length) {
+        final int total = b.length;
+        // A start past the end collapses to an empty region at the end
+        final int from = Math.min(start, total);
+        final int count = Math.min(MAX_CONTENT_SNIPPET, Math.min(length, total - from));
+        final Charset utf8 = Charset.forName("UTF-8");
+        return new String(b, from, count, utf8);
+    }
+
     private int _append(StringBuilder sb, String content) {
         sb.append('"').append(content).append('"');
         return content.length();

diff --git a/src/main/java/com/fasterxml/jackson/core/json/JsonGeneratorImpl.java b/src/main/java/com/fasterxml/jackson/core/json/JsonGeneratorImpl.java
@@ -46,7 +46,7 @@ public abstract class JsonGeneratorImpl extends GeneratorBase
     /**********************************************************
      */
 
-    final protected IOContext _ioContext;
+    protected final IOContext _ioContext;
 
     /*
     /**********************************************************

diff --git a/src/main/java/com/fasterxml/jackson/core/json/ReaderBasedJsonParser.java b/src/main/java/com/fasterxml/jackson/core/json/ReaderBasedJsonParser.java
@@ -144,10 +144,13 @@ public ReaderBasedJsonParser(IOContext ctxt, int features, Reader r,
     {
         super(ctxt, features);
         _reader = r;
+        _objectCodec = codec;
         _inputBuffer = inputBuffer;
         _inputPtr = start;
         _inputEnd = end;
-        _objectCodec = codec;
+        _currInputRowStart = start;
+        // If we have a start offset, need to omit that from the char offset, so:
+        _currInputProcessed = -start;
         _symbols = st;
         _hashSeed = st.hashSeed();
         _bufferRecyclable = bufferRecyclable;

diff --git a/src/main/java/com/fasterxml/jackson/core/json/UTF8StreamJsonParser.java b/src/main/java/com/fasterxml/jackson/core/json/UTF8StreamJsonParser.java
@@ -3619,7 +3619,10 @@ protected void _reportInvalidToken(String matchedPart, String msg) throws IOExce
             char c = (char) _decodeCharForError(i);
             if (!Character.isJavaIdentifierPart(c)) {
                 // 11-Jan-2016, tatu: note: we will fully consume the character,
-                // included or not, so if recovery was possible, it'd be off-by-one...
+                //   included or not, so if recovery was possible, it'd be off-by-one...
+                // 04-Apr-2021, tatu: ... and the reason we can't do much about it is
+                //   that it may be a multi-byte UTF-8 character (and even if we saved
+                //   the offset, it still would not work across a buffer boundary)
                 break;
             }
             sb.append(c);

diff --git a/src/test/java/com/fasterxml/jackson/core/BaseTest.java b/src/test/java/com/fasterxml/jackson/core/BaseTest.java
@@ -536,6 +536,10 @@ protected static String quote(String str) {
     }
 
     protected static String aposToQuotes(String json) {
+        // Delegates to a2q(), which replaces each apostrophe with a double quote
+        return a2q(json);
+    }
+
+    // Returns `json` with every apostrophe (') replaced by a double quote (")
+    protected static String a2q(String json) {
         return json.replace("'", "\"");
     }
 

diff --git a/src/test/java/com/fasterxml/jackson/core/TestExceptions.java b/src/test/java/com/fasterxml/jackson/core/TestExceptions.java
@@ -2,6 +2,7 @@
 
 import java.io.StringWriter;
 
+import com.fasterxml.jackson.core.exc.StreamReadException;
 import com.fasterxml.jackson.core.io.JsonEOFException;
 
 public class TestExceptions extends BaseTest
@@ -119,4 +120,44 @@ private void _testEofExceptions(int mode) throws Exception
 
         // any other cases we'd like to test?
     }
+
+    // Parses only the second line ("[broken]") of a two-line document, passed as a
+    // buffer with a non-zero start offset, and checks the reported error location
+    // and source description for both byte[] and char[] input.
+    public void testContentSnippetWithOffset() throws Exception
+    {
+        final String doc = a2q("{'k1':'v1'}\n[broken]\n");
+        // Parsed window starts right after the first linefeed and runs to the end
+        final int offset = doc.indexOf("\n") + 1;
+        final int windowLen = doc.length() - offset;
+
+        // for byte-based, column will be after character that follows token:
+        // (and alas cannot be easily fixed)
+        final byte[] docBytes = utf8Bytes(doc);
+        JsonParser byteParser = JSON_F.createParser(docBytes, offset, windowLen);
+        _testContentSnippetWithOffset(byteParser, 9, "(byte[])\"[broken]\n\"");
+        byteParser.close();
+
+        // for char-based we get true offset at end of token
+        final char[] docChars = doc.toCharArray();
+        JsonParser charParser = JSON_F.createParser(docChars, offset, windowLen);
+        _testContentSnippetWithOffset(charParser, 8, "(char[])\"[broken]\n\"");
+        charParser.close();
+    }
+
+    // Helper: expects START_ARRAY followed by a failure on the unquoted token
+    // `broken`, then checks the failure's line (always 1), column and source
+    // description against the expected values.
+    private void _testContentSnippetWithOffset(final JsonParser p,
+            int expColumn, String expContent) throws Exception
+    {
+        assertToken(JsonToken.START_ARRAY, p.nextToken());
+
+        StreamReadException failure = null;
+        try {
+            p.nextToken();
+        } catch (StreamReadException e) {
+            failure = e;
+        }
+        // Getting here without an exception means the bad token was accepted
+        if (failure == null) {
+            fail("Should not pass");
+        }
+
+        verifyException(failure, "Unrecognized token 'broken'");
+        final JsonLocation where = failure.getLocation();
+        assertEquals(1, where.getLineNr());
+        assertEquals(expColumn, where.getColumnNr());
+        assertEquals(expContent, where.sourceDescription());
+    }
 }