From 915078c1c79a3c728a78a6c547c68087e31ada25 Mon Sep 17 00:00:00 2001
From: Jake Luciani
Date: Mon, 4 Nov 2024 23:13:55 -0500
Subject: [PATCH] Better json detection and extraction from strings

---
Review notes (text in this section is not part of the commit message):

- extractJsonFromString scans the input for '{' or '[', asks Jackson to parse
  from that position, skips to the matching end token and records the covered
  substring; it then continues scanning the remainder of the input.
- Fix: the extracted value is now sliced from `text` instead of `s`. Both `i`
  and `endIndex` are offsets into `text`, and `text` is re-based after every
  match, so slicing `s` returned the wrong characters for the second and any
  later JSON value in the same string.
- NOTE(review): the run of '}' / ']' characters starting at the reported end
  offset is still absorbed unchanged. This can over-capture when unrelated
  closing brackets directly follow a value -- confirm against Jackson's
  token-location semantics before tightening it.
- Line breaks and indentation below were restored from a whitespace-collapsed
  copy of this patch, and the `List<String>` type arguments were re-added
  where markup stripping had removed them. The pom.xml hunk has lost its XML
  element tags and two tag-only context lines; restore them from the
  repository before applying.

 .../github/tjake/jlama/util/JsonSupport.java  | 52 ++++++++++++++-----
 pom.xml                                       |  2 +-
 2 files changed, 41 insertions(+), 13 deletions(-)

diff --git a/jlama-core/src/main/java/com/github/tjake/jlama/util/JsonSupport.java b/jlama-core/src/main/java/com/github/tjake/jlama/util/JsonSupport.java
index e682883..5995f50 100644
--- a/jlama-core/src/main/java/com/github/tjake/jlama/util/JsonSupport.java
+++ b/jlama-core/src/main/java/com/github/tjake/jlama/util/JsonSupport.java
@@ -15,25 +15,24 @@
  */
 package com.github.tjake.jlama.util;
 
+import com.fasterxml.jackson.core.JsonFactory;
 import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.core.JsonToken;
 import com.fasterxml.jackson.core.util.DefaultPrettyPrinter;
 import com.fasterxml.jackson.databind.DeserializationFeature;
 import com.fasterxml.jackson.databind.MapperFeature;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import java.io.IOException;
+import java.io.StringReader;
 import java.util.ArrayList;
 import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
 
 /**
  * Helper class for Jackson JSON support
  */
 public class JsonSupport {
-    private static final String JSON_REGEX =
-        "(\\{\\s*(\"[^\"]+\"\\s*:\\s*(\"[^\"]*\"|\\d+|true|false|null|\\{[^{}]*\\}|\\[[^\\[\\]]*\\])\\s*,?\\s*)+\\})|(\\[\\s*(\\{\\s*(\"[^\"]+\"\\s*:\\s*(\"[^\"]*\"|\\d+|true|false|null|\\{[^{}]*\\}|\\[[^\\[\\]]*\\])\\s*,?\\s*)+\\}\\s*,?\\s*)+\\])";
-    private static final Pattern JSON_PATTERN = Pattern.compile(JSON_REGEX);
-    public static final ObjectMapper om = new ObjectMapper().configure(DeserializationFeature.FAIL_ON_IGNORED_PROPERTIES, false)
+    public static final ObjectMapper om = new ObjectMapper().configure(DeserializationFeature.FAIL_ON_IGNORED_PROPERTIES, false)
         .configure(DeserializationFeature.FAIL_ON_TRAILING_TOKENS, false)
         .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
         .configure(DeserializationFeature.FAIL_ON_MISSING_CREATOR_PROPERTIES, false)
@@ -48,14 +47,43 @@ public static String toJson(Object o) {
     }
 
     public static List<String> extractJsonFromString(String s) {
-        Matcher matcher = JSON_PATTERN.matcher(s);
-        List<String> extractedJson = new ArrayList<>();
-        while (matcher.find()) {
-            String json = matcher.group();
-            extractedJson.add(json);
+        JsonFactory factory = new JsonFactory();
+        String text = s;
+        List<String> jsons = new ArrayList<>();
+
+        while (text != null && !text.isEmpty()) {
+            boolean found = false;
+            for (int i = 0; i < text.length(); i++) {
+                if (text.charAt(i) == '{' || text.charAt(i) == '[') {
+                    String possibleJson = text.substring(i);
+                    try (JsonParser parser = factory.createParser(new StringReader(possibleJson))) {
+                        JsonToken token = parser.nextToken();
+                        if (token != null) {
+                            parser.skipChildren();
+                            int endIndex = i + (int) parser.getTokenLocation().getCharOffset();
+                            int extra = 0;
+                            while (endIndex + extra < text.length() && (text.charAt(endIndex + extra) == '}' || text.charAt(endIndex + extra) == ']')) {
+                                extra++;
+                            }
+                            String jsonString = text.substring(i, endIndex + extra);
+                            jsons.add(jsonString);
+                            found = true;
+                            text = text.substring(endIndex + extra);
+                            // Offsets are relative to text, so resume scanning right after this value
+                            break;
+                        }
+                    } catch (Exception e) {
+                        // Not a valid JSON at this position
+                    }
+                }
+            }
+
+            if (!found) {
+                break;
+            }
         }
 
-        return extractedJson;
+        return jsons;
     }
 
     public static class JlamaPrettyPrinter extends DefaultPrettyPrinter {
diff --git a/pom.xml b/pom.xml
index 6fe1f7b..51732e7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -42,7 +42,7 @@
     UTF-8
-    0.8.1
+    0.8.2
     2.0.7
     1.5.6