diff --git a/pom.xml b/pom.xml
index 75561b6e0..66ef78098 100644
--- a/pom.xml
+++ b/pom.xml
@@ -83,7 +83,7 @@
1.11.133
0.10.2-hadoop2
1.56
- 6.10.0-SNAPSHOT
+ 6.10.0
1.1.5
1.6
2.5
diff --git a/wrangler-core/src/main/java/io/cdap/directives/xml/XmlToJson.java b/wrangler-core/src/main/java/io/cdap/directives/xml/XmlToJson.java
index 8a3ed89ff..513a54d17 100644
--- a/wrangler-core/src/main/java/io/cdap/directives/xml/XmlToJson.java
+++ b/wrangler-core/src/main/java/io/cdap/directives/xml/XmlToJson.java
@@ -38,6 +38,7 @@
import io.cdap.wrangler.api.parser.Numeric;
import io.cdap.wrangler.api.parser.TokenType;
import io.cdap.wrangler.api.parser.UsageDefinition;
+import org.apache.commons.lang.StringUtils;
import org.json.JSONException;
import org.json.XML;
@@ -52,9 +53,11 @@
@Description("Parses a XML document to JSON representation.")
public class XmlToJson implements Directive, Lineage {
public static final String NAME = "parse-xml-to-json";
+ public static final String ARG_KEEP_STRING = "keep-string";
// Column within the input row that needs to be parsed as Json
private String col;
private int depth;
+ private boolean keepString;
private final Gson gson = new Gson();
@Override
@@ -62,6 +65,7 @@ public UsageDefinition define() {
UsageDefinition.Builder builder = UsageDefinition.builder(NAME);
builder.define("column", TokenType.COLUMN_NAME);
builder.define("depth", TokenType.NUMERIC, Optional.TRUE);
+ builder.define(ARG_KEEP_STRING, TokenType.BOOLEAN, Optional.TRUE);
return builder.build();
}
@@ -73,6 +77,12 @@ public void initialize(Arguments args) throws DirectiveParseException {
} else {
this.depth = Integer.MAX_VALUE;
}
+
+ if (args.contains(ARG_KEEP_STRING) &&
+ StringUtils.isNotEmpty(args.value(ARG_KEEP_STRING).value().toString())) {
+ this.keepString = Boolean.parseBoolean(args.value(ARG_KEEP_STRING).value().toString());
+ }
+
}
@Override
@@ -93,7 +103,7 @@ public List execute(List rows, ExecutorContext context) throws Directi
try {
if (object instanceof String) {
- JsonObject element = gson.fromJson(XML.toJSONObject((String) object).toString(),
+ JsonObject element = gson.fromJson(XML.toJSONObject((String) object, this.keepString).toString(),
JsonElement.class).getAsJsonObject();
JsParser.jsonFlatten(element, col, 1, depth, row);
row.remove(idx);
diff --git a/wrangler-core/src/test/java/io/cdap/directives/parser/XmlToJsonTest.java b/wrangler-core/src/test/java/io/cdap/directives/parser/XmlToJsonTest.java
new file mode 100644
index 000000000..2d08228a8
--- /dev/null
+++ b/wrangler-core/src/test/java/io/cdap/directives/parser/XmlToJsonTest.java
@@ -0,0 +1,54 @@
+/*
+ * Copyright © 2024 Cask Data, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package io.cdap.directives.parser;
+
+import io.cdap.directives.xml.XmlToJson;
+import io.cdap.wrangler.TestingRig;
+import io.cdap.wrangler.api.Row;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Tests {@link XmlToJson}, including the optional keep-string argument of the directive.
+ */
+public class XmlToJsonTest {
+  @Test
+  public void testAutoConversionOfStringField() throws Exception {
+    String[] directives = new String[] {
+      "copy body body_1 true",
+      "copy body body_2 true",
+      "copy body body_3 true",
+      "parse-xml-to-json body_1 1",
+      "parse-xml-to-json body_2 1 false",
+      "parse-xml-to-json body_3 1 true"
+    };
+    // The tag value looks like scientific notation; it is only kept as text when keep-string is true.
+    List<Row> rows = Arrays.asList(
+      new Row("body",
+              "<Data><tagid>303246306303E8</tagid></Data>")
+    );
+
+    rows = TestingRig.execute(directives, rows);
+    Assert.assertEquals(1, rows.size());
+    Assert.assertEquals("{\"tagid\":3.03246306303E19}", rows.get(0).getValue("body_1_Data").toString());
+    Assert.assertEquals("{\"tagid\":3.03246306303E19}", rows.get(0).getValue("body_2_Data").toString());
+    Assert.assertEquals("{\"tagid\":\"303246306303E8\"}", rows.get(0).getValue("body_3_Data").toString());
+  }
+}
diff --git a/wrangler-docs/directives/parse-xml-to-json.md b/wrangler-docs/directives/parse-xml-to-json.md
index 031633786..beb136b0c 100644
--- a/wrangler-docs/directives/parse-xml-to-json.md
+++ b/wrangler-docs/directives/parse-xml-to-json.md
@@ -8,11 +8,13 @@ transforms the XML into a JSON document, simplifying further parsing using the
## Syntax
```
-parse-xml-to-json <column> [<depth>]
+parse-xml-to-json <column> [<depth>] [<keep-string>]
```
* `<column>` is the name of the column in the record that is an XML document.
* `<depth>` indicates the depth at which the XML document parsing should terminate processing.
+* `<keep-string>` is an OPTIONAL boolean value. If `true`, values are not coerced into boolean or numeric
+  values and are instead left as strings (as per `org.json.XML` rules). The default value is `false`.
## Usage Notes