diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 332203c3fa..55ec183772 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -95,6 +95,7 @@ jobs: github.repository == 'apache/daffodil' && github.ref == 'refs/heads/main' }} + DAFFODIL_TDML_API_INFOSETS: all steps: diff --git a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala index 6c5a40f85a..4ae2a2e845 100644 --- a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala +++ b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala @@ -176,6 +176,8 @@ class DaffodilTDMLDFDLProcessor private (private var dp: DataProcessor) private def blobPrefix = "" private def blobSuffix = ".bin" + private lazy val tdmlApiInfosetsEnv = sys.env.getOrElse("DAFFODIL_TDML_API_INFOSETS", "scala") + override def withDebugging(b: Boolean): DaffodilTDMLDFDLProcessor = copy(dp = dp.withDebugging(b)) @@ -206,22 +208,16 @@ class DaffodilTDMLDFDLProcessor private (private var dp: DataProcessor) ): DaffodilTDMLDFDLProcessor = copy(dp = dp.withExternalVariables(externalVarBindings)) - def parse(uri: java.net.URI, lengthLimitInBits: Long): TDMLParseResult = { - val url = uri.toURL - val dpInputStream = url.openStream() - val saxInputStream = url.openStream() - doParseWithBothApis(dpInputStream, saxInputStream, lengthLimitInBits) - } - - def parse(arr: Array[Byte], lengthLimitInBits: Long): TDMLParseResult = { - val dpInputStream = new ByteArrayInputStream(arr) - val saxInputStream = new ByteArrayInputStream(arr) - doParseWithBothApis(dpInputStream, saxInputStream, lengthLimitInBits) - } - override def parse(is: java.io.InputStream, lengthLimitInBits: Long): TDMLParseResult = { - val arr = IOUtils.toByteArray(is) - parse(arr, lengthLimitInBits) + val (dpInputStream, optSaxInputStream) = if (tdmlApiInfosetsEnv == "all") { + val arr = IOUtils.toByteArray(is) + val saxInputStream = new ByteArrayInputStream(arr) + val dpInputStream = new ByteArrayInputStream(arr) + (dpInputStream, Some(saxInputStream)) + } else { + (is, None) + } + doParse(dpInputStream, optSaxInputStream, lengthLimitInBits) } override def unparse( @@ -252,104 +248,126 @@ class DaffodilTDMLDFDLProcessor private (private var dp: DataProcessor) infosetXML: scala.xml.Node, outStream: java.io.OutputStream ): TDMLUnparseResult = { - val bos = new ByteArrayOutputStream() - val osw = new OutputStreamWriter(bos, StandardCharsets.UTF_8) - scala.xml.XML.write(osw, infosetXML, "UTF-8", xmlDecl = true, null) - osw.flush() - osw.close() - val saxInstream = new ByteArrayInputStream(bos.toByteArray) - doUnparseWithBothApis(inputter, saxInstream, outStream) + val optSaxInstream = if (tdmlApiInfosetsEnv == "all") { + val bos = new ByteArrayOutputStream() + val osw = new OutputStreamWriter(bos, StandardCharsets.UTF_8) + scala.xml.XML.write(osw, infosetXML, "UTF-8", xmlDecl = true, null) + osw.flush() + osw.close() + val sis = new ByteArrayInputStream(bos.toByteArray) + Some(sis) + } else { + None + } + doUnparse(inputter, optSaxInstream, outStream) } - def doParseWithBothApis( + def doParse( dpInputStream: java.io.InputStream, - saxInputStream: java.io.InputStream, + optSaxInputStream: Option[java.io.InputStream] = None, lengthLimitInBits: Long ): TDMLParseResult = { - val outputter = new TDMLInfosetOutputter() + val outputter = if (tdmlApiInfosetsEnv == "all") { + new TDMLInfosetOutputterAll + } else { + new TDMLInfosetOutputterScala + } outputter.setBlobAttributes(blobDir, blobPrefix, blobSuffix) - val xri = dp.newXMLReaderInstance - val errorHandler = new DaffodilTDMLSAXErrorHandler() - val saxOutputStream = new ByteArrayOutputStream() - val saxHandler = - new DaffodilParseOutputStreamContentHandler(saxOutputStream, pretty = false) - xri.setContentHandler(saxHandler) - xri.setErrorHandler(errorHandler) - xri.setProperty(XMLUtils.DAFFODIL_SAX_URN_BLOBDIRECTORY, blobDir) - xri.setProperty(XMLUtils.DAFFODIL_SAX_URN_BLOBPREFIX, blobPrefix) - xri.setProperty(XMLUtils.DAFFODIL_SAX_URN_BLOBSUFFIX, blobSuffix) - using(InputSourceDataInputStream(dpInputStream)) { dis => - using(InputSourceDataInputStream(saxInputStream)) { sis => - // The length limit here should be the length of the document - // under test. Only set a limit when the end of the document - // do not match a byte boundary. - if (lengthLimitInBits % 8 != 0) { - Assert.usage(lengthLimitInBits >= 0) - dis.setBitLimit0b(MaybeULong(lengthLimitInBits)) - sis.setBitLimit0b(MaybeULong(lengthLimitInBits)) - } - - val actual = dp.parse(dis, outputter) - xri.parse(sis) + // The length limit here should be the length of the document + // under test. Only set a limit when the end of the document + // do not match a byte boundary. + if (lengthLimitInBits % 8 != 0) { + Assert.usage(lengthLimitInBits >= 0) + dis.setBitLimit0b(MaybeULong(lengthLimitInBits)) + } - if (!actual.isError && !errorHandler.isError) { - verifySameParseOutput(outputter.xmlStream, saxOutputStream) + val actual = dp.parse(dis, outputter) + if (tdmlApiInfosetsEnv == "all") { + val saxInputStream = optSaxInputStream.get + using(InputSourceDataInputStream(saxInputStream)) { sis => + // The length limit here should be the length of the document + // under test. Only set a limit when the end of the document + // do not match a byte boundary. + if (lengthLimitInBits % 8 != 0) { + Assert.usage(lengthLimitInBits >= 0) + sis.setBitLimit0b(MaybeULong(lengthLimitInBits)) + } + + val xri = dp.newXMLReaderInstance + val errorHandler = new DaffodilTDMLSAXErrorHandler() + val saxOutputStream = new ByteArrayOutputStream() + val saxHandler = + new DaffodilParseOutputStreamContentHandler(saxOutputStream, pretty = false) + xri.setContentHandler(saxHandler) + xri.setErrorHandler(errorHandler) + xri.setProperty(XMLUtils.DAFFODIL_SAX_URN_BLOBDIRECTORY, blobDir) + xri.setProperty(XMLUtils.DAFFODIL_SAX_URN_BLOBPREFIX, blobPrefix) + xri.setProperty(XMLUtils.DAFFODIL_SAX_URN_BLOBSUFFIX, blobSuffix) + + xri.parse(sis) + + if (!actual.isError && !errorHandler.isError) { + verifySameParseOutput(outputter.xmlStream, saxOutputStream) + } + val dpParseDiag = actual.getDiagnostics.map(_.getMessage()) + val saxParseDiag = errorHandler.getDiagnostics.map(_.getMessage()) + verifySameDiagnostics(dpParseDiag, saxParseDiag) } - val dpParseDiag = actual.getDiagnostics.map(_.getMessage()) - val saxParseDiag = errorHandler.getDiagnostics.map(_.getMessage()) - verifySameDiagnostics(dpParseDiag, saxParseDiag) - - new DaffodilTDMLParseResult(actual, outputter) } + new DaffodilTDMLParseResult(actual, outputter) } } - def doUnparseWithBothApis( + def doUnparse( dpInputter: TDMLInfosetInputter, - saxInputStream: java.io.InputStream, + optSaxInputStream: Option[java.io.InputStream] = None, dpOutputStream: java.io.OutputStream ): DaffodilTDMLUnparseResult = { val dpOutputChannel = java.nio.channels.Channels.newChannel(dpOutputStream) - val saxOutputStream = new ByteArrayOutputStream - val saxOutputChannel = java.nio.channels.Channels.newChannel(saxOutputStream) - val unparseContentHandler = dp.newContentHandlerInstance(saxOutputChannel) - unparseContentHandler.enableResolutionOfRelativeInfosetBlobURIs() - val xmlReader = DaffodilSAXParserFactory().newSAXParser.getXMLReader - xmlReader.setContentHandler(unparseContentHandler) - xmlReader.setFeature(XMLUtils.SAX_NAMESPACES_FEATURE, true) - xmlReader.setFeature(XMLUtils.SAX_NAMESPACE_PREFIXES_FEATURE, true) - val actualDP = dp.unparse(dpInputter, dpOutputChannel).asInstanceOf[UnparseResult] dpOutputChannel.close() - // kick off SAX Unparsing - try { - xmlReader.parse(new InputSource(saxInputStream)) - } catch { - case e: DaffodilUnhandledSAXException => - // In the case of an unexpected errors, catch and throw as TDMLException - throw TDMLException("Unexpected error during SAX Unparse:" + e, None) - case _: DaffodilUnparseErrorSAXException => - // do nothing as unparseResult and its diagnostics will be handled below - } - val actualSAX = unparseContentHandler.getUnparseResult - saxOutputChannel.close() - if (!actualDP.isError && !actualSAX.isError) { - val dpis = new ByteArrayInputStream( - dpOutputStream.asInstanceOf[ByteArrayOutputStream].toByteArray - ) - if (actualDP.isScannable && actualSAX.isScannable) { - VerifyTestCase.verifyTextData(dpis, saxOutputStream, actualSAX.encodingName, None) - } else { - VerifyTestCase.verifyBinaryOrMixedData(dpis, saxOutputStream, None) + if (tdmlApiInfosetsEnv == "all") { + val saxInputStream = optSaxInputStream.get + val saxOutputStream = new ByteArrayOutputStream + val saxOutputChannel = java.nio.channels.Channels.newChannel(saxOutputStream) + val unparseContentHandler = dp.newContentHandlerInstance(saxOutputChannel) + unparseContentHandler.enableResolutionOfRelativeInfosetBlobURIs() + val xmlReader = DaffodilSAXParserFactory().newSAXParser.getXMLReader + xmlReader.setContentHandler(unparseContentHandler) + xmlReader.setFeature(XMLUtils.SAX_NAMESPACES_FEATURE, true) + xmlReader.setFeature(XMLUtils.SAX_NAMESPACE_PREFIXES_FEATURE, true) + + // kick off SAX Unparsing + try { + xmlReader.parse(new InputSource(saxInputStream)) + } catch { + case e: DaffodilUnhandledSAXException => + // In the case of an unexpected errors, catch and throw as TDMLException + throw TDMLException("Unexpected error during SAX Unparse:" + e, None) + case _: DaffodilUnparseErrorSAXException => + // do nothing as unparseResult and its diagnostics will be handled below + } + + val actualSAX = unparseContentHandler.getUnparseResult + saxOutputChannel.close() + if (!actualDP.isError && !actualSAX.isError) { + val dpis = new ByteArrayInputStream( + dpOutputStream.asInstanceOf[ByteArrayOutputStream].toByteArray + ) + if (actualDP.isScannable && actualSAX.isScannable) { + VerifyTestCase.verifyTextData(dpis, saxOutputStream, actualSAX.encodingName, None) + } else { + VerifyTestCase.verifyBinaryOrMixedData(dpis, saxOutputStream, None) + } } + val dpUnparseDiag = actualDP.getDiagnostics.map(_.getMessage()) + val saxUnparseDiag = actualSAX.getDiagnostics.map(_.getMessage()) + verifySameDiagnostics(dpUnparseDiag, saxUnparseDiag) } - val dpUnparseDiag = actualDP.getDiagnostics.map(_.getMessage()) - val saxUnparseDiag = actualSAX.getDiagnostics.map(_.getMessage()) - verifySameDiagnostics(dpUnparseDiag, saxUnparseDiag) new DaffodilTDMLUnparseResult(actualDP, dpOutputStream) } @@ -408,11 +426,11 @@ class DaffodilTDMLDFDLProcessor private (private var dp: DataProcessor) final class DaffodilTDMLParseResult(actual: DFDL.ParseResult, outputter: TDMLInfosetOutputter) extends TDMLParseResult { - override def getResult: Node = outputter.getResult() + override def getResult: Node = outputter.getResult override def getBlobPaths: Seq[Path] = outputter.getBlobPaths() - def inputter = outputter.toInfosetInputter() + def inputter = outputter.toInfosetInputter override def isProcessingError: Boolean = actual.isProcessingError diff --git a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala index ea569918b2..40a8e495b9 100644 --- a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala +++ b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala @@ -19,7 +19,10 @@ package org.apache.daffodil.processor.tdml import java.io.ByteArrayInputStream import java.io.ByteArrayOutputStream +import java.nio.charset.Charset +import scala.xml.Node +import org.apache.daffodil.runtime1.infoset.InfosetOutputter import org.apache.daffodil.runtime1.infoset.JDOMInfosetInputter import org.apache.daffodil.runtime1.infoset.JDOMInfosetOutputter import org.apache.daffodil.runtime1.infoset.JsonInfosetInputter @@ -33,10 +36,32 @@ import org.apache.daffodil.runtime1.infoset.W3CDOMInfosetOutputter import org.apache.daffodil.runtime1.infoset.XMLTextInfosetInputter import org.apache.daffodil.runtime1.infoset.XMLTextInfosetOutputter -class TDMLInfosetOutputter +class TDMLInfosetOutputterScala + extends { + private val scalaOut = new ScalaXMLInfosetOutputter() + private val outputters: Seq[InfosetOutputter] = Seq(scalaOut) + } + with TeeInfosetOutputter(outputters: _*) + with TDMLInfosetOutputter { + + override def getResult: Node = scalaOut.getResult + + override lazy val xmlStream: ByteArrayOutputStream = { + val bos = new ByteArrayOutputStream() + bos.write(getResult.toString().getBytes(Charset.defaultCharset())) + bos + } + + override def toInfosetInputter: TDMLInfosetInputter = { + val scalaIn = new ScalaXMLInfosetInputter(scalaOut.getResult) + new TDMLInfosetInputter(scalaIn, Seq()) + } +} + +class TDMLInfosetOutputterAll extends { private val jsonStream = new ByteArrayOutputStream() - val xmlStream = new ByteArrayOutputStream() + override val xmlStream = new ByteArrayOutputStream() private val scalaOut = new ScalaXMLInfosetOutputter() private val jdomOut = new JDOMInfosetOutputter() @@ -44,13 +69,15 @@ class TDMLInfosetOutputter private val jsonOut = new JsonInfosetOutputter(jsonStream, false) private val xmlOut = new XMLTextInfosetOutputter(xmlStream, false) - private val outputters = Seq(xmlOut, scalaOut, jdomOut, w3cdomOut, jsonOut) + private val outputters: Seq[InfosetOutputter] = + Seq(xmlOut, scalaOut, jdomOut, w3cdomOut, jsonOut) } - with TeeInfosetOutputter(outputters: _*) { + with TeeInfosetOutputter(outputters: _*) + with TDMLInfosetOutputter { - def getResult() = scalaOut.getResult + override def getResult: Node = scalaOut.getResult - def toInfosetInputter() = { + override def toInfosetInputter: TDMLInfosetInputter = { val scalaIn = new ScalaXMLInfosetInputter(scalaOut.getResult) val jdomIn = new JDOMInfosetInputter(jdomOut.getResult) val w3cdomIn = new W3CDOMInfosetInputter(w3cdomOut.getResult) @@ -63,3 +90,12 @@ class TDMLInfosetOutputter new TDMLInfosetInputter(scalaIn, Seq(jdomIn, w3cdomIn, jsonIn, xmlIn, nullIn)) } } + +trait TDMLInfosetOutputter extends InfosetOutputter { + + def xmlStream: ByteArrayOutputStream + + def getResult: Node + + def toInfosetInputter: TDMLInfosetInputter +} diff --git a/daffodil-test-integration/src/test/scala/org/apache/daffodil/cliTest/TestCLITdml.scala b/daffodil-test-integration/src/test/scala/org/apache/daffodil/cliTest/TestCLITdml.scala new file mode 100644 index 0000000000..7435696f2b --- /dev/null +++ b/daffodil-test-integration/src/test/scala/org/apache/daffodil/cliTest/TestCLITdml.scala @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.daffodil.cliTest + +import org.apache.daffodil.cli.Main.ExitCode +import org.apache.daffodil.cli.cliTest.Util._ + +import org.junit.Test + +class TestCLITdml { + + @Test def test_CLI_Tdml_Trace_singleTest1(): Unit = { + val tdml = path( + "daffodil-test/src/test/resources/org/apache/daffodil/section06/entities/Entities.tdml" + ) + + val envs = Map("DAFFODIL_TDML_API_INFOSETS" -> "all") + + runCLI(args"test -i -t $tdml byte_entities_6_08", envs = envs) { cli => + // legacy parse + cli.expect("parser: ") + // sax parse + cli.expect("parser: ") + cli.expect("[Pass] byte_entities_6_08") + }(ExitCode.Success) + } + + @Test def test_CLI_Tdml_Trace_singleTest2(): Unit = { + val tdml = path( + "daffodil-test/src/test/resources/org/apache/daffodil/section06/entities/Entities.tdml" + ) + + val envs = Map("DAFFODIL_TDML_API_INFOSETS" -> "scala") + + runCLI(args"test -i -t $tdml byte_entities_6_08", envs = envs) { cli => + // parse + cli.expect("parser: ") + // unparse + cli.expect("parser: not available") + cli.expect("[Pass] byte_entities_6_08") + }(ExitCode.Success) + } +}