diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/SpecifiedLength.scala b/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/SpecifiedLength.scala index 0bd09d18c2..00552021ac 100644 --- a/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/SpecifiedLength.scala +++ b/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/SpecifiedLength.scala @@ -110,8 +110,7 @@ trait SpecifiedLengthExplicitImplicitUnparserMixin { new SpecifiedLengthExplicitImplicitUnparser( u, e.elementRuntimeData, - e.unparseTargetLengthInBitsEv, - e.maybeUnparseTargetLengthInCharactersEv + e.unparseTargetLengthInBitsEv ) } } diff --git a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/exceptions/ThrowsSDE.scala b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/exceptions/ThrowsSDE.scala index e6801bedbb..dc5df76c36 100644 --- a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/exceptions/ThrowsSDE.scala +++ b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/exceptions/ThrowsSDE.scala @@ -80,6 +80,19 @@ trait ThrowsSDE { final def notYetImplemented(msg: String, args: Any*): Nothing = SDE("Feature not yet implemented: " + msg, args: _*) + /** + * Use for cases where it is an SDE because of something we've chosen + * not to implement. Not merely short term (haven't coded it yet, but intending to), + * more like things we've chosen to defer intentionally to some future release. + */ + def subset(testThatWillThrowIfFalse: Boolean, msg: String, args: Any*) = { + if (!testThatWillThrowIfFalse) subsetError(msg, args: _*) + } + + def subsetError(msg: String, args: Any*) = { + val msgTxt = msg.format(args: _*) + SDE("Subset: " + msgTxt) + } } /** diff --git a/daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/unparsers/runtime1/SpecifiedLengthUnparsers.scala b/daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/unparsers/runtime1/SpecifiedLengthUnparsers.scala index 321e738318..f2ec32e9b3 100644 --- a/daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/unparsers/runtime1/SpecifiedLengthUnparsers.scala +++ b/daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/unparsers/runtime1/SpecifiedLengthUnparsers.scala @@ -20,42 +20,22 @@ package org.apache.daffodil.unparsers.runtime1 import org.apache.daffodil.lib.exceptions.Assert import org.apache.daffodil.lib.schema.annotation.props.gen.LengthUnits import org.apache.daffodil.lib.schema.annotation.props.gen.Representation -import org.apache.daffodil.lib.util.Maybe import org.apache.daffodil.lib.util.Maybe._ -import org.apache.daffodil.lib.util.MaybeJULong -import org.apache.daffodil.runtime1.dpath.NodeInfo.PrimType import org.apache.daffodil.runtime1.infoset.DIElement import org.apache.daffodil.runtime1.infoset.DISimple import org.apache.daffodil.runtime1.infoset.Infoset -import org.apache.daffodil.runtime1.infoset.RetryableException import org.apache.daffodil.runtime1.processors.CharsetEv import org.apache.daffodil.runtime1.processors.ElementRuntimeData -import org.apache.daffodil.runtime1.processors.Evaluatable import org.apache.daffodil.runtime1.processors.ParseOrUnparseState -import org.apache.daffodil.runtime1.processors.Success import org.apache.daffodil.runtime1.processors.UnparseTargetLengthInBitsEv -import org.apache.daffodil.runtime1.processors.UnparseTargetLengthInCharactersEv import org.apache.daffodil.runtime1.processors.unparsers._ import passera.unsigned.ULong -/** - * Restricts the bits available for unparsing to just those within - * the specified length computed. - * - * If a unparser (supplied as arg) runs past the available space, - * that's an unparse error. - * - * Truncation of strings - the only case where we truncate, and only when - * dfdl:truncateSpecifiedLengthString is 'yes', is handled elsewhere. - */ -sealed abstract class SpecifiedLengthUnparserBase(eUnparser: Unparser, erd: ElementRuntimeData) - final class SpecifiedLengthExplicitImplicitUnparser( eUnparser: Unparser, erd: ElementRuntimeData, - targetLengthInBitsEv: UnparseTargetLengthInBitsEv, - maybeTargetLengthInCharactersEv: Maybe[UnparseTargetLengthInCharactersEv] + targetLengthInBitsEv: UnparseTargetLengthInBitsEv ) extends CombinatorUnparser(erd) { override lazy val runtimeDependencies = Vector() @@ -74,215 +54,23 @@ final class SpecifiedLengthExplicitImplicitUnparser( } override final def unparse(state: UState): Unit = { - - erd.impliedRepresentation match { - case Representation.Binary => - unparseBits(state) - case Representation.Text => { - val dcs = getCharset(state) - if (dcs.maybeFixedWidth.isDefined) - unparseBits(state) - else { - // we know the encoding is variable width characters - // but we don't know if the length units characters or bits/bytes. - lengthUnits match { - case LengthUnits.Bits | LengthUnits.Bytes => - unparseVarWidthCharactersInBits(state) - case LengthUnits.Characters => - unparseVarWidthCharactersInCharacters(state) - } - } - } - } - } - - /** - * Encoding is variable width (e.g., utf-8, but the - * target length is expressed in bits. - * - * Truncation, in this case, requires determining how many - * of the string's characters fit within the available target length - * bits. - */ - def unparseVarWidthCharactersInBits(state: UState): Unit = { - val maybeTLBits = getMaybeTL(state, targetLengthInBitsEv) - - if (maybeTLBits.isDefined) { - // - // We know the target length. We can use it. - // - if (areTruncating) { - val diSimple = state.currentInfosetNode.asSimple - val v = diSimple.dataValue.getString - val tl = maybeTLBits.get - val cs = getCharset(state) - val newV = state.truncateToBits(v, cs, tl) - // - // JIRA DFDL-1592 - // - // BUG: should not modify the actual dataset value. - // as fn:string-length of the value should always return the same - // value which is the un-truncated length. - // - diSimple.overwriteDataValue(newV) - } - eUnparser.unparse1(state) + lazy val dcs = getCharset(state) + if ( + erd.impliedRepresentation == Representation.Text && + lengthUnits == LengthUnits.Characters && + dcs.maybeFixedWidth.isEmpty && + erd.isComplexType + ) { + state.subsetError( + "Variable width character encoding '%s', dfdl:lengthKind '%s' and dfdl:lengthUnits '%s' are not supported for complex types.", + getCharset(state).name, + lengthKind.toString, + lengthUnits.toString + ) } else { - // target length unknown - // ignore constraining the output length. Just unparse it. - // - // This happens when we're unparsing, and this element depends on a prior element for - // determining its length, but that prior element has dfdl:outputValueCalc that depends - // on this element. - // This breaks the chicken-egg cycle. - // eUnparser.unparse1(state) } } - - private def areTruncating = { - if (erd.isSimpleType && (erd.optPrimType.get eq PrimType.String)) { - Assert.invariant(erd.optTruncateSpecifiedLengthString.isDefined) - erd.optTruncateSpecifiedLengthString.get - } else - false - } - - /** - * Encoding is variable width (e.g., utf-8). The target - * length is expressed in characters. - */ - def unparseVarWidthCharactersInCharacters(state: UState): Unit = { - - // - // variable-width encodings and lengthUnits characters, and lengthKind explicit - // is not supported (currently) for complex types - // - state.schemaDefinitionUnless( - erd.isSimpleType, - "Variable width character encoding '%s', dfdl:lengthKind '%s' and dfdl:lengthUnits '%s' are not supported for complex types.", - getCharset(state).name, - lengthKind.toString, - lengthUnits.toString - ) - - Assert.invariant(erd.isSimpleType) - Assert.invariant(this.maybeTargetLengthInCharactersEv.isDefined) - val tlEv = this.maybeTargetLengthInCharactersEv.get - val tlChars = this.getMaybeTL(state, tlEv) - - if (tlChars.isDefined) { - // - // possibly truncate - // - if (areTruncating) { - val diSimple = state.currentInfosetNode.asSimple - val v = diSimple.dataValue.getString - val tl = tlChars.get - if (v.length > tl) { - // string is too long, truncate to target length - val newV = v.substring(0, tl.toInt) - // - // BUG: JIRA DFDL-1592 - should not be overwriting the value with - // truncated value. - // - diSimple.overwriteDataValue(newV) - } - } - eUnparser.unparse1(state, erd) - } else { - // target length unknown - // ignore constraining the output length. Just unparse it. - // - // This happens when we're unparsing, and this element depends on a prior element for - // determining its length, but that prior element has dfdl:outputValueCalc that depends - // on this element. - // This breaks the chicken-egg cycle. - // - eUnparser.unparse1(state, erd) - } - } - - private def getMaybeTL(state: UState, TLEv: Evaluatable[MaybeJULong]): MaybeJULong = { - val maybeTLBits = - try { - val tlRes = TLEv.evaluate(state) - Assert.invariant(tlRes.isDefined) // otherwise we shouldn't be in this method at all - tlRes - } catch { - case e: RetryableException => { - // - // TargetLength expression couldn't be evaluated. - // - MaybeJULong.Nope - } - } - maybeTLBits - } - - /** - * Regardless of the type (text or binary), the target length - * will be provided in bits. - */ - def unparseBits(state: UState): Unit = { - val maybeTLBits = getMaybeTL(state, targetLengthInBitsEv) - - if (maybeTLBits.isDefined) { - // - // We know the target length. We can use it. - // - // val nBits = maybeTLBits.get - // val dos = state.dataOutputStream - - // - // withBitLengthLimit is incorrect. It doesn't take into account - // that after the unparse, we could be looking at a current state - // with a distinct DOS - // - // val isLimitOk = dos.withBitLengthLimit(nBits) { - // eUnparser.unparse1(state, erd) - // } - // - // if (!isLimitOk) { - // val availBits = if (dos.remainingBits.isDefined) dos.remainingBits.get.toString else "(unknown)" - // UE(state, "Insufficient bits available. Required %s bits, but only %s were available.", nBits, availBits) - // } - - eUnparser.unparse1(state, erd) - - // at this point the recursive parse of the children is finished - - if (state.processorStatus ne Success) return - - // We might not have used up all the bits. So some bits may need to - // be skipped and filled in by fillbyte. - // - // In the DFDL data grammar the region being skipped is either the - // RightFill region, or the ElementUnused region. Skipping this is handled - // elsewhere, along with insertion of padding before/after a string. - // - - } else { - // - // we couldn't get the target length - // - // This happens when we're unparsing, and this element depends on a prior element for - // determining its length via a length expression, but that prior element - // has dfdl:outputValueCalc that depends on this element, typically by a - // call to dfdl:valueLength of this, or of some structure that includes - // this. - // - // This breaks the chicken-egg cycle, by just unparsing it without - // constraint. That produces the value which (ignoring truncation) - // can be unparsed to produce the dfdl:valueLength of this element. - // - // This does assume that the value will get truncated properly for the - // case where we do truncation (type string, with - // dfdl:truncateSpecifiedLengthString 'yes') by some other mechanism. - // - eUnparser.unparse1(state, erd) - } - } } // TODO: implement the capture length unparsers as just using this trait? diff --git a/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/dsom/SDE.scala b/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/dsom/SDE.scala index 145bccb3a7..ff2535f012 100644 --- a/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/dsom/SDE.scala +++ b/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/dsom/SDE.scala @@ -281,19 +281,4 @@ trait ImplementsThrowsOrSavesSDE extends ImplementsThrowsSDE with SavesErrorsAnd } } } - - /** - * Use for cases where it is an SDE because of something we've chosen - * not to implement. Not merely short term (haven't coded it yet, but intending to), - * more like things we've chosen to defer intentionally to some future release. - */ - def subset(testThatWillThrowIfFalse: Boolean, msg: String, args: Any*) = { - if (!testThatWillThrowIfFalse) subsetError(msg, args: _*) - } - - def subsetError(msg: String, args: Any*) = { - val msgTxt = msg.format(args: _*) - SDE("Subset: " + msgTxt) - } - } diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/section00/general/testUnparserGeneral.tdml b/daffodil-test/src/test/resources/org/apache/daffodil/section00/general/testUnparserGeneral.tdml index f4dee13dc9..2b4691fe44 100644 --- a/daffodil-test/src/test/resources/org/apache/daffodil/section00/general/testUnparserGeneral.tdml +++ b/daffodil-test/src/test/resources/org/apache/daffodil/section00/general/testUnparserGeneral.tdml @@ -20,6 +20,7 @@ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dfdl="http://www.ogf.org/dfdl/dfdl-1.0/" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:ex="http://example.com" xmlns:daf="urn:ogf:dfdl:2013:imp:daffodil.apache.org:2018:ext" + xmlns:fn="http://www.w3.org/2005/xpath-functions" suiteName="generalUnparserTests"> @@ -40,6 +41,42 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -56,6 +93,14 @@ + + + + + + + +