Remove unnecessary truncation code

olabusayoT · olabusayoT · commit 146778413c84 · 2024-11-04T18:29:05.000-05:00
- we were truncating and overwriting value, but we didn't actually need to since we ended up truncating again right after. Removed the unnecessary and buggy code which changed the dataValue. StringMaybeTruncateBitsUnparser.unparse does not change the dataValue, instead it changes the DOS, resulting in the value getting truncated but string-length returning the untruncated length when it's called - move subset/subsetError to ThrowsSDE trait so state has access to it - refactor SpecifiedLengthExplicitImplicitUnparser to be more straightforward and to remove unused cruft - add tests DAFFODIL-1592
diff --git a/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/SpecifiedLength.scala b/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/SpecifiedLength.scala
@@ -110,8 +110,7 @@ trait SpecifiedLengthExplicitImplicitUnparserMixin {
       new SpecifiedLengthExplicitImplicitUnparser(
         u,
         e.elementRuntimeData,
-        e.unparseTargetLengthInBitsEv,
-        e.maybeUnparseTargetLengthInCharactersEv
+        e.unparseTargetLengthInBitsEv
       )
   }
 }
diff --git a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/exceptions/ThrowsSDE.scala b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/exceptions/ThrowsSDE.scala
@@ -80,6 +80,19 @@ trait ThrowsSDE {
   final def notYetImplemented(msg: String, args: Any*): Nothing =
     SDE("Feature not yet implemented: " + msg, args: _*)
 
+  /**
+   * Use for cases where it is an SDE because of something we've chosen
+   * not to implement. Not merely short term (haven't coded it yet, but intending to),
+   * more like things we've chosen to defer intentionally to some future release.
+   */
+  def subset(testThatWillThrowIfFalse: Boolean, msg: String, args: Any*) = {
+    if (!testThatWillThrowIfFalse) subsetError(msg, args: _*)
+  }
+
+  def subsetError(msg: String, args: Any*) = {
+    val msgTxt = msg.format(args: _*)
+    SDE("Subset: " + msgTxt)
+  }
 }
 
 /**
diff --git a/daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/unparsers/runtime1/SpecifiedLengthUnparsers.scala b/daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/unparsers/runtime1/SpecifiedLengthUnparsers.scala
@@ -20,42 +20,22 @@ package org.apache.daffodil.unparsers.runtime1
 import org.apache.daffodil.lib.exceptions.Assert
 import org.apache.daffodil.lib.schema.annotation.props.gen.LengthUnits
 import org.apache.daffodil.lib.schema.annotation.props.gen.Representation
-import org.apache.daffodil.lib.util.Maybe
 import org.apache.daffodil.lib.util.Maybe._
-import org.apache.daffodil.lib.util.MaybeJULong
-import org.apache.daffodil.runtime1.dpath.NodeInfo.PrimType
 import org.apache.daffodil.runtime1.infoset.DIElement
 import org.apache.daffodil.runtime1.infoset.DISimple
 import org.apache.daffodil.runtime1.infoset.Infoset
-import org.apache.daffodil.runtime1.infoset.RetryableException
 import org.apache.daffodil.runtime1.processors.CharsetEv
 import org.apache.daffodil.runtime1.processors.ElementRuntimeData
-import org.apache.daffodil.runtime1.processors.Evaluatable
 import org.apache.daffodil.runtime1.processors.ParseOrUnparseState
-import org.apache.daffodil.runtime1.processors.Success
 import org.apache.daffodil.runtime1.processors.UnparseTargetLengthInBitsEv
-import org.apache.daffodil.runtime1.processors.UnparseTargetLengthInCharactersEv
 import org.apache.daffodil.runtime1.processors.unparsers._
 
 import passera.unsigned.ULong
 
-/**
- * Restricts the bits available for unparsing to just those within
- * the specified length computed.
- *
- * If a unparser (supplied as arg) runs past the available space,
- * that's an unparse error.
- *
- * Truncation of strings - the only case where we truncate, and only when
- * dfdl:truncateSpecifiedLengthString is 'yes', is handled elsewhere.
- */
-sealed abstract class SpecifiedLengthUnparserBase(eUnparser: Unparser, erd: ElementRuntimeData)
-
 final class SpecifiedLengthExplicitImplicitUnparser(
   eUnparser: Unparser,
   erd: ElementRuntimeData,
-  targetLengthInBitsEv: UnparseTargetLengthInBitsEv,
-  maybeTargetLengthInCharactersEv: Maybe[UnparseTargetLengthInCharactersEv]
+  targetLengthInBitsEv: UnparseTargetLengthInBitsEv
 ) extends CombinatorUnparser(erd) {
 
   override lazy val runtimeDependencies = Vector()
@@ -74,215 +54,23 @@ final class SpecifiedLengthExplicitImplicitUnparser(
   }
 
   override final def unparse(state: UState): Unit = {
-
-    erd.impliedRepresentation match {
-      case Representation.Binary =>
-        unparseBits(state)
-      case Representation.Text => {
-        val dcs = getCharset(state)
-        if (dcs.maybeFixedWidth.isDefined)
-          unparseBits(state)
-        else {
-          // we know the encoding is variable width characters
-          // but we don't know if the length units characters or bits/bytes.
-          lengthUnits match {
-            case LengthUnits.Bits | LengthUnits.Bytes =>
-              unparseVarWidthCharactersInBits(state)
-            case LengthUnits.Characters =>
-              unparseVarWidthCharactersInCharacters(state)
-          }
-        }
-      }
-    }
-  }
-
-  /**
-   * Encoding is variable width (e.g., utf-8, but the
-   * target length is expressed in bits.
-   *
-   * Truncation, in this case, requires determining how many
-   * of the string's characters fit within the available target length
-   * bits.
-   */
-  def unparseVarWidthCharactersInBits(state: UState): Unit = {
-    val maybeTLBits = getMaybeTL(state, targetLengthInBitsEv)
-
-    if (maybeTLBits.isDefined) {
-      //
-      // We know the target length. We can use it.
-      //
-      if (areTruncating) {
-        val diSimple = state.currentInfosetNode.asSimple
-        val v = diSimple.dataValue.getString
-        val tl = maybeTLBits.get
-        val cs = getCharset(state)
-        val newV = state.truncateToBits(v, cs, tl)
-        //
-        // JIRA DFDL-1592
-        //
-        // BUG: should not modify the actual dataset value.
-        // as fn:string-length of the value should always return the same
-        // value which is the un-truncated length.
-        //
-        diSimple.overwriteDataValue(newV)
-      }
-      eUnparser.unparse1(state)
+    lazy val dcs = getCharset(state)
+    if (
+      erd.impliedRepresentation == Representation.Text &&
+      lengthUnits == LengthUnits.Characters &&
+      dcs.maybeFixedWidth.isEmpty &&
+      erd.isComplexType
+    ) {
+      state.subsetError(
+        "Variable width character encoding '%s', dfdl:lengthKind '%s' and dfdl:lengthUnits '%s' are not supported for complex types.",
+        getCharset(state).name,
+        lengthKind.toString,
+        lengthUnits.toString
+      )
     } else {
-      // target length unknown
-      // ignore constraining the output length. Just unparse it.
-      //
-      // This happens when we're unparsing, and this element depends on a prior element for
-      // determining its length, but that prior element has dfdl:outputValueCalc that depends
-      // on this element.
-      // This breaks the chicken-egg cycle.
-      //
       eUnparser.unparse1(state)
     }
   }
-
-  private def areTruncating = {
-    if (erd.isSimpleType && (erd.optPrimType.get eq PrimType.String)) {
-      Assert.invariant(erd.optTruncateSpecifiedLengthString.isDefined)
-      erd.optTruncateSpecifiedLengthString.get
-    } else
-      false
-  }
-
-  /**
-   * Encoding is variable width (e.g., utf-8). The target
-   * length is expressed in characters.
-   */
-  def unparseVarWidthCharactersInCharacters(state: UState): Unit = {
-
-    //
-    // variable-width encodings and lengthUnits characters, and lengthKind explicit
-    // is not supported (currently) for complex types
-    //
-    state.schemaDefinitionUnless(
-      erd.isSimpleType,
-      "Variable width character encoding '%s', dfdl:lengthKind '%s' and dfdl:lengthUnits '%s' are not supported for complex types.",
-      getCharset(state).name,
-      lengthKind.toString,
-      lengthUnits.toString
-    )
-
-    Assert.invariant(erd.isSimpleType)
-    Assert.invariant(this.maybeTargetLengthInCharactersEv.isDefined)
-    val tlEv = this.maybeTargetLengthInCharactersEv.get
-    val tlChars = this.getMaybeTL(state, tlEv)
-
-    if (tlChars.isDefined) {
-      //
-      // possibly truncate
-      //
-      if (areTruncating) {
-        val diSimple = state.currentInfosetNode.asSimple
-        val v = diSimple.dataValue.getString
-        val tl = tlChars.get
-        if (v.length > tl) {
-          // string is too long, truncate to target length
-          val newV = v.substring(0, tl.toInt)
-          //
-          // BUG: JIRA DFDL-1592 - should not be overwriting the value with
-          // truncated value.
-          //
-          diSimple.overwriteDataValue(newV)
-        }
-      }
-      eUnparser.unparse1(state, erd)
-    } else {
-      // target length unknown
-      // ignore constraining the output length. Just unparse it.
-      //
-      // This happens when we're unparsing, and this element depends on a prior element for
-      // determining its length, but that prior element has dfdl:outputValueCalc that depends
-      // on this element.
-      // This breaks the chicken-egg cycle.
-      //
-      eUnparser.unparse1(state, erd)
-    }
-  }
-
-  private def getMaybeTL(state: UState, TLEv: Evaluatable[MaybeJULong]): MaybeJULong = {
-    val maybeTLBits =
-      try {
-        val tlRes = TLEv.evaluate(state)
-        Assert.invariant(tlRes.isDefined) // otherwise we shouldn't be in this method at all
-        tlRes
-      } catch {
-        case e: RetryableException => {
-          //
-          // TargetLength expression couldn't be evaluated.
-          //
-          MaybeJULong.Nope
-        }
-      }
-    maybeTLBits
-  }
-
-  /**
-   * Regardless of the type (text or binary), the target length
-   * will be provided in bits.
-   */
-  def unparseBits(state: UState): Unit = {
-    val maybeTLBits = getMaybeTL(state, targetLengthInBitsEv)
-
-    if (maybeTLBits.isDefined) {
-      //
-      // We know the target length. We can use it.
-      //
-      //      val nBits = maybeTLBits.get
-      //      val dos = state.dataOutputStream
-
-      //
-      // withBitLengthLimit is incorrect. It doesn't take into account
-      // that after the unparse, we could be looking at a current state
-      // with a distinct DOS
-      //
-      //      val isLimitOk = dos.withBitLengthLimit(nBits) {
-      //        eUnparser.unparse1(state, erd)
-      //      }
-      //
-      //      if (!isLimitOk) {
-      //        val availBits = if (dos.remainingBits.isDefined) dos.remainingBits.get.toString else "(unknown)"
-      //        UE(state, "Insufficient bits available. Required %s bits, but only %s were available.", nBits, availBits)
-      //      }
-
-      eUnparser.unparse1(state, erd)
-
-      // at this point the recursive parse of the children is finished
-
-      if (state.processorStatus ne Success) return
-
-      // We might not have used up all the bits. So some bits may need to
-      // be skipped and filled in by fillbyte.
-      //
-      // In the DFDL data grammar the region being skipped is either the
-      // RightFill region, or the ElementUnused region. Skipping this is handled
-      // elsewhere, along with insertion of padding before/after a string.
-      //
-
-    } else {
-      //
-      // we couldn't get the target length
-      //
-      // This happens when we're unparsing, and this element depends on a prior element for
-      // determining its length via a length expression, but that prior element
-      // has dfdl:outputValueCalc that depends on this element, typically by a
-      // call to dfdl:valueLength of this, or of some structure that includes
-      // this.
-      //
-      // This breaks the chicken-egg cycle, by just unparsing it without
-      // constraint. That produces the value which (ignoring truncation)
-      // can be unparsed to produce the dfdl:valueLength of this element.
-      //
-      // This does assume that the value will get truncated properly for the
-      // case where we do truncation (type string, with
-      // dfdl:truncateSpecifiedLengthString 'yes') by some other mechanism.
-      //
-      eUnparser.unparse1(state, erd)
-    }
-  }
 }
 
 // TODO: implement the capture length unparsers as just using this trait?
diff --git a/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/dsom/SDE.scala b/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/dsom/SDE.scala
@@ -281,19 +281,4 @@ trait ImplementsThrowsOrSavesSDE extends ImplementsThrowsSDE with SavesErrorsAnd
       }
     }
   }
-
-  /**
-   * Use for cases where it is an SDE because of something we've chosen
-   * not to implement. Not merely short term (haven't coded it yet, but intending to),
-   * more like things we've chosen to defer intentionally to some future release.
-   */
-  def subset(testThatWillThrowIfFalse: Boolean, msg: String, args: Any*) = {
-    if (!testThatWillThrowIfFalse) subsetError(msg, args: _*)
-  }
-
-  def subsetError(msg: String, args: Any*) = {
-    val msgTxt = msg.format(args: _*)
-    SDE("Subset: " + msgTxt)
-  }
-
 }
diff --git a/daffodil-test/src/test/resources/org/apache/daffodil/section00/general/testUnparserGeneral.tdml b/daffodil-test/src/test/resources/org/apache/daffodil/section00/general/testUnparserGeneral.tdml
diff --git a/daffodil-test/src/test/scala/org/apache/daffodil/section00/general/TestUnparserGeneral.scala b/daffodil-test/src/test/scala/org/apache/daffodil/section00/general/TestUnparserGeneral.scala

Original file line number	Diff line number	Diff line change
`@@ -110,8 +110,7 @@ trait SpecifiedLengthExplicitImplicitUnparserMixin {`
`110`	`110`	`new SpecifiedLengthExplicitImplicitUnparser(`
`111`	`111`	`u,`
`112`	`112`	`e.elementRuntimeData,`
`113`		`- e.unparseTargetLengthInBitsEv,`
`114`		`- e.maybeUnparseTargetLengthInCharactersEv`
	`113`	`+ e.unparseTargetLengthInBitsEv`
`115`	`114`	`)`
`116`	`115`	`}`
`117`	`116`	`}`
Original file line number	Diff line number	Diff line change
`@@ -281,19 +281,4 @@ trait ImplementsThrowsOrSavesSDE extends ImplementsThrowsSDE with SavesErrorsAnd`
`281`	`281`	`}`
`282`	`282`	`}`
`283`	`283`	`}`
`284`		`-`
`285`		`- /**`
`286`		`- * Use for cases where it is an SDE because of something we've chosen`
`287`		`- * not to implement. Not merely short term (haven't coded it yet, but intending to),`
`288`		`- * more like things we've chosen to defer intentionally to some future release.`
`289`		`- */`
`290`		`- def subset(testThatWillThrowIfFalse: Boolean, msg: String, args: Any*) = {`
`291`		`- if (!testThatWillThrowIfFalse) subsetError(msg, args: _*)`
`292`		`- }`
`293`		`-`
`294`		`- def subsetError(msg: String, args: Any*) = {`
`295`		`- val msgTxt = msg.format(args: _*)`
`296`		`- SDE("Subset: " + msgTxt)`
`297`		`- }`
`298`		`-`
`299`	`284`	`}`