Skip to content

Commit

Permalink
Remove unnecessary truncation code
Browse files Browse the repository at this point in the history
- we were truncating and overwriting value, but we didn't actually need to since we ended up truncating again right after. Removed the unnecessary and buggy code which changed the dataValue. StringMaybeTruncateBitsUnparser.unparse does not change the dataValue, instead it changes the DOS, resulting in the value getting truncated but string-length returning the untruncated length when it's called
- move subset/subsetError to ThrowsSDE trait so state has access to it
- refactor SpecifiedLengthExplicitImplicitUnparser to be more straightforward and to remove unused cruft
- add tests

DAFFODIL-1592
  • Loading branch information
olabusayoT committed Nov 4, 2024
1 parent d79efe1 commit 1467784
Show file tree
Hide file tree
Showing 6 changed files with 170 additions and 243 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,7 @@ trait SpecifiedLengthExplicitImplicitUnparserMixin {
new SpecifiedLengthExplicitImplicitUnparser(
u,
e.elementRuntimeData,
e.unparseTargetLengthInBitsEv,
e.maybeUnparseTargetLengthInCharactersEv
e.unparseTargetLengthInBitsEv
)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,19 @@ trait ThrowsSDE {
final def notYetImplemented(msg: String, args: Any*): Nothing =
SDE("Feature not yet implemented: " + msg, args: _*)

/**
* Use for cases where it is an SDE because of something we've chosen
* not to implement. Not merely short term (haven't coded it yet, but intending to),
* more like things we've chosen to defer intentionally to some future release.
*/
def subset(testThatWillThrowIfFalse: Boolean, msg: String, args: Any*) = {
if (!testThatWillThrowIfFalse) subsetError(msg, args: _*)
}

def subsetError(msg: String, args: Any*) = {
val msgTxt = msg.format(args: _*)
SDE("Subset: " + msgTxt)
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,42 +20,22 @@ package org.apache.daffodil.unparsers.runtime1
import org.apache.daffodil.lib.exceptions.Assert
import org.apache.daffodil.lib.schema.annotation.props.gen.LengthUnits
import org.apache.daffodil.lib.schema.annotation.props.gen.Representation
import org.apache.daffodil.lib.util.Maybe
import org.apache.daffodil.lib.util.Maybe._
import org.apache.daffodil.lib.util.MaybeJULong
import org.apache.daffodil.runtime1.dpath.NodeInfo.PrimType
import org.apache.daffodil.runtime1.infoset.DIElement
import org.apache.daffodil.runtime1.infoset.DISimple
import org.apache.daffodil.runtime1.infoset.Infoset
import org.apache.daffodil.runtime1.infoset.RetryableException
import org.apache.daffodil.runtime1.processors.CharsetEv
import org.apache.daffodil.runtime1.processors.ElementRuntimeData
import org.apache.daffodil.runtime1.processors.Evaluatable
import org.apache.daffodil.runtime1.processors.ParseOrUnparseState
import org.apache.daffodil.runtime1.processors.Success
import org.apache.daffodil.runtime1.processors.UnparseTargetLengthInBitsEv
import org.apache.daffodil.runtime1.processors.UnparseTargetLengthInCharactersEv
import org.apache.daffodil.runtime1.processors.unparsers._

import passera.unsigned.ULong

/**
* Restricts the bits available for unparsing to just those within
* the specified length computed.
*
* If a unparser (supplied as arg) runs past the available space,
* that's an unparse error.
*
* Truncation of strings - the only case where we truncate, and only when
* dfdl:truncateSpecifiedLengthString is 'yes', is handled elsewhere.
*/
sealed abstract class SpecifiedLengthUnparserBase(eUnparser: Unparser, erd: ElementRuntimeData)

final class SpecifiedLengthExplicitImplicitUnparser(
eUnparser: Unparser,
erd: ElementRuntimeData,
targetLengthInBitsEv: UnparseTargetLengthInBitsEv,
maybeTargetLengthInCharactersEv: Maybe[UnparseTargetLengthInCharactersEv]
targetLengthInBitsEv: UnparseTargetLengthInBitsEv
) extends CombinatorUnparser(erd) {

override lazy val runtimeDependencies = Vector()
Expand All @@ -74,215 +54,23 @@ final class SpecifiedLengthExplicitImplicitUnparser(
}

override final def unparse(state: UState): Unit = {

erd.impliedRepresentation match {
case Representation.Binary =>
unparseBits(state)
case Representation.Text => {
val dcs = getCharset(state)
if (dcs.maybeFixedWidth.isDefined)
unparseBits(state)
else {
// we know the encoding is variable width characters
// but we don't know if the length units characters or bits/bytes.
lengthUnits match {
case LengthUnits.Bits | LengthUnits.Bytes =>
unparseVarWidthCharactersInBits(state)
case LengthUnits.Characters =>
unparseVarWidthCharactersInCharacters(state)
}
}
}
}
}

/**
* Encoding is variable width (e.g., utf-8, but the
* target length is expressed in bits.
*
* Truncation, in this case, requires determining how many
* of the string's characters fit within the available target length
* bits.
*/
def unparseVarWidthCharactersInBits(state: UState): Unit = {
val maybeTLBits = getMaybeTL(state, targetLengthInBitsEv)

if (maybeTLBits.isDefined) {
//
// We know the target length. We can use it.
//
if (areTruncating) {
val diSimple = state.currentInfosetNode.asSimple
val v = diSimple.dataValue.getString
val tl = maybeTLBits.get
val cs = getCharset(state)
val newV = state.truncateToBits(v, cs, tl)
//
// JIRA DFDL-1592
//
// BUG: should not modify the actual dataset value.
// as fn:string-length of the value should always return the same
// value which is the un-truncated length.
//
diSimple.overwriteDataValue(newV)
}
eUnparser.unparse1(state)
lazy val dcs = getCharset(state)
if (
erd.impliedRepresentation == Representation.Text &&
lengthUnits == LengthUnits.Characters &&
dcs.maybeFixedWidth.isEmpty &&
erd.isComplexType
) {
state.subsetError(
"Variable width character encoding '%s', dfdl:lengthKind '%s' and dfdl:lengthUnits '%s' are not supported for complex types.",
getCharset(state).name,
lengthKind.toString,
lengthUnits.toString
)
} else {
// target length unknown
// ignore constraining the output length. Just unparse it.
//
// This happens when we're unparsing, and this element depends on a prior element for
// determining its length, but that prior element has dfdl:outputValueCalc that depends
// on this element.
// This breaks the chicken-egg cycle.
//
eUnparser.unparse1(state)
}
}

private def areTruncating = {
if (erd.isSimpleType && (erd.optPrimType.get eq PrimType.String)) {
Assert.invariant(erd.optTruncateSpecifiedLengthString.isDefined)
erd.optTruncateSpecifiedLengthString.get
} else
false
}

/**
* Encoding is variable width (e.g., utf-8). The target
* length is expressed in characters.
*/
def unparseVarWidthCharactersInCharacters(state: UState): Unit = {

//
// variable-width encodings and lengthUnits characters, and lengthKind explicit
// is not supported (currently) for complex types
//
state.schemaDefinitionUnless(
erd.isSimpleType,
"Variable width character encoding '%s', dfdl:lengthKind '%s' and dfdl:lengthUnits '%s' are not supported for complex types.",
getCharset(state).name,
lengthKind.toString,
lengthUnits.toString
)

Assert.invariant(erd.isSimpleType)
Assert.invariant(this.maybeTargetLengthInCharactersEv.isDefined)
val tlEv = this.maybeTargetLengthInCharactersEv.get
val tlChars = this.getMaybeTL(state, tlEv)

if (tlChars.isDefined) {
//
// possibly truncate
//
if (areTruncating) {
val diSimple = state.currentInfosetNode.asSimple
val v = diSimple.dataValue.getString
val tl = tlChars.get
if (v.length > tl) {
// string is too long, truncate to target length
val newV = v.substring(0, tl.toInt)
//
// BUG: JIRA DFDL-1592 - should not be overwriting the value with
// truncated value.
//
diSimple.overwriteDataValue(newV)
}
}
eUnparser.unparse1(state, erd)
} else {
// target length unknown
// ignore constraining the output length. Just unparse it.
//
// This happens when we're unparsing, and this element depends on a prior element for
// determining its length, but that prior element has dfdl:outputValueCalc that depends
// on this element.
// This breaks the chicken-egg cycle.
//
eUnparser.unparse1(state, erd)
}
}

private def getMaybeTL(state: UState, TLEv: Evaluatable[MaybeJULong]): MaybeJULong = {
val maybeTLBits =
try {
val tlRes = TLEv.evaluate(state)
Assert.invariant(tlRes.isDefined) // otherwise we shouldn't be in this method at all
tlRes
} catch {
case e: RetryableException => {
//
// TargetLength expression couldn't be evaluated.
//
MaybeJULong.Nope
}
}
maybeTLBits
}

/**
* Regardless of the type (text or binary), the target length
* will be provided in bits.
*/
def unparseBits(state: UState): Unit = {
val maybeTLBits = getMaybeTL(state, targetLengthInBitsEv)

if (maybeTLBits.isDefined) {
//
// We know the target length. We can use it.
//
// val nBits = maybeTLBits.get
// val dos = state.dataOutputStream

//
// withBitLengthLimit is incorrect. It doesn't take into account
// that after the unparse, we could be looking at a current state
// with a distinct DOS
//
// val isLimitOk = dos.withBitLengthLimit(nBits) {
// eUnparser.unparse1(state, erd)
// }
//
// if (!isLimitOk) {
// val availBits = if (dos.remainingBits.isDefined) dos.remainingBits.get.toString else "(unknown)"
// UE(state, "Insufficient bits available. Required %s bits, but only %s were available.", nBits, availBits)
// }

eUnparser.unparse1(state, erd)

// at this point the recursive parse of the children is finished

if (state.processorStatus ne Success) return

// We might not have used up all the bits. So some bits may need to
// be skipped and filled in by fillbyte.
//
// In the DFDL data grammar the region being skipped is either the
// RightFill region, or the ElementUnused region. Skipping this is handled
// elsewhere, along with insertion of padding before/after a string.
//

} else {
//
// we couldn't get the target length
//
// This happens when we're unparsing, and this element depends on a prior element for
// determining its length via a length expression, but that prior element
// has dfdl:outputValueCalc that depends on this element, typically by a
// call to dfdl:valueLength of this, or of some structure that includes
// this.
//
// This breaks the chicken-egg cycle, by just unparsing it without
// constraint. That produces the value which (ignoring truncation)
// can be unparsed to produce the dfdl:valueLength of this element.
//
// This does assume that the value will get truncated properly for the
// case where we do truncation (type string, with
// dfdl:truncateSpecifiedLengthString 'yes') by some other mechanism.
//
eUnparser.unparse1(state, erd)
}
}
}

// TODO: implement the capture length unparsers as just using this trait?
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -281,19 +281,4 @@ trait ImplementsThrowsOrSavesSDE extends ImplementsThrowsSDE with SavesErrorsAnd
}
}
}

/**
* Use for cases where it is an SDE because of something we've chosen
* not to implement. Not merely short term (haven't coded it yet, but intending to),
* more like things we've chosen to defer intentionally to some future release.
*/
def subset(testThatWillThrowIfFalse: Boolean, msg: String, args: Any*) = {
if (!testThatWillThrowIfFalse) subsetError(msg, args: _*)
}

def subsetError(msg: String, args: Any*) = {
val msgTxt = msg.format(args: _*)
SDE("Subset: " + msgTxt)
}

}
Loading

0 comments on commit 1467784

Please sign in to comment.