Skip to content

Commit

Permalink
Merge pull request kaitai-io#278 from Mingun/fix-excess-encoding-warn…
Browse files Browse the repository at this point in the history
…ings

Fix excess warnings about inherited encoding attributes: fixes kaitai-io/kaitai_struct#1088
  • Loading branch information
GreyCat authored Mar 20, 2024
2 parents 6aef6fa + 9374160 commit e082196
Show file tree
Hide file tree
Showing 7 changed files with 103 additions and 21 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
package io.kaitai.struct.precompile

import io.kaitai.struct.datatype.DataType.{CalcBytesType, StrFromBytesType}
import io.kaitai.struct.exprlang.Ast
import io.kaitai.struct.format._
import io.kaitai.struct.problems._
import org.scalatest.funspec.AnyFunSpec
import org.scalatest.matchers.should.Matchers._
Expand Down Expand Up @@ -44,5 +47,66 @@ class CanonicalizeEncodingNames$Test extends AnyFunSpec {
Locale.setDefault(oldLocale)
}
}

describe("do not report warnings for members with derived encoding") {
it("in parameters") {
val problems = CanonicalizeEncodingNames.canonicalizeMember(ParamDefSpec(
path = List("param"),
id = NumberedIdentifier(1),
dataType = StrFromBytesType(CalcBytesType, "utf-8", false),
)).toList
problems should be(List(EncodingNameWarning(
"UTF-8", "utf-8",
ProblemCoords(None, Some(List("param", "encoding")), None, None)
)))

val noProblems = CanonicalizeEncodingNames.canonicalizeMember(ParamDefSpec(
path = List("param"),
id = NumberedIdentifier(1),
dataType = StrFromBytesType(CalcBytesType, "utf-8", true),
)).toList
noProblems should be(List())
}

it("in parse instance") {
val problems = CanonicalizeEncodingNames.canonicalizeMember(ParseInstanceSpec(
id = InstanceIdentifier("parse_instance"),
path = List("parse", "instance"),
dataType = StrFromBytesType(CalcBytesType, "utf-8", false),
)).toList
problems should be(List(EncodingNameWarning(
"UTF-8", "utf-8",
ProblemCoords(None, Some(List("parse", "instance", "encoding")), None, None)
)))

val noProblems = CanonicalizeEncodingNames.canonicalizeMember(ParseInstanceSpec(
id = InstanceIdentifier("parse_instance"),
path = List("parse", "instance"),
dataType = StrFromBytesType(CalcBytesType, "utf-8", true),
)).toList
noProblems should be(List())
}

it("in value instance") {
val problems = CanonicalizeEncodingNames.canonicalizeMember(ValueInstanceSpec(
id = InstanceIdentifier("value_instance"),
path = List("value", "instance"),
value = Ast.expr.Str("value"),
dataTypeOpt = Some(StrFromBytesType(CalcBytesType, "utf-8", false)),
)).toList
problems should be(List(EncodingNameWarning(
"UTF-8", "utf-8",
ProblemCoords(None, Some(List("value", "instance", "encoding")), None, None)
)))

val noProblems = CanonicalizeEncodingNames.canonicalizeMember(ValueInstanceSpec(
id = InstanceIdentifier("value_instance"),
path = List("value", "instance"),
value = Ast.expr.Str("value"),
dataTypeOpt = Some(StrFromBytesType(CalcBytesType, "utf-8", true)),
)).toList
noProblems should be(List())
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ class ConstructClassCompiler(classSpecs: ClassSpecs, topClass: ClassSpec) extend
attrBytesLimitType(blt)
case btt: BytesTerminatedType =>
attrBytesTerminatedType(btt, "GreedyBytes")
case StrFromBytesType(bytes, encoding) =>
case StrFromBytesType(bytes, encoding, _) =>
bytes match {
case BytesEosType(terminator, include, padRight, process) =>
s"GreedyString(encoding='$encoding')"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ class GraphvizClassCompiler(classSpecs: ClassSpecs, topClass: ClassSpec) extends
dataType match {
case _: BytesEosType => END_OF_STREAM
case blt: BytesLimitType => expressionSize(blt.size, attrName)
case StrFromBytesType(basedOn, _) => dataTypeSizeAsString(basedOn, attrName)
case StrFromBytesType(basedOn, _, _) => dataTypeSizeAsString(basedOn, attrName)
case utb: UserTypeFromBytes => dataTypeSizeAsString(utb.bytes, attrName)
case EnumType(_, basedOn) => dataTypeSizeAsString(basedOn, attrName)
case _ =>
Expand Down Expand Up @@ -425,7 +425,7 @@ object GraphvizClassCompiler extends LanguageCompilerStatic {
args += "ignore EOS"
args.mkString(", ")
case _: BytesType => ""
case StrFromBytesType(basedOn, encoding) =>
case StrFromBytesType(basedOn, encoding, _) =>
val bytesStr = dataTypeName(basedOn)
val comma = if (bytesStr.isEmpty) "" else ", "
s"str($bytesStr$comma$encoding)"
Expand Down
21 changes: 19 additions & 2 deletions shared/src/main/scala/io/kaitai/struct/datatype/DataType.scala
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,24 @@ object DataType {

abstract class StrType extends DataType
case object CalcStrType extends StrType
case class StrFromBytesType(bytes: BytesType, var encoding: String) extends StrType
/**
* A type that have the `str` and `strz` built-in Kaitai types.
*
* @param bytes An underlying bytes of the string
* @param encoding An encoding used to convert byte array into string
* @param isEncodingDerived A flag that is `true` when encoding is derived from
* `meta/encoding` key of a type in which attribute with this type is
* defined and `false` when encoding defined in the attribute
*/
case class StrFromBytesType(
bytes: BytesType,
var encoding: String,
// FIXME: Actually, this should not be a part of KSY model used to code generation,
// but be a part of intermediate model. The current design of KSC does not have
// that intermediate model yet, so we put this flag here. See discussion at
// https://github.com/kaitai-io/kaitai_struct_compiler/pull/278#discussion_r1527312479
isEncodingDerived: Boolean,
) extends StrType

case object CalcBooleanType extends BooleanType

Expand Down Expand Up @@ -400,7 +417,7 @@ object DataType {
}

val bat = arg2.getByteArrayType(path)
StrFromBytesType(bat, enc)
StrFromBytesType(bat, enc, arg.encoding.isEmpty)
case _ =>
val typeWithArgs = Expressions.parseTypeRef(dt)
if (arg.size.isEmpty && !arg.sizeEos && arg.terminator.isEmpty) {
Expand Down
22 changes: 11 additions & 11 deletions shared/src/main/scala/io/kaitai/struct/format/InstanceSpec.scala
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ sealed abstract class InstanceSpec(val doc: DocSpec) extends MemberSpec {
case class ValueInstanceSpec(
id: InstanceIdentifier,
path: List[String],
private val _doc: DocSpec,
value: Ast.expr,
ifExpr: Option[Ast.expr],
var dataTypeOpt: Option[DataType]
ifExpr: Option[Ast.expr] = None,
var dataTypeOpt: Option[DataType] = None,
val _doc: DocSpec = DocSpec.EMPTY,
) extends InstanceSpec(_doc) {
override def dataType: DataType = {
dataTypeOpt match {
Expand All @@ -29,12 +29,12 @@ case class ValueInstanceSpec(
case class ParseInstanceSpec(
id: InstanceIdentifier,
path: List[String],
private val _doc: DocSpec,
dataType: DataType,
cond: ConditionalSpec,
pos: Option[Ast.expr],
io: Option[Ast.expr],
valid: Option[ValidationSpec]
cond: ConditionalSpec = ConditionalSpec(None, NoRepeat),
pos: Option[Ast.expr] = None,
io: Option[Ast.expr] = None,
valid: Option[ValidationSpec] = None,
val _doc: DocSpec = DocSpec.EMPTY,
) extends InstanceSpec(_doc) with AttrLikeSpec {
override def isLazy = true
}
Expand Down Expand Up @@ -69,10 +69,10 @@ object InstanceSpec {
ValueInstanceSpec(
id,
path,
DocSpec.fromYaml(srcMap, path),
value2,
ifExpr,
None
None,
DocSpec.fromYaml(srcMap, path),
)
case None =>
// normal parse instance
Expand All @@ -86,7 +86,7 @@ object InstanceSpec {
val a = AttrSpec.fromYaml(fakeAttrMap, path, metaDef, id)
val valid = srcMap.get("valid").map(ValidationSpec.fromYaml(_, path ++ List("valid")))

ParseInstanceSpec(id, path, a.doc, a.dataType, a.cond, pos, io, valid)
ParseInstanceSpec(id, path, a.dataType, a.cond, pos, io, valid, a.doc)
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ object CalculateSeqSizes {
case None => DynamicSized
}
case _: BytesTerminatedType => DynamicSized
case StrFromBytesType(basedOn, _) => dataTypeByteSize(basedOn)
case StrFromBytesType(basedOn, _, _) => dataTypeByteSize(basedOn)
case utb: UserTypeFromBytes => dataTypeByteSize(utb.bytes)
case cutb: CalcUserTypeFromBytes => dataTypeByteSize(cutb.bytes)
case st: SwitchType => DynamicSized // FIXME: it's really possible get size if st.hasSize
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ import io.kaitai.struct.Platform

class CanonicalizeEncodingNames(specs: ClassSpecs) extends PrecompileStep {
override def run(): Iterable[CompilationProblem] = specs.mapRec(canonicalize)
}

object CanonicalizeEncodingNames {
def canonicalize(curClass: ClassSpec): Iterable[CompilationProblem] = {
val metaProblems = canonicalizeMeta(curClass.meta)
val seqProblems = curClass.seq.flatMap(attr => canonicalizeMember(attr))
Expand All @@ -34,16 +36,15 @@ class CanonicalizeEncodingNames(specs: ClassSpecs) extends PrecompileStep {
case strType: StrFromBytesType =>
val (newEncoding, problem1) = canonicalizeName(strType.encoding)
strType.encoding = newEncoding
problem1
// Do not report problem if encoding was derived from `meta/encoding` key
if (strType.isEncodingDerived) None else problem1
case _ =>
// not a string type = no problem
None
}).map(problem => problem.localizedInPath(member.path ++ List("encoding")))
}
}

object CanonicalizeEncodingNames {
def canonicalizeName(original: String, unrecognizedIsError: Boolean = true): (String, Option[CompilationProblem with PathLocalizable]) = {
def canonicalizeName(original: String): (String, Option[CompilationProblem with PathLocalizable]) = {
// Try exact match with canonical list
if (EncodingList.canonicalToAlias.contains(original)) {
(original, None)
Expand Down

0 comments on commit e082196

Please sign in to comment.