Merge pull request kaitai-io#278 from Mingun/fix-excess-encoding-warn…

…ings Fix excess warnings about inherited encoding attributes: fixes kaitai-io/kaitai_struct#1088
Mingun · Mar 20, 2024 · e082196 · e082196
2 parents 6aef6fa + 9374160
commit e082196
Show file tree

Hide file tree

Showing 7 changed files with 103 additions and 21 deletions.
diff --git a/jvm/src/test/scala/io/kaitai/struct/precompile/CanonicalizeEncodingNames$Test.scala b/jvm/src/test/scala/io/kaitai/struct/precompile/CanonicalizeEncodingNames$Test.scala
@@ -1,5 +1,8 @@
 package io.kaitai.struct.precompile
 
+import io.kaitai.struct.datatype.DataType.{CalcBytesType, StrFromBytesType}
+import io.kaitai.struct.exprlang.Ast
+import io.kaitai.struct.format._
 import io.kaitai.struct.problems._
 import org.scalatest.funspec.AnyFunSpec
 import org.scalatest.matchers.should.Matchers._
@@ -44,5 +47,66 @@ class CanonicalizeEncodingNames$Test extends AnyFunSpec {
         Locale.setDefault(oldLocale)
       }
     }
+
+    describe("do not report warnings for members with derived encoding") {
+      it("in parameters") {
+        val problems = CanonicalizeEncodingNames.canonicalizeMember(ParamDefSpec(
+          path = List("param"),
+          id = NumberedIdentifier(1),
+          dataType = StrFromBytesType(CalcBytesType, "utf-8", false),
+        )).toList
+        problems should be(List(EncodingNameWarning(
+          "UTF-8", "utf-8",
+          ProblemCoords(None, Some(List("param", "encoding")), None, None)
+        )))
+
+        val noProblems = CanonicalizeEncodingNames.canonicalizeMember(ParamDefSpec(
+          path = List("param"),
+          id = NumberedIdentifier(1),
+          dataType = StrFromBytesType(CalcBytesType, "utf-8", true),
+        )).toList
+        noProblems should be(List())
+      }
+
+      it("in parse instance") {
+        val problems = CanonicalizeEncodingNames.canonicalizeMember(ParseInstanceSpec(
+          id = InstanceIdentifier("parse_instance"),
+          path = List("parse", "instance"),
+          dataType = StrFromBytesType(CalcBytesType, "utf-8", false),
+        )).toList
+        problems should be(List(EncodingNameWarning(
+          "UTF-8", "utf-8",
+          ProblemCoords(None, Some(List("parse", "instance", "encoding")), None, None)
+        )))
+
+        val noProblems = CanonicalizeEncodingNames.canonicalizeMember(ParseInstanceSpec(
+          id = InstanceIdentifier("parse_instance"),
+          path = List("parse", "instance"),
+          dataType = StrFromBytesType(CalcBytesType, "utf-8", true),
+        )).toList
+        noProblems should be(List())
+      }
+
+      it("in value instance") {
+        val problems = CanonicalizeEncodingNames.canonicalizeMember(ValueInstanceSpec(
+          id = InstanceIdentifier("value_instance"),
+          path = List("value", "instance"),
+          value = Ast.expr.Str("value"),
+          dataTypeOpt = Some(StrFromBytesType(CalcBytesType, "utf-8", false)),
+        )).toList
+        problems should be(List(EncodingNameWarning(
+          "UTF-8", "utf-8",
+          ProblemCoords(None, Some(List("value", "instance", "encoding")), None, None)
+        )))
+
+        val noProblems = CanonicalizeEncodingNames.canonicalizeMember(ValueInstanceSpec(
+          id = InstanceIdentifier("value_instance"),
+          path = List("value", "instance"),
+          value = Ast.expr.Str("value"),
+          dataTypeOpt = Some(StrFromBytesType(CalcBytesType, "utf-8", true)),
+        )).toList
+        noProblems should be(List())
+      }
+    }
   }
 }
diff --git a/shared/src/main/scala/io/kaitai/struct/ConstructClassCompiler.scala b/shared/src/main/scala/io/kaitai/struct/ConstructClassCompiler.scala
@@ -133,7 +133,7 @@ class ConstructClassCompiler(classSpecs: ClassSpecs, topClass: ClassSpec) extend
       attrBytesLimitType(blt)
     case btt: BytesTerminatedType =>
       attrBytesTerminatedType(btt, "GreedyBytes")
-    case StrFromBytesType(bytes, encoding) =>
+    case StrFromBytesType(bytes, encoding, _) =>
       bytes match {
         case BytesEosType(terminator, include, padRight, process) =>
           s"GreedyString(encoding='$encoding')"

diff --git a/shared/src/main/scala/io/kaitai/struct/GraphvizClassCompiler.scala b/shared/src/main/scala/io/kaitai/struct/GraphvizClassCompiler.scala
@@ -241,7 +241,7 @@ class GraphvizClassCompiler(classSpecs: ClassSpecs, topClass: ClassSpec) extends
     dataType match {
       case _: BytesEosType => END_OF_STREAM
       case blt: BytesLimitType => expressionSize(blt.size, attrName)
-      case StrFromBytesType(basedOn, _) => dataTypeSizeAsString(basedOn, attrName)
+      case StrFromBytesType(basedOn, _, _) => dataTypeSizeAsString(basedOn, attrName)
       case utb: UserTypeFromBytes => dataTypeSizeAsString(utb.bytes, attrName)
       case EnumType(_, basedOn) => dataTypeSizeAsString(basedOn, attrName)
       case _ =>
@@ -425,7 +425,7 @@ object GraphvizClassCompiler extends LanguageCompilerStatic {
           args += "ignore EOS"
         args.mkString(", ")
       case _: BytesType => ""
-      case StrFromBytesType(basedOn, encoding) =>
+      case StrFromBytesType(basedOn, encoding, _) =>
         val bytesStr = dataTypeName(basedOn)
         val comma = if (bytesStr.isEmpty) "" else ", "
         s"str($bytesStr$comma$encoding)"

diff --git a/shared/src/main/scala/io/kaitai/struct/datatype/DataType.scala b/shared/src/main/scala/io/kaitai/struct/datatype/DataType.scala
@@ -92,7 +92,24 @@ object DataType {
 
   abstract class StrType extends DataType
   case object CalcStrType extends StrType
-  case class StrFromBytesType(bytes: BytesType, var encoding: String) extends StrType
+  /**
+    * A type that have the `str` and `strz` built-in Kaitai types.
+    *
+    * @param bytes An underlying bytes of the string
+    * @param encoding An encoding used to convert byte array into string
+    * @param isEncodingDerived A flag that is `true` when encoding is derived from
+    *        `meta/encoding` key of a type in which attribute with this type is
+    *        defined and `false` when encoding defined in the attribute
+    */
+  case class StrFromBytesType(
+    bytes: BytesType,
+    var encoding: String,
+    // FIXME: Actually, this should not be a part of KSY model used to code generation,
+    // but be a part of intermediate model. The current design of KSC does not have
+    // that intermediate model yet, so we put this flag here. See discussion at
+    // https://github.com/kaitai-io/kaitai_struct_compiler/pull/278#discussion_r1527312479
+    isEncodingDerived: Boolean,
+  ) extends StrType
 
   case object CalcBooleanType extends BooleanType
 
@@ -400,7 +417,7 @@ object DataType {
           }
 
           val bat = arg2.getByteArrayType(path)
-          StrFromBytesType(bat, enc)
+          StrFromBytesType(bat, enc, arg.encoding.isEmpty)
         case _ =>
           val typeWithArgs = Expressions.parseTypeRef(dt)
           if (arg.size.isEmpty && !arg.sizeEos && arg.terminator.isEmpty) {

diff --git a/shared/src/main/scala/io/kaitai/struct/format/InstanceSpec.scala b/shared/src/main/scala/io/kaitai/struct/format/InstanceSpec.scala
@@ -11,10 +11,10 @@ sealed abstract class InstanceSpec(val doc: DocSpec) extends MemberSpec {
 case class ValueInstanceSpec(
   id: InstanceIdentifier,
   path: List[String],
-  private val _doc: DocSpec,
   value: Ast.expr,
-  ifExpr: Option[Ast.expr],
-  var dataTypeOpt: Option[DataType]
+  ifExpr: Option[Ast.expr] = None,
+  var dataTypeOpt: Option[DataType] = None,
+  val _doc: DocSpec = DocSpec.EMPTY,
 ) extends InstanceSpec(_doc) {
   override def dataType: DataType = {
     dataTypeOpt match {
@@ -29,12 +29,12 @@ case class ValueInstanceSpec(
 case class ParseInstanceSpec(
   id: InstanceIdentifier,
   path: List[String],
-  private val _doc: DocSpec,
   dataType: DataType,
-  cond: ConditionalSpec,
-  pos: Option[Ast.expr],
-  io: Option[Ast.expr],
-  valid: Option[ValidationSpec]
+  cond: ConditionalSpec = ConditionalSpec(None, NoRepeat),
+  pos: Option[Ast.expr] = None,
+  io: Option[Ast.expr] = None,
+  valid: Option[ValidationSpec] = None,
+  val _doc: DocSpec = DocSpec.EMPTY,
 ) extends InstanceSpec(_doc) with AttrLikeSpec {
   override def isLazy = true
 }
@@ -69,10 +69,10 @@ object InstanceSpec {
         ValueInstanceSpec(
           id,
           path,
-          DocSpec.fromYaml(srcMap, path),
           value2,
           ifExpr,
-          None
+          None,
+          DocSpec.fromYaml(srcMap, path),
         )
       case None =>
         // normal parse instance
@@ -86,7 +86,7 @@ object InstanceSpec {
         val a = AttrSpec.fromYaml(fakeAttrMap, path, metaDef, id)
         val valid = srcMap.get("valid").map(ValidationSpec.fromYaml(_, path ++ List("valid")))
 
-        ParseInstanceSpec(id, path, a.doc, a.dataType, a.cond, pos, io, valid)
+        ParseInstanceSpec(id, path, a.dataType, a.cond, pos, io, valid, a.doc)
     }
   }
 }
diff --git a/shared/src/main/scala/io/kaitai/struct/precompile/CalculateSeqSizes.scala b/shared/src/main/scala/io/kaitai/struct/precompile/CalculateSeqSizes.scala
@@ -113,7 +113,7 @@ object CalculateSeqSizes {
         case None => DynamicSized
       }
       case _: BytesTerminatedType => DynamicSized
-      case StrFromBytesType(basedOn, _) => dataTypeByteSize(basedOn)
+      case StrFromBytesType(basedOn, _, _) => dataTypeByteSize(basedOn)
       case utb: UserTypeFromBytes => dataTypeByteSize(utb.bytes)
       case cutb: CalcUserTypeFromBytes => dataTypeByteSize(cutb.bytes)
       case st: SwitchType => DynamicSized // FIXME: it's really possible get size if st.hasSize

diff --git a/shared/src/main/scala/io/kaitai/struct/precompile/CanonicalizeEncodingNames.scala b/shared/src/main/scala/io/kaitai/struct/precompile/CanonicalizeEncodingNames.scala
@@ -9,7 +9,9 @@ import io.kaitai.struct.Platform
 
 class CanonicalizeEncodingNames(specs: ClassSpecs) extends PrecompileStep {
   override def run(): Iterable[CompilationProblem] = specs.mapRec(canonicalize)
+}
 
+object CanonicalizeEncodingNames {
   def canonicalize(curClass: ClassSpec): Iterable[CompilationProblem] = {
     val metaProblems = canonicalizeMeta(curClass.meta)
     val seqProblems = curClass.seq.flatMap(attr => canonicalizeMember(attr))
@@ -34,16 +36,15 @@ class CanonicalizeEncodingNames(specs: ClassSpecs) extends PrecompileStep {
       case strType: StrFromBytesType =>
         val (newEncoding, problem1) = canonicalizeName(strType.encoding)
         strType.encoding = newEncoding
-        problem1
+        // Do not report problem if encoding was derived from `meta/encoding` key
+        if (strType.isEncodingDerived) None else problem1
       case _ =>
         // not a string type = no problem
         None
     }).map(problem => problem.localizedInPath(member.path ++ List("encoding")))
   }
-}
 
-object CanonicalizeEncodingNames {
-  def canonicalizeName(original: String, unrecognizedIsError: Boolean = true): (String, Option[CompilationProblem with PathLocalizable]) = {
+  def canonicalizeName(original: String): (String, Option[CompilationProblem with PathLocalizable]) = {
     // Try exact match with canonical list
     if (EncodingList.canonicalToAlias.contains(original)) {
       (original, None)