From 7b0dd65040812dbfbf005bb8bde81fa17c357e3f Mon Sep 17 00:00:00 2001 From: Jonas Rickert Date: Tue, 14 Jan 2025 19:13:39 +0100 Subject: [PATCH 1/2] Skip over uninitialized DenseResourceAttrs in verifiers (#3041) Elided elements are uninitialized DenseResourceAttrs, without these changes MLIR containing them can not be parsed, as the verifiers crash when encountering them. Signed-off-by: Rickert, Jonas --- src/Dialect/ONNX/ONNXOps/Math/Scatter.cpp | 4 ++ src/Dialect/ONNX/ONNXOps/OpHelper.cpp | 12 ++++++ src/Dialect/ONNX/ONNXOps/OpHelper.hpp | 8 ++++ .../ONNX/ONNXOps/Sequence/SplitToSequence.cpp | 4 ++ .../ONNX/ONNXOps/Tensor/ConstantOfShape.cpp | 4 ++ .../ONNX/ONNXOps/Tensor/GatherElements.cpp | 11 +++++- src/Dialect/ONNX/ONNXOps/Tensor/GatherND.cpp | 4 ++ test/mlir/onnx/invalid.mlir | 37 +++++++++++++++++++ 8 files changed, 82 insertions(+), 2 deletions(-) diff --git a/src/Dialect/ONNX/ONNXOps/Math/Scatter.cpp b/src/Dialect/ONNX/ONNXOps/Math/Scatter.cpp index 189d855805..701e03721d 100644 --- a/src/Dialect/ONNX/ONNXOps/Math/Scatter.cpp +++ b/src/Dialect/ONNX/ONNXOps/Math/Scatter.cpp @@ -76,6 +76,10 @@ LogicalResult ONNXScatterElementsOp::verify() { if (dataDimAtAxis >= 0) { if (ElementsAttr valueAttribute = getElementAttributeFromONNXValue(indices)) { + if (isElementAttrUninitializedDenseResource(valueAttribute)) { + return success(); // Return success to allow the parsing of MLIR with + // elided attributes + } for (IntegerAttr value : valueAttribute.getValues()) { int64_t index = value.getInt(); if (index >= -dataDimAtAxis && index < dataDimAtAxis) diff --git a/src/Dialect/ONNX/ONNXOps/OpHelper.cpp b/src/Dialect/ONNX/ONNXOps/OpHelper.cpp index 36cefe7675..0468919038 100644 --- a/src/Dialect/ONNX/ONNXOps/OpHelper.cpp +++ b/src/Dialect/ONNX/ONNXOps/OpHelper.cpp @@ -12,6 +12,7 @@ // //===----------------------------------------------------------------------===// +#include "mlir/IR/DialectResourceBlobManager.h" #include "mlir/IR/TypeUtilities.h" #include 
"llvm/ADT/TypeSwitch.h" #include "llvm/Support/Path.h" @@ -872,4 +873,15 @@ std::string getNodeNameInPresenceOfOpt(Operation *op, bool useFileLine) { return "NOTSET"; } +//===----------------------------------------------------------------------===// +// Support for DenseElementsAttr. +//===----------------------------------------------------------------------===// + +bool isElementAttrUninitializedDenseResource(mlir::ElementsAttr elementsAttr) { + const auto denseResourceElementsAttr = + mlir::dyn_cast(elementsAttr); + return denseResourceElementsAttr && + !denseResourceElementsAttr.getRawHandle().getBlob(); +} + } // namespace onnx_mlir diff --git a/src/Dialect/ONNX/ONNXOps/OpHelper.hpp b/src/Dialect/ONNX/ONNXOps/OpHelper.hpp index 68976fe05b..e3a364022c 100644 --- a/src/Dialect/ONNX/ONNXOps/OpHelper.hpp +++ b/src/Dialect/ONNX/ONNXOps/OpHelper.hpp @@ -380,6 +380,14 @@ bool isIdentityReshape(mlir::Value input, mlir::Value output, std::string getNodeNameInPresenceOfOpt( mlir::Operation *op, bool useFileLine = true); +//===----------------------------------------------------------------------===// +// Support for DenseElementsAttr. 
+//===----------------------------------------------------------------------===// + +/// Returns true if elementsAttr is a DenseResourceAttr with a blob that cannot +/// be retrieved +bool isElementAttrUninitializedDenseResource(mlir::ElementsAttr elementsAttr); + +#include "src/Dialect/ONNX/ONNXOps/OpHelper.hpp.inc" } // namespace onnx_mlir diff --git a/src/Dialect/ONNX/ONNXOps/Sequence/SplitToSequence.cpp b/src/Dialect/ONNX/ONNXOps/Sequence/SplitToSequence.cpp index 3a17990e56..38f922f765 100644 --- a/src/Dialect/ONNX/ONNXOps/Sequence/SplitToSequence.cpp +++ b/src/Dialect/ONNX/ONNXOps/Sequence/SplitToSequence.cpp @@ -58,6 +58,10 @@ LogicalResult ONNXSplitToSequenceOp::verify() { if (splitRank > 1) return emitOpError() << ": split has rank " << splitRank << " > 1"; if (ElementsAttr entries = getElementAttributeFromONNXValue(splitValue)) { + if (isElementAttrUninitializedDenseResource(entries)) { + return success(); // Return success to allow the parsing of MLIR with + // elided attributes + } if (splitRank == 0) { auto scalar = getScalarValue(entries, splitType); if (scalar <= 0) diff --git a/src/Dialect/ONNX/ONNXOps/Tensor/ConstantOfShape.cpp b/src/Dialect/ONNX/ONNXOps/Tensor/ConstantOfShape.cpp index 787fc9b75e..6058adfcdb 100644 --- a/src/Dialect/ONNX/ONNXOps/Tensor/ConstantOfShape.cpp +++ b/src/Dialect/ONNX/ONNXOps/Tensor/ConstantOfShape.cpp @@ -70,6 +70,10 @@ LogicalResult ONNXConstantOfShapeOp::verify() { if (auto constantOp = getONNXConstantOp(input)) { ElementsAttr valueAttribute = mlir::cast(constantOp.getValueAttr()); + if (isElementAttrUninitializedDenseResource(valueAttribute)) { + return success(); // Return success to allow the parsing of MLIR with + // elided attributes + } // Get repeat values from valueAttribute. 
auto valueIt = valueAttribute.getValues().begin(); for (int i = 0; i < inputShape[0]; ++i) { diff --git a/src/Dialect/ONNX/ONNXOps/Tensor/GatherElements.cpp b/src/Dialect/ONNX/ONNXOps/Tensor/GatherElements.cpp index ce35ad81b3..dde8029994 100644 --- a/src/Dialect/ONNX/ONNXOps/Tensor/GatherElements.cpp +++ b/src/Dialect/ONNX/ONNXOps/Tensor/GatherElements.cpp @@ -71,8 +71,13 @@ LogicalResult ONNXGatherElementsOp::verify() { // along axis of size s. ArrayRef dataShape = dataType.getShape(); const int64_t dataDimAtAxis = dataShape[axis]; - if (dataDimAtAxis >= 0) - if (ElementsAttr valueAttribute = getElementAttributeFromONNXValue(indices)) + if (dataDimAtAxis >= 0) { + if (ElementsAttr valueAttribute = + getElementAttributeFromONNXValue(indices)) { + if (isElementAttrUninitializedDenseResource(valueAttribute)) { + return success(); // Return success to allow the parsing of MLIR with + // elided attributes + } for (IntegerAttr value : valueAttribute.getValues()) { int64_t index = value.getInt(); if (index >= -dataDimAtAxis && index < dataDimAtAxis) @@ -83,6 +88,8 @@ LogicalResult ONNXGatherElementsOp::verify() { onnx_mlir::Diagnostic::Range( -dataDimAtAxis, dataDimAtAxis - 1)); } + } + } return success(); } diff --git a/src/Dialect/ONNX/ONNXOps/Tensor/GatherND.cpp b/src/Dialect/ONNX/ONNXOps/Tensor/GatherND.cpp index f5cf329cd0..b388607c12 100644 --- a/src/Dialect/ONNX/ONNXOps/Tensor/GatherND.cpp +++ b/src/Dialect/ONNX/ONNXOps/Tensor/GatherND.cpp @@ -144,6 +144,10 @@ LogicalResult ONNXGatherNDOp::verify() { // All values in 'indices' are expected to satisfy the inequality: // -data.shape[b + i] <= indices[...,i] <= (data.shape[b + i]-1)]. 
if (ElementsAttr valueAttribute = getElementAttributeFromONNXValue(indices)) { + if (isElementAttrUninitializedDenseResource(valueAttribute)) { + return success(); // Return success to allow the parsing of MLIR with + // elided attributes + } int flatIndex = 0; for (IntegerAttr value : valueAttribute.getValues()) { int64_t indexValue = value.getInt(); diff --git a/test/mlir/onnx/invalid.mlir b/test/mlir/onnx/invalid.mlir index f91d261eaa..7b8d087c03 100644 --- a/test/mlir/onnx/invalid.mlir +++ b/test/mlir/onnx/invalid.mlir @@ -182,6 +182,15 @@ func.func @test_constantofshape_verifier_4() -> tensor<2xi64> { // ----- +func.func @test_constantofshape_elided() -> tensor<2xi64> { + // Tests that we do not crash on elided elements + %0 = onnx.Constant dense_resource<__elided__> : tensor<2xi64> + %1 = "onnx.ConstantOfShape"(%0) : (tensor<2xi64>) -> tensor<2xi64> + "onnx.Return"(%1) : (tensor<2xi64>) -> () +} + +// ----- + func.func @test_flatten_verifier_1(%arg0 : tensor<5x5x1x32xf32>) -> tensor<*xf32> { // expected-error @+1 {{onnx.Flatten: 'axis' value is 5, accepted range is [-4, 4]}} %1 = "onnx.Flatten"(%arg0) {axis = 5 : si64} : (tensor<5x5x1x32xf32>) -> tensor<*xf32> @@ -214,6 +223,15 @@ func.func @test_gatherElements_verifier_2(%data: tensor<2x2xf32>, %indices: tens // ----- +func.func @test_gatherElements_verifier_elided(%data: tensor<12x14x1024xf32>) -> tensor<12x14x14xf32> { + // Tests that we do not crash on elided elements + %indices = onnx.Constant dense_resource<__elided__> : tensor<12x14x14xi64> + %1 = "onnx.GatherElements"(%data, %indices) {axis = -1 : si64} : (tensor<12x14x1024xf32>, tensor<12x14x14xi64>) -> tensor<12x14x14xf32> + "onnx.Return"(%1) : (tensor<12x14x14xf32>) -> () +} + +// ----- + func.func @test_hardmax_verifier_1(%arg0: tensor<2x2xf32>) -> tensor<*xf32> { // expected-error @+1 {{onnx.Hardmax: 'axis' value is 3, accepted range is [-2, 1]}} %1 = "onnx.Hardmax"(%arg0) {axis = 3: si64} : (tensor<2x2xf32>) -> tensor<*xf32> @@ -307,6 +325,16 @@ 
func.func @test_gatherND_verifier_6(%arg0 : tensor<3x4x4x4xf32>) -> tensor<*xf32 // expected-error @+2 {{onnx.GatherND: 'indices[0]' value is 3, accepted range is [-3, 2]}} %indices = "onnx.Constant"() {value = dense<[3,2,2]> : tensor<3xi64>} : () -> tensor<3x3x2xi64> %1 = "onnx.GatherND"(%arg0, %indices) : (tensor<3x4x4x4xf32>, tensor<3x3x2xi64>) -> tensor<*xf32> + "onnx.Return"(%1) : (tensor<*xf32>) -> () +} + +// ----- + +func.func @test_gatherND_verifier_elided(%arg0 : tensor<3x4x4x4xf32>) -> tensor<*xf32> { + // Test that we do not crash on elided elements + %indices = onnx.Constant dense_resource<__elided__> : tensor<3x3x2xi64> + %1 = "onnx.GatherND"(%arg0, %indices) : (tensor<3x4x4x4xf32>, tensor<3x3x2xi64>) -> tensor<*xf32> + "onnx.Return"(%1) : (tensor<*xf32>) -> () } // ----- @@ -580,6 +608,15 @@ func.func @test_splitToSequence_verifier_6(%arg0: tensor<2x2xf32>) -> !onnx.Seq< // ----- +func.func @test_splitToSequence_verifier_elided(%arg0: tensor<2x2xf32>) -> !onnx.Seq> { + // Tests that we do not crash on elided elements + %0 = onnx.Constant dense_resource<__elided__> : tensor + %1 = "onnx.SplitToSequence"(%arg0, %0) : (tensor<2x2xf32>, tensor) -> !onnx.Seq> + "onnx.Return"(%1) : (!onnx.Seq>) -> () +} + +// ----- + func.func @test_topK_verifier_1(%arg0: tensor<3x4xi64>, %arg1: tensor<1xi64>) -> (tensor<*xf32>, tensor<*xi64>) { // expected-error @+1 {{onnx.TopK: 'axis' value is 2, accepted range is [-2, 1]}} %1, %2 = "onnx.TopK"(%arg0, %arg1) {axis = 2 : si64, largest = 1 : si64, sorted = 1 : si64} : (tensor<3x4xi64>, tensor<1xi64>) -> (tensor<*xf32>, tensor<*xi64>) From 5a1e295e642975a476dc805c0b80b45daf274e01 Mon Sep 17 00:00:00 2001 From: "Tung D. Le" Date: Thu, 16 Jan 2025 12:38:53 +0900 Subject: [PATCH 2/2] [NNPA] Revise compiler options for quantization (#3043) * Introduce two new options -nnpa-quant-dynamic and -nnpa-quant-op-types, and remove the old option --nnpa-quanzation. Signed-off-by: Tung D. Le --------- Signed-off-by: Tung D. 
Le --- docs/AddCustomAccelerators.md | 7 ++ src/Accelerators/Accelerator.hpp | 7 ++ .../NNPA/Compiler/NNPACompilerOptions.cpp | 52 +++++++--- .../NNPA/Compiler/NNPACompilerOptions.hpp | 15 +-- .../NNPA/Compiler/NNPACompilerUtils.cpp | 51 +++++++++- .../ONNXToZHigh/DevicePlacement.cpp | 6 +- .../ONNXToZHigh/ONNXLegalityCheck.cpp | 2 +- .../Conversion/ONNXToZHigh/ONNXToZHigh.cpp | 99 ++++++++----------- .../Conversion/ONNXToZHigh/ONNXToZHigh.hpp | 6 +- .../ONNXToZHigh/ONNXToZHighCommon.cpp | 25 ++++- .../ONNXToZHigh/ONNXToZHighCommon.hpp | 9 +- src/Accelerators/NNPA/NNPAAccelerator.cpp | 5 + src/Accelerators/NNPA/NNPAAccelerator.hpp | 3 +- src/Accelerators/NNPA/Pass/NNPAPasses.hpp | 5 +- src/Compiler/CompilerUtils.cpp | 1 + src/Tools/onnx-mlir-opt/onnx-mlir-opt.cpp | 2 + .../onnx-to-zhigh/quantization.mlir | 4 +- 17 files changed, 198 insertions(+), 101 deletions(-) diff --git a/docs/AddCustomAccelerators.md b/docs/AddCustomAccelerators.md index 722abc6ee3..4047cd65f8 100644 --- a/docs/AddCustomAccelerators.md +++ b/docs/AddCustomAccelerators.md @@ -92,6 +92,13 @@ virtual void registerDialects(mlir::DialectRegistry ®istry) const = 0; /// command line options. virtual void registerPasses(int optLevel) const = 0; +//===--------------------------------------------------------------------===// +// Hooks for both onnx-mlir and onnx-mlir-opt drivers +//===--------------------------------------------------------------------===// + +/// Configure passes for the accelerator. +virtual void configurePasses() const = 0; + //===--------------------------------------------------------------------===// // Hooks for onnx-to-krnl pass //===--------------------------------------------------------------------===// diff --git a/src/Accelerators/Accelerator.hpp b/src/Accelerators/Accelerator.hpp index 5c2b47187e..e10449cdf1 100644 --- a/src/Accelerators/Accelerator.hpp +++ b/src/Accelerators/Accelerator.hpp @@ -108,6 +108,13 @@ class Accelerator { /// command line options. 
virtual void registerPasses(int optLevel) const = 0; + //===--------------------------------------------------------------------===// + // Hooks for both onnx-mlir and onnx-mlir-opt drivers + //===--------------------------------------------------------------------===// + + /// Configure passes for the accelerator. + virtual void configurePasses() const = 0; + //===--------------------------------------------------------------------===// // Hooks for onnx-to-krnl pass //===--------------------------------------------------------------------===// diff --git a/src/Accelerators/NNPA/Compiler/NNPACompilerOptions.cpp b/src/Accelerators/NNPA/Compiler/NNPACompilerOptions.cpp index 52d7933888..34457eafd8 100644 --- a/src/Accelerators/NNPA/Compiler/NNPACompilerOptions.cpp +++ b/src/Accelerators/NNPA/Compiler/NNPACompilerOptions.cpp @@ -17,6 +17,10 @@ namespace onnx_mlir { +// Use external storage for the options so that they are globally accessible +std::vector nnpaQuantDynamic; // common for both +std::vector nnpaQuantOpTypes; // common for both + llvm::cl::opt nnpaEmissionTarget( llvm::cl::desc("[Optional] Choose NNPA-related target to emit " "(once selected it will cancel the other targets):"), @@ -101,6 +105,41 @@ llvm::cl::opt nnpaEnableSaturation("nnpa-saturation", "Default is false."), llvm::cl::init(false), llvm::cl::cat(OnnxMlirCommonOptions)); +llvm::cl::list> + nnpaQuantDynamicOpt("nnpa-quant-dynamic", + llvm::cl::desc( + "Enable dynamic quantization of the input model. If enabled, it " + "only quantizes from fp32 to i8. If an ONNX operation is already " + "in i8, no quantization is applied to that operation. Optionally, " + "a comma-separated list of quantization options can be specified " + "as its value, e.g. 
-nnpa-quant-dynamic=symActivation,symWeight."), + llvm::cl::values(clEnumVal(symWeight, "Symmetric quant for weights."), + clEnumVal(asymWeight, "Asymmetric quant for weights."), + clEnumVal(symActivation, "Symmetric quant for activations."), + clEnumVal(asymActivation, "Asymmetric quant for activations."), + // Use an empty string for the case where `--nnpa-quant-dynamic` is + // specified on the command line WITHOUT value, which is different + // from the case where `--nnpa-quant-dynamic` is NOT specified on + // the command line. + clEnumValN(autoQuantOpt, "", + "Compiler automatically finds the best options. Once this " + "option (an empty string) is in the list, the other options " + "are ignored. This is the default option when " + "`-nnpa-quant-dynamic` is specified without any value.")), + llvm::cl::location(nnpaQuantDynamic), llvm::cl::ValueOptional, + llvm::cl::CommaSeparated, llvm::cl::cat(OnnxMlirCommonOptions)); + +llvm::cl::list> nnpaQuantOpTypesOpt( + "nnpa-quant-op-types", + llvm::cl::desc( + "A comma-separated list of types of operations that are quantized. " + "E.g. 'MatMul,Conv'. Strings for types are the same as ONNX operator " + "names in https://onnx.ai/onnx/operators/. Currently, only MatMul is " + "supported. Without specifying this option, the compiler will " + "determine the operation types by itself."), + llvm::cl::location(nnpaQuantOpTypes), llvm::cl::ValueOptional, + llvm::cl::CommaSeparated, llvm::cl::cat(OnnxMlirCommonOptions)); + llvm::cl::opt nnpaUseDynamicQuantizeLinearOnCPU("nnpa-cpu-dql", llvm::cl::desc("Use dynamic quantized linear on CPU. Default is false"), llvm::cl::init(false), llvm::cl::cat(OnnxMlirCommonOptions)); @@ -111,17 +150,4 @@ llvm::cl::opt nnpaUseDynamicQuantizeLinearOnCPUForScaleOffset( " scale and offset on CPU. Default is false"), llvm::cl::init(false), llvm::cl::cat(OnnxMlirCommonOptions)); -llvm::cl::opt nnpaQuantization("nnpa-quantization", - llvm::cl::desc("Enable quantization with a specific type. 
Only " - "MatMul whose weight is a constant is supported."), - llvm::cl::values( - clEnumVal(DynSymI8, - "Dynamic Quantization to signed integer 8. Asymmetric " - "quant for activations and symmetric quant for weights."), - clEnumVal(SymSymI8, - "Dynamic Quantization to signed integer 8. Symmetric " - "quant for activations and symmetric quant for weights."), - clEnumVal(QNONE, "No quantization (default).")), - llvm::cl::init(QNONE), llvm::cl::cat(OnnxMlirOptions)); - } // namespace onnx_mlir diff --git a/src/Accelerators/NNPA/Compiler/NNPACompilerOptions.hpp b/src/Accelerators/NNPA/Compiler/NNPACompilerOptions.hpp index 366efee3fe..e6f7cf6aa7 100644 --- a/src/Accelerators/NNPA/Compiler/NNPACompilerOptions.hpp +++ b/src/Accelerators/NNPA/Compiler/NNPACompilerOptions.hpp @@ -57,12 +57,12 @@ typedef enum { // Quantization type typedef enum { - DynSymI8, /* Dynamic quantization to signed integer 8. Asymmetric quant for - activations and symmetric quant for weights.*/ - SymSymI8, /* Dynamic quantization to signed integer 8. Symmetric quant for - activations and symmetric quant for weights.*/ - QNONE, /* Only qualifying ops that are faster on NNPA. 
*/ -} NNPAQuantType; + symWeight, + asymWeight, + symActivation, + asymActivation, + autoQuantOpt, +} NNPAQuantOptions; extern llvm::cl::OptionCategory OnnxMlirOptions; extern llvm::cl::OptionCategory OnnxMlirCommonOptions; @@ -79,7 +79,8 @@ extern llvm::cl::opt nnpaSaveDevicePlacementFile; extern llvm::cl::opt nnpaEnableSaturation; extern llvm::cl::opt nnpaUseDynamicQuantizeLinearOnCPU; extern llvm::cl::opt nnpaUseDynamicQuantizeLinearOnCPUForScaleOffset; -extern llvm::cl::opt nnpaQuantization; +extern std::vector nnpaQuantDynamic; +extern std::vector nnpaQuantOpTypes; } // namespace onnx_mlir #endif diff --git a/src/Accelerators/NNPA/Compiler/NNPACompilerUtils.cpp b/src/Accelerators/NNPA/Compiler/NNPACompilerUtils.cpp index d7c5cfcac0..45a9af09f8 100644 --- a/src/Accelerators/NNPA/Compiler/NNPACompilerUtils.cpp +++ b/src/Accelerators/NNPA/Compiler/NNPACompilerUtils.cpp @@ -49,11 +49,56 @@ using namespace onnx_mlir; namespace onnx_mlir { void configurePassesNNPA() { - configureOnnxToZHighLoweringPass(optReport == OptReport::NNPAUnsupportedOps); // z16 does not support for hardware saturation. // So, force its usage to compiler generated sticks. if (nnpaEnableSaturation && isLessEqualNNPALevel(NNPALevel::M14)) nnpaEnableCompilerStickUnstick = true; + + // Configure ONNXToZHighLoweringPass. + bool isDynQuant = !nnpaQuantDynamic.empty(); + // Default/auto mode: symmetric for weights and asymmetric for activations. + bool isActivationSym = false; + bool isWeightSym = true; + std::vector quantOpTypes; + if (isDynQuant) { + // Set options for activations and weights if they are given. + // When auto mode is specified, the other specified options are ignored. 
+ if (!llvm::is_contained(nnpaQuantDynamic, NNPAQuantOptions::autoQuantOpt)) { + for (unsigned i = 0; i < nnpaQuantDynamic.size(); ++i) { + switch (nnpaQuantDynamic[i]) { + case NNPAQuantOptions::symWeight: + isWeightSym = true; + break; + case NNPAQuantOptions::asymWeight: + isWeightSym = false; + break; + case NNPAQuantOptions::symActivation: + isActivationSym = true; + break; + case NNPAQuantOptions::asymActivation: + isActivationSym = false; + break; + default: + llvm_unreachable("Unsupported quantization options"); + break; + } + } + } + if (!isWeightSym) { + // TODO: Support asymmetric quantization for weights. + llvm::outs() + << "Asymmetric quantization for weights is not yet supported. " + "Turning off quantization.\n"; + isDynQuant = false; + } + if (nnpaQuantOpTypes.empty()) { + quantOpTypes.emplace_back("MatMul"); + } else { + quantOpTypes = nnpaQuantOpTypes; + } + } + configureONNXToZHighLoweringPass(optReport == OptReport::NNPAUnsupportedOps, + isDynQuant, isActivationSym, isWeightSym, quantOpTypes); } void addONNXToZHighPasses(mlir::PassManager &pm) { @@ -85,7 +130,8 @@ void addONNXToZHighPasses(mlir::PassManager &pm) { pm.addNestedPass( onnx_mlir::createInstrumentPass(instrumentOps, instrumentActions)); - pm.addPass(onnx_mlir::createONNXToZHighPass(nnpaQuantization)); + // Lowering ONNX to ZHigh. + pm.addPass(onnx_mlir::createONNXToZHighPass()); pm.addNestedPass(onnx_mlir::createShapeInferencePass()); // There are more opportunities for const propagation once all zhigh ops were @@ -191,7 +237,6 @@ void addPassesNNPA(mlir::OwningOpRef &module, // Override pass configurations. 
configurePasses(); - configurePassesNNPA(); // LLVM_DEBUG(llvm::dbgs() << "Adding NNPA passes" << std::endl;); if (emissionTarget >= EmitONNXIR) { diff --git a/src/Accelerators/NNPA/Conversion/ONNXToZHigh/DevicePlacement.cpp b/src/Accelerators/NNPA/Conversion/ONNXToZHigh/DevicePlacement.cpp index 47724d8d3e..9979f0bbf3 100644 --- a/src/Accelerators/NNPA/Conversion/ONNXToZHigh/DevicePlacement.cpp +++ b/src/Accelerators/NNPA/Conversion/ONNXToZHigh/DevicePlacement.cpp @@ -161,7 +161,7 @@ void DevicePlacementPass::runOnOperation() { // Disable reporting on NNPA unsupported ops in this pass even if // `-opt-report=NNPAUnsupportedOps` is specified.. - OnnxToZHighLoweringConfiguration::reportOnNNPAUnsupportedOps = 0; + ONNXToZHighLoweringConfiguration::reportOnNNPAUnsupportedOps = 0; // Run the unknown dimension analysis to help check equality of unknown // dimensions at compile time. @@ -200,13 +200,13 @@ void DevicePlacementPass::runOnOperation() { // Call ONNXToZHigh pass for lowering multiple ONNX ops at once to ZHigh. // E.g. `onnx.ReLu (onnx.Conv)` to zhigh.Conv. RewritePatternSet Patterns2(context); - getONNXToZHighMultipleOpPatterns(Patterns2, nnpaQuantization); + getONNXToZHighMultipleOpPatterns(Patterns2); (void)applyAnalysisConversion(module, target, std::move(Patterns2), ConversionConfig{.legalizableOps = &legalizedOps2}); // Call ONNXToZHigh pass for lowering a single ONNX op to ZHigh. 
RewritePatternSet Patterns3(context); - getONNXToZHighOneOpPatterns(Patterns3, nnpaQuantization); + getONNXToZHighOneOpPatterns(Patterns3); getONNXToZHighOneOpDynamicallyLegal(&target, &dimAnalysis); (void)applyAnalysisConversion(module, target, std::move(Patterns3), ConversionConfig{.legalizableOps = &legalizedOps3}); diff --git a/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXLegalityCheck.cpp b/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXLegalityCheck.cpp index 76fa3fa547..80c41b77ae 100644 --- a/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXLegalityCheck.cpp +++ b/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXLegalityCheck.cpp @@ -27,7 +27,7 @@ using namespace onnx_mlir; /// Report NNPA unsupported case. bool onnxToZHighUnsupportedReport(Operation *op, const std::string &message) { - if (OnnxToZHighLoweringConfiguration::reportOnNNPAUnsupportedOps && + if (ONNXToZHighLoweringConfiguration::reportOnNNPAUnsupportedOps && !message.empty()) { StringAttr opName = op->getName().getIdentifier(); std::string nodeNameStr = getNodeNameInPresenceOfOpt(op); diff --git a/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXToZHigh.cpp b/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXToZHigh.cpp index 78e94a6a2a..921fba751d 100644 --- a/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXToZHigh.cpp +++ b/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXToZHigh.cpp @@ -644,8 +644,8 @@ class replaceONNXMatMulByDynQuantI8Pattern using OpRewritePattern::OpRewritePattern; replaceONNXMatMulByDynQuantI8Pattern( - MLIRContext *context, PatternBenefit benefit = 1, bool symForA = false) - : OpRewritePattern(context, benefit), symForA(symForA) {} + MLIRContext *context, PatternBenefit benefit = 1) + : OpRewritePattern(context, benefit) {} LogicalResult matchAndRewrite( ONNXMatMulOp mmOp, PatternRewriter &rewriter) const override { @@ -655,7 +655,8 @@ class replaceONNXMatMulByDynQuantI8Pattern Value B = mmOp.getB(); // Dynamic quantization helper. 
- DynQuantI8PatternHelper dqHelper(rewriter, loc, op, A, B, nullptr, symForA); + DynQuantI8PatternHelper dqHelper(rewriter, loc, op, A, B, nullptr, + ONNXToZHighLoweringConfiguration::Quant::isActivationSym); // Match if (!isSuitableForZDNN(mmOp) || failed(dqHelper.match())) @@ -666,9 +667,6 @@ class replaceONNXMatMulByDynQuantI8Pattern rewriter.replaceOp(op, res); return success(); } - -private: - bool symForA = false; }; /** @@ -684,8 +682,8 @@ class replaceONNXMatMulAddByDynQuantI8Pattern using OpRewritePattern::OpRewritePattern; replaceONNXMatMulAddByDynQuantI8Pattern( - MLIRContext *context, PatternBenefit benefit = 1, bool symForA = false) - : OpRewritePattern(context, benefit), symForA(symForA) {} + MLIRContext *context, PatternBenefit benefit = 1) + : OpRewritePattern(context, benefit) {} LogicalResult matchAndRewrite( ONNXAddOp addOp, PatternRewriter &rewriter) const override { @@ -704,7 +702,8 @@ class replaceONNXMatMulAddByDynQuantI8Pattern Value B = mmOp.getB(); // Match A, B, C. 
- DynQuantI8PatternHelper dqHelper(rewriter, loc, op, A, B, C, symForA); + DynQuantI8PatternHelper dqHelper(rewriter, loc, op, A, B, C, + ONNXToZHighLoweringConfiguration::Quant::isActivationSym); if (succeeded(dqHelper.match())) { Value res = dqHelper.rewriteSym(); rewriter.replaceOp(op, res); @@ -713,9 +712,6 @@ class replaceONNXMatMulAddByDynQuantI8Pattern return failure(); } - -private: - bool symForA = false; }; /** @@ -732,8 +728,8 @@ class replaceONNXGemmByDynQuantI8Pattern : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; replaceONNXGemmByDynQuantI8Pattern( - MLIRContext *context, PatternBenefit benefit = 1, bool symForA = false) - : OpRewritePattern(context, benefit), symForA(symForA) {} + MLIRContext *context, PatternBenefit benefit = 1) + : OpRewritePattern(context, benefit) {} LogicalResult matchAndRewrite( ONNXGemmOp gemmOp, PatternRewriter &rewriter) const override { @@ -747,8 +743,9 @@ class replaceONNXGemmByDynQuantI8Pattern : public OpRewritePattern { bool transB = (gemmOp.getTransB() != 0); // Dynamic quantization helper. - DynQuantI8PatternHelper dqHelper( - rewriter, loc, op, A, B, isNoneValue(C) ? nullptr : C, symForA); + DynQuantI8PatternHelper dqHelper(rewriter, loc, op, A, B, + isNoneValue(C) ? 
nullptr : C, + ONNXToZHighLoweringConfiguration::Quant::isActivationSym); // Match // TODO: if B is a constant and it is transposed, we can do transpose @@ -765,9 +762,6 @@ class replaceONNXGemmByDynQuantI8Pattern : public OpRewritePattern { rewriter.replaceOp(op, res); return success(); } - -private: - bool symForA = false; }; class replaceONNXMatMulIntegerPattern @@ -1535,28 +1529,11 @@ struct ONNXToZHighLoweringPass ONNXToZHighLoweringPass() = default; ONNXToZHighLoweringPass(const ONNXToZHighLoweringPass &pass) : PassWrapper>() {} - ONNXToZHighLoweringPass(NNPAQuantType quantMode) { - this->quantMode = quantMode; - } void runOnOperation() final; - -public: - Option quantMode{*this, "quantization", - llvm::cl::desc("Enable quantization"), - llvm::cl::values( - clEnumVal(DynSymI8, - "Dynamic Quantization to signed integer 8. Asymmetric quant for " - "activations and symmetric quant for weights."), - clEnumVal(SymSymI8, - "Dynamic Quantization to signed integer 8. Symmetric quant for " - "activations and symmetric quant for weights."), - clEnumVal(QNONE, "No quantization (default).")), - llvm::cl::init(QNONE)}; }; } // end anonymous namespace. -void getONNXToZHighOneOpPatterns( - RewritePatternSet &patterns, NNPAQuantType quantMode) { +void getONNXToZHighOneOpPatterns(RewritePatternSet &patterns) { MLIRContext *context = patterns.getContext(); patterns.insert(context); patterns.insert(context); @@ -1602,17 +1579,21 @@ void getONNXToZHighOneOpPatterns( patterns.insert(context); patterns.insert(context); - // Pattern for i8 dynamic quantization, symmetric mode. + // Pattern for i8 dynamic quantization. if (isCompatibleWithNNPALevel(NNPALevel::M15) && - (quantMode == NNPAQuantType::DynSymI8 || - quantMode == NNPAQuantType::SymSymI8)) { + ONNXToZHighLoweringConfiguration::isDynQuant) { // Bump up the pattern benefit to run these before non-quantization // patterns. 
PatternBenefit quantPriority(QUANT_PATTERN_BENEFIT); - patterns.insert( - context, quantPriority, quantMode == NNPAQuantType::SymSymI8); - patterns.insert( - context, quantPriority, quantMode == NNPAQuantType::SymSymI8); + if (llvm::any_of(ONNXToZHighLoweringConfiguration::Quant::opTypes, + [](std::string s) { + return StringRef(s).equals_insensitive("MatMul"); + })) { + patterns.insert( + context, quantPriority); + patterns.insert( + context, quantPriority); + } } } @@ -1648,8 +1629,7 @@ void getONNXToZHighOneOpDynamicallyLegal( addDynamicallyLegalOpFor(target, dimAnalysis); } -void getONNXToZHighMultipleOpPatterns( - RewritePatternSet &patterns, NNPAQuantType quantMode) { +void getONNXToZHighMultipleOpPatterns(RewritePatternSet &patterns) { MLIRContext *context = patterns.getContext(); patterns.insert(context); patterns.insert(context); @@ -1663,15 +1643,19 @@ void getONNXToZHighMultipleOpPatterns( patterns.insert(context); patterns.insert(context); - // Pattern for i8 dynamic quantization, symmetric mode. + // Pattern for i8 dynamic quantization. if (isCompatibleWithNNPALevel(NNPALevel::M15) && - (quantMode == NNPAQuantType::DynSymI8 || - quantMode == NNPAQuantType::SymSymI8)) { + (ONNXToZHighLoweringConfiguration::isDynQuant)) { // Bump up the pattern benefit to run these before non-quantization // patterns. PatternBenefit quantPriority(QUANT_PATTERN_BENEFIT); - patterns.insert( - context, quantPriority, quantMode == NNPAQuantType::SymSymI8); + if (llvm::any_of(ONNXToZHighLoweringConfiguration::Quant::opTypes, + [](std::string s) { + return StringRef(s).equals_insensitive("MatMul"); + })) { + patterns.insert( + context, quantPriority); + } } // Shape inference for newly-added operations. @@ -1687,8 +1671,8 @@ void ONNXToZHighLoweringPass::runOnOperation() { // Enable reporting on NNPA unsupported ops when specifying // `--opt-report=NNPAUnsupportedOps`. 
- OnnxToZHighLoweringConfiguration::reportOnNNPAUnsupportedOps = - OnnxToZHighLoweringConfiguration::optReportNNPAUnsupportedOps; + ONNXToZHighLoweringConfiguration::reportOnNNPAUnsupportedOps = + ONNXToZHighLoweringConfiguration::optReportNNPAUnsupportedOps; // We define the specific operations, or dialects, that are legal targets for // this lowering. @@ -1706,8 +1690,7 @@ void ONNXToZHighLoweringPass::runOnOperation() { // a single ONNX Op, because the single op lowering might have conditions that // prohibit the combined ops lowering happened. RewritePatternSet combinedPatterns(&getContext()); - onnx_mlir::getONNXToZHighMultipleOpPatterns( - combinedPatterns, this->quantMode); + onnx_mlir::getONNXToZHighMultipleOpPatterns(combinedPatterns); // It's ok to fail. (void)applyPatternsAndFoldGreedily(module, std::move(combinedPatterns)); @@ -1719,7 +1702,7 @@ void ONNXToZHighLoweringPass::runOnOperation() { // Single ONNX to ZHigh operation lowering. RewritePatternSet patterns(&getContext()); - onnx_mlir::getONNXToZHighOneOpPatterns(patterns, this->quantMode); + onnx_mlir::getONNXToZHighOneOpPatterns(patterns); // This is to make sure we don't want to alloc any MemRef at this high-level // representation. @@ -1742,8 +1725,4 @@ std::unique_ptr createONNXToZHighPass() { return std::make_unique(); } -std::unique_ptr createONNXToZHighPass(NNPAQuantType quantMode) { - return std::make_unique(quantMode); -} - } // namespace onnx_mlir diff --git a/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXToZHigh.hpp b/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXToZHigh.hpp index d121058168..caddfc24b8 100644 --- a/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXToZHigh.hpp +++ b/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXToZHigh.hpp @@ -24,10 +24,8 @@ namespace onnx_mlir { // Exports ONNXtoZHigh patterns. 
-void getONNXToZHighOneOpPatterns( - mlir::RewritePatternSet &patterns, NNPAQuantType quantMode); -void getONNXToZHighMultipleOpPatterns( - mlir::RewritePatternSet &patterns, NNPAQuantType quantMode); +void getONNXToZHighOneOpPatterns(mlir::RewritePatternSet &patterns); +void getONNXToZHighMultipleOpPatterns(mlir::RewritePatternSet &patterns); // Exports ONNXtoZHigh dynamically legal checks. void getONNXToZHighOneOpDynamicallyLegal( diff --git a/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXToZHighCommon.cpp b/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXToZHighCommon.cpp index ce7c4160bd..4a3c03205d 100644 --- a/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXToZHighCommon.cpp +++ b/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXToZHighCommon.cpp @@ -103,13 +103,30 @@ Value getDynShape(Location loc, PatternRewriter &rewriter, Value x) { RankedTensorType::get({r}, rewriter.getI64Type()), dims, 0); } -int OnnxToZHighLoweringConfiguration::optReportNNPAUnsupportedOps = +int ONNXToZHighLoweringConfiguration::optReportNNPAUnsupportedOps = 0; // 0: Compile option (--opt-report=NNPAUnsupportedOps) not specified. -int OnnxToZHighLoweringConfiguration::reportOnNNPAUnsupportedOps = +int ONNXToZHighLoweringConfiguration::reportOnNNPAUnsupportedOps = 0; // 0: no reporting. 
-void configureOnnxToZHighLoweringPass(bool optReportNNPAUnsupportedOps) { - OnnxToZHighLoweringConfiguration::optReportNNPAUnsupportedOps = +bool ONNXToZHighLoweringConfiguration::isDynQuant = false; +bool ONNXToZHighLoweringConfiguration::Quant::isActivationSym = false; +bool ONNXToZHighLoweringConfiguration::Quant::isWeightSym = true; +llvm::SmallVector + ONNXToZHighLoweringConfiguration::Quant::opTypes = {}; + +void configureONNXToZHighLoweringPass(bool optReportNNPAUnsupportedOps, + bool isDynQuant, bool quantIsActivationSym, bool quantIsWeightSym, + llvm::ArrayRef quantOpTypes) { + ONNXToZHighLoweringConfiguration::optReportNNPAUnsupportedOps = optReportNNPAUnsupportedOps; + ONNXToZHighLoweringConfiguration::isDynQuant = isDynQuant; + if (isDynQuant) { + ONNXToZHighLoweringConfiguration::Quant::isActivationSym = + quantIsActivationSym; + ONNXToZHighLoweringConfiguration::Quant::isWeightSym = quantIsWeightSym; + ONNXToZHighLoweringConfiguration::Quant::opTypes.insert( + ONNXToZHighLoweringConfiguration::Quant::opTypes.begin(), + quantOpTypes.begin(), quantOpTypes.end()); + } } } // namespace onnx_mlir diff --git a/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXToZHighCommon.hpp b/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXToZHighCommon.hpp index 382d596e35..4a92309443 100644 --- a/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXToZHighCommon.hpp +++ b/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXToZHighCommon.hpp @@ -30,9 +30,16 @@ const std::string NNPA_DEVICE = "nnpa"; bool isEnableScalarBcastBinary(); -struct OnnxToZHighLoweringConfiguration { +// Populated by configureONNXToZHighLoweringPass(). 
+struct ONNXToZHighLoweringConfiguration { static int optReportNNPAUnsupportedOps; static int reportOnNNPAUnsupportedOps; + static bool isDynQuant; + struct Quant { + static bool isActivationSym; + static bool isWeightSym; + static llvm::SmallVector opTypes; + }; }; template diff --git a/src/Accelerators/NNPA/NNPAAccelerator.cpp b/src/Accelerators/NNPA/NNPAAccelerator.cpp index 2e4a06c477..50ef2bf0ba 100644 --- a/src/Accelerators/NNPA/NNPAAccelerator.cpp +++ b/src/Accelerators/NNPA/NNPAAccelerator.cpp @@ -138,6 +138,11 @@ void NNPAAccelerator::registerPasses(int optLevel) const { }); } +void NNPAAccelerator::configurePasses() const { + LLVM_DEBUG(llvm::dbgs() << "Configuring passes for NNPA accelerator\n"); + configurePassesNNPA(); +} + mlir::MemRefType NNPAAccelerator::convertTensorTypeToMemRefType( const mlir::TensorType tensorType) const { assert(tensorType.hasRank() && "expected only ranked shapes"); diff --git a/src/Accelerators/NNPA/NNPAAccelerator.hpp b/src/Accelerators/NNPA/NNPAAccelerator.hpp index e40bd774b6..a908c02da2 100644 --- a/src/Accelerators/NNPA/NNPAAccelerator.hpp +++ b/src/Accelerators/NNPA/NNPAAccelerator.hpp @@ -47,7 +47,7 @@ class NNPAAccelerator final : public Accelerator { uint64_t getVersionNumber() const final; //===--------------------------------------------------------------------===// - // Hooks for onnx-mlir-opt driver + // Hooks for onnx-mlir driver //===--------------------------------------------------------------------===// virtual void addPasses(mlir::OwningOpRef &module, mlir::PassManager &pm, onnx_mlir::EmissionTargetType &emissionTarget, @@ -57,6 +57,7 @@ class NNPAAccelerator final : public Accelerator { //===--------------------------------------------------------------------===// virtual void registerDialects(mlir::DialectRegistry ®istry) const final; virtual void registerPasses(int optLevel) const final; + virtual void configurePasses() const final; 
//===--------------------------------------------------------------------===// // Hooks for onnx-to-krnl pass //===--------------------------------------------------------------------===// diff --git a/src/Accelerators/NNPA/Pass/NNPAPasses.hpp b/src/Accelerators/NNPA/Pass/NNPAPasses.hpp index f00fcdedff..c23fb7f158 100644 --- a/src/Accelerators/NNPA/Pass/NNPAPasses.hpp +++ b/src/Accelerators/NNPA/Pass/NNPAPasses.hpp @@ -30,8 +30,9 @@ std::unique_ptr createDevicePlacementPass( /// Add pass for lowering ONNX ops to ZHigh ops. std::unique_ptr createONNXToZHighPass(); -std::unique_ptr createONNXToZHighPass(NNPAQuantType quantMode); -void configureOnnxToZHighLoweringPass(bool reportOnNNPAUnsupportedOps); +void configureONNXToZHighLoweringPass(bool reportOnNNPAUnsupportedOps, + bool isDynQuant, bool quantIsActivationSym, bool quantIsWeightSym, + llvm::ArrayRef quantOpTypes); /// Add pass for rewriting ONNX ops for ZHigh. std::unique_ptr createRewriteONNXForZHighPass(); diff --git a/src/Compiler/CompilerUtils.cpp b/src/Compiler/CompilerUtils.cpp index 4310010d36..8ca220989b 100644 --- a/src/Compiler/CompilerUtils.cpp +++ b/src/Compiler/CompilerUtils.cpp @@ -1024,6 +1024,7 @@ int compileModule(mlir::OwningOpRef &module, bool hasAccel = false; for (auto *accel : onnx_mlir::accel::Accelerator::getAccelerators()) { hasAccel = true; + accel->configurePasses(); accel->addPasses(module, pm, emissionTarget, outputNameNoExt); } if (!hasAccel) diff --git a/src/Tools/onnx-mlir-opt/onnx-mlir-opt.cpp b/src/Tools/onnx-mlir-opt/onnx-mlir-opt.cpp index 29411aaf68..2acbca10b0 100644 --- a/src/Tools/onnx-mlir-opt/onnx-mlir-opt.cpp +++ b/src/Tools/onnx-mlir-opt/onnx-mlir-opt.cpp @@ -181,6 +181,8 @@ int main(int argc, char **argv) { // Passes are configured with command line options so they must be configured // after command line parsing but before any passes are run. 
configurePasses(); + for (auto *accel : accel::Accelerator::getAccelerators()) + accel->configurePasses(); auto passManagerSetupFn = [&](PassManager &pm) { MLIRContext *ctx = pm.getContext(); diff --git a/test/mlir/accelerators/nnpa/conversion/onnx-to-zhigh/quantization.mlir b/test/mlir/accelerators/nnpa/conversion/onnx-to-zhigh/quantization.mlir index 83565c6e42..658cc0e4e7 100644 --- a/test/mlir/accelerators/nnpa/conversion/onnx-to-zhigh/quantization.mlir +++ b/test/mlir/accelerators/nnpa/conversion/onnx-to-zhigh/quantization.mlir @@ -1,5 +1,5 @@ -// RUN: onnx-mlir-opt --march=arch15 --maccel=NNPA --convert-onnx-to-zhigh="quantization=DynSymI8" --constprop-onnx --canonicalize --mlir-print-elementsattrs-with-hex-if-larger=-1 %s -split-input-file | FileCheck %s -// RUN: onnx-mlir-opt --march=arch15 --maccel=NNPA --convert-onnx-to-zhigh="quantization=SymSymI8" --constprop-onnx --canonicalize --mlir-print-elementsattrs-with-hex-if-larger=-1 %s -split-input-file | FileCheck %s --check-prefix=SYMSYMI8 +// RUN: onnx-mlir-opt --march=arch15 --maccel=NNPA --convert-onnx-to-zhigh --nnpa-quant-dynamic --constprop-onnx --canonicalize --mlir-print-elementsattrs-with-hex-if-larger=-1 %s -split-input-file | FileCheck %s +// RUN: onnx-mlir-opt --march=arch15 --maccel=NNPA --convert-onnx-to-zhigh --nnpa-quant-dynamic=symActivation,symWeight --constprop-onnx --canonicalize --mlir-print-elementsattrs-with-hex-if-larger=-1 %s -split-input-file | FileCheck %s --check-prefix=SYMSYMI8 func.func @test_correctness_of_symmetric_quant_for_weight(%arg0: tensor) -> tensor { %0 = onnx.Constant dense<[[-0.00718058366], [5.253110e-01], [-0.0434652828], [-0.305256933], [0.193365857], [0.0105065238], [-0.143788248], [-0.0161222648], [0.0230324212], [-0.34107244], [-0.273072243], [-0.104352467], [0.0164068397], [-1.32305741], [-0.0345043093], [-0.232206389], [-0.150001124], [0.119475454], [0.730642438], [-0.407772154], [-0.0164191965], [-1.625590e-01], [-0.112515017], [0.158920377], 
[-0.0997497215], [0.0788274407], [1.1542908], [0.492949218], [-0.125796661], [0.0107790371], [0.141159713], [-0.0774109289], [-0.438130081], [-0.0888700857], [0.207725927], [-0.0913108587], [0.258232892], [0.0672571063], [-0.100412264], [1.68460846], [-0.289168775], [-0.686722457], [0.903651654], [0.110602334], [-0.0505490415], [1.31204939], [0.136107579], [0.26376456], [-0.508291602], [-0.0118971812], [-0.0373991691], [0.448705465], [0.00448446581], [-0.165114298], [0.156860754], [0.141124308], [-0.272756487], [-0.0834815949], [0.020905681], [-0.0877983123], [-1.0087887], [-0.353012145], [-0.0439243801], [-0.00592191564], [-0.0637216269], [0.175808683], [-0.193864927], [-0.0574007072], [0.390869558], [0.138100505], [0.429396927], [1.10117233], [-0.362377733], [0.116578773], [0.0540139228], [-5.85162896E-4], [-0.335441321], [-0.0902953073], [0.017575942], [-0.0359748788], [1.50025952], [-0.668821096], [0.0109066488], [9.907780e-01], [0.10227681], [-0.0582750589], [0.0172416102], [0.0429656394], [0.0465254933], [0.350135148], [-0.260139734], [0.199394852], [-0.136131078], [0.241424322], [0.855418264], [-0.160689577], [-0.825074911], [-0.124827594], [0.0153419804], [0.389386117], [0.153694436], [-0.897866904], [-0.292769879], [0.181667477], [-0.188009143], [-0.0245181341], [-2.17088842], [-0.0526076891], [-0.108600065], [0.187120304], [0.171495944], [0.310159177], [2.204240e+00], [0.0506350659], [-0.159419239], [-0.145082235], [-0.0991335287], [-0.0680764392], [-0.311415762], [-0.187137261], [-0.416945577], [0.0703471377], [0.498331547], [-0.41216433], [-0.427900195], [0.102105901], [0.130767033], [-0.440281332], [0.778514624], [-0.253678083], [0.395671815], [0.380029172], [-0.418493837], [-0.288157403], [0.0689846799], [1.269960e+00], [-0.0585722439], [-0.138125435], [-0.191710189], [0.0163070802], [0.159242466], [0.116627224], [0.289637923], [-0.299413532], [-0.0216965247], [0.271396786], [0.250576884], [-0.131420374], [0.137698188], [-0.0102280416], [0.234722644], 
[-0.0366179943], [-0.105632246], [-0.145528033], [-0.278210133], [-0.247100428], [0.217718393], [0.171669215], [0.0151556451], [0.961385667], [-0.0484847203], [0.434219301], [-0.00167646946], [-0.0308207348], [-0.102328695], [-0.127907664], [-0.185960412], [0.210866481], [0.140434876], [-0.233541235], [-0.123745643], [-0.0113738365], [1.30043447], [0.179708347], [-0.331716627], [0.0133318678], [-0.107284561], [-0.114116102], [-0.478514463], [0.0616452768], [-0.781869769], [-0.121830635], [-0.0684970543], [-6.584100e-02], [-0.131784603], [-0.619898796], [0.160366163], [-0.50115186], [0.0228514839], [0.581515431], [4.220270e-01], [1.944400e-01], [-1.07740963], [3.732520e-01], [0.725471556], [-0.117193311], [-0.105938725], [0.320118755], [-0.484032601], [-0.0467250831]]> : tensor<200x1xf32>