update

Signed-off-by: Alexandre Eichenberger <[email protected]>
onnx · AlexandreEichenberger · Feb 3, 2025 · Dec 19, 2024 · Dec 19, 2024 · Dec 19, 2024
commit 903fcb49e3263457f1863584391b86c6c9502260
diff --git a/docs/Dialects/zhigh.md b/docs/Dialects/zhigh.md
diff --git a/docs/Dialects/zlow.md b/docs/Dialects/zlow.md
@@ -342,6 +342,52 @@ Interfaces: `MemoryEffectOpInterface`
 | `shape` | memref of 64-bit signless integer values
 | `hn_output` | memref of dlfloat16 type values
 
+### `zlow.gelu` (::onnx_mlir::zlow::ZLowGeluOp)
+
+_ZLow gelu operation_
+
+ZLow operation to perform a gelu.
+
+Traits: `MemRefsNormalizable`
+
+#### Attributes:
+
+<table>
+<tr><th>Attribute</th><th>MLIR Type</th><th>Description</th></tr>
+<tr><td><code>layout</code></td><td>::mlir::StringAttr</td><td>string attribute</td></tr>
+</table>
+
+#### Operands:
+
+| Operand | Description |
+| :-----: | ----------- |
+| `X` | memref of dlfloat16 type values
+| `shape` | memref of 64-bit signless integer values
+| `Out` | memref of dlfloat16 type values
+
+### `zlow.invsqrt` (::onnx_mlir::zlow::ZLowInvSqrtOp)
+
+_ZLow invsqrt operation_
+
+ZLow operation to perform an invsqrt.
+
+Traits: `MemRefsNormalizable`
+
+#### Attributes:
+
+<table>
+<tr><th>Attribute</th><th>MLIR Type</th><th>Description</th></tr>
+<tr><td><code>layout</code></td><td>::mlir::StringAttr</td><td>string attribute</td></tr>
+</table>
+
+#### Operands:
+
+| Operand | Description |
+| :-----: | ----------- |
+| `X` | memref of dlfloat16 type values
+| `shape` | memref of 64-bit signless integer values
+| `Out` | memref of dlfloat16 type values
+
 ### `zlow.lstm` (::onnx_mlir::zlow::ZLowLSTMOp)
 
 _ZLow lstm operation_
@@ -387,6 +433,30 @@ Interfaces: `MemoryEffectOpInterface`
 | `hn_output` | memref of dlfloat16 type values
 | `cf_output` | memref of dlfloat16 type values
 
+### `zlow.leakyrelu` (::onnx_mlir::zlow::ZLowLeakyReluOp)
+
+_ZLow leakyrelu operation_
+
+ZLow operation to perform a leakyrelu.
+
+Traits: `MemRefsNormalizable`
+
+#### Attributes:
+
+<table>
+<tr><th>Attribute</th><th>MLIR Type</th><th>Description</th></tr>
+<tr><td><code>alpha</code></td><td>::mlir::FloatAttr</td><td>32-bit float attribute</td></tr>
+<tr><td><code>layout</code></td><td>::mlir::StringAttr</td><td>string attribute</td></tr>
+</table>
+
+#### Operands:
+
+| Operand | Description |
+| :-----: | ----------- |
+| `X` | memref of dlfloat16 type values
+| `shape` | memref of 64-bit signless integer values
+| `Out` | memref of dlfloat16 type values
+
 ### `zlow.log` (::onnx_mlir::zlow::ZLowLogOp)
 
 _ZLow log operation_
@@ -423,14 +493,18 @@ shape is a 1D MemRef (memref<3xi64>) whose items are:
   * 2nd item: n
   * 3rd item: p
 * In case of stacked: X(s, m, n) * Y(s, n, p) + Bias(s, p)
-     or broadcasting: X(s, m, n) * Y(n, p) + Bias(p)
+     or broadcasting1: X(m, n) * Y(s, n, p) + Bias(s, p)
+     or broadcasting23: X(s, m, n) * Y(n, p) + Bias(p)
 shape is a 1D MemRef (memref<4xi64>) whose items are:
   * 1st item: s
   * 2nd item: m
   * 3rd item: n
   * 4th item: p
-* is_bcast: -1 broadcasting, 0: no broadcasting.
+* is_bcast1:  -1 broadcasting1, 0: no broadcasting1.
+* is_bcast23: -1 broadcasting23, 0: no broadcasting23.
 * is_stacked: -1 stacked, 0: unstacked.
+* transposeA: !0 transpose A, 0: do not transpose A.
+* transposeB: !0 transpose B, 0: do not transpose B.
 
 Traits: `MemRefsNormalizable`
 
@@ -440,8 +514,11 @@ Interfaces: `MemoryEffectOpInterface`
 
 <table>
 <tr><th>Attribute</th><th>MLIR Type</th><th>Description</th></tr>
-<tr><td><code>is_bcast</code></td><td>::mlir::IntegerAttr</td><td>64-bit signed integer attribute</td></tr>
+<tr><td><code>is_bcast1</code></td><td>::mlir::IntegerAttr</td><td>64-bit signed integer attribute</td></tr>
+<tr><td><code>is_bcast23</code></td><td>::mlir::IntegerAttr</td><td>64-bit signed integer attribute</td></tr>
 <tr><td><code>is_stacked</code></td><td>::mlir::IntegerAttr</td><td>64-bit signed integer attribute</td></tr>
+<tr><td><code>transposeA</code></td><td>::mlir::IntegerAttr</td><td>64-bit signed integer attribute</td></tr>
+<tr><td><code>transposeB</code></td><td>::mlir::IntegerAttr</td><td>64-bit signed integer attribute</td></tr>
 </table>
 
 #### Operands:
@@ -592,6 +669,144 @@ Interfaces: `MemoryEffectOpInterface`
 | `shape` | memref of 64-bit signless integer values
 | `Out` | memref of dlfloat16 type values
 
+### `zlow.quantizedMatmul` (::onnx_mlir::zlow::ZLowQuantizedMatMulOp)
+
+_ZLow quantized matmul operation_
+
+ZLow operation to perform a quantized matmul.
+work_area: a 4K-aligned buffer having the same layout as bias but dlfloat16 type.
+* In case of unstacked: X(m, n) * Y(n, p) + Bias(p)
+shape is a 1D MemRef (memref<3xi64>) whose items are:
+  * 1st item: m
+  * 2nd item: n
+  * 3rd item: p
+* In case of stacked: X(s, m, n) * Y(s, n, p) + Bias(s, p)
+     or broadcasting: X(s, m, n) * Y(n, p) + Bias(p)
+shape is a 1D MemRef (memref<4xi64>) whose items are:
+  * 1st item: s
+  * 2nd item: m
+  * 3rd item: n
+  * 4th item: p
+* is_bcast: -1 broadcasting, 0: no broadcasting.
+* is_stacked: -1 stacked, 0: unstacked.
+* DequantizeOutput: -1 output is dequantized, 0: output is not dequantized.
+* PreComputedBias: -1 bias is pre-computed, 0: bias is not pre-computed.
+
+Values for `q_type` are "DLFLOAT16", "INT8", "WEIGHTS", "UNDEFINED".
+
+
+Traits: `MemRefsNormalizable`
+
+#### Attributes:
+
+<table>
+<tr><th>Attribute</th><th>MLIR Type</th><th>Description</th></tr>
+<tr><td><code>x_q_type</code></td><td>::mlir::StringAttr</td><td>string attribute</td></tr>
+<tr><td><code>y_q_type</code></td><td>::mlir::StringAttr</td><td>string attribute</td></tr>
+<tr><td><code>bias_q_type</code></td><td>::mlir::StringAttr</td><td>string attribute</td></tr>
+<tr><td><code>out_q_type</code></td><td>::mlir::StringAttr</td><td>string attribute</td></tr>
+<tr><td><code>is_bcast</code></td><td>::mlir::IntegerAttr</td><td>64-bit signed integer attribute</td></tr>
+<tr><td><code>is_stacked</code></td><td>::mlir::IntegerAttr</td><td>64-bit signed integer attribute</td></tr>
+<tr><td><code>pre_computed_bias</code></td><td>::mlir::IntegerAttr</td><td>64-bit signed integer attribute</td></tr>
+<tr><td><code>disable_clipping</code></td><td>::mlir::IntegerAttr</td><td>64-bit signed integer attribute</td></tr>
+<tr><td><code>dequantize_output</code></td><td>::mlir::IntegerAttr</td><td>64-bit signed integer attribute</td></tr>
+</table>
+
+#### Operands:
+
+| Operand | Description |
+| :-----: | ----------- |
+| `X` | memref of dlfloat16 type or 8-bit signless integer values
+| `x_rec_scale` | 0D memref of 32-bit float values
+| `x_offset` | 0D memref of 32-bit float values
+| `Y` | memref of dlfloat16 type or 8-bit signless integer values
+| `y_rec_scale` | 0D memref of 32-bit float values
+| `y_offset` | 0D memref of 32-bit float values
+| `Bias` | memref of dlfloat16 type or 8-bit signless integer values
+| `bias_rec_scale` | 0D memref of 32-bit float values
+| `bias_offset` | 0D memref of 32-bit float values
+| `work_area` | memref of dlfloat16 type or 8-bit signless integer values or none type
+| `shape` | memref of 64-bit signless integer values
+| `Out` | memref of dlfloat16 type or 8-bit signless integer values
+| `out_rec_scale` | 0D memref of 32-bit float values
+| `out_offset` | 0D memref of 32-bit float values
+
+### `zlow.quantizedStick` (::onnx_mlir::zlow::ZLowQuantizedStickOp)
+
+_ZLow stick operation for quantization_
+
+ZLow operation to perform a quantization stick.
+Type is one of the values: dlfloat16, int8, and weights.
+
+Traits: `MemRefsNormalizable`
+
+#### Attributes:
+
+<table>
+<tr><th>Attribute</th><th>MLIR Type</th><th>Description</th></tr>
+<tr><td><code>layout</code></td><td>::mlir::StringAttr</td><td>string attribute</td></tr>
+<tr><td><code>q_type</code></td><td>::mlir::StringAttr</td><td>string attribute</td></tr>
+</table>
+
+#### Operands:
+
+| Operand | Description |
+| :-----: | ----------- |
+| `X` | memref of 8-bit signless integer or 32-bit float values
+| `rec_scale` | 0D memref of 32-bit float values
+| `offset` | 0D memref of 32-bit float values
+| `out` | memref of dlfloat16 type or 8-bit signless integer values
+
+### `zlow.reducemax` (::onnx_mlir::zlow::ZLowReduceMaxOp)
+
+_ZLow reducemax operation_
+
+ZLow operation to perform a reducemax.
+
+Traits: `MemRefsNormalizable`
+
+#### Attributes:
+
+<table>
+<tr><th>Attribute</th><th>MLIR Type</th><th>Description</th></tr>
+<tr><td><code>layout</code></td><td>::mlir::StringAttr</td><td>string attribute</td></tr>
+<tr><td><code>op_type</code></td><td>::mlir::StringAttr</td><td>string attribute</td></tr>
+</table>
+
+#### Operands:
+
+| Operand | Description |
+| :-----: | ----------- |
+| `X` | memref of dlfloat16 type values
+| `work_area` | memref of 8-bit signless integer values
+| `shape` | memref of 64-bit signless integer values
+| `Out` | memref of dlfloat16 type values
+
+### `zlow.reducemin` (::onnx_mlir::zlow::ZLowReduceMinOp)
+
+_ZLow reducemin operation_
+
+ZLow operation to perform a reducemin.
+
+Traits: `MemRefsNormalizable`
+
+#### Attributes:
+
+<table>
+<tr><th>Attribute</th><th>MLIR Type</th><th>Description</th></tr>
+<tr><td><code>layout</code></td><td>::mlir::StringAttr</td><td>string attribute</td></tr>
+<tr><td><code>op_type</code></td><td>::mlir::StringAttr</td><td>string attribute</td></tr>
+</table>
+
+#### Operands:
+
+| Operand | Description |
+| :-----: | ----------- |
+| `X` | memref of dlfloat16 type values
+| `work_area` | memref of 8-bit signless integer values
+| `shape` | memref of 64-bit signless integer values
+| `Out` | memref of dlfloat16 type values
+
 ### `zlow.relu` (::onnx_mlir::zlow::ZLowReluOp)
 
 _ZLow relu operation_
@@ -670,6 +885,29 @@ Interfaces: `MemoryEffectOpInterface`
 | `shape` | memref of 64-bit signless integer values
 | `Out` | memref of dlfloat16 type values
 
+### `zlow.sqrt` (::onnx_mlir::zlow::ZLowSqrtOp)
+
+_ZLow sqrt operation_
+
+ZLow operation to perform a sqrt.
+
+Traits: `MemRefsNormalizable`
+
+#### Attributes:
+
+<table>
+<tr><th>Attribute</th><th>MLIR Type</th><th>Description</th></tr>
+<tr><td><code>layout</code></td><td>::mlir::StringAttr</td><td>string attribute</td></tr>
+</table>
+
+#### Operands:
+
+| Operand | Description |
+| :-----: | ----------- |
+| `X` | memref of dlfloat16 type values
+| `shape` | memref of 64-bit signless integer values
+| `Out` | memref of dlfloat16 type values
+
 ### `zlow.stickForGRU` (::onnx_mlir::zlow::ZLowStickForGRUOp)
 
 _ZLow stick operation for GRU_

diff --git a/src/Accelerators/NNPA/Compiler/NNPACompilerOptions.cpp b/src/Accelerators/NNPA/Compiler/NNPACompilerOptions.cpp
@@ -115,12 +115,12 @@ llvm::cl::opt<NNPAQuantType> nnpaQuantization("nnpa-quantization",
     llvm::cl::desc("Enable quantization with a specific type. Only "
                    "MatMul whose weight is a constant is supported."),
     llvm::cl::values(
-        clEnumVal(
-            DynSymI8, "Dynamic Quantization to signed integer 8. Asymmetric "
-                      "quant for activations and symmetric quant for weights."),
-        clEnumVal(
-            SymSymI8, "Dynamic Quantization to signed integer 8. Symmetric "
-                      "quant for activations and symmetric quant for weights."),
+        clEnumVal(DynSymI8,
+            "Dynamic Quantization to signed integer 8. Asymmetric "
+            "quant for activations and symmetric quant for weights."),
+        clEnumVal(SymSymI8,
+            "Dynamic Quantization to signed integer 8. Symmetric "
+            "quant for activations and symmetric quant for weights."),
         clEnumVal(QNONE, "No quantization (default).")),
     llvm::cl::init(QNONE), llvm::cl::cat(OnnxMlirOptions));
 

diff --git a/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXLegalityCheck.cpp b/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXLegalityCheck.cpp
@@ -1157,7 +1157,7 @@ bool isSuitableForZDNN<ONNXLSTMOp>(
       (rShape[0] != 1 && rShape[0] != 2)) {
     std::string message = "The recurrence weight tensor `R` must have static "
                           "dimension, and the "
-        "first dimension of it must be 1 or 2.";
+                          "first dimension of it must be 1 or 2.";
     return onnxToZHighUnsupportedReport(op.getOperation(), message);
   }
   // Check hidden_size.

diff --git a/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXToZHigh.cpp b/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXToZHigh.cpp
@@ -1588,9 +1588,9 @@ void getONNXToZHighOneOpPatterns(
   patterns.insert<replaceONNXMinPattern>(context);
   patterns.insert<replaceONNXMulPattern>(context);
   patterns.insert<replaceONNXQLinearMatMulPattern>(context);
-  patterns.insert<replaceONNXReduceMaxPattern>(context); 
+  patterns.insert<replaceONNXReduceMaxPattern>(context);
   patterns.insert<replaceONNXReduceMeanV13Pattern>(context);
-  patterns.insert<replaceONNXReduceMinPattern>(context); 
+  patterns.insert<replaceONNXReduceMinPattern>(context);
   patterns.insert<replaceONNXReluPattern>(context);
   patterns.insert<replaceONNXSigmoidPattern>(context);
   patterns.insert<replaceONNXSoftmax2DPattern>(context);

diff --git a/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXToZHighCommon.td b/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXToZHighCommon.td
@@ -74,4 +74,4 @@ def IsCompatibleWithNNPALevelArch15: Constraint<
   "Input level is compatible with NNPA level"
 >;
 
-#endif // ONNX_TO_ZHIGH_COMMON
+#endif // ONNX_TO_ZHIGH_COMMON
diff --git a/src/Accelerators/NNPA/Conversion/ZHighToZLow/CMakeLists.txt b/src/Accelerators/NNPA/Conversion/ZHighToZLow/CMakeLists.txt
@@ -4,6 +4,7 @@ add_onnx_mlir_library(OMZHighToZLow
 
   LINK_LIBS PUBLIC
   MLIRMemRefTransforms
+  OMKrnlToLLVM
   OMLayoutHelper
   OMONNXToKrnl
   OMStickify

diff --git a/src/Accelerators/NNPA/Conversion/ZHighToZLow/ProcessStickData.hpp.inc b/src/Accelerators/NNPA/Conversion/ZHighToZLow/ProcessStickData.hpp.inc
@@ -258,4 +258,4 @@ void IterateOverStickInputData(const BUILDER &b, mlir::Operation *op,
       });
 }
 
-} // namespace onnx_mlir
+} // namespace onnx_mlir
diff --git a/src/Accelerators/NNPA/Conversion/ZHighToZLow/ZHighToZLow.cpp b/src/Accelerators/NNPA/Conversion/ZHighToZLow/ZHighToZLow.cpp
@@ -1123,8 +1123,9 @@ struct ZHighToZLowReduceOpLowering : public ConversionPattern {
     Value data = operands[0];
 
     // Helper builders.
-    MultiDialectBuilder<IndexExprBuilderForKrnl, KrnlBuilder, LLVMBuilder, MemRefBuilder> create(
-        rewriter, loc);
+    MultiDialectBuilder<IndexExprBuilderForKrnl, KrnlBuilder, LLVMBuilder,
+        MemRefBuilder>
+        create(rewriter, loc);
 
     // Convert ZTensor type to MemRefType.
     ZMemRefType zMemRefType =
@@ -1142,7 +1143,8 @@ struct ZHighToZLowReduceOpLowering : public ConversionPattern {
     // Get the original shape before it is vanished by lower passes.
     Value shape = insertShapeMemRefI64(rewriter, loc, dims);
 
-    // If set to NULL, the operation will determine, allocate and free storage automatically.
+    // If set to NULL, the operation will determine, allocate and free storage
+    // automatically.
     Value workArea = create.llvm.null(krnl::getI8PointerType(context));
 
     // Emit a ZLow operation.
@@ -2310,4 +2312,4 @@ void populateZHighToZLowConversionPattern(mlir::RewritePatternSet &patterns,
 }
 
 } // namespace zhigh
-} // namespace onnx_mlir
+} // namespace onnx_mlir