Merge pull request #979 from jmitrevs/qonnx-1p0
Update QONNX parsing for 1.0
Showing 36 changed files with 2,883 additions and 476 deletions.
@@ -0,0 +1,56 @@
==============
ONNX and QONNX
==============

Parsing of ONNX and QONNX models is done in conjunction with the `qonnx <https://github.com/fastmachinelearning/qonnx>`_ package, even if no quantization is used. This is a common initial parser shared with the AMD/Xilinx FINN project. The first step is to perform constant folding, shape inference, etc., on the ONNX graph, commonly known as `cleaning`. If a model has convolution layers, it also needs to be converted to a channels-last format, since that is what hls4ml mainly supports. The ``qonnx`` package also provides a number of additional transforms that may need to be used. For example, ``Gemm`` nodes need to be converted to ``MatMul`` and ``Add`` nodes.

There are command-line based versions of cleaning and channels-last conversion:

.. code-block:: bash

    $ qonnx_clean filename.onnx
    $ qonnx_to_channels_last filename_clean.onnx
    $ qonnx_clean filename_clean_channels_last.onnx  # good to do a clean again as a last step

Things can similarly be done in Python. This method is usually easier if you additionally need to call other transforms. An example is given below which also calls the ``GemmToMatMul`` converter:

.. code-block:: python

    from qonnx.core.modelwrapper import ModelWrapper
    from qonnx.transformation.channels_last import ConvertToChannelsLastAndClean
    from qonnx.transformation.gemm_to_matmul import GemmToMatMul
    import qonnx.util.cleanup

    model = ModelWrapper('filename.onnx')
    model = qonnx.util.cleanup.cleanup_model(model)
    model = model.transform(ConvertToChannelsLastAndClean())
    model = model.transform(GemmToMatMul())
    model = qonnx.util.cleanup.cleanup_model(model)

``ModelWrapper`` is defined in ``qonnx.core.modelwrapper``. More information on the ``qonnx`` package can be found at the `QONNX documentation page <https://qonnx.readthedocs.io/en/latest/index.html>`_.

The next steps are very similar to those for a Keras model:

.. code-block:: python

    import hls4ml

    config = hls4ml.utils.config.config_from_onnx_model(
        model, granularity='name', backend='Vitis', default_precision='fixed<16,6>'
    )
    # modify the config as desired
    hls_model = hls4ml.converters.convert_from_onnx_model(
        model,
        output_dir='my-hls-test',
        io_type='io_stream',
        backend='Vitis',
        hls_config=config,
    )
    hls_model.compile()

Note that, unlike the Keras version, "name" granularity is the default for ``config_from_onnx_model``, and it must be used for QONNX models. Unquantized ONNX models can use "model" granularity if so desired, but generally there is no benefit.
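As an illustration of the "modify the config as desired" step: with "name" granularity the returned config carries a per-layer ``LayerName`` section that can be edited before conversion. The dictionary below is a hand-written stand-in for illustration only (the layer name ``dense_0`` and the exact keys shown are assumptions; print your own ``config`` to see the generated layout):

```python
# Hand-written stand-in for the dict returned by config_from_onnx_model
# with granularity='name'; the real one is generated from your model,
# and the layer name 'dense_0' here is hypothetical.
config = {
    'Model': {'Precision': 'fixed<16,6>', 'ReuseFactor': 1},
    'LayerName': {
        'dense_0': {
            'Precision': {'weight': 'fixed<16,6>', 'result': 'fixed<16,6>'},
            'ReuseFactor': 1,
        },
    },
}

# Widen one layer's weight precision and trade throughput for resources
config['LayerName']['dense_0']['Precision']['weight'] = 'fixed<18,8>'
config['LayerName']['dense_0']['ReuseFactor'] = 4
```

The per-layer settings override the model-wide defaults, so only the layers that need attention have to be touched.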

One can subsequently call the ``predict`` function to check the performance, or build the project.

Note that ``execute_onnx`` in ``qonnx.core.onnx_exec`` can be used to run QONNX graphs directly, and it also provides the values at intermediate layers for validating the model (tracing).

Quant nodes
===========

Documentation for quant nodes is provided in the `qonnx package <https://github.com/fastmachinelearning/qonnx/tree/main/docs/qonnx-custom-ops>`_. Note that hls4ml currently supports only the `Quant operator <https://github.com/fastmachinelearning/qonnx/tree/main/docs/qonnx-custom-ops/quant_op.md>`_. Also, not all legal ``Quant`` configurations are parsable by hls4ml or synthesizable. The ``scale``, ``zeropt``, and ``bitwidth`` values must be constant (though not necessarily scalar for ``scale`` and ``zeropt``).

Generally, if the ``zeropt`` is 0 and the ``scale`` is a scalar power of 2, hls4ml uses ``ap_fixed`` or ``ac_fixed`` types (depending on the backend) to represent the quantizations. In other cases, the ``scale`` and ``zeropt`` need to be handled explicitly by hls4ml, and there is more of a chance that hls4ml will not be able to process the input. (Please report any issues that you find.)
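To see why power-of-two scales are the friendly case, the sketch below mimics the arithmetic of a ``Quant`` node with ``zeropt = 0`` in plain Python. This is an illustration, not hls4ml or qonnx code, and it assumes round-to-nearest, whereas a real ``Quant`` node's rounding mode is configurable:

```python
def quant(value, scale, zeropt, bitwidth, signed=True):
    """Quantize-dequantize on an integer grid, clipped to `bitwidth` bits."""
    q = round(value / scale) + zeropt  # assumes round-to-nearest
    lo = -(2 ** (bitwidth - 1)) if signed else 0
    hi = (2 ** (bitwidth - 1)) - 1 if signed else (2**bitwidth) - 1
    q = max(lo, min(hi, q))
    return (q - zeropt) * scale

# scale = 2**-4 and zeropt = 0: every output is a multiple of 1/16, so the
# values land exactly on the grid of a fixed-point type with 4 fractional
# bits -- no explicit scale handling is needed.
x = quant(0.30, scale=2**-4, zeropt=0, bitwidth=8)  # -> 0.3125 (= 5/16)
```

With a non-power-of-two scale or a nonzero ``zeropt``, the dequantized values no longer fall on a binary fixed-point grid, which is why those cases require extra handling.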
Submodule example-models updated from 3cfbcf to d40894
@@ -1,85 +1,77 @@
-from hls4ml.converters.onnx_to_hls import (
-    compute_pads_1d,
-    compute_pads_2d,
-    get_onnx_attribute,
-    get_onnx_input_name,
-    onnx_handler,
-)
-from hls4ml.converters.utils import compute_padding_1d, compute_padding_2d
+import numpy as np
+
+from hls4ml.converters.onnx_to_hls import get_onnx_attribute, onnx_handler
 
 
 @onnx_handler('Conv')
-def parse_conv_layer(reader, node, inputs_map, input_shapes, graph, config):
+def parse_conv_layer(node, input_names, input_shapes, graph):
     layer = {}
     layer['name'] = node.name
-    layer['data_format'] = 'channels_first'  # ONNX's default is channel first
-    layer['inputs'] = get_onnx_input_name(node, graph)
-    reader.add_input(layer['name'], node.input)
+    if node.domain != 'qonnx.custom_op.channels_last':
+        raise RuntimeError("Please convert the model to channels-last format with qonnx-to-channels-last")
+    layer['data_format'] = 'channels_last'  # QONNX needs to be channels-last.
+    layer['inputs'] = input_names
+    layer['outputs'] = node.output
 
     strides = get_onnx_attribute(node, 'strides')
     kernel_shape = get_onnx_attribute(node, 'kernel_shape')
+    # Note: currently don't have support for auto_pad.
+    pads = get_onnx_attribute(node, 'pads')
+    dilations = get_onnx_attribute(node, 'dilations')
+    if dilations is None:
+        dilations = [1] * len(kernel_shape)
 
-    if len(input_shapes[0]) == 3:  # Conv1D
-        layer['class_name'] = 'Conv1D'
-
-        layer['in_width'] = input_shapes[0][2]
-        layer['n_chan'] = input_shapes[0][1]
-        layer['filt_width'] = kernel_shape[0]
-        layer['n_filt'] = reader.get_weights_data(layer['name'], 'kernel').shape[2]
-        layer['stride_width'] = strides[0]
-        pads = compute_pads_1d(node, layer)
-
-        layer['pad_left'] = pads[0]
-        layer['pad_right'] = pads[1]
-
-        if all(x == 0 for x in pads):  # No padding, i.e., 'VALID' padding
-            layer['padding'] = 'valid'
-        else:
-            layer['padding'] = 'same'
-
-        (layer['out_width'], _, _) = compute_padding_1d(
-            layer['padding'], layer['in_width'], layer['stride_width'], layer['filt_width']
-        )
-
-        output_shape = [input_shapes[0][0], layer['n_filt'], layer['out_width']]
-
-    elif len(input_shapes[0]) == 4:  # Conv2D
-        layer['class_name'] = 'Conv2D'
-
-        layer['in_height'] = input_shapes[0][2]
-        layer['in_width'] = input_shapes[0][3]
-        layer['n_chan'] = input_shapes[0][1]
-
-        layer['filt_height'] = kernel_shape[0]
-        layer['filt_width'] = kernel_shape[1]
-
-        layer['n_filt'] = next(
-            (x.type.tensor_type.shape.dim[1].dim_value for x in graph.value_info if x.name == node.output[0]), None
-        )
-        layer['stride_height'] = strides[0]
-        layer['stride_width'] = strides[1]
-        pads = compute_pads_2d(node, layer)
-
-        layer['pad_top'] = pads[0]
-        layer['pad_bottom'] = pads[2]
-        layer['pad_left'] = pads[1]
-        layer['pad_right'] = pads[3]
-
-        if all(x == 0 for x in pads):  # No padding, i.e., 'VALID' padding in Keras/Tensorflow
-            layer['padding'] = 'valid'
-        else:  # Only 'valid' and 'same' padding are available in Keras
-            layer['padding'] = 'same'
-
-        (layer['out_height'], layer['out_width'], _, _, _, _) = compute_padding_2d(
-            layer['padding'],
-            layer['in_height'],
-            layer['in_width'],
-            layer['stride_height'],
-            layer['stride_width'],
-            layer['filt_height'],
-            layer['filt_width'],
-        )
-
-        output_shape = [input_shapes[0][0], layer['n_filt'], layer['out_height'], layer['out_width']]
-
-    return layer, output_shape
+    layer['in_width'] = input_shapes[0][-2]
+    layer['n_chan'] = input_shapes[0][-1]
+    layer['n_filt'] = input_shapes[1][0]
+
+    layer['group'] = int(get_onnx_attribute(node, 'group'))
+    if layer['group'] != 1:
+        layer['depth_multiplier'] = get_onnx_attribute(node, 'group') / layer['n_chan']
+        if not layer['depth_multiplier'].is_integer():
+            raise ValueError('Depth multiplier must be an integer')
+        else:
+            layer['depth_multiplier'] = int(layer['depth_multiplier'])
+
+    layer['n_dim'] = len(input_shapes[0]) - 2  # 2 comes from channels and batch dimensions
+    if layer['n_dim'] not in (1, 2):
+        raise ValueError("Only 1D and 2D convolutions are supported")
+    layer['class_name'] = 'Conv'
+
+    # set some values needed later
+    if layer['n_dim'] == 1:
+        # this is 1D convolution
+        full_width = layer['in_width'] + pads[0] + pads[1]
+        eff_kernel_width = kernel_shape[0] * dilations[0]
+        layer['out_width'] = int(np.ceil((full_width - eff_kernel_width + 1) / strides[0]))
+        # for compatibility interpret some variables
+        layer['pad_left'] = pads[0]
+        layer['pad_right'] = pads[1]
+        layer['filt_width'] = kernel_shape[0]
+        layer['stride_width'] = strides[0]
+        layer['dilation_width'] = dilations[0]
+    else:
+        # 2d
+        layer['in_height'] = input_shapes[0][-3]
+        full_height = layer['in_height'] + pads[0] + pads[2]
+        eff_kernel_height = kernel_shape[0] * dilations[0]
+        out_height = int(np.ceil((full_height - eff_kernel_height + 1) / strides[0]))
+        layer['out_height'] = out_height
+
+        full_width = input_shapes[0][-2] + pads[1] + pads[3]
+        eff_kernel_width = kernel_shape[1] * dilations[1]
+        out_width = int(np.ceil((full_width - eff_kernel_width + 1) / strides[1]))
+        layer['out_width'] = out_width
+        # for compatibility interpret some variables
+        layer['pad_top'] = pads[0]
+        layer['pad_left'] = pads[1]
+        layer['pad_bottom'] = pads[2]
+        layer['pad_right'] = pads[3]
+        layer['filt_height'] = kernel_shape[0]
+        layer['filt_width'] = kernel_shape[1]
+        layer['stride_height'] = strides[0]
+        layer['stride_width'] = strides[1]
+        layer['dilation_height'] = dilations[0]
+        layer['dilation_width'] = dilations[1]
+
+    return layer
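The output-size arithmetic used in this handler can be checked in isolation. The function below re-implements the same ``out_width`` formula with only the standard library; it is a sketch for sanity-checking, not hls4ml code:

```python
import math

def conv1d_out_width(in_width, kernel, stride, pad_left, pad_right, dilation=1):
    """Mirror of the parser's 1D out_width computation."""
    full_width = in_width + pad_left + pad_right
    eff_kernel_width = kernel * dilation
    return math.ceil((full_width - eff_kernel_width + 1) / stride)

# 8-wide input, kernel 3, stride 2, one element of padding on each side:
# ceil((10 - 3 + 1) / 2) = 4
w = conv1d_out_width(8, kernel=3, stride=2, pad_left=1, pad_right=1)  # -> 4
```

The 2D case applies the same formula independently per spatial axis, using `pads[0]`/`pads[2]` for height and `pads[1]`/`pads[3]` for width.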