From 03830929697464666b58be717ece8328bc6c6965 Mon Sep 17 00:00:00 2001 From: Michal Danilowicz Date: Mon, 16 Sep 2024 13:28:15 +0000 Subject: [PATCH 01/10] [Fix] InferDuplicateStreamsLayer now properly handles forks of multiple-output nodes --- .../fpgadataflow/convert_to_hw_layers.py | 96 +++++++++---------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py index 25a2032aeb..b02bc89db8 100644 --- a/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py +++ b/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py @@ -585,63 +585,63 @@ def apply(self, model): for node in graph.node: node_ind += 1 - successors = model.find_consumers(node.output[0]) - if successors is not None and len(successors) >= 2: - output_tensor = node.output[0] - n_outputs = len(successors) + for output_tensor in node.output: + successors = model.find_consumers(output_tensor) + if successors is not None and len(successors) >= 2: + n_outputs = len(successors) - dt = model.get_tensor_datatype(output_tensor) + dt = model.get_tensor_datatype(output_tensor) - # skip conversion for layers with float input - if not dt.is_integer(): - continue + # skip conversion for layers with float input + if not dt.is_integer(): + continue - # create clone tensors - out_shape = model.get_tensor_shape(output_tensor) - out_tensor_clones = [] - for i in range(n_outputs): - clone = helper.make_tensor_value_info( - model.make_new_valueinfo_name(), TensorProto.FLOAT, out_shape - ) - model.graph.value_info.append(clone) - out_tensor_clones += [clone.name] + # create clone tensors + out_shape = model.get_tensor_shape(output_tensor) + out_tensor_clones = [] + for i in range(n_outputs): + clone = helper.make_tensor_value_info( + model.make_new_valueinfo_name(), TensorProto.FLOAT, out_shape + ) + model.graph.value_info.append(clone) + out_tensor_clones += [clone.name] - num_ch = int(out_shape[-1]) - vecs = out_shape[:-1] + num_ch = int(out_shape[-1]) + vecs = out_shape[:-1] - # create node with no parallelization first - pe = 1 + # create node with no parallelization first + pe = 1 - dup_node = helper.make_node( - "DuplicateStreams", - [output_tensor], - out_tensor_clones, - domain="finn.custom_op.fpgadataflow", - backend="fpgadataflow", - NumChannels=num_ch, - PE=pe, - inputDataType=dt.name, - numInputVectors=vecs, - NumOutputStreams=n_outputs, - outFIFODepths=[2] * n_outputs, - name="DuplicateStreams_" + node.name, - ) + dup_node = helper.make_node( + "DuplicateStreams", + [output_tensor], + out_tensor_clones, + domain="finn.custom_op.fpgadataflow", + backend="fpgadataflow", + NumChannels=num_ch, + PE=pe, + inputDataType=dt.name, + numInputVectors=vecs, + NumOutputStreams=n_outputs, + outFIFODepths=[2] * n_outputs, + name="DuplicateStreams_" + node.name, + ) - graph.node.insert(node_ind, dup_node) + graph.node.insert(node_ind, dup_node) - # connect successors to out tensor clone - clone_idx = 0 - for successor in successors: - for i, succ_input in enumerate(successor.input): - if succ_input == output_tensor: - successor.input[i] = out_tensor_clones[clone_idx] - clone_idx += 1 - # if one node has multiple connections to the same output - # find_direct_successors will return one node per input - # so break the inner loop will result in correct behaviour - break + # connect successors to out tensor clone + clone_idx = 0 + for successor in successors: + for i, succ_input in enumerate(successor.input): + if 
succ_input == output_tensor: + successor.input[i] = out_tensor_clones[clone_idx] + clone_idx += 1 + # if one node has multiple connections to the same output + # find_direct_successors will return one node per input + # so break the inner loop will result in correct behaviour + break - graph_modified = True + graph_modified = True if graph_modified: model = model.transform(SortGraph()) From d13aa7e7debb21bd1d75b6dbb6eddc959b4ae8c8 Mon Sep 17 00:00:00 2001 From: Michal Danilowicz Date: Mon, 16 Sep 2024 13:48:43 +0000 Subject: [PATCH 02/10] [Fix] MoveScalarLinearPastInvariants, MakeMaxPoolNHWC, MakeScaleResizeNHWC transformations are checking whether the node to be moved is a fork node, in which case the MoveOpPastFork is called. MoveOpPastFork uses deepcopies of the original node. --- src/finn/transformation/streamline/reorder.py | 30 ++++++++++++++----- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/src/finn/transformation/streamline/reorder.py b/src/finn/transformation/streamline/reorder.py index 8ac2d7dad6..9a7e9d0723 100644 --- a/src/finn/transformation/streamline/reorder.py +++ b/src/finn/transformation/streamline/reorder.py @@ -29,6 +29,7 @@ import numpy as np import qonnx.core.data_layout as DataLayout import warnings +from copy import deepcopy from onnx import TensorProto from onnx import helper as oh from qonnx.core.datatype import DataType @@ -641,6 +642,10 @@ def apply(self, model): # if initializer is not scalar, skip if np.prod(init0.shape) != 1: continue + if model.is_fork_node(prod0): + model = model.transform(MoveOpPastFork(prod0.op_type)) + # topology modified, "ask" ModelWrapper to apply this transform again + return (model, True) # Flatten input if required if len(init0.shape) > 0: init0 = init0.flatten()[0] @@ -713,6 +718,12 @@ def apply(self, model): elif producer is not None and producer.op_type == "Transpose": perms = list(get_by_name(producer.attribute, "perm").ints) if perms == [0, 3, 1, 2]: + # check if the producer is a fork node + # (need to move it past the fork before this transform) + if model.is_fork_node(producer): + model = model.transform(MoveTransposePastFork()) + # topology modified, "ask" ModelWrapper to apply this transform again + return (model, True) ceil_mode = get_by_name(n.attribute, "ceil_mode") if ceil_mode is not None: ceil_mode = ceil_mode.i @@ -764,6 +775,12 @@ def apply(self, model): if producer is not None and producer.op_type == "Transpose": perms = list(get_by_name(producer.attribute, "perm").ints) if perms == [0, 3, 1, 2]: + # check if the producer is a fork node + # (need to move it past the fork before this transform) + if model.is_fork_node(producer): + model = model.transform(MoveTransposePastFork()) + # topology modified, "ask" ModelWrapper to apply this transform again + return (model, True) old_value = model.get_initializer(n.input[scales_ind]) new_value = np.array( [old_value[idx] for idx in (0, 2, 3, 1)], @@ -813,10 +830,9 @@ class MoveOpPastFork(Transformation): can be merged with nodes in the branches """ - def __init__(self, op_name_list, get_attrs_fxn=lambda x: {}): + def __init__(self, op_name_list): super().__init__() self.ops_to_move = op_name_list - self.get_attrs_fxn = get_attrs_fxn def apply(self, model): graph = model.graph @@ -859,11 +875,9 @@ def apply(self, model): new_param_name = model.make_new_valueinfo_name() new_inp_list = [n.input[0], new_param_name] model.set_initializer(new_param_name, op_init_param) - attrs = self.get_attrs_fxn(n) - # TODO use copy of original node instead to get attrs? 
- new_node = oh.make_node( - n.op_type, new_inp_list, [new_output_tensor_name], **attrs - ) + new_node = deepcopy(n) + new_node.input[:] = new_inp_list + new_node.output[:] = [new_output_tensor_name] graph.node.insert(node_ind, new_node) node_ind += 1 @@ -901,7 +915,7 @@ def __init__(self): class MoveTransposePastFork(MoveOpPastFork): def __init__(self): - super().__init__(["Transpose"], lambda x: {"perm": get_by_name(x.attribute, "perm").ints}) + super().__init__(["Transpose"]) class MoveMaxPoolPastMultiThreshold(Transformation): From 6223abe86c7d9aee43788825f3c19545dab0ea54 Mon Sep 17 00:00:00 2001 From: Michal Danilowicz Date: Mon, 16 Sep 2024 13:59:14 +0000 Subject: [PATCH 03/10] [Fix] InsertFIFO transform is fixed for the case of the last node in the graph being a fork node --- src/finn/transformation/fpgadataflow/insert_fifo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/finn/transformation/fpgadataflow/insert_fifo.py b/src/finn/transformation/fpgadataflow/insert_fifo.py index 21fb843052..9ed0f51cd4 100644 --- a/src/finn/transformation/fpgadataflow/insert_fifo.py +++ b/src/finn/transformation/fpgadataflow/insert_fifo.py @@ -268,7 +268,7 @@ def apply(self, model): fifo_input_tensor = oh.make_tensor_value_info( model.make_new_valueinfo_name(), n0_tensor_dtype, - n0.get_normal_output_shape(), + n0.get_normal_output_shape(out_ind), ) graph.value_info.append(fifo_input_tensor) model.set_tensor_datatype(fifo_input_tensor.name, dtype) @@ -294,7 +294,7 @@ def apply(self, model): graph.node.append(fifo_node) # set fifo output tensor as new input tensor of second node - final_node.output[0] = fifo_input_tensor.name + final_node.output[out_ind] = fifo_input_tensor.name else: warnings.warn( """Output FIFO for %s has depth %d and won't From d7c9391e36102588e1b5cc9b46d132633c9e4267 Mon Sep 17 00:00:00 2001 From: Michal Danilowicz Date: Mon, 16 Sep 2024 16:44:54 +0000 Subject: [PATCH 04/10] [Feature] Timeout template added --- src/finn/custom_op/fpgadataflow/hlsbackend.py | 14 ++++++ src/finn/custom_op/fpgadataflow/templates.py | 45 +++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/src/finn/custom_op/fpgadataflow/hlsbackend.py b/src/finn/custom_op/fpgadataflow/hlsbackend.py index d8210fd684..c03a9029db 100644 --- a/src/finn/custom_op/fpgadataflow/hlsbackend.py +++ b/src/finn/custom_op/fpgadataflow/hlsbackend.py @@ -474,3 +474,17 @@ def get_ap_int_max_w(self): ret = max([instream, outstream]) assert ret <= 8191, "AP_INT_MAX_W=%d is larger than allowed maximum of 8191" % ret return ret + + def timeout_value(self): + """Set timeout value for HLS functions defined for one clock cycle""" + self.code_gen_dict["$TIMEOUT_VALUE$"] = ["100"] + + def timeout_condition(self): + """Set timeout condition for HLS functions defined for one clock cycle""" + self.code_gen_dict["$TIMEOUT_CONDITION$"] = ["out_{}.empty()".format(self.hls_sname())] + + def timeout_read_stream(self): + """Set reading output stream procedure for HLS functions defined for one clock cycle""" + self.code_gen_dict["$TIMEOUT_READ_STREAM$"] = [ + "debug_out_{} << out_{}.read();".format(self.hls_sname(), self.hls_sname()) + ] diff --git a/src/finn/custom_op/fpgadataflow/templates.py b/src/finn/custom_op/fpgadataflow/templates.py index 3d89a0ab23..7ef74118ec 100644 --- a/src/finn/custom_op/fpgadataflow/templates.py +++ b/src/finn/custom_op/fpgadataflow/templates.py @@ -58,6 +58,51 @@ """ +# template for single node execution with timeout (for single clock hls operations) +docompute_template_timeout = 
""" +#define AP_INT_MAX_W $AP_INT_MAX_W$ +#include "cnpy.h" +#include "npy2apintstream.hpp" +#include "npy2vectorstream.hpp" +#include +#include "bnn-library.h" + +// includes for network parameters +$GLOBALS$ + +// defines for network parameters +$DEFINES$ + +int main(){ +$PRAGMAS$ + +$STREAMDECLARATIONS$ + +$READNPYDATA$ + +unsigned timeout = 0; +while(timeout < $TIMEOUT_VALUE$){ + +$DOCOMPUTE$ + +if($TIMEOUT_CONDITION$){ +timeout++; +} + +else{ +$TIMEOUT_READ_STREAM$ +timeout = 0; +} +} + +$DATAOUTSTREAM$ + +$SAVEASCNPY$ + +} + +""" + # templates for single node ip generation # cpp file From 6da0ce4d10db86f2eea3bb513164c752401956d8 Mon Sep 17 00:00:00 2001 From: mdaniowi Date: Fri, 20 Sep 2024 16:02:40 +0100 Subject: [PATCH 05/10] [Feature] npy2vectorstream.hpp include added to docompute_template --- src/finn/custom_op/fpgadataflow/templates.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/finn/custom_op/fpgadataflow/templates.py b/src/finn/custom_op/fpgadataflow/templates.py index 7ef74118ec..d2100a7516 100644 --- a/src/finn/custom_op/fpgadataflow/templates.py +++ b/src/finn/custom_op/fpgadataflow/templates.py @@ -32,6 +32,7 @@ #define AP_INT_MAX_W $AP_INT_MAX_W$ #include "cnpy.h" #include "npy2apintstream.hpp" +#include "npy2vectorstream.hpp" #include #include "bnn-library.h" From 8f87454c45c688496d6e4e1650229e81e8417867 Mon Sep 17 00:00:00 2001 From: Michal Danilowicz Date: Mon, 16 Sep 2024 17:05:34 +0000 Subject: [PATCH 06/10] [Feature] New Split custom_op added --- src/finn/custom_op/fpgadataflow/__init__.py | 2 + .../custom_op/fpgadataflow/hls/__init__.py | 2 + .../custom_op/fpgadataflow/hls/split_hls.py | 278 ++++++++++++++++++ src/finn/custom_op/fpgadataflow/split.py | 164 +++++++++++ 4 files changed, 446 insertions(+) create mode 100644 src/finn/custom_op/fpgadataflow/hls/split_hls.py create mode 100644 src/finn/custom_op/fpgadataflow/split.py diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py index aed2ab7fe1..6f48bc6308 100644 --- a/src/finn/custom_op/fpgadataflow/__init__.py +++ b/src/finn/custom_op/fpgadataflow/__init__.py @@ -42,6 +42,7 @@ from finn.custom_op.fpgadataflow.lookup import Lookup from finn.custom_op.fpgadataflow.matrixvectoractivation import MVAU from finn.custom_op.fpgadataflow.pool import Pool +from finn.custom_op.fpgadataflow.split import StreamingSplit from finn.custom_op.fpgadataflow.streamingdataflowpartition import ( StreamingDataflowPartition, ) @@ -77,6 +78,7 @@ custom_op["Lookup"] = Lookup custom_op["Pool"] = Pool custom_op["StreamingConcat"] = StreamingConcat +custom_op["StreamingSplit"] = StreamingSplit custom_op["StreamingDataWidthConverter"] = StreamingDataWidthConverter custom_op["StreamingEltwise"] = StreamingEltwise custom_op["StreamingMaxPool"] = StreamingMaxPool diff --git a/src/finn/custom_op/fpgadataflow/hls/__init__.py b/src/finn/custom_op/fpgadataflow/hls/__init__.py index 405c47a08d..e5b24413eb 100644 --- a/src/finn/custom_op/fpgadataflow/hls/__init__.py +++ b/src/finn/custom_op/fpgadataflow/hls/__init__.py @@ -43,6 +43,7 @@ from finn.custom_op.fpgadataflow.hls.lookup_hls import Lookup_hls from finn.custom_op.fpgadataflow.hls.matrixvectoractivation_hls import MVAU_hls from finn.custom_op.fpgadataflow.hls.pool_hls import Pool_hls +from finn.custom_op.fpgadataflow.hls.split_hls import StreamingSplit_hls from finn.custom_op.fpgadataflow.hls.streamingdatawidthconverter_hls import ( StreamingDataWidthConverter_hls, ) @@ -71,6 +72,7 @@ custom_op["Lookup_hls"] = Lookup_hls 
custom_op["Pool_hls"] = Pool_hls custom_op["StreamingConcat_hls"] = StreamingConcat_hls +custom_op["StreamingSplit_hls"] = StreamingSplit_hls custom_op["StreamingEltwise_hls"] = StreamingEltwise_hls custom_op["StreamingDataWidthConverter_hls"] = StreamingDataWidthConverter_hls custom_op["StreamingMaxPool_hls"] = StreamingMaxPool_hls diff --git a/src/finn/custom_op/fpgadataflow/hls/split_hls.py b/src/finn/custom_op/fpgadataflow/hls/split_hls.py new file mode 100644 index 0000000000..d6f9d43f51 --- /dev/null +++ b/src/finn/custom_op/fpgadataflow/hls/split_hls.py @@ -0,0 +1,278 @@ +# Copyright (C) 2024, Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import numpy as np +import os + +from finn.custom_op.fpgadataflow import templates +from finn.custom_op.fpgadataflow.hlsbackend import HLSBackend +from finn.custom_op.fpgadataflow.split import StreamingSplit +from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy + + +class StreamingSplit_hls(StreamingSplit, HLSBackend): + """Streaming split node with dynamically generated HLS. + Only supports splitting along the last axis.""" + + def __init__(self, onnx_node, **kwargs): + super().__init__(onnx_node, **kwargs) + + def get_nodeattr_types(self): + my_attrs = {} + my_attrs.update(StreamingSplit.get_nodeattr_types(self)) + my_attrs.update(HLSBackend.get_nodeattr_types(self)) + return my_attrs + + def execute_node(self, context, graph): + mode = self.get_nodeattr("exec_mode") + node = self.onnx_node + ishape = self.get_normal_input_shape() + folded_ishape = self.get_folded_input_shape() + n_outputs = self.get_n_outputs() + exp_oshapes = [self.get_normal_output_shape(i) for i in range(len(node.output))] + export_idt = self.get_input_datatype() + + if mode == "cppsim": + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + elif mode == "rtlsim": + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + else: + raise Exception( + """Invalid value for attribute exec_mode! 
Is currently set to: {} + has to be set to one of the following value ("cppsim", "rtlsim")""".format( + mode + ) + ) + + inp = context[node.input[0]] + assert str(inp.dtype) == "float32", "Input datatype is not float32" + assert inp.shape == ishape, "Input shape mismatch for " + node.input[0] + # reshape input into folded form + inp = inp.reshape(folded_ishape) + # make copy before saving array + reshaped_input = inp.copy() + np.save(os.path.join(code_gen_dir, "input_0.npy"), reshaped_input) + + if mode == "cppsim": + # execute the precompiled model + super().exec_precompiled_singlenode_model() + # load output npy file + super().npy_to_dynamic_outputs(context, ["output_%d.npy" % i for i in range(n_outputs)]) + for i in range(n_outputs): + assert ( + context[node.output[i]].shape == exp_oshapes[i] + ), "cppsim did not produce expected folded output shape: {}, expected: {}".format( + context[node.output[i]].shape, exp_oshapes[i] + ) + elif mode == "rtlsim": + sim = self.get_rtlsim() + io_dict = {"inputs": {}, "outputs": {}} + + nbits = self.get_instream_width() + rtlsim_inp = npy_to_rtlsim_input( + "%s/input_0.npy" % code_gen_dir, + export_idt, + nbits, + # reverse_inner=True, + ) + io_dict["inputs"]["in0"] = rtlsim_inp + super().reset_rtlsim(sim) + super().toggle_clk(sim) + + for i in range(n_outputs): + io_dict["outputs"]["out_arr_%d" % i] = [] + self.rtlsim_multi_io(sim, io_dict, sname="_") + odt = self.get_output_datatype() + target_bits = odt.bitwidth() + packed_bits = self.get_outstream_width() + for i in range(n_outputs): + out_npy_path = "%s/output_%d.npy" % (code_gen_dir, i) + out_shape = self.get_folded_output_shape(i) + rtlsim_output_to_npy( + io_dict["outputs"]["out_arr_%d" % i], + out_npy_path, + odt, + out_shape, + packed_bits, + target_bits, + # reverse_inner=True, + ) + # load and reshape output + output = np.load(out_npy_path) + output = np.asarray([output], dtype=np.float32).reshape(*exp_oshapes[i]) + context[node.output[i]] = output + else: + raise Exception( + """Invalid value for attribute exec_mode! Is currently set to: {} + has to be set to one of the following value ("cppsim", "rtlsim")""".format( + mode + ) + ) + + for i in range(n_outputs): + assert ( + context[node.output[i]].shape == exp_oshapes[i] + ), "cppsim did not produce expected folded output shape. 
Got: {}, expected: {}".format(
+                context[node.output[i]].shape, exp_oshapes[i]
+            )
+
+    def code_generation_cppsim(self, model):
+        """Generates c++ code for simulation (cppsim)."""
+        node = self.onnx_node
+        path = self.get_nodeattr("code_gen_dir_cppsim")
+        self.code_gen_dict["$AP_INT_MAX_W$"] = [str(self.get_ap_int_max_w())]
+        self.generate_params(model, path)
+        self.global_includes()
+        self.defines("cppsim")
+        self.read_npy_data()
+        self.strm_decl()
+        self.pragmas()
+        self.docompute()
+        self.dataoutstrm()
+        self.save_as_npy()
+        self.timeout_value()
+        self.timeout_condition()
+        self.timeout_read_stream()
+
+        template = templates.docompute_template_timeout
+
+        for key in self.code_gen_dict:
+            # transform list into long string separated by '\n'
+            code_gen_line = "\n".join(self.code_gen_dict[key])
+            template = template.replace(key, code_gen_line)
+        code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
+        f = open(os.path.join(code_gen_dir, "execute_{}.cpp".format(node.op_type)), "w")
+        f.write(template)
+        f.close()
+        self.code_gen_dict.clear()
+
+    def global_includes(self):
+        self.code_gen_dict["$GLOBALS$"] = ['#include "split.hpp"']
+
+    def defines(self, var):
+        self.code_gen_dict["$DEFINES$"] = ["#define NUM_OUTPUTS " + str(self.get_n_outputs())]
+
+    def read_npy_data(self):
+        code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
+        npy_type = "float"
+        self.code_gen_dict["$READNPYDATA$"] = []
+        simd = self.get_nodeattr("SIMD")
+        input_elem_hls_type = self.get_input_datatype().get_hls_datatype_str()
+        npy_in = "%s/input_0.npy" % code_gen_dir
+        self.code_gen_dict["$READNPYDATA$"].append(
+            'npy2vectorstream<%s, %s, %d>("%s", in0);'
+            % (input_elem_hls_type, npy_type, simd, npy_in)
+        )
+
+    def strm_decl(self):
+        self.code_gen_dict["$STREAMDECLARATIONS$"] = []
+        simd = self.get_nodeattr("SIMD")
+        input_elem_hls_type = self.get_input_datatype().get_hls_datatype_str()
+        stream_name = "in0"
+        self.code_gen_dict["$STREAMDECLARATIONS$"].append(
+            'hls::stream<hls::vector<%s, %d>> %s ("%s");'
+            % (input_elem_hls_type, simd, stream_name, stream_name)
+        )
+        self.code_gen_dict["$STREAMDECLARATIONS$"].append(
+            "hls::stream<hls::vector<{}, {}>> out_arr[NUM_OUTPUTS];".format(
+                self.get_output_datatype().get_hls_datatype_str(), simd
+            )
+        )
+        self.code_gen_dict["$STREAMDECLARATIONS$"].append(
+            "hls::stream<hls::vector<{}, {}>> debug_out_arr[NUM_OUTPUTS];".format(
+                self.get_output_datatype().get_hls_datatype_str(), simd
+            )
+        )
+
+    def docompute(self):
+        self.code_gen_dict["$DOCOMPUTE$"] = []
+        n_outputs = self.get_n_outputs()
+        output_folds = [str(self.get_folded_output_shape(i)[-2]) for i in range(n_outputs)]
+        out_stream_folds = ", ".join(output_folds)
+        comp_call = "StreamingSplit<{}>(in0, out_arr);".format(out_stream_folds)
+        self.code_gen_dict["$DOCOMPUTE$"] = [comp_call]
+
+    def dataoutstrm(self):
+        npy_type = "float"
+        simd = self.get_nodeattr("SIMD")
+        code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
+        n_outputs = self.get_n_outputs()
+        self.code_gen_dict["$DATAOUTSTREAM$"] = []
+        for i in range(n_outputs):
+            oshape = self.get_folded_output_shape(i)
+            oshape_cpp_str = str(oshape).replace("(", "{").replace(")", "}")
+            npy_out = "%s/output_%d.npy" % (code_gen_dir, i)
+            self.code_gen_dict["$DATAOUTSTREAM$"].append(
+                'vectorstream2npy<%s, %s, %d>(debug_out_arr[%d], %s, "%s");'
+                % (
+                    self.get_output_datatype(i).get_hls_datatype_str(),
+                    npy_type,
+                    simd,
+                    i,
+                    oshape_cpp_str,
+                    npy_out,
+                )
+            )
+
+    def blackboxfunction(self):
+        input_elem_hls_type = self.get_input_datatype().get_hls_datatype_str()
+        simd = self.get_nodeattr("SIMD")
+        in_stream = "hls::stream<hls::vector<%s, %d>> &in0" % (input_elem_hls_type, simd)
"hls::stream> &in0" % (input_elem_hls_type, simd) + out_streams = "hls::stream> (&out_arr)[NUM_OUTPUTS]" % ( + input_elem_hls_type, + simd, + ) + blackbox_hls = "void %s(%s, %s)" % (self.onnx_node.name, in_stream, out_streams) + self.code_gen_dict["$BLACKBOXFUNCTION$"] = [blackbox_hls] + + def pragmas(self): + pragmas = [] + pragmas.append("#pragma HLS INTERFACE axis port=in0") + for i in range(self.get_n_outputs()): + pragmas.append("#pragma HLS INTERFACE axis port=out_arr[%d]" % i) + pragmas.append("#pragma HLS INTERFACE ap_ctrl_none port=return") + pragmas.append("#pragma HLS aggregate variable=in0 compact=bit") + for i in range(self.get_n_outputs()): + pragmas.append("#pragma HLS aggregate variable=out_arr[%d] compact=bit" % i) + self.code_gen_dict["$PRAGMAS$"] = pragmas + + def timeout_condition(self): + condition = [] + for i in range(self.get_n_outputs()): + condition.append("out_arr[{}].empty()".format(i)) + condition = " && ".join(condition) + self.code_gen_dict["$TIMEOUT_CONDITION$"] = [condition] + + def timeout_read_stream(self): + read_stream_command = """ +for(int i = 0; i < NUM_OUTPUTS; i++){ + if(!out_arr[i].empty()) + debug_out_arr[i] << out_arr[i].read(); +} +""" + self.code_gen_dict["$TIMEOUT_READ_STREAM$"] = [read_stream_command] diff --git a/src/finn/custom_op/fpgadataflow/split.py b/src/finn/custom_op/fpgadataflow/split.py new file mode 100644 index 0000000000..e6ec551bc4 --- /dev/null +++ b/src/finn/custom_op/fpgadataflow/split.py @@ -0,0 +1,164 @@ +# Copyright (C) 2024, Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import numpy as np +import warnings +from onnx import helper +from qonnx.core.datatype import DataType +from qonnx.util.basic import roundup_to_integer_multiple + +from finn.custom_op.fpgadataflow.hwcustomop import HWCustomOp + + +class StreamingSplit(HWCustomOp): + """Abstraction layer for HW implementation of Split. 
+ Only supports splitting along the last (channel) axis.""" + + def __init__(self, onnx_node, **kwargs): + super().__init__(onnx_node, **kwargs) + + def get_nodeattr_types(self): + my_attrs = { + "SIMD": ("i", True, 0), + # number of elements of each output streams + "ChannelsPerStream": ("ints", True, []), + # FINN DataTypes for input; output datatypes inferred from input + "inputDataType": ("s", True, ""), + # number of input vectors for non-split axes, examples: + # [1] is a single vector (like a FC layer with batch=1) + # [4] is four vectors (like a FC layer with batch=4) + # [1, 4, 4] is four * four vectors (like a conv layer with batch=1) + "numInputVectors": ("ints", False, [1]), + } + my_attrs.update(super().get_nodeattr_types()) + return my_attrs + + def get_n_outputs(self): + return len(self.get_nodeattr("ChannelsPerStream")) + + def get_total_elems(self): + elems_per_stream = self.get_nodeattr("ChannelsPerStream") + return int(np.sum(elems_per_stream)) + + def get_normal_input_shape(self, ind=0): + total_elems = self.get_total_elems() + vecs = list(self.get_nodeattr("numInputVectors")) + ishape = tuple(vecs + [total_elems]) + return ishape + + def get_folded_input_shape(self, ind=0): + simd = self.get_nodeattr("SIMD") + folds = self.get_total_elems() // simd + vecs = list(self.get_nodeattr("numInputVectors")) + return tuple(vecs + [folds, simd]) + + def get_normal_output_shape(self, ind=0): + elems = self.get_nodeattr("ChannelsPerStream")[ind] + vecs = list(self.get_nodeattr("numInputVectors")) + return tuple(vecs + [elems]) + + def get_folded_output_shape(self, ind=0): + elems = self.get_nodeattr("ChannelsPerStream")[ind] + simd = self.get_nodeattr("SIMD") + folds = elems // simd + vecs = list(self.get_nodeattr("numInputVectors")) + return tuple(vecs + [folds, simd]) + + def make_shape_compatible_op(self, model): + # check input shape + exp_ishape = self.get_normal_input_shape() + ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0])) + assert ishape == exp_ishape, "Unexpected input shape" + + assert len(self.onnx_node.output) == self.get_n_outputs(), "Unexpected number of outputs" + ret = helper.make_node("Split", self.onnx_node.input, self.onnx_node.output, axis=-1) + return ret + + def infer_node_datatype(self, model): + # check input datatype + inp = self.onnx_node.input[0] + idt = model.get_tensor_datatype(inp) + if idt != self.get_input_datatype(): + warn_str = "inputDataType changing for %s: %s -> %s " % ( + self.onnx_node.name, + str(self.get_input_datatype()), + str(idt), + ) + warnings.warn(warn_str) + self.set_nodeattr("inputDataType", idt.name) + odt = self.get_output_datatype() + for out in self.onnx_node.output: + model.set_tensor_datatype(out, odt) + + def verify_node(self): + pass + + def get_input_datatype(self, ind=0): + return DataType[self.get_nodeattr("inputDataType")] + + def get_output_datatype(self, ind=0): + # all output datatypes are the same as the input datatype + return self.get_input_datatype() + + def get_instream_width(self, ind=0): + ibits = self.get_input_datatype().bitwidth() + return ibits * self.get_nodeattr("SIMD") + + def get_outstream_width(self, ind=0): + obits = self.get_output_datatype().bitwidth() + out_width = obits * self.get_nodeattr("SIMD") + return out_width + + def get_number_output_values(self): + num_output_values = 0 + for i in range(self.get_n_outputs()): + num_output_values += np.prod(self.get_folded_output_shape(i)[:-1]) + return num_output_values + + def get_exp_cycles(self): + return 
np.prod(self.get_folded_input_shape()[:-1])
+
+    def execute_node(self, context, graph):
+        node = self.onnx_node
+        split = self.get_nodeattr("ChannelsPerStream")
+        np_split_param = np.cumsum(split[:-1])
+        np_result = np.split(context[node.input[0]], np_split_param, axis=-1)
+        for i, out in enumerate(node.output):
+            context[out] = np_result[i]
+
+    def get_instream_width_padded(self, ind=0):
+        in_width = self.get_instream_width()
+        return roundup_to_integer_multiple(in_width, 8)
+
+    def get_verilog_top_module_intf_names(self):
+        intf_names = super().get_verilog_top_module_intf_names()
+        intf_names["s_axis"] = [("in0", self.get_instream_width_padded())]
+        intf_names["m_axis"] = []
+        for i in range(self.get_n_outputs()):
+            intf_names["m_axis"].append(("out_arr_%d" % i, self.get_instream_width_padded()))
+        return intf_names

From 8ea47f37f288195564b908c7a374b1ce913ef450 Mon Sep 17 00:00:00 2001
From: Michal Danilowicz
Date: Mon, 16 Sep 2024 17:08:54 +0000
Subject: [PATCH 07/10] [Feature] Change signal name option added to
 hwcustomop.rtlsim_multi_io, useful for array interfaces

---
 src/finn/custom_op/fpgadataflow/hwcustomop.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/finn/custom_op/fpgadataflow/hwcustomop.py b/src/finn/custom_op/fpgadataflow/hwcustomop.py
index b40b8f3074..602a923424 100644
--- a/src/finn/custom_op/fpgadataflow/hwcustomop.py
+++ b/src/finn/custom_op/fpgadataflow/hwcustomop.py
@@ -284,11 +284,11 @@ def rtlsim(self, sim, inp, inp2=None):
         sim.stop_vcd_trace()
         return outputs

-    def rtlsim_multi_io(self, sim, io_dict):
+    def rtlsim_multi_io(self, sim, io_dict, sname=None):
         "Run rtlsim for this node, supports multiple i/o streams."

         # signal name
-        sname = "_" + self.hls_sname() + "_"
+        sname = "_" + self.hls_sname() + "_" if sname is None else sname

         trace_file = self.get_nodeattr("rtlsim_trace")
         if trace_file == "default":

From 59cfce74a4ba3788d0bf0596a6b0976ea5a030a0 Mon Sep 17 00:00:00 2001
From: Michal Danilowicz
Date: Mon, 16 Sep 2024 17:11:43 +0000
Subject: [PATCH 08/10] [Feature] InferSplitLayer() added

---
 .../fpgadataflow/convert_to_hw_layers.py      | 66 +++++++++++++++++++
 1 file changed, 66 insertions(+)

diff --git a/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py
index b02bc89db8..e4f10af3eb 100644
--- a/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py
+++ b/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py
@@ -1250,6 +1250,72 @@ def apply(self, model):
         return (model, graph_modified)


+class InferSplitLayer(Transformation):
+    """Convert suitable Split nodes (operating on last/-1 axis)
+    into StreamingSplit HW layers."""
+
+    def apply(self, model):
+        graph = model.graph
+        node_ind = 0
+        graph_modified = False
+        for node in graph.node:
+            node_ind += 1
+            if node.op_type == "Split":
+                split_param = node.input[1]
+                if model.get_initializer(split_param) is None:
+                    warnings.warn("Split param not constant, skipping InferSplitLayer()")
+                    continue
+                ishape = model.get_tensor_shape(node.input[0])
+                axis = get_by_name(node.attribute, "axis")
+                if (axis is None) or (ishape is None):
+                    continue
+                axis = axis.i
+                last_axis = len(ishape) - 1
+                # skip conversion if not using last axis
+                if (axis != -1) and (axis != last_axis):
+                    warnings.warn(
+                        "StreamingSplit supports only last axis, skipping InferSplitLayer()"
+                    )
+                    continue
+                # only one input allowed (two including split_param)
+                if len(node.input) != 2:
+                    warnings.warn("Only one input allowed, skipping
InferSplitLayer()") + continue + # skip conversion if the input is static + if model.get_initializer(node.input[0]) is not None: + warnings.warn("Static input detected, skipping InferSplitLayer()") + continue + # skip conversion if inputs are not integers + if not model.get_tensor_datatype(node.input[0]).is_integer(): + warnings.warn("Non-integer input detected, skipping InferSplitLayer()") + continue + # ready for conversion + channels_per_stream = [model.get_tensor_shape(x)[-1] for x in node.output] + inp_vec = list(model.get_tensor_shape(node.input[0])[:-1]) + new_node = helper.make_node( + "StreamingSplit", + node.input, + node.output, + domain="finn.custom_op.fpgadataflow", + backend="fpgadataflow", + name="StreamingSplit_" + node.name, + SIMD=1, + ChannelsPerStream=channels_per_stream, + inputDataType=model.get_tensor_datatype(node.input[0]).name, + numInputVectors=inp_vec, + outFIFODepths=[2] * len(node.output), + ) + graph.node.insert(node_ind, new_node) + # remove old node + graph.node.remove(node) + graph_modified = True + + if graph_modified: + model = model.transform(InferShapes()) + model = model.transform(InferDataTypes()) + return (model, graph_modified) + + class InferStreamingEltwise(Transformation): """Convert eltwise Add, Sub or Sub -> Abs to StreamingEltwise layer with AddEltwise, SubEltwise or AbsDiffEltwise op.""" From 6960e1505d2c220c7363488852fb82157282f4e0 Mon Sep 17 00:00:00 2001 From: Michal Danilowicz Date: Mon, 16 Sep 2024 17:17:07 +0000 Subject: [PATCH 09/10] [Feature] fpgadataflow test for split added --- tests/fpgadataflow/test_fpgadataflow_split.py | 150 ++++++++++++++++++ 1 file changed, 150 insertions(+) create mode 100644 tests/fpgadataflow/test_fpgadataflow_split.py diff --git a/tests/fpgadataflow/test_fpgadataflow_split.py b/tests/fpgadataflow/test_fpgadataflow_split.py new file mode 100644 index 0000000000..5859b6d5a6 --- /dev/null +++ b/tests/fpgadataflow/test_fpgadataflow_split.py @@ -0,0 +1,150 @@ +# Copyright (c) 2021, Xilinx +# Copyright (C) 2023, Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import pytest + +import numpy as np +import onnx +from onnx import helper as oh +from qonnx.core.datatype import DataType +from qonnx.core.modelwrapper import ModelWrapper +from qonnx.transformation.general import GiveUniqueNodeNames + +from finn.core.onnx_exec import execute_onnx +from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim +from finn.transformation.fpgadataflow.convert_to_hw_layers import InferSplitLayer +from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP +from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP +from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO +from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim +from finn.transformation.fpgadataflow.prepare_ip import PrepareIP +from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim +from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode +from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers + + +def make_split_model(IN_SHAPE, IN_DTYPE, SPLIT, AXIS): + out_shapes = [IN_SHAPE[:-1] + [s] for s in SPLIT] + outputs = [] + for i in range(len(SPLIT)): + name = "global_out_" + str(i) + out = oh.make_tensor_value_info(name, onnx.TensorProto.FLOAT, out_shapes[i]) + outputs.append(out) + + inp = oh.make_tensor_value_info("global_in", onnx.TensorProto.FLOAT, IN_SHAPE) + split_init = onnx.numpy_helper.from_array( + np.array(SPLIT, dtype=np.int64), name="Split_0_param0" + ) + split_node = oh.make_node( + "Split", [inp.name, split_init.name], [out.name for out in outputs], axis=AXIS + ) + graph = oh.make_graph(nodes=[split_node], name="split_test", inputs=[inp], outputs=outputs) + model = oh.make_model(graph) + model = ModelWrapper(model) + for out in outputs: + model.set_tensor_datatype(out.name, IN_DTYPE) + model.set_tensor_layout(out.name, ["N", "H", "W", "C"]) + model.set_tensor_datatype(inp.name, IN_DTYPE) + model.set_tensor_layout(inp.name, ["N", "H", "W", "C"]) + model.set_initializer(split_init.name, np.array(SPLIT, dtype=np.int64)) + model = model.transform(GiveUniqueNodeNames()) + + return model + + +@pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim", "stitched_rtlsim"]) +@pytest.mark.parametrize("idt", [DataType["INT3"]]) +@pytest.mark.fpgadataflow +@pytest.mark.vivado +@pytest.mark.slow +def test_fpgadataflow_split(exec_mode, idt): + fpga_part = "xc7z020clg400-1" + clk_ns = 10 + i_shape = [1, 5, 5, 10] + split = [2, 2, 6] + split_axis = 3 + model = make_split_model(i_shape, idt, split, split_axis) + assert len(model.graph.output) == len(split) + exp_oshapes = [] + for s in split: + oshape = i_shape.copy() + oshape[split_axis] = s + exp_oshapes.append(oshape) + onames = [o.name for o in model.graph.output] + assert all(model.get_tensor_shape(oname) == exp_oshapes[i] for i, oname in enumerate(onames)) + + inputs = [] + for out_shape in exp_oshapes: + inputs.append(np.random.randint(idt.min(), idt.max() + 1, out_shape).astype(np.float32)) 
+ test_input = np.concatenate(inputs, axis=split_axis) + input_dict = {model.graph.input[0].name: test_input} + ret = execute_onnx(model, input_dict) + for i, (k, v) in enumerate(ret.items()): + assert (v == inputs[i]).all() + + # call transformation to convert to HW and verify conversion + model = model.transform(InferSplitLayer()) + assert model.graph.node[0].op_type == "StreamingSplit" + assert model.graph.node[0].domain == "finn.custom_op.fpgadataflow" + ret = execute_onnx(model, input_dict) + for i, (k, v) in enumerate(ret.items()): + assert (v == inputs[i]).all() + + model = model.transform(SpecializeLayers(fpga_part)) + assert model.graph.node[0].op_type == "StreamingSplit_hls" + assert model.graph.node[0].domain == "finn.custom_op.fpgadataflow.hls" + if exec_mode == "cppsim": + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(PrepareCppSim()) + model = model.transform(CompileCppSim()) + model = model.transform(SetExecMode("cppsim")) + elif exec_mode == "rtlsim": + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(PrepareIP(fpga_part, clk_ns)) + model = model.transform(HLSSynthIP()) + model = model.transform(SetExecMode("rtlsim")) + model = model.transform(PrepareRTLSim()) + elif exec_mode == "stitched_rtlsim": + model = model.transform(InsertFIFO(create_shallow_fifos=True)) + model = model.transform(SpecializeLayers(fpga_part)) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(PrepareIP(fpga_part, clk_ns)) + model = model.transform(HLSSynthIP()) + model = model.transform( + CreateStitchedIP( + fpga_part, + clk_ns, + vitis=False, + ) + ) + model.set_metadata_prop("exec_mode", "rtlsim") + model.set_metadata_prop("rtlsim_trace", "trace.vcd") + ret_sim = execute_onnx(model, input_dict) + for i, (k, v) in enumerate(ret_sim.items()): + assert (v == inputs[i]).all() From c8c8d49cef0c9374ccca4337bc60701fae3ef450 Mon Sep 17 00:00:00 2001 From: Michal Danilowicz Date: Mon, 23 Sep 2024 13:34:20 +0000 Subject: [PATCH 10/10] [Update] Finn-hlslib commit updated --- fetch-repos.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fetch-repos.sh b/fetch-repos.sh index a4fc124fa4..078eb33ec0 100755 --- a/fetch-repos.sh +++ b/fetch-repos.sh @@ -32,7 +32,7 @@ FINN_EXP_COMMIT="0724be21111a21f0d81a072fccc1c446e053f851" BREVITAS_COMMIT="d4834bd2a0fad3c1fbc0ff7e1346d5dcb3797ea4" PYVERILATOR_COMMIT="ce0a08c20cb8c1d1e84181d6f392390f846adbd1" CNPY_COMMIT="4e8810b1a8637695171ed346ce68f6984e585ef4" -HLSLIB_COMMIT="16e5847a5e3ef76cffe84c8fad2f010d593457d3" +HLSLIB_COMMIT="2c066e87f5b8d309693c5d46c206473ca20ac68c" OMX_COMMIT="0b59762f9e4c4f7e5aa535ee9bc29f292434ca7a" AVNET_BDF_COMMIT="2d49cfc25766f07792c0b314489f21fe916b639b" XIL_BDF_COMMIT="8cf4bb674a919ac34e3d99d8d71a9e60af93d14e"
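
Note for reviewers: a quick end-to-end sketch of how the pieces above compose. Patch 08's
InferSplitLayer turns a constant, last-axis ONNX Split into the StreamingSplit abstraction
from patch 06, SpecializeLayers binds it to StreamingSplit_hls, and cppsim execution of that
node goes through the timeout template from patch 04. This sketch is illustration only, not
part of the series: "model.onnx", the FPGA part (taken from the new test), and the random
input range are placeholders, and the Split node must pass the checks InferSplitLayer makes
(constant split parameter, last-axis split, integer input datatype).

import numpy as np
from qonnx.core.modelwrapper import ModelWrapper
from qonnx.transformation.general import GiveUniqueNodeNames

from finn.core.onnx_exec import execute_onnx
from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
from finn.transformation.fpgadataflow.convert_to_hw_layers import InferSplitLayer
from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers

# placeholder: any model containing a constant, last-axis, integer-typed Split
model = ModelWrapper("model.onnx")

# ONNX Split -> StreamingSplit (HW abstraction layer, patches 06/08)
model = model.transform(InferSplitLayer())
# StreamingSplit -> StreamingSplit_hls (HLS backend variant, patch 06)
model = model.transform(SpecializeLayers("xc7z020clg400-1"))
model = model.transform(GiveUniqueNodeNames())

# cppsim codegen for StreamingSplit_hls uses docompute_template_timeout (patch 04):
# the generated main() keeps invoking the kernel until all output streams have stayed
# empty for $TIMEOUT_VALUE$ consecutive iterations, draining them as data appears
model = model.transform(PrepareCppSim())
model = model.transform(CompileCppSim())
model = model.transform(SetExecMode("cppsim"))

# drive the model with random data matching its input shape
inp_name = model.graph.input[0].name
ishape = model.get_tensor_shape(inp_name)
x = np.random.randint(-4, 4, ishape).astype(np.float32)  # e.g. an INT3-style value range
outputs = execute_onnx(model, {inp_name: x})

The rtlsim and stitched-IP flows follow the same pattern; patch 09's
test_fpgadataflow_split.py exercises all three execution modes.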