From d7c9391e36102588e1b5cc9b46d132633c9e4267 Mon Sep 17 00:00:00 2001 From: Michal Danilowicz Date: Mon, 16 Sep 2024 16:44:54 +0000 Subject: [PATCH 1/6] [Feature] Timeout template added --- src/finn/custom_op/fpgadataflow/hlsbackend.py | 14 ++++++ src/finn/custom_op/fpgadataflow/templates.py | 45 +++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/src/finn/custom_op/fpgadataflow/hlsbackend.py b/src/finn/custom_op/fpgadataflow/hlsbackend.py index d8210fd684..c03a9029db 100644 --- a/src/finn/custom_op/fpgadataflow/hlsbackend.py +++ b/src/finn/custom_op/fpgadataflow/hlsbackend.py @@ -474,3 +474,17 @@ def get_ap_int_max_w(self): ret = max([instream, outstream]) assert ret <= 8191, "AP_INT_MAX_W=%d is larger than allowed maximum of 8191" % ret return ret + + def timeout_value(self): + """Set timeout value for HLS functions defined for one clock cycle""" + self.code_gen_dict["$TIMEOUT_VALUE$"] = ["100"] + + def timeout_condition(self): + """Set timeout condition for HLS functions defined for one clock cycle""" + self.code_gen_dict["$TIMEOUT_CONDITION$"] = ["out_{}.empty()".format(self.hls_sname())] + + def timeout_read_stream(self): + """Set reading output stream procedure for HLS functions defined for one clock cycle""" + self.code_gen_dict["$TIMEOUT_READ_STREAM$"] = [ + "debug_out_{} << out_{}.read();".format(self.hls_sname(), self.hls_sname()) + ] diff --git a/src/finn/custom_op/fpgadataflow/templates.py b/src/finn/custom_op/fpgadataflow/templates.py index 3d89a0ab23..7ef74118ec 100644 --- a/src/finn/custom_op/fpgadataflow/templates.py +++ b/src/finn/custom_op/fpgadataflow/templates.py @@ -58,6 +58,51 @@ """ +# template for single node execution with timeout (for single clock hls operations) +docompute_template_timeout = """ +#define AP_INT_MAX_W $AP_INT_MAX_W$ +#include "cnpy.h" +#include "npy2apintstream.hpp" +#include "npy2vectorstream.hpp" +#include +#include "bnn-library.h" + +// includes for network parameters +$GLOBALS$ + +// defines for network parameters +$DEFINES$ + +int main(){ +$PRAGMAS$ + +$STREAMDECLARATIONS$ + +$READNPYDATA$ + +unsigned timeout = 0; +while(timeout < $TIMEOUT_VALUE$){ + +$DOCOMPUTE$ + +if($TIMEOUT_CONDITION$){ +timeout++; +} + +else{ +$TIMEOUT_READ_STREAM$ +timeout = 0; +} +} + +$DATAOUTSTREAM$ + +$SAVEASCNPY$ + +} + +""" + # templates for single node ip generation # cpp file From 6da0ce4d10db86f2eea3bb513164c752401956d8 Mon Sep 17 00:00:00 2001 From: mdaniowi Date: Fri, 20 Sep 2024 16:02:40 +0100 Subject: [PATCH 2/6] [Feature] npy2vectorstream.hpp include added to docompute_template --- src/finn/custom_op/fpgadataflow/templates.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/finn/custom_op/fpgadataflow/templates.py b/src/finn/custom_op/fpgadataflow/templates.py index 7ef74118ec..d2100a7516 100644 --- a/src/finn/custom_op/fpgadataflow/templates.py +++ b/src/finn/custom_op/fpgadataflow/templates.py @@ -32,6 +32,7 @@ #define AP_INT_MAX_W $AP_INT_MAX_W$ #include "cnpy.h" #include "npy2apintstream.hpp" +#include "npy2vectorstream.hpp" #include #include "bnn-library.h" From c92b919d0919e31206754e51c588f5bd474ecf7b Mon Sep 17 00:00:00 2001 From: Michal Danilowicz Date: Mon, 16 Sep 2024 15:36:25 +0000 Subject: [PATCH 3/6] [Feature] The Concat op code is not hardcoded in the compiler anymore and it now accepts different datatypes of inputs. 
It uses the new implementation from finn-hlslib --- src/finn/custom_op/fpgadataflow/concat.py | 71 ++++++-- .../custom_op/fpgadataflow/hls/concat_hls.py | 166 ++++++++++-------- 2 files changed, 144 insertions(+), 93 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/concat.py b/src/finn/custom_op/fpgadataflow/concat.py index 210b6b7fdd..214c5a4bd5 100644 --- a/src/finn/custom_op/fpgadataflow/concat.py +++ b/src/finn/custom_op/fpgadataflow/concat.py @@ -27,7 +27,9 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import math import numpy as np +import warnings from qonnx.core.datatype import DataType from qonnx.util.basic import roundup_to_integer_multiple @@ -36,17 +38,18 @@ class StreamingConcat(HWCustomOp): """Abstraction layer for HW implementation of Concat. - Only supports concatenating along the last axis.""" + Only supports concatenating along the last (channel) axis.""" def __init__(self, onnx_node, **kwargs): super().__init__(onnx_node, **kwargs) def get_nodeattr_types(self): my_attrs = { + "SIMD": ("i", True, 0), # number of elements from each stream to concat - "ElemsPerStream": ("ints", True, []), - # FINN DataTypes for inputs; output datatype inferred from input - "inputDataType": ("s", True, ""), + "ChannelsPerStream": ("ints", True, []), + # FINN DataTypes for inputs; output datatype inferred from inputs + "inputDataTypes": ("strings", True, [""]), # number of input vectors for non-concat axes, examples: # [1] is a single vector (like a FC layer with batch=1) # [4] is four vectors (like a FC layer with batch=4) @@ -57,21 +60,24 @@ def get_nodeattr_types(self): return my_attrs def get_n_inputs(self): - return len(self.get_nodeattr("ElemsPerStream")) + return len(self.get_nodeattr("ChannelsPerStream")) def get_total_elems(self): - elems_per_stream = self.get_nodeattr("ElemsPerStream") + elems_per_stream = self.get_nodeattr("ChannelsPerStream") return int(np.sum(elems_per_stream)) def get_normal_input_shape(self, ind=0): - elems_per_stream = self.get_nodeattr("ElemsPerStream") + elems_per_stream = self.get_nodeattr("ChannelsPerStream") elems = elems_per_stream[ind] vecs = list(self.get_nodeattr("numInputVectors")) ishape = tuple(vecs + [elems]) return ishape def get_folded_input_shape(self, ind=0): - return self.get_normal_input_shape(ind) + simd = self.get_nodeattr("SIMD") + folds = self.get_nodeattr("ChannelsPerStream")[ind] // simd + vecs = list(self.get_nodeattr("numInputVectors")) + return tuple(vecs + [folds, simd]) def get_normal_output_shape(self, ind=0): total_elems = self.get_total_elems() @@ -79,7 +85,11 @@ def get_normal_output_shape(self, ind=0): return tuple(vecs + [total_elems]) def get_folded_output_shape(self, ind=0): - return self.get_normal_output_shape() + total_elems = self.get_total_elems() + simd = self.get_nodeattr("SIMD") + folds = total_elems // simd + vecs = list(self.get_nodeattr("numInputVectors")) + return tuple(vecs + [folds, simd]) def make_shape_compatible_op(self, model): # check all input shapes @@ -94,7 +104,16 @@ def infer_node_datatype(self, model): # check all input datatypes for i, inp in enumerate(self.onnx_node.input): idt = model.get_tensor_datatype(inp) - assert idt == self.get_input_datatype() + if idt != self.get_input_datatype(i): + warn_str = "inputDataType changing for %s: %s -> %s " % ( + self.onnx_node.name, + str(self.get_input_datatype(i)), + str(idt), + ) + warnings.warn(warn_str) + old_datatypes_attr = 
self.get_nodeattr("inputDataTypes") + old_datatypes_attr[i] = idt.name + self.set_nodeattr("inputDataTypes", old_datatypes_attr) odt = self.get_output_datatype() model.set_tensor_datatype(self.onnx_node.output[0], odt) @@ -103,21 +122,37 @@ def verify_node(self): def get_input_datatype(self, ind=0): # input dt identical for all inputs - return DataType[self.get_nodeattr("inputDataType")] + return DataType[self.get_nodeattr("inputDataTypes")[ind]] def get_output_datatype(self, ind=0): - return self.get_input_datatype() + # infer output datatype from declared inputDataTypes + min_input = 0 + max_input = 0 + for i in range(len(self.get_nodeattr("inputDataTypes"))): + idt = self.get_input_datatype(i) + if idt.min() < min_input: + min_input = idt.min() + if idt.max() > max_input: + max_input = idt.max() + # if the input range is always greater than 0, then acc_max <= 2^P - 1 + if min_input >= 0: + out_bit_width = math.ceil(np.log2(max_input + 1)) + odt = DataType[f"UINT{out_bit_width}"] + # if the input range is signed, then acc_min >= -2^{P-1} and acc_max <= + # 2^{P - 1} - 1, which means 2^{P - 1} >= max(-acc_min, 1 + acc_max) + else: + max_abs_input = max(-min_input, 1 + max_input) + out_bit_width = math.ceil(np.log2(max_abs_input) + 1) + odt = DataType[f"INT{out_bit_width}"] + return odt def get_instream_width(self, ind=0): - elems_per_stream = self.get_nodeattr("ElemsPerStream") - elems = elems_per_stream[ind] - ibits = self.get_input_datatype().bitwidth() - return elems * ibits + ibits = self.get_input_datatype(ind).bitwidth() + return ibits * self.get_nodeattr("SIMD") def get_outstream_width(self, ind=0): obits = self.get_output_datatype().bitwidth() - total_elems = self.get_total_elems() - out_width = total_elems * obits + out_width = obits * self.get_nodeattr("SIMD") return out_width def get_number_output_values(self): diff --git a/src/finn/custom_op/fpgadataflow/hls/concat_hls.py b/src/finn/custom_op/fpgadataflow/hls/concat_hls.py index 008fa9cee8..641581a12d 100644 --- a/src/finn/custom_op/fpgadataflow/hls/concat_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/concat_hls.py @@ -30,6 +30,7 @@ import numpy as np import os +from finn.custom_op.fpgadataflow import templates from finn.custom_op.fpgadataflow.concat import StreamingConcat from finn.custom_op.fpgadataflow.hlsbackend import HLSBackend from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy @@ -48,47 +49,6 @@ def get_nodeattr_types(self): my_attrs.update(HLSBackend.get_nodeattr_types(self)) return my_attrs - def generate_params(self, model, path): - elems_per_stream = self.get_nodeattr("ElemsPerStream") - inp_streams = [] - commands = [] - idt = self.get_input_datatype() - total_elems = self.get_total_elems() - total_bw = idt.bitwidth() * total_elems - for i, elems in enumerate(elems_per_stream): - bw = idt.bitwidth() * elems - inp_stream = "hls::stream > &in%d" % (bw, i) - inp_streams.append(inp_stream) - cmd = "in%d.read()" % i - commands.append(cmd) - out_stream = "hls::stream > &out" % (total_bw) - inp_streams.append(out_stream) - - impl_hls_code = [] - impl_hls_code.append("void StreamingConcat(") - impl_hls_code.append(",".join(inp_streams)) - impl_hls_code.append(", unsigned int numReps) {") - impl_hls_code.append("for(unsigned int i = 0; i < numReps; i++) {") - impl_hls_code.append("#pragma HLS PIPELINE II=1") - impl_hls_code.append("ap_uint<%d> out_elem;" % total_bw) - # FIXME: the order of streams for concatenation works out differently - # for cppsim vs rtlsim, addressed via reversing the order 
of commands - # for now - impl_hls_code.append("#ifdef __SYNTHESIS__") - impl_hls_code.append("out_elem = (" + ",".join(commands[::-1]) + ");") - impl_hls_code.append("#else") - impl_hls_code.append("out_elem = (" + ",".join(commands) + ");") - impl_hls_code.append("#endif") - impl_hls_code.append("out.write(out_elem);") - impl_hls_code.append("}") - impl_hls_code.append("}") - impl_hls_code = "\n".join(impl_hls_code) - - impl_filename = "{}/concat_impl.hpp".format(path) - f_impl = open(impl_filename, "w") - f_impl.write(impl_hls_code) - f_impl.close() - def execute_node(self, context, graph): mode = self.get_nodeattr("exec_mode") node = self.onnx_node @@ -96,8 +56,7 @@ def execute_node(self, context, graph): ishapes = [self.get_normal_input_shape(x) for x in range(n_inps)] folded_ishapes = [self.get_folded_input_shape(x) for x in range(n_inps)] exp_oshape = self.get_normal_output_shape() - folded_oshape = self.get_folded_output_shape() - export_idt = self.get_input_datatype() + export_idts = [self.get_input_datatype(i) for i in range(n_inps)] if mode == "cppsim": code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") @@ -127,8 +86,10 @@ def execute_node(self, context, graph): # load output npy file super().npy_to_dynamic_output(context) assert ( - context[node.output[0]].shape == folded_oshape - ), "cppsim did not produce expected folded output shape" + context[node.output[0]].shape == exp_oshape + ), "cppsim did not produce expected folded output shape. Got: {}, expected: {}".format( + context[node.output[0]].shape, exp_oshape + ) context[node.output[0]] = context[node.output[0]].reshape(*exp_oshape) elif mode == "rtlsim": sim = self.get_rtlsim() @@ -137,7 +98,7 @@ def execute_node(self, context, graph): nbits = self.get_instream_width(i) rtlsim_inp = npy_to_rtlsim_input( "%s/input_%d.npy" % (code_gen_dir, i), - export_idt, + export_idts[i], nbits, reverse_inner=True, ) @@ -177,33 +138,54 @@ def execute_node(self, context, graph): context[node.output[0]].shape == exp_oshape ), """Output shape doesn't match expected shape.""" + def code_generation_cppsim(self, model): + """Generates c++ code for simulation (cppsim).""" + node = self.onnx_node + path = self.get_nodeattr("code_gen_dir_cppsim") + self.code_gen_dict["$AP_INT_MAX_W$"] = [str(self.get_ap_int_max_w())] + self.generate_params(model, path) + self.global_includes() + self.defines("cppsim") + self.read_npy_data() + self.strm_decl() + self.pragmas() + self.docompute() + self.dataoutstrm() + self.save_as_npy() + self.timeout_value() + self.timeout_condition() + self.timeout_read_stream() + + template = templates.docompute_template_timeout + + for key in self.code_gen_dict: + # transform list into long string separated by '\n' + code_gen_line = "\n".join(self.code_gen_dict[key]) + template = template.replace(key, code_gen_line) + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + f = open(os.path.join(code_gen_dir, "execute_{}.cpp".format(node.op_type)), "w") + f.write(template) + f.close() + self.code_gen_dict.clear() + def global_includes(self): - self.code_gen_dict["$GLOBALS$"] = ['#include "concat_impl.hpp"'] + self.code_gen_dict["$GLOBALS$"] = ['#include "concat.hpp"'] def defines(self, var): - num_reps = self.get_nodeattr("numInputVectors") - num_reps = np.prod(num_reps) - self.code_gen_dict["$DEFINES$"] = ["#define NumReps %d" % num_reps] + self.code_gen_dict["$DEFINES$"] = ["#define SIMD {}".format(self.get_nodeattr("SIMD"))] def read_npy_data(self): n_inputs = self.get_n_inputs() code_gen_dir = 
self.get_nodeattr("code_gen_dir_cppsim") npy_type = "float" self.code_gen_dict["$READNPYDATA$"] = [] - idt = self.get_input_datatype() - idt_bw = idt.bitwidth() - elem_hls_type = idt.get_hls_datatype_str() - elem_bits = idt_bw for i in range(n_inputs): - packed_bits = self.get_instream_width(i) - packed_hls_type = "ap_uint<%d>" % packed_bits + input_elem_hls_type = self.get_input_datatype(i).get_hls_datatype_str() npy_in = "%s/input_%d.npy" % (code_gen_dir, i) self.code_gen_dict["$READNPYDATA$"].append( - 'npy2apintstream<%s, %s, %d, %s>("%s", in%d_%s);' + 'npy2vectorstream<%s, %s, SIMD>("%s", in%d_%s);' % ( - packed_hls_type, - elem_hls_type, - elem_bits, + input_elem_hls_type, npy_type, npy_in, i, @@ -215,41 +197,70 @@ def strm_decl(self): self.code_gen_dict["$STREAMDECLARATIONS$"] = [] n_inputs = self.get_n_inputs() for i in range(n_inputs): - packed_bits = self.get_instream_width(i) - packed_hls_type = "ap_uint<%d>" % packed_bits + input_elem_hls_type = self.get_input_datatype(i).get_hls_datatype_str() stream_name = "in%d_%s" % (i, self.hls_sname()) self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<%s> %s ("%s");' % (packed_hls_type, stream_name, stream_name) + 'hls::stream> %s ("%s");' + % (input_elem_hls_type, stream_name, stream_name) + ) + self.code_gen_dict["$STREAMDECLARATIONS$"].append( + 'hls::stream> out_{} ("out_{}");'.format( + self.get_output_datatype().get_hls_datatype_str(), + self.hls_sname(), + self.hls_sname(), ) + ) self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream> out_{} ("out_{}");'.format( - self.get_outstream_width(), self.hls_sname(), self.hls_sname() + 'hls::stream> debug_out_{} ("debug_out_{}");'.format( + self.get_output_datatype().get_hls_datatype_str(), + self.hls_sname(), + self.hls_sname(), ) ) def docompute(self): self.code_gen_dict["$DOCOMPUTE$"] = [] n_inputs = self.get_n_inputs() + input_folds = [str(self.get_folded_input_shape(i)[-2]) for i in range(n_inputs)] in_streams = [] for i in range(n_inputs): in_streams.append("in%d_%s" % (i, self.hls_sname())) - in_stream_names = ",".join(in_streams) - comp_call = "StreamingConcat(%s, out_%s, NumReps);" % ( - in_stream_names, - self.hls_sname(), + in_stream_names = ", ".join(in_streams) + in_stream_folds = ", ".join(input_folds) + comp_call = "StreamingConcat<{}>(out_{}, {});".format( + in_stream_folds, self.hls_sname(), in_stream_names ) self.code_gen_dict["$DOCOMPUTE$"] = [comp_call] + def dataoutstrm(self): + npy_type = "float" + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + oshape = self.get_folded_output_shape() + oshape_cpp_str = str(oshape).replace("(", "{").replace(")", "}") + npy_out = "%s/output.npy" % code_gen_dir + self.code_gen_dict["$DATAOUTSTREAM$"] = [ + 'vectorstream2npy<%s, %s, SIMD>(debug_out_%s, %s, "%s");' + % ( + self.get_output_datatype().get_hls_datatype_str(), + npy_type, + self.hls_sname(), + oshape_cpp_str, + npy_out, + ) + ] + def blackboxfunction(self): n_inputs = self.get_n_inputs() in_streams = [] for i in range(n_inputs): - iwidth = self.get_instream_width(i) - in_streams.append("hls::stream> &in%d_%s" % (iwidth, i, self.hls_sname())) - in_streams = ",".join(in_streams) - total_width = self.get_input_datatype().bitwidth() * self.get_total_elems() - out_stream = "hls::stream> &out_%s" % ( - total_width, + input_elem_hls_type = self.get_input_datatype(i).get_hls_datatype_str() + in_streams.append( + "hls::stream> &in%d_%s" + % (input_elem_hls_type, i, self.hls_sname()) + ) + in_streams = ", ".join(in_streams) + out_stream = 
"hls::stream> &out_%s" % ( + self.get_output_datatype().get_hls_datatype_str(), self.hls_sname(), ) blackbox_hls = "void %s(%s, %s)" % (self.onnx_node.name, in_streams, out_stream) @@ -264,4 +275,9 @@ def pragmas(self): self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE axis port=out_" + self.hls_sname() ) + for i in range(n_inputs): + pragmas.append( + "#pragma HLS aggregate variable=in%d_%s compact=bit" % (i, self.hls_sname()) + ) + pragmas.append("#pragma HLS aggregate variable=out_%s compact=bit" % self.hls_sname()) self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE ap_ctrl_none port=return") From d185219640282c97f7a144c7a6a0294177202f87 Mon Sep 17 00:00:00 2001 From: Michal Danilowicz Date: Mon, 16 Sep 2024 16:03:23 +0000 Subject: [PATCH 4/6] [Feature] InferConcatLayer transformation now accepts different datatypes among inputs and sets the SIMD parameter --- .../fpgadataflow/convert_to_hw_layers.py | 28 +++++++++++-------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py index b02bc89db8..121a5484af 100644 --- a/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py +++ b/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py @@ -1211,21 +1211,24 @@ def apply(self, model): if (axis != -1) and (axis != last_axis): continue # check datatype coherence - dt0 = model.get_tensor_datatype(node.input[0]) - if dt0 is None: - continue - dt_coherent = all([model.get_tensor_datatype(x) == dt0 for x in node.input]) - if not dt_coherent: + if any([model.get_tensor_datatype(x) is None for x in node.input]): + warnings.warn( + "Inputs with undefined datatype detected, skipping InferConcatLayer()" + ) continue # skip conversion if any inputs are static - all_static = all([model.get_initializer(x) is None for x in node.input]) - if not all_static: + any_static = any([model.get_initializer(x) is not None for x in node.input]) + if any_static: continue # skip conversion if inputs are not integers - if not dt0.is_integer(): + all_integer = all([model.get_tensor_datatype(x).is_integer() for x in node.input]) + if not all_integer: + warnings.warn( + "Inputs with non-integer datatype detected, skipping InferConcatLayer()" + ) continue # ready for conversion - elems_per_stream = [model.get_tensor_shape(x)[-1] for x in node.input] + channels_per_stream = [model.get_tensor_shape(x)[-1] for x in node.input] inp_vec = list(model.get_tensor_shape(node.input[0])[:-1]) new_node = helper.make_node( "StreamingConcat", @@ -1233,9 +1236,10 @@ def apply(self, model): node.output, domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", - name="Concat_" + node.name, - ElemsPerStream=elems_per_stream, - inputDataType=dt0.name, + name="StreamingConcat_" + node.name, + SIMD=1, + ChannelsPerStream=channels_per_stream, + inputDataTypes=[model.get_tensor_datatype(x).name for x in node.input], numInputVectors=inp_vec, inFIFODepths=[2] * len(node.input), ) From c8d36fb51ad8580bbd002c454ab3a478da3ac817 Mon Sep 17 00:00:00 2001 From: Michal Danilowicz Date: Mon, 16 Sep 2024 16:11:14 +0000 Subject: [PATCH 5/6] [Feature] test_fpgadataflow_concat.py test case checks different datatypes among inputs --- .../fpgadataflow/test_fpgadataflow_concat.py | 26 ++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/tests/fpgadataflow/test_fpgadataflow_concat.py b/tests/fpgadataflow/test_fpgadataflow_concat.py index 25c738d049..719d61905f 
From c8d36fb51ad8580bbd002c454ab3a478da3ac817 Mon Sep 17 00:00:00 2001
From: Michal Danilowicz
Date: Mon, 16 Sep 2024 16:11:14 +0000
Subject: [PATCH 5/6] [Feature] test_fpgadataflow_concat.py test case checks different datatypes among inputs

---
 .../fpgadataflow/test_fpgadataflow_concat.py | 26 ++++++++++++-------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/tests/fpgadataflow/test_fpgadataflow_concat.py b/tests/fpgadataflow/test_fpgadataflow_concat.py
index 25c738d049..719d61905f 100644
--- a/tests/fpgadataflow/test_fpgadataflow_concat.py
+++ b/tests/fpgadataflow/test_fpgadataflow_concat.py
@@ -52,7 +52,7 @@ from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers
 
 
-def make_concat_model(i_shapes, idt):
+def make_concat_model(i_shapes, idts):
     class ConcatModel(nn.Module):
         def forward(self, *args):
             return torch.cat(args, -1)
 
@@ -67,20 +67,25 @@ def forward(self, *args):
     torch.onnx.export(torch_model, input_t, model_bytes, opset_version=11)
     model = onnx.ModelProto.FromString(model_bytes.getvalue())
     model = ModelWrapper(model)
-    for inp in model.graph.input:
+    for inp, idt in zip(model.graph.input, idts):
         model.set_tensor_datatype(inp.name, idt)
     return model
 
 
 @pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"])
-@pytest.mark.parametrize("idt", [DataType["INT4"]])
+# input datatypes and expected inferred out datatype
+@pytest.mark.parametrize(
+    "test_idts", [([DataType["INT3"], DataType["UINT4"], DataType["UINT6"]], DataType["INT7"])]
+)
 @pytest.mark.fpgadataflow
 @pytest.mark.vivado
 @pytest.mark.slow
-def test_fpgadataflow_concat(exec_mode, idt):
+def test_fpgadataflow_concat(exec_mode, test_idts):
+    idts = test_idts[0]
+    exp_odt = test_idts[1]
     i_shapes = [(1, 2, 4), (1, 2, 6), (1, 2, 1)]
-    i_data = [gen_finn_dt_tensor(idt, x) for x in i_shapes]
-    model = make_concat_model(i_shapes, idt)
+    i_data = [gen_finn_dt_tensor(idt, x) for x, idt in zip(i_shapes, idts)]
+    model = make_concat_model(i_shapes, idts)
     assert len(i_shapes) == len(model.graph.input)
     assert len(model.graph.output) == 1
     exp_oshape = list(i_shapes[0][:-1]) + [sum(x[-1] for x in i_shapes)]
@@ -96,6 +101,7 @@ def test_fpgadataflow_concat(exec_mode, idt):
     model = model.transform(InferConcatLayer())
     assert model.graph.node[0].op_type == "StreamingConcat"
     assert model.graph.node[0].domain == "finn.custom_op.fpgadataflow"
+    assert model.get_tensor_datatype(model.graph.output[0].name) == exp_odt
     ret = execute_onnx(model, inp_dict)
     assert (ret[oname] == exp_out).all()
     model = model.transform(SpecializeLayers("xc7z020clg400-1"))
@@ -120,12 +126,13 @@
 @pytest.mark.vivado
 @pytest.mark.slow
 def test_fpgadataflow_concat_stitchedip():
-    idt = DataType["INT4"]
+    idts = [DataType["INT3"], DataType["UINT4"], DataType["UINT6"]]
+    exp_odt = DataType["INT7"]
     fpga_part = "xc7z020clg400-1"
     clk_ns = 10
     i_shapes = [(1, 2, 4), (1, 2, 6), (1, 2, 1)]
-    i_data = [gen_finn_dt_tensor(idt, x) for x in i_shapes]
-    model = make_concat_model(i_shapes, idt)
+    i_data = [gen_finn_dt_tensor(idt, x) for x, idt in zip(i_shapes, idts)]
+    model = make_concat_model(i_shapes, idts)
     assert len(i_shapes) == len(model.graph.input)
     assert len(model.graph.output) == 1
     exp_oshape = list(i_shapes[0][:-1]) + [sum(x[-1] for x in i_shapes)]
@@ -141,6 +148,7 @@ def test_fpgadataflow_concat_stitchedip():
     model = model.transform(InferConcatLayer())
     assert model.graph.node[0].op_type == "StreamingConcat"
     assert model.graph.node[0].domain == "finn.custom_op.fpgadataflow"
+    assert model.get_tensor_datatype(model.graph.output[0].name) == exp_odt
     model = model.transform(SpecializeLayers(fpga_part))
     assert model.graph.node[0].op_type == "StreamingConcat_hls"
     assert model.graph.node[0].domain == "finn.custom_op.fpgadataflow.hls"
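The INT7 expectation asserted in the test comes from the new get_output_datatype() logic in PATCH 3/6, which picks the narrowest FINN datatype whose range covers every declared input datatype. A standalone sketch of the same arithmetic, with the test's INT3/UINT4/UINT6 ranges hard-coded instead of taken from DataType objects (illustrative only):

import math

# (min, max) ranges of the test's declared input datatypes: INT3, UINT4, UINT6
ranges = [(-4, 3), (0, 15), (0, 63)]

min_input, max_input = 0, 0
for lo, hi in ranges:
    min_input = min(min_input, lo)
    max_input = max(max_input, hi)

if min_input >= 0:
    # all inputs unsigned: ceil(log2(max + 1)) bits are enough
    bits = math.ceil(math.log2(max_input + 1))
    odt = "UINT%d" % bits
else:
    # at least one signed input: 2^(P-1) must cover max(-min, 1 + max)
    max_abs_input = max(-min_input, 1 + max_input)
    bits = math.ceil(math.log2(max_abs_input) + 1)
    odt = "INT%d" % bits

print(odt)  # INT7 -- wide enough to carry any element from any of the three streams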
From 823588bc1de3881e8a9dab0b91c7c0f4ad17be65 Mon Sep 17 00:00:00 2001
From: Michal Danilowicz
Date: Mon, 23 Sep 2024 13:36:12 +0000
Subject: [PATCH 6/6] [Update] Finn-hlslib commit updated

---
 fetch-repos.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fetch-repos.sh b/fetch-repos.sh
index a4fc124fa4..078eb33ec0 100755
--- a/fetch-repos.sh
+++ b/fetch-repos.sh
@@ -32,7 +32,7 @@ FINN_EXP_COMMIT="0724be21111a21f0d81a072fccc1c446e053f851"
 BREVITAS_COMMIT="d4834bd2a0fad3c1fbc0ff7e1346d5dcb3797ea4"
 PYVERILATOR_COMMIT="ce0a08c20cb8c1d1e84181d6f392390f846adbd1"
 CNPY_COMMIT="4e8810b1a8637695171ed346ce68f6984e585ef4"
-HLSLIB_COMMIT="16e5847a5e3ef76cffe84c8fad2f010d593457d3"
+HLSLIB_COMMIT="2c066e87f5b8d309693c5d46c206473ca20ac68c"
 OMX_COMMIT="0b59762f9e4c4f7e5aa535ee9bc29f292434ca7a"
 AVNET_BDF_COMMIT="2d49cfc25766f07792c0b314489f21fe916b639b"
 XIL_BDF_COMMIT="8cf4bb674a919ac34e3d99d8d71a9e60af93d14e"
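For reference, the cppsim flow introduced in PATCH 1/6 wraps $DOCOMPUTE$ in a polling loop: the generated main() keeps invoking the single-cycle HLS body, counts consecutive iterations in which the output stream stays empty, and stops once that count reaches $TIMEOUT_VALUE$. A small Python re-enactment of that control flow (all names invented, purely illustrative of the generated C++, not code from this patch series):

from collections import deque

def run_with_timeout(kernel, out_stream, timeout_value=100):
    """Keep calling the kernel until the output stream has stayed empty for
    timeout_value consecutive iterations (mirrors the generated while-loop)."""
    results = []                      # plays the role of debug_out_<sname>
    timeout = 0
    while timeout < timeout_value:
        kernel()                      # $DOCOMPUTE$: one invocation of the HLS body
        if not out_stream:            # $TIMEOUT_CONDITION$: out_<sname>.empty()
            timeout += 1              # nothing produced, count towards the timeout
        else:
            results.append(out_stream.popleft())  # $TIMEOUT_READ_STREAM$
            timeout = 0               # reset whenever data arrives
    return results

# toy "kernel": emits one value per call while data remains, then goes quiet
out = deque()
pending = deque(range(10))

def toy_kernel():
    if pending:
        out.append(pending.popleft())

print(run_with_timeout(toy_kernel, out))  # [0, 1, ..., 9], then the loop times out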