From bf872a0c9a67eb6adf5619b4af8fcd29381cbb76 Mon Sep 17 00:00:00 2001 From: lstasytis Date: Mon, 16 Sep 2024 14:22:36 +0100 Subject: [PATCH 01/12] analytical fifo sizing functions for: channelwise_op, convolutioninputgenerator, fmpadding, labelselect, matrixvectoractivation, pool, streamingdatawidthconverter (generalized variant, very conservative estimate), streamingmaxpool, thresholding, vectorvectoractivation --- src/finn/builder/build_dataflow_config.py | 3 +- src/finn/builder/build_dataflow_steps.py | 28 +- .../custom_op/fpgadataflow/channelwise_op.py | 135 +++ .../fpgadataflow/convolutioninputgenerator.py | 281 ++++++ src/finn/custom_op/fpgadataflow/fmpadding.py | 155 +++ src/finn/custom_op/fpgadataflow/hwcustomop.py | 7 +- .../custom_op/fpgadataflow/labelselect.py | 156 +++ .../fpgadataflow/matrixvectoractivation.py | 182 +++- src/finn/custom_op/fpgadataflow/pool.py | 147 +++ .../streamingdatawidthconverter.py | 326 ++++++ .../fpgadataflow/streamingmaxpool.py | 267 +++++ .../custom_op/fpgadataflow/thresholding.py | 143 +++ .../fpgadataflow/vectorvectoractivation.py | 186 +++- tests/fpgadataflow/test_fifosizing.py | 950 +++++++++++++++++- 14 files changed, 2928 insertions(+), 38 deletions(-) diff --git a/src/finn/builder/build_dataflow_config.py b/src/finn/builder/build_dataflow_config.py index 5d69802337..471586d924 100644 --- a/src/finn/builder/build_dataflow_config.py +++ b/src/finn/builder/build_dataflow_config.py @@ -42,6 +42,7 @@ class AutoFIFOSizingMethod(str, Enum): "Select the type of automatic FIFO sizing strategy." 
CHARACTERIZE = "characterize" + CHARACTERIZE_ANALYTIC = "characterize_analytic" LARGEFIFO_RTLSIM = "largefifo_rtlsim" @@ -116,9 +117,9 @@ class VerificationStepType(str, Enum): "step_apply_folding_config", "step_minimize_bit_width", "step_generate_estimate_reports", + "step_set_fifo_depths", "step_hw_codegen", "step_hw_ipgen", - "step_set_fifo_depths", "step_create_stitched_ip", "step_measure_rtlsim_performance", "step_out_of_context_synthesis", diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py index bdbcc53d83..cf81ca3a93 100644 --- a/src/finn/builder/build_dataflow_steps.py +++ b/src/finn/builder/build_dataflow_steps.py @@ -82,6 +82,10 @@ from finn.transformation.fpgadataflow.create_dataflow_partition import ( CreateDataflowPartition, ) +from finn.transformation.fpgadataflow.derive_characteristic import ( + set_ignore_list_for_ip_gen, + unset_ignore_list_for_ip_gen, +) from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP from finn.transformation.fpgadataflow.derive_characteristic import ( DeriveCharacteristic, @@ -548,18 +552,27 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig): `GiveUniqueNodeNames`. 
""" + print("ENTERED STEP FIFO DEPTHS") if cfg.auto_fifo_depths: - if cfg.auto_fifo_strategy == "characterize": + if cfg.auto_fifo_strategy in ["characterize_analytic", "characterize"]: model = model.transform(InsertDWC()) model = model.transform(SpecializeLayers(cfg._resolve_fpga_part())) model = model.transform(GiveUniqueNodeNames()) + + if cfg.auto_fifo_strategy == "characterize_analytic": + # should RTL sim only nodes which are not supported right now with + # analytic characteristic derivations + model = set_ignore_list_for_ip_gen(model) + model = model.transform( PrepareIP(cfg._resolve_fpga_part(), cfg._resolve_hls_clk_period()) ) model = model.transform(HLSSynthIP()) model = model.transform(PrepareRTLSim()) model = model.transform(AnnotateCycles()) - period = model.analysis(dataflow_performance)["max_cycles"] + 10 + + period = int(model.analysis(dataflow_performance)["max_cycles"]*3) + #assert True==False model = model.transform(DeriveCharacteristic(period)) model = model.transform(DeriveFIFOSizes()) model = model.transform( @@ -623,7 +636,9 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig): "depth_trigger_uram", "depth_trigger_bram", ] - extract_model_config_to_json(model, cfg.output_dir + "/final_hw_config.json", hw_attrs) + + if cfg.extract_hw_config: + extract_model_config_to_json(model, cfg.output_dir + "/final_hw_config.json", hw_attrs) # perform FIFO splitting and shallow FIFO removal only after the final config # json file has been written. otherwise, since these transforms may add/remove @@ -632,10 +647,13 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig): model = model.transform(SplitLargeFIFOs()) model = model.transform(RemoveShallowFIFOs()) + # FIFO sizing is done, we can allow all ipgen again + model = unset_ignore_list_for_ip_gen(model) + # after FIFOs are ready to go, call PrepareIP and HLSSynthIP again # this will only run for the new nodes (e.g. 
def prepare_kwargs_for_characteristic_fx(self):
    """Gather the parameters used by the analytical characteristic functions.

    Returns:
        A ``(NF, dim)`` tuple. ``NF`` is ``NumChannels // PE`` and ``dim`` is
        the product of the folded output shape with its first and last axes
        removed.
    """
    num_channels = self.get_nodeattr("NumChannels")
    pe = self.get_nodeattr("PE")
    nf = num_channels // pe
    dim = int(np.prod(self.get_folded_output_shape()[1:-1]))
    return (nf, dim)


def characteristic_fx_input(self, txns, cycles, counter, kwargs):
    """Append one period of the input characteristic function to ``txns``.

    One entry is appended per modelled cycle; the entry is the value of the
    running transaction counter in that cycle. The model is ``dim``
    back-to-back transactions, one per cycle.

    Args:
        txns: list that is extended in place.
        cycles: cycle count before this period.
        counter: transaction count before this period.
        kwargs: tuple produced by ``prepare_kwargs_for_characteristic_fx``.

    Returns:
        The updated ``(txns, cycles, counter)``.
    """
    (_, dim) = kwargs
    txns.extend(range(counter, counter + dim))
    return txns, cycles + dim, counter + dim


def characteristic_fx_output(self, txns, cycles, counter, kwargs):
    """Append one period of the output characteristic function to ``txns``.

    Uses the same model as the input side: ``dim`` back-to-back transactions.
    Arguments and return value are as for ``characteristic_fx_input``.
    """
    (_, dim) = kwargs
    txns.extend(range(counter, counter + dim))
    return txns, cycles + dim, counter + dim


def derive_characteristic_fxns(self, period):
    """Derive the input and output characteristic functions over two periods.

    When the ``ipgen_ignore`` attribute is 0 the work is delegated to the
    parent class (RTL-simulation flow). Otherwise both traces are built
    analytically from ``characteristic_fx_input`` / ``characteristic_fx_output``
    and stored in the ``io_chrc_period``, ``io_chrc_in``, ``io_chrc_pads_in``,
    ``io_chrc_out`` and ``io_chrc_pads_out`` node attributes.

    Args:
        period: number of cycles in one characterization period.

    Raises:
        ValueError: if one modelled period needs more than ``period`` cycles.
    """
    if self.get_nodeattr("ipgen_ignore") == 0:
        # RTL-based flow: the stimulus dict is only needed on this path.
        n_inps = int(np.prod(self.get_folded_input_shape()[:-1]))
        io_dict = {
            "inputs": {"in0": [0] * n_inps},
            "outputs": {"out": []},
        }
        super().derive_characteristic_fxns(period, override_rtlsim_dict=io_dict)
        return

    # Analytical flow
    self.set_nodeattr("io_chrc_period", period)
    kwargs = self.prepare_kwargs_for_characteristic_fx()
    directions = (
        ("in", self.characteristic_fx_input),
        ("out", self.characteristic_fx_output),
    )
    for suffix, chrc_fx in directions:
        txns, counter, padding = [], 0, 0
        # The second period always starts exactly at cycle `period`.
        for start in (0, period):
            end = start + period
            txns, cycles, counter = chrc_fx(txns, start, counter, kwargs)
            if cycles > end:
                raise ValueError(
                    "characteristic function (%s) needs %d cycles but the period is %d"
                    % (suffix, cycles - start, period)
                )
            # Hold the last counter value for the idle remainder of the period.
            # The padding is the number of idle cycles (period - cycles); the
            # earlier `period * -cycles` expression was a typo.
            idle = end - cycles
            txns += [counter] * idle
            padding += idle
        trace = np.asarray(txns, dtype=np.int32).reshape(1, 2 * period)
        self.set_nodeattr("io_chrc_" + suffix, trace)
        self.set_nodeattr("io_chrc_pads_" + suffix, padding)
def prepare_kwargs_for_characteristic_fx(self):
    """Gather the parameters used by the analytical characteristic functions.

    Reads the ``IFMDim``, ``OFMDim``, ``ConvKernelDim``, ``Stride``, ``SIMD``,
    ``IFMChannels``, ``depthwise`` and ``is1D`` node attributes and derives the
    loop bounds used by ``characteristic_fx_input`` / ``characteristic_fx_output``.

    Returns:
        A 23-element tuple; the element order is fixed and is relied upon by
        the unpacking in the two characteristic functions.
    """
    IFMDim_x = self.get_nodeattr("IFMDim")[0]
    OFMDim_x, OFMDim_y = self.get_nodeattr("OFMDim")[:2]
    ConvKernelDim_x, ConvKernelDim_y = self.get_nodeattr("ConvKernelDim")[:2]
    Stride_x, Stride_y = self.get_nodeattr("Stride")[:2]
    SIMD = self.get_nodeattr("SIMD")
    IFMChannels = self.get_nodeattr("IFMChannels")
    DEPTHWISE = self.get_nodeattr("depthwise")
    is1d = self.get_nodeattr("is1D")

    # 1D model parameters
    SIMD_COUNT = IFMChannels // SIMD
    OUTPUT_SIZE = OFMDim_x * ConvKernelDim_x * SIMD_COUNT
    INPUT_SIZE = IFMDim_x * SIMD_COUNT
    WINDOW_SIZE = ConvKernelDim_x * SIMD_COUNT
    if DEPTHWISE:
        BUFFER_SIZE = ConvKernelDim_x * SIMD_COUNT
        READ_CYCLES = SIMD_COUNT * (ConvKernelDim_x - 1) - (ConvKernelDim_x - 1)
        FINISH = IFMDim_x - ConvKernelDim_x - 2
    else:
        BUFFER_SIZE = (ConvKernelDim_x - 1) * SIMD_COUNT
        READ_CYCLES = 0
        FINISH = 0
    OCNT_INITIAL = BUFFER_SIZE + (Stride_x - 1)
    DEFAULT_FIFO_DEPTH = 2

    # 2D model parameters
    multiplying_factor = IFMChannels // SIMD
    number_blocks = ConvKernelDim_y // Stride_y + 1
    cycles_write_block = OFMDim_x * ConvKernelDim_x * ConvKernelDim_y * multiplying_factor
    cycles_read_block = Stride_x * IFMDim_x * multiplying_factor
    max_cycles = max(cycles_write_block, cycles_read_block)
    initial_buffer = IFMDim_x * ConvKernelDim_y * multiplying_factor
    baseIter = initial_buffer + OFMDim_y * max_cycles
    window_elems = ConvKernelDim_x * ConvKernelDim_y * OFMDim_x
    READ_DELAY = number_blocks * window_elems * OFMDim_y * multiplying_factor - window_elems
    READ_ITES = (baseIter - OFMDim_y) // max_cycles

    return (
        SIMD_COUNT, Stride_x, Stride_y, OUTPUT_SIZE, INPUT_SIZE,
        WINDOW_SIZE, BUFFER_SIZE, READ_CYCLES, OCNT_INITIAL,
        DEPTHWISE, DEFAULT_FIFO_DEPTH, is1d,
        multiplying_factor, number_blocks, cycles_write_block,
        cycles_read_block, max_cycles, baseIter, initial_buffer,
        FINISH, OFMDim_y, READ_DELAY, READ_ITES,
    )


def characteristic_fx_input(self, txns, cycles, counter, kwargs):
    """Append one period of the input characteristic function to ``txns``.

    One entry is appended per modelled cycle; the entry is the value of the
    running transaction counter in that cycle.

    Args:
        txns: list that is extended in place.
        cycles: cycle count before this period.
        counter: transaction count before this period.
        kwargs: tuple produced by ``prepare_kwargs_for_characteristic_fx``.

    Returns:
        The updated ``(txns, cycles, counter)``.
    """
    (SIMD_COUNT, Stride_x, Stride_y, OUTPUT_SIZE, INPUT_SIZE,
     WINDOW_SIZE, BUFFER_SIZE, READ_CYCLES, OCNT_INITIAL,
     DEPTHWISE, DEFAULT_FIFO_DEPTH, is1d,
     multiplying_factor, number_blocks, cycles_write_block,
     cycles_read_block, max_cycles, baseIter, initial_buffer,
     FINISH, OFMDim_y, READ_DELAY, READ_ITES) = kwargs

    # fifo filling: the first DEFAULT_FIFO_DEPTH words are accepted back to back
    for _ in range(DEFAULT_FIFO_DEPTH):
        txns.append(counter)
        counter += 1
        cycles += 1

    if is1d:
        if DEPTHWISE:
            OCNT_MAX = BUFFER_SIZE
            ocnt = SIMD_COUNT
        else:
            OCNT_MAX = WINDOW_SIZE
            ocnt = OCNT_INITIAL if OCNT_INITIAL < WINDOW_SIZE else -1

        inp_count = 0
        for i in range(OUTPUT_SIZE):
            txns.append(counter)
            # `we`/`re` are evaluated before `ocnt` is advanced for this cycle
            we = (i < OCNT_MAX) or (ocnt < (SIMD_COUNT * Stride_x))
            re = i > 0
            # NOTE(review): nesting of the wrap-around check under `re` is
            # reconstructed from a whitespace-collapsed source - confirm.
            if re:
                ocnt += 1
                if ocnt == OCNT_MAX:
                    ocnt = 0
            if we and inp_count < INPUT_SIZE - DEFAULT_FIFO_DEPTH:
                counter += 1
                inp_count += 1
            cycles += 1
    else:
        for _ in range(initial_buffer + cycles_read_block - 1):
            txns.append(counter)
            cycles += 1
            counter += 1

        # one extra for loop tail
        txns.append(counter)
        cycles += 1

        for i in range(OFMDim_y - 1):
            for _ in range(cycles_write_block - cycles_read_block):
                txns.append(counter)
                cycles += 1
            # NOTE(review): all three statements are taken to be guarded by the
            # `if` (reconstructed from a whitespace-collapsed source) - confirm.
            if i < OFMDim_y - 2:
                for _ in range(cycles_read_block - 1):
                    counter += 1
                    txns.append(counter)
                    cycles += 1

    return txns, cycles, counter


def characteristic_fx_output(self, txns, cycles, counter, kwargs):
    """Append one period of the output characteristic function to ``txns``.

    The model is a run of idle cycles followed by back-to-back output
    transactions. Arguments and return value are as for
    ``characteristic_fx_input``.
    """
    (SIMD_COUNT, Stride_x, Stride_y, OUTPUT_SIZE, INPUT_SIZE,
     WINDOW_SIZE, BUFFER_SIZE, READ_CYCLES, OCNT_INITIAL,
     DEPTHWISE, DEFAULT_FIFO_DEPTH, is1d,
     multiplying_factor, number_blocks, cycles_write_block,
     cycles_read_block, max_cycles, baseIter, initial_buffer,
     FINISH, OFMDim_y, READ_DELAY, READ_ITES) = kwargs

    # hyper parameter of the model
    INITIAL_LOOP_CYCLES = 5

    if is1d:
        idle = INITIAL_LOOP_CYCLES + max(READ_CYCLES, 0)
        active = max(OUTPUT_SIZE, 0)
    else:
        idle = max(initial_buffer + INITIAL_LOOP_CYCLES - 1, 0)
        active = max(baseIter - initial_buffer, 0)

    txns.extend([counter] * idle)
    txns.extend(range(counter, counter + active))
    return txns, cycles + idle + active, counter + active


def derive_characteristic_fxns(self, period):
    """Derive the input and output characteristic functions over two periods.

    When the ``ipgen_ignore`` attribute is 0 the work is delegated to the
    parent class (RTL-simulation flow). Otherwise both traces are built
    analytically from ``characteristic_fx_input`` / ``characteristic_fx_output``
    and stored in the ``io_chrc_period``, ``io_chrc_in``, ``io_chrc_pads_in``,
    ``io_chrc_out`` and ``io_chrc_pads_out`` node attributes.

    Args:
        period: number of cycles in one characterization period.

    Raises:
        ValueError: if one modelled period needs more than ``period`` cycles.
    """
    if self.get_nodeattr("ipgen_ignore") == 0:
        # RTL-based flow: the stimulus dict is only needed on this path.
        n_inps = int(np.prod(self.get_folded_input_shape()[:-1]))
        io_dict = {
            "inputs": {"in0": [0] * n_inps},
            "outputs": {"out": []},
        }
        super().derive_characteristic_fxns(period, override_rtlsim_dict=io_dict)
        return

    # Analytical flow
    self.set_nodeattr("io_chrc_period", period)
    kwargs = self.prepare_kwargs_for_characteristic_fx()
    directions = (
        ("in", self.characteristic_fx_input),
        ("out", self.characteristic_fx_output),
    )
    for suffix, chrc_fx in directions:
        txns, counter, padding = [], 0, 0
        # The second period always starts exactly at cycle `period`.
        for start in (0, period):
            end = start + period
            txns, cycles, counter = chrc_fx(txns, start, counter, kwargs)
            if cycles > end:
                raise ValueError(
                    "characteristic function (%s) needs %d cycles but the period is %d"
                    % (suffix, cycles - start, period)
                )
            # The padding is the number of idle cycles (period - cycles); the
            # earlier `period * -cycles` expression was a typo.
            idle = end - cycles
            txns += [counter] * idle
            padding += idle
        trace = np.asarray(txns, dtype=np.int32).reshape(1, 2 * period)
        self.set_nodeattr("io_chrc_" + suffix, trace)
        self.set_nodeattr("io_chrc_pads_" + suffix, padding)
def prepare_kwargs_for_characteristic_fx(self):
    """Gather the parameters used by the analytical characteristic functions.

    Returns:
        ``(ImgDim, Padding, NumChannels, SIMD, TOTAL_ELS, NF)`` where
        ``TOTAL_ELS`` is the number of pixels of the padded image
        (``ImgDim`` grown by the ``Padding`` entries) and ``NF`` is
        ``NumChannels // SIMD``.
    """
    ImgDim = self.get_nodeattr("ImgDim")
    Padding = self.get_nodeattr("Padding")
    NumChannels = self.get_nodeattr("NumChannels")
    SIMD = self.get_nodeattr("SIMD")
    NewDim = [
        ImgDim[0] + Padding[0] + Padding[2],
        ImgDim[1] + Padding[1] + Padding[3],
    ]
    TOTAL_ELS = int(np.prod(NewDim))
    NF = NumChannels // SIMD
    return (ImgDim, Padding, NumChannels, SIMD, TOTAL_ELS, NF)


def characteristic_fx_input(self, txns, cycles, counter, kwargs):
    """Append one period of the input characteristic function to ``txns``.

    One entry is appended per modelled cycle; the entry is the value of the
    running transaction counter in that cycle. For each of ``ImgDim[0]`` rows
    the model reads ``ImgDim[1] * NF`` words and then idles while the
    ``Padding[1] + Padding[3]`` padded pixels are emitted.

    Args:
        txns: list that is extended in place.
        cycles: cycle count before this period.
        counter: transaction count before this period.
        kwargs: tuple produced by ``prepare_kwargs_for_characteristic_fx``.

    Returns:
        The updated ``(txns, cycles, counter)``.
    """
    (ImgDim, Padding, NumChannels, SIMD, TOTAL_ELS, NF) = kwargs

    # if NF == 1, we always have a one cycle delay after every pixel, which
    # also doubles the time spent on each padded pixel
    single_fold = NF == 1
    pad_cycles = (Padding[1] + Padding[3]) * NF * (2 if single_fold else 1)

    # NOTE(review): loop nesting is reconstructed from a whitespace-collapsed
    # source (extra cycle per pixel, padding stall per row) - confirm.
    for _ in range(ImgDim[0]):
        for _ in range(ImgDim[1]):
            for _ in range(NF):
                txns.append(counter)
                counter += 1
                cycles += 1
            if single_fold:
                txns.append(counter)
                cycles += 1
        for _ in range(pad_cycles):
            txns.append(counter)
            cycles += 1

    return txns, cycles, counter


def characteristic_fx_output(self, txns, cycles, counter, kwargs):
    """Append one period of the output characteristic function to ``txns``.

    The model is ``TOTAL_ELS * (NumChannels // SIMD)`` back-to-back
    transactions. Arguments and return value are as for
    ``characteristic_fx_input``.
    """
    (ImgDim, Padding, NumChannels, SIMD, TOTAL_ELS, NF) = kwargs
    n_words = max(TOTAL_ELS, 0) * max(NumChannels // SIMD, 0)
    txns.extend(range(counter, counter + n_words))
    return txns, cycles + n_words, counter + n_words


def derive_characteristic_fxns(self, period):
    """Derive the input and output characteristic functions over two periods.

    When the ``ipgen_ignore`` attribute is 0 the work is delegated to the
    parent class (RTL-simulation flow). Otherwise both traces are built
    analytically from ``characteristic_fx_input`` / ``characteristic_fx_output``
    and stored in the ``io_chrc_period``, ``io_chrc_in``, ``io_chrc_pads_in``,
    ``io_chrc_out`` and ``io_chrc_pads_out`` node attributes.

    Args:
        period: number of cycles in one characterization period.

    Raises:
        ValueError: if one modelled period needs more than ``period`` cycles.
    """
    if self.get_nodeattr("ipgen_ignore") == 0:
        # RTL-based flow: the stimulus dict is only needed on this path.
        n_inps = int(np.prod(self.get_folded_input_shape()[:-1]))
        io_dict = {
            "inputs": {"in0": [0] * n_inps},
            "outputs": {"out": []},
        }
        super().derive_characteristic_fxns(period, override_rtlsim_dict=io_dict)
        return

    # Analytical flow
    self.set_nodeattr("io_chrc_period", period)
    kwargs = self.prepare_kwargs_for_characteristic_fx()
    directions = (
        ("in", self.characteristic_fx_input),
        ("out", self.characteristic_fx_output),
    )
    for suffix, chrc_fx in directions:
        txns, counter, padding = [], 0, 0
        # The second period always starts exactly at cycle `period`.
        for start in (0, period):
            end = start + period
            txns, cycles, counter = chrc_fx(txns, start, counter, kwargs)
            if cycles > end:
                raise ValueError(
                    "characteristic function (%s) needs %d cycles but the period is %d"
                    % (suffix, cycles - start, period)
                )
            # The padding is the number of idle cycles (period - cycles); the
            # earlier `period * -cycles` expression was a typo.
            idle = end - cycles
            txns += [counter] * idle
            padding += idle
        trace = np.asarray(txns, dtype=np.int32).reshape(1, 2 * period)
        self.set_nodeattr("io_chrc_" + suffix, trace)
        self.set_nodeattr("io_chrc_pads_" + suffix, padding)
def prepare_kwargs_for_characteristic_fx(self):
    """Gather the parameters used by the analytical characteristic functions.

    Returns:
        ``(num_in_words, PE, K)`` read from the ``Labels``, ``PE`` and ``K``
        node attributes.
    """
    num_in_words = self.get_nodeattr("Labels")
    PE = self.get_nodeattr("PE")
    K = self.get_nodeattr("K")
    return (num_in_words, PE, K)


def characteristic_fx_input(self, txns, cycles, counter, kwargs):
    """Append one period of the input characteristic function to ``txns``.

    One entry is appended per modelled cycle; the entry is the value of the
    running transaction counter in that cycle. The model is
    ``num_in_words // PE + 1`` back-to-back transactions.

    Args:
        txns: list that is extended in place.
        cycles: cycle count before this period.
        counter: transaction count before this period.
        kwargs: tuple produced by ``prepare_kwargs_for_characteristic_fx``.

    Returns:
        The updated ``(txns, cycles, counter)``.
    """
    (num_in_words, PE, K) = kwargs
    n_reads = max(num_in_words // PE + 1, 0)
    txns.extend(range(counter, counter + n_reads))
    return txns, cycles + n_reads, counter + n_reads


def characteristic_fx_output(self, txns, cycles, counter, kwargs):
    """Append one period of the output characteristic function to ``txns``.

    The model idles for a fixed wind-up plus ``num_in_words // PE + K`` cycles
    and then emits ``K`` transactions back to back. Arguments and return value
    are as for ``characteristic_fx_input``.
    """
    (num_in_words, PE, K) = kwargs

    windup_clocks = 4
    # wind-up, then the cycles spent before the first label is emitted
    idle = windup_clocks + max(num_in_words // PE + K, 0)
    txns.extend([counter] * idle)

    # output the K labels which got selected
    n_out = max(K, 0)
    txns.extend(range(counter, counter + n_out))
    return txns, cycles + idle + n_out, counter + n_out


def derive_characteristic_fxns(self, period):
    """Derive the input and output characteristic functions over two periods.

    When the ``ipgen_ignore`` attribute is 0 the work is delegated to the
    parent class (RTL-simulation flow). Otherwise both traces are built
    analytically from ``characteristic_fx_input`` / ``characteristic_fx_output``
    and stored in the ``io_chrc_period``, ``io_chrc_in``, ``io_chrc_pads_in``,
    ``io_chrc_out`` and ``io_chrc_pads_out`` node attributes.

    Args:
        period: number of cycles in one characterization period.

    Raises:
        ValueError: if one modelled period needs more than ``period`` cycles.
    """
    if self.get_nodeattr("ipgen_ignore") == 0:
        # RTL-based flow: the stimulus dict is only needed on this path.
        n_inps = int(np.prod(self.get_folded_input_shape()[:-1]))
        io_dict = {
            "inputs": {"in0": [0] * n_inps},
            "outputs": {"out": []},
        }
        super().derive_characteristic_fxns(period, override_rtlsim_dict=io_dict)
        return

    # Analytical flow
    self.set_nodeattr("io_chrc_period", period)
    kwargs = self.prepare_kwargs_for_characteristic_fx()
    directions = (
        ("in", self.characteristic_fx_input),
        ("out", self.characteristic_fx_output),
    )
    for suffix, chrc_fx in directions:
        txns, counter, padding = [], 0, 0
        # The second period always starts exactly at cycle `period`.
        for start in (0, period):
            end = start + period
            txns, cycles, counter = chrc_fx(txns, start, counter, kwargs)
            if cycles > end:
                raise ValueError(
                    "characteristic function (%s) needs %d cycles but the period is %d"
                    % (suffix, cycles - start, period)
                )
            # The padding is the number of idle cycles (period - cycles); the
            # earlier `period * -cycles` expression was a typo.
            idle = end - cycles
            txns += [counter] * idle
            padding += idle
        trace = np.asarray(txns, dtype=np.int32).reshape(1, 2 * period)
        self.set_nodeattr("io_chrc_" + suffix, trace)
        self.set_nodeattr("io_chrc_pads_" + suffix, padding)
def prepare_kwargs_for_characteristic_fx(self):
    """Gather the parameters used by the analytical characteristic functions.

    Returns:
        ``(MW, MH, SIMD, PE, BURST_COUNT, BURST_SIZE, numVectors)`` where
        ``BURST_SIZE`` is ``MW // SIMD``, ``BURST_COUNT`` is ``MH // PE`` and
        ``numVectors`` is the product of the ``numInputVectors`` attribute.
    """
    MW = self.get_nodeattr("MW")
    MH = self.get_nodeattr("MH")
    SIMD = self.get_nodeattr("SIMD")
    PE = self.get_nodeattr("PE")
    numVectors = int(np.prod(self.get_nodeattr("numInputVectors")))
    BURST_SIZE = MW // SIMD
    BURST_COUNT = MH // PE
    return (MW, MH, SIMD, PE, BURST_COUNT, BURST_SIZE, numVectors)


def characteristic_fx_input(self, txns, cycles, counter, kwargs):
    """Append one period of the input characteristic function to ``txns``.

    One entry is appended per modelled cycle; the entry is the value of the
    running transaction counter in that cycle. Per input vector the model
    reads ``BURST_SIZE`` words and then idles for
    ``(BURST_COUNT - 1) * BURST_SIZE`` cycles. With more than one vector, two
    words are read ahead at the start; the total number of reads is capped at
    ``numVectors * BURST_SIZE``.

    Args:
        txns: list that is extended in place.
        cycles: cycle count before this period.
        counter: transaction count before this period.
        kwargs: tuple produced by ``prepare_kwargs_for_characteristic_fx``.

    Returns:
        The updated ``(txns, cycles, counter)``.
    """
    (MW, MH, SIMD, PE, BURST_COUNT, BURST_SIZE, numVectors) = kwargs

    tracker = 0
    maximum = numVectors * BURST_SIZE

    if numVectors > 1:
        # loop windup: two words are consumed before the first burst
        for _ in range(2):
            txns.append(counter)
            counter += 1
            cycles += 1
            tracker += 1

    stall = max(BURST_COUNT - 1, 0) * max(BURST_SIZE, 0)
    # NOTE(review): the idle phase is taken to be per input vector (nesting
    # reconstructed from a whitespace-collapsed source) - confirm.
    for _ in range(numVectors):
        for _ in range(BURST_SIZE):
            if tracker < maximum:
                txns.append(counter)
                counter += 1
                cycles += 1
                tracker += 1
        txns.extend([counter] * stall)
        cycles += stall

    return txns, cycles, counter


def characteristic_fx_output(self, txns, cycles, counter, kwargs):
    """Append one period of the output characteristic function to ``txns``.

    After a fixed wind-up the model spends ``BURST_SIZE`` cycles on each of
    the ``BURST_COUNT`` output words of every input vector. Arguments and
    return value are as for ``characteristic_fx_input``.
    """
    (MW, MH, SIMD, PE, BURST_COUNT, BURST_SIZE, numVectors) = kwargs

    windup_clocks = 3
    txns.extend([counter] * windup_clocks)
    cycles += windup_clocks

    burst = max(BURST_SIZE, 0)
    # NOTE(review): the counter is taken to advance once per burst (nesting
    # reconstructed from a whitespace-collapsed source) - confirm.
    for _ in range(numVectors):
        for _ in range(BURST_COUNT):
            txns.extend([counter] * burst)
            cycles += burst
            counter += 1

    return txns, cycles, counter


def derive_characteristic_fxns(self, period):
    """Derive the input and output characteristic functions over two periods.

    When the ``ipgen_ignore`` attribute is 0 the work is delegated to the
    parent class (RTL-simulation flow); a ``weights`` stimulus stream is added
    for the ``internal_decoupled`` and ``external`` memory modes. Otherwise
    both traces are built analytically from ``characteristic_fx_input`` /
    ``characteristic_fx_output`` and stored in the ``io_chrc_period``,
    ``io_chrc_in``, ``io_chrc_pads_in``, ``io_chrc_out`` and
    ``io_chrc_pads_out`` node attributes.

    Args:
        period: number of cycles in one characterization period.

    Raises:
        ValueError: if one modelled period needs more than ``period`` cycles.
    """
    if self.get_nodeattr("ipgen_ignore") == 0:
        # RTL-based flow: the stimulus dict is only needed on this path.
        n_inps = int(np.prod(self.get_folded_input_shape()[:-1]))
        io_dict = {
            "inputs": {"in0": [0] * n_inps},
            "outputs": {"out": []},
        }
        if self.get_nodeattr("mem_mode") in ["internal_decoupled", "external"]:
            n_weight_inps = self.calc_wmem()
            num_w_reps = int(np.prod(self.get_nodeattr("numInputVectors")))
            io_dict["inputs"]["weights"] = [0] * (num_w_reps * n_weight_inps)
        super().derive_characteristic_fxns(period, override_rtlsim_dict=io_dict)
        return

    # Analytical flow (only the data input "in0" and the output are modelled)
    self.set_nodeattr("io_chrc_period", period)
    kwargs = self.prepare_kwargs_for_characteristic_fx()
    directions = (
        ("in", self.characteristic_fx_input),
        ("out", self.characteristic_fx_output),
    )
    for suffix, chrc_fx in directions:
        txns, counter, padding = [], 0, 0
        # The second period always starts exactly at cycle `period`.
        for start in (0, period):
            end = start + period
            txns, cycles, counter = chrc_fx(txns, start, counter, kwargs)
            if cycles > end:
                raise ValueError(
                    "characteristic function (%s) needs %d cycles but the period is %d"
                    % (suffix, cycles - start, period)
                )
            # The padding is the number of idle cycles (period - cycles); the
            # earlier `period * -cycles` expression was a typo.
            idle = end - cycles
            txns += [counter] * idle
            padding += idle
        trace = np.asarray(txns, dtype=np.int32).reshape(1, 2 * period)
        self.set_nodeattr("io_chrc_" + suffix, trace)
        self.set_nodeattr("io_chrc_pads_" + suffix, padding)
def prepare_kwargs_for_characteristic_fx(self):
    """Gather the parameters used by the analytical characteristic functions.

    Returns:
        ``(NF, KernelSize)`` where ``NF`` is ``Channels // PE`` and
        ``KernelSize`` is the product of the ``KernelSize`` attribute.
    """
    Channels = self.get_nodeattr("Channels")
    PE = self.get_nodeattr("PE")
    KernelSize = int(np.prod(self.get_nodeattr("KernelSize")))
    NF = Channels // PE
    return (NF, KernelSize)


def characteristic_fx_input(self, txns, cycles, counter, kwargs):
    """Append one period of the input characteristic function to ``txns``.

    One entry is appended per modelled cycle; the entry is the value of the
    running transaction counter in that cycle. The model is
    ``KernelSize * NF`` back-to-back transactions.

    Args:
        txns: list that is extended in place.
        cycles: cycle count before this period.
        counter: transaction count before this period.
        kwargs: tuple produced by ``prepare_kwargs_for_characteristic_fx``.

    Returns:
        The updated ``(txns, cycles, counter)``.
    """
    (NF, KernelSize) = kwargs
    n_words = max(KernelSize, 0) * max(NF, 0)
    txns.extend(range(counter, counter + n_words))
    return txns, cycles + n_words, counter + n_words


def characteristic_fx_output(self, txns, cycles, counter, kwargs):
    """Append one period of the output characteristic function to ``txns``.

    Uses the same model as the input side: ``KernelSize * NF`` back-to-back
    transactions. Arguments and return value are as for
    ``characteristic_fx_input``.
    """
    (NF, KernelSize) = kwargs
    n_words = max(KernelSize, 0) * max(NF, 0)
    txns.extend(range(counter, counter + n_words))
    return txns, cycles + n_words, counter + n_words


def derive_characteristic_fxns(self, period):
    """Derive the input and output characteristic functions over two periods.

    When the ``ipgen_ignore`` attribute is 0 the work is delegated to the
    parent class (RTL-simulation flow). Otherwise both traces are built
    analytically from ``characteristic_fx_input`` / ``characteristic_fx_output``
    and stored in the ``io_chrc_period``, ``io_chrc_in``, ``io_chrc_pads_in``,
    ``io_chrc_out`` and ``io_chrc_pads_out`` node attributes.

    Args:
        period: number of cycles in one characterization period.

    Raises:
        ValueError: if one modelled period needs more than ``period`` cycles.
    """
    if self.get_nodeattr("ipgen_ignore") == 0:
        # RTL-based flow: the stimulus dict is only needed on this path.
        n_inps = int(np.prod(self.get_folded_input_shape()[:-1]))
        io_dict = {
            "inputs": {"in0": [0] * n_inps},
            "outputs": {"out": []},
        }
        super().derive_characteristic_fxns(period, override_rtlsim_dict=io_dict)
        return

    # Analytical flow
    self.set_nodeattr("io_chrc_period", period)
    kwargs = self.prepare_kwargs_for_characteristic_fx()
    directions = (
        ("in", self.characteristic_fx_input),
        ("out", self.characteristic_fx_output),
    )
    for suffix, chrc_fx in directions:
        txns, counter, padding = [], 0, 0
        # The second period always starts exactly at cycle `period`.
        for start in (0, period):
            end = start + period
            txns, cycles, counter = chrc_fx(txns, start, counter, kwargs)
            if cycles > end:
                raise ValueError(
                    "characteristic function (%s) needs %d cycles but the period is %d"
                    % (suffix, cycles - start, period)
                )
            # The padding is the number of idle cycles (period - cycles); the
            # earlier `period * -cycles` expression was a typo.
            idle = end - cycles
            txns += [counter] * idle
            padding += idle
        trace = np.asarray(txns, dtype=np.int32).reshape(1, 2 * period)
        self.set_nodeattr("io_chrc_" + suffix, trace)
        self.set_nodeattr("io_chrc_pads_" + suffix, padding)
windup_clocks_up_convert_input = 4 + + + windup_clocks_down_convert_input = 3 + + + windup_clocks_down_convert_output = 4 + windup_clocks_equal_convert_output = 3 + + + + if numInWords < windup_clocks_up_convert_input: + windup_clocks_up_convert_input = numInWords + + if numInWords < windup_clocks_down_convert_input: + windup_clocks_down_convert_input = numInWords + + + + if numOutWords < windup_clocks_down_convert_output: + windup_clocks_down_convert_output = numOutWords + + + + if numOutWords < windup_clocks_equal_convert_output: + windup_clocks_equal_convert_output = numOutWords + + + + + # calculation to adjust for padding or cropping adding latency + + + if outWidth > inWidth: + higher = outWidth + lower = inWidth + else: + higher = inWidth + lower = outWidth + + if higher % lower != 0: + if numInWords*inWidth > numOutWords*outWidth: + crop = True + pad = False + else: + cropping = False + pad = True + + else: + crop = False + pad = False + + + + # windup period + # for i in range(0,windup_clocks_down_convert_output): + # txns.append(counter) + # cycles+=1 + #padding +=1 + #counter+=1 + # first input period + + + # first input period + tracker = 0 + maximum = numReps*numInWords + + if numReps > 1: + # loop windup + for i in range(2): + txns.append(counter) + counter+=1 + cycles+=1 + tracker+=1 + + for j in range(0,numReps): + for i in range(0,numInWords): + if tracker < maximum: + txns.append(counter) + counter+=1 + cycles+=1 + tracker+=1 + for i in range(0,1): + txns.append(counter) + cycles+=1 + + return txns, cycles, counter + + + + def characteristic_fx_output(self, txns, cycles, counter, kwargs): + + (numInWords,numOutWords,inWidth,outWidth,numReps) = kwargs + + + + + + # HYPER PARAMETERS WHICH MAY CHANGE + windup_clocks_up_convert_input = 3 + windup_clocks_down_convert_input = 2 + + + windup_clocks_down_convert_output = 3 + windup_clocks_equal_convert_output = 2 + + + + if numInWords < windup_clocks_up_convert_input: + windup_clocks_up_convert_input = 
numInWords + + if numInWords < windup_clocks_down_convert_input: + windup_clocks_down_convert_input = numInWords + + + + if numOutWords < windup_clocks_down_convert_output: + windup_clocks_down_convert_output = numOutWords + + + + if numOutWords < windup_clocks_equal_convert_output: + windup_clocks_equal_convert_output = numOutWords + + + + + # calculation to adjust for padding or cropping adding latency + + + if outWidth > inWidth: + higher = outWidth + lower = inWidth + else: + higher = inWidth + lower = outWidth + + if higher % lower != 0: + if numInWords*inWidth > numOutWords*outWidth: + crop = True + pad = False + else: + cropping = False + pad = True + + else: + crop = False + pad = False + + + + # windup period + if inWidth == outWidth: + clock = windup_clocks_equal_convert_output + else: + clock = windup_clocks_up_convert_input + for i in range(0,clock): + txns.append(counter) + cycles+=1 + # padding +=1 + + # first input period + + if pad: + offset = 2 + else: + offset = 1 + + + remainder = 0 + + + for k in range(numReps): + + # windup + txns.append(counter) + cycles+=1 + + for i in range(0,numOutWords): + for j in range(0,int(np.floor(outWidth/inWidth))): + if j != 0: + txns.append(counter) + cycles +=1 + remainder += inWidth + # padding +=1 + + + + if pad and remainder < outWidth: + print(remainder) + txns.append(counter) + remainder += inWidth + cycles +=1 + + txns.append(counter) + cycles +=1 + + counter+=1 + remainder -= outWidth + + + return txns, cycles, counter + + + def derive_characteristic_fxns(self, period): + n_inps = np.prod(self.get_folded_input_shape()[:-1]) + io_dict = { + "inputs": { + "in0": [0 for i in range(n_inps)], + }, + "outputs": {"out": []}, + } + + ignore = self.get_nodeattr("ipgen_ignore") + if ignore == 0: # this node is being derived using RTLSIM + # RTL-based flow + super().derive_characteristic_fxns(period, override_rtlsim_dict=io_dict) + return + + + + # Analytical flow + + txns_in = {key: [] for (key, value) in 
io_dict["inputs"].items() if "in" in key} + txns_out = {key: [] for (key, value) in io_dict["outputs"].items() if "out" in key} + + all_txns_in = np.empty((len(txns_in.keys()), 2 * period), dtype=np.int32) + all_txns_out = np.empty((len(txns_out.keys()), 2 * period), dtype=np.int32) + + + self.set_nodeattr("io_chrc_period",period) + + + + + txn_in = [] + txn_out = [] + + + # INPUT + + counter = 0 + padding = 0 + + + kwargs = self.prepare_kwargs_for_characteristic_fx() + + + # first period + cycles = 0 + txn_in, cycles, counter = self.characteristic_fx_input(txn_in,cycles,counter,kwargs) + + txn_in += [counter] * (period-cycles) + padding+=(period*-cycles) + + + # second period + cycles = period + txn_in, cycles, counter = self.characteristic_fx_input(txn_in,cycles,counter,kwargs) + + + #for i in range(cycles,period*2): + # txn_in.append(counter) + #pads = (period*2-cycles) + + txn_in += [counter] * (period*2-cycles) + padding+=(period*2-cycles) + + # final assignments + all_txns_in[0, :] = np.array(txn_in) + self.set_nodeattr("io_chrc_in", all_txns_in) + self.set_nodeattr("io_chrc_pads_in", padding) + + + # OUTPUT + + counter = 0 + cycles = 0 + padding = 0 + + + txn_out, cycles, counter = self.characteristic_fx_output(txn_out,cycles,counter,kwargs) + + + txn_out += [counter] * (period-cycles) + padding += (period*-cycles) + + cycles = period + + txn_out, cycles, counter = self.characteristic_fx_output(txn_out,cycles,counter,kwargs) + + txn_out += [counter] * (period*2-cycles) + padding+=(period*2-cycles) + + + all_txns_out[0, :] = np.array(txn_out) + self.set_nodeattr("io_chrc_out", all_txns_out) + self.set_nodeattr("io_chrc_pads_out", padding) diff --git a/src/finn/custom_op/fpgadataflow/streamingmaxpool.py b/src/finn/custom_op/fpgadataflow/streamingmaxpool.py index 59a8f092d0..8c5c6abbb8 100755 --- a/src/finn/custom_op/fpgadataflow/streamingmaxpool.py +++ b/src/finn/custom_op/fpgadataflow/streamingmaxpool.py @@ -234,3 +234,270 @@ def execute_node(self, context, 
# Patch section for src/finn/custom_op/fpgadataflow/streamingmaxpool.py
# (hunk @@ -234,3 +234,270 @@). The hunk context is the tail of
# execute_node(self, context, graph):
#     # convert output NCHW -> NHWC
#     result = np.transpose(result, (0, 2, 3, 1))
#     context[node.output[0]] = result
# The definitions below are the methods this hunk appends after it; `self`
# is the custom-op instance.
# NOTE(review): the patch text reached review with newlines and indentation
# collapsed, so the loop nesting below is reconstructed from the statement
# order -- confirm against the original commit before merging.


def prepare_kwargs_for_characteristic_fx(self):
    """Collect the parameters consumed by the characteristic functions.

    Returns:
        A 12-tuple ``(ifm_dim, output_size, is1d, NumChannels, PoolDim,
        ImgDim, PE, windup_clocks, read_delay, bursts, read_tail_latency,
        write_tail_latency)`` in exactly the order that
        ``characteristic_fx_input`` / ``characteristic_fx_output`` unpack.
    """
    ifm_dim, k, _ifm_ch = self.get_1d_attrs_normalized()
    ceil_mode = self.get_nodeattr("CeilMode")
    output_size = compute_pool_output_dim(ifm_dim[1], k[1], k[1], 0, ceil_mode)
    is1d = self.is_1d()

    NumChannels = self.get_nodeattr("NumChannels")
    PoolDim = self.get_nodeattr("PoolDim")[0]
    ImgDim = self.get_nodeattr("ImgDim")[0]
    PE = self.get_nodeattr("PE")

    # Hand-tuned constants; only read_tail_latency is read by the
    # characteristic functions visible here, the rest are passed through.
    windup_clocks = 4
    read_delay = 5
    bursts = int(read_delay + ImgDim / PoolDim)
    read_tail_latency = 5
    write_tail_latency = 14

    return (
        ifm_dim,
        output_size,
        is1d,
        NumChannels,
        PoolDim,
        ImgDim,
        PE,
        windup_clocks,
        read_delay,
        bursts,
        read_tail_latency,
        write_tail_latency,
    )


def characteristic_fx_input(self, txns, cycles, counter, kwargs):
    """Append one period of the input characteristic function to ``txns``.

    Each appended entry is the running count of input transactions at one
    cycle: a read appends ``counter`` and then increments it, a stall
    appends ``counter`` unchanged.

    Args:
        txns: list extended in place with one entry per modelled cycle.
        cycles: cycle count on entry.
        counter: input transaction count on entry.
        kwargs: tuple built by ``prepare_kwargs_for_characteristic_fx``.

    Returns:
        ``(txns, cycles, counter)`` after this period.
    """
    (
        _ifm_dim,
        output_size,
        is1d,
        NumChannels,
        PoolDim,
        ImgDim,
        PE,
        _windup_clocks,
        _read_delay,
        _bursts,
        read_tail_latency,
        _write_tail_latency,
    ) = kwargs

    if not is1d:
        blocks = int(ImgDim / PoolDim)
        # Cap on the number of reads. Kept exactly as written in the patch:
        # the expression evaluates left to right in floating point.
        # NOTE(review): this works out to about ImgDim**2 -- confirm intent.
        maximum = int(ImgDim / PoolDim * PoolDim * ImgDim / PoolDim * PoolDim)

        # Two reads happen before the main loop starts.
        txns.extend(range(counter, counter + 2))
        counter += 2
        cycles += 2
        tracker = 2

        if blocks > 2:
            # one extra stalled cycle
            txns.append(counter)
            cycles += 1

        for _ in range(blocks):
            # Read loop: PoolDim * blocks * PoolDim attempts; attempts past
            # the cap add neither an entry nor a cycle.
            reads = max(0, min(PoolDim * blocks * PoolDim, maximum - tracker))
            txns.extend(range(counter, counter + reads))
            counter += reads
            cycles += reads
            tracker += reads

            # Read-loop tail (PoolDim * read_tail_latency cycles) followed by
            # the write delay (blocks cycles): no input is consumed.
            stall = PoolDim * read_tail_latency + blocks
            txns.extend([counter] * stall)
            cycles += stall
    else:
        # 1d case
        if ImgDim > PoolDim * output_size:
            remainder_pixels = ImgDim - output_size * PoolDim
        else:
            remainder_pixels = 0
        nf = int(NumChannels / PE)

        for _ in range(output_size):
            reads = PoolDim * nf
            txns.extend(range(counter, counter + reads))
            counter += reads
            cycles += reads

            # nf cycles without a read
            txns.extend([counter] * nf)
            cycles += nf

        # Leftover reads not covered by a full pooling window.
        txns.extend(range(counter, counter + remainder_pixels))
        counter += remainder_pixels
        cycles += remainder_pixels

    return txns, cycles, counter


def characteristic_fx_output(self, txns, cycles, counter, kwargs):
    """Append one period of the output characteristic function to ``txns``.

    Same calling convention as ``characteristic_fx_input``; ``counter`` here
    counts output transactions and only advances in the write loops.

    Returns:
        ``(txns, cycles, counter)`` after this period.
    """
    (
        _ifm_dim,
        output_size,
        is1d,
        NumChannels,
        PoolDim,
        ImgDim,
        PE,
        _windup_clocks,
        _read_delay,
        _bursts,
        read_tail_latency,
        _write_tail_latency,
    ) = kwargs

    # One idle cycle before anything else.
    txns.append(counter)
    cycles += 1

    if not is1d:
        blocks = int(ImgDim / PoolDim)
        for _ in range(blocks):
            # No output during the read loop and its tail.
            stall = PoolDim * blocks * PoolDim + PoolDim * read_tail_latency
            txns.extend([counter] * stall)
            cycles += stall

            # Write loop: one output per cycle.
            txns.extend(range(counter, counter + blocks))
            counter += blocks
            cycles += blocks
    else:
        # 1d case
        nf = int(NumChannels / PE)
        for _ in range(output_size):
            stall = PoolDim * nf
            txns.extend([counter] * stall)
            cycles += stall

            txns.extend(range(counter, counter + nf))
            counter += nf
            cycles += nf

    return txns, cycles, counter


def derive_characteristic_fxns(self, period):
    """Derive and store the input/output characteristic functions.

    When the ``ipgen_ignore`` node attribute is 0 the work is delegated to
    the parent class (RTL-simulation flow). Otherwise two consecutive
    periods are generated analytically and written to the node attributes
    ``io_chrc_period``, ``io_chrc_in``, ``io_chrc_pads_in``, ``io_chrc_out``
    and ``io_chrc_pads_out``.

    Args:
        period: number of cycles in one period; the traces span 2 * period.

    Raises:
        ValueError: if one period of a trace needs more than ``period``
            cycles (this previously surfaced as a numpy broadcast error).
    """
    n_inps = int(np.prod(self.get_folded_input_shape()[:-1]))
    io_dict = {
        "inputs": {"in0": [0] * n_inps},
        "outputs": {"out": []},
    }

    if self.get_nodeattr("ipgen_ignore") == 0:
        # this node is being derived using RTLSIM
        super().derive_characteristic_fxns(period, override_rtlsim_dict=io_dict)
        return

    # Analytical flow
    n_in = sum(1 for key in io_dict["inputs"] if "in" in key)
    n_out = sum(1 for key in io_dict["outputs"] if "out" in key)
    all_txns_in = np.empty((n_in, 2 * period), dtype=np.int32)
    all_txns_out = np.empty((n_out, 2 * period), dtype=np.int32)

    self.set_nodeattr("io_chrc_period", period)
    kwargs = self.prepare_kwargs_for_characteristic_fx()

    def two_period_trace(fx):
        """Run fx for two periods, flat-padding each one to `period` cycles."""
        txns, counter, padding = [], 0, 0
        for start in (0, period):
            txns, cycles, counter = fx(txns, start, counter, kwargs)
            fill = start + period - cycles
            if fill < 0:
                raise ValueError(
                    "characteristic function needs %d cycles but period is %d"
                    % (cycles - start, period)
                )
            txns += [counter] * fill
            # Fix: the first period used to add `period * -cycles` here.
            padding += fill
        return txns, padding

    # INPUT
    txn_in, pads_in = two_period_trace(self.characteristic_fx_input)
    all_txns_in[0, :] = np.array(txn_in)
    self.set_nodeattr("io_chrc_in", all_txns_in)
    self.set_nodeattr("io_chrc_pads_in", pads_in)

    # OUTPUT
    txn_out, pads_out = two_period_trace(self.characteristic_fx_output)
    all_txns_out[0, :] = np.array(txn_out)
    self.set_nodeattr("io_chrc_out", all_txns_out)
    self.set_nodeattr("io_chrc_pads_out", pads_out)


# Next patch section header (continues on the following line of the patch):
# diff --git a/src/finn/custom_op/fpgadataflow/thresholding.py b/src/finn/custom_op/fpgadataflow/thresholding.py
# Patch section for src/finn/custom_op/fpgadataflow/thresholding.py
# (index 12cb76be4e..eed0a920ad 100644; hunk @@ -264,3 +264,146 @@).
# The hunk context is the tail of calc_tmem(self):
#     num_channels = self.get_nodeattr("NumChannels")
#     pe = self.get_nodeattr("PE")
#     return num_channels // pe
# The definitions below are the methods this hunk appends after it; `self`
# is the custom-op instance.
# NOTE(review): the patch text reached review with newlines and indentation
# collapsed, so block nesting is reconstructed -- confirm against the
# original commit before merging.


def prepare_kwargs_for_characteristic_fx(self):
    """Collect the parameters consumed by the characteristic functions.

    Returns:
        ``(TOTAL_ITERATIONS, NumChannels, PE, reps, ImgDim, NF)`` where
        ``NF = NumChannels // PE``, ``ImgDim`` is the product of the
        ``numInputVectors`` attribute and
        ``TOTAL_ITERATIONS = reps * ImgDim * NF`` with ``reps`` fixed at 1.
    """
    NumChannels = self.get_nodeattr("NumChannels")
    PE = self.get_nodeattr("PE")
    reps = 1
    ImgDim = int(np.prod(list(self.get_nodeattr("numInputVectors"))))
    # Integer division, matching calc_tmem(); avoids the float round trip
    # of int(NumChannels / PE).
    NF = NumChannels // PE
    TOTAL_ITERATIONS = reps * ImgDim * NF
    return (TOTAL_ITERATIONS, NumChannels, PE, reps, ImgDim, NF)


def characteristic_fx_input(self, txns, cycles, counter, kwargs):
    """Append one period of the input characteristic function to ``txns``.

    One input transaction is recorded per cycle for ``TOTAL_ITERATIONS``
    cycles: each entry is the transaction count before that cycle's read.

    Args:
        txns: list extended in place with one entry per modelled cycle.
        cycles: cycle count on entry.
        counter: input transaction count on entry.
        kwargs: tuple built by ``prepare_kwargs_for_characteristic_fx``.

    Returns:
        ``(txns, cycles, counter)`` after this period.
    """
    total_iterations = kwargs[0]
    txns.extend(range(counter, counter + total_iterations))
    return txns, cycles + total_iterations, counter + total_iterations


def characteristic_fx_output(self, txns, cycles, counter, kwargs):
    """Append one period of the output characteristic function to ``txns``.

    The output count stays flat for a fixed 6-cycle windup, then advances by
    one per cycle for ``TOTAL_ITERATIONS`` cycles.
    NOTE(review): the windup presumably models pipeline latency -- the value
    6 is taken as-is from the patch; confirm against hardware.

    Returns:
        ``(txns, cycles, counter)`` after this period.
    """
    total_iterations = kwargs[0]
    windup = 6

    txns.extend([counter] * windup)
    cycles += windup

    txns.extend(range(counter, counter + total_iterations))
    return txns, cycles + total_iterations, counter + total_iterations


def derive_characteristic_fxns(self, period):
    """Derive and store the input/output characteristic functions.

    When the ``ipgen_ignore`` node attribute is 0 the work is delegated to
    the parent class (RTL-simulation flow), passing an I/O dictionary that
    also carries a ``weights`` stream for ``Thresholding_hls`` nodes whose
    ``mem_mode`` is ``internal_decoupled`` or ``external``. Otherwise two
    consecutive periods are generated analytically and written to the node
    attributes ``io_chrc_period``, ``io_chrc_in``, ``io_chrc_pads_in``,
    ``io_chrc_out`` and ``io_chrc_pads_out``.

    Args:
        period: number of cycles in one period; the traces span 2 * period.

    Raises:
        ValueError: if one period of a trace needs more than ``period``
            cycles (this previously surfaced as a numpy broadcast error).
    """
    n_inps = int(np.prod(self.get_folded_input_shape()[:-1]))
    io_dict = {
        "inputs": {"in0": [0] * n_inps},
        "outputs": {"out": []},
    }

    if self.onnx_node.op_type == "Thresholding_hls":
        mem_mode = self.get_nodeattr("mem_mode")
        if mem_mode in ["internal_decoupled", "external"]:
            n_weight_inps = self.calc_tmem()
            num_w_reps = int(np.prod(self.get_nodeattr("numInputVectors")))
            io_dict["inputs"]["weights"] = [0] * (num_w_reps * n_weight_inps)

    if self.get_nodeattr("ipgen_ignore") == 0:
        # this node is being derived using RTLSIM
        super().derive_characteristic_fxns(period, override_rtlsim_dict=io_dict)
        return

    # Analytical flow. Only stream names containing "in" / "out" get a row,
    # so the "weights" stream is not characterised here.
    n_in = sum(1 for key in io_dict["inputs"] if "in" in key)
    n_out = sum(1 for key in io_dict["outputs"] if "out" in key)
    all_txns_in = np.empty((n_in, 2 * period), dtype=np.int32)
    all_txns_out = np.empty((n_out, 2 * period), dtype=np.int32)

    self.set_nodeattr("io_chrc_period", period)
    kwargs = self.prepare_kwargs_for_characteristic_fx()

    def two_period_trace(fx):
        """Run fx for two periods, flat-padding each one to `period` cycles."""
        txns, counter, padding = [], 0, 0
        for start in (0, period):
            txns, cycles, counter = fx(txns, start, counter, kwargs)
            fill = start + period - cycles
            if fill < 0:
                raise ValueError(
                    "characteristic function needs %d cycles but period is %d"
                    % (cycles - start, period)
                )
            txns += [counter] * fill
            # Fix: the first period used to add `period * -cycles` here.
            padding += fill
        return txns, padding

    # INPUT
    txn_in, pads_in = two_period_trace(self.characteristic_fx_input)
    all_txns_in[0, :] = np.array(txn_in)
    self.set_nodeattr("io_chrc_in", all_txns_in)
    self.set_nodeattr("io_chrc_pads_in", pads_in)

    # OUTPUT
    txn_out, pads_out = two_period_trace(self.characteristic_fx_output)
    all_txns_out[0, :] = np.array(txn_out)
    self.set_nodeattr("io_chrc_out", all_txns_out)
    self.set_nodeattr("io_chrc_pads_out", pads_out)


# Next patch section header (its hunk header continues on the following line
# of the patch):
# diff --git a/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py b/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py
# index d95c6eb7cc..62d834ba05 100644
# --- a/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py
# +++ b/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py
# @@ -789,20 +789,6 @@ def
get_op_and_param_counts(self): ret_dict[thres_param_type] = thres_count return ret_dict - def derive_characteristic_fxns(self, period): - n_inps = np.prod(self.get_folded_input_shape()[:-1]) - io_dict = { - "inputs": { - "in0": [0 for i in range(n_inps)], - }, - "outputs": {"out": []}, - } - mem_mode = self.get_nodeattr("mem_mode") - if mem_mode in ["internal_decoupled", "external"]: - n_weight_inps = self.calc_wmem() - num_w_reps = np.prod(self.get_nodeattr("numInputVectors")) - io_dict["inputs"]["weights"] = [0 for i in range(num_w_reps * n_weight_inps)] - super().derive_characteristic_fxns(period, override_rtlsim_dict=io_dict) def get_verilog_top_module_intf_names(self): intf_names = super().get_verilog_top_module_intf_names() @@ -922,3 +908,175 @@ def code_generation_ipi(self): else: raise Exception("Unrecognized mem_mode for VectorVectorActivation") return cmd + + + def prepare_kwargs_for_characteristic_fx(self): + + + # key parameters + if "hls" in self.onnx_node.name: + impl_style = "hls" + else: + impl_style = "rtl" + + SIMD = self.get_nodeattr("SIMD") + PE = self.get_nodeattr("PE") + Channels = self.get_nodeattr("Channels") + Kernel_2 = np.prod(self.get_nodeattr("Kernel")) + NF = int(Channels / PE) + SF = Kernel_2 + numReps = np.prod(self.get_nodeattr("Dim")) + TOTAL_FOLD = NF*SF*numReps + + + if impl_style == "rtl": + TOTAL_FOLD = int(TOTAL_FOLD/SIMD) + + kwargs = (NF,SF,SIMD,TOTAL_FOLD,impl_style) + + + # assert True==False + + return kwargs + + def characteristic_fx_input(self, txns, cycles, counter, kwargs): + # Compute one period of the input characteristic function + + (NF,SF,SIMD,TOTAL_FOLD,impl_style) = kwargs + + # input + for i in range(0,TOTAL_FOLD): + txns.append(counter) + counter+=1 + cycles+=1 + + return txns, cycles, counter + + def characteristic_fx_output(self, txns, cycles, counter, kwargs): + # Compute one period of the output characteristic function + + (NF,SF,SIMD,TOTAL_FOLD,impl_style) = kwargs + sf = 0 + if impl_style == "hls": + 
windup = 5 + else: + windup = 7 + + for i in range(0,windup): + txns.append(counter) + cycles+=1 + + # first input period + #txn_in[0:bursts] = np.arange(0,bursts) + for i in range(0,TOTAL_FOLD+1): + + if sf == SF: + counter+=1 + sf = 0 + sf+=1 + # txn_in[cycles] = p_in + txns.append(counter) + cycles+=1 + #p = bursts + + + return txns, cycles, counter + + + def derive_characteristic_fxns(self, period): + n_inps = np.prod(self.get_folded_input_shape()[:-1]) + io_dict = { + "inputs": { + "in0": [0 for i in range(n_inps)], + }, + "outputs": {"out": []}, + } + + + mem_mode = self.get_nodeattr("mem_mode") + if mem_mode in ["internal_decoupled", "external"]: + n_weight_inps = self.calc_wmem() + #num_w_reps = np.prod(self.get_nodeattr("numInputVectors")) + io_dict["inputs"]["weights"] = [0 for i in range(1 * n_weight_inps)] + + + ignore = self.get_nodeattr("ipgen_ignore") + if ignore == 0: # this node is being derived using RTLSIM + # RTL-based flow + super().derive_characteristic_fxns(period, override_rtlsim_dict=io_dict) + return + + # Analytical flow + + + txns_in = {key: [] for (key, value) in io_dict["inputs"].items() if "in" in key} + txns_out = {key: [] for (key, value) in io_dict["outputs"].items() if "out" in key} + + all_txns_in = np.empty((len(txns_in.keys()), 2 * period), dtype=np.int32) + all_txns_out = np.empty((len(txns_out.keys()), 2 * period), dtype=np.int32) + + + self.set_nodeattr("io_chrc_period",period) + + + + + txn_in = [] + txn_out = [] + + + # INPUT + + counter = 0 + padding = 0 + + + kwargs = self.prepare_kwargs_for_characteristic_fx() + + + # first period + cycles = 0 + txn_in, cycles, counter = self.characteristic_fx_input(txn_in,cycles,counter,kwargs) + + txn_in += [counter] * (period-cycles) + padding+=(period*-cycles) + + + # second period + cycles = period + txn_in, cycles, counter = self.characteristic_fx_input(txn_in,cycles,counter,kwargs) + + + txn_in += [counter] * (period*2-cycles) + padding+=(period*2-cycles) + + # final assignments 
+ all_txns_in[0, :] = np.array(txn_in) + self.set_nodeattr("io_chrc_in", all_txns_in) + self.set_nodeattr("io_chrc_pads_in", padding) + + + # OUTPUT + + counter = 0 + cycles = 0 + padding = 0 + + + txn_out, cycles, counter = self.characteristic_fx_output(txn_out,cycles,counter,kwargs) + + + txn_out += [counter] * (period-cycles) + padding += (period*-cycles) + + cycles = period + + txn_out, cycles, counter = self.characteristic_fx_output(txn_out,cycles,counter,kwargs) + + txn_out += [counter] * (period*2-cycles) + padding+=(period*2-cycles) + + + all_txns_out[0, :] = np.array(txn_out) + self.set_nodeattr("io_chrc_out", all_txns_out) + self.set_nodeattr("io_chrc_pads_out", padding) diff --git a/tests/fpgadataflow/test_fifosizing.py b/tests/fpgadataflow/test_fifosizing.py index 338204c0c7..668ad5092e 100644 --- a/tests/fpgadataflow/test_fifosizing.py +++ b/tests/fpgadataflow/test_fifosizing.py @@ -27,19 +27,576 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import numpy as np +from onnx import TensorProto, helper +from qonnx.core.datatype import DataType +from qonnx.core.modelwrapper import ModelWrapper +from qonnx.custom_op.general.im2col import compute_conv_output_dim +from qonnx.custom_op.general.multithreshold import multithreshold +from qonnx.custom_op.registry import getCustomOp +from qonnx.transformation.general import ( + ApplyConfig, + GiveReadableTensorNames, + GiveUniqueNodeNames, +) +from qonnx.transformation.infer_datatypes import InferDataTypes +from qonnx.transformation.infer_shapes import InferShapes +from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul +from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model + +import finn.core.onnx_exec as oxe +import finn.transformation.fpgadataflow.convert_to_hw_layers as to_hw +from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer +from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim +from 
finn.transformation.fpgadataflow.create_dataflow_partition import ( + CreateDataflowPartition, +) +from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP +from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP +from finn.transformation.fpgadataflow.minimize_accumulator_width import ( + MinimizeAccumulatorWidth, +) +from finn.transformation.fpgadataflow.minimize_weight_bit_width import ( + MinimizeWeightBitWidth, +) +from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim +from finn.transformation.fpgadataflow.prepare_ip import PrepareIP +from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim +from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode +from finn.transformation.fpgadataflow.set_fifo_depths import InsertAndSetFIFODepths +from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers + + import pytest import json +import numpy as np import shutil import torch +import copy +import os +from qonnx.transformation.infer_datatypes import InferDataTypes +import finn.transformation.fpgadataflow.convert_to_hw_layers as to_hw +from qonnx.custom_op.general.maxpoolnhwc import compute_pool_output_dim +from finn.builder.build_dataflow_steps import step_set_fifo_depths +from finn.transformation.fpgadataflow.prepare_ip import PrepareIP from brevitas.export import export_qonnx +from qonnx.custom_op.general.im2col import compute_conv_output_dim from qonnx.core.modelwrapper import ModelWrapper from qonnx.custom_op.registry import getCustomOp - +from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers +from qonnx.transformation.general import GiveUniqueNodeNames import finn.builder.build_dataflow as build import finn.builder.build_dataflow_config as build_cfg from finn.util.basic import make_build_dir from finn.util.test import get_trained_network_and_ishape +from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model +from onnx import 
TensorProto, helper +from qonnx.core.datatype import DataType +from finn.transformation.fpgadataflow.convert_to_hw_layers import InferStreamingMaxPool +from qonnx.transformation.infer_shapes import InferShapes + + +def generate_random_threshold_values( + data_type, num_input_channels, num_steps, narrow=False, per_tensor=False +): + if per_tensor: + num_input_channels = 1 + if narrow: + num_steps -= 1 + + return np.random.randint( + data_type.min(), + data_type.max() + 1, + (num_input_channels, num_steps), + ).astype(np.float32) + + +def sort_thresholds_increasing(thresholds): + return np.sort(thresholds, axis=1) + + +def make_single_fmpadding_modelwrapper(impl_style, idim, padding, num_ch, simd, idt): + pad_h = padding[0] + padding[2] + pad_w = padding[1] + padding[3] + idim_h, idim_w = idim + + assert pad_h > 0 or pad_w > 0, "Output dim should be greater than input dim" + odim_h = idim_h + pad_h + odim_w = idim_w + pad_w + + inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, idim_h, idim_w, num_ch]) + outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, odim_h, odim_w, num_ch]) + + FMPadding = helper.make_node( + "FMPadding", + ["inp"], + ["outp"], + domain="finn.custom_op.fpgadataflow", + backend="fpgadataflow", + ImgDim=idim, + Padding=padding, + NumChannels=num_ch, + inputDataType=str(idt.name), + numInputVectors=1, + SIMD=simd, + preferred_impl_style=impl_style, + ) + + graph = helper.make_graph( + nodes=[FMPadding], name="fmpadding_graph", inputs=[inp], outputs=[outp] + ) + + model = qonnx_make_model(graph, producer_name="fmpadding-model") + model = ModelWrapper(model) + + model.set_tensor_datatype("inp", idt) + model.set_tensor_datatype("outp", idt) + + return model + +def make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T=None, tdt=None): + mw = W.shape[0] + mh = W.shape[1] + assert mh % pe == 0 + assert mw % simd == 0 + + # there are two ways to implement bipolar weights and inputs for + # MatrixVectorActivation: + 
# - specify their datatypes as such + # - specify their datatypes as BINARY as use binaryXnorMode + if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]: + # we'll internally convert weights/inputs to binary and specify the + # datatypes as such, and also set the binaryXnorMode attribute to 1 + export_wdt = DataType["BINARY"] + export_idt = DataType["BINARY"] + binary_xnor_mode = 1 + else: + export_wdt = wdt + export_idt = idt + binary_xnor_mode = 0 + + inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, mw]) + outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, mh]) + if T is not None: + no_act = 0 + node_inp_list = ["inp", "weights", "thresh"] + if odt == DataType["BIPOLAR"]: + actval = 0 + else: + actval = odt.min() + else: + # no thresholds + node_inp_list = ["inp", "weights"] + actval = 0 + no_act = 1 + FCLayer_node = helper.make_node( + "MVAU", + node_inp_list, + ["outp"], + domain="finn.custom_op.fpgadataflow", + backend="fpgadataflow", + MW=mw, + MH=mh, + SIMD=simd, + PE=pe, + inputDataType=export_idt.name, + weightDataType=export_wdt.name, + outputDataType=odt.name, + ActVal=actval, + binaryXnorMode=binary_xnor_mode, + noActivation=no_act, + ) + graph = helper.make_graph( + nodes=[FCLayer_node], name="fclayer_graph", inputs=[inp], outputs=[outp] + ) + + model = qonnx_make_model(graph, producer_name="fclayer-model") + model = ModelWrapper(model) + + model.set_tensor_datatype("inp", idt) + model.set_tensor_datatype("outp", odt) + model.set_tensor_datatype("weights", wdt) + if binary_xnor_mode: + # convert bipolar to binary + model.set_initializer("weights", (W + 1) / 2) + else: + model.set_initializer("weights", W) + if T is not None: + model.set_tensor_datatype("thresh", tdt) + model.set_initializer("thresh", T) + return model + + + +def make_labelselect_modelwrapper(labels, pe, k, idt, impl_style): + inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, labels]) + outp = 
helper.make_tensor_value_info("outp", TensorProto.INT64, [1, k]) + + labelselect_node = helper.make_node( + "LabelSelect", + ["inp"], + ["outp"], + domain="finn.custom_op.fpgadataflow", + backend="fpgadataflow", + Labels=labels, + PE=pe, + K=k, + inputDataType=idt.name, + preferred_impl_style=impl_style, + ) + graph = helper.make_graph( + nodes=[labelselect_node], + name="graph", + inputs=[inp], + outputs=[outp], + ) + + model = qonnx_make_model(graph, producer_name="thresholding-model") + model = ModelWrapper(model) + + model.set_tensor_datatype("inp", idt) + odt = DataType.get_smallest_possible(labels - 1) + model.set_tensor_datatype("outp", odt) + + return model
+
+
+def _make_single_vvau_modelwrapper(
+    W,
+    pe,
+    simd,
+    k_h,
+    k_w,
+    channels,
+    dim_h,
+    dim_w,
+    wdt,
+    idt,
+    odt,
+    T=None,
+    tdt=None,
+    mem_mode="internal_embedded",
+    impl_style="rtl",
+):
+    """Build a ModelWrapper that contains a single VVAU node.
+
+    The node reads "inp" and "weights" (plus "thresh" when T is given) and
+    writes "outp". W becomes the "weights" initializer. When T is not None
+    it becomes the "thresh" initializer with datatype tdt and the node is
+    created with noActivation=0; otherwise noActivation=1 and ActVal=0.
+    """
+    in_shape = [1, dim_h, dim_w, k_h * k_w * channels]  # [N, H, W, K*K*CH]
+    out_shape = [
+        1,
+        dim_h,
+        dim_w,
+        channels,
+    ]  # [N, H, W, OFM_CH] (OFM_CH=IFM_CH because depthwise convolution)
+
+    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, in_shape)
+    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, out_shape)
+
+    if T is not None:
+        no_act = 0
+        node_inp_list = ["inp", "weights", "thresh"]
+        if odt == DataType["BIPOLAR"]:
+            actval = 0
+        else:
+            actval = odt.min()
+    else:
+        no_act = 1
+        node_inp_list = ["inp", "weights"]
+        actval = 0
+
+    VVAU_node = helper.make_node(
+        "VVAU",
+        node_inp_list,
+        ["outp"],
+        domain="finn.custom_op.fpgadataflow",
+        backend="fpgadataflow",
+        PE=pe,
+        SIMD=simd,
+        Dim=[dim_h, dim_w],
+        Channels=channels,
+        Kernel=[k_h, k_w],
+        resType="lut",
+        ActVal=actval,
+        inputDataType=idt.name,
+        weightDataType=wdt.name,
+        outputDataType=odt.name,
+        noActivation=no_act,
+        mem_mode=mem_mode,
+        impl_style=impl_style,
+    )
+
+    graph = helper.make_graph(nodes=[VVAU_node], name="vvau_graph", inputs=[inp], outputs=[outp])
+
+    model = qonnx_make_model(graph, producer_name="vvau-model")
+    model = ModelWrapper(model)
+
+    model.set_tensor_datatype("inp", idt)
+    model.set_tensor_datatype("outp", odt)
+    model.set_tensor_datatype("weights", wdt)
+
+    model.set_initializer("weights", W)
+    model.set_tensor_shape("weights", (channels, 1, k_h, k_w))
+
+    if T is not None:
+        model.set_tensor_datatype("thresh", tdt)
+        model.set_initializer("thresh", T)
+
+    model = model.transform(InferShapes())
+    model = model.transform(InferDataTypes())
+
+    return model
+
+
+def make_single_dw_conv_modelwrapper(conv_config, idt, wdt):
+    """Build a ModelWrapper with a single depthwise ONNX Conv node.
+
+    conv_config is the tuple (kernel_size, in_feature_dim, in_chn). The
+    convolution uses stride 1, no padding, square kernel and feature map,
+    and group == out_chn == in_chn. The weights are a random tensor of
+    datatype wdt.
+    """
+    kernel_size, in_feature_dim, in_chn = conv_config
+    stride = 1
+    pad = 0
+
+    out_feature_dim = compute_conv_output_dim(in_feature_dim, kernel_size, stride, pad)
+    group = out_chn = in_chn
+
+    conv_param_shape = [out_chn, 1, kernel_size, kernel_size]
+    input_shape = [1, in_chn, in_feature_dim, in_feature_dim]
+    output_shape = [1, out_chn, out_feature_dim, out_feature_dim]
+
+    # from here on conv_config holds the attributes of the Conv node
+    conv_config = {}
+    conv_config["dilations"] = [1, 1]
+    conv_config["group"] = group
+    conv_config["kernel_shape"] = [kernel_size, kernel_size]
+    conv_config["pads"] = [pad, pad, pad, pad]
+    conv_config["strides"] = [stride, stride]
+
+    ifm = helper.make_tensor_value_info("ifm", TensorProto.FLOAT, input_shape)
+    ofm = helper.make_tensor_value_info("ofm", TensorProto.FLOAT, output_shape)
+    weights = [helper.make_tensor_value_info("weights", TensorProto.FLOAT, conv_param_shape)]
+
+    modelproto = qonnx_make_model(
+        helper.make_graph(
+            name="conv_test",
+            inputs=[ifm],
+            outputs=[ofm],
+            value_info=weights,
+            nodes=[helper.make_node("Conv", ["ifm", "weights"], ["ofm"], **conv_config)],
+        )
+    )
+
+    model = ModelWrapper(modelproto)
+    model.set_tensor_datatype("ifm", idt)
+    model.set_tensor_datatype("weights", wdt)
+    model.set_initializer("weights", gen_finn_dt_tensor(wdt, conv_param_shape))
+
+    model = model.transform(InferShapes())
+    model = model.transform(InferDataTypes())
+
+    return model
+
+
+def _infer_sparse_weight_tensor(W_conv, k_h, k_w, channels):
+    """Expand depthwise weights into a (channels*k_h*k_w, channels) float32 matrix.
+
+    W_conv[ch][0] is copied to the [ch][ch] slot of a zero-initialised
+    (channels, channels, k_h, k_w) tensor, which is then transposed,
+    flattened and transposed again into matrix form.
+    """
+    W_sparse = np.zeros((channels, channels, k_h, k_w), dtype=np.float32)
+    for ch in range(channels):
+        W_sparse[ch][ch] = W_conv[ch][0]
+    W_conv = W_sparse.astype(np.float32)
+    W_matmul = W_conv.transpose(0, 2, 3, 1)
+    W_matmul = W_matmul.reshape(channels, channels * k_h * k_w)
+    W_matmul = W_matmul.T
+
+    return W_matmul
+
+
+def _calculate_dot_prod_range(dt_a, dt_b, len):
+    """Returns the (min,max) values a dot product between two (un)signed vectors of
+    types dt_a and dt_b of len elements can take."""
+    # a * b * len is extremal at the corner values of a and b, so the range
+    # follows from the four corner products (no magic initial bounds needed)
+    prods = [
+        a_val * b_val * len
+        for a_val in (dt_a.min(), dt_a.max())
+        for b_val in (dt_b.min(), dt_b.max())
+    ]
+    return (min(prods), max(prods))
+
+
+def make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt, ceil_mode):
+    """Build a ModelWrapper with a single MaxPoolNHWC node.
+
+    k, ifm_dim and ofm_dim are (height, width) pairs. The stride equals the
+    kernel size, no padding is applied and the output datatype equals idt.
+    """
+    k_h, k_w = k
+    ifm_dim_h, ifm_dim_w = ifm_dim
+    ofm_dim_h, ofm_dim_w = ofm_dim
+    odt = idt
+    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, ifm_dim_h, ifm_dim_w, ifm_ch])
+    outp = helper.make_tensor_value_info(
+        "outp", TensorProto.FLOAT, [1, ofm_dim_h, ofm_dim_w, ifm_ch]
+    )
+
+    mp_node = helper.make_node(
+        "MaxPoolNHWC",
+        ["inp"],
+        ["outp"],
+        domain="qonnx.custom_op.general",
+        kernel_shape=[k_h, k_w],
+        strides=[k_h, k_w],
+        ceil_mode=ceil_mode,
+        pads=[0, 0, 0, 0],
+    )
+    graph = helper.make_graph(nodes=[mp_node], name="mp_graph", inputs=[inp], outputs=[outp])
+
+    model = qonnx_make_model(graph, producer_name="mp-model")
+    model = ModelWrapper(model)
+
+    model.set_tensor_datatype("inp", idt)
+    model.set_tensor_datatype("outp", odt)
+
+    return model
+
+
+def make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw):
+    """Build a ModelWrapper with a single Im2Col node.
+
+    k, ifm_dim, ofm_dim, stride and dilation are (height, width) pairs.
+    No padding is applied; dw is passed through as the node's depthwise
+    attribute and the output datatype equals idt.
+    """
+    k_h, k_w = k
+    ifm_dim_h, ifm_dim_w = ifm_dim
+    stride_h, stride_w = stride
+    dilation_h, dilation_w = dilation
+    ofm_dim_h, ofm_dim_w = ofm_dim
+
+    odt = idt
+    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, ifm_dim_h, ifm_dim_w, ifm_ch])
+    outp = helper.make_tensor_value_info(
+        "outp", TensorProto.FLOAT, [1, ofm_dim_h, ofm_dim_w, k_h * k_w * ifm_ch]
+    )
+
+    im2col_node = helper.make_node(
+        "Im2Col",
+        ["inp"],
+        ["outp"],
+        domain="finn.custom_op.general",
+        stride=[stride_h, stride_w],
+        kernel_size=[k_h, k_w],
+        input_shape=str((1, ifm_dim_h, ifm_dim_w, ifm_ch)),
+        dilations=[dilation_h, dilation_w],
+        pad_amount=[0, 0, 0, 0],
+        pad_value=0,
+        depthwise=dw,
+    )
+    graph = helper.make_graph(
+        nodes=[im2col_node], name="im2col_graph", inputs=[inp], outputs=[outp]
+    )
+
+    model = qonnx_make_model(graph, producer_name="im2col-model")
+    model = ModelWrapper(model)
+
+    model.set_tensor_datatype("inp", idt)
+    model.set_tensor_datatype("outp", odt)
+
+    return model
+
+
+def make_channelwise_modelwrapper(C, pe, idt, odt, pdt, func, vecs):
+    """Build a ModelWrapper with a single ChannelwiseOp node (HLS preferred).
+
+    C is installed as the "const" initializer and its first dimension gives
+    NumChannels. vecs is the list of leading dimensions of the input/output
+    tensors and is also passed as numInputVectors.
+    """
+    NumChannels = C.shape[0]
+
+    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, vecs + [NumChannels])
+    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, vecs + [NumChannels])
+
+    node_inp_list = ["inp", "const"]
+
+    node = helper.make_node(
+        "ChannelwiseOp",
+        node_inp_list,
+        ["outp"],
+        domain="finn.custom_op.fpgadataflow",
+        backend="fpgadataflow",
+        NumChannels=NumChannels,
+        Func=func,
+        PE=pe,
+        inputDataType=idt.name,
+        outputDataType=odt.name,
+        paramDataType=pdt.name,
+        numInputVectors=vecs,
+        preferred_impl_style="hls",
+    )
+    graph = helper.make_graph(nodes=[node], name="graph", inputs=[inp], outputs=[outp])
+
+    model = qonnx_make_model(graph, producer_name="model")
+    model = ModelWrapper(model)
+
+    model.set_tensor_datatype("inp", idt)
+    model.set_tensor_datatype("outp", odt)
+
+    # NOTE(review): "const" is annotated with idt although the node attribute
+    # paramDataType is pdt - confirm this is intended
+    model.set_tensor_datatype("const", idt)
+    model.set_initializer("const", C)
+    return model
+
+
+def make_single_dwc_modelwrapper(in_shape, out_shape, inWidth, outWidth, finn_dtype, impl_style):
+    """Build a ModelWrapper with a single StreamingDataWidthConverter node.
+
+    The node is created with generalized_variant=True; input and output
+    tensors both carry finn_dtype.
+    """
+    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, in_shape)
+    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, out_shape)
+
+    optype = "StreamingDataWidthConverter"
+
+    DWC_node = helper.make_node(
+        optype,
+        ["inp"],
+        ["outp"],
+        domain="finn.custom_op.fpgadataflow",
+        backend="fpgadataflow",
+        in_shape=in_shape,
+        out_shape=out_shape,
+        inWidth=inWidth,
+        outWidth=outWidth,
+        preferred_impl_style=impl_style,
+        generalized_variant=True,
+        dataType=str(finn_dtype.name),
+    )
+
+    graph = helper.make_graph(nodes=[DWC_node], name="dwc_graph", inputs=[inp], outputs=[outp])
+
+    model = qonnx_make_model(graph, producer_name="dwc-model")
+    model = ModelWrapper(model)
+
+    model.set_tensor_datatype("inp", finn_dtype)
+    model.set_tensor_datatype("outp", finn_dtype)
+
+    return model
+
+
+def make_single_thresholding_modelwrapper(impl_style, T, idt, odt, actval, n_inp_vecs, num_ch):
+    """Build a ModelWrapper with a single Thresholding node.
+
+    T is installed as the "thresh" initializer and its second dimension
+    gives numSteps. n_inp_vecs is the list of leading dimensions of the
+    input/output tensors and is also passed as numInputVectors.
+    """
+    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, n_inp_vecs + [num_ch])
+    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, n_inp_vecs + [num_ch])
+
+    node_inp_list = ["inp", "thresh"]
+
+    Thresholding_node = helper.make_node(
+        "Thresholding",
+        node_inp_list,
+        ["outp"],
+        domain="finn.custom_op.fpgadataflow",
+        backend="fpgadataflow",
+        NumChannels=num_ch,
+        numSteps=T.shape[1],
+        inputDataType=idt.name,
+        weightDataType=idt.name,  # will be set by MinimizeAccumulatorWidth
+        outputDataType=odt.name,
+        ActVal=actval,
+        numInputVectors=n_inp_vecs,
+        preferred_impl_style=impl_style,
+    )
+    graph = helper.make_graph(
+        nodes=[Thresholding_node],
+        name="thresholding_graph",
+        inputs=[inp],
+        outputs=[outp],
+    )
+
+    model = qonnx_make_model(graph, producer_name="thresholding-model")
+    model = ModelWrapper(model)
+
+    model.set_tensor_datatype("inp", idt)
+    model.set_tensor_datatype("outp", odt)
+
+    model.set_tensor_datatype("thresh", idt)
+    model.set_initializer("thresh", T)
+    return model
+
def fetch_test_model(topology, wbits=2, abits=2): @@ -56,6 +613,7 @@ def fetch_test_model(topology, wbits=2, abits=2): @pytest.mark.parametrize( "method",
["largefifo_rtlsim_python", "largefifo_rtlsim_cpp", "characterize"] ) + @pytest.mark.parametrize("topology", ["tfc", "cnv"]) def test_fifosizing_linear(method, topology): force_python_rtlsim = "python" in method @@ -111,3 +669,393 @@ def test_fifosizing_linear(method, topology): shutil.rmtree(tmp_output_dir) shutil.rmtree(tmp_output_dir_cmp) + + + +@pytest.mark.slow +@pytest.mark.vivado +@pytest.mark.fpgadataflow + +@pytest.mark.parametrize( + "node", [ + + ("LabelSelect",10,1,1,DataType["UINT8"],"hls"), + ("LabelSelect",10,1,3,DataType["UINT8"],"hls"), + ("LabelSelect",10,2,3,DataType["UINT8"],"hls"), + ("MVAU",5,1,8,1,[1,1],DataType["UINT2"],DataType["UINT2"],DataType["UINT2"],"hls"), + ("MVAU",5,1,8,1,[1,4],DataType["UINT2"],DataType["UINT2"],DataType["UINT2"],"hls"), + ("MVAU",10,5,20,4,[1,1],DataType["UINT4"],DataType["UINT8"],DataType["UINT4"],"hls"), + ("StreamingDataWidthConverter",[1,4,1,40],[1,4,1,40],2,8,DataType["BIPOLAR"],"hls"), + ("StreamingDataWidthConverter",[1,240],[1,241],12,2,DataType["BIPOLAR"],"hls"), + ("StreamingDataWidthConverter",[1,36],[1,36],12,12,DataType["BIPOLAR"],"hls"), + ("StreamingDataWidthConverter",[1,4,1,30],[1,4,1,18],3,9,DataType["BIPOLAR"],"hls"), + ("StreamingDataWidthConverter",[1,1,1,18],[1,1,1,30],9,3,DataType["BIPOLAR"],"hls"), + ("StreamingDataWidthConverter",[1,90],[1,90],3,10,DataType["BIPOLAR"],"hls"), + ("StreamingDataWidthConverter",[1,40],[1,30],10,3,DataType["BIPOLAR"],"hls"), + + ("FMPadding",[8,8], [1,1,1,1],2,1,DataType["INT2"],"hls"), + ("FMPadding",[8,8], [1,1,1,1],4,1,DataType["INT2"],"hls"), + ("FMPadding",[8,8], [1,1,1,1],12,1,DataType["INT2"],"hls"), + ("FMPadding",[8,8], [4,0,4,0],12,1,DataType["INT2"],"hls"), + ("FMPadding",[8,8], [0,4,0,4],5,1,DataType["INT2"],"hls"), + ("FMPadding",[2,3], [0,3,0,4],5,5,DataType["INT2"],"hls"), + # idim, pad, num_ch,simd,idt + ("ChannelwiseOp",DataType["INT8"], DataType["INT4"],DataType["INT4"] , 4, 16, "add", [1,4,4], "hls") + ("ChannelwiseOp",DataType["INT8"], 
DataType["INT4"],DataType["INT4"] , 2, 16, "add", [1], "hls") + ("ChannelwiseOp",DataType["INT8"], DataType["INT4"],DataType["INT4"] , 1, 16, "add", [1, 7 ,7], "hls") + #,idt, act, pdt, nf, ich, func, vecs, impl_style + + # (Pdb) (ifm_dim,output_size,is1d, NumChannels,PoolDim,ImgDim,PE) + # ([1, 512], 256, True, 32, 2, 512, 1) + ("StreamingMaxPool",DataType["INT4"],True,2,32,4,1 ,0,"hls"), + ("StreamingMaxPool",DataType["INT4"],True,1,4,1,1,0,"hls"), + ("StreamingMaxPool",DataType["BIPOLAR"],False,1,10,1,1,1), + ("StreamingMaxPool",DataType["BIPOLAR"],False,2,10,64,1,1,"hls"), + ("StreamingMaxPool",DataType["BIPOLAR"],False,2,28,64,1,0,"hls"), + # idt, dim_1d, k, ifm_dim, ifm_ch, pe, ceil_mode,impl_style + ("StreamingMaxPool",DataType["BIPOLAR"],False,1,10,1,1,1), + ("StreamingMaxPool",DataType["INT4"],[True],[4],[10],[3],[3],[1],"hls"), + ("ConvolutionInputGenerator",DataType["INT2"],[6, 6],[12, 12],8,[4,4],[1,1],2,0,0,1,False,0,"hls"), + ("ConvolutionInputGenerator",DataType["INT2"],[6,1],[12,1],16, [1,1], [1,1], 2, 0, 0, 1, False, 1,"hls"), + # """ idt, k, ifm_dim, ifm_ch,stride, dilation, simd, dw, parallel_window, m, flip, is1d""" + + ("ConvolutionInputGenerator",DataType["INT2"],[6,1],[12,1],16, [3,1], [1,1], 2, 0, 0, 1, False, 1,"hls"), + ("ConvolutionInputGenerator",DataType["INT2"],[6,1],[12,1],16, [1,1], [1,1], 2, 1, 0, 1, False, 1,"hls"), + ("ConvolutionInputGenerator",DataType["INT2"],[6,1],[12,1],16, [2,1], [1,1], 2, 1, 0, 1, False, 1,"hls"), + ("ConvolutionInputGenerator",DataType["INT2"],[4,4],[8,8],6, [4,4], [1,1], 2, 1, 0, 1, False, 0,"hls"), + ("ConvolutionInputGenerator",DataType["INT2"],[6,6],[10,10],8, [2,2], [1,1], 2, 1, 0, 1, False, 0,"hls"), + ("ConvolutionInputGenerator",DataType["INT2"],[4,4],[10,10],16, [2,2], [1,1], 2, 1, 0, 1, False, 0,"hls"), + ("ConvolutionInputGenerator",DataType["INT2"],[6,1],[8,1],8,[3,1],[1,1],1,0, 0,1,False, 1,"hls"), + # """ idt, k, ifm_dim, ifm_ch,stride, dilation, simd, dw, parallel_window, m, flip, is1d""" + 
("VVAU",DataType["INT4"], DataType["INT4"], DataType["INT4"], 3, 1, 10, 10, 3, 3, 3, "internal_embedded",0,"hls"), + ("VVAU",DataType["INT4"], DataType["INT4"], None, 3, 3, 10, 10, 3, 3, 3, "internal_embedded",1,"rtl"), + ("Thresholding",[15,3],True,True,"hls"), + ("MVAU",48,1,4,1,[1,1],DataType["UINT2"],DataType["UINT2"],DataType["UINT2"],"hls"), + ] +) +def test_fifosizing_analytical_characterization(node): + + test_rtl = True + + build_dir = os.environ["FINN_BUILD_DIR"] + test_fpga_part = "xc7z020clg400-1" + target_clk_ns = 4 + + model_cache = None + for x in os.listdir(build_dir): + if x.startswith(str(node)): + print("cached model found") + model_cache = f'{build_dir}/{x}/model.onnx' + #if model_cache is None: + tmp_output_dir = make_build_dir("build_fifosizing") + + if node[0] == "LabelSelect": + labels, pe, k, idt, impl_style = node[1:] + model0 = make_labelselect_modelwrapper(labels, pe, k, idt, impl_style) + + elif node[0] == "Thresholding": + cfg, narrow, per_tensor, impl_style = node[1:] + ch = cfg[0] + pe = cfg[1] + n_inp_vecs = [1, 2, 2] + hls_mem_mode = "internal_decoupled" + act = DataType["INT4"] + idt = DataType["INT16"] + odt = act + n_steps = act.get_num_possible_values() - 1 + # Generate random thresholds and sort in ascending order + T = generate_random_threshold_values(idt, ch, n_steps, narrow, per_tensor) + + # provide non-decreasing/ascending thresholds + T = sort_thresholds_increasing(T) + + actval = act.min() + if narrow: + actval += 1 + + model = make_single_thresholding_modelwrapper(impl_style, T, idt, odt, actval, n_inp_vecs, ch) + model = model.transform(SpecializeLayers(test_fpga_part)) + + # Make sure that specialize layer did not default to HLS implementation + assert model.graph.node[0].op_type == "Thresholding_" + str(impl_style) + + node_inst = model.get_nodes_by_op_type(f"Thresholding_{impl_style}")[0] + op_inst = getCustomOp(node_inst) + op_inst.set_nodeattr("PE", pe) + if impl_style == "hls": + op_inst.set_nodeattr("mem_mode", 
hls_mem_mode) + op_inst.set_nodeattr("runtime_writeable_weights", 1) + model0 = model + + + elif node[0] == "MVAU": + mw,simd,mh,pe,numVectors,wdt,idt,odt,impl_style = node[1:] + W = gen_finn_dt_tensor(wdt, (mw, mh)) + model0 = make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T=None, tdt=None) + + getCustomOp(model0.graph.node[0]).set_nodeattr("numInputVectors",numVectors) + # model0 = make_labelselect_modelwrapper(labels, pe, k, idt, impl_style) + + elif node[0] == "ChannelwiseOp": + idt, act, pdt, nf, ich, func, vecs, impl_style = node[1:] + if nf == -1: + nf = ich + odt = act + pe = ich // nf + C = gen_finn_dt_tensor(pdt, (ich)) + + model0 = make_channelwise_modelwrapper(C, pe, idt, odt, pdt, func, vecs) + + elif node[0] == "FMPadding": + idim,pad,num_ch,simd,idt,impl_style = node[1:] + model0 = make_single_fmpadding_modelwrapper(impl_style, idim, pad, num_ch, simd, idt) + + elif node[0] == "StreamingDataWidthConverter": + in_shape, out_shape, in_width, out_width, dtype, impl_style = node[1:] + model0 = make_single_dwc_modelwrapper(in_shape, out_shape, in_width, out_width,dtype, impl_style) + # model0 = make_labelselect_modelwrapper(labels, pe, k, idt, impl_style) + + elif node[0] == "StreamingMaxPool": + + idt, dim_1d, k, ifm_dim, ifm_ch, pe, ceil_mode,impl_style = node[1:] + ifm_dim_h = ifm_dim + k_h = k + if dim_1d: + ifm_dim_w = 1 + k_w = 1 + else: + ifm_dim_w = ifm_dim_h + k_w = k_h + ifm_dim = (ifm_dim_h, ifm_dim_w) + k = (k_h, k_w) + + stride_h = k_h + stride_w = k_w + ofm_dim_h = compute_pool_output_dim(ifm_dim_h, k_h, stride_h, 0, ceil_mode) + ofm_dim_w = compute_pool_output_dim(ifm_dim_w, k_w, stride_w, 0, ceil_mode) + ofm_dim = (ofm_dim_h, ofm_dim_w) + #if idt == DataType["BIPOLAR"] and dim_1d: + # pytest.skip("Skipping binary StreamingMaxPool_1d (not implemented)") + if (ifm_dim_h % k_h != 0 or ifm_dim_w % k_w != 0) and (not dim_1d): + pytest.skip("StreamingMaxPool_2d test w/ ImgDim % PoolDim != 0 not implemented") + if pe > ifm_ch: + 
pytest.skip("PE cannot be larger than number of input channels") + # if pe > 1 and (not dim_1d): + # pytest.skip("PE>1 only supported for StreamingMaxPool_1d") + + golden = make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt, ceil_mode) + + model = golden.transform(InferStreamingMaxPool()) + model = model.transform(InferShapes()) + + model0 = model.transform(SpecializeLayers("xczu3eg-sbva484-1-e")) + + # Ensure PE value is set + streamingmaxpool_node = model0.get_nodes_by_op_type("StreamingMaxPool_hls")[0] + #assert True == False + if pe > 1 and (not dim_1d): + getCustomOp(streamingmaxpool_node).set_nodeattr("PE", 1) + else: + getCustomOp(streamingmaxpool_node).set_nodeattr("PE", pe) + + elif node[0] == "ConvolutionInputGenerator": + idt,k,ifm_dim,ifm_ch,stride,dilation,simd,dw,parallel_window,m,flip,is1d,impl_style = node[1:] + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. 
dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + model = model.transform(to_hw.InferConvInpGen()) + + # set impl_style + inst = getCustomOp(model.get_nodes_by_op_type("ConvolutionInputGenerator")[0]) + inst.set_nodeattr("is1D",is1d) + inst.set_nodeattr("preferred_impl_style", impl_style) + model = model.transform(SpecializeLayers("xc7z020clg400-1")) + # set simd + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("SIMD", simd) + optype = model.graph.node[0].op_type + if optype == "ConvolutionInputGenerator_rtl": + inst.set_nodeattr("parallel_window", parallel_window) + inst.set_nodeattr("M", m) + if optype == "ConvolutionInputGenerator_hls": + if inst.get_nodeattr("is1D"): + inst.set_nodeattr("parallel_window", parallel_window) + model0 = model + + elif node[0] == "VVAU": 
+ idt, wdt, act, pe, simd, dim_h, dim_w, k_h, k_w, channels, mem_mode, no_act,impl_style = node[1:] + + + if dim_w == 1 and k_w != 1: + pytest.skip("1D image requires 1D kernel, skipping.") + + if channels % pe != 0: + pytest.skip("Requirement Channels divisable by PE is violated.") + + if (k_h * k_w) % simd != 0: + pytest.skip("Requirement kernel (k_h * k_w) divisable by SIMD is violated.") + + # Generate weights in expected shape for ONNX and HLS node + W = gen_finn_dt_tensor(wdt, (channels, 1, k_h, k_w)) # shape: [channels, 1, k, k] + W_onnx = _infer_sparse_weight_tensor(W, k_h, k_w, channels) # shape: [k*k*channels, channels] + + # Generate inputs in expected format for ONNX and HLS node + x = gen_finn_dt_tensor(idt, (1, dim_h, dim_w, k_h * k_w * channels)) + x_vvau = x.reshape(1, dim_h, dim_w, k_h * k_w, channels // pe, pe) + x_vvau = x_vvau.transpose(0, 1, 2, 4, 3, 5) + x_vvau = x_vvau.reshape(1, dim_h, dim_w, channels * k_h * k_w) + + if act is None: + T = None + tdt = None + if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]: + odt = DataType["UINT32"] + else: + odt = DataType["INT32"] + else: + odt = act + (min_v, max_v) = _calculate_dot_prod_range(idt, wdt, k_h * k_w) + n_steps = act.get_num_possible_values() - 1 + T = np.random.randint(min_v, max_v - 1, (channels, n_steps)).astype(np.float32) + T = np.sort(T, axis=1) + if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]: + tdt = DataType["UINT32"] + # bias thresholds to be positive + T = np.ceil((T + (k_h * k_w)) / 2) + assert (T >= 0).all() + else: + tdt = DataType["INT32"] + + model = _make_single_vvau_modelwrapper( + W, pe, simd, k_h, k_w, channels, dim_h, dim_w, wdt, idt, odt, T, tdt, mem_mode, impl_style + ) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(GiveReadableTensorNames()) + + inst = getCustomOp(model.graph.node[0]) + inst.set_nodeattr("noActivation",no_act) + if impl_style == "rtl": + inst.set_nodeattr("resType","dsp") + 
inst.set_nodeattr("preferred_impl_style", impl_style) + + model0 = model.transform(SpecializeLayers("xcvc")) + test_fpga_part = "xcvc" + + + outputs = [build_cfg.DataflowOutputType.ESTIMATE_REPORTS] + model1 = copy.deepcopy(model0) + + + if model_cache is not None: + model0 = ModelWrapper(model_cache) + + + node_inst0 = getCustomOp(model0.graph.node[0]) + node_inst1 = getCustomOp(model1.graph.node[0]) + node_inst0.set_nodeattr("ipgen_ignore", 0) + node_inst1.set_nodeattr("ipgen_ignore", 1) + + cfg = build_cfg.DataflowBuildConfig( + output_dir=tmp_output_dir, + synth_clk_period_ns=target_clk_ns, + generate_outputs=outputs, + fpga_part=test_fpga_part, + auto_fifo_strategy = "characterize", + auto_fifo_depths = True, + split_large_fifos = False + ) + + + # analytical + inst = getCustomOp(model1.graph.node[0]) + inst.set_nodeattr("preferred_impl_style", impl_style) + model1 = model1.transform(SpecializeLayers(test_fpga_part)) + model1 = model1.transform(GiveUniqueNodeNames()) + model1 = model1.transform(PrepareIP(test_fpga_part, target_clk_ns)) + model1 = step_set_fifo_depths(model1,cfg) + + # rtlsim-based + if test_rtl: + if model_cache is None: + inst = getCustomOp(model0.graph.node[0]) + model0 = model0.transform(SpecializeLayers(test_fpga_part)) + model0 = model0.transform(GiveUniqueNodeNames()) + model0 = model0.transform(PrepareIP(test_fpga_part, target_clk_ns)) + model0 = step_set_fifo_depths(model0,cfg) + + tmp_caching_output_dir = make_build_dir(str(node)) + model0.save(tmp_caching_output_dir+"/model.onnx") + + if test_rtl: + for n in model0.graph.node: + if n.op_type.startswith(node[0]): + node_inst0 = getCustomOp(n) + continue + + for n in model1.graph.node: + if n.op_type.startswith(node[0]): + node_inst1 = getCustomOp(n) + continue + + if test_rtl: + print("in RTLSIM") + print(node_inst0.get_nodeattr("io_chrc_in_concat")) + print("in ANALYTICAL") + print(node_inst1.get_nodeattr("io_chrc_in_concat")) + + if test_rtl: + print("out RTLSIM") + 
print(node_inst0.get_nodeattr("io_chrc_out_concat")) + print("out ANALYTICAL") + print(node_inst1.get_nodeattr("io_chrc_out_concat")) + #assert True==False + + #print("Producer") + # print(node_inst1.get_nodeattr("io_chrc_out")) + + # print("Consumer") + # print(node_inst1.get_nodeattr("io_chrc_in")) + + #assert True==False + #assert node_inst0.get_nodeattr("depth") == node_inst1.get_nodeattr("depth") + if test_rtl: + assert np.array_equal(node_inst0.get_nodeattr("io_chrc_in"),node_inst1.get_nodeattr("io_chrc_in")) + assert np.array_equal(node_inst0.get_nodeattr("io_chrc_out"),node_inst1.get_nodeattr("io_chrc_out")) From 17dffa3bb030512cf28bd65fecec3aa4da03c889 Mon Sep 17 00:00:00 2001 From: lstasytis Date: Wed, 2 Oct 2024 13:53:21 +0000 Subject: [PATCH 02/12] fixed docker scripts --- fetch-repos.sh | 2 +- run-docker.sh | 4 ++++ test_brevitas_debug.onnx | Bin 0 -> 246949 bytes 3 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 test_brevitas_debug.onnx diff --git a/fetch-repos.sh b/fetch-repos.sh index 6ce9ad76d4..a4fc124fa4 100755 --- a/fetch-repos.sh +++ b/fetch-repos.sh @@ -29,7 +29,7 @@ QONNX_COMMIT="2281a777d84aa5cbd7469085c2e534fb4a03ccf9" FINN_EXP_COMMIT="0724be21111a21f0d81a072fccc1c446e053f851" -BREVITAS_COMMIT="84f42259ec869eb151af4cb8a8b23ad925f493db" +BREVITAS_COMMIT="d4834bd2a0fad3c1fbc0ff7e1346d5dcb3797ea4" PYVERILATOR_COMMIT="ce0a08c20cb8c1d1e84181d6f392390f846adbd1" CNPY_COMMIT="4e8810b1a8637695171ed346ce68f6984e585ef4" HLSLIB_COMMIT="16e5847a5e3ef76cffe84c8fad2f010d593457d3" diff --git a/run-docker.sh b/run-docker.sh index b1fe44eb0c..4bfc719ec6 100755 --- a/run-docker.sh +++ b/run-docker.sh @@ -41,6 +41,10 @@ recho () { echo -e "${RED}$1${NC}" } +: ${FINN_XILINX_PATH="/mnt/labstore/Xilinx"} +: ${PLATFORM_REPO_PATHS="/opt/xilinx/platforms"} +: ${FINN_XILINX_VERSION="2023.1"} + if [ -z "$FINN_XILINX_PATH" ];then recho "Please set the FINN_XILINX_PATH environment variable to the path to your Xilinx tools installation directory (e.g. 
/opt/Xilinx)." recho "FINN functionality depending on Vivado, Vitis or HLS will not be available." diff --git a/test_brevitas_debug.onnx b/test_brevitas_debug.onnx new file mode 100644 index 0000000000000000000000000000000000000000..686d7f5a50427bff37d415d4dca3b13f239ce9dc GIT binary patch literal 246949 zcmeF03Akldas6qA2A+t};sglVq9}>VB%nf{%Y6?~K%A#cq(uZ2Xk;*89Kk8ZA!>}G z(SQR+MKmf7QTjeWq9$mR;H-Zq;t-=zG>NnSsyh2ucb$FDeeX43V)X9sJGZJ#NVb+1x|swmtr&4Ub-b(lIBVGPlEn zk30Q@^`{(6cciM(q zZ+8Bw&F2o7t8{bQ-T%y!#;$X16^>g)zIF=>bNkLoXKuTPocU#Lk@yDHZ%>Ks&{iYpk?M!@soVos#GwyccDW{%!#`&wZxYwhLexkaO&NTKkk&{PFsJ{+_7^##SlkRuWqt|Y4S3Tpz zhn`^mCu}(BF?;Pax2>&t%wx_t`_$vsZh60>AG+_7L+&tFjOJFK@Zh8OJ@)W5+datl zE4N*{#i5UR%-r)=&2`Z3d$~u?d$~vVz3kWbUUunyFSh)0FXr&>i)rW2?eNgk)}M0v zsT)rBe{lYlT65F;uRlZnZEQoic6FQ0J!|v1E@y6+;rcUlpLr=7h1q!XXC{)`hh zoRa@&mTK;^RCC{@n)|J}P4CS3x3*JXYwemzbgv!fwm$CE(@#8U!zs78=*m40Jb%@y zz1GZabMl6hk2~dzGf%$7V;}q2#<88|ZhFRP>rXu8#8Zwx=Hv~JIj;Q;;Z_$FwL=2k!Sh-tbCMd7*=JL1BG5fT?Wq01b3vSYvd!Kzb>HFX@bA21ktv>wBy-yu( zShM;*PkPWgALl=JIFJe%)_|w#;va{qmdP9{E>MOKJah<~P8a_yf6I za4Y{~d7t)gXZw3Qs+cmj-NK1a%s-vh%(YegQL|O#YqzlAf1{5(?cep|IsUgBb0Y8j zU0?3Eo&A9-bIN~y*r1)aicWmF)@&R9t#ZucPCR3;*KfYfV24%1rT)@8;^~_o`;vPP z?)J9Z4KDcDs=?1T-fHl(H(k@bW9)rF9_LhU!zj2SnGjDO&;3*sbt$EKudk%I!>9&J+eDQXJy$?Td@bc{r z8ytD{mV>+QaR0&0Z@tst-v6-E;3m86F*y3)?=<-70Xq&}^8RfG*WK~%gY*A>?cfbR zSUq^myKcGox|d(qT=<7K8~okR<_7h?aZT#nRw;8?fWglr?y!T^ApSatLn^(W-%gu(trOj2Z{lWY9J9Cfb!dHE4-Ou;? 
z>gcn3|6}vv>;AoY>H6=C{`^_b8@=G|?;m~s;eWI4uOIpB<|8lo+GxjT-*)3oo_4_C z15dlnVC~=j*Sg=^YKOr^dtW+w=M#4r-EEt1HHZJ{(arH+{m5wj`SzL~c;)DlNAEm% z#OU?S-CzFR(d&Qy$>xWjdg|zkZ>-t4?px=Le*A^+kAAl8)0;1^`sC=cz0V&#aK9%s zcl_9EnzcXryXK|ezj(Cw;k%EXa_pK*|Kq&RjefZL4x=k~|J%_eYhN{5^~cX29ku7G zOAlVZb#uvG?$L}M@YQusyXcCIi!c01^NiO%Y;?^lpVVx9iw`%W4{dB#@S66u*L3dh zUwqjE*6(-OqZiH@Zus1Gm!Ea?hQZr!v&H3ur|fh2v-h}S-Dl_bT>SKbe|Fg&?|t=U z&v?t9A9~m)UU%7t&%WQ~+x^SImwjSxblGKpbnNgKf4b{suYK}Pm+!UJp~GK1@)v^# zU-PQV?zr%i%O3jRgAaY|$yXeD`uElhpZwkrU3S%ugUdhv(>GuK`4`;h(BYT%ylnmV zPanSJs*?s!eBVKr{osXv)x6-Ae|-5jj=uk)pSx_c;i((eEdKsQ-yXd0q32)r(xYBI zxNgr+53b$)UkCs6;rCtk_D`I1`PSRKVDNh%zxL2~ZTZ+kpT7HD7QepjF^7KYL4Q5m z=a)Yn9Jg?n;g)~D%W#Xs&$@i{p-&9X`qtOi?Y-lM#n1lWflWVqt}_;ldp*Ja^&Iue`;= zyZ_^mg*TqHcHyyiI(OkVx4Of^?uXrM;dN)eVPW0J?y>N?&p&u!v%6lj(40BHu-Aj0 zzcBZz3l_E>tzUS^Z4O>IP=PvB>7w=y9*{+8yJmHa#U3l>h zw_N!6H5V?t<xMbMCv@VE29B-+X2Mhs`7G=fmrs`-|qxwV!NWa^5eShyL)tn=fs<`C#ksU)NlA z^G`Pye)zSc6W;cP=8vDZ<=_stzp^>)$R9VK-r|GJ0Y6+b7+!K!v&B2N9=!Y3HyK=h z<)}IEzH@`y-g$??npfO%@S1yWJ2?ETe{O#A%Y6oWymH&cga3B(!7K0e@6EMOeNXeF zul%IB>e-tQ-gx^vFZr{nye9wIv~a6o^O~avgZCW&t>)rSf2{f4M}4BXVAWxxyB`1U zX0+kXqkBL8OU;YUePgrrQQsT=-UplJb+3ExXxqmh**yOL-RqEdUHE&Wb542o=$UtY z^TD@RIJ5cWdoLb+?4V~fU%2|`qx-%1O{0$-wa4PxFMoI4U!8Z`#jXGB%gt_2f7ZJ9 z-uo3B4?OSP>-OFCiK7$#{=m_j&-ky={vW({^zu8ubK|Ogn$h39>6+1C$E`>Ia{5z8 z@4oBHn|uFc;RAC*Sw-TaNZZ6KX~&8UotrIZSOev|Gnhh zgTecDJ@i$Vymhe0iQifGtY;1ukG=MS#oe}je)G}m&RN`i@PffjU-_2Nd){|gbHNvW z+`QtZs~0c)^4FS6-+kcXr3YNL_~Uy$wHZ9|Ig59F?^j26|9@{7th?tmiw~Z^WU%nY zt4Gg$`1y-3{lMXa=RED77tcKCQO(ybzU^RDbJNA6UcKMq&5m3(*!`GK4bD02!gb%i z=ploxPuTI$W6yd`bL*d9GWzJp{(L2`$$vIowbd_%=biPM;RC*R_V8QJ`McrySASyo zhfle9c=Nfdhu_`f?}mrJ^xMNHzUafl*WZ5I`8#~z+rxtoK7P2>@4jdFPoMqI;aC6h z%flZX`jz3QHv8!CW&8c>aP_UXnm=LN(eTJSUO0T~{$Cz0Jo6R9KRxZD;nhET&+w;T z{nGHW`~70L%emJLzqR*{^H0A0#^E+~Hkr@%G`qd%kdZ!A(Ci{Eq|QI()~LThE{MmA@YT%N@3yfBac* z9-g}Tm&5=3>vs>2JNEB~7i|2@@KJ9c4PWvgyTgf>4mW({?}jHFdXxFDKl7u*?XLLy z;U2HPV0g0^zGV23YrZ}F+}FP{9KGk?hD-fx;=haRHJ!E7H=5u3>2=LsM{PcM*c0{~ 
z{Qj->9z5c4+YO#_$(;t9Kg_;|ciVID-c$D-d~l~*4BmR_9S2+Nx8vXmfBBiwN7sM6 zIsYqjgPVNg?t?4#xy#^)hi^YP|KcAv+rMwkVDryzHMr`;?FY}k(|4P%KKa1G9w+a) z__Cdj9PDuGtp^9~)eL_6ufJ^eKJ4hhGyi*s!CgN6?dH87f5_ku?mss;?C0-nR=x6O zgYP_e>%q(Z`@fsNxcAqZBk%L6=Fl&EZM0$S9)s7sb2zx<+Es(o&f9Eov)#5DoOs7( zaOq8V9UOCqFEzh-@Zp2aZgtnecP_pC;5B<~F?ie+M-Q%Byv5*sx4Zk`mnUsIc>F!C zZ9e|vFEqD0WQ)PqkNDE)ez)9f@ZYCxv*gbv{tMAw(|hiF#RvZITRS(aUi8I{`<=0N zwAFK;F*^O5cRysScdS~s)pj>&{^H;EAMJ4R4$X7-+S_o9EfeB+<) zz4;+;eEyp@KItV}kFNXK78`H#z(bpBPTIY>!!LJkcHc06>F?fa>(Srb^h+C0IQ7Cy zPx_sET>8iletxvs3!ZT4U7H7tzVWs%Zv2N&-Q(UrIR828c76E4m!A3gkB@e~{B!H} zy!6YL{`dojt-I(m`>%WP>rT7Zfp355A@6y}57vGAt5UzSjp{^tHQRy5^c69`gQIY&iHsi|Z~Od}hPObN*=Obu0MSwaU%_IK$;frs%=6xFDpaD;D4J z#=RE*{Sm)t&cDk=i+A4TpPKuAZim6X7yZuSIp4i@@%xAGGPuui_ZvL*tn&shzweVr z+kNZH&Djrl#^9>AK6tR(mFI8#=1rcyxXtahU;Nx{U$pV~pS`bn!;|j5?hzk5bMWnd zddeZ6`_9XogC6<3#gBjZIg1x;{*H~$`1I+`p0B@hal`yGo7zPJ8+>%Oq|;>C-e_#cBW-hNf{oW(<%osT_Xarmj%k9KQbI@s#OV;2wj$*Shc zb^qAB^gMiOf2CRfi5HLle&n0OTmS6s!%Kd6kNMTtZash7fB5w9HXA=We93`t8*cp8`-gXU z%If*g?y>v)#<$;O{>sC)pTFNZo6n#3x?9Zu@dLJ)f7>=+9lq(251Bu3-JRyYefDkU zU-pRa3?FjsLGw>~#H#s=c0F+ZyvN;s{`@>gEt+!ca|4A4Bw^06_wB=W>eo#Y4|0D5;EkCg4RUf3mj!2h|ZE_2)YCk4k4^D&?o?0tS}o;?UhlsYf2rPb9?Yse zmebXIt*3b^XQ{l_*EQN2_qxVv?n>{q65dKU%>SQ=zl!@`-#cDk-3@t9)4ulZNpGcl zP2)|&Z^+)YmR)Py>!o^q{OgPTPu%nR?sk22oBG~v=>2b~{`%Z;rkTBg*&CR>f!Q0F zy@A;qn7x778<@R;*&CR>fg9@$Z0dhQ-B@#1;^Y6lbz|+fsXm*^U&(!z;^@3-`BHlC zsjlXFkN?nT^LZo$jEy>$oExM7$$b?*fiT9Jr_-RaD(Xt14c#8)(JoO5$`EZ=$d^F%h`d&}-i=KFkH+>40b@9a~bcSi491anj=r}odg-i5)JU)ZRG0!b>$TM1>!yTzL;xkR>IFIYt zq0b}xvR8*EI6E;HUd)o+!_#*#tk$p>5pH@FA9Uav!`J=o@uOUNflI%`ac$Gpr&rP7 z+u74J>f2TYqenEKm|?ZfLeudzZhhD{c*WB>>FNGO2bVrxXwhgtFwx_MmKYv|rq zo9PaYXsxqSXP!D!`F;A-**yHXU-sOy>=PbVa}-ak^zj4t*xAi}g-btlO{>Cxf>-)z zA}VHieUH|0{K*{UMLrUa<@9xz)XL0hJK(mIHymQlD*mqZQ{#bEJ$UTx^|TZBD0(=3 zI_~Qnc8R>~ms+c}4<6iVPN!FNX!V}DBe-h&ax^txYjS4dQ`g`tjnnkgaQCfup=Ku9 z8mC`JctrYj4PHm*F|+RdD(h)8!B?_>>~8z}srhx?vid)n+25UxUSHjG$4&oQY|1_R 
z=H3wfuj2lFJI=7#8<@R;*&CR>f!Q0Fy@A;qn7x5t|2J?${;pi^b}RY2vu{4SzTR>( zZ_NIt>;YHjgAujoc^~d~2LIo@zwV)X`)=$vftUZrQ{3}jzT8v!IyZ9X72iG|)%N8b zzfqS_`%=TwdE_3+ljl1+?$I7Sfmx5EJaT#-m5*1(;#1ELo%NDe?;!K*Sz2pe@4?L2 zS^T&I5s&1{F6^hAm{W0TExRNx^YJ1wgXnm@+XUDA@YWvKgqQQn&gu=u%=9YzdLKUA zz=~Js3WuB+I`bi>cdd23V4*i%-_hCZ1uuFJZ~vJzGynJ+N)Ebt#@lxY>dB&>A3~k*Bt@zdbzS_OiPj`V2<~7{V*{&RX;3^LeG556DezGop zc(mR26Mw}#S8nYSeZQaXX?dsietGZ?pIT!aj?Td=@2fiZwVIymC%mWl2Dj)lm)bE7 z{>;)DaW~sx+CfVntw%lTQ_sm-_6xq%esl)DV8r0r%+Sef_+jK;(o4+d^yvb#o}SJ; zbTwx*o&&#*?iu$sZsd50w(YdLW$u0B`tisUwe+%Y7$>-WoZ&vlBEgSXxGvvZF!SNowgKjqc^)p7&^XncSeK>yToCB|p_z-nhu=L>_nP1kbE?{+B zO7Gm|`nBJzp1pzD8<@R;*&CR>f!Q1Q^?w5^`5WO^c?;%WpT9G%kLFi(7vCkG@^_`r zS#CeSI}iNjG`jzzjoKJdNrHu9)WDSFgI4dfTf$ zzE|H-kEtBLXVa>QUMWBBT{siH9&PJ3_RU&mg%4W8wI=s{;wk;WHTS;MVN!k6eeOtP zmd41=e>!(VS4Jm_Q+!g-?eBpdag6kc+_64iN1RlUyZ5-uQ9Lv z;M-Q>S9Vn#zU~`47A`muZRpt3xYK5)KYk^G*D>yfZ;8(F6MH#U<5t&)eNxlZjJ8&< z`i{Aa_$wbrW@YXB`BP>R=@TdB89!&1+__+yt7q+#{_cUN#lC}<8S3kNaPo-aRrBeG zuJDu-`+$$ie$*D37rfGUPd|y(xYf~joTn+6MaQNc?S;1JoS$ArPaVC&)4QP|=Du*v zC$Or8PE@p-&H-NZ%tm<^)!JYkhEJ4t=!LItPDx9jqr^d*Of7@xj;M97>$2or*keGeJ3s2 z?SV0e+M~{-k6-l5yHL$?x$FJtd(ph@wtaG<^6;ToN4V_kwV$YMhm3}6uVr;VlUsK0 zcH0UMdd&Bl9&sw>d)YqpeJ%$)dd{2DQ~t6X@mR{YzQ6ARrt`?dt8mCQlIwns)n}=` z_INk!Iu3^FH0C^p)F}FX!K<0b9xbaDSEY*JY1ieDH2sl3;>=WjV?IA1l6>!Y*K zIHo%EiUp_Kr_RxPOYS{t z#Z==sJ?0WaFFOU7`DIRO-KX^MM-T21e%+()XqVu3e9BzrsE6h;zh_4{=!uSV=g^ME zcdYB+_IRV$zV7Zg2b}pt`X0f;x9kHJeZwM_U9_io%Nl>`o?~>J;iIu{-`vZ0_ z7f->)&TWQojZ-trYkVVL#uvSE!I;_WXTG8Feaj4VfjcI>%pEJwQhL1do#0nIvv0K? 
z_x3}K+_Vv=aN+dbqg77-MB`CUXYaaadt{&7IAHF>{$L)#GcOo;9c$jid#`7+gGcY# zQ=h=C&(Rs4cWU=2+^M?439aYU7X2jNl)CiWs^td{zV~^WR$p`B*F1Jre#|dEx{GT$ z=!lNjT=+3l{Ui^a`QnovHD;RNN>pFq|A?c^GJ{A4tq zdg$y=xUf*E`(K*b|yqa0#Xwk7#FJ5`+eU$Gr^n|x*mG6CK!Ob12`{JFvvIltj z7u^J2_~<>x>>fON#RoqAS_iMv2T!#1eO1qnVBu$4Jo>a~=wz4N1xccfAK)C zyJ=qcsVAcB#QliQjmLMh9Gs|ow%ze(=RV?-S}>2J@e_Tkr=8@QPEYPPMVo!maX0lL zhfm!Xr`^BC@lrf7+edMq<8_TOzYh<7xmRC5Jn5IH7~aHqFTw+_f$R4M`hLb~G?LN!hUwwjadvH_>kEmSk7#KSiA9_peh*!rOAB*XsnE5Ae z`BDeoG>-P!p$|h(Pq~i89oeyGr|2((r+4x3nP8Edo|?#B8mIO8aL3r4J9WH#rac@y z>igi~Jos=sn0P53x$jaQulo+3!K=KKpF0Q#ZHMRc3MZpse&8o=Jm!M6%=R;SXbV^S zp4Ym%4o>lauX`{f_Yki0>w`Z!`sr!<ui7h37# z6@6xvsD3^_cktXeJn-;}2*+Xvcb)gZr{%`$=$uzD@bnP8Rdu=WiK>fTOwV1kPA{MB zkMlJ5+Me(=JYvE6`1D(5`) zn65K()SZuq^Zj_;iLSQ?jJ-OwatgOqw4Z1+(ob*k2yd@P?d;G=KXL0PSNb*9$4`#W zsGjIKRy=b@!?IU2Q5W7s?wr!o9q@xs1n=x)`o0ss>R)*E9OX^SQH^ z*1f^oZr2N5=)@l%a3Xs~EIZ&&53WZ)ic?24Jw2l1^`S@fKJOEw|JCPIJBYI^I$EO{5b-$j+Ecm)-ougG8e|)@-JifO6)bA0W!NXT^IsQs`a6H!c=<{a>?Il+I{s-3?Op zchKB9b&ZZl-@QF&1`!@HII=T5eBfzB1J`?C@X*&d{TnOl4(`c3aE#7_7hlUcH5PutV=g?`)iVP#4LF*dA@dR|GeTSSnMG6%7?ED57Vn%_ zX0T)8g&Vl#ooJe@OE2b0-sb}r+~Scs(`H_JJ_D>Tw%zu#zGv*`KITggKXlP6obWI6 z=z}HCx#fXFFYC&Ke&`xllhJv8G@`+SKKFnd!lDhXd9i2KJy`gS+L`dxOZ()+zTN0` zVm-IaQ%>Z@t7G4ciFbj&#ya=9V9dz(G8(`0c7yiQ`+j@}B6*7xyoU!rqV<`bo(1dl z9UF~zVZRQi^w|SW*4FRv&9hUNev6i#c;ZJt_VIqtqrQ=gck-KZcaPGCU%!vg8JEL8 z6R~)yK0U1mKQZ$Rtn|Q%>Vp<6Irv%(3{Q(uOP~99cs+fc6}fWEmmT_e>5azuo@)(e zG5KJ|3(Vq#zjN&2sBdD01J5}e@`+uFCw|V?+0+ruPkWub%nGdVQd8@+-xN>B>Y5qEj&_R1z2LW5yB{;?QN!CIVb$fJp$Lf%nM%6 z?1}wN3zrB7Z^g4Gy4c5Nap0uR_b`6#2cMW6=ZhacMEt`~y+v1h;Rnu4V)n(4UOg8) z_ZWe+5j=I~nYnSKShe7p$*HV1FPu#CT(<2<+|y^-6J8}xuHfo3!AD1I+Wq4mg5jYtIi}$RCN;RnrS1kV z_g~7x_VUC0wrW39FC6WYyVg&iN89QDgkR6;XZNFM`@1c=63$Zk8`|Ujtkh>y`78CE z@n&ye_6Gj%yn&^D4(QJ`&Htxv_rEby@76cJ4@0x!cPYQiT)UKxemz$;n#1>~$2mNT zdtU3cKQXt_?l{MLDKFT{asSYYul8M2_Y?CwsjlR~uX|pXhdq|_iN1O2J~~f*;Hqcl z<`H>`ZPoo$U!4~^-@EeYy$c-UbAl&0-b8rIMdmNXQx5(_*LC`3r@(FBPOsiy`p$Dt 
z9`@8aPDd#D2CTr|yA!)ES!7Q!Ml1t@jANzZ0437ze!8c%S{8 zlf0f8eZBOz*_@7#;rP=h;^%ejwbV@aDgD$}-s+JY)71OW^H}f3jL;N+IQV4rKK_Lp zo&{5X=Ph-Zfkx|EM~@hq;?Eo+z1UNBtUZ-S4u?5-hL_*b{XCLejaISb6mD>YOYOYO zOTR_hS(@ig;=?}SAzpoTdKDg=+#9{coK1 z3ct>+_M`o%dvN8%9>pv1MtnS%e(=E@uY09tcJ>Rmvt!fh!D?>Qri*^@&3y}=J$OyK zyKCu1G@QDgL%+VidUWpH=cSnCxu@czvIBjOhB2q@b}{-nztUvC_$1G2KX?bvw2Ent zE-;_rk9p*ynB{2kML$ui4;~WJr+8*Ps&*)SpA#IS^Zc>eBfaF&FTLmsuT`|4scE{N z>EDN&SInlTwPW`9QIDtg-MWoG-G_rGwa47Gk$p`AH}2^*{i+up z^qpgmp3pg7IP|C^O0VX3c*WZ>Kg?1OdM%^3P-^^NRQ^qo1S zAAHHf+iD_rn8wR~(M;&vd(`^z$&SIbUT3eu&3WE04}4;Bn(++BqVJgCs%aVHKJ)SG z=!?&U#^&hV@lb8%#^rDq_UzzgUpU?)hfhr}V$s!@)|#`oV}5k^78&2I?uXXzUhaff zp9fmFQKKU^?L;`-PwSR3{;Ye`W_RY5Ib)apY$>nstM=`8xp7eLS-VZcYu>D$y@A;q zn7x5t$`(v&Xr!R3qR}YQBLU1LrqESM-Yarf}+6 zo%`87;aeg({_IYjeZK=*%Q?)NT8k#Vi$3eZagW4|gE#vVgD0AtZCG$j8=Mj&FTJ+Y z{lu4fbg8}XC!_eYZ_AkcTuu`+iodFJw#{Nc@8bto=fmr0Wmhz{kLr{YyfR-k_|xz7 z+I^jJf?}fXcMBUKzU3WG>CtbyWu|GsJARI%^H@Lh z!L8O4UOhdX0UvL#nVG%fn|@Z~kx}M!yb4!+UBhv%AF%W*9K1TwJxv!}kNS8|A9{{i z&aBkJg-0|zxL#Y%)6o|k9=*DDEf_u#-P@k%6|YX;vBK#@<y5sHdr$h#@sr-x z%erats6DrbgNH{y^uWP7HC&?k(dS)+S@DZr&9$0Y^^Eiq4j%Yx%`Bq#{M4MNn|5yI zWkzaBKlGkUpV{<0`UxDKX#(>aj9lvAG%c6B!?zwgP54QV**H;Ww3&JWFLRSq{5ta- z11GoWXJXI7i=N~`hCixwUK^J~&P>NU`>4K07ktB2j|clI$8*kv1|N^3;#n}&XwI&9 zMHEf=m=_0Mubmsw`sk_K&g33C4^C)&W;F*Kp3bHhSkbXh^4+sMiNUqV&Yii&_oF@S z(+g7hnAoe#2rhHXSNFpoKWaGj%)WW4Df7_cNeo_@AG`Wa{y0W%5q_JMn!a)Uq>kF6 zV;nrQww}&Uyy(5ozWPXx1}>3V%ugTl_FQ?H7hmoXxL_eFAHlI+`b(}l&DjZzIpBDe z5qbCIH|;!hfg47w?}cCZyH=i7)y3e`3*V7ellP5X3P<(e+%3F(zHo!%nBetH?{UcS z2yW4n*Icb*j`-25wHJ_=p@!TFg?4dqj&I6CU(yCA0N0&QH(MEsNor7|?&f=$ns64j7z> zg~P0lpXj46UTE!@s225)xgnN$_I~onLkJC;4r&zQfG7hz-g=Qo_L{2j%gj^1W#+} z!*h-w>b5HLMGGfpnr0e5`aZ`wH)@%wQud8TJdF8dX387%qr3QemZ>TTHhaf zZ8qlV4!UQ}r^Y{G@lJp0QR^QXnVlNzFLgS8l~>=s>XrN1T)631<|e21E}GO8KK`{X z-j{38mE8GwkW;e@p4l%RQR8W`(=VLdqpnBo+~}8H`juY#Y3;c0|d+6vz zL{s#A9`I5p-0Uev-gI}$-Fo`jE1K99UWst(s9buTyQ+`->*GCY&+SPM!!!4u)l+Aw z&bjimE?uvVZ>5c%=yPsUcF?Fi&*{~9;ib3nG)EU09v)p+N5=}De0Xq$<9bfVt8mjp 
z>cG^)bMM%7lr+BMA{h0~Jb^a)BH7Dm-zxE4HbgF@CnCm(99B?9i zG`W*{xOb_8Z+Y^p=O=xf>rdgZe{c$}e9!fubqp<7?qEKg4o7jD?}t4i_xXhb-ibvM z!lG53?!nybn-(l*lzBCt{-uvcC-&@;yPB@=Fjsq_DPEp)@bWtLvmRLPi>?{JH}o1) zU-A0(na~vt+>ADRVqeF4X7ufiHhQM>12&Pnr+dS*S~<*&SU4@C@CqLcA8L<^;iJC6 zF(1b`XyEs9gil}f?vXwv2TR;Mz|e*leD9ZsKHA_Fz4rR}Xt)m?j~sel>rRpP?IGIq z7?JgOSTFY|Je?VHizmDq>*KR)M)TlaiIsWbXLT+w--y@+UtfwPZA zj?M)0SmWrG6Fl1kuIkA9_EA25ifPUactrQ@JJWFIsxNuQ$`8D6hGO-5sdemAfHe^Ec^hHoMF04g6nv1N=<&o%?>?^kI5G)hm8O^LlPwuf(6pQ{= zpXzrV8jrP3dwsd`eFk&F%j&*U^OPI=iH@2WT-#sgg4I6M^}O&Z9=b0+>PNWU-kaUAo*J(ZI^<&^!=gxK}QsqcqZ`iNF_cJ}Y;#%|^xGmWF3u;A1_p)oBu+Vm=Zt`ClJm4l9+M$b9v*Q4)ntnUYY zTeY9MgW{eC-@Tjhe!U#;+r#PTM9cgRxAe0&YF(J=xF0;p19V*V#>FRirH6)IXuQXR z&LW3-Xw)Nf({tR*PiG&)I<@&Hug**D^U|j=KDGD#F-*^df8hjf+|W42V;HByDO`FU zna9lZC|(_I@x?Rr?pb_OTWYk92iIb9jT2bmrq(&|gp>NxrzYx*j<0#--8^@_MGkfB zMc+7nqDSsMdjchBW@gbkkm%h%> zx#$veKC{Wa2Gd;ob?$YIjS6OX&IR*mPvFEx=8{{lzDs&3$8&q=MKmpSdK6yn>b3Ex z(M3&*TWu9W8WjE=op4sR!iq0|TgeY7_bnuAW4UN`;l^wEgGdap- zM|crU+jo>3Jh=yVQchsLN14rBxOnI+H1u4Px++J9243RE_tTkIF!0_e9PjJV8S3jC zt#$9{>F&+g(KQ_PRty}^PAq!mbhPw|qqYklu10#goA<%t>V2bcGXm?G;ad3W+lMi$ z_6iU8ktcVVGE=?CHAdd}=lFiyQ}?6~ruPZ4>&++op4(2z^V~2$84ISmmeJ~hi-*Js zPVt2w(XhOSX*r7J9?q@!y0>amW1hMh~om|_Uv`NV*j`O{=bP|)7hPVv){mP z`uFUn+;62n=lK^AzYDA1q4chA^nL4{J9FfB=}KnQe&Mx3kI))d_tAIM$(PxCsa-q% zm5=U_`7-XQTX(O=-i?S+;ZyFRzB!+@8TQ$q9=z-#5-rXHxF8`tK9j1>X{wPHKt7r zpDNOCncZ2@D?YJX%sY$k{!0a?M-c*91;qqvgRHjib-v z;mbH)YaE(2j#6&-F z|I%}f_cXRu@y4Ssf{j{_^zqsr4)4Wl_NcMs;-yi&9G_By>-}S9;WN84%l#U8YH#lq zPDh8vd!^5vat}BbInMEu+`?(m)+=+kqrxv-{KLn4r609(DlB=Wm;Soi#gAEFc%!fT z=`m$S>U_V7<8}H~{pj^zvCpV=#V=!p2Uk4OQ+PQatcvVkN8V3(nuZuN@o2hth7;PV zhkx!<`mW(MqLlygaVegMai#g;%2U*p)eG;ny|%(AvytC+FHM zKYcY=%V>AY+;=Qm)o0&kcluUyI`|SE&UbD+qQ_??v3_qW(ZJC?!B!gUxpj@tax<3e zrac{kLzZ?1dR_@>Um16>{U-hH#c zi13N{Wi5Y0IOaMycw}AvjtEYzt9q(M&rFTlV|HrGF3#b}9lci`{DNZ`9B_?r@M9Jl zcF5Xhb6n%6o*S6-l+!oI`+k_Mv9q7uCH3ade6+ep@=QZNqUeHeHF!khcjkMo8qW- znfA!JlXAJAW6ASfm|@ZJPF;HV;L+ij#!qO;O}7-cRZM>3!G7WEUd4A(G483B{m`RR 
zPV~$duI`(>YW4)j`-OvsnTcCJu~D1rXXpwic*1cH#ins{mS|gLkKQ=$;|IUc+brs~ zJHA_Ry!s*Oo7xUJ`E;wtip}ld4;QW&7;?e#b;t> z@vD7Z%R%EcxknC|*X|RMdG62S7>{AW^PXXmmtI>HonwWAPjVfnXFLx3WQ1ptlRESz zW`ERX@W4Ih9gIVsIDX#qywvEOAKcQ5+%S0ASIoGoVJ80RVVcZqjnz6MbH`1mb?Z_v z`o>A`l53rN8()v^*x|b`PwnTpVczE?w_x?W;G4z|+_tKGD&OEc#?w@})yXNnPA&bw zol|)12=37jxZV#u_ZYQ4f#Yd0YwdfkwefO?lDEn+ubv&g#iMV(#CqnW7JZq?U350w zh{mI4pWZpuOf;>!y;ES!B^Dj;qQmi?J$hna)B5S;hQY1zmMwkswPrN-#AD*lqQ#e( zT=&%nUyGdV7cAF#F zk6Bw!_n*KEw^g*CxwrK)Hy=M<_Z@ij*dc55=E;Ghp5*l4dW?IRK4ujkzQqq-_?x$K*-bryXWGE3UEt^5 zrLWvp*?uZ7a`OdKP2Wz=<7j^x28ZZ8&+#Ge^lb+Fba2;+HfIVSEans(jG5Z2u}-hd zL$5tB)knYV7yDOwFnp$Y&{sZOdJ&6%@J8Kts{EyL*ZR>&-geq|p1I6H2cMWY`&q0{ z&F$+ur|;`sb8kqkd)yG8m1uRA=6zcF`tQGwHx0YKx@msb2j>pgM?2HZ-oWe)%-+Cn z;~QA&=Vtsq?D>7#_nwyWsrh}sXQ%xRfm{27>JA{-$sb^RI}sb6@NEQErR0pPgNb zHuH+dId|Za1`_0PwJ=eChl*%(2d%t`NXd7X*}*9T*vGokLVnG zGIx$+MD00F&-?zAI`jogFYobq&vTFZIA%|3yysYnfm_~*-nS>Pl26n<8a&iLa`%95 zxt^uC@YF+n9OrPKh&HR{;o*FA8dcZF4PAAI)RaA_;o|SLANmo|Sk!ugSIvyRYl>d^ zb?&tv_vh%{6wBPW^*-JUUGY&5ywTKAwaNu2>U{!Zmh6+>qiP@FpjAI~-s6C2tb27G zT$zQ|dE~sCjL!FcD_we*9yQVVdbm&eimu~rI7jCuX1;hi#vH}qM!(FVKaC$dRI~9> zEqUFeu4B%#z5Tcz5A7w^vu}rIKI{sfzTPuE2OfPQIJ-m?Egp$mFSNpG5y1;5cq8qO zbLPQe|J_Swr_&patGrW%)%QW6JL%kB zAM=Ed-Xpr+UeRdJbLxyu>xftQ8c$E|WL$LGC#MJB@$fCW4qj%sCOAan_8q*v2S0Mx z*`tR#yh~4Kql;MP;fV$hIDr=~v%pkKFLX9LdZvR*KWgv8AsV+n_=9_`$2~aYXqe;u z?D?#s!7KR1m!5jo7=EgQPgG5vd!0|28=jW4yViITb*_4M`ZlBQFqdAf*FL)h=DUd} z`Ltc=`<&1hEjXC#^)S;h?FGN$21j;MpBpP@&uO#j9hu8Lsi#HNPviW$;r|zR{&jnY zvwdc7VD<+3-@vc>=gST8Ux`+KXY}8&A9Zh@enuDV`yJDVbyefvtJCf>4WH(tIP?47 z0lf}i?#$hFeq!bKyw)+p^m=qgC$D&}Nw1!sdIq=52iMv7C@=b=2~MfmtE00Ws5LSV zZ|`yNq4ubl?ojKZ*W-mAtwx;_v+A8kV@B%AjK2A`M{jMO@jg73oGHve7tuIG?YHQ- z7o4W`&b-Wsx+jl4Dott~FBjeAy4>sIy<&j}yX>G3=G;V$G6eMa%hd0vZVf^VACTE66c zyf0pG>!>r-Tl3VIA3LLqnmInxp45U{taouw&-VSEF9e-_>SAd(LOo-*o!$u6CA^ogO^8e@|v&QI`5koKI~9p z9~RoOWArWO-6-F4KkU_3lb@YkY;N$~v-o-#t_d(6_EJui52t*(98yWU6l==fAKI=s@$c~*0DF1hYsV|YXqUuyN8 
zx?cmUyep~cbmo@6`o+9T=bF@6>Mi8hFHdPU6LLxqkA?Eaqpdcj&3}9(V{Re5B5K zBeHSC$C0IU(KCG~Rx|MO`@iXB*obNMgU#aP#SAX(pJ;gPzbxZc-Zrn3=@7WPw zVxMnlM=-kYXpHt}wIA=MpWQ`kt#|vduBxq^2RCxPyZ5Z7ulhbr`<}0agJx5>Xl_Vc z$-I?tZpaQZ?d%QA-oS6y8@QoA*H`kkbjDIY!<4rYUpPy7!s(0gyTpH=l;1JLnhQQ{ z4t;c9gSm#>`*;vjW4!vnA=1Y$=2SX*a5VRekMxXh#W$WCyh zF8=U|s_ST*wgZn?^Xr+Di{O3f~{uh;1-b2`3;v%lh@ zD?Pmn9M!~pxE`DF)91sU?3bRdD|*GkLph;CGt4*m-m~0K#=>vWq7_c^z?S1% z-w(RTi-x^Ackx~mlK#E%E4*6EEWCAI_QXH?hM~c8g6|xjQ9mDzM?c|dn#e^PwRvzl zzVNM|c+rN=bL&M;ESTbbdFm^Vm}6eCuSe^{ZL{g0ImbOkr#b4xe1<=+vFL{$x#Gby z?nFG>uM|bdE-!6gm z_&Z)7Ff{l(uak=wO>hjacMdPXr+PQzZDN+y#BpbwF*p5fh93HksSh}@PeU(s#n*E` z^vJaiY#OiN&hew(b-}f0p(|#`W;e9`MCmq}0qu{lsuHn_u=P$=) zM|jk=*HX1=>YUo6&tOjN2hZ`~I^P~nCpRBIwO9C8nwrxp$Dc8)_=TtGb=H_U&gg3- z$79Uy`mv+#sB2xX%HHo0!eaTs;ME8RL1> z#EJW&wMdVM#v@qlR=DIHUNf$=&s_pDj@IEXIvgfAHdjyfE8ICTrw=cf-$yx7S3PFs z-9@AN@M&81w4LB&U-)>%-aR{{ru2em`aV2&_P+7)uDOaQ557f?>*zby=K~&G1M%S08!yUE0j?qU0&ucxpe`>^^zH87bwiF&c^XhPXNB8$=PxPoutobc# ztO>1i?Fp{raBDngjanV^V;9#O&Z&Kq>sr;D&JS2ywVz<>6AvG!*+@1n~dd-my^t2?O{KXlAdPUvkv zI8oajIO$#PK&`x-V>v&}{K5M?a*od`4}6U}3tq>s8UI@iPpgT_Ytiv% z>}Yd26LZw3@Zb<@E%U;==$!)|xy+=l{T%0buhvsDC+m7o*#SQynyh6GGuev>w~pFJ z3x^sG(fiRWJHuZpvNJI}>O^fz_;)f0vUexr#my7T< z);Tp^b*;Gf{IIV_?Slu$@4kZjhllxuro^`DdTXDV`1eKl$_=b%uh1J_pIS?wdvgCe zCeLTto$zrSZRGTg6LpE?s!Oc+>CW&p#$4y(-NcCAkB(RIS0cU>I~1O6;EaI_JX+l;l|^E0svUdD~S>;191jxD3Y3qQ1I z42!z-(#vb(WY6-Lp?KYg8RWva&J^v)DKv|bCAcQUN@PJT1? 
z2CKR3UE^N+$vN)j^O@(lV4mwqp5USH(cpugIe5}X@4WOMmA%{-Z;PB>57Tg%*&giz+@gfb~M(HU8l^1%lptd*-;xi0`M{FJ2PrPCVeAko*?ld3OHU5&ECN54b0xa>(p9&^yBXY_oJ;)_mWaz-tW+BxnSkLZ~b{et5kTGMc9t!l{o z{Lt5tIf^qApPCbW*lJ<7e{X~a`?;cFVjm={sr6MBucPJ86_4l%Rb)q5QE z3Qp;z-ZbQ{Q6K7vrh`}egTt@qIIagh(R&?@?Zk0hkM5@$=fGtj<?DP%e1m8J@hcA7NaMY{jGhcPVDKpcf%!8kM zG_|r@Pw0zpmoz%$wG8oI_sUpT0+W=?YV z4?Z5rJN=@GzUBB6)muH_hgb1PZkgpC(YKltzJ}8S_sGnQ?ju}sW_#o$W_&+cyGMDz zgCHF6j>dev=HyNVhZ|bKqBfoO;b?^C(U1E_bgldO)04w#*)0bQ53RXFc$r`H%sXQ3 zPc5-!R($X4(~S2>-yRS4%exsT_^#(c=k*e~&S_m*kIY3|bD0lU`}Bx#^1fxS360?# z|>e&|JB>4N~rd+t@D_EPKp!U;@s(dCZ9*FDs)&Z#|e zT+505Y!-)Jy|?!jV{dkge&vhCqaHNeLF*2$_-Q})vmQIR#yQ!uJTT`-9vHR7(3IFJ z#vbU2={CcJw3@N2JH%S?^R>AX9eQ=jPDU1Cm&&f)%%8_r=b@D8uXKX`(7_&(2i z@px`e`f3ixqu}a|hhpT+Bs#7KPsco0p7!W>w6>F;*h4UC^@NXiaLT>B7dw=ieemhi zhNjHKGkLO0jj1O(=D}5OFujZB%EeDH^mU9q?7pf&$3E-|CnNW^2-fGHe$Er_#QvkU zUwib`%=3tk?x`5_JJEGT8_{x3YUwxQ8E`!2JnKhY_~a4kiAOyz#P-2HS&J4-BYT47 zKC*}QJnz#4Zkqbk`_OmXk9yU2LaUrs)!wt#M_uiW)+4#<6iaX8DhGU1V(rU)m-A2_ z_n)Rw?lg|$^)5?sZ>-+`#>^XQ_nFV^4b0xa>$}!k=hvLtgL*k0ch;!>j&u6FQpcSO zU%AS`%cF7>C-*)aqV_!3{pe{#N7SC@s?R;`4)8mD+o#r0J3k)P7OYi`J++RR&NCe+ zXB*ykI$Y5?mtIEkz!#p@i50)Ri}=_4(3ro^3clrFMDS_*fWxmNdpO>ARPX)VMfMKd z_hdio_3;8LI=DKsZx_YT1XgCD&w1=XEc4j0H!8>S&Ysd^R(KR%2Wzw%zmMQs4`1Ei zvHIwpLtAF){d+al{M0hrBhP5OA3M|$eMHlBYSZgsw`mx6${D_g@1T!ieJ44k&mG{_ zHMqynn$Ba?o>xBfTE_TOch)?7%x{{m?&}U{*a2=t-_3nxKl-s3`SwKZT+OZKByN56pzje*Nlj}bA1gDP0uhT17`k)bR>( zZU-|BGds9>_(|`|bDCGHD)ZbMpAys4dXXER959PH7tDDDZw9(>QOmwBmMcJ~v%h^FyFPOnAVPxthb(Y&KBv8~1!Uf}E$8uRjg zeH076@aW-748P(9M}2iJI2ayvji=-Ffjfb-gJ^=Qb>XO%o%?*jh^}Mj$jv+I;)_@E z3`bk{oZ~0@<`a3v_g;O#bnXPleEcxGt;(+A$xd}leh-(PM|M$+oc-z@LRa>S+-3x) zXI7s-e!4Tc&Q5QgWqF@xt;>3eo$z5FGXtVa64C6@;!~Liin!rmxNaaI4YA&@$Ji*k1 zd|FRuB-i)v*{83cvrXfNeWv=lw{SyK>M*E2@Cn~h+nmU!_%R1dy%RnB;dl(pW+{ih zdcf_(qC<<0$UORSm$JWd>b#zt{b_reXU-`8eY4yLo_c0p?FPP7to@W*_qbQ=CU-&O z{CK?A*YCrM#yyxv#P|Bd8^dF{xqUOh@?F{emOAij4*D9O)w4G+djqpKFna^DH?Yz- 
zFzt8AG<=%xrt+uxEQk9XW|2?*J=yd76)kh>T5VqcL_bJa*7Nx%M>oet6iu>Z3f}-+A04BRll@RG8k4oLLzwP0VCQ ziHecCUwstQc{$7c;5Y`ZKB*J`z^ta%#|I1FqV40u^H|S!K4+rW(3#ZH>FC+PboEZ~ zmx{4F9wj=z-YYQcGh6#$)8@d9{cO+rXy1EvJ;7&R^J0Iz*#%s+_;#@31(tf*Q@#57 zsr7xz1Lm=JZt+jQ(O3&VqxHh4j^>ei&+Xy#;ok3XL|^;Eb05`F=Ny^26c+x~eC6ZI zT=)@fhiNBt)sASq7rD)?4|8&E#pTX;-~&JF9&eq64_MS?R*m&>lT&M~#*3Z2=G1$G zdqfvGKBmP7uKJ%?@p6T%(UYgr|HsXf-kej5__wF7&5GX7uT@H=2B3 z*(-C<>zu$`YY)fsd`#;cKX8bO1;IJO)p@Gvf@p%TZbK&pr&Y{y(Jq1dLNfW!nYk{i{LrGSn>QuK!yu6TspC-1UeEj@bn zuh*D!&2R551u!^kR?XerqsJKC^E*d62a2M0W!6sXJEcyV^X2-S_SyJ4FMYyecdq7X zR`}^;&veWi-SHC)&Dvvk;c!}`S?j!*?O5uHhqvq9ckR12-y-kwhr7o14xY}s%Zg<` z_FHl4(W$wMCt7w(O>xlS0~g%ip&Z^6SKMm(ai;l&Pgv-D{-h0l_TWTZ4}A9&XYyQh zyvcd)GvTZDJia#^MIAhuOB~(tbDiNm<@CYv$hX;C`e=ANJ2W1e%;OD{SJa%VnbmVX zT*btV<30Ld?(J{rea|qj-;Ng^KKG)!?1QKG(!<``o`$(U{OLOycc(|AsDs~CZa(gx zqS8GEUpf9=)?V&lbJv}yU8me@as2o^@FUIiEozVZxVzrYLvx+$IXm4K#ory?=6%nl zfwR-^uq*xM`yRb<=eJ^LujZUr=BS&0(0A4Dt>`D-=1uh1y}*AHc}&JzqU|M{-WXE5*x??0gb$Kd#U@+%MVPN3Iu)xpTw_vqj$uH4l#`?UwVc*rkaoHW6kbis3e=Emv$4a2W@l!y9tr+(>tM(P6RFoRmrF?=WNH1+qSn0neJ-ot)y924KYy@w7@@2waf8U4L6pW{8{9CG?y*X}tt_&$Hi%?nS( z^fQmdtnNAGO<(coXs*t>Q_qqYH@H2cbVGf?(<}HMM>S@24ySxR>igTr_2IehxATRQ zJBGf$f%H~qzdZ9BL~qp$Wc~>^W9W65F{=HZ$t(VvQ?L8AAGLUv@~p(t@!M zb$W;sm$Tc$`{{x28I}F|6qerJw>&+fc3<9jaU7ZZWPY;mA&(&{r^&T2wJKS|g zx$DH~eav&^xL5PfjvnW$7y4DP^1Q$H)*9{+PG525_&0UlQ4as8zv4&p9^$S2TYdWi zw=Zz}0=F-4`vSKwaQgzk>MyYVy=6D=F&uR3-6D#E8v zgwu8Cbj06j9P@mr=AXO1FlO%hIKZJ^#;SyI(&J7KRJ`sEuR}Q}x;(E_>d>m80dp~|ewCQg< zgQrKmA5VH{R($oQub3L`Ryp^k<-S$7=h_=(qjJm%xyvEDyv^)2kZtu5{}&$wvT{ypp z=Zk00us49YmoR+jy9byw;F*VRJx6`~cYHKHN4&=48_*-V57^2V&fMl~c4k)cH+_73 zW@9_a|5Mht+Ra-{;HT>1*%V`TROD-`Vrrb9ukN;n%t2ZPx7z+`ho=3*5fI?F-z# z!0ikCefR?F-?e^OZ_2;te#F0De%xIi(%skh-Y@g-l^^x}+QWC*{W?6<-)SC$?R@zb z^c2b6bBC#q-nnbOYS$XRaQt?h9fo#|E7$9ualWX$e?jLA9jG+IR&Ct^h9*cubGY4Y-(c8C_iDG_9>n7 zd>8TA0SzA6i#@=Tr&+?v7cLw$#2t6l?6fP#>-3Brc4vJHwYcm|L_cwsxA`n|yvg^* zE9aC4T6Ec~@3ziFzw>0?eH`Wz@$uQMj~yH*Uo|;bZ{f0g)a!vGynPm3_QJ#aJ0HjJ 
z^&P>SNA5gfs+pIyb`lnsL+-rhieI<+QTuTn^IU^Jn(W2=m9z4k|4`oj@YV611~Ps! zd%{ZNm~!?bUiBAUzFqIqJ?(esUUA&>==~EXd^wu29z1-$^AeBQyW>3d@gSms$E?Qn zrn>06XS=4Kc!A-S{J=OneC5OS$WE@G`nCtSYdWu8c0M~yUc5b{&4Ev!x%WjhaMl`p zjlF-&f2;U*P|TFW`UAe$0D++x~OJLvQ*x@85j``RwqA=0CQ7 zd(?{}TtxxUFjel9Y@7;b3qc8-E?T?HlU- z?3r%~Oy{AU;AV_(cyx9?uG#f>Ue4n!5Bb28-;^)=;!RBc!dZ8MOI-WZ5A}0aeRDKK z*G_ry9Yga`=~H{8cVBeR`*^>KUhaDE>$sbzUD;RKqm#UQ`bWCobey+q zPk+DXzsfiG{9omq$9Vs`-h8yn-}U^Xy>9sJ3*5fI?F-z#!0ij%zQFAZ+`ho=3*5fI z?F-z#!0ij%zQFAZ+`ho=3*5fIZ~qs#{r8W*{lA8{7u>$U?F-z#!0ij%zQFAZ+`ho= z3*5fI?F;<2et|FlpIOO2wqe!(K@OuQ9?d%AF26IlDA6IOZiqW1mCvtN967*obiu&XFdTphfK zi7lSFc5$+f9GrQ0`i#z3zGmnK&cS!B!I|f}X{KhBC%)A3M4!3w;NaC!et53@ zs8^q?H~hi0-Q-n%SDM1F=7O7c;sl;vYyVVlsCYGR>G#{i2X4lwyB8WxW9kR4t25!z zOHBR9;Y^tD>fr+~>*#UX`!u||E>73puN)7>D_(FK7yn%Td#|>Jg9lc zKrZ}gPVt?y>ie6Rw{to_UC(gs(LSjU?c@`);45~o@~lJCy~qza^u%|~dDN|2`%!bG z)5)Dmca=+Dy}=$od&lVMw3qmc25$1o3oN~b8QxxjwTAq3@z0*s?IA4q_zaiA3a8h* z2A+P_mq%i!5&4J>>(0wNXP{ktrd`gL zeR@{u*db%^{NOJyoz@y}niHN=9Xx9E?vcIFWUoBoGkbK054`NjJG7mn7goL2!lS2t z-1PGvdG)gIyvVQS33L23bLiosE_;p6 z!Z|JV&Z9ijSA4(4XRgmC@43?Qe|e~`n|)#65gRK`>xz?o?~xOO$H_TW_dZeg zJkyA)GilT}zW49r(d#*QD9Y3C(#siO;zd4T-c!Fgip?Xh=(zLQBm4NA(`NbsZ+Z`W zV(I*Q^{nW19(kUn7#i>?hCb_x z^!)w3S8)98Ls|3Jc~7HgJr$qu_t`yWW+Nf`+~zuoxGguweq^ntv<6m2F{r@=bCSE;(>?0_fTF}TxMi$ zyel7cXFBB*_0EGCik#$?kFeyRH=G_3Ue>{1&pL}a@V!sI_VDB_cyx^Y&=BS2IoyHH z?`tgfZ(Zowr{{u+7guNPC04xdN9{P8fnIy`T=~SD>AJV&>0jKQrMfuj5jiJu>o;DQ zYBb_f6Q`X*lX&oYo@sp6VD7zpj{Af+v!)rs;YFUk;x?~1qkH+uyFjM7rNO(sPM1BJ zAKdIqQ~5wHb^18(blKarys@Krp~E-xz&O4Gob)RWeCAJldF!4nDvz5U#8dSZ14~Wh z$+6g8#ld6cr-wMjqsEU~arHQy;a;cV?b^(b@I~ufc4rs&WY5ORhgy-7+?wy6c=Wr( ziL28*bZ2{4ah#VT>5AtwryhQayBvM>n32(Y(WlotSHJP7GZ$Wb>WqPDPfqVs7@EYz zEq=d^x_WpLn|B@)n3Z^7lb>;z+dlFazwU4w`p(r8FRl*Uj4NimGCxp0CT%i=SN8V3v0;xEyp-42{m!uC9Yaj2?WSz4~Mw8aQ2-zU$)b z^p4?^Q5yLWl^@i2_`J?T!;#+4GzWj;c`&D$fw2!h+ZvwY$$TOgU#DkRotXM{yN-T) zC|~NytF*nd@h7Z!(+uY-4-N;d^T{(BpZ!u(9=z54$UVaAh(2QbYF1nwcAWO3M?96b 
z>Q$F@^x9wixQ(k;-*9>v3vUhl(5(ITi5_ooPUh;#L$`S7x*xdbc%kcFcw{cGx>FrK zc&qQH=c6qy`i$tUM;|}l(ERY@_-y9UQ$*W5pBFxjD=**lsTwT37L0k5$9edGu?s%J z+lyQp@>BJzPHy$ttFhpsb5D5e&{6!lW6IFukvz>ZU$o7y-ozIpo=*D&p0!2Zm3Z)}rGeY}rQf*u4OM9ukMuJPF0-;9Ug@hCm(y=6eRJUl zj;=MrqMzK#eP{TJ%@NPJ=i%Y=GY_1z^6~OWj!)L^U4Ff9^~B>ms6ExLig9%&zv_dp zduP3PFq4>C_lJMB@1(pq%y1o;NA-1bj(IhPU-URDw`Z0Nbn{>r*ta!ag)bxn{meIYu&vm(*>CySlM|sX-m-Fo3GYjv%qV8PU zxLGTYUe7mtFs}Q(mupMcp6Yr3JluTN6&HSRuXu%_A&Qr|`X|ea-#hGh!SB9uX)3zs zIwN^-7yj6we4@wMd34Wt=BGaUD6Vz(7Q8v-)4QI{OL^Sn7T$UFx_`RxMs}&0zs^Xn z&~?A^#O3d`a$#{>=Xvxwu4``eQ@_Niub$uYWhQec{?czPxV$`9UD&$oqzz8aDQ)J9 z7Jm1N%e>Z07ro|1ow@jygJldH|Ma5{4Ls(wcl1_luuBsjIuqY~eHVO^FOIw`;xX06 z>Ao~aSNyv0J!Y;xdibhW+&PhRT1#K?RG!4Zbc7TAt&In7(w06oYcFcfs&PK~E+!T; zi0t8Ydc(0Dl4Es$hv4?xl*h%7xb92c{qW7)BA(#v<=X7kOmIcc;f^r}|JDgBZR-Nl zS?hsbJm6#u4?GWRe8r8Ni%;s!2S0ebJ<;?``GyAvkF^h;-kKRYPR}bZ>d8Yr?y2&K z$pQCBFXFUcaavb>IP5v~O_R83c!%#Mj!t9JhX#z;7=FRap6bZ+TCe%=EKXc$qL**+ zT$tg$$CbBqaoMN2_vq7G{kR;T*Liv1No1ba?m6jgkH$~+K6)0np27Fy^TC;~T75V@ zQ!}{c2e*0F!z-h-(Q}Wh8Jj-7?GZklE5GWsul%^t>s(mWI(Up~;LP+MdB^f=4|$ev z>x+{$d2muw^{8wg;cz1+_jHMq7u<@2b>XJgI9-Qd@4Ct1O)+?WdXMnekzdBB)0;XX zb-#;VafkBq6R-4>UOeOO{OBukl20Zipc0Dln z7RJeb%@)>maVCB7;H)007bmpLLx0sPuhm03KX?iA+-qhiHePwxns?N%9LAHy*7Kr+AIYD=$CFMihYG9CL&{>52+T<a1x7nXT69r6H}k?8mj)g2oU-kHS32}Wv=Q4EOdN3K)P=3yT}Pk8n~5t2 zTrqphh}bn)aQfFk7`O1|ue_+UC!G9r!A5qeb0_){nS%#>-(4K~j$d)*cdkoLT=A&W zGx{89bv)*N(grWRiNY3qs0)vtPIIK=N@q8D>IIg$dQoR?_&_c`)X9@3E@z&*|NjSg z+5Z0l@W@!RC#?J@U$EdOt~zo~zgPX@q$hR7#Lng6JTOJ!aVNduy%!j9)kMASig)`< z%N4&gy=T;(?`G2{wjaJ8%bz*ylUnuYyJpReBc0B=*FXnOzcJJIyudiZbUL0#4VD_? zc%2;MQzs|z-ZS*=kJsAwT6ysLQWLAkjy>1<^t40wrGrBZtowmw9vHI+GHQAmi`)IE zQ$q|7c#T^x_fTC}azoQx%i9NjL@*-z;)hT6%d2zw6ed16(Xq~@LAU0o^00n-MNLd? 
z=>|IdJa5kG7q#!p+Z1)^WnMFSS7{4NGW@{&Ode36Q(h~<@r<%!=8 zbC2Ye4;szqko!)_GfvjA0~{he_gH;fTl%RMX1?tQ?wa$=O`Y?K+kWDgX7UZp>&!#z zCv~Nr<}6zLC%$y>PQz6mI=x%et9I&#e#=EiUo}|eIY_fs4qT55CwC}}eTclFYw_ZC zeY)(DSoNEux;U*VPS@4%-O&^eFZjgdJ2!l$UewGc4#s!`Z)0w1ZRT$7*HVw0q zD?E67gVq;+t=ZePoiASQ>pM?)%uc=SpnhZIr^}x7j-tN>4tsYjzw9eFf92pK8?znb z4c&K-^JUi~-)Rq<-F|ow7tW-IQ!)2kuq{74lN)i`Q`!yYzC(HW>Hc^U{d*>_yt^Os z{4g8Lz3FAtp1S4Nxnt@@tGH#fE6k(%;YbNq?y z&YUSSL!OaOnyM3ve)lS;&gfX=M>`3J&mIG*_gdfa)SPC{U& z)3Q(aroZ%&U(FP@;^OKqJ+#F14bGuIKIVl6k2T`eJnc|E!gP9H_3U;pFwdpWI-+|` zIX=d5@8pW3e#G>n4s2Hs^fD8zV)7%ddDrH>&P#3d*K_5Mhhp)s;(6Jjxap-{>3o*& z2=09EOW(cl(@9LY-{@WOggGbj)<-XMade}n+t{uy9vqLEOCwJ4$eovS4t$(1jShR! zQ}ok2pEk1t+nw}+U!30SJQU3l&$w$Yc4EcpI(*zC@xqkrpvydZ8Qn+xs^NB|r~b-O z&u{eG8!r9oWnY;3x{*(qddtnexXW!k_?cVmZxYzm4EIrAH8FAyd*JPywU-?ExL0^K z9=Up5!z)hW#c2=Kj-BQjuhs^)bGXv$ss@Y5+2Jd%_2$$11(R0CeX|zth#UCCS-7bK zEACL(o9LhB*}P@T7q|Sb_Kltnc;;{;y03Ej?FT1%jd`wq+|&t=yXIa`^IYe7`LtK{ z+{bgxKlGIsedTdeD{O<0uJzz}BtPwW?V-M3bc&VdF5(z=*egwYuC;u)>OIsCE~meX z^k_@J<%kEjqIlE%1*f0!te=-1J@37P(=(J;ox+13aC(o^Omx4T8GYw=zi>DiYd$mR zQKJ>Ev*PM=n5h~bIUKL?cJ9Q}zS=Kt>a0eaxH#%nf8quPXJi*H_?51_;3v<$&leBo zItNVAPvfE2ncgM;<-(bGXvK{@HN?PDLp1zqm*~Nr{6pJ&m_E6s8OoY3j^Vuz+Uj5J zy7ci+UiEs;Y3$UTs(a6<{T$9hcdp%4NA9QRuei8zb$FxauEVpj(t4dn^`m`bW^%-j zoa6H*4Bz6Tfxr4ni+|@a$90M0TRk+*3w+YUx7a?;RX=L{hRgW5V*Unt{hsjAY1}-U z6Bq|x*P&0$bn1a+zHl6KZtf{=@!?G|=5RDa2Y%OqPhREMyr@^*q;-Adoas%4yRLcR z4>x1g^cA&>pR3)%-`^lE8n}tU^GJRw57l7g)ThzA@`^rZ(mudnxp*jguj_+3+1ISN zo~MUtM)&fDi7S0^cd@;LgKx*s;zi8wUY`6KIKMG*xaZ3b?GqUJ$Ogaj>Lf zX@+WX!6R=jxW()R*CY9M&Z32z2JwU0;JHU>niKP;?`59CIXz$exH`nlrLzlmZ!E9e zOS$RsZVYa50!ODLzleLXD%MN=Ys?liZrw{nNY$95g=1^VDl5w|rbjzV4K72!2NCrKPTXs})zbxaf)W z@Y~kq(VFl|zTw#uE-Yemiv!La`o!!Pcg3;}PdKVM!gTQWoLTJpP)~iGooC|OA#Ua| zmzf#87LPh2{E-b0wAnMhbLcsSjyJ7u?ES(s^~~3d<yT(F*q5k2BY6~@sb=rG?UZw!(+;+g9|_36gY=Eb>g8>tSjyn4i_GE z#OAAKc+U{F;6qhf@0}LzT8kT3r+4&P{DYgC;Huxb*QV>fu;LK2=eXzM7C(o?58so# z{dfGrdd{gl6pi2S6lR?E4s7)pi1~yzIe1WqmUm21JmYHakv?(xthe-&4o>tYyttZ~ 
zIJ}5{a;M@ZPkG(0L&Gj;vo0NcIEOv>=7)ASOT85z$k-=);l*Jt_*y4kJFFbv!+D8o zU+L<$*1XWrBeq^1agT8EvL{cw%@1wstq(TTL+ctmi9ToLQ7h_9-!En+R|SdV{z2JiJmTjcSjPa~cnhj@P`m@gtAW<_u!-(y)HCuX@$bNbT5p<40e;CC~i= z=lCx493#)3yzGLx&U0qNqn77{mFGZhM)2S^-?dAQe{!Wg4Lxrl(sxg|^s+Af%4?3# zXI8}SFZ$8Byf1m;xaK@q>@&sUg1J85=6&Vi1)kifTT{IWH*7Fx54;pzm-*>}^<4P{ zKHb95Og(Ycx7$3^jY*fEy-(>=Qy#GRWX^_@%BjWi+Hv*h6V>N}c_hD&Lyss8H?aIl>IIg$^%_&ZIK9tW zm#+Qj7e2jBnEG%c_H4tvKk-jv-|k19dTCuR9(Cl+S@8CTtLXW{-}*QoJyH7gsux&$ zsV=PdiT;Ga2N$0!%yIHHo{gv8+6PBA6oiV{!4|hBjwf&ATSNG!quub$_(P>b6ad+%&%O^>E`s{WPlM%Qz!H(@7xt8vEsFMs~Yb5A}Ie>wcr z|9JYxKmGC7zx5=v!TO!km!B8zxTr*zw`Rj zAHVk2+b{h7-RGV>_rmjcpLp_I-7iG^!s}nSyDQQ=AAaz{XYW28tS7~|d-g|nFZ}tN zuYdT%?>v3)Z{B?GwLi|k3%~FW?>-Z(-%Hj%c=EqK_3VpxpZ=>4pT7OUAz|_PlLMcB zJ&*g-L7SI)<=MYrTG_1q#pOTjT^81#?O8qRV3)<^YkQYJy8Fle8DDyFPwQa^E3Y;$ z9oanpy*J|fq}_Tm}8Q(nCOP4Lg}?uPAQd${l0U%dOwcb5t#~`O^>Hdgtv}HtkmO#qHo5?cEN@7q{{M6}S9j z^2)Ql+X4CFHvXgK;&%5e{*ly|+AfpbKY#0|`-f*&-|#^DrMrK&{f73NWc-4^^6Xyf z%lZ51e(Fp9TklcS$GE=yc-NxNuJL<(e4nc)KgaL?&En79|2bZI{O5QKwHBYLvG~{D zcw_$$=e@sHUwy=KHSsaj`Z_Z;mK(n(H(&J+@9e)!sefGg14Ms##J`eWdG@cIboHC} zm21w`KU*8l)jwY1+&%mJUHt*R{Db$2XV2f&AL1|Yuf#vN`_%Wo{qt}B+3VN;x(l1} zXZy}({Q2HC!Tt98Z$16fZ!g?ew9NQZf9d2h<4=Bf`P#d0{Kp?}UB=%Bb}r-Z1n%;e zKmV)0{hMzr+*jWF^3}iNKY#a`|Nhq7Z@>1&(+{40@UwT{eBpQh^4tIE>)-$C%kTZ0 z>0kEu^6cOG$Gk83Z-Dw;@=w<3pZw-`zyI=k6SaP?Jce4oWgbJV-#G`=ugrf-slOB* zzwO`h_mu~ki*x<`=Ggg5>mQoO)IYiJkI!S-vQhml=$PMceQ@{1x&F#??0k7^{hA+B z_iuerwro^?BRn3{Z+-BqEY9_}#AD~nTkALNF?Ije2W9_$|2NX(IsMoNf1y_E2fzE} zwe`>4@fPu~9s8@%y*&F%(!D(U>(aeE`%g*7yf5~j7YG0D2txfya7?Iw$sZHy-`{(} ttDn64o##LM>OZ>s%4ZV-g*1&zm5Nd8j!NDfIt7-{{;!Y4|o6o literal 0 HcmV?d00001 From 140ec1535e9afc5a8ed43ddb0cddb983950df03e Mon Sep 17 00:00:00 2001 From: lstasytis Date: Tue, 15 Oct 2024 15:31:38 +0000 Subject: [PATCH 03/12] clean up Signed-off-by: lstasytis --- run-docker.sh | 8 +- src/finn/builder/build_dataflow_steps.py | 36 +- .../custom_op/fpgadataflow/channelwise_op.py | 83 +-- .../fpgadataflow/convolutioninputgenerator.py 
| 301 +++++---- src/finn/custom_op/fpgadataflow/fmpadding.py | 103 ++- .../fpgadataflow/hls/thresholding_hls.py | 15 - src/finn/custom_op/fpgadataflow/hwcustomop.py | 4 +- .../custom_op/fpgadataflow/labelselect.py | 102 ++- .../fpgadataflow/matrixvectoractivation.py | 103 ++- src/finn/custom_op/fpgadataflow/pool.py | 97 ++- .../streamingdatawidthconverter.py | 198 ++---- .../fpgadataflow/streamingmaxpool.py | 247 ++++---- .../custom_op/fpgadataflow/thresholding.py | 91 +-- .../fpgadataflow/vectorvectoractivation.py | 103 ++- .../fpgadataflow/derive_characteristic.py | 1 + .../fpgadataflow/hlssynth_ip.py | 4 + .../fpgadataflow/prepare_cppsim.py | 5 + .../transformation/fpgadataflow/prepare_ip.py | 4 + .../fpgadataflow/prepare_rtlsim.py | 4 + tests/brevitas/test_brevitas_debug.py | 3 +- tests/fpgadataflow/test_fifosizing.py | 595 ++++++++++++------ 21 files changed, 1051 insertions(+), 1056 deletions(-) diff --git a/run-docker.sh b/run-docker.sh index 4bfc719ec6..296c3f42ce 100755 --- a/run-docker.sh +++ b/run-docker.sh @@ -41,10 +41,6 @@ recho () { echo -e "${RED}$1${NC}" } -: ${FINN_XILINX_PATH="/mnt/labstore/Xilinx"} -: ${PLATFORM_REPO_PATHS="/opt/xilinx/platforms"} -: ${FINN_XILINX_VERSION="2023.1"} - if [ -z "$FINN_XILINX_PATH" ];then recho "Please set the FINN_XILINX_PATH environment variable to the path to your Xilinx tools installation directory (e.g. /opt/Xilinx)." recho "FINN functionality depending on Vivado, Vitis or HLS will not be available." 
@@ -88,10 +84,10 @@ SCRIPTPATH=$(dirname "$SCRIPT") : ${ALVEO_USERNAME="alveo_user"} : ${ALVEO_PASSWORD=""} : ${ALVEO_BOARD="U250"} -: ${ALVEO_TARGET_DIR="/tmp"} +: ${ALVEO_TARGET_DIR="/tmp/finn"} : ${PLATFORM_REPO_PATHS="/opt/xilinx/platforms"} : ${XRT_DEB_VERSION="xrt_202220.2.14.354_22.04-amd64-xrt"} -: ${FINN_HOST_BUILD_DIR="/tmp/$DOCKER_INST_NAME"} +: ${FINN_HOST_BUILD_DIR="/tmp/finn/$DOCKER_INST_NAME"} : ${FINN_DOCKER_TAG="xilinx/finn:$(git describe --always --tags --dirty).$XRT_DEB_VERSION"} : ${FINN_DOCKER_PREBUILT="0"} : ${FINN_DOCKER_RUN_AS_ROOT="0"} diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py index cf81ca3a93..8d6435a25b 100644 --- a/src/finn/builder/build_dataflow_steps.py +++ b/src/finn/builder/build_dataflow_steps.py @@ -82,10 +82,6 @@ from finn.transformation.fpgadataflow.create_dataflow_partition import ( CreateDataflowPartition, ) -from finn.transformation.fpgadataflow.derive_characteristic import ( - set_ignore_list_for_ip_gen, - unset_ignore_list_for_ip_gen, -) from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP from finn.transformation.fpgadataflow.derive_characteristic import ( DeriveCharacteristic, @@ -560,9 +556,19 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig): model = model.transform(GiveUniqueNodeNames()) if cfg.auto_fifo_strategy == "characterize_analytic": - # should RTL sim only nodes which are not supported right now with - # analytic characteristic derivations - model = set_ignore_list_for_ip_gen(model) + # RTL sim only the nodes which are not supported right now with + # analytic characteristic derivations. + # To do this, we first check if the characteristic + # function exists for each node. If yes, we make sure PrepareIP and HLSSynthIP + # do not generate code for them. We unset the flags afterwards + # so that a repeat call to SynthIP and PrepareIP will indeed generate the cpp code. 
+ for node in model.graph.node: + node_inst = getCustomOp(node) + prepare_kwargs_for_characteristic_fx = getattr( + node_inst, "prepare_kwargs_for_characteristic_fx", None + ) + if callable(prepare_kwargs_for_characteristic_fx): + node_inst.set_nodeattr("ipgen_ignore", True) model = model.transform( PrepareIP(cfg._resolve_fpga_part(), cfg._resolve_hls_clk_period()) @@ -570,9 +576,8 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig): model = model.transform(HLSSynthIP()) model = model.transform(PrepareRTLSim()) model = model.transform(AnnotateCycles()) - - period = int(model.analysis(dataflow_performance)["max_cycles"]*3) - #assert True==False + + period = int(model.analysis(dataflow_performance)["max_cycles"] * 3) model = model.transform(DeriveCharacteristic(period)) model = model.transform(DeriveFIFOSizes()) model = model.transform( @@ -637,8 +642,7 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig): "depth_trigger_bram", ] - if cfg.extract_hw_config: - extract_model_config_to_json(model, cfg.output_dir + "/final_hw_config.json", hw_attrs) + extract_model_config_to_json(model, cfg.output_dir + "/final_hw_config.json", hw_attrs) # perform FIFO splitting and shallow FIFO removal only after the final config # json file has been written. otherwise, since these transforms may add/remove @@ -648,12 +652,14 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig): model = model.transform(RemoveShallowFIFOs()) # FIFO sizing is done, we can allow all ipgen again - model = unset_ignore_list_for_ip_gen(model) + for node in model.graph.node: + node_inst = getCustomOp(node) + node_inst.set_nodeattr("ipgen_ignore", False) # after FIFOs are ready to go, call PrepareIP and HLSSynthIP again # this will only run for the new nodes (e.g. 
FIFOs and DWCs) - # model = model.transform(PrepareIP(cfg._resolve_fpga_part(), cfg._resolve_hls_clk_period())) - # model = model.transform(HLSSynthIP()) + # model = model.transform(PrepareIP(cfg._resolve_fpga_part(), cfg._resolve_hls_clk_period())) + # model = model.transform(HLSSynthIP()) return model diff --git a/src/finn/custom_op/fpgadataflow/channelwise_op.py b/src/finn/custom_op/fpgadataflow/channelwise_op.py index ef9ae2f789..f0366e3d83 100644 --- a/src/finn/custom_op/fpgadataflow/channelwise_op.py +++ b/src/finn/custom_op/fpgadataflow/channelwise_op.py @@ -233,53 +233,44 @@ def execute_node(self, context, graph): result = sess.run(None, idict) context[node.output[0]] = np.asarray(result, dtype=np.float32).reshape(oshape) - def prepare_kwargs_for_characteristic_fx(self): - - # key parameters PE = self.get_nodeattr("PE") NumChannels = self.get_nodeattr("NumChannels") - NF = int(NumChannels/PE) + NF = int(NumChannels / PE) dim = np.prod(self.get_folded_output_shape()[1:-1]) - # assert True == False - kwargs = (NF,dim) + # assert True == False + kwargs = (NF, dim) - - # assert True==False + # assert True==False return kwargs def characteristic_fx_input(self, txns, cycles, counter, kwargs): # Compute one period of the input characteristic function - (NF,dim) = kwargs - - delay = 0 + (NF, dim) = kwargs for k in range(dim): txns.append(counter) - counter+=1 - cycles+=1 - + counter += 1 + cycles += 1 - -# + # return txns, cycles, counter def characteristic_fx_output(self, txns, cycles, counter, kwargs): # Compute one period of the output characteristic function - (NF,dim) = kwargs + (NF, dim) = kwargs for k in range(dim): txns.append(counter) - counter+=1 - cycles+=1 + counter += 1 + cycles += 1 return txns, cycles, counter - def derive_characteristic_fxns(self, period): n_inps = np.prod(self.get_folded_input_shape()[:-1]) io_dict = { @@ -290,25 +281,20 @@ def derive_characteristic_fxns(self, period): } ignore = self.get_nodeattr("ipgen_ignore") - if ignore == 0: 
# this node is being derived using RTLSIM + if ignore is False: # this node is being derived using RTLSIM # RTL-based flow super().derive_characteristic_fxns(period, override_rtlsim_dict=io_dict) return - - # Analytical flow - + # Analytical flow + txns_in = {key: [] for (key, value) in io_dict["inputs"].items() if "in" in key} txns_out = {key: [] for (key, value) in io_dict["outputs"].items() if "out" in key} all_txns_in = np.empty((len(txns_in.keys()), 2 * period), dtype=np.int32) all_txns_out = np.empty((len(txns_out.keys()), 2 * period), dtype=np.int32) - - self.set_nodeattr("io_chrc_period",period) - - - + self.set_nodeattr("io_chrc_period", period) txn_in = [] txn_out = [] @@ -317,53 +303,46 @@ def derive_characteristic_fxns(self, period): counter = 0 padding = 0 - kwargs = self.prepare_kwargs_for_characteristic_fx() - # first period cycles = 0 - txn_in, cycles, counter = self.characteristic_fx_input(txn_in,cycles,counter,kwargs) + txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - txn_in += [counter] * (period-cycles) - padding+=(period*-cycles) - + txn_in += [counter] * (period - cycles) + padding += period * -cycles # second period cycles = period - txn_in, cycles, counter = self.characteristic_fx_input(txn_in,cycles,counter,kwargs) + txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - txn_in += [counter] * (period*2-cycles) - padding+=(period*2-cycles) + txn_in += [counter] * (period * 2 - cycles) + padding += period * 2 - cycles # final assignments all_txns_in[0, :] = np.array(txn_in) self.set_nodeattr("io_chrc_in", all_txns_in) self.set_nodeattr("io_chrc_pads_in", padding) - # OUTPUT - - counter = 0 - cycles = 0 - padding = 0 + counter = 0 + cycles = 0 + padding = 0 - txn_out, cycles, counter = self.characteristic_fx_output(txn_out,cycles,counter,kwargs) - + txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) - txn_out += [counter] * 
(period-cycles) - padding += (period*-cycles) + txn_out += [counter] * (period - cycles) + padding += period * -cycles cycles = period - txn_out, cycles, counter = self.characteristic_fx_output(txn_out,cycles,counter,kwargs) - - txn_out += [counter] * (period*2-cycles) - padding+=(period*2-cycles) + txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) + txn_out += [counter] * (period * 2 - cycles) + padding += period * 2 - cycles - all_txns_out[0, :] = np.array(txn_out) + all_txns_out[0, :] = np.array(txn_out) self.set_nodeattr("io_chrc_out", all_txns_out) self.set_nodeattr("io_chrc_pads_out", padding) diff --git a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py index 284bd31a42..72a0440cc9 100644 --- a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py +++ b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py @@ -278,42 +278,36 @@ def execute_node(self, context, graph): inst = getCustomOp(im2col_node) inst.execute_node(context, model_im2col.graph) - def prepare_kwargs_for_characteristic_fx(self): - # key parameters IFMDim_x = self.get_nodeattr("IFMDim")[0] OFMDim_x = self.get_nodeattr("OFMDim")[0] ConvKernelDim_x = self.get_nodeattr("ConvKernelDim")[0] Stride_x = self.get_nodeattr("Stride")[0] - IFMDim_y = self.get_nodeattr("IFMDim")[1] OFMDim_y = self.get_nodeattr("OFMDim")[1] ConvKernelDim_y = self.get_nodeattr("ConvKernelDim")[1] Stride_y = self.get_nodeattr("Stride")[1] SIMD = self.get_nodeattr("SIMD") - + IFMChannels = self.get_nodeattr("IFMChannels") - - - dilation = self.get_nodeattr("Dilation") + DEPTHWISE = self.get_nodeattr("depthwise") - parallel_window = self.get_nodeattr("parallel_window") is1d = self.get_nodeattr("is1D") - # m = self.get_nodeattr("m") - # flip = self.get_nodeattr("flip") + # m = self.get_nodeattr("m") + # flip = self.get_nodeattr("flip") - SIMD_COUNT = int(IFMChannels / SIMD) + SIMD_COUNT = 
int(IFMChannels / SIMD) OUTPUT_SIZE = OFMDim_x * ConvKernelDim_x * SIMD_COUNT INPUT_SIZE = IFMDim_x * SIMD_COUNT WINDOW_SIZE = ConvKernelDim_x * SIMD_COUNT if DEPTHWISE: BUFFER_SIZE = ConvKernelDim_x * SIMD_COUNT - READ_CYCLES = SIMD_COUNT * (ConvKernelDim_x-1) - (ConvKernelDim_x-1) - FINISH = IFMDim_x-ConvKernelDim_x-2 + READ_CYCLES = SIMD_COUNT * (ConvKernelDim_x - 1) - (ConvKernelDim_x - 1) + FINISH = IFMDim_x - ConvKernelDim_x - 2 else: - BUFFER_SIZE = (ConvKernelDim_x-1) * SIMD_COUNT + BUFFER_SIZE = (ConvKernelDim_x - 1) * SIMD_COUNT READ_CYCLES = 0 FINISH = 0 @@ -321,42 +315,87 @@ def prepare_kwargs_for_characteristic_fx(self): DEFAULT_FIFO_DEPTH = 2 - multiplying_factor = int(IFMChannels/SIMD) - number_blocks = int(ConvKernelDim_y/Stride_y + 1) + multiplying_factor = int(IFMChannels / SIMD) + number_blocks = int(ConvKernelDim_y / Stride_y + 1) cycles_write_block = OFMDim_x * ConvKernelDim_x * ConvKernelDim_y * multiplying_factor cycles_read_block = Stride_x * IFMDim_x * multiplying_factor - max_cycles = max(cycles_write_block,cycles_read_block) - baseIter = IFMDim_x * ConvKernelDim_y * multiplying_factor + OFMDim_y * max(cycles_write_block,cycles_read_block) - initial_buffer = IFMDim_x * ConvKernelDim_y *multiplying_factor - - READ_DELAY = number_blocks * ConvKernelDim_x*ConvKernelDim_y*OFMDim_x*OFMDim_y*multiplying_factor - ConvKernelDim_x*ConvKernelDim_y*OFMDim_x - READ_ITES = int((baseIter-OFMDim_y) / max(cycles_write_block,cycles_read_block)) - - # assert True == False - kwargs = (SIMD_COUNT,Stride_x,Stride_y,OUTPUT_SIZE,INPUT_SIZE, - WINDOW_SIZE,BUFFER_SIZE,READ_CYCLES,OCNT_INITIAL, - DEPTHWISE,DEFAULT_FIFO_DEPTH, is1d, - multiplying_factor,number_blocks,cycles_write_block, - cycles_read_block,max_cycles,baseIter,initial_buffer, - FINISH,OFMDim_y,READ_DELAY,READ_ITES - ) - + max_cycles = max(cycles_write_block, cycles_read_block) + baseIter = IFMDim_x * ConvKernelDim_y * multiplying_factor + OFMDim_y * max( + cycles_write_block, cycles_read_block + ) + 
initial_buffer = IFMDim_x * ConvKernelDim_y * multiplying_factor + + READ_DELAY = ( + number_blocks + * ConvKernelDim_x + * ConvKernelDim_y + * OFMDim_x + * OFMDim_y + * multiplying_factor + - ConvKernelDim_x * ConvKernelDim_y * OFMDim_x + ) + READ_ITES = int((baseIter - OFMDim_y) / max(cycles_write_block, cycles_read_block)) + + # assert True == False + kwargs = ( + SIMD_COUNT, + Stride_x, + Stride_y, + OUTPUT_SIZE, + INPUT_SIZE, + WINDOW_SIZE, + BUFFER_SIZE, + READ_CYCLES, + OCNT_INITIAL, + DEPTHWISE, + DEFAULT_FIFO_DEPTH, + is1d, + multiplying_factor, + number_blocks, + cycles_write_block, + cycles_read_block, + max_cycles, + baseIter, + initial_buffer, + FINISH, + OFMDim_y, + READ_DELAY, + READ_ITES, + ) - # assert True==False + # assert True==False return kwargs def characteristic_fx_input(self, txns, cycles, counter, kwargs): # Compute one period of the input characteristic function - (SIMD_COUNT,Stride_x,Stride_y,OUTPUT_SIZE,INPUT_SIZE, - WINDOW_SIZE,BUFFER_SIZE,READ_CYCLES, - OCNT_INITIAL, DEPTHWISE,DEFAULT_FIFO_DEPTH,is1d, - multiplying_factor,number_blocks,cycles_write_block, - cycles_read_block,max_cycles,baseIter,initial_buffer,FINISH,OFMDim_y,READ_DELAY, - READ_ITES) = kwargs + ( + SIMD_COUNT, + Stride_x, + Stride_y, + OUTPUT_SIZE, + INPUT_SIZE, + WINDOW_SIZE, + BUFFER_SIZE, + READ_CYCLES, + OCNT_INITIAL, + DEPTHWISE, + DEFAULT_FIFO_DEPTH, + is1d, + multiplying_factor, + number_blocks, + cycles_write_block, + cycles_read_block, + max_cycles, + baseIter, + initial_buffer, + FINISH, + OFMDim_y, + READ_DELAY, + READ_ITES, + ) = kwargs - if DEPTHWISE: OCNT_MAX = BUFFER_SIZE ocnt = SIMD_COUNT @@ -365,111 +404,120 @@ def characteristic_fx_input(self, txns, cycles, counter, kwargs): OCNT_MAX = WINDOW_SIZE if OCNT_INITIAL < WINDOW_SIZE: ocnt = OCNT_INITIAL - else: ocnt=-1 - + else: + ocnt = -1 # fifo filling - for i in range(0,DEFAULT_FIFO_DEPTH): + for i in range(0, DEFAULT_FIFO_DEPTH): txns.append(counter) - counter+=1 - cycles+=1 - + counter += 1 + cycles 
+= 1 # main function - + inp_count = 0 if is1d: - for i in range(0,OUTPUT_SIZE): + for i in range(0, OUTPUT_SIZE): txns.append(counter) we = (i < OCNT_MAX) or (ocnt < (SIMD_COUNT * Stride_x)) re = i > 0 if re: - ocnt+=1 + ocnt += 1 if ocnt == OCNT_MAX: ocnt = 0 if we: - if inp_count < INPUT_SIZE-DEFAULT_FIFO_DEPTH: - counter+=1 - inp_count+=1 - - cycles+=1 - else: + if inp_count < INPUT_SIZE - DEFAULT_FIFO_DEPTH: + counter += 1 + inp_count += 1 - for i in range(0,initial_buffer+cycles_read_block-1): + cycles += 1 + else: + for i in range(0, initial_buffer + cycles_read_block - 1): txns.append(counter) - cycles+=1 - counter+=1 + cycles += 1 + counter += 1 txns.append(counter) - cycles+=1 # one extra for loop tail + cycles += 1 # one extra for loop tail - for i in range(0,OFMDim_y-1): - for j in range(0,cycles_write_block-cycles_read_block): + for i in range(0, OFMDim_y - 1): + for j in range(0, cycles_write_block - cycles_read_block): txns.append(counter) - cycles+=1 - - - for j in range(0,cycles_read_block-1): - if i < OFMDim_y-2: - counter+=1 - txns.append(counter) - cycles+=1 - # else: - # if j < FINISH: - # counter+=1 - # txns.append(counter) - # cycles+=1 -# + cycles += 1 + + for j in range(0, cycles_read_block - 1): + if i < OFMDim_y - 2: + counter += 1 + txns.append(counter) + cycles += 1 + # else: + # if j < FINISH: + # counter+=1 + # txns.append(counter) + # cycles+=1 + # return txns, cycles, counter def characteristic_fx_output(self, txns, cycles, counter, kwargs): # Compute one period of the output characteristic function - (SIMD_COUNT,Stride_x,Stride_y,OUTPUT_SIZE,INPUT_SIZE, - WINDOW_SIZE,BUFFER_SIZE,READ_CYCLES, - OCNT_INITIAL, DEPTHWISE,DEFAULT_FIFO_DEPTH, is1d, - multiplying_factor,number_blocks,cycles_write_block, - cycles_read_block,max_cycles,baseIter,initial_buffer,FINISH,OFMDim_y,READ_DELAY, - READ_ITES) = kwargs + ( + SIMD_COUNT, + Stride_x, + Stride_y, + OUTPUT_SIZE, + INPUT_SIZE, + WINDOW_SIZE, + BUFFER_SIZE, + READ_CYCLES, + OCNT_INITIAL, + 
DEPTHWISE, + DEFAULT_FIFO_DEPTH, + is1d, + multiplying_factor, + number_blocks, + cycles_write_block, + cycles_read_block, + max_cycles, + baseIter, + initial_buffer, + FINISH, + OFMDim_y, + READ_DELAY, + READ_ITES, + ) = kwargs # HYPER PARAMETERS - - INITIAL_LOOP_CYCLES = 5 - if is1d: - for i in range(0,INITIAL_LOOP_CYCLES): + for i in range(0, INITIAL_LOOP_CYCLES): txns.append(counter) - cycles+=1 + cycles += 1 - for i in range(0,READ_CYCLES): + for i in range(0, READ_CYCLES): txns.append(counter) - cycles+=1 + cycles += 1 - - - for i in range(0,OUTPUT_SIZE): + for i in range(0, OUTPUT_SIZE): txns.append(counter) - counter+=1 - cycles+=1 + counter += 1 + cycles += 1 else: - - for i in range(0,initial_buffer+INITIAL_LOOP_CYCLES-1): + for i in range(0, initial_buffer + INITIAL_LOOP_CYCLES - 1): txns.append(counter) - cycles+=1 + cycles += 1 - for i in range(0,baseIter-initial_buffer): + for i in range(0, baseIter - initial_buffer): txns.append(counter) - counter+=1 - cycles+=1 + counter += 1 + cycles += 1 return txns, cycles, counter - def derive_characteristic_fxns(self, period): n_inps = np.prod(self.get_folded_input_shape()[:-1]) io_dict = { @@ -480,25 +528,20 @@ def derive_characteristic_fxns(self, period): } ignore = self.get_nodeattr("ipgen_ignore") - if ignore == 0: # this node is being derived using RTLSIM + if ignore is False: # this node is being derived using RTLSIM # RTL-based flow super().derive_characteristic_fxns(period, override_rtlsim_dict=io_dict) return - - # Analytical flow - + # Analytical flow + txns_in = {key: [] for (key, value) in io_dict["inputs"].items() if "in" in key} txns_out = {key: [] for (key, value) in io_dict["outputs"].items() if "out" in key} all_txns_in = np.empty((len(txns_in.keys()), 2 * period), dtype=np.int32) all_txns_out = np.empty((len(txns_out.keys()), 2 * period), dtype=np.int32) - - self.set_nodeattr("io_chrc_period",period) - - - + self.set_nodeattr("io_chrc_period", period) txn_in = [] txn_out = [] @@ -507,54 
+550,46 @@ def derive_characteristic_fxns(self, period): counter = 0 padding = 0 - kwargs = self.prepare_kwargs_for_characteristic_fx() - # first period cycles = 0 - txn_in, cycles, counter = self.characteristic_fx_input(txn_in,cycles,counter,kwargs) + txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - txn_in += [counter] * (period-cycles) - padding+=(period*-cycles) - + txn_in += [counter] * (period - cycles) + padding += period * -cycles # second period cycles = period - txn_in, cycles, counter = self.characteristic_fx_input(txn_in,cycles,counter,kwargs) + txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - - txn_in += [counter] * (period*2-cycles) - padding+=(period*2-cycles) + txn_in += [counter] * (period * 2 - cycles) + padding += period * 2 - cycles # final assignments all_txns_in[0, :] = np.array(txn_in) self.set_nodeattr("io_chrc_in", all_txns_in) self.set_nodeattr("io_chrc_pads_in", padding) - # OUTPUT - - counter = 0 - cycles = 0 - padding = 0 - - txn_out, cycles, counter = self.characteristic_fx_output(txn_out,cycles,counter,kwargs) + counter = 0 + cycles = 0 + padding = 0 + txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) - txn_out += [counter] * (period-cycles) - padding += (period*-cycles) + txn_out += [counter] * (period - cycles) + padding += period * -cycles cycles = period - txn_out, cycles, counter = self.characteristic_fx_output(txn_out,cycles,counter,kwargs) - - txn_out += [counter] * (period*2-cycles) - padding+=(period*2-cycles) + txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) + txn_out += [counter] * (period * 2 - cycles) + padding += period * 2 - cycles - all_txns_out[0, :] = np.array(txn_out) + all_txns_out[0, :] = np.array(txn_out) self.set_nodeattr("io_chrc_out", all_txns_out) self.set_nodeattr("io_chrc_pads_out", padding) diff --git 
a/src/finn/custom_op/fpgadataflow/fmpadding.py b/src/finn/custom_op/fpgadataflow/fmpadding.py index 55a17f0039..d83fc4b9e8 100644 --- a/src/finn/custom_op/fpgadataflow/fmpadding.py +++ b/src/finn/custom_op/fpgadataflow/fmpadding.py @@ -171,24 +171,20 @@ def execute_node(self, context, graph): ) context[node.output[0]] = np.asarray(result, dtype=np.float32).reshape(oshape) - def prepare_kwargs_for_characteristic_fx(self): - - # key parameters ImgDim = self.get_nodeattr("ImgDim") Padding = self.get_nodeattr("Padding") - NewDim = [ImgDim[0]+Padding[0]+Padding[2],ImgDim[1]+Padding[1]+Padding[3]] + NewDim = [ImgDim[0] + Padding[0] + Padding[2], ImgDim[1] + Padding[1] + Padding[3]] NumChannels = self.get_nodeattr("NumChannels") SIMD = self.get_nodeattr("SIMD") TOTAL_ELS = np.prod(NewDim) - NF = int(NumChannels/SIMD) + NF = int(NumChannels / SIMD) - # assert True == False - kwargs = (ImgDim, Padding, NumChannels, SIMD, TOTAL_ELS,NF) + # assert True == False + kwargs = (ImgDim, Padding, NumChannels, SIMD, TOTAL_ELS, NF) - - # assert True==False + # assert True==False return kwargs @@ -200,39 +196,39 @@ def characteristic_fx_input(self, txns, cycles, counter, kwargs): delay = 0 # if NF == 1, we always have a one cycle delay - if NF == 1: nf1 = 2 - else: nf1 = 1 + if NF == 1: + nf1 = 2 + else: + nf1 = 1 - for i in range(0,ImgDim[0]): - for j in range(0,ImgDim[1]): + for i in range(0, ImgDim[0]): + for j in range(0, ImgDim[1]): for k in range(NF): txns.append(counter) - counter+=1 - cycles+=1 + counter += 1 + cycles += 1 if NF == 1: txns.append(counter) - cycles+=1 - for z in range((Padding[1]+Padding[3])*NF*nf1+delay): + cycles += 1 + for z in range((Padding[1] + Padding[3]) * NF * nf1 + delay): txns.append(counter) - cycles+=1 + cycles += 1 return txns, cycles, counter def characteristic_fx_output(self, txns, cycles, counter, kwargs): # Compute one period of the output characteristic function - (ImgDim, Padding, NumChannels, SIMD, TOTAL_ELS,NF) = kwargs - + (ImgDim, 
Padding, NumChannels, SIMD, TOTAL_ELS, NF) = kwargs - for i in range(0,TOTAL_ELS): - for j in range(int(NumChannels/SIMD)): + for i in range(0, TOTAL_ELS): + for j in range(int(NumChannels / SIMD)): txns.append(counter) - counter+=1 - cycles+=1 + counter += 1 + cycles += 1 return txns, cycles, counter - def derive_characteristic_fxns(self, period): n_inps = np.prod(self.get_folded_input_shape()[:-1]) io_dict = { @@ -243,25 +239,20 @@ def derive_characteristic_fxns(self, period): } ignore = self.get_nodeattr("ipgen_ignore") - if ignore == 0: # this node is being derived using RTLSIM + if ignore is False: # this node is being derived using RTLSIM # RTL-based flow super().derive_characteristic_fxns(period, override_rtlsim_dict=io_dict) return - - # Analytical flow - + # Analytical flow + txns_in = {key: [] for (key, value) in io_dict["inputs"].items() if "in" in key} txns_out = {key: [] for (key, value) in io_dict["outputs"].items() if "out" in key} all_txns_in = np.empty((len(txns_in.keys()), 2 * period), dtype=np.int32) all_txns_out = np.empty((len(txns_out.keys()), 2 * period), dtype=np.int32) - - self.set_nodeattr("io_chrc_period",period) - - - + self.set_nodeattr("io_chrc_period", period) txn_in = [] txn_out = [] @@ -270,58 +261,50 @@ def derive_characteristic_fxns(self, period): counter = 0 padding = 0 - kwargs = self.prepare_kwargs_for_characteristic_fx() - # first period cycles = 0 - txn_in, cycles, counter = self.characteristic_fx_input(txn_in,cycles,counter,kwargs) + txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - txn_in += [counter] * (period-cycles) - padding+=(period*-cycles) - + txn_in += [counter] * (period - cycles) + padding += period * -cycles # second period cycles = period - txn_in, cycles, counter = self.characteristic_fx_input(txn_in,cycles,counter,kwargs) - + txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - #for i in range(cycles,period*2): + # for i in 
range(cycles,period*2): # txn_in.append(counter) - #pads = (period*2-cycles) + # pads = (period*2-cycles) - txn_in += [counter] * (period*2-cycles) - padding+=(period*2-cycles) + txn_in += [counter] * (period * 2 - cycles) + padding += period * 2 - cycles # final assignments all_txns_in[0, :] = np.array(txn_in) self.set_nodeattr("io_chrc_in", all_txns_in) self.set_nodeattr("io_chrc_pads_in", padding) - # OUTPUT - - counter = 0 - cycles = 0 - padding = 0 + counter = 0 + cycles = 0 + padding = 0 - txn_out, cycles, counter = self.characteristic_fx_output(txn_out,cycles,counter,kwargs) - + txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) - txn_out += [counter] * (period-cycles) - padding += (period*-cycles) + txn_out += [counter] * (period - cycles) + padding += period * -cycles cycles = period - txn_out, cycles, counter = self.characteristic_fx_output(txn_out,cycles,counter,kwargs) - - txn_out += [counter] * (period*2-cycles) - padding+=(period*2-cycles) + txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) + txn_out += [counter] * (period * 2 - cycles) + padding += period * 2 - cycles - all_txns_out[0, :] = np.array(txn_out) + all_txns_out[0, :] = np.array(txn_out) self.set_nodeattr("io_chrc_out", all_txns_out) self.set_nodeattr("io_chrc_pads_out", padding) diff --git a/src/finn/custom_op/fpgadataflow/hls/thresholding_hls.py b/src/finn/custom_op/fpgadataflow/hls/thresholding_hls.py index b753bc7a03..0a4ffc3fea 100644 --- a/src/finn/custom_op/fpgadataflow/hls/thresholding_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/thresholding_hls.py @@ -735,18 +735,3 @@ def ipgen_extra_directives(self): "Return a list of extra tcl directives for HLS synthesis." 
return ["config_compile -pipeline_style frp"] - - def derive_characteristic_fxns(self, period): - n_inps = np.prod(self.get_folded_input_shape()[:-1]) - io_dict = { - "inputs": { - "in0": [0 for i in range(n_inps)], - }, - "outputs": {"out": []}, - } - mem_mode = self.get_nodeattr("mem_mode") - if mem_mode in ["internal_decoupled", "external"]: - n_weight_inps = self.calc_tmem() - num_w_reps = np.prod(self.get_nodeattr("numInputVectors")) - io_dict["inputs"]["weights"] = [0 for i in range(num_w_reps * n_weight_inps)] - super().derive_characteristic_fxns(period, override_rtlsim_dict=io_dict) diff --git a/src/finn/custom_op/fpgadataflow/hwcustomop.py b/src/finn/custom_op/fpgadataflow/hwcustomop.py index 84a65ce0d4..995f3c24cb 100644 --- a/src/finn/custom_op/fpgadataflow/hwcustomop.py +++ b/src/finn/custom_op/fpgadataflow/hwcustomop.py @@ -98,7 +98,8 @@ def get_nodeattr_types(self): "io_chrc_pads_out": ("i", False, 0), "io_chrc_in_concat": ("t", False, np.asarray([], dtype=np.int32)), "io_chrc_out_concat": ("t", False, np.asarray([], dtype=np.int32)), - "ipgen_ignore": ("i", False, 0) + # flag to ignore the ip generation of this node + "ipgen_ignore": ("i", False, 0), } def get_verilog_top_module_name(self): @@ -367,6 +368,7 @@ def get_outstream_width_padded(self, ind=0): def derive_characteristic_fxns(self, period, override_rtlsim_dict=None): """Return the unconstrained characteristic functions for this node.""" # ensure rtlsim is ready + assert self.get_nodeattr("rtlsim_so") != "", "rtlsim not ready for " + self.onnx_node.name if self.get_nodeattr("io_chrc_period") > 0: warnings.warn("Skipping node %s: already has FIFO characteristic" % self.onnx_node.name) diff --git a/src/finn/custom_op/fpgadataflow/labelselect.py b/src/finn/custom_op/fpgadataflow/labelselect.py index fbd89b4f0d..1e5b22c5ec 100644 --- a/src/finn/custom_op/fpgadataflow/labelselect.py +++ b/src/finn/custom_op/fpgadataflow/labelselect.py @@ -185,62 +185,57 @@ def get_exp_cycles(self): exp_cycles = 
nlabels / pe return int(exp_cycles) - def prepare_kwargs_for_characteristic_fx(self): - - # key parameters num_in_words = self.get_nodeattr("Labels") PE = self.get_nodeattr("PE") K = self.get_nodeattr("K") - kwargs = (num_in_words,PE,K) + kwargs = (num_in_words, PE, K) - - # assert True==False + # assert True==False return kwargs def characteristic_fx_input(self, txns, cycles, counter, kwargs): # Compute one period of the input characteristic function - (num_in_words,PE,K) = kwargs + (num_in_words, PE, K) = kwargs # input - for i in range(0,int(num_in_words/PE)+1): + for i in range(0, int(num_in_words / PE) + 1): txns.append(counter) - counter+=1 - cycles+=1 + counter += 1 + cycles += 1 return txns, cycles, counter def characteristic_fx_output(self, txns, cycles, counter, kwargs): # Compute one period of the output characteristic function - (num_in_words,PE,K) = kwargs + (num_in_words, PE, K) = kwargs windup_clocks = 4 - for i in range(0,windup_clocks): + for i in range(0, windup_clocks): txns.append(counter) - cycles+=1 + cycles += 1 # first output period, computing Labels - for i in range(0,int(num_in_words/PE+K)): + for i in range(0, int(num_in_words / PE + K)): txns.append(counter) - cycles+=1 + cycles += 1 # output the K labels which got selected - for j in range(0,K): + for j in range(0, K): txns.append(counter) - cycles+=1 - counter+=1 - + cycles += 1 + counter += 1 return txns, cycles, counter - def derive_characteristic_fxns(self, period): + print("deriving ch") n_inps = np.prod(self.get_folded_input_shape()[:-1]) io_dict = { "inputs": { @@ -248,95 +243,78 @@ def derive_characteristic_fxns(self, period): }, "outputs": {"out": []}, } - # mem_mode = self.get_nodeattr("mem_mode") - # if mem_mode in ["internal_decoupled", "external"]: - # n_weight_inps = self.calc_wmem() - # num_w_reps = np.prod(self.get_nodeattr("numInputVectors")) - # io_dict["inputs"]["weights"] = [0 for i in range(num_w_reps * n_weight_inps)] - + # RTL-based flow, uncomment and use instead 
of Analytical flow in case + # the analytical flow is too innacurate ignore = self.get_nodeattr("ipgen_ignore") - if ignore == 0: # this node is being derived using RTLSIM + if ignore is False: # this node is being derived using RTLSIM # RTL-based flow + print("rtl flow") super().derive_characteristic_fxns(period, override_rtlsim_dict=io_dict) return - + # Analytical flow - - + txns_in = {key: [] for (key, value) in io_dict["inputs"].items() if "in" in key} txns_out = {key: [] for (key, value) in io_dict["outputs"].items() if "out" in key} all_txns_in = np.empty((len(txns_in.keys()), 2 * period), dtype=np.int32) all_txns_out = np.empty((len(txns_out.keys()), 2 * period), dtype=np.int32) - - self.set_nodeattr("io_chrc_period",period) - - - + self.set_nodeattr("io_chrc_period", period) txn_in = [] txn_out = [] - # INPUT counter = 0 padding = 0 - kwargs = self.prepare_kwargs_for_characteristic_fx() - # first period cycles = 0 - txn_in, cycles, counter = self.characteristic_fx_input(txn_in,cycles,counter,kwargs) + txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - txn_in += [counter] * (period-cycles) - padding+=(period*-cycles) - + txn_in += [counter] * (period - cycles) + padding += period * -cycles # second period cycles = period - txn_in, cycles, counter = self.characteristic_fx_input(txn_in,cycles,counter,kwargs) + txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - - #for i in range(cycles,period*2): + # for i in range(cycles,period*2): # txn_in.append(counter) - #pads = (period*2-cycles) + # pads = (period*2-cycles) - txn_in += [counter] * (period*2-cycles) - padding+=(period*2-cycles) + txn_in += [counter] * (period * 2 - cycles) + padding += period * 2 - cycles # final assignments all_txns_in[0, :] = np.array(txn_in) self.set_nodeattr("io_chrc_in", all_txns_in) self.set_nodeattr("io_chrc_pads_in", padding) - # OUTPUT - - counter = 0 - cycles = 0 - padding = 0 + counter = 0 + cycles = 
0 + padding = 0 - txn_out, cycles, counter = self.characteristic_fx_output(txn_out,cycles,counter,kwargs) - + txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) - txn_out += [counter] * (period-cycles) - padding += (period*-cycles) + txn_out += [counter] * (period - cycles) + padding += period * -cycles cycles = period - txn_out, cycles, counter = self.characteristic_fx_output(txn_out,cycles,counter,kwargs) - - txn_out += [counter] * (period*2-cycles) - padding+=(period*2-cycles) + txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) + txn_out += [counter] * (period * 2 - cycles) + padding += period * 2 - cycles - all_txns_out[0, :] = np.array(txn_out) + all_txns_out[0, :] = np.array(txn_out) self.set_nodeattr("io_chrc_out", all_txns_out) self.set_nodeattr("io_chrc_pads_out", padding) diff --git a/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py b/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py index 36ee14e695..f9d52b1f4f 100644 --- a/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py +++ b/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py @@ -957,74 +957,66 @@ def code_generation_ipi(self): raise Exception("Unrecognized mem_mode for MatrixVectorActivation") return cmd - - def prepare_kwargs_for_characteristic_fx(self): - MW = self.get_nodeattr("MW") MH = self.get_nodeattr("MH") SIMD = self.get_nodeattr("SIMD") PE = self.get_nodeattr("PE") numVectors = np.prod(self.get_nodeattr("numInputVectors")) - BURST_SIZE = int(MW/SIMD) - BURST_COUNT = int(MH/PE) - - kwargs = (MW,MH,SIMD,PE,BURST_COUNT,BURST_SIZE,numVectors) + BURST_SIZE = int(MW / SIMD) + BURST_COUNT = int(MH / PE) + + kwargs = (MW, MH, SIMD, PE, BURST_COUNT, BURST_SIZE, numVectors) return kwargs def characteristic_fx_input(self, txns, cycles, counter, kwargs): - - (MW,MH,SIMD,PE,BURST_COUNT,BURST_SIZE,numVectors) = kwargs + (MW, MH, SIMD, PE, BURST_COUNT, BURST_SIZE, numVectors) = kwargs tracker = 
0 - maximum = numVectors*BURST_SIZE + maximum = numVectors * BURST_SIZE if numVectors > 1: for i in range(2): txns.append(counter) - counter+=1 - cycles+=1 - tracker+=1 + counter += 1 + cycles += 1 + tracker += 1 for k in range(numVectors): for j in range(BURST_SIZE): if tracker < maximum: txns.append(counter) - counter+=1 - cycles+=1 - tracker+=1 - + counter += 1 + cycles += 1 + tracker += 1 - for i in range(BURST_COUNT-1): + for i in range(BURST_COUNT - 1): for j in range(BURST_SIZE): txns.append(counter) - cycles+=1 + cycles += 1 return txns, cycles, counter - def characteristic_fx_output(self, txns, cycles, counter, kwargs): - - (MW,MH,SIMD,PE,BURST_COUNT,BURST_SIZE,numVectors) = kwargs + (MW, MH, SIMD, PE, BURST_COUNT, BURST_SIZE, numVectors) = kwargs windup_clocks = 3 - for i in range(0,windup_clocks): + for i in range(0, windup_clocks): txns.append(counter) - cycles+=1 - + cycles += 1 + for k in range(numVectors): for i in range(BURST_COUNT): for j in range(BURST_SIZE): txns.append(counter) - cycles+=1 - counter+=1 + cycles += 1 + counter += 1 return txns, cycles, counter - def derive_characteristic_fxns(self, period): n_inps = np.prod(self.get_folded_input_shape()[:-1]) io_dict = { @@ -1034,92 +1026,75 @@ def derive_characteristic_fxns(self, period): "outputs": {"out": []}, } - mem_mode = self.get_nodeattr("mem_mode") if mem_mode in ["internal_decoupled", "external"]: n_weight_inps = self.calc_wmem() num_w_reps = np.prod(self.get_nodeattr("numInputVectors")) io_dict["inputs"]["weights"] = [0 for i in range(num_w_reps * n_weight_inps)] - ignore = self.get_nodeattr("ipgen_ignore") - if ignore == 0: # this node is being derived using RTLSIM + if ignore is False: # this node is being derived using RTLSIM # RTL-based flow super().derive_characteristic_fxns(period, override_rtlsim_dict=io_dict) return - + # Analytical flow - # Analytical flow - txns_in = {key: [] for (key, value) in io_dict["inputs"].items() if "in" in key} txns_out = {key: [] for (key, value) in 
io_dict["outputs"].items() if "out" in key} all_txns_in = np.empty((len(txns_in.keys()), 2 * period), dtype=np.int32) all_txns_out = np.empty((len(txns_out.keys()), 2 * period), dtype=np.int32) - - self.set_nodeattr("io_chrc_period",period) - - - + self.set_nodeattr("io_chrc_period", period) txn_in = [] txn_out = [] - # INPUT counter = 0 padding = 0 - kwargs = self.prepare_kwargs_for_characteristic_fx() - # first period cycles = 0 - txn_in, cycles, counter = self.characteristic_fx_input(txn_in,cycles,counter,kwargs) + txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - txn_in += [counter] * (period-cycles) - padding+=(period*-cycles) - + txn_in += [counter] * (period - cycles) + padding += period * -cycles # second period cycles = period - txn_in, cycles, counter = self.characteristic_fx_input(txn_in,cycles,counter,kwargs) - + txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - txn_in += [counter] * (period*2-cycles) - padding+=(period*2-cycles) + txn_in += [counter] * (period * 2 - cycles) + padding += period * 2 - cycles # final assignments all_txns_in[0, :] = np.array(txn_in) self.set_nodeattr("io_chrc_in", all_txns_in) self.set_nodeattr("io_chrc_pads_in", padding) - # OUTPUT - - counter = 0 - cycles = 0 - padding = 0 - - txn_out, cycles, counter = self.characteristic_fx_output(txn_out,cycles,counter,kwargs) + counter = 0 + cycles = 0 + padding = 0 + txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) - txn_out += [counter] * (period-cycles) - padding += (period*-cycles) + txn_out += [counter] * (period - cycles) + padding += period * -cycles cycles = period - txn_out, cycles, counter = self.characteristic_fx_output(txn_out,cycles,counter,kwargs) - - txn_out += [counter] * (period*2-cycles) - padding+=(period*2-cycles) + txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) + txn_out += [counter] * (period * 2 - 
cycles) + padding += period * 2 - cycles - all_txns_out[0, :] = np.array(txn_out) + all_txns_out[0, :] = np.array(txn_out) self.set_nodeattr("io_chrc_out", all_txns_out) self.set_nodeattr("io_chrc_pads_out", padding) diff --git a/src/finn/custom_op/fpgadataflow/pool.py b/src/finn/custom_op/fpgadataflow/pool.py index 183bf562d4..8dbf2c6e3b 100644 --- a/src/finn/custom_op/fpgadataflow/pool.py +++ b/src/finn/custom_op/fpgadataflow/pool.py @@ -223,59 +223,46 @@ def execute_node(self, context, graph): oshape = context[node.output[0]].shape context[node.output[0]] = np.asarray(result, dtype=np.float32).reshape(oshape) - def prepare_kwargs_for_characteristic_fx(self): - - # key parameters Channels = self.get_nodeattr("Channels") PE = self.get_nodeattr("PE") KernelSize = np.prod(self.get_nodeattr("KernelSize")) - # assert True == False - NF = int(Channels/PE) - kwargs = (NF,KernelSize) + # assert True == False + NF = int(Channels / PE) + kwargs = (NF, KernelSize) - - # assert True==False + # assert True==False return kwargs def characteristic_fx_input(self, txns, cycles, counter, kwargs): # Compute one period of the input characteristic function - (NF,KernelSize) = kwargs - - delay = 0 - # if NF == 1, we always have a one cycle delay - # NF = max(NF,2) - if NF == 1: - nf1 = 2 - else: - nf1 = 1 + (NF, KernelSize) = kwargs - for i in range(0,KernelSize): + for i in range(0, KernelSize): for k in range(NF): txns.append(counter) - counter+=1 - cycles+=1 + counter += 1 + cycles += 1 -# + # return txns, cycles, counter def characteristic_fx_output(self, txns, cycles, counter, kwargs): # Compute one period of the output characteristic function - (NF,KernelSize) = kwargs + (NF, KernelSize) = kwargs - for i in range(0,KernelSize): + for i in range(0, KernelSize): for k in range(NF): txns.append(counter) - counter+=1 - cycles+=1 + counter += 1 + cycles += 1 return txns, cycles, counter - def derive_characteristic_fxns(self, period): n_inps = 
np.prod(self.get_folded_input_shape()[:-1]) io_dict = { @@ -286,86 +273,72 @@ def derive_characteristic_fxns(self, period): } ignore = self.get_nodeattr("ipgen_ignore") - if ignore == 0: # this node is being derived using RTLSIM + if ignore is False: # this node is being derived using RTLSIM # RTL-based flow super().derive_characteristic_fxns(period, override_rtlsim_dict=io_dict) return - - # Analytical flow - + # Analytical flow + txns_in = {key: [] for (key, value) in io_dict["inputs"].items() if "in" in key} txns_out = {key: [] for (key, value) in io_dict["outputs"].items() if "out" in key} all_txns_in = np.empty((len(txns_in.keys()), 2 * period), dtype=np.int32) all_txns_out = np.empty((len(txns_out.keys()), 2 * period), dtype=np.int32) - - self.set_nodeattr("io_chrc_period",period) - - - + self.set_nodeattr("io_chrc_period", period) txn_in = [] txn_out = [] - # INPUT counter = 0 padding = 0 - kwargs = self.prepare_kwargs_for_characteristic_fx() - # first period cycles = 0 - txn_in, cycles, counter = self.characteristic_fx_input(txn_in,cycles,counter,kwargs) + txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - txn_in += [counter] * (period-cycles) - padding+=(period*-cycles) - + txn_in += [counter] * (period - cycles) + padding += period * -cycles # second period cycles = period - txn_in, cycles, counter = self.characteristic_fx_input(txn_in,cycles,counter,kwargs) + txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - - #for i in range(cycles,period*2): + # for i in range(cycles,period*2): # txn_in.append(counter) - #pads = (period*2-cycles) + # pads = (period*2-cycles) - txn_in += [counter] * (period*2-cycles) - padding+=(period*2-cycles) + txn_in += [counter] * (period * 2 - cycles) + padding += period * 2 - cycles # final assignments all_txns_in[0, :] = np.array(txn_in) self.set_nodeattr("io_chrc_in", all_txns_in) self.set_nodeattr("io_chrc_pads_in", padding) - # OUTPUT - - counter = 
0 - cycles = 0 - padding = 0 + counter = 0 + cycles = 0 + padding = 0 - txn_out, cycles, counter = self.characteristic_fx_output(txn_out,cycles,counter,kwargs) - + txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) - txn_out += [counter] * (period-cycles) - padding += (period*-cycles) + txn_out += [counter] * (period - cycles) + padding += period * -cycles cycles = period - txn_out, cycles, counter = self.characteristic_fx_output(txn_out,cycles,counter,kwargs) - - txn_out += [counter] * (period*2-cycles) - padding+=(period*2-cycles) + txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) + txn_out += [counter] * (period * 2 - cycles) + padding += period * 2 - cycles - all_txns_out[0, :] = np.array(txn_out) + all_txns_out[0, :] = np.array(txn_out) self.set_nodeattr("io_chrc_out", all_txns_out) self.set_nodeattr("io_chrc_pads_out", padding) diff --git a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter.py b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter.py index a571389b0d..12faee9ef0 100644 --- a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter.py +++ b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter.py @@ -215,43 +215,29 @@ def lut_estimation(self): return int(cnt_luts + cset_luts) - def prepare_kwargs_for_characteristic_fx(self): - numInWords = int(np.prod(self.get_folded_input_shape()[-2:-1])) numOutWords = int(np.prod(self.get_folded_output_shape()[-2:-1])) numReps = int(np.prod(self.get_folded_input_shape()[:1])) inWidth = self.get_nodeattr("inWidth") outWidth = self.get_nodeattr("outWidth") - - - kwargs = (numInWords,numOutWords,inWidth,outWidth,numReps) + kwargs = (numInWords, numOutWords, inWidth, outWidth, numReps) - # assert True==False + # assert True==False return kwargs - - def characteristic_fx_input(self, txns, cycles, counter, kwargs): - - (numInWords,numOutWords,inWidth,outWidth,numReps) = kwargs - - - + (numInWords, 
numOutWords, inWidth, outWidth, numReps) = kwargs # HYPER PARAMETERS WHICH MAY CHANGE OVER TIME windup_clocks_up_convert_input = 4 - windup_clocks_down_convert_input = 3 - windup_clocks_down_convert_output = 4 windup_clocks_equal_convert_output = 3 - - if numInWords < windup_clocks_up_convert_input: windup_clocks_up_convert_input = numInWords @@ -259,96 +245,46 @@ def characteristic_fx_input(self, txns, cycles, counter, kwargs): if numInWords < windup_clocks_down_convert_input: windup_clocks_down_convert_input = numInWords - - if numOutWords < windup_clocks_down_convert_output: windup_clocks_down_convert_output = numOutWords - - if numOutWords < windup_clocks_equal_convert_output: windup_clocks_equal_convert_output = numOutWords - - - - # calculation to adjust for padding or cropping adding latency - - - if outWidth > inWidth: - higher = outWidth - lower = inWidth - else: - higher = inWidth - lower = outWidth - - if higher % lower != 0: - if numInWords*inWidth > numOutWords*outWidth: - crop = True - pad = False - else: - cropping = False - pad = True - - else: - crop = False - pad = False - - - - # windup period - # for i in range(0,windup_clocks_down_convert_output): - # txns.append(counter) - # cycles+=1 - #padding +=1 - #counter+=1 - # first input period - - # first input period tracker = 0 - maximum = numReps*numInWords + maximum = numReps * numInWords if numReps > 1: # loop windup for i in range(2): txns.append(counter) - counter+=1 - cycles+=1 - tracker+=1 + counter += 1 + cycles += 1 + tracker += 1 - for j in range(0,numReps): - for i in range(0,numInWords): + for j in range(0, numReps): + for i in range(0, numInWords): if tracker < maximum: txns.append(counter) - counter+=1 - cycles+=1 - tracker+=1 - for i in range(0,1): + counter += 1 + cycles += 1 + tracker += 1 + for i in range(0, 1): txns.append(counter) - cycles+=1 + cycles += 1 return txns, cycles, counter - - def characteristic_fx_output(self, txns, cycles, counter, kwargs): - - 
(numInWords,numOutWords,inWidth,outWidth,numReps) = kwargs - - - - + (numInWords, numOutWords, inWidth, outWidth, numReps) = kwargs # HYPER PARAMETERS WHICH MAY CHANGE windup_clocks_up_convert_input = 3 windup_clocks_down_convert_input = 2 - windup_clocks_down_convert_output = 3 windup_clocks_equal_convert_output = 2 - - if numInWords < windup_clocks_up_convert_input: windup_clocks_up_convert_input = numInWords @@ -356,21 +292,13 @@ def characteristic_fx_output(self, txns, cycles, counter, kwargs): if numInWords < windup_clocks_down_convert_input: windup_clocks_down_convert_input = numInWords - - if numOutWords < windup_clocks_down_convert_output: windup_clocks_down_convert_output = numOutWords - - if numOutWords < windup_clocks_equal_convert_output: windup_clocks_equal_convert_output = numOutWords - - - # calculation to adjust for padding or cropping adding latency - if outWidth > inWidth: higher = outWidth @@ -380,72 +308,55 @@ def characteristic_fx_output(self, txns, cycles, counter, kwargs): lower = outWidth if higher % lower != 0: - if numInWords*inWidth > numOutWords*outWidth: - crop = True + if numInWords * inWidth > numOutWords * outWidth: pad = False else: - cropping = False pad = True else: - crop = False pad = False - - # windup period if inWidth == outWidth: clock = windup_clocks_equal_convert_output else: clock = windup_clocks_up_convert_input - for i in range(0,clock): + for i in range(0, clock): txns.append(counter) - cycles+=1 - # padding +=1 + cycles += 1 + # padding +=1 # first input period - if pad: - offset = 2 - else: - offset = 1 - - remainder = 0 - for k in range(numReps): - # windup txns.append(counter) - cycles+=1 + cycles += 1 - for i in range(0,numOutWords): - for j in range(0,int(np.floor(outWidth/inWidth))): + for i in range(0, numOutWords): + for j in range(0, int(np.floor(outWidth / inWidth))): if j != 0: txns.append(counter) - cycles +=1 + cycles += 1 remainder += inWidth # padding +=1 - - if pad and remainder < outWidth: 
print(remainder) txns.append(counter) remainder += inWidth - cycles +=1 + cycles += 1 txns.append(counter) - cycles +=1 + cycles += 1 - counter+=1 + counter += 1 remainder -= outWidth - return txns, cycles, counter - def derive_characteristic_fxns(self, period): n_inps = np.prod(self.get_folded_input_shape()[:-1]) io_dict = { @@ -456,87 +367,72 @@ def derive_characteristic_fxns(self, period): } ignore = self.get_nodeattr("ipgen_ignore") - if ignore == 0: # this node is being derived using RTLSIM + if ignore is False: # this node is being derived using RTLSIM # RTL-based flow super().derive_characteristic_fxns(period, override_rtlsim_dict=io_dict) return - + # Analytical flow - # Analytical flow - txns_in = {key: [] for (key, value) in io_dict["inputs"].items() if "in" in key} txns_out = {key: [] for (key, value) in io_dict["outputs"].items() if "out" in key} all_txns_in = np.empty((len(txns_in.keys()), 2 * period), dtype=np.int32) all_txns_out = np.empty((len(txns_out.keys()), 2 * period), dtype=np.int32) - - self.set_nodeattr("io_chrc_period",period) - - - + self.set_nodeattr("io_chrc_period", period) txn_in = [] txn_out = [] - # INPUT counter = 0 padding = 0 - kwargs = self.prepare_kwargs_for_characteristic_fx() - # first period cycles = 0 - txn_in, cycles, counter = self.characteristic_fx_input(txn_in,cycles,counter,kwargs) + txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - txn_in += [counter] * (period-cycles) - padding+=(period*-cycles) - + txn_in += [counter] * (period - cycles) + padding += period * -cycles # second period cycles = period - txn_in, cycles, counter = self.characteristic_fx_input(txn_in,cycles,counter,kwargs) - + txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - #for i in range(cycles,period*2): + # for i in range(cycles,period*2): # txn_in.append(counter) - #pads = (period*2-cycles) + # pads = (period*2-cycles) - txn_in += [counter] * (period*2-cycles) - 
padding+=(period*2-cycles) + txn_in += [counter] * (period * 2 - cycles) + padding += period * 2 - cycles # final assignments all_txns_in[0, :] = np.array(txn_in) self.set_nodeattr("io_chrc_in", all_txns_in) self.set_nodeattr("io_chrc_pads_in", padding) - # OUTPUT - - counter = 0 - cycles = 0 - padding = 0 - - txn_out, cycles, counter = self.characteristic_fx_output(txn_out,cycles,counter,kwargs) + counter = 0 + cycles = 0 + padding = 0 + txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) - txn_out += [counter] * (period-cycles) - padding += (period*-cycles) + txn_out += [counter] * (period - cycles) + padding += period * -cycles cycles = period - txn_out, cycles, counter = self.characteristic_fx_output(txn_out,cycles,counter,kwargs) - - txn_out += [counter] * (period*2-cycles) - padding+=(period*2-cycles) + txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) + txn_out += [counter] * (period * 2 - cycles) + padding += period * 2 - cycles - all_txns_out[0, :] = np.array(txn_out) + all_txns_out[0, :] = np.array(txn_out) self.set_nodeattr("io_chrc_out", all_txns_out) self.set_nodeattr("io_chrc_pads_out", padding) diff --git a/src/finn/custom_op/fpgadataflow/streamingmaxpool.py b/src/finn/custom_op/fpgadataflow/streamingmaxpool.py index 8c5c6abbb8..24e176c857 100755 --- a/src/finn/custom_op/fpgadataflow/streamingmaxpool.py +++ b/src/finn/custom_op/fpgadataflow/streamingmaxpool.py @@ -235,13 +235,7 @@ def execute_node(self, context, graph): result = np.transpose(result, (0, 2, 3, 1)) context[node.output[0]] = result - def prepare_kwargs_for_characteristic_fx(self): - - - - - numReps = 1 ifm_dim, k, ifm_ch = self.get_1d_attrs_normalized() ceil_mode = self.get_nodeattr("CeilMode") output_size = compute_pool_output_dim(ifm_dim[1], k[1], k[1], 0, ceil_mode) @@ -251,165 +245,180 @@ def prepare_kwargs_for_characteristic_fx(self): PoolDim = self.get_nodeattr("PoolDim")[0] ImgDim = 
self.get_nodeattr("ImgDim")[0] - #SIMD = self.get_nodeattr("SIMD") + # SIMD = self.get_nodeattr("SIMD") PE = self.get_nodeattr("PE") - # assert True==False - cycles = 0 - p = 0 - padding = 0 - windup_clocks = 4 read_delay = 5 - default_fifo_size = 2 # mini fifo instantiated by HLS - #for i in range(0,windup_clocks): + # for i in range(0,windup_clocks): # txn_out[cycles] = i # cycles+=1 # p+=1 - bursts = int(read_delay+ImgDim/PoolDim) + bursts = int(read_delay + ImgDim / PoolDim) read_tail_latency = 5 write_tail_latency = 14 - - kwargs = (ifm_dim,output_size,is1d, NumChannels,PoolDim,ImgDim,PE,windup_clocks,read_delay,bursts,read_tail_latency,write_tail_latency) + kwargs = ( + ifm_dim, + output_size, + is1d, + NumChannels, + PoolDim, + ImgDim, + PE, + windup_clocks, + read_delay, + bursts, + read_tail_latency, + write_tail_latency, + ) return kwargs - def characteristic_fx_input(self, txns, cycles, counter, kwargs): + ( + ifm_dim, + output_size, + is1d, + NumChannels, + PoolDim, + ImgDim, + PE, + windup_clocks, + read_delay, + bursts, + read_tail_latency, + write_tail_latency, + ) = kwargs - (ifm_dim,output_size,is1d,NumChannels,PoolDim,ImgDim,PE,windup_clocks,read_delay,bursts,read_tail_latency,write_tail_latency) = kwargs - - - # for i in range(0,int(ImgDim/PoolDim)): if ImgDim > PoolDim * output_size: - REMAINDER_PIXELS = ImgDim - output_size * PoolDim + REMAINDER_PIXELS = ImgDim - output_size * PoolDim else: - REMAINDER_PIXELS = 0 + REMAINDER_PIXELS = 0 tracker = 0 - maximum = int(ImgDim/PoolDim * PoolDim * ImgDim/PoolDim * PoolDim) - + maximum = int(ImgDim / PoolDim * PoolDim * ImgDim / PoolDim * PoolDim) + if not is1d: - # if i == 0: - for z in range(0,2): + # if i == 0: + for z in range(0, 2): txns.append(counter) - counter+=1 - cycles+=1 - tracker+=1 + counter += 1 + cycles += 1 + tracker += 1 - if int(ImgDim/PoolDim) > 2: + if int(ImgDim / PoolDim) > 2: txns.append(counter) - cycles+=1 - - - - for j in range(0,int(ImgDim/PoolDim)): - for k in 
range(0,int(PoolDim)): - for z in range(0,int(ImgDim/PoolDim)): + cycles += 1 + for j in range(0, int(ImgDim / PoolDim)): + for k in range(0, int(PoolDim)): + for z in range(0, int(ImgDim / PoolDim)): # actual read loop - for x in range(0,PoolDim): + for x in range(0, PoolDim): if tracker < maximum: txns.append(counter) - counter+=1 - cycles+=1 - tracker+=1 + counter += 1 + cycles += 1 + tracker += 1 - for k in range(0,int(PoolDim)): + for k in range(0, int(PoolDim)): # read loop tail end - for z in range(0,read_tail_latency): + for z in range(0, read_tail_latency): txns.append(counter) - cycles+=1 + cycles += 1 - # write delay - for z in range(0,int(ImgDim/PoolDim)): + for z in range(0, int(ImgDim / PoolDim)): txns.append(counter) - cycles+=1 + cycles += 1 else: - #1d case + # 1d case for i in range(output_size): - for z in range(0,PoolDim): - for k in range(int(NumChannels/PE)): + for z in range(0, PoolDim): + for k in range(int(NumChannels / PE)): txns.append(counter) - counter+=1 - cycles+=1 + counter += 1 + cycles += 1 - #for z in range(0,PoolDim): + # for z in range(0,PoolDim): # for k in range(0,read_tail_latency): # txns.append(counter) # cycles+=1 - for k in range(int(NumChannels/PE)): + for k in range(int(NumChannels / PE)): txns.append(counter) - cycles+=1 - + cycles += 1 for k in range(REMAINDER_PIXELS): txns.append(counter) - counter+=1 - cycles+=1 - + counter += 1 + cycles += 1 return txns, cycles, counter - def characteristic_fx_output(self, txns, cycles, counter, kwargs): - - (ifm_dim,output_size,is1d,NumChannels,PoolDim,ImgDim,PE,windup_clocks,read_delay,bursts,read_tail_latency,write_tail_latency) = kwargs - + ( + ifm_dim, + output_size, + is1d, + NumChannels, + PoolDim, + ImgDim, + PE, + windup_clocks, + read_delay, + bursts, + read_tail_latency, + write_tail_latency, + ) = kwargs txns.append(counter) - cycles+=1 + cycles += 1 if not is1d: - for j in range(0,int(ImgDim/PoolDim)): - for k in range(0,int(PoolDim)): - for z in 
range(0,int(ImgDim/PoolDim)): - + for j in range(0, int(ImgDim / PoolDim)): + for k in range(0, int(PoolDim)): + for z in range(0, int(ImgDim / PoolDim)): # actual read loop - for x in range(0,PoolDim): + for x in range(0, PoolDim): txns.append(counter) - #counter+=1 - cycles+=1 - - for k in range(0,int(PoolDim)): + # counter+=1 + cycles += 1 + + for k in range(0, int(PoolDim)): # read loop tail end - for z in range(0,read_tail_latency): + for z in range(0, read_tail_latency): txns.append(counter) - cycles+=1 + cycles += 1 - # write delay - for z in range(0,int(ImgDim/PoolDim)): + for z in range(0, int(ImgDim / PoolDim)): txns.append(counter) - counter+=1 - cycles+=1 + counter += 1 + cycles += 1 else: - #1d case + # 1d case for i in range(output_size): - for z in range(0,PoolDim): - for k in range(int(NumChannels/PE)): - txns.append(counter) - cycles+=1 + for z in range(0, PoolDim): + for k in range(int(NumChannels / PE)): + txns.append(counter) + cycles += 1 - for k in range(int(NumChannels/PE)): + for k in range(int(NumChannels / PE)): txns.append(counter) - counter+=1 - cycles+=1 + counter += 1 + cycles += 1 - #for z in range(0,PoolDim): + # for z in range(0,PoolDim): # for k in range(0,read_tail_latency): # txns.append(counter) # cycles+=1 - return txns, cycles, counter - def derive_characteristic_fxns(self, period): n_inps = np.prod(self.get_folded_input_shape()[:-1]) io_dict = { @@ -419,85 +428,69 @@ def derive_characteristic_fxns(self, period): "outputs": {"out": []}, } - - ignore = self.get_nodeattr("ipgen_ignore") - if ignore == 0: # this node is being derived using RTLSIM + if ignore is False: # this node is being derived using RTLSIM # RTL-based flow super().derive_characteristic_fxns(period, override_rtlsim_dict=io_dict) return - + # Analytical flow - # Analytical flow - txns_in = {key: [] for (key, value) in io_dict["inputs"].items() if "in" in key} txns_out = {key: [] for (key, value) in io_dict["outputs"].items() if "out" in key} all_txns_in = 
np.empty((len(txns_in.keys()), 2 * period), dtype=np.int32) all_txns_out = np.empty((len(txns_out.keys()), 2 * period), dtype=np.int32) - - self.set_nodeattr("io_chrc_period",period) - - - + self.set_nodeattr("io_chrc_period", period) txn_in = [] txn_out = [] - # INPUT counter = 0 padding = 0 - kwargs = self.prepare_kwargs_for_characteristic_fx() - # first period cycles = 0 - txn_in, cycles, counter = self.characteristic_fx_input(txn_in,cycles,counter,kwargs) + txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - txn_in += [counter] * (period-cycles) - padding+=(period*-cycles) - + txn_in += [counter] * (period - cycles) + padding += period * -cycles # second period cycles = period - txn_in, cycles, counter = self.characteristic_fx_input(txn_in,cycles,counter,kwargs) + txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - txn_in += [counter] * (period*2-cycles) - padding+=(period*2-cycles) + txn_in += [counter] * (period * 2 - cycles) + padding += period * 2 - cycles # final assignments all_txns_in[0, :] = np.array(txn_in) self.set_nodeattr("io_chrc_in", all_txns_in) self.set_nodeattr("io_chrc_pads_in", padding) - # OUTPUT - - counter = 0 - cycles = 0 - padding = 0 - - txn_out, cycles, counter = self.characteristic_fx_output(txn_out,cycles,counter,kwargs) + counter = 0 + cycles = 0 + padding = 0 + txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) - txn_out += [counter] * (period-cycles) - padding += (period*-cycles) + txn_out += [counter] * (period - cycles) + padding += period * -cycles cycles = period - txn_out, cycles, counter = self.characteristic_fx_output(txn_out,cycles,counter,kwargs) - - txn_out += [counter] * (period*2-cycles) - padding+=(period*2-cycles) + txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) + txn_out += [counter] * (period * 2 - cycles) + padding += period * 2 - cycles - all_txns_out[0, 
:] = np.array(txn_out) + all_txns_out[0, :] = np.array(txn_out) self.set_nodeattr("io_chrc_out", all_txns_out) self.set_nodeattr("io_chrc_pads_out", padding) diff --git a/src/finn/custom_op/fpgadataflow/thresholding.py b/src/finn/custom_op/fpgadataflow/thresholding.py index eed0a920ad..9df8395bc0 100644 --- a/src/finn/custom_op/fpgadataflow/thresholding.py +++ b/src/finn/custom_op/fpgadataflow/thresholding.py @@ -265,52 +265,43 @@ def calc_tmem(self): pe = self.get_nodeattr("PE") return num_channels // pe - def prepare_kwargs_for_characteristic_fx(self): - NumChannels = self.get_nodeattr("NumChannels") PE = self.get_nodeattr("PE") reps = 1 ImgDim = int(np.prod(list(self.get_nodeattr("numInputVectors")))) - NF = int(NumChannels/PE) - + NF = int(NumChannels / PE) - TOTAL_ITERATIONS = reps*ImgDim *NF - - kwargs = (TOTAL_ITERATIONS,NumChannels,PE,reps,ImgDim,NF) + TOTAL_ITERATIONS = reps * ImgDim * NF - return kwargs + kwargs = (TOTAL_ITERATIONS, NumChannels, PE, reps, ImgDim, NF) + return kwargs def characteristic_fx_input(self, txns, cycles, counter, kwargs): - - (TOTAL_ITERATIONS,NumChannels,PE,reps,ImgDim,NF) = kwargs - for i in range(0,TOTAL_ITERATIONS): + (TOTAL_ITERATIONS, NumChannels, PE, reps, ImgDim, NF) = kwargs + for i in range(0, TOTAL_ITERATIONS): txns.append(counter) - counter +=1 - cycles+=1 + counter += 1 + cycles += 1 return txns, cycles, counter - def characteristic_fx_output(self, txns, cycles, counter, kwargs): - - (TOTAL_ITERATIONS,NumChannels,PE,reps,ImgDim,NF) = kwargs + (TOTAL_ITERATIONS, NumChannels, PE, reps, ImgDim, NF) = kwargs windup = 6 - for i in range(0,windup): + for i in range(0, windup): txns.append(counter) - cycles+=1 + cycles += 1 # first input period - for i in range(0,TOTAL_ITERATIONS): + for i in range(0, TOTAL_ITERATIONS): txns.append(counter) - counter +=1 - cycles+=1 - + counter += 1 + cycles += 1 return txns, cycles, counter - def derive_characteristic_fxns(self, period): n_inps = np.prod(self.get_folded_input_shape()[:-1]) 
io_dict = { @@ -327,83 +318,69 @@ def derive_characteristic_fxns(self, period): num_w_reps = np.prod(self.get_nodeattr("numInputVectors")) io_dict["inputs"]["weights"] = [0 for i in range(num_w_reps * n_weight_inps)] - ignore = self.get_nodeattr("ipgen_ignore") - if ignore == 0: # this node is being derived using RTLSIM + if ignore is False: # this node is being derived using RTLSIM # RTL-based flow super().derive_characteristic_fxns(period, override_rtlsim_dict=io_dict) return - + # Analytical flow - + txns_in = {key: [] for (key, value) in io_dict["inputs"].items() if "in" in key} txns_out = {key: [] for (key, value) in io_dict["outputs"].items() if "out" in key} all_txns_in = np.empty((len(txns_in.keys()), 2 * period), dtype=np.int32) all_txns_out = np.empty((len(txns_out.keys()), 2 * period), dtype=np.int32) - - self.set_nodeattr("io_chrc_period",period) - - - + self.set_nodeattr("io_chrc_period", period) txn_in = [] txn_out = [] - # INPUT counter = 0 padding = 0 - kwargs = self.prepare_kwargs_for_characteristic_fx() - # first period cycles = 0 - txn_in, cycles, counter = self.characteristic_fx_input(txn_in,cycles,counter,kwargs) + txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - txn_in += [counter] * (period-cycles) - padding+=(period*-cycles) - + txn_in += [counter] * (period - cycles) + padding += period * -cycles # second period cycles = period - txn_in, cycles, counter = self.characteristic_fx_input(txn_in,cycles,counter,kwargs) - + txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - txn_in += [counter] * (period*2-cycles) - padding+=(period*2-cycles) + txn_in += [counter] * (period * 2 - cycles) + padding += period * 2 - cycles # final assignments all_txns_in[0, :] = np.array(txn_in) self.set_nodeattr("io_chrc_in", all_txns_in) self.set_nodeattr("io_chrc_pads_in", padding) - # OUTPUT - - counter = 0 - cycles = 0 - padding = 0 - - txn_out, cycles, counter = 
self.characteristic_fx_output(txn_out,cycles,counter,kwargs) + counter = 0 + cycles = 0 + padding = 0 + txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) - txn_out += [counter] * (period-cycles) - padding += (period*-cycles) + txn_out += [counter] * (period - cycles) + padding += period * -cycles cycles = period - txn_out, cycles, counter = self.characteristic_fx_output(txn_out,cycles,counter,kwargs) - - txn_out += [counter] * (period*2-cycles) - padding+=(period*2-cycles) + txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) + txn_out += [counter] * (period * 2 - cycles) + padding += period * 2 - cycles - all_txns_out[0, :] = np.array(txn_out) + all_txns_out[0, :] = np.array(txn_out) self.set_nodeattr("io_chrc_out", all_txns_out) self.set_nodeattr("io_chrc_pads_out", padding) diff --git a/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py b/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py index 62d834ba05..3724b7081c 100644 --- a/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py +++ b/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py @@ -789,7 +789,6 @@ def get_op_and_param_counts(self): ret_dict[thres_param_type] = thres_count return ret_dict - def get_verilog_top_module_intf_names(self): intf_names = super().get_verilog_top_module_intf_names() mem_mode = self.get_nodeattr("mem_mode") @@ -909,10 +908,7 @@ def code_generation_ipi(self): raise Exception("Unrecognized mem_mode for VectorVectorActivation") return cmd - def prepare_kwargs_for_characteristic_fx(self): - - # key parameters if "hls" in self.onnx_node.name: impl_style = "hls" @@ -926,64 +922,60 @@ def prepare_kwargs_for_characteristic_fx(self): NF = int(Channels / PE) SF = Kernel_2 numReps = np.prod(self.get_nodeattr("Dim")) - TOTAL_FOLD = NF*SF*numReps - + TOTAL_FOLD = NF * SF * numReps if impl_style == "rtl": - TOTAL_FOLD = int(TOTAL_FOLD/SIMD) - - kwargs = (NF,SF,SIMD,TOTAL_FOLD,impl_style) + 
TOTAL_FOLD = int(TOTAL_FOLD / SIMD) + kwargs = (NF, SF, SIMD, TOTAL_FOLD, impl_style) - # assert True==False + # assert True==False return kwargs def characteristic_fx_input(self, txns, cycles, counter, kwargs): # Compute one period of the input characteristic function - (NF,SF,SIMD,TOTAL_FOLD,impl_style) = kwargs + (NF, SF, SIMD, TOTAL_FOLD, impl_style) = kwargs # input - for i in range(0,TOTAL_FOLD): + for i in range(0, TOTAL_FOLD): txns.append(counter) - counter+=1 - cycles+=1 + counter += 1 + cycles += 1 return txns, cycles, counter def characteristic_fx_output(self, txns, cycles, counter, kwargs): # Compute one period of the output characteristic function - (NF,SF,SIMD,TOTAL_FOLD,impl_style) = kwargs + (NF, SF, SIMD, TOTAL_FOLD, impl_style) = kwargs sf = 0 if impl_style == "hls": windup = 5 else: windup = 7 - for i in range(0,windup): + for i in range(0, windup): txns.append(counter) - cycles+=1 + cycles += 1 # first input period - #txn_in[0:bursts] = np.arange(0,bursts) - for i in range(0,TOTAL_FOLD+1): - + # txn_in[0:bursts] = np.arange(0,bursts) + for i in range(0, TOTAL_FOLD + 1): if sf == SF: - counter+=1 + counter += 1 sf = 0 - sf+=1 - # txn_in[cycles] = p_in + sf += 1 + # txn_in[cycles] = p_in txns.append(counter) - cycles+=1 - #p = bursts - + cycles += 1 + # p = bursts return txns, cycles, counter - def derive_characteristic_fxns(self, period): + print("deriving characteristic func") n_inps = np.prod(self.get_folded_input_shape()[:-1]) io_dict = { "inputs": { @@ -992,91 +984,76 @@ def derive_characteristic_fxns(self, period): "outputs": {"out": []}, } - mem_mode = self.get_nodeattr("mem_mode") if mem_mode in ["internal_decoupled", "external"]: n_weight_inps = self.calc_wmem() - #num_w_reps = np.prod(self.get_nodeattr("numInputVectors")) + # num_w_reps = np.prod(self.get_nodeattr("numInputVectors")) io_dict["inputs"]["weights"] = [0 for i in range(1 * n_weight_inps)] - ignore = self.get_nodeattr("ipgen_ignore") - if ignore == 0: # this node is being 
derived using RTLSIM + if ignore is False: # this node is being derived using RTLSIM # RTL-based flow + print("rtl flow") super().derive_characteristic_fxns(period, override_rtlsim_dict=io_dict) return # Analytical flow - - + txns_in = {key: [] for (key, value) in io_dict["inputs"].items() if "in" in key} txns_out = {key: [] for (key, value) in io_dict["outputs"].items() if "out" in key} all_txns_in = np.empty((len(txns_in.keys()), 2 * period), dtype=np.int32) all_txns_out = np.empty((len(txns_out.keys()), 2 * period), dtype=np.int32) - - self.set_nodeattr("io_chrc_period",period) - - - + self.set_nodeattr("io_chrc_period", period) txn_in = [] txn_out = [] - # INPUT counter = 0 padding = 0 - kwargs = self.prepare_kwargs_for_characteristic_fx() - # first period cycles = 0 - txn_in, cycles, counter = self.characteristic_fx_input(txn_in,cycles,counter,kwargs) + txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - txn_in += [counter] * (period-cycles) - padding+=(period*-cycles) - + txn_in += [counter] * (period - cycles) + padding += period * -cycles # second period cycles = period - txn_in, cycles, counter = self.characteristic_fx_input(txn_in,cycles,counter,kwargs) - + txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - txn_in += [counter] * (period*2-cycles) - padding+=(period*2-cycles) + txn_in += [counter] * (period * 2 - cycles) + padding += period * 2 - cycles # final assignments all_txns_in[0, :] = np.array(txn_in) self.set_nodeattr("io_chrc_in", all_txns_in) self.set_nodeattr("io_chrc_pads_in", padding) - # OUTPUT - - counter = 0 - cycles = 0 - padding = 0 - - txn_out, cycles, counter = self.characteristic_fx_output(txn_out,cycles,counter,kwargs) + counter = 0 + cycles = 0 + padding = 0 + txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) - txn_out += [counter] * (period-cycles) - padding += (period*-cycles) + txn_out += [counter] * (period - 
cycles) + padding += period * -cycles cycles = period - txn_out, cycles, counter = self.characteristic_fx_output(txn_out,cycles,counter,kwargs) - - txn_out += [counter] * (period*2-cycles) - padding+=(period*2-cycles) + txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) + txn_out += [counter] * (period * 2 - cycles) + padding += period * 2 - cycles - all_txns_out[0, :] = np.array(txn_out) + all_txns_out[0, :] = np.array(txn_out) self.set_nodeattr("io_chrc_out", all_txns_out) self.set_nodeattr("io_chrc_pads_out", padding) diff --git a/src/finn/transformation/fpgadataflow/derive_characteristic.py b/src/finn/transformation/fpgadataflow/derive_characteristic.py index 4d3ac7dc67..4819c74b52 100644 --- a/src/finn/transformation/fpgadataflow/derive_characteristic.py +++ b/src/finn/transformation/fpgadataflow/derive_characteristic.py @@ -63,6 +63,7 @@ def applyNodeLocal(self, node): try: # lookup op_type in registry of CustomOps inst = registry.getCustomOp(node) + print(inst.get_nodeattr("ipgen_ignore")) inst.derive_characteristic_fxns(period=self.period) except KeyError: # exception if op_type is not supported diff --git a/src/finn/transformation/fpgadataflow/hlssynth_ip.py b/src/finn/transformation/fpgadataflow/hlssynth_ip.py index 5b901d9284..2811b09415 100644 --- a/src/finn/transformation/fpgadataflow/hlssynth_ip.py +++ b/src/finn/transformation/fpgadataflow/hlssynth_ip.py @@ -59,6 +59,10 @@ def applyNodeLocal(self, node): try: # lookup op_type in registry of CustomOps inst = registry.getCustomOp(node) + # find out if the node should be ignored + ignore = inst.get_nodeattr("ipgen_ignore") + if ignore: + return (node, False) # ensure that code is generated assert ( inst.get_nodeattr("code_gen_dir_ipgen") != "" diff --git a/src/finn/transformation/fpgadataflow/prepare_cppsim.py b/src/finn/transformation/fpgadataflow/prepare_cppsim.py index d4cc6dcc99..a00789b986 100644 --- a/src/finn/transformation/fpgadataflow/prepare_cppsim.py 
+++ b/src/finn/transformation/fpgadataflow/prepare_cppsim.py @@ -46,6 +46,11 @@ def _codegen_single_node(node, model): try: # lookup op_type in registry of CustomOps inst = registry.getCustomOp(node) + + # find out if the node should be ignored + ignore = inst.get_nodeattr("ipgen_ignore") + if ignore: + return # get the path of the code generation directory code_gen_dir = inst.get_nodeattr("code_gen_dir_cppsim") # ensure that there is a directory diff --git a/src/finn/transformation/fpgadataflow/prepare_ip.py b/src/finn/transformation/fpgadataflow/prepare_ip.py index a74e0f7afc..0e521be5ae 100644 --- a/src/finn/transformation/fpgadataflow/prepare_ip.py +++ b/src/finn/transformation/fpgadataflow/prepare_ip.py @@ -44,6 +44,10 @@ def _codegen_single_node(node, model, fpgapart, clk): try: # lookup op_type in registry of CustomOps inst = registry.getCustomOp(node) + # find out if the node should be ignored + ignore = inst.get_nodeattr("ipgen_ignore") + if ignore: + return # get the path of the code generation directory code_gen_dir = inst.get_nodeattr("code_gen_dir_ipgen") # ensure that there is a directory diff --git a/src/finn/transformation/fpgadataflow/prepare_rtlsim.py b/src/finn/transformation/fpgadataflow/prepare_rtlsim.py index b8f45deb1d..1b500ccd7b 100644 --- a/src/finn/transformation/fpgadataflow/prepare_rtlsim.py +++ b/src/finn/transformation/fpgadataflow/prepare_rtlsim.py @@ -68,6 +68,10 @@ def applyNodeLocal(self, node): try: # lookup op_type in registry of CustomOps inst = registry.getCustomOp(node) + # find out if the node should be ignored + ignore = inst.get_nodeattr("ipgen_ignore") + if ignore: + return (node, False) inst.prepare_rtlsim() # ensure that executable path is now set assert ( diff --git a/tests/brevitas/test_brevitas_debug.py b/tests/brevitas/test_brevitas_debug.py index d6879a727b..3d059a6856 100644 --- a/tests/brevitas/test_brevitas_debug.py +++ b/tests/brevitas/test_brevitas_debug.py @@ -35,6 +35,7 @@ import os import torch from 
brevitas.export import export_qonnx +from brevitas.quant_tensor import _unpack_quant_tensor from pkgutil import get_data from qonnx.core.modelwrapper import ModelWrapper from qonnx.util.cleanup import cleanup as qonnx_cleanup @@ -90,7 +91,7 @@ def test_brevitas_debug(QONNX_FINN_conversion): else: assert len(names_common) == 8 for dbg_name in names_common: - tensor_pytorch = dbg_hook.values[dbg_name].value.detach().numpy() + tensor_pytorch = _unpack_quant_tensor(dbg_hook.values[dbg_name]).detach().numpy() tensor_finn = output_dict[dbg_name] assert np.isclose(tensor_finn, tensor_pytorch, atol=1e-5).all() os.remove(finn_onnx) diff --git a/tests/fpgadataflow/test_fifosizing.py b/tests/fpgadataflow/test_fifosizing.py index 668ad5092e..6641be1bb7 100644 --- a/tests/fpgadataflow/test_fifosizing.py +++ b/tests/fpgadataflow/test_fifosizing.py @@ -27,74 +27,35 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import pytest + +import copy +import json import numpy as np +import os +import shutil +import torch +from brevitas.export import export_qonnx from onnx import TensorProto, helper from qonnx.core.datatype import DataType from qonnx.core.modelwrapper import ModelWrapper from qonnx.custom_op.general.im2col import compute_conv_output_dim -from qonnx.custom_op.general.multithreshold import multithreshold +from qonnx.custom_op.general.maxpoolnhwc import compute_pool_output_dim from qonnx.custom_op.registry import getCustomOp -from qonnx.transformation.general import ( - ApplyConfig, - GiveReadableTensorNames, - GiveUniqueNodeNames, -) +from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames from qonnx.transformation.infer_datatypes import InferDataTypes from qonnx.transformation.infer_shapes import InferShapes -from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model -import finn.core.onnx_exec as oxe -import 
finn.transformation.fpgadataflow.convert_to_hw_layers as to_hw -from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer -from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim -from finn.transformation.fpgadataflow.create_dataflow_partition import ( - CreateDataflowPartition, -) -from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP -from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP -from finn.transformation.fpgadataflow.minimize_accumulator_width import ( - MinimizeAccumulatorWidth, -) -from finn.transformation.fpgadataflow.minimize_weight_bit_width import ( - MinimizeWeightBitWidth, -) -from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim -from finn.transformation.fpgadataflow.prepare_ip import PrepareIP -from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim -from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode -from finn.transformation.fpgadataflow.set_fifo_depths import InsertAndSetFIFODepths -from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers - - -import pytest - -import json -import numpy as np -import shutil -import torch -import copy -import os -from qonnx.transformation.infer_datatypes import InferDataTypes +import finn.builder.build_dataflow as build +import finn.builder.build_dataflow_config as build_cfg import finn.transformation.fpgadataflow.convert_to_hw_layers as to_hw -from qonnx.custom_op.general.maxpoolnhwc import compute_pool_output_dim from finn.builder.build_dataflow_steps import step_set_fifo_depths +from finn.transformation.fpgadataflow.convert_to_hw_layers import InferStreamingMaxPool from finn.transformation.fpgadataflow.prepare_ip import PrepareIP -from brevitas.export import export_qonnx -from qonnx.custom_op.general.im2col import compute_conv_output_dim -from qonnx.core.modelwrapper import ModelWrapper -from qonnx.custom_op.registry import getCustomOp from 
finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers -from qonnx.transformation.general import GiveUniqueNodeNames -import finn.builder.build_dataflow as build -import finn.builder.build_dataflow_config as build_cfg from finn.util.basic import make_build_dir from finn.util.test import get_trained_network_and_ishape -from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model -from onnx import TensorProto, helper -from qonnx.core.datatype import DataType -from finn.transformation.fpgadataflow.convert_to_hw_layers import InferStreamingMaxPool -from qonnx.transformation.infer_shapes import InferShapes def generate_random_threshold_values( @@ -155,6 +116,7 @@ def make_single_fmpadding_modelwrapper(impl_style, idim, padding, num_ch, simd, return model + def make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T=None, tdt=None): mw = W.shape[0] mh = W.shape[1] @@ -228,7 +190,6 @@ def make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T=None, tdt=Non return model - def make_labelselect_modelwrapper(labels, pe, k, idt, impl_style): inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, labels]) outp = helper.make_tensor_value_info("outp", TensorProto.INT64, [1, k]) @@ -262,7 +223,6 @@ def make_labelselect_modelwrapper(labels, pe, k, idt, impl_style): return model - def _make_single_vvau_modelwrapper( W, pe, @@ -390,7 +350,6 @@ def make_single_dw_conv_modelwrapper(conv_config, idt, wdt): return model - def _infer_sparse_weight_tensor(W_conv, k_h, k_w, channels): W_sparse = np.zeros((channels, channels, k_h, k_w), dtype=np.float32) for ch in range(channels): @@ -418,7 +377,6 @@ def _calculate_dot_prod_range(dt_a, dt_b, len): return (min_prod, max_prod) - def make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt, ceil_mode): k_h, k_w = k ifm_dim_h, ifm_dim_w = ifm_dim @@ -489,7 +447,6 @@ def make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilatio return model - def 
make_channelwise_modelwrapper(C, pe, idt, odt, pdt, func, vecs): NumChannels = C.shape[0] @@ -558,7 +515,6 @@ def make_single_dwc_modelwrapper(in_shape, out_shape, inWidth, outWidth, finn_dt return model - def make_single_thresholding_modelwrapper(impl_style, T, idt, odt, actval, n_inp_vecs, num_ch): inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, n_inp_vecs + [num_ch]) outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, n_inp_vecs + [num_ch]) @@ -598,7 +554,6 @@ def make_single_thresholding_modelwrapper(impl_style, T, idt, odt, actval, n_inp return model - def fetch_test_model(topology, wbits=2, abits=2): tmp_output_dir = make_build_dir("build_fifosizing_%s_" % topology) (model, ishape) = get_trained_network_and_ishape(topology, wbits, abits) @@ -613,7 +568,6 @@ def fetch_test_model(topology, wbits=2, abits=2): @pytest.mark.parametrize( "method", ["largefifo_rtlsim_python", "largefifo_rtlsim_cpp", "characterize"] ) - @pytest.mark.parametrize("topology", ["tfc", "cnv"]) def test_fifosizing_linear(method, topology): force_python_rtlsim = "python" in method @@ -671,82 +625,319 @@ def test_fifosizing_linear(method, topology): shutil.rmtree(tmp_output_dir_cmp) - @pytest.mark.slow @pytest.mark.vivado @pytest.mark.fpgadataflow - @pytest.mark.parametrize( - "node", [ - - ("LabelSelect",10,1,1,DataType["UINT8"],"hls"), - ("LabelSelect",10,1,3,DataType["UINT8"],"hls"), - ("LabelSelect",10,2,3,DataType["UINT8"],"hls"), - ("MVAU",5,1,8,1,[1,1],DataType["UINT2"],DataType["UINT2"],DataType["UINT2"],"hls"), - ("MVAU",5,1,8,1,[1,4],DataType["UINT2"],DataType["UINT2"],DataType["UINT2"],"hls"), - ("MVAU",10,5,20,4,[1,1],DataType["UINT4"],DataType["UINT8"],DataType["UINT4"],"hls"), - ("StreamingDataWidthConverter",[1,4,1,40],[1,4,1,40],2,8,DataType["BIPOLAR"],"hls"), - ("StreamingDataWidthConverter",[1,240],[1,241],12,2,DataType["BIPOLAR"],"hls"), - ("StreamingDataWidthConverter",[1,36],[1,36],12,12,DataType["BIPOLAR"],"hls"), - 
("StreamingDataWidthConverter",[1,4,1,30],[1,4,1,18],3,9,DataType["BIPOLAR"],"hls"), - ("StreamingDataWidthConverter",[1,1,1,18],[1,1,1,30],9,3,DataType["BIPOLAR"],"hls"), - ("StreamingDataWidthConverter",[1,90],[1,90],3,10,DataType["BIPOLAR"],"hls"), - ("StreamingDataWidthConverter",[1,40],[1,30],10,3,DataType["BIPOLAR"],"hls"), - - ("FMPadding",[8,8], [1,1,1,1],2,1,DataType["INT2"],"hls"), - ("FMPadding",[8,8], [1,1,1,1],4,1,DataType["INT2"],"hls"), - ("FMPadding",[8,8], [1,1,1,1],12,1,DataType["INT2"],"hls"), - ("FMPadding",[8,8], [4,0,4,0],12,1,DataType["INT2"],"hls"), - ("FMPadding",[8,8], [0,4,0,4],5,1,DataType["INT2"],"hls"), - ("FMPadding",[2,3], [0,3,0,4],5,5,DataType["INT2"],"hls"), + "node", + [ + ("LabelSelect", 10, 1, 1, DataType["UINT8"], "hls"), + ("LabelSelect", 10, 1, 3, DataType["UINT8"], "hls"), + ("LabelSelect", 10, 2, 3, DataType["UINT8"], "hls"), + ( + "MVAU", + 5, + 1, + 8, + 1, + [1, 1], + DataType["UINT2"], + DataType["UINT2"], + DataType["UINT2"], + "hls", + ), + ( + "MVAU", + 5, + 1, + 8, + 1, + [1, 4], + DataType["UINT2"], + DataType["UINT2"], + DataType["UINT2"], + "hls", + ), + ( + "MVAU", + 10, + 5, + 20, + 4, + [1, 1], + DataType["UINT4"], + DataType["UINT8"], + DataType["UINT4"], + "hls", + ), + ( + "MVAU", + 48, + 1, + 4, + 1, + [1, 1], + DataType["UINT2"], + DataType["UINT2"], + DataType["UINT2"], + "hls", + ), + # generalized DWC-variant required + # ("StreamingDataWidthConverter",[1,4,1,40],[1,4,1,40],8,2,DataType["BIPOLAR"],"hls"), + # ("StreamingDataWidthConverter",[1,240],[1,240],12,2,DataType["BIPOLAR"],"hls"), + # ("StreamingDataWidthConverter",[1,36],[1,36],12,12,DataType["BIPOLAR"],"hls"), + # ("StreamingDataWidthConverter",[1,4,1,9],[1,4,1,18],3,9,DataType["UINT4"],"hls"), + # ("StreamingDataWidthConverter",[1,1,1,18],[1,1,1,30],9,3,DataType["BIPOLAR"],"hls"), + # ("StreamingDataWidthConverter",[1,90],[1,90],3,10,DataType["BIPOLAR"],"hls"), + # ("StreamingDataWidthConverter",[1,40],[1,30],10,3,DataType["BIPOLAR"],"hls"), 
+ ("FMPadding", [8, 8], [1, 1, 1, 1], 2, 1, DataType["INT2"], "hls"), + ("FMPadding", [8, 8], [1, 1, 1, 1], 4, 1, DataType["INT2"], "hls"), + ("FMPadding", [8, 8], [1, 1, 1, 1], 12, 1, DataType["INT2"], "hls"), + ("FMPadding", [8, 8], [4, 0, 4, 0], 12, 1, DataType["INT2"], "hls"), + ("FMPadding", [8, 8], [0, 4, 0, 4], 5, 1, DataType["INT2"], "hls"), + ("FMPadding", [2, 3], [0, 3, 0, 4], 5, 5, DataType["INT2"], "hls"), # idim, pad, num_ch,simd,idt - ("ChannelwiseOp",DataType["INT8"], DataType["INT4"],DataType["INT4"] , 4, 16, "add", [1,4,4], "hls") - ("ChannelwiseOp",DataType["INT8"], DataType["INT4"],DataType["INT4"] , 2, 16, "add", [1], "hls") - ("ChannelwiseOp",DataType["INT8"], DataType["INT4"],DataType["INT4"] , 1, 16, "add", [1, 7 ,7], "hls") - #,idt, act, pdt, nf, ich, func, vecs, impl_style - - # (Pdb) (ifm_dim,output_size,is1d, NumChannels,PoolDim,ImgDim,PE) - # ([1, 512], 256, True, 32, 2, 512, 1) - ("StreamingMaxPool",DataType["INT4"],True,2,32,4,1 ,0,"hls"), - ("StreamingMaxPool",DataType["INT4"],True,1,4,1,1,0,"hls"), - ("StreamingMaxPool",DataType["BIPOLAR"],False,1,10,1,1,1), - ("StreamingMaxPool",DataType["BIPOLAR"],False,2,10,64,1,1,"hls"), - ("StreamingMaxPool",DataType["BIPOLAR"],False,2,28,64,1,0,"hls"), - # idt, dim_1d, k, ifm_dim, ifm_ch, pe, ceil_mode,impl_style - ("StreamingMaxPool",DataType["BIPOLAR"],False,1,10,1,1,1), - ("StreamingMaxPool",DataType["INT4"],[True],[4],[10],[3],[3],[1],"hls"), - ("ConvolutionInputGenerator",DataType["INT2"],[6, 6],[12, 12],8,[4,4],[1,1],2,0,0,1,False,0,"hls"), - ("ConvolutionInputGenerator",DataType["INT2"],[6,1],[12,1],16, [1,1], [1,1], 2, 0, 0, 1, False, 1,"hls"), - # """ idt, k, ifm_dim, ifm_ch,stride, dilation, simd, dw, parallel_window, m, flip, is1d""" - - ("ConvolutionInputGenerator",DataType["INT2"],[6,1],[12,1],16, [3,1], [1,1], 2, 0, 0, 1, False, 1,"hls"), - ("ConvolutionInputGenerator",DataType["INT2"],[6,1],[12,1],16, [1,1], [1,1], 2, 1, 0, 1, False, 1,"hls"), - 
("ConvolutionInputGenerator",DataType["INT2"],[6,1],[12,1],16, [2,1], [1,1], 2, 1, 0, 1, False, 1,"hls"), - ("ConvolutionInputGenerator",DataType["INT2"],[4,4],[8,8],6, [4,4], [1,1], 2, 1, 0, 1, False, 0,"hls"), - ("ConvolutionInputGenerator",DataType["INT2"],[6,6],[10,10],8, [2,2], [1,1], 2, 1, 0, 1, False, 0,"hls"), - ("ConvolutionInputGenerator",DataType["INT2"],[4,4],[10,10],16, [2,2], [1,1], 2, 1, 0, 1, False, 0,"hls"), - ("ConvolutionInputGenerator",DataType["INT2"],[6,1],[8,1],8,[3,1],[1,1],1,0, 0,1,False, 1,"hls"), - # """ idt, k, ifm_dim, ifm_ch,stride, dilation, simd, dw, parallel_window, m, flip, is1d""" - ("VVAU",DataType["INT4"], DataType["INT4"], DataType["INT4"], 3, 1, 10, 10, 3, 3, 3, "internal_embedded",0,"hls"), - ("VVAU",DataType["INT4"], DataType["INT4"], None, 3, 3, 10, 10, 3, 3, 3, "internal_embedded",1,"rtl"), - ("Thresholding",[15,3],True,True,"hls"), - ("MVAU",48,1,4,1,[1,1],DataType["UINT2"],DataType["UINT2"],DataType["UINT2"],"hls"), - ] + ( + "ChannelwiseOp", + DataType["INT8"], + DataType["INT4"], + DataType["INT4"], + 4, + 16, + "add", + [1, 4, 4], + "hls", + ), + ( + "ChannelwiseOp", + DataType["INT8"], + DataType["INT4"], + DataType["INT4"], + 2, + 16, + "add", + [1], + "hls", + ), + ( + "ChannelwiseOp", + DataType["INT8"], + DataType["INT4"], + DataType["INT4"], + 1, + 16, + "add", + [1, 7, 7], + "hls", + ), + # ,idt, act, pdt, nf, ich, func, vecs, impl_style + # (Pdb) (ifm_dim,output_size,is1d, NumChannels,PoolDim,ImgDim,PE) + ("StreamingMaxPool", DataType["INT4"], True, 2, 32, 4, 1, 0, "hls"), + ("StreamingMaxPool", DataType["INT4"], True, 1, 4, 1, 1, 0, "hls"), + ("StreamingMaxPool", DataType["BIPOLAR"], False, 1, 10, 1, 1, 1, "hls"), + ("StreamingMaxPool", DataType["BIPOLAR"], False, 2, 10, 64, 1, 1, "hls"), + ("StreamingMaxPool", DataType["BIPOLAR"], False, 2, 28, 64, 1, 0, "hls"), + # idt, dim_1d, k, ifm_dim, ifm_ch, pe, ceil_mode,impl_style + ("StreamingMaxPool", DataType["BIPOLAR"], False, 1, 10, 1, 1, 1, "hls"), + 
("StreamingMaxPool", DataType["INT4"], True, 4, 10, 3, 3, 1, "hls"), + ( + "ConvolutionInputGenerator", + DataType["INT2"], + [6, 6], + [12, 12], + 8, + [4, 4], + [1, 1], + 2, + 0, + 0, + 1, + False, + 0, + "hls", + ), + ( + "ConvolutionInputGenerator", + DataType["INT2"], + [6, 1], + [12, 1], + 16, + [1, 1], + [1, 1], + 2, + 0, + 0, + 1, + False, + 1, + "hls", + ), + # idt,k, ifm_dim, ifm_ch,stride, dilation, + # simd, dw, parallel_window, m, flip, is1d + ( + "ConvolutionInputGenerator", + DataType["INT2"], + [6, 1], + [12, 1], + 16, + [3, 1], + [1, 1], + 2, + 0, + 0, + 1, + False, + 1, + "hls", + ), + ( + "ConvolutionInputGenerator", + DataType["INT2"], + [6, 1], + [12, 1], + 16, + [1, 1], + [1, 1], + 2, + 1, + 0, + 1, + False, + 1, + "hls", + ), + ( + "ConvolutionInputGenerator", + DataType["INT2"], + [6, 1], + [12, 1], + 16, + [2, 1], + [1, 1], + 2, + 1, + 0, + 1, + False, + 1, + "hls", + ), + ( + "ConvolutionInputGenerator", + DataType["INT2"], + [4, 4], + [8, 8], + 6, + [4, 4], + [1, 1], + 2, + 1, + 0, + 1, + False, + 0, + "hls", + ), + ( + "ConvolutionInputGenerator", + DataType["INT2"], + [6, 6], + [10, 10], + 8, + [2, 2], + [1, 1], + 2, + 1, + 0, + 1, + False, + 0, + "hls", + ), + ( + "ConvolutionInputGenerator", + DataType["INT2"], + [4, 4], + [10, 10], + 16, + [2, 2], + [1, 1], + 2, + 1, + 0, + 1, + False, + 0, + "hls", + ), + ( + "ConvolutionInputGenerator", + DataType["INT2"], + [6, 1], + [8, 1], + 8, + [3, 1], + [1, 1], + 1, + 0, + 0, + 1, + False, + 1, + "hls", + ), + # idt,k, ifm_dim, ifm_ch,stride, dilation, simd, + # dw, parallel_window, m, flip, is1d + ( + "VVAU", + DataType["INT4"], + DataType["INT4"], + DataType["INT4"], + 3, + 1, + 10, + 10, + 3, + 3, + 3, + "internal_embedded", + 0, + "hls", + ), + ( + "VVAU", + DataType["INT4"], + DataType["INT4"], + None, + 3, + 3, + 10, + 10, + 3, + 3, + 3, + "internal_embedded", + 1, + "rtl", + ), + ("Thresholding", [15, 3], True, True, "hls"), + ], ) def test_fifosizing_analytical_characterization(node): 
- test_rtl = True - build_dir = os.environ["FINN_BUILD_DIR"] test_fpga_part = "xc7z020clg400-1" target_clk_ns = 4 - + + # attempt to cache a pre-existing variant of the model + # this is to avoid generating RTL multiple times during + # test debugging + build_dir = os.environ["FINN_BUILD_DIR"] model_cache = None for x in os.listdir(build_dir): if x.startswith(str(node)): - print("cached model found") - model_cache = f'{build_dir}/{x}/model.onnx' - #if model_cache is None: + model_cache = f"{build_dir}/{x}/model.onnx" + tmp_output_dir = make_build_dir("build_fifosizing") if node[0] == "LabelSelect": @@ -773,7 +964,9 @@ def test_fifosizing_analytical_characterization(node): if narrow: actval += 1 - model = make_single_thresholding_modelwrapper(impl_style, T, idt, odt, actval, n_inp_vecs, ch) + model = make_single_thresholding_modelwrapper( + impl_style, T, idt, odt, actval, n_inp_vecs, ch + ) model = model.transform(SpecializeLayers(test_fpga_part)) # Make sure that specialize layer did not default to HLS implementation @@ -787,14 +980,13 @@ def test_fifosizing_analytical_characterization(node): op_inst.set_nodeattr("runtime_writeable_weights", 1) model0 = model - elif node[0] == "MVAU": - mw,simd,mh,pe,numVectors,wdt,idt,odt,impl_style = node[1:] + mw, simd, mh, pe, numVectors, wdt, idt, odt, impl_style = node[1:] W = gen_finn_dt_tensor(wdt, (mw, mh)) model0 = make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T=None, tdt=None) - - getCustomOp(model0.graph.node[0]).set_nodeattr("numInputVectors",numVectors) - # model0 = make_labelselect_modelwrapper(labels, pe, k, idt, impl_style) + + getCustomOp(model0.graph.node[0]).set_nodeattr("numInputVectors", numVectors) + # model0 = make_labelselect_modelwrapper(labels, pe, k, idt, impl_style) elif node[0] == "ChannelwiseOp": idt, act, pdt, nf, ich, func, vecs, impl_style = node[1:] @@ -803,21 +995,22 @@ def test_fifosizing_analytical_characterization(node): odt = act pe = ich // nf C = gen_finn_dt_tensor(pdt, 
(ich)) - + model0 = make_channelwise_modelwrapper(C, pe, idt, odt, pdt, func, vecs) elif node[0] == "FMPadding": - idim,pad,num_ch,simd,idt,impl_style = node[1:] + idim, pad, num_ch, simd, idt, impl_style = node[1:] model0 = make_single_fmpadding_modelwrapper(impl_style, idim, pad, num_ch, simd, idt) elif node[0] == "StreamingDataWidthConverter": in_shape, out_shape, in_width, out_width, dtype, impl_style = node[1:] - model0 = make_single_dwc_modelwrapper(in_shape, out_shape, in_width, out_width,dtype, impl_style) - # model0 = make_labelselect_modelwrapper(labels, pe, k, idt, impl_style) + model0 = make_single_dwc_modelwrapper( + in_shape, out_shape, in_width, out_width, dtype, impl_style + ) + # model0 = make_labelselect_modelwrapper(labels, pe, k, idt, impl_style) elif node[0] == "StreamingMaxPool": - - idt, dim_1d, k, ifm_dim, ifm_ch, pe, ceil_mode,impl_style = node[1:] + idt, dim_1d, k, ifm_dim, ifm_ch, pe, ceil_mode, impl_style = node[1:] ifm_dim_h = ifm_dim k_h = k if dim_1d: @@ -834,14 +1027,14 @@ def test_fifosizing_analytical_characterization(node): ofm_dim_h = compute_pool_output_dim(ifm_dim_h, k_h, stride_h, 0, ceil_mode) ofm_dim_w = compute_pool_output_dim(ifm_dim_w, k_w, stride_w, 0, ceil_mode) ofm_dim = (ofm_dim_h, ofm_dim_w) - #if idt == DataType["BIPOLAR"] and dim_1d: + # if idt == DataType["BIPOLAR"] and dim_1d: # pytest.skip("Skipping binary StreamingMaxPool_1d (not implemented)") if (ifm_dim_h % k_h != 0 or ifm_dim_w % k_w != 0) and (not dim_1d): pytest.skip("StreamingMaxPool_2d test w/ ImgDim % PoolDim != 0 not implemented") if pe > ifm_ch: pytest.skip("PE cannot be larger than number of input channels") - # if pe > 1 and (not dim_1d): - # pytest.skip("PE>1 only supported for StreamingMaxPool_1d") + # if pe > 1 and (not dim_1d): + # pytest.skip("PE>1 only supported for StreamingMaxPool_1d") golden = make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt, ceil_mode) @@ -852,14 +1045,28 @@ def 
test_fifosizing_analytical_characterization(node): # Ensure PE value is set streamingmaxpool_node = model0.get_nodes_by_op_type("StreamingMaxPool_hls")[0] - #assert True == False + # assert True == False if pe > 1 and (not dim_1d): getCustomOp(streamingmaxpool_node).set_nodeattr("PE", 1) else: getCustomOp(streamingmaxpool_node).set_nodeattr("PE", pe) elif node[0] == "ConvolutionInputGenerator": - idt,k,ifm_dim,ifm_ch,stride,dilation,simd,dw,parallel_window,m,flip,is1d,impl_style = node[1:] + ( + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + simd, + dw, + parallel_window, + m, + flip, + is1d, + impl_style, + ) = node[1:] if flip: if ( ifm_dim[0] == ifm_dim[1] @@ -891,7 +1098,9 @@ def test_fifosizing_analytical_characterization(node): pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") - if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + if ((stride_h > k_h) or (stride_w > k_w)) and not ( + parallel_window or (k_h == 1 and k_w == 1) + ): pytest.skip("Not all combinations for stride > k edge case supported in default mode") if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): pytest.skip("Parallel window requires SIMD=C for non-depthwise case") @@ -900,12 +1109,14 @@ def test_fifosizing_analytical_characterization(node): ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) ofm_dim = [ofm_dim_h, ofm_dim_w] - model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + model = make_single_im2col_modelwrapper( + k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw + ) model = model.transform(to_hw.InferConvInpGen()) # set impl_style inst = getCustomOp(model.get_nodes_by_op_type("ConvolutionInputGenerator")[0]) - inst.set_nodeattr("is1D",is1d) + 
inst.set_nodeattr("is1D", is1d) inst.set_nodeattr("preferred_impl_style", impl_style) model = model.transform(SpecializeLayers("xc7z020clg400-1")) # set simd @@ -921,11 +1132,24 @@ def test_fifosizing_analytical_characterization(node): model0 = model elif node[0] == "VVAU": - idt, wdt, act, pe, simd, dim_h, dim_w, k_h, k_w, channels, mem_mode, no_act,impl_style = node[1:] - + ( + idt, + wdt, + act, + pe, + simd, + dim_h, + dim_w, + k_h, + k_w, + channels, + mem_mode, + no_act, + impl_style, + ) = node[1:] if dim_w == 1 and k_w != 1: - pytest.skip("1D image requires 1D kernel, skipping.") + pytest.skip("1D image requires 1D kernel, skipping.") if channels % pe != 0: pytest.skip("Requirement Channels divisable by PE is violated.") @@ -935,8 +1159,6 @@ def test_fifosizing_analytical_characterization(node): # Generate weights in expected shape for ONNX and HLS node W = gen_finn_dt_tensor(wdt, (channels, 1, k_h, k_w)) # shape: [channels, 1, k, k] - W_onnx = _infer_sparse_weight_tensor(W, k_h, k_w, channels) # shape: [k*k*channels, channels] - # Generate inputs in expected format for ONNX and HLS node x = gen_finn_dt_tensor(idt, (1, dim_h, dim_w, k_h * k_w * channels)) x_vvau = x.reshape(1, dim_h, dim_w, k_h * k_w, channels // pe, pe) @@ -965,44 +1187,58 @@ def test_fifosizing_analytical_characterization(node): tdt = DataType["INT32"] model = _make_single_vvau_modelwrapper( - W, pe, simd, k_h, k_w, channels, dim_h, dim_w, wdt, idt, odt, T, tdt, mem_mode, impl_style + W, + pe, + simd, + k_h, + k_w, + channels, + dim_h, + dim_w, + wdt, + idt, + odt, + T, + tdt, + mem_mode, + impl_style, ) model = model.transform(GiveUniqueNodeNames()) model = model.transform(GiveReadableTensorNames()) inst = getCustomOp(model.graph.node[0]) - inst.set_nodeattr("noActivation",no_act) + inst.set_nodeattr("noActivation", no_act) if impl_style == "rtl": - inst.set_nodeattr("resType","dsp") + inst.set_nodeattr("resType", "dsp") inst.set_nodeattr("preferred_impl_style", impl_style) model0 = 
model.transform(SpecializeLayers("xcvc")) test_fpga_part = "xcvc" - - outputs = [build_cfg.DataflowOutputType.ESTIMATE_REPORTS] + outputs = [build_cfg.DataflowOutputType.ESTIMATE_REPORTS] model1 = copy.deepcopy(model0) - if model_cache is not None: model0 = ModelWrapper(model_cache) - node_inst0 = getCustomOp(model0.graph.node[0]) node_inst1 = getCustomOp(model1.graph.node[0]) - node_inst0.set_nodeattr("ipgen_ignore", 0) - node_inst1.set_nodeattr("ipgen_ignore", 1) + + # generate ip for node0 (RTL-based characterization) + node_inst0.set_nodeattr("ipgen_ignore", False) + + # do not generate ip for node0 (analytical characterization) + node_inst1.set_nodeattr("ipgen_ignore", True) cfg = build_cfg.DataflowBuildConfig( output_dir=tmp_output_dir, synth_clk_period_ns=target_clk_ns, generate_outputs=outputs, fpga_part=test_fpga_part, - auto_fifo_strategy = "characterize", - auto_fifo_depths = True, - split_large_fifos = False - ) - + auto_fifo_strategy="characterize_analytic", + auto_fifo_depths=True, + split_large_fifos=False, + ) # analytical inst = getCustomOp(model1.graph.node[0]) @@ -1010,20 +1246,22 @@ def test_fifosizing_analytical_characterization(node): model1 = model1.transform(SpecializeLayers(test_fpga_part)) model1 = model1.transform(GiveUniqueNodeNames()) model1 = model1.transform(PrepareIP(test_fpga_part, target_clk_ns)) - model1 = step_set_fifo_depths(model1,cfg) + model1 = step_set_fifo_depths(model1, cfg) # rtlsim-based if test_rtl: + cfg.auto_fifo_strategy = "characterize" if model_cache is None: inst = getCustomOp(model0.graph.node[0]) model0 = model0.transform(SpecializeLayers(test_fpga_part)) model0 = model0.transform(GiveUniqueNodeNames()) model0 = model0.transform(PrepareIP(test_fpga_part, target_clk_ns)) - model0 = step_set_fifo_depths(model0,cfg) + model0 = step_set_fifo_depths(model0, cfg) tmp_caching_output_dir = make_build_dir(str(node)) - model0.save(tmp_caching_output_dir+"/model.onnx") + model0.save(tmp_caching_output_dir + "/model.onnx") 
+ # grab the last nodes of the model if test_rtl: for n in model0.graph.node: if n.op_type.startswith(node[0]): @@ -1036,26 +1274,9 @@ def test_fifosizing_analytical_characterization(node): continue if test_rtl: - print("in RTLSIM") - print(node_inst0.get_nodeattr("io_chrc_in_concat")) - print("in ANALYTICAL") - print(node_inst1.get_nodeattr("io_chrc_in_concat")) - - if test_rtl: - print("out RTLSIM") - print(node_inst0.get_nodeattr("io_chrc_out_concat")) - print("out ANALYTICAL") - print(node_inst1.get_nodeattr("io_chrc_out_concat")) - #assert True==False - - #print("Producer") - # print(node_inst1.get_nodeattr("io_chrc_out")) - - # print("Consumer") - # print(node_inst1.get_nodeattr("io_chrc_in")) - - #assert True==False - #assert node_inst0.get_nodeattr("depth") == node_inst1.get_nodeattr("depth") - if test_rtl: - assert np.array_equal(node_inst0.get_nodeattr("io_chrc_in"),node_inst1.get_nodeattr("io_chrc_in")) - assert np.array_equal(node_inst0.get_nodeattr("io_chrc_out"),node_inst1.get_nodeattr("io_chrc_out")) + assert np.array_equal( + node_inst0.get_nodeattr("io_chrc_in"), node_inst1.get_nodeattr("io_chrc_in") + ) + assert np.array_equal( + node_inst0.get_nodeattr("io_chrc_out"), node_inst1.get_nodeattr("io_chrc_out") + ) From 4df80fadeb359314942849b4a187d2a8fb55c699 Mon Sep 17 00:00:00 2001 From: lstasytis Date: Tue, 15 Oct 2024 15:33:42 +0000 Subject: [PATCH 04/12] remove run-docker personalization code Signed-off-by: lstasytis --- run-docker.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/run-docker.sh b/run-docker.sh index 296c3f42ce..b1fe44eb0c 100755 --- a/run-docker.sh +++ b/run-docker.sh @@ -84,10 +84,10 @@ SCRIPTPATH=$(dirname "$SCRIPT") : ${ALVEO_USERNAME="alveo_user"} : ${ALVEO_PASSWORD=""} : ${ALVEO_BOARD="U250"} -: ${ALVEO_TARGET_DIR="/tmp/finn"} +: ${ALVEO_TARGET_DIR="/tmp"} : ${PLATFORM_REPO_PATHS="/opt/xilinx/platforms"} : ${XRT_DEB_VERSION="xrt_202220.2.14.354_22.04-amd64-xrt"} -: 
${FINN_HOST_BUILD_DIR="/tmp/finn/$DOCKER_INST_NAME"} +: ${FINN_HOST_BUILD_DIR="/tmp/$DOCKER_INST_NAME"} : ${FINN_DOCKER_TAG="xilinx/finn:$(git describe --always --tags --dirty).$XRT_DEB_VERSION"} : ${FINN_DOCKER_PREBUILT="0"} : ${FINN_DOCKER_RUN_AS_ROOT="0"} From 0efe48b61b88b65b14e9283965b3e4684046ec23 Mon Sep 17 00:00:00 2001 From: lstasytis Date: Tue, 15 Oct 2024 16:09:43 +0000 Subject: [PATCH 05/12] faulty base branch update removed Signed-off-by: lstasytis --- test_brevitas_debug.onnx | Bin 246949 -> 0 bytes tests/brevitas/test_brevitas_debug.py | 3 +-- 2 files changed, 1 insertion(+), 2 deletions(-) delete mode 100644 test_brevitas_debug.onnx diff --git a/test_brevitas_debug.onnx b/test_brevitas_debug.onnx deleted file mode 100644 index 686d7f5a50427bff37d415d4dca3b13f239ce9dc..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 246949 zcmeF03Akldas6qA2A+t};sglVq9}>VB%nf{%Y6?~K%A#cq(uZ2Xk;*89Kk8ZA!>}G z(SQR+MKmf7QTjeWq9$mR;H-Zq;t-=zG>NnSsyh2ucb$FDeeX43V)X9sJGZJ#NVb+1x|swmtr&4Ub-b(lIBVGPlEn zk30Q@^`{(6cciM(q zZ+8Bw&F2o7t8{bQ-T%y!#;$X16^>g)zIF=>bNkLoXKuTPocU#Lk@yDHZ%>Ks&{iYpk?M!@soVos#GwyccDW{%!#`&wZxYwhLexkaO&NTKkk&{PFsJ{+_7^##SlkRuWqt|Y4S3Tpz zhn`^mCu}(BF?;Pax2>&t%wx_t`_$vsZh60>AG+_7L+&tFjOJFK@Zh8OJ@)W5+datl zE4N*{#i5UR%-r)=&2`Z3d$~u?d$~vVz3kWbUUunyFSh)0FXr&>i)rW2?eNgk)}M0v zsT)rBe{lYlT65F;uRlZnZEQoic6FQ0J!|v1E@y6+;rcUlpLr=7h1q!XXC{)`hh zoRa@&mTK;^RCC{@n)|J}P4CS3x3*JXYwemzbgv!fwm$CE(@#8U!zs78=*m40Jb%@y zz1GZabMl6hk2~dzGf%$7V;}q2#<88|ZhFRP>rXu8#8Zwx=Hv~JIj;Q;;Z_$FwL=2k!Sh-tbCMd7*=JL1BG5fT?Wq01b3vSYvd!Kzb>HFX@bA21ktv>wBy-yu( zShM;*PkPWgALl=JIFJe%)_|w#;va{qmdP9{E>MOKJah<~P8a_yf6I za4Y{~d7t)gXZw3Qs+cmj-NK1a%s-vh%(YegQL|O#YqzlAf1{5(?cep|IsUgBb0Y8j zU0?3Eo&A9-bIN~y*r1)aicWmF)@&R9t#ZucPCR3;*KfYfV24%1rT)@8;^~_o`;vPP z?)J9Z4KDcDs=?1T-fHl(H(k@bW9)rF9_LhU!zj2SnGjDO&;3*sbt$EKudk%I!>9&J+eDQXJy$?Td@bc{r z8ytD{mV>+QaR0&0Z@tst-v6-E;3m86F*y3)?=<-70Xq&}^8RfG*WK~%gY*A>?cfbR 
zSUq^myKcGox|d(qT=<7K8~okR<_7h?aZT#nRw;8?fWglr?y!T^ApSatLn^(W-%gu(trOj2Z{lWY9J9Cfb!dHE4-Ou;? z>gcn3|6}vv>;AoY>H6=C{`^_b8@=G|?;m~s;eWI4uOIpB<|8lo+GxjT-*)3oo_4_C z15dlnVC~=j*Sg=^YKOr^dtW+w=M#4r-EEt1HHZJ{(arH+{m5wj`SzL~c;)DlNAEm% z#OU?S-CzFR(d&Qy$>xWjdg|zkZ>-t4?px=Le*A^+kAAl8)0;1^`sC=cz0V&#aK9%s zcl_9EnzcXryXK|ezj(Cw;k%EXa_pK*|Kq&RjefZL4x=k~|J%_eYhN{5^~cX29ku7G zOAlVZb#uvG?$L}M@YQusyXcCIi!c01^NiO%Y;?^lpVVx9iw`%W4{dB#@S66u*L3dh zUwqjE*6(-OqZiH@Zus1Gm!Ea?hQZr!v&H3ur|fh2v-h}S-Dl_bT>SKbe|Fg&?|t=U z&v?t9A9~m)UU%7t&%WQ~+x^SImwjSxblGKpbnNgKf4b{suYK}Pm+!UJp~GK1@)v^# zU-PQV?zr%i%O3jRgAaY|$yXeD`uElhpZwkrU3S%ugUdhv(>GuK`4`;h(BYT%ylnmV zPanSJs*?s!eBVKr{osXv)x6-Ae|-5jj=uk)pSx_c;i((eEdKsQ-yXd0q32)r(xYBI zxNgr+53b$)UkCs6;rCtk_D`I1`PSRKVDNh%zxL2~ZTZ+kpT7HD7QepjF^7KYL4Q5m z=a)Yn9Jg?n;g)~D%W#Xs&$@i{p-&9X`qtOi?Y-lM#n1lWflWVqt}_;ldp*Ja^&Iue`;= zyZ_^mg*TqHcHyyiI(OkVx4Of^?uXrM;dN)eVPW0J?y>N?&p&u!v%6lj(40BHu-Aj0 zzcBZz3l_E>tzUS^Z4O>IP=PvB>7w=y9*{+8yJmHa#U3l>h zw_N!6H5V?t<xMbMCv@VE29B-+X2Mhs`7G=fmrs`-|qxwV!NWa^5eShyL)tn=fs<`C#ksU)NlA z^G`Pye)zSc6W;cP=8vDZ<=_stzp^>)$R9VK-r|GJ0Y6+b7+!K!v&B2N9=!Y3HyK=h z<)}IEzH@`y-g$??npfO%@S1yWJ2?ETe{O#A%Y6oWymH&cga3B(!7K0e@6EMOeNXeF zul%IB>e-tQ-gx^vFZr{nye9wIv~a6o^O~avgZCW&t>)rSf2{f4M}4BXVAWxxyB`1U zX0+kXqkBL8OU;YUePgrrQQsT=-UplJb+3ExXxqmh**yOL-RqEdUHE&Wb542o=$UtY z^TD@RIJ5cWdoLb+?4V~fU%2|`qx-%1O{0$-wa4PxFMoI4U!8Z`#jXGB%gt_2f7ZJ9 z-uo3B4?OSP>-OFCiK7$#{=m_j&-ky={vW({^zu8ubK|Ogn$h39>6+1C$E`>Ia{5z8 z@4oBHn|uFc;RAC*Sw-TaNZZ6KX~&8UotrIZSOev|Gnhh zgTecDJ@i$Vymhe0iQifGtY;1ukG=MS#oe}je)G}m&RN`i@PffjU-_2Nd){|gbHNvW z+`QtZs~0c)^4FS6-+kcXr3YNL_~Uy$wHZ9|Ig59F?^j26|9@{7th?tmiw~Z^WU%nY zt4Gg$`1y-3{lMXa=RED77tcKCQO(ybzU^RDbJNA6UcKMq&5m3(*!`GK4bD02!gb%i z=ploxPuTI$W6yd`bL*d9GWzJp{(L2`$$vIowbd_%=biPM;RC*R_V8QJ`McrySASyo zhfle9c=Nfdhu_`f?}mrJ^xMNHzUafl*WZ5I`8#~z+rxtoK7P2>@4jdFPoMqI;aC6h z%flZX`jz3QHv8!CW&8c>aP_UXnm=LN(eTJSUO0T~{$Cz0Jo6R9KRxZD;nhET&+w;T z{nGHW`~70L%emJLzqR*{^H0A0#^E+~Hkr@%G`qd%kdZ!A(Ci{Eq|QI()~LThE{MmA@YT%N@3yfBac* 
z9-g}Tm&5=3>vs>2JNEB~7i|2@@KJ9c4PWvgyTgf>4mW({?}jHFdXxFDKl7u*?XLLy z;U2HPV0g0^zGV23YrZ}F+}FP{9KGk?hD-fx;=haRHJ!E7H=5u3>2=LsM{PcM*c0{~ z{Qj->9z5c4+YO#_$(;t9Kg_;|ciVID-c$D-d~l~*4BmR_9S2+Nx8vXmfBBiwN7sM6 zIsYqjgPVNg?t?4#xy#^)hi^YP|KcAv+rMwkVDryzHMr`;?FY}k(|4P%KKa1G9w+a) z__Cdj9PDuGtp^9~)eL_6ufJ^eKJ4hhGyi*s!CgN6?dH87f5_ku?mss;?C0-nR=x6O zgYP_e>%q(Z`@fsNxcAqZBk%L6=Fl&EZM0$S9)s7sb2zx<+Es(o&f9Eov)#5DoOs7( zaOq8V9UOCqFEzh-@Zp2aZgtnecP_pC;5B<~F?ie+M-Q%Byv5*sx4Zk`mnUsIc>F!C zZ9e|vFEqD0WQ)PqkNDE)ez)9f@ZYCxv*gbv{tMAw(|hiF#RvZITRS(aUi8I{`<=0N zwAFK;F*^O5cRysScdS~s)pj>&{^H;EAMJ4R4$X7-+S_o9EfeB+<) zz4;+;eEyp@KItV}kFNXK78`H#z(bpBPTIY>!!LJkcHc06>F?fa>(Srb^h+C0IQ7Cy zPx_sET>8iletxvs3!ZT4U7H7tzVWs%Zv2N&-Q(UrIR828c76E4m!A3gkB@e~{B!H} zy!6YL{`dojt-I(m`>%WP>rT7Zfp355A@6y}57vGAt5UzSjp{^tHQRy5^c69`gQIY&iHsi|Z~Od}hPObN*=Obu0MSwaU%_IK$;frs%=6xFDpaD;D4J z#=RE*{Sm)t&cDk=i+A4TpPKuAZim6X7yZuSIp4i@@%xAGGPuui_ZvL*tn&shzweVr z+kNZH&Djrl#^9>AK6tR(mFI8#=1rcyxXtahU;Nx{U$pV~pS`bn!;|j5?hzk5bMWnd zddeZ6`_9XogC6<3#gBjZIg1x;{*H~$`1I+`p0B@hal`yGo7zPJ8+>%Oq|;>C-e_#cBW-hNf{oW(<%osT_Xarmj%k9KQbI@s#OV;2wj$*Shc zb^qAB^gMiOf2CRfi5HLle&n0OTmS6s!%Kd6kNMTtZash7fB5w9HXA=We93`t8*cp8`-gXU z%If*g?y>v)#<$;O{>sC)pTFNZo6n#3x?9Zu@dLJ)f7>=+9lq(251Bu3-JRyYefDkU zU-pRa3?FjsLGw>~#H#s=c0F+ZyvN;s{`@>gEt+!ca|4A4Bw^06_wB=W>eo#Y4|0D5;EkCg4RUf3mj!2h|ZE_2)YCk4k4^D&?o?0tS}o;?UhlsYf2rPb9?Yse zmebXIt*3b^XQ{l_*EQN2_qxVv?n>{q65dKU%>SQ=zl!@`-#cDk-3@t9)4ulZNpGcl zP2)|&Z^+)YmR)Py>!o^q{OgPTPu%nR?sk22oBG~v=>2b~{`%Z;rkTBg*&CR>f!Q0F zy@A;qn7x778<@R;*&CR>fg9@$Z0dhQ-B@#1;^Y6lbz|+fsXm*^U&(!z;^@3-`BHlC zsjlXFkN?nT^LZo$jEy>$oExM7$$b?*fiT9Jr_-RaD(Xt14c#8)(JoO5$`EZ=$d^F%h`d&}-i=KFkH+>40b@9a~bcSi491anj=r}odg-i5)JU)ZRG0!b>$TM1>!yTzL;xkR>IFIYt zq0b}xvR8*EI6E;HUd)o+!_#*#tk$p>5pH@FA9Uav!`J=o@uOUNflI%`ac$Gpr&rP7 z+u74J>f2TYqenEKm|?ZfLeudzZhhD{c*WB>>FNGO2bVrxXwhgtFwx_MmKYv|rq zo9PaYXsxqSXP!D!`F;A-**yHXU-sOy>=PbVa}-ak^zj4t*xAi}g-btlO{>Cxf>-)z zA}VHieUH|0{K*{UMLrUa<@9xz)XL0hJK(mIHymQlD*mqZQ{#bEJ$UTx^|TZBD0(=3 
zI_~Qnc8R>~ms+c}4<6iVPN!FNX!V}DBe-h&ax^txYjS4dQ`g`tjnnkgaQCfup=Ku9 z8mC`JctrYj4PHm*F|+RdD(h)8!B?_>>~8z}srhx?vid)n+25UxUSHjG$4&oQY|1_R z=H3wfuj2lFJI=7#8<@R;*&CR>f!Q0Fy@A;qn7x5t|2J?${;pi^b}RY2vu{4SzTR>( zZ_NIt>;YHjgAujoc^~d~2LIo@zwV)X`)=$vftUZrQ{3}jzT8v!IyZ9X72iG|)%N8b zzfqS_`%=TwdE_3+ljl1+?$I7Sfmx5EJaT#-m5*1(;#1ELo%NDe?;!K*Sz2pe@4?L2 zS^T&I5s&1{F6^hAm{W0TExRNx^YJ1wgXnm@+XUDA@YWvKgqQQn&gu=u%=9YzdLKUA zz=~Js3WuB+I`bi>cdd23V4*i%-_hCZ1uuFJZ~vJzGynJ+N)Ebt#@lxY>dB&>A3~k*Bt@zdbzS_OiPj`V2<~7{V*{&RX;3^LeG556DezGop zc(mR26Mw}#S8nYSeZQaXX?dsietGZ?pIT!aj?Td=@2fiZwVIymC%mWl2Dj)lm)bE7 z{>;)DaW~sx+CfVntw%lTQ_sm-_6xq%esl)DV8r0r%+Sef_+jK;(o4+d^yvb#o}SJ; zbTwx*o&&#*?iu$sZsd50w(YdLW$u0B`tisUwe+%Y7$>-WoZ&vlBEgSXxGvvZF!SNowgKjqc^)p7&^XncSeK>yToCB|p_z-nhu=L>_nP1kbE?{+B zO7Gm|`nBJzp1pzD8<@R;*&CR>f!Q1Q^?w5^`5WO^c?;%WpT9G%kLFi(7vCkG@^_`r zS#CeSI}iNjG`jzzjoKJdNrHu9)WDSFgI4dfTf$ zzE|H-kEtBLXVa>QUMWBBT{siH9&PJ3_RU&mg%4W8wI=s{;wk;WHTS;MVN!k6eeOtP zmd41=e>!(VS4Jm_Q+!g-?eBpdag6kc+_64iN1RlUyZ5-uQ9Lv z;M-Q>S9Vn#zU~`47A`muZRpt3xYK5)KYk^G*D>yfZ;8(F6MH#U<5t&)eNxlZjJ8&< z`i{Aa_$wbrW@YXB`BP>R=@TdB89!&1+__+yt7q+#{_cUN#lC}<8S3kNaPo-aRrBeG zuJDu-`+$$ie$*D37rfGUPd|y(xYf~joTn+6MaQNc?S;1JoS$ArPaVC&)4QP|=Du*v zC$Or8PE@p-&H-NZ%tm<^)!JYkhEJ4t=!LItPDx9jqr^d*Of7@xj;M97>$2or*keGeJ3s2 z?SV0e+M~{-k6-l5yHL$?x$FJtd(ph@wtaG<^6;ToN4V_kwV$YMhm3}6uVr;VlUsK0 zcH0UMdd&Bl9&sw>d)YqpeJ%$)dd{2DQ~t6X@mR{YzQ6ARrt`?dt8mCQlIwns)n}=` z_INk!Iu3^FH0C^p)F}FX!K<0b9xbaDSEY*JY1ieDH2sl3;>=WjV?IA1l6>!Y*K zIHo%EiUp_Kr_RxPOYS{t z#Z==sJ?0WaFFOU7`DIRO-KX^MM-T21e%+()XqVu3e9BzrsE6h;zh_4{=!uSV=g^ME zcdYB+_IRV$zV7Zg2b}pt`X0f;x9kHJeZwM_U9_io%Nl>`o?~>J;iIu{-`vZ0_ z7f->)&TWQojZ-trYkVVL#uvSE!I;_WXTG8Feaj4VfjcI>%pEJwQhL1do#0nIvv0K? 
z_x3}K+_Vv=aN+dbqg77-MB`CUXYaaadt{&7IAHF>{$L)#GcOo;9c$jid#`7+gGcY# zQ=h=C&(Rs4cWU=2+^M?439aYU7X2jNl)CiWs^td{zV~^WR$p`B*F1Jre#|dEx{GT$ z=!lNjT=+3l{Ui^a`QnovHD;RNN>pFq|A?c^GJ{A4tq zdg$y=xUf*E`(K*b|yqa0#Xwk7#FJ5`+eU$Gr^n|x*mG6CK!Ob12`{JFvvIltj z7u^J2_~<>x>>fON#RoqAS_iMv2T!#1eO1qnVBu$4Jo>a~=wz4N1xccfAK)C zyJ=qcsVAcB#QliQjmLMh9Gs|ow%ze(=RV?-S}>2J@e_Tkr=8@QPEYPPMVo!maX0lL zhfm!Xr`^BC@lrf7+edMq<8_TOzYh<7xmRC5Jn5IH7~aHqFTw+_f$R4M`hLb~G?LN!hUwwjadvH_>kEmSk7#KSiA9_peh*!rOAB*XsnE5Ae z`BDeoG>-P!p$|h(Pq~i89oeyGr|2((r+4x3nP8Edo|?#B8mIO8aL3r4J9WH#rac@y z>igi~Jos=sn0P53x$jaQulo+3!K=KKpF0Q#ZHMRc3MZpse&8o=Jm!M6%=R;SXbV^S zp4Ym%4o>lauX`{f_Yki0>w`Z!`sr!<ui7h37# z6@6xvsD3^_cktXeJn-;}2*+Xvcb)gZr{%`$=$uzD@bnP8Rdu=WiK>fTOwV1kPA{MB zkMlJ5+Me(=JYvE6`1D(5`) zn65K()SZuq^Zj_;iLSQ?jJ-OwatgOqw4Z1+(ob*k2yd@P?d;G=KXL0PSNb*9$4`#W zsGjIKRy=b@!?IU2Q5W7s?wr!o9q@xs1n=x)`o0ss>R)*E9OX^SQH^ z*1f^oZr2N5=)@l%a3Xs~EIZ&&53WZ)ic?24Jw2l1^`S@fKJOEw|JCPIJBYI^I$EO{5b-$j+Ecm)-ougG8e|)@-JifO6)bA0W!NXT^IsQs`a6H!c=<{a>?Il+I{s-3?Op zchKB9b&ZZl-@QF&1`!@HII=T5eBfzB1J`?C@X*&d{TnOl4(`c3aE#7_7hlUcH5PutV=g?`)iVP#4LF*dA@dR|GeTSSnMG6%7?ED57Vn%_ zX0T)8g&Vl#ooJe@OE2b0-sb}r+~Scs(`H_JJ_D>Tw%zu#zGv*`KITggKXlP6obWI6 z=z}HCx#fXFFYC&Ke&`xllhJv8G@`+SKKFnd!lDhXd9i2KJy`gS+L`dxOZ()+zTN0` zVm-IaQ%>Z@t7G4ciFbj&#ya=9V9dz(G8(`0c7yiQ`+j@}B6*7xyoU!rqV<`bo(1dl z9UF~zVZRQi^w|SW*4FRv&9hUNev6i#c;ZJt_VIqtqrQ=gck-KZcaPGCU%!vg8JEL8 z6R~)yK0U1mKQZ$Rtn|Q%>Vp<6Irv%(3{Q(uOP~99cs+fc6}fWEmmT_e>5azuo@)(e zG5KJ|3(Vq#zjN&2sBdD01J5}e@`+uFCw|V?+0+ruPkWub%nGdVQd8@+-xN>B>Y5qEj&_R1z2LW5yB{;?QN!CIVb$fJp$Lf%nM%6 z?1}wN3zrB7Z^g4Gy4c5Nap0uR_b`6#2cMW6=ZhacMEt`~y+v1h;Rnu4V)n(4UOg8) z_ZWe+5j=I~nYnSKShe7p$*HV1FPu#CT(<2<+|y^-6J8}xuHfo3!AD1I+Wq4mg5jYtIi}$RCN;RnrS1kV z_g~7x_VUC0wrW39FC6WYyVg&iN89QDgkR6;XZNFM`@1c=63$Zk8`|Ujtkh>y`78CE z@n&ye_6Gj%yn&^D4(QJ`&Htxv_rEby@76cJ4@0x!cPYQiT)UKxemz$;n#1>~$2mNT zdtU3cKQXt_?l{MLDKFT{asSYYul8M2_Y?CwsjlR~uX|pXhdq|_iN1O2J~~f*;Hqcl z<`H>`ZPoo$U!4~^-@EeYy$c-UbAl&0-b8rIMdmNXQx5(_*LC`3r@(FBPOsiy`p$Dt 
z9`@8aPDd#D2CTr|yA!)ES!7Q!Ml1t@jANzZ0437ze!8c%S{8 zlf0f8eZBOz*_@7#;rP=h;^%ejwbV@aDgD$}-s+JY)71OW^H}f3jL;N+IQV4rKK_Lp zo&{5X=Ph-Zfkx|EM~@hq;?Eo+z1UNBtUZ-S4u?5-hL_*b{XCLejaISb6mD>YOYOYO zOTR_hS(@ig;=?}SAzpoTdKDg=+#9{coK1 z3ct>+_M`o%dvN8%9>pv1MtnS%e(=E@uY09tcJ>Rmvt!fh!D?>Qri*^@&3y}=J$OyK zyKCu1G@QDgL%+VidUWpH=cSnCxu@czvIBjOhB2q@b}{-nztUvC_$1G2KX?bvw2Ent zE-;_rk9p*ynB{2kML$ui4;~WJr+8*Ps&*)SpA#IS^Zc>eBfaF&FTLmsuT`|4scE{N z>EDN&SInlTwPW`9QIDtg-MWoG-G_rGwa47Gk$p`AH}2^*{i+up z^qpgmp3pg7IP|C^O0VX3c*WZ>Kg?1OdM%^3P-^^NRQ^qo1S zAAHHf+iD_rn8wR~(M;&vd(`^z$&SIbUT3eu&3WE04}4;Bn(++BqVJgCs%aVHKJ)SG z=!?&U#^&hV@lb8%#^rDq_UzzgUpU?)hfhr}V$s!@)|#`oV}5k^78&2I?uXXzUhaff zp9fmFQKKU^?L;`-PwSR3{;Ye`W_RY5Ib)apY$>nstM=`8xp7eLS-VZcYu>D$y@A;q zn7x5t$`(v&Xr!R3qR}YQBLU1LrqESM-Yarf}+6 zo%`87;aeg({_IYjeZK=*%Q?)NT8k#Vi$3eZagW4|gE#vVgD0AtZCG$j8=Mj&FTJ+Y z{lu4fbg8}XC!_eYZ_AkcTuu`+iodFJw#{Nc@8bto=fmr0Wmhz{kLr{YyfR-k_|xz7 z+I^jJf?}fXcMBUKzU3WG>CtbyWu|GsJARI%^H@Lh z!L8O4UOhdX0UvL#nVG%fn|@Z~kx}M!yb4!+UBhv%AF%W*9K1TwJxv!}kNS8|A9{{i z&aBkJg-0|zxL#Y%)6o|k9=*DDEf_u#-P@k%6|YX;vBK#@<y5sHdr$h#@sr-x z%erats6DrbgNH{y^uWP7HC&?k(dS)+S@DZr&9$0Y^^Eiq4j%Yx%`Bq#{M4MNn|5yI zWkzaBKlGkUpV{<0`UxDKX#(>aj9lvAG%c6B!?zwgP54QV**H;Ww3&JWFLRSq{5ta- z11GoWXJXI7i=N~`hCixwUK^J~&P>NU`>4K07ktB2j|clI$8*kv1|N^3;#n}&XwI&9 zMHEf=m=_0Mubmsw`sk_K&g33C4^C)&W;F*Kp3bHhSkbXh^4+sMiNUqV&Yii&_oF@S z(+g7hnAoe#2rhHXSNFpoKWaGj%)WW4Df7_cNeo_@AG`Wa{y0W%5q_JMn!a)Uq>kF6 zV;nrQww}&Uyy(5ozWPXx1}>3V%ugTl_FQ?H7hmoXxL_eFAHlI+`b(}l&DjZzIpBDe z5qbCIH|;!hfg47w?}cCZyH=i7)y3e`3*V7ellP5X3P<(e+%3F(zHo!%nBetH?{UcS z2yW4n*Icb*j`-25wHJ_=p@!TFg?4dqj&I6CU(yCA0N0&QH(MEsNor7|?&f=$ns64j7z> zg~P0lpXj46UTE!@s225)xgnN$_I~onLkJC;4r&zQfG7hz-g=Qo_L{2j%gj^1W#+} z!*h-w>b5HLMGGfpnr0e5`aZ`wH)@%wQud8TJdF8dX387%qr3QemZ>TTHhaf zZ8qlV4!UQ}r^Y{G@lJp0QR^QXnVlNzFLgS8l~>=s>XrN1T)631<|e21E}GO8KK`{X z-j{38mE8GwkW;e@p4l%RQR8W`(=VLdqpnBo+~}8H`juY#Y3;c0|d+6vz zL{s#A9`I5p-0Uev-gI}$-Fo`jE1K99UWst(s9buTyQ+`->*GCY&+SPM!!!4u)l+Aw z&bjimE?uvVZ>5c%=yPsUcF?Fi&*{~9;ib3nG)EU09v)p+N5=}De0Xq$<9bfVt8mjp 
z>cG^)bMM%7lr+BMA{h0~Jb^a)BH7Dm-zxE4HbgF@CnCm(99B?9i zG`W*{xOb_8Z+Y^p=O=xf>rdgZe{c$}e9!fubqp<7?qEKg4o7jD?}t4i_xXhb-ibvM z!lG53?!nybn-(l*lzBCt{-uvcC-&@;yPB@=Fjsq_DPEp)@bWtLvmRLPi>?{JH}o1) zU-A0(na~vt+>ADRVqeF4X7ufiHhQM>12&Pnr+dS*S~<*&SU4@C@CqLcA8L<^;iJC6 zF(1b`XyEs9gil}f?vXwv2TR;Mz|e*leD9ZsKHA_Fz4rR}Xt)m?j~sel>rRpP?IGIq z7?JgOSTFY|Je?VHizmDq>*KR)M)TlaiIsWbXLT+w--y@+UtfwPZA zj?M)0SmWrG6Fl1kuIkA9_EA25ifPUactrQ@JJWFIsxNuQ$`8D6hGO-5sdemAfHe^Ec^hHoMF04g6nv1N=<&o%?>?^kI5G)hm8O^LlPwuf(6pQ{= zpXzrV8jrP3dwsd`eFk&F%j&*U^OPI=iH@2WT-#sgg4I6M^}O&Z9=b0+>PNWU-kaUAo*J(ZI^<&^!=gxK}QsqcqZ`iNF_cJ}Y;#%|^xGmWF3u;A1_p)oBu+Vm=Zt`ClJm4l9+M$b9v*Q4)ntnUYY zTeY9MgW{eC-@Tjhe!U#;+r#PTM9cgRxAe0&YF(J=xF0;p19V*V#>FRirH6)IXuQXR z&LW3-Xw)Nf({tR*PiG&)I<@&Hug**D^U|j=KDGD#F-*^df8hjf+|W42V;HByDO`FU zna9lZC|(_I@x?Rr?pb_OTWYk92iIb9jT2bmrq(&|gp>NxrzYx*j<0#--8^@_MGkfB zMc+7nqDSsMdjchBW@gbkkm%h%> zx#$veKC{Wa2Gd;ob?$YIjS6OX&IR*mPvFEx=8{{lzDs&3$8&q=MKmpSdK6yn>b3Ex z(M3&*TWu9W8WjE=op4sR!iq0|TgeY7_bnuAW4UN`;l^wEgGdap- zM|crU+jo>3Jh=yVQchsLN14rBxOnI+H1u4Px++J9243RE_tTkIF!0_e9PjJV8S3jC zt#$9{>F&+g(KQ_PRty}^PAq!mbhPw|qqYklu10#goA<%t>V2bcGXm?G;ad3W+lMi$ z_6iU8ktcVVGE=?CHAdd}=lFiyQ}?6~ruPZ4>&++op4(2z^V~2$84ISmmeJ~hi-*Js zPVt2w(XhOSX*r7J9?q@!y0>amW1hMh~om|_Uv`NV*j`O{=bP|)7hPVv){mP z`uFUn+;62n=lK^AzYDA1q4chA^nL4{J9FfB=}KnQe&Mx3kI))d_tAIM$(PxCsa-q% zm5=U_`7-XQTX(O=-i?S+;ZyFRzB!+@8TQ$q9=z-#5-rXHxF8`tK9j1>X{wPHKt7r zpDNOCncZ2@D?YJX%sY$k{!0a?M-c*91;qqvgRHjib-v z;mbH)YaE(2j#6&-F z|I%}f_cXRu@y4Ssf{j{_^zqsr4)4Wl_NcMs;-yi&9G_By>-}S9;WN84%l#U8YH#lq zPDh8vd!^5vat}BbInMEu+`?(m)+=+kqrxv-{KLn4r609(DlB=Wm;Soi#gAEFc%!fT z=`m$S>U_V7<8}H~{pj^zvCpV=#V=!p2Uk4OQ+PQatcvVkN8V3(nuZuN@o2hth7;PV zhkx!<`mW(MqLlygaVegMai#g;%2U*p)eG;ny|%(AvytC+FHM zKYcY=%V>AY+;=Qm)o0&kcluUyI`|SE&UbD+qQ_??v3_qW(ZJC?!B!gUxpj@tax<3e zrac{kLzZ?1dR_@>Um16>{U-hH#c zi13N{Wi5Y0IOaMycw}AvjtEYzt9q(M&rFTlV|HrGF3#b}9lci`{DNZ`9B_?r@M9Jl zcF5Xhb6n%6o*S6-l+!oI`+k_Mv9q7uCH3ade6+ep@=QZNqUeHeHF!khcjkMo8qW- znfA!JlXAJAW6ASfm|@ZJPF;HV;L+ij#!qO;O}7-cRZM>3!G7WEUd4A(G483B{m`RR 
zPV~$duI`(>YW4)j`-OvsnTcCJu~D1rXXpwic*1cH#ins{mS|gLkKQ=$;|IUc+brs~ zJHA_Ry!s*Oo7xUJ`E;wtip}ld4;QW&7;?e#b;t> z@vD7Z%R%EcxknC|*X|RMdG62S7>{AW^PXXmmtI>HonwWAPjVfnXFLx3WQ1ptlRESz zW`ERX@W4Ih9gIVsIDX#qywvEOAKcQ5+%S0ASIoGoVJ80RVVcZqjnz6MbH`1mb?Z_v z`o>A`l53rN8()v^*x|b`PwnTpVczE?w_x?W;G4z|+_tKGD&OEc#?w@})yXNnPA&bw zol|)12=37jxZV#u_ZYQ4f#Yd0YwdfkwefO?lDEn+ubv&g#iMV(#CqnW7JZq?U350w zh{mI4pWZpuOf;>!y;ES!B^Dj;qQmi?J$hna)B5S;hQY1zmMwkswPrN-#AD*lqQ#e( zT=&%nUyGdV7cAF#F zk6Bw!_n*KEw^g*CxwrK)Hy=M<_Z@ij*dc55=E;Ghp5*l4dW?IRK4ujkzQqq-_?x$K*-bryXWGE3UEt^5 zrLWvp*?uZ7a`OdKP2Wz=<7j^x28ZZ8&+#Ge^lb+Fba2;+HfIVSEans(jG5Z2u}-hd zL$5tB)knYV7yDOwFnp$Y&{sZOdJ&6%@J8Kts{EyL*ZR>&-geq|p1I6H2cMWY`&q0{ z&F$+ur|;`sb8kqkd)yG8m1uRA=6zcF`tQGwHx0YKx@msb2j>pgM?2HZ-oWe)%-+Cn z;~QA&=Vtsq?D>7#_nwyWsrh}sXQ%xRfm{27>JA{-$sb^RI}sb6@NEQErR0pPgNb zHuH+dId|Za1`_0PwJ=eChl*%(2d%t`NXd7X*}*9T*vGokLVnG zGIx$+MD00F&-?zAI`jogFYobq&vTFZIA%|3yysYnfm_~*-nS>Pl26n<8a&iLa`%95 zxt^uC@YF+n9OrPKh&HR{;o*FA8dcZF4PAAI)RaA_;o|SLANmo|Sk!ugSIvyRYl>d^ zb?&tv_vh%{6wBPW^*-JUUGY&5ywTKAwaNu2>U{!Zmh6+>qiP@FpjAI~-s6C2tb27G zT$zQ|dE~sCjL!FcD_we*9yQVVdbm&eimu~rI7jCuX1;hi#vH}qM!(FVKaC$dRI~9> zEqUFeu4B%#z5Tcz5A7w^vu}rIKI{sfzTPuE2OfPQIJ-m?Egp$mFSNpG5y1;5cq8qO zbLPQe|J_Swr_&patGrW%)%QW6JL%kB zAM=Ed-Xpr+UeRdJbLxyu>xftQ8c$E|WL$LGC#MJB@$fCW4qj%sCOAan_8q*v2S0Mx z*`tR#yh~4Kql;MP;fV$hIDr=~v%pkKFLX9LdZvR*KWgv8AsV+n_=9_`$2~aYXqe;u z?D?#s!7KR1m!5jo7=EgQPgG5vd!0|28=jW4yViITb*_4M`ZlBQFqdAf*FL)h=DUd} z`Ltc=`<&1hEjXC#^)S;h?FGN$21j;MpBpP@&uO#j9hu8Lsi#HNPviW$;r|zR{&jnY zvwdc7VD<+3-@vc>=gST8Ux`+KXY}8&A9Zh@enuDV`yJDVbyefvtJCf>4WH(tIP?47 z0lf}i?#$hFeq!bKyw)+p^m=qgC$D&}Nw1!sdIq=52iMv7C@=b=2~MfmtE00Ws5LSV zZ|`yNq4ubl?ojKZ*W-mAtwx;_v+A8kV@B%AjK2A`M{jMO@jg73oGHve7tuIG?YHQ- z7o4W`&b-Wsx+jl4Dott~FBjeAy4>sIy<&j}yX>G3=G;V$G6eMa%hd0vZVf^VACTE66c zyf0pG>!>r-Tl3VIA3LLqnmInxp45U{taouw&-VSEF9e-_>SAd(LOo-*o!$u6CA^ogO^8e@|v&QI`5koKI~9p z9~RoOWArWO-6-F4KkU_3lb@YkY;N$~v-o-#t_d(6_EJui52t*(98yWU6l==fAKI=s@$c~*0DF1hYsV|YXqUuyN8 
zx?cmUyep~cbmo@6`o+9T=bF@6>Mi8hFHdPU6LLxqkA?Eaqpdcj&3}9(V{Re5B5K zBeHSC$C0IU(KCG~Rx|MO`@iXB*obNMgU#aP#SAX(pJ;gPzbxZc-Zrn3=@7WPw zVxMnlM=-kYXpHt}wIA=MpWQ`kt#|vduBxq^2RCxPyZ5Z7ulhbr`<}0agJx5>Xl_Vc z$-I?tZpaQZ?d%QA-oS6y8@QoA*H`kkbjDIY!<4rYUpPy7!s(0gyTpH=l;1JLnhQQ{ z4t;c9gSm#>`*;vjW4!vnA=1Y$=2SX*a5VRekMxXh#W$WCyh zF8=U|s_ST*wgZn?^Xr+Di{O3f~{uh;1-b2`3;v%lh@ zD?Pmn9M!~pxE`DF)91sU?3bRdD|*GkLph;CGt4*m-m~0K#=>vWq7_c^z?S1% z-w(RTi-x^Ackx~mlK#E%E4*6EEWCAI_QXH?hM~c8g6|xjQ9mDzM?c|dn#e^PwRvzl zzVNM|c+rN=bL&M;ESTbbdFm^Vm}6eCuSe^{ZL{g0ImbOkr#b4xe1<=+vFL{$x#Gby z?nFG>uM|bdE-!6gm z_&Z)7Ff{l(uak=wO>hjacMdPXr+PQzZDN+y#BpbwF*p5fh93HksSh}@PeU(s#n*E` z^vJaiY#OiN&hew(b-}f0p(|#`W;e9`MCmq}0qu{lsuHn_u=P$=) zM|jk=*HX1=>YUo6&tOjN2hZ`~I^P~nCpRBIwO9C8nwrxp$Dc8)_=TtGb=H_U&gg3- z$79Uy`mv+#sB2xX%HHo0!eaTs;ME8RL1> z#EJW&wMdVM#v@qlR=DIHUNf$=&s_pDj@IEXIvgfAHdjyfE8ICTrw=cf-$yx7S3PFs z-9@AN@M&81w4LB&U-)>%-aR{{ru2em`aV2&_P+7)uDOaQ557f?>*zby=K~&G1M%S08!yUE0j?qU0&ucxpe`>^^zH87bwiF&c^XhPXNB8$=PxPoutobc# ztO>1i?Fp{raBDngjanV^V;9#O&Z&Kq>sr;D&JS2ywVz<>6AvG!*+@1n~dd-my^t2?O{KXlAdPUvkv zI8oajIO$#PK&`x-V>v&}{K5M?a*od`4}6U}3tq>s8UI@iPpgT_Ytiv% z>}Yd26LZw3@Zb<@E%U;==$!)|xy+=l{T%0buhvsDC+m7o*#SQynyh6GGuev>w~pFJ z3x^sG(fiRWJHuZpvNJI}>O^fz_;)f0vUexr#my7T< z);Tp^b*;Gf{IIV_?Slu$@4kZjhllxuro^`DdTXDV`1eKl$_=b%uh1J_pIS?wdvgCe zCeLTto$zrSZRGTg6LpE?s!Oc+>CW&p#$4y(-NcCAkB(RIS0cU>I~1O6;EaI_JX+l;l|^E0svUdD~S>;191jxD3Y3qQ1I z42!z-(#vb(WY6-Lp?KYg8RWva&J^v)DKv|bCAcQUN@PJT1? 
z2CKR3UE^N+$vN)j^O@(lV4mwqp5USH(cpugIe5}X@4WOMmA%{-Z;PB>57Tg%*&giz+@gfb~M(HU8l^1%lptd*-;xi0`M{FJ2PrPCVeAko*?ld3OHU5&ECN54b0xa>(p9&^yBXY_oJ;)_mWaz-tW+BxnSkLZ~b{et5kTGMc9t!l{o z{Lt5tIf^qApPCbW*lJ<7e{X~a`?;cFVjm={sr6MBucPJ86_4l%Rb)q5QE z3Qp;z-ZbQ{Q6K7vrh`}egTt@qIIagh(R&?@?Zk0hkM5@$=fGtj<?DP%e1m8J@hcA7NaMY{jGhcPVDKpcf%!8kM zG_|r@Pw0zpmoz%$wG8oI_sUpT0+W=?YV z4?Z5rJN=@GzUBB6)muH_hgb1PZkgpC(YKltzJ}8S_sGnQ?ju}sW_#o$W_&+cyGMDz zgCHF6j>dev=HyNVhZ|bKqBfoO;b?^C(U1E_bgldO)04w#*)0bQ53RXFc$r`H%sXQ3 zPc5-!R($X4(~S2>-yRS4%exsT_^#(c=k*e~&S_m*kIY3|bD0lU`}Bx#^1fxS360?# z|>e&|JB>4N~rd+t@D_EPKp!U;@s(dCZ9*FDs)&Z#|e zT+505Y!-)Jy|?!jV{dkge&vhCqaHNeLF*2$_-Q})vmQIR#yQ!uJTT`-9vHR7(3IFJ z#vbU2={CcJw3@N2JH%S?^R>AX9eQ=jPDU1Cm&&f)%%8_r=b@D8uXKX`(7_&(2i z@px`e`f3ixqu}a|hhpT+Bs#7KPsco0p7!W>w6>F;*h4UC^@NXiaLT>B7dw=ieemhi zhNjHKGkLO0jj1O(=D}5OFujZB%EeDH^mU9q?7pf&$3E-|CnNW^2-fGHe$Er_#QvkU zUwib`%=3tk?x`5_JJEGT8_{x3YUwxQ8E`!2JnKhY_~a4kiAOyz#P-2HS&J4-BYT47 zKC*}QJnz#4Zkqbk`_OmXk9yU2LaUrs)!wt#M_uiW)+4#<6iaX8DhGU1V(rU)m-A2_ z_n)Rw?lg|$^)5?sZ>-+`#>^XQ_nFV^4b0xa>$}!k=hvLtgL*k0ch;!>j&u6FQpcSO zU%AS`%cF7>C-*)aqV_!3{pe{#N7SC@s?R;`4)8mD+o#r0J3k)P7OYi`J++RR&NCe+ zXB*ykI$Y5?mtIEkz!#p@i50)Ri}=_4(3ro^3clrFMDS_*fWxmNdpO>ARPX)VMfMKd z_hdio_3;8LI=DKsZx_YT1XgCD&w1=XEc4j0H!8>S&Ysd^R(KR%2Wzw%zmMQs4`1Ei zvHIwpLtAF){d+al{M0hrBhP5OA3M|$eMHlBYSZgsw`mx6${D_g@1T!ieJ44k&mG{_ zHMqynn$Ba?o>xBfTE_TOch)?7%x{{m?&}U{*a2=t-_3nxKl-s3`SwKZT+OZKByN56pzje*Nlj}bA1gDP0uhT17`k)bR>( zZU-|BGds9>_(|`|bDCGHD)ZbMpAys4dXXER959PH7tDDDZw9(>QOmwBmMcJ~v%h^FyFPOnAVPxthb(Y&KBv8~1!Uf}E$8uRjg zeH076@aW-748P(9M}2iJI2ayvji=-Ffjfb-gJ^=Qb>XO%o%?*jh^}Mj$jv+I;)_@E z3`bk{oZ~0@<`a3v_g;O#bnXPleEcxGt;(+A$xd}leh-(PM|M$+oc-z@LRa>S+-3x) zXI7s-e!4Tc&Q5QgWqF@xt;>3eo$z5FGXtVa64C6@;!~Liin!rmxNaaI4YA&@$Ji*k1 zd|FRuB-i)v*{83cvrXfNeWv=lw{SyK>M*E2@Cn~h+nmU!_%R1dy%RnB;dl(pW+{ih zdcf_(qC<<0$UORSm$JWd>b#zt{b_reXU-`8eY4yLo_c0p?FPP7to@W*_qbQ=CU-&O z{CK?A*YCrM#yyxv#P|Bd8^dF{xqUOh@?F{emOAij4*D9O)w4G+djqpKFna^DH?Yz- 
zFzt8AG<=%xrt+uxEQk9XW|2?*J=yd76)kh>T5VqcL_bJa*7Nx%M>oet6iu>Z3f}-+A04BRll@RG8k4oLLzwP0VCQ ziHecCUwstQc{$7c;5Y`ZKB*J`z^ta%#|I1FqV40u^H|S!K4+rW(3#ZH>FC+PboEZ~ zmx{4F9wj=z-YYQcGh6#$)8@d9{cO+rXy1EvJ;7&R^J0Iz*#%s+_;#@31(tf*Q@#57 zsr7xz1Lm=JZt+jQ(O3&VqxHh4j^>ei&+Xy#;ok3XL|^;Eb05`F=Ny^26c+x~eC6ZI zT=)@fhiNBt)sASq7rD)?4|8&E#pTX;-~&JF9&eq64_MS?R*m&>lT&M~#*3Z2=G1$G zdqfvGKBmP7uKJ%?@p6T%(UYgr|HsXf-kej5__wF7&5GX7uT@H=2B3 z*(-C<>zu$`YY)fsd`#;cKX8bO1;IJO)p@Gvf@p%TZbK&pr&Y{y(Jq1dLNfW!nYk{i{LrGSn>QuK!yu6TspC-1UeEj@bn zuh*D!&2R551u!^kR?XerqsJKC^E*d62a2M0W!6sXJEcyV^X2-S_SyJ4FMYyecdq7X zR`}^;&veWi-SHC)&Dvvk;c!}`S?j!*?O5uHhqvq9ckR12-y-kwhr7o14xY}s%Zg<` z_FHl4(W$wMCt7w(O>xlS0~g%ip&Z^6SKMm(ai;l&Pgv-D{-h0l_TWTZ4}A9&XYyQh zyvcd)GvTZDJia#^MIAhuOB~(tbDiNm<@CYv$hX;C`e=ANJ2W1e%;OD{SJa%VnbmVX zT*btV<30Ld?(J{rea|qj-;Ng^KKG)!?1QKG(!<``o`$(U{OLOycc(|AsDs~CZa(gx zqS8GEUpf9=)?V&lbJv}yU8me@as2o^@FUIiEozVZxVzrYLvx+$IXm4K#ory?=6%nl zfwR-^uq*xM`yRb<=eJ^LujZUr=BS&0(0A4Dt>`D-=1uh1y}*AHc}&JzqU|M{-WXE5*x??0gb$Kd#U@+%MVPN3Iu)xpTw_vqj$uH4l#`?UwVc*rkaoHW6kbis3e=Emv$4a2W@l!y9tr+(>tM(P6RFoRmrF?=WNH1+qSn0neJ-ot)y924KYy@w7@@2waf8U4L6pW{8{9CG?y*X}tt_&$Hi%?nS( z^fQmdtnNAGO<(coXs*t>Q_qqYH@H2cbVGf?(<}HMM>S@24ySxR>igTr_2IehxATRQ zJBGf$f%H~qzdZ9BL~qp$Wc~>^W9W65F{=HZ$t(VvQ?L8AAGLUv@~p(t@!M zb$W;sm$Tc$`{{x28I}F|6qerJw>&+fc3<9jaU7ZZWPY;mA&(&{r^&T2wJKS|g zx$DH~eav&^xL5PfjvnW$7y4DP^1Q$H)*9{+PG525_&0UlQ4as8zv4&p9^$S2TYdWi zw=Zz}0=F-4`vSKwaQgzk>MyYVy=6D=F&uR3-6D#E8v zgwu8Cbj06j9P@mr=AXO1FlO%hIKZJ^#;SyI(&J7KRJ`sEuR}Q}x;(E_>d>m80dp~|ewCQg< zgQrKmA5VH{R($oQub3L`Ryp^k<-S$7=h_=(qjJm%xyvEDyv^)2kZtu5{}&$wvT{ypp z=Zk00us49YmoR+jy9byw;F*VRJx6`~cYHKHN4&=48_*-V57^2V&fMl~c4k)cH+_73 zW@9_a|5Mht+Ra-{;HT>1*%V`TROD-`Vrrb9ukN;n%t2ZPx7z+`ho=3*5fI?F-z# z!0ikCefR?F-?e^OZ_2;te#F0De%xIi(%skh-Y@g-l^^x}+QWC*{W?6<-)SC$?R@zb z^c2b6bBC#q-nnbOYS$XRaQt?h9fo#|E7$9ualWX$e?jLA9jG+IR&Ct^h9*cubGY4Y-(c8C_iDG_9>n7 zd>8TA0SzA6i#@=Tr&+?v7cLw$#2t6l?6fP#>-3Brc4vJHwYcm|L_cwsxA`n|yvg^* zE9aC4T6Ec~@3ziFzw>0?eH`Wz@$uQMj~yH*Uo|;bZ{f0g)a!vGynPm3_QJ#aJ0HjJ 
z^&P>SNA5gfs+pIyb`lnsL+-rhieI<+QTuTn^IU^Jn(W2=m9z4k|4`oj@YV611~Ps! zd%{ZNm~!?bUiBAUzFqIqJ?(esUUA&>==~EXd^wu29z1-$^AeBQyW>3d@gSms$E?Qn zrn>06XS=4Kc!A-S{J=OneC5OS$WE@G`nCtSYdWu8c0M~yUc5b{&4Ev!x%WjhaMl`p zjlF-&f2;U*P|TFW`UAe$0D++x~OJLvQ*x@85j``RwqA=0CQ7 zd(?{}TtxxUFjel9Y@7;b3qc8-E?T?HlU- z?3r%~Oy{AU;AV_(cyx9?uG#f>Ue4n!5Bb28-;^)=;!RBc!dZ8MOI-WZ5A}0aeRDKK z*G_ry9Yga`=~H{8cVBeR`*^>KUhaDE>$sbzUD;RKqm#UQ`bWCobey+q zPk+DXzsfiG{9omq$9Vs`-h8yn-}U^Xy>9sJ3*5fI?F-z#!0ij%zQFAZ+`ho=3*5fI z?F-z#!0ij%zQFAZ+`ho=3*5fIZ~qs#{r8W*{lA8{7u>$U?F-z#!0ij%zQFAZ+`ho= z3*5fI?F;<2et|FlpIOO2wqe!(K@OuQ9?d%AF26IlDA6IOZiqW1mCvtN967*obiu&XFdTphfK zi7lSFc5$+f9GrQ0`i#z3zGmnK&cS!B!I|f}X{KhBC%)A3M4!3w;NaC!et53@ zs8^q?H~hi0-Q-n%SDM1F=7O7c;sl;vYyVVlsCYGR>G#{i2X4lwyB8WxW9kR4t25!z zOHBR9;Y^tD>fr+~>*#UX`!u||E>73puN)7>D_(FK7yn%Td#|>Jg9lc zKrZ}gPVt?y>ie6Rw{to_UC(gs(LSjU?c@`);45~o@~lJCy~qza^u%|~dDN|2`%!bG z)5)Dmca=+Dy}=$od&lVMw3qmc25$1o3oN~b8QxxjwTAq3@z0*s?IA4q_zaiA3a8h* z2A+P_mq%i!5&4J>>(0wNXP{ktrd`gL zeR@{u*db%^{NOJyoz@y}niHN=9Xx9E?vcIFWUoBoGkbK054`NjJG7mn7goL2!lS2t z-1PGvdG)gIyvVQS33L23bLiosE_;p6 z!Z|JV&Z9ijSA4(4XRgmC@43?Qe|e~`n|)#65gRK`>xz?o?~xOO$H_TW_dZeg zJkyA)GilT}zW49r(d#*QD9Y3C(#siO;zd4T-c!Fgip?Xh=(zLQBm4NA(`NbsZ+Z`W zV(I*Q^{nW19(kUn7#i>?hCb_x z^!)w3S8)98Ls|3Jc~7HgJr$qu_t`yWW+Nf`+~zuoxGguweq^ntv<6m2F{r@=bCSE;(>?0_fTF}TxMi$ zyel7cXFBB*_0EGCik#$?kFeyRH=G_3Ue>{1&pL}a@V!sI_VDB_cyx^Y&=BS2IoyHH z?`tgfZ(Zowr{{u+7guNPC04xdN9{P8fnIy`T=~SD>AJV&>0jKQrMfuj5jiJu>o;DQ zYBb_f6Q`X*lX&oYo@sp6VD7zpj{Af+v!)rs;YFUk;x?~1qkH+uyFjM7rNO(sPM1BJ zAKdIqQ~5wHb^18(blKarys@Krp~E-xz&O4Gob)RWeCAJldF!4nDvz5U#8dSZ14~Wh z$+6g8#ld6cr-wMjqsEU~arHQy;a;cV?b^(b@I~ufc4rs&WY5ORhgy-7+?wy6c=Wr( ziL28*bZ2{4ah#VT>5AtwryhQayBvM>n32(Y(WlotSHJP7GZ$Wb>WqPDPfqVs7@EYz zEq=d^x_WpLn|B@)n3Z^7lb>;z+dlFazwU4w`p(r8FRl*Uj4NimGCxp0CT%i=SN8V3v0;xEyp-42{m!uC9Yaj2?WSz4~Mw8aQ2-zU$)b z^p4?^Q5yLWl^@i2_`J?T!;#+4GzWj;c`&D$fw2!h+ZvwY$$TOgU#DkRotXM{yN-T) zC|~NytF*nd@h7Z!(+uY-4-N;d^T{(BpZ!u(9=z54$UVaAh(2QbYF1nwcAWO3M?96b 
z>Q$F@^x9wixQ(k;-*9>v3vUhl(5(ITi5_ooPUh;#L$`S7x*xdbc%kcFcw{cGx>FrK zc&qQH=c6qy`i$tUM;|}l(ERY@_-y9UQ$*W5pBFxjD=**lsTwT37L0k5$9edGu?s%J z+lyQp@>BJzPHy$ttFhpsb5D5e&{6!lW6IFukvz>ZU$o7y-ozIpo=*D&p0!2Zm3Z)}rGeY}rQf*u4OM9ukMuJPF0-;9Ug@hCm(y=6eRJUl zj;=MrqMzK#eP{TJ%@NPJ=i%Y=GY_1z^6~OWj!)L^U4Ff9^~B>ms6ExLig9%&zv_dp zduP3PFq4>C_lJMB@1(pq%y1o;NA-1bj(IhPU-URDw`Z0Nbn{>r*ta!ag)bxn{meIYu&vm(*>CySlM|sX-m-Fo3GYjv%qV8PU zxLGTYUe7mtFs}Q(mupMcp6Yr3JluTN6&HSRuXu%_A&Qr|`X|ea-#hGh!SB9uX)3zs zIwN^-7yj6we4@wMd34Wt=BGaUD6Vz(7Q8v-)4QI{OL^Sn7T$UFx_`RxMs}&0zs^Xn z&~?A^#O3d`a$#{>=Xvxwu4``eQ@_Niub$uYWhQec{?czPxV$`9UD&$oqzz8aDQ)J9 z7Jm1N%e>Z07ro|1ow@jygJldH|Ma5{4Ls(wcl1_luuBsjIuqY~eHVO^FOIw`;xX06 z>Ao~aSNyv0J!Y;xdibhW+&PhRT1#K?RG!4Zbc7TAt&In7(w06oYcFcfs&PK~E+!T; zi0t8Ydc(0Dl4Es$hv4?xl*h%7xb92c{qW7)BA(#v<=X7kOmIcc;f^r}|JDgBZR-Nl zS?hsbJm6#u4?GWRe8r8Ni%;s!2S0ebJ<;?``GyAvkF^h;-kKRYPR}bZ>d8Yr?y2&K z$pQCBFXFUcaavb>IP5v~O_R83c!%#Mj!t9JhX#z;7=FRap6bZ+TCe%=EKXc$qL**+ zT$tg$$CbBqaoMN2_vq7G{kR;T*Liv1No1ba?m6jgkH$~+K6)0np27Fy^TC;~T75V@ zQ!}{c2e*0F!z-h-(Q}Wh8Jj-7?GZklE5GWsul%^t>s(mWI(Up~;LP+MdB^f=4|$ev z>x+{$d2muw^{8wg;cz1+_jHMq7u<@2b>XJgI9-Qd@4Ct1O)+?WdXMnekzdBB)0;XX zb-#;VafkBq6R-4>UOeOO{OBukl20Zipc0Dln z7RJeb%@)>maVCB7;H)007bmpLLx0sPuhm03KX?iA+-qhiHePwxns?N%9LAHy*7Kr+AIYD=$CFMihYG9CL&{>52+T<a1x7nXT69r6H}k?8mj)g2oU-kHS32}Wv=Q4EOdN3K)P=3yT}Pk8n~5t2 zTrqphh}bn)aQfFk7`O1|ue_+UC!G9r!A5qeb0_){nS%#>-(4K~j$d)*cdkoLT=A&W zGx{89bv)*N(grWRiNY3qs0)vtPIIK=N@q8D>IIg$dQoR?_&_c`)X9@3E@z&*|NjSg z+5Z0l@W@!RC#?J@U$EdOt~zo~zgPX@q$hR7#Lng6JTOJ!aVNduy%!j9)kMASig)`< z%N4&gy=T;(?`G2{wjaJ8%bz*ylUnuYyJpReBc0B=*FXnOzcJJIyudiZbUL0#4VD_? zc%2;MQzs|z-ZS*=kJsAwT6ysLQWLAkjy>1<^t40wrGrBZtowmw9vHI+GHQAmi`)IE zQ$q|7c#T^x_fTC}azoQx%i9NjL@*-z;)hT6%d2zw6ed16(Xq~@LAU0o^00n-MNLd? 
z=>|IdJa5kG7q#!p+Z1)^WnMFSS7{4NGW@{&Ode36Q(h~<@r<%!=8 zbC2Ye4;szqko!)_GfvjA0~{he_gH;fTl%RMX1?tQ?wa$=O`Y?K+kWDgX7UZp>&!#z zCv~Nr<}6zLC%$y>PQz6mI=x%et9I&#e#=EiUo}|eIY_fs4qT55CwC}}eTclFYw_ZC zeY)(DSoNEux;U*VPS@4%-O&^eFZjgdJ2!l$UewGc4#s!`Z)0w1ZRT$7*HVw0q zD?E67gVq;+t=ZePoiASQ>pM?)%uc=SpnhZIr^}x7j-tN>4tsYjzw9eFf92pK8?znb z4c&K-^JUi~-)Rq<-F|ow7tW-IQ!)2kuq{74lN)i`Q`!yYzC(HW>Hc^U{d*>_yt^Os z{4g8Lz3FAtp1S4Nxnt@@tGH#fE6k(%;YbNq?y z&YUSSL!OaOnyM3ve)lS;&gfX=M>`3J&mIG*_gdfa)SPC{U& z)3Q(aroZ%&U(FP@;^OKqJ+#F14bGuIKIVl6k2T`eJnc|E!gP9H_3U;pFwdpWI-+|` zIX=d5@8pW3e#G>n4s2Hs^fD8zV)7%ddDrH>&P#3d*K_5Mhhp)s;(6Jjxap-{>3o*& z2=09EOW(cl(@9LY-{@WOggGbj)<-XMade}n+t{uy9vqLEOCwJ4$eovS4t$(1jShR! zQ}ok2pEk1t+nw}+U!30SJQU3l&$w$Yc4EcpI(*zC@xqkrpvydZ8Qn+xs^NB|r~b-O z&u{eG8!r9oWnY;3x{*(qddtnexXW!k_?cVmZxYzm4EIrAH8FAyd*JPywU-?ExL0^K z9=Up5!z)hW#c2=Kj-BQjuhs^)bGXv$ss@Y5+2Jd%_2$$11(R0CeX|zth#UCCS-7bK zEACL(o9LhB*}P@T7q|Sb_Kltnc;;{;y03Ej?FT1%jd`wq+|&t=yXIa`^IYe7`LtK{ z+{bgxKlGIsedTdeD{O<0uJzz}BtPwW?V-M3bc&VdF5(z=*egwYuC;u)>OIsCE~meX z^k_@J<%kEjqIlE%1*f0!te=-1J@37P(=(J;ox+13aC(o^Omx4T8GYw=zi>DiYd$mR zQKJ>Ev*PM=n5h~bIUKL?cJ9Q}zS=Kt>a0eaxH#%nf8quPXJi*H_?51_;3v<$&leBo zItNVAPvfE2ncgM;<-(bGXvK{@HN?PDLp1zqm*~Nr{6pJ&m_E6s8OoY3j^Vuz+Uj5J zy7ci+UiEs;Y3$UTs(a6<{T$9hcdp%4NA9QRuei8zb$FxauEVpj(t4dn^`m`bW^%-j zoa6H*4Bz6Tfxr4ni+|@a$90M0TRk+*3w+YUx7a?;RX=L{hRgW5V*Unt{hsjAY1}-U z6Bq|x*P&0$bn1a+zHl6KZtf{=@!?G|=5RDa2Y%OqPhREMyr@^*q;-Adoas%4yRLcR z4>x1g^cA&>pR3)%-`^lE8n}tU^GJRw57l7g)ThzA@`^rZ(mudnxp*jguj_+3+1ISN zo~MUtM)&fDi7S0^cd@;LgKx*s;zi8wUY`6KIKMG*xaZ3b?GqUJ$Ogaj>Lf zX@+WX!6R=jxW()R*CY9M&Z32z2JwU0;JHU>niKP;?`59CIXz$exH`nlrLzlmZ!E9e zOS$RsZVYa50!ODLzleLXD%MN=Ys?liZrw{nNY$95g=1^VDl5w|rbjzV4K72!2NCrKPTXs})zbxaf)W z@Y~kq(VFl|zTw#uE-Yemiv!La`o!!Pcg3;}PdKVM!gTQWoLTJpP)~iGooC|OA#Ua| zmzf#87LPh2{E-b0wAnMhbLcsSjyJ7u?ES(s^~~3d<yT(F*q5k2BY6~@sb=rG?UZw!(+;+g9|_36gY=Eb>g8>tSjyn4i_GE z#OAAKc+U{F;6qhf@0}LzT8kT3r+4&P{DYgC;Huxb*QV>fu;LK2=eXzM7C(o?58so# z{dfGrdd{gl6pi2S6lR?E4s7)pi1~yzIe1WqmUm21JmYHakv?(xthe-&4o>tYyttZ~ 
zIJ}5{a;M@ZPkG(0L&Gj;vo0NcIEOv>=7)ASOT85z$k-=);l*Jt_*y4kJFFbv!+D8o zU+L<$*1XWrBeq^1agT8EvL{cw%@1wstq(TTL+ctmi9ToLQ7h_9-!En+R|SdV{z2JiJmTjcSjPa~cnhj@P`m@gtAW<_u!-(y)HCuX@$bNbT5p<40e;CC~i= z=lCx493#)3yzGLx&U0qNqn77{mFGZhM)2S^-?dAQe{!Wg4Lxrl(sxg|^s+Af%4?3# zXI8}SFZ$8Byf1m;xaK@q>@&sUg1J85=6&Vi1)kifTT{IWH*7Fx54;pzm-*>}^<4P{ zKHb95Og(Ycx7$3^jY*fEy-(>=Qy#GRWX^_@%BjWi+Hv*h6V>N}c_hD&Lyss8H?aIl>IIg$^%_&ZIK9tW zm#+Qj7e2jBnEG%c_H4tvKk-jv-|k19dTCuR9(Cl+S@8CTtLXW{-}*QoJyH7gsux&$ zsV=PdiT;Ga2N$0!%yIHHo{gv8+6PBA6oiV{!4|hBjwf&ATSNG!quub$_(P>b6ad+%&%O^>E`s{WPlM%Qz!H(@7xt8vEsFMs~Yb5A}Ie>wcr z|9JYxKmGC7zx5=v!TO!km!B8zxTr*zw`Rj zAHVk2+b{h7-RGV>_rmjcpLp_I-7iG^!s}nSyDQQ=AAaz{XYW28tS7~|d-g|nFZ}tN zuYdT%?>v3)Z{B?GwLi|k3%~FW?>-Z(-%Hj%c=EqK_3VpxpZ=>4pT7OUAz|_PlLMcB zJ&*g-L7SI)<=MYrTG_1q#pOTjT^81#?O8qRV3)<^YkQYJy8Fle8DDyFPwQa^E3Y;$ z9oanpy*J|fq}_Tm}8Q(nCOP4Lg}?uPAQd${l0U%dOwcb5t#~`O^>Hdgtv}HtkmO#qHo5?cEN@7q{{M6}S9j z^2)Ql+X4CFHvXgK;&%5e{*ly|+AfpbKY#0|`-f*&-|#^DrMrK&{f73NWc-4^^6Xyf z%lZ51e(Fp9TklcS$GE=yc-NxNuJL<(e4nc)KgaL?&En79|2bZI{O5QKwHBYLvG~{D zcw_$$=e@sHUwy=KHSsaj`Z_Z;mK(n(H(&J+@9e)!sefGg14Ms##J`eWdG@cIboHC} zm21w`KU*8l)jwY1+&%mJUHt*R{Db$2XV2f&AL1|Yuf#vN`_%Wo{qt}B+3VN;x(l1} zXZy}({Q2HC!Tt98Z$16fZ!g?ew9NQZf9d2h<4=Bf`P#d0{Kp?}UB=%Bb}r-Z1n%;e zKmV)0{hMzr+*jWF^3}iNKY#a`|Nhq7Z@>1&(+{40@UwT{eBpQh^4tIE>)-$C%kTZ0 z>0kEu^6cOG$Gk83Z-Dw;@=w<3pZw-`zyI=k6SaP?Jce4oWgbJV-#G`=ugrf-slOB* zzwO`h_mu~ki*x<`=Ggg5>mQoO)IYiJkI!S-vQhml=$PMceQ@{1x&F#??0k7^{hA+B z_iuerwro^?BRn3{Z+-BqEY9_}#AD~nTkALNF?Ije2W9_$|2NX(IsMoNf1y_E2fzE} zwe`>4@fPu~9s8@%y*&F%(!D(U>(aeE`%g*7yf5~j7YG0D2txfya7?Iw$sZHy-`{(} ttDn64o##LM>OZ>s%4ZV-g*1&zm5Nd8j!NDfIt7-{{;!Y4|o6o diff --git a/tests/brevitas/test_brevitas_debug.py b/tests/brevitas/test_brevitas_debug.py index 3d059a6856..d6879a727b 100644 --- a/tests/brevitas/test_brevitas_debug.py +++ b/tests/brevitas/test_brevitas_debug.py @@ -35,7 +35,6 @@ import os import torch from brevitas.export import export_qonnx -from brevitas.quant_tensor import _unpack_quant_tensor from pkgutil import 
get_data from qonnx.core.modelwrapper import ModelWrapper from qonnx.util.cleanup import cleanup as qonnx_cleanup @@ -91,7 +90,7 @@ def test_brevitas_debug(QONNX_FINN_conversion): else: assert len(names_common) == 8 for dbg_name in names_common: - tensor_pytorch = _unpack_quant_tensor(dbg_hook.values[dbg_name]).detach().numpy() + tensor_pytorch = dbg_hook.values[dbg_name].value.detach().numpy() tensor_finn = output_dict[dbg_name] assert np.isclose(tensor_finn, tensor_pytorch, atol=1e-5).all() os.remove(finn_onnx) From a6e30e1c527fc24359614f76280a9e7469f2eecc Mon Sep 17 00:00:00 2001 From: lstasytis Date: Tue, 15 Oct 2024 16:10:34 +0000 Subject: [PATCH 06/12] faulty base branch update removed, 2 Signed-off-by: lstasytis --- fetch-repos.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fetch-repos.sh b/fetch-repos.sh index a4fc124fa4..6ce9ad76d4 100755 --- a/fetch-repos.sh +++ b/fetch-repos.sh @@ -29,7 +29,7 @@ QONNX_COMMIT="2281a777d84aa5cbd7469085c2e534fb4a03ccf9" FINN_EXP_COMMIT="0724be21111a21f0d81a072fccc1c446e053f851" -BREVITAS_COMMIT="d4834bd2a0fad3c1fbc0ff7e1346d5dcb3797ea4" +BREVITAS_COMMIT="84f42259ec869eb151af4cb8a8b23ad925f493db" PYVERILATOR_COMMIT="ce0a08c20cb8c1d1e84181d6f392390f846adbd1" CNPY_COMMIT="4e8810b1a8637695171ed346ce68f6984e585ef4" HLSLIB_COMMIT="16e5847a5e3ef76cffe84c8fad2f010d593457d3" From 27407b528a67358c05a6a6710d24094242378df9 Mon Sep 17 00:00:00 2001 From: lstasytis Date: Tue, 29 Oct 2024 23:49:18 +0000 Subject: [PATCH 07/12] structural overhaul and fmpadding characterization fixes --- src/finn/builder/build_dataflow_config.py | 17 +- src/finn/builder/build_dataflow_steps.py | 24 ++- .../custom_op/fpgadataflow/channelwise_op.py | 76 ------- .../fpgadataflow/convolutioninputgenerator.py | 76 ------- .../fpgadataflow/duplicatestreams.py | 9 +- src/finn/custom_op/fpgadataflow/fmpadding.py | 116 ++--------- src/finn/custom_op/fpgadataflow/hwcustomop.py | 166 ++++++++++++++-- .../custom_op/fpgadataflow/labelselect.py | 85 
-------- .../fpgadataflow/matrixvectoractivation.py | 77 +------ src/finn/custom_op/fpgadataflow/pool.py | 80 -------- .../streamingdatawidthconverter.py | 80 -------- .../fpgadataflow/streamingmaxpool.py | 155 ++++++--------- .../custom_op/fpgadataflow/thresholding.py | 83 -------- .../fpgadataflow/vectorvectoractivation.py | 75 +------ .../fpgadataflow/derive_characteristic.py | 19 +- .../fpgadataflow/hlssynth_ip.py | 4 - .../fpgadataflow/prepare_cppsim.py | 4 - .../transformation/fpgadataflow/prepare_ip.py | 4 - .../fpgadataflow/prepare_rtlsim.py | 4 - tests/fpgadataflow/test_fifosizing.py | 188 +++++++----------- 20 files changed, 374 insertions(+), 968 deletions(-) diff --git a/src/finn/builder/build_dataflow_config.py b/src/finn/builder/build_dataflow_config.py index 471586d924..62814c487f 100644 --- a/src/finn/builder/build_dataflow_config.py +++ b/src/finn/builder/build_dataflow_config.py @@ -40,12 +40,16 @@ class AutoFIFOSizingMethod(str, Enum): "Select the type of automatic FIFO sizing strategy." - CHARACTERIZE = "characterize" - CHARACTERIZE_ANALYTIC = "characterize_analytic" LARGEFIFO_RTLSIM = "largefifo_rtlsim" +class FIFOCharacterizationMethod(str, Enum): + "Select the strategy for characteristic sizing of FIFOs." + CHARACTERIZE_RTLSIM = "rtlsim" + CHARACTERIZE_ANALYTICAL = "analytical" + + class ShellFlowType(str, Enum): """For builds that produce a bitfile, select the shell flow that will integrate the FINN-generated accelerator.""" @@ -274,6 +278,15 @@ class DataflowBuildConfig: #: setting the FIFO sizes. auto_fifo_strategy: Optional[AutoFIFOSizingMethod] = AutoFIFOSizingMethod.LARGEFIFO_RTLSIM + #: Which strategy will be used for characteristic function-based FIFO sizing. + #: CHARACTERIZE_RTLSIM will result in performing RTLSIM for each node + #: to deduce the characteristic functions empirically + #: CHARACTERIZE_ANALYTICAL will use analytical functions if available, avoiding the generation + #: of IP cores. 
+ characteristic_function_strategy: Optional[ + FIFOCharacterizationMethod + ] = FIFOCharacterizationMethod.CHARACTERIZE_RTLSIM + #: Avoid using C++ rtlsim for auto FIFO sizing and rtlsim throughput test #: if set to True, always using Python instead force_python_rtlsim: Optional[bool] = False diff --git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py index 8d6435a25b..33ff3d4483 100644 --- a/src/finn/builder/build_dataflow_steps.py +++ b/src/finn/builder/build_dataflow_steps.py @@ -550,12 +550,13 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig): print("ENTERED STEP FIFO DEPTHS") if cfg.auto_fifo_depths: - if cfg.auto_fifo_strategy in ["characterize_analytic", "characterize"]: + if cfg.auto_fifo_strategy == "characterize": model = model.transform(InsertDWC()) model = model.transform(SpecializeLayers(cfg._resolve_fpga_part())) model = model.transform(GiveUniqueNodeNames()) - if cfg.auto_fifo_strategy == "characterize_analytic": + """ + if cfg.characteristic_function_strategy == : # RTL sim only the nodes which are not supported right now with # analytic characteristic derivations. 
# To do this, we first check if the characteristic @@ -575,10 +576,20 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig): ) model = model.transform(HLSSynthIP()) model = model.transform(PrepareRTLSim()) + + """ model = model.transform(AnnotateCycles()) - period = int(model.analysis(dataflow_performance)["max_cycles"] * 3) - model = model.transform(DeriveCharacteristic(period)) + period = int(model.analysis(dataflow_performance)["max_cycles"] * 3 + 10) + model = model.transform( + DeriveCharacteristic( + model, + period, + cfg.characteristic_function_strategy, + cfg._resolve_fpga_part(), + cfg._resolve_hls_clk_period(), + ) + ) model = model.transform(DeriveFIFOSizes()) model = model.transform( InsertFIFO( @@ -651,11 +662,6 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig): model = model.transform(SplitLargeFIFOs()) model = model.transform(RemoveShallowFIFOs()) - # FIFO sizing is done, we can allow all ipgen again - for node in model.graph.node: - node_inst = getCustomOp(node) - node_inst.set_nodeattr("ipgen_ignore", False) - # after FIFOs are ready to go, call PrepareIP and HLSSynthIP again # this will only run for the new nodes (e.g. 
FIFOs and DWCs) # model = model.transform(PrepareIP(cfg._resolve_fpga_part(), cfg._resolve_hls_clk_period())) diff --git a/src/finn/custom_op/fpgadataflow/channelwise_op.py b/src/finn/custom_op/fpgadataflow/channelwise_op.py index f0366e3d83..1f17ddc851 100644 --- a/src/finn/custom_op/fpgadataflow/channelwise_op.py +++ b/src/finn/custom_op/fpgadataflow/channelwise_op.py @@ -270,79 +270,3 @@ def characteristic_fx_output(self, txns, cycles, counter, kwargs): cycles += 1 return txns, cycles, counter - - def derive_characteristic_fxns(self, period): - n_inps = np.prod(self.get_folded_input_shape()[:-1]) - io_dict = { - "inputs": { - "in0": [0 for i in range(n_inps)], - }, - "outputs": {"out": []}, - } - - ignore = self.get_nodeattr("ipgen_ignore") - if ignore is False: # this node is being derived using RTLSIM - # RTL-based flow - super().derive_characteristic_fxns(period, override_rtlsim_dict=io_dict) - return - - # Analytical flow - - txns_in = {key: [] for (key, value) in io_dict["inputs"].items() if "in" in key} - txns_out = {key: [] for (key, value) in io_dict["outputs"].items() if "out" in key} - - all_txns_in = np.empty((len(txns_in.keys()), 2 * period), dtype=np.int32) - all_txns_out = np.empty((len(txns_out.keys()), 2 * period), dtype=np.int32) - - self.set_nodeattr("io_chrc_period", period) - - txn_in = [] - txn_out = [] - - # INPUT - - counter = 0 - padding = 0 - - kwargs = self.prepare_kwargs_for_characteristic_fx() - - # first period - cycles = 0 - txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - - txn_in += [counter] * (period - cycles) - padding += period * -cycles - - # second period - cycles = period - txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - - txn_in += [counter] * (period * 2 - cycles) - padding += period * 2 - cycles - - # final assignments - all_txns_in[0, :] = np.array(txn_in) - self.set_nodeattr("io_chrc_in", all_txns_in) - 
self.set_nodeattr("io_chrc_pads_in", padding) - - # OUTPUT - - counter = 0 - cycles = 0 - padding = 0 - - txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) - - txn_out += [counter] * (period - cycles) - padding += period * -cycles - - cycles = period - - txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) - - txn_out += [counter] * (period * 2 - cycles) - padding += period * 2 - cycles - - all_txns_out[0, :] = np.array(txn_out) - self.set_nodeattr("io_chrc_out", all_txns_out) - self.set_nodeattr("io_chrc_pads_out", padding) diff --git a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py index 72a0440cc9..c00603f375 100644 --- a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py +++ b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py @@ -517,79 +517,3 @@ def characteristic_fx_output(self, txns, cycles, counter, kwargs): cycles += 1 return txns, cycles, counter - - def derive_characteristic_fxns(self, period): - n_inps = np.prod(self.get_folded_input_shape()[:-1]) - io_dict = { - "inputs": { - "in0": [0 for i in range(n_inps)], - }, - "outputs": {"out": []}, - } - - ignore = self.get_nodeattr("ipgen_ignore") - if ignore is False: # this node is being derived using RTLSIM - # RTL-based flow - super().derive_characteristic_fxns(period, override_rtlsim_dict=io_dict) - return - - # Analytical flow - - txns_in = {key: [] for (key, value) in io_dict["inputs"].items() if "in" in key} - txns_out = {key: [] for (key, value) in io_dict["outputs"].items() if "out" in key} - - all_txns_in = np.empty((len(txns_in.keys()), 2 * period), dtype=np.int32) - all_txns_out = np.empty((len(txns_out.keys()), 2 * period), dtype=np.int32) - - self.set_nodeattr("io_chrc_period", period) - - txn_in = [] - txn_out = [] - - # INPUT - - counter = 0 - padding = 0 - - kwargs = self.prepare_kwargs_for_characteristic_fx() - - # 
first period - cycles = 0 - txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - - txn_in += [counter] * (period - cycles) - padding += period * -cycles - - # second period - cycles = period - txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - - txn_in += [counter] * (period * 2 - cycles) - padding += period * 2 - cycles - - # final assignments - all_txns_in[0, :] = np.array(txn_in) - self.set_nodeattr("io_chrc_in", all_txns_in) - self.set_nodeattr("io_chrc_pads_in", padding) - - # OUTPUT - - counter = 0 - cycles = 0 - padding = 0 - - txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) - - txn_out += [counter] * (period - cycles) - padding += period * -cycles - - cycles = period - - txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) - - txn_out += [counter] * (period * 2 - cycles) - padding += period * 2 - cycles - - all_txns_out[0, :] = np.array(txn_out) - self.set_nodeattr("io_chrc_out", all_txns_out) - self.set_nodeattr("io_chrc_pads_out", padding) diff --git a/src/finn/custom_op/fpgadataflow/duplicatestreams.py b/src/finn/custom_op/fpgadataflow/duplicatestreams.py index 8943ffc9e3..891009eae8 100644 --- a/src/finn/custom_op/fpgadataflow/duplicatestreams.py +++ b/src/finn/custom_op/fpgadataflow/duplicatestreams.py @@ -166,7 +166,9 @@ def get_verilog_top_module_intf_names(self): ) return intf_names - def derive_characteristic_fxns(self, period): + def derive_characteristic_fxns( + self, model, period, strategy, fpga_part, clk_period, op_type, override_dict=None + ): n_inps = np.prod(self.get_folded_input_shape()[:-1]) io_dict = { "inputs": { @@ -174,4 +176,7 @@ def derive_characteristic_fxns(self, period): }, "outputs": {"out0": [], "out1": []}, } - super().derive_characteristic_fxns(period, override_rtlsim_dict=io_dict) + + super().derive_characteristic_fxns( + model, period, strategy, fpga_part, clk_period, 
op_type, override_dict=io_dict + ) diff --git a/src/finn/custom_op/fpgadataflow/fmpadding.py b/src/finn/custom_op/fpgadataflow/fmpadding.py index d83fc4b9e8..bf1415d4ca 100644 --- a/src/finn/custom_op/fpgadataflow/fmpadding.py +++ b/src/finn/custom_op/fpgadataflow/fmpadding.py @@ -182,7 +182,7 @@ def prepare_kwargs_for_characteristic_fx(self): NF = int(NumChannels / SIMD) # assert True == False - kwargs = (ImgDim, Padding, NumChannels, SIMD, TOTAL_ELS, NF) + kwargs = (ImgDim, NewDim, Padding, NumChannels, SIMD, TOTAL_ELS, NF) # assert True==False @@ -191,120 +191,38 @@ def prepare_kwargs_for_characteristic_fx(self): def characteristic_fx_input(self, txns, cycles, counter, kwargs): # Compute one period of the input characteristic function - (ImgDim, Padding, NumChannels, SIMD, TOTAL_ELS, NF) = kwargs + (ImgDim, NewDim, Padding, NumChannels, SIMD, TOTAL_ELS, NF) = kwargs - delay = 0 - # if NF == 1, we always have a one cycle delay - - if NF == 1: - nf1 = 2 - else: - nf1 = 1 - - for i in range(0, ImgDim[0]): - for j in range(0, ImgDim[1]): + for y in range(0, NewDim[0]): + for x in range(0, NewDim[1]): for k in range(NF): txns.append(counter) - counter += 1 + if ( + Padding[0] <= y + and (y < (NewDim[0] - Padding[2])) + and Padding[1] <= x + and (x < (NewDim[1] - Padding[3])) + ): + counter += 1 cycles += 1 - if NF == 1: + if NF == 1: # loop end delay when fully unrolled txns.append(counter) cycles += 1 - for z in range((Padding[1] + Padding[3]) * NF * nf1 + delay): - txns.append(counter) - cycles += 1 return txns, cycles, counter def characteristic_fx_output(self, txns, cycles, counter, kwargs): # Compute one period of the output characteristic function - (ImgDim, Padding, NumChannels, SIMD, TOTAL_ELS, NF) = kwargs + (ImgDim, NewDim, Padding, NumChannels, SIMD, TOTAL_ELS, NF) = kwargs for i in range(0, TOTAL_ELS): - for j in range(int(NumChannels / SIMD)): + for j in range(NF): txns.append(counter) counter += 1 cycles += 1 + if NF == 1: # loop end delay when fully 
unrolled + txns.append(counter) + cycles += 1 return txns, cycles, counter - - def derive_characteristic_fxns(self, period): - n_inps = np.prod(self.get_folded_input_shape()[:-1]) - io_dict = { - "inputs": { - "in0": [0 for i in range(n_inps)], - }, - "outputs": {"out": []}, - } - - ignore = self.get_nodeattr("ipgen_ignore") - if ignore is False: # this node is being derived using RTLSIM - # RTL-based flow - super().derive_characteristic_fxns(period, override_rtlsim_dict=io_dict) - return - - # Analytical flow - - txns_in = {key: [] for (key, value) in io_dict["inputs"].items() if "in" in key} - txns_out = {key: [] for (key, value) in io_dict["outputs"].items() if "out" in key} - - all_txns_in = np.empty((len(txns_in.keys()), 2 * period), dtype=np.int32) - all_txns_out = np.empty((len(txns_out.keys()), 2 * period), dtype=np.int32) - - self.set_nodeattr("io_chrc_period", period) - - txn_in = [] - txn_out = [] - - # INPUT - - counter = 0 - padding = 0 - - kwargs = self.prepare_kwargs_for_characteristic_fx() - - # first period - cycles = 0 - txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - - txn_in += [counter] * (period - cycles) - padding += period * -cycles - - # second period - cycles = period - txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - - # for i in range(cycles,period*2): - # txn_in.append(counter) - # pads = (period*2-cycles) - - txn_in += [counter] * (period * 2 - cycles) - padding += period * 2 - cycles - - # final assignments - all_txns_in[0, :] = np.array(txn_in) - self.set_nodeattr("io_chrc_in", all_txns_in) - self.set_nodeattr("io_chrc_pads_in", padding) - - # OUTPUT - - counter = 0 - cycles = 0 - padding = 0 - - txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) - - txn_out += [counter] * (period - cycles) - padding += period * -cycles - - cycles = period - - txn_out, cycles, counter = self.characteristic_fx_output(txn_out, 
cycles, counter, kwargs) - - txn_out += [counter] * (period * 2 - cycles) - padding += period * 2 - cycles - - all_txns_out[0, :] = np.array(txn_out) - self.set_nodeattr("io_chrc_out", all_txns_out) - self.set_nodeattr("io_chrc_pads_out", padding) diff --git a/src/finn/custom_op/fpgadataflow/hwcustomop.py b/src/finn/custom_op/fpgadataflow/hwcustomop.py index 995f3c24cb..f57b89ed2e 100644 --- a/src/finn/custom_op/fpgadataflow/hwcustomop.py +++ b/src/finn/custom_op/fpgadataflow/hwcustomop.py @@ -34,7 +34,8 @@ from qonnx.custom_op.base import CustomOp from qonnx.util.basic import roundup_to_integer_multiple -from finn.util.basic import pyverilate_get_liveness_threshold_cycles +from finn.util.basic import make_build_dir, pyverilate_get_liveness_threshold_cycles +from finn.util.fpgadataflow import is_hls_node try: from pyverilator import PyVerilator @@ -98,8 +99,6 @@ def get_nodeattr_types(self): "io_chrc_pads_out": ("i", False, 0), "io_chrc_in_concat": ("t", False, np.asarray([], dtype=np.int32)), "io_chrc_out_concat": ("t", False, np.asarray([], dtype=np.int32)), - # flag to ignore the ip generation of this node - "ipgen_ignore": ("i", False, 0), } def get_verilog_top_module_name(self): @@ -365,11 +364,155 @@ def get_outstream_width_padded(self, ind=0): out_width = self.get_outstream_width(ind=ind) return roundup_to_integer_multiple(out_width, 8) - def derive_characteristic_fxns(self, period, override_rtlsim_dict=None): + def derive_characteristic_fxns( + self, model, period, strategy, fpga_part, clk_period, op_type, override_dict=None + ): + print("deriving characteristic func") + if override_dict is None: + n_inps = np.prod(self.get_folded_input_shape()[:-1]) + io_dict = { + "inputs": { + "in0": [0 for i in range(n_inps)], + }, + "outputs": {"out": []}, + } + else: + io_dict = override_dict + + if strategy == "analytical": + # check for override function + prepare_kwargs_for_characteristic_fx = getattr( + self, "prepare_kwargs_for_characteristic_fx", None + ) + if 
callable(prepare_kwargs_for_characteristic_fx): + # Analytical flow + self.derive_characteristic_fxns_analytically(period, io_dict=io_dict) + return + + # RTL-based flow + self.derive_characteristic_fxns_rtlsim( + model, period, fpga_part, clk_period, io_dict=io_dict + ) + + def derive_characteristic_fxns_analytically(self, period, io_dict): + # Analytical flow + + txns_in = {key: [] for (key, value) in io_dict["inputs"].items() if "in" in key} + txns_out = {key: [] for (key, value) in io_dict["outputs"].items() if "out" in key} + + all_txns_in = np.empty((len(txns_in.keys()), 2 * period), dtype=np.int32) + all_txns_out = np.empty((len(txns_out.keys()), 2 * period), dtype=np.int32) + + self.set_nodeattr("io_chrc_period", period) + + txn_in = [] + txn_out = [] + + # INPUT + + counter = 0 + padding = 0 + + kwargs = self.prepare_kwargs_for_characteristic_fx() + + # first period + cycles = 0 + txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) + + txn_in += [counter] * (period - cycles) + padding += period - cycles + + # second period + cycles = period + txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) + + txn_in += [counter] * (period * 2 - cycles) + padding += period * 2 - cycles + + # final assignments + all_txns_in[0, :] = np.array(txn_in[: period * 2]) + self.set_nodeattr("io_chrc_in", all_txns_in) + self.set_nodeattr("io_chrc_pads_in", padding) + + # OUTPUT + + counter = 0 + cycles = 0 + padding = 0 + + txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) + + txn_out += [counter] * (period - cycles) + padding += period - cycles + + cycles = period + + txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) + + txn_out += [counter] * (period * 2 - cycles) + padding += period * 2 - cycles + + all_txns_out[0, :] = np.array(txn_out[: period * 2]) + self.set_nodeattr("io_chrc_out", all_txns_out) + 
self.set_nodeattr("io_chrc_pads_out", padding) + + def derive_characteristic_fxns_rtlsim(self, model, period, fpga_part, clk_period, io_dict=None): """Return the unconstrained characteristic functions for this node.""" # ensure rtlsim is ready - - assert self.get_nodeattr("rtlsim_so") != "", "rtlsim not ready for " + self.onnx_node.name + if self.get_nodeattr("rtlsim_so") == "": + # generate the IP for this node + + # lazy construction of prepare_ip step + node = self.onnx_node + op_type = node.op_type + # get the path of the code generation directory + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + # ensure that there is a directory + if code_gen_dir == "" or not os.path.isdir(code_gen_dir): + code_gen_dir = make_build_dir(prefix="code_gen_ipgen_" + str(self.name) + "_") + self.set_nodeattr("code_gen_dir_ipgen", code_gen_dir) + # ensure that there is generated code inside the dir + self.code_generation_ipgen(model, fpga_part, clk_period) + + # lazy construction of hlssynthip step + if is_hls_node(node): + # ensure that code is generated + try: + assert ( + self.get_nodeattr("code_gen_dir_ipgen") != "" + ), """Node + attribute "code_gen_dir_ipgen" is empty. Please run + transformation PrepareIP first.""" + if not os.path.isdir(self.get_nodeattr("ipgen_path")) or not self.get_nodeattr( + "code_gen_dir_ipgen" + ) in self.get_nodeattr("ipgen_path"): + # call the compilation function for this node + self.ipgen_singlenode_code() + else: + warnings.warn("Using pre-existing IP for %s" % self.name) + # ensure that executable path is now set + assert ( + self.get_nodeattr("ipgen_path") != "" + ), """Transformation + HLSSynthIP was not successful. Node attribute "ipgen_path" + is empty.""" + except KeyError: + # exception if op_type is not supported + raise Exception("Custom op_type %s is currently not supported." 
% op_type) + + # lazy construction of prepare rtlsim step + + try: + self.prepare_rtlsim() + # ensure that executable path is now set + assert ( + self.get_nodeattr("rtlsim_so") != "" + ), "Failed to prepare RTLSim, no rtlsim_so attribute found." + except KeyError: + # exception if op_type is not supported + raise Exception("Custom op_type %s is currently not supported." % op_type) + + # assert , "rtlsim not ready for " + self.onnx_node.name if self.get_nodeattr("io_chrc_period") > 0: warnings.warn("Skipping node %s: already has FIFO characteristic" % self.onnx_node.name) return @@ -389,15 +532,6 @@ def derive_characteristic_fxns(self, period, override_rtlsim_dict=None): sim = self.get_rtlsim() # signal name sname = "_" + self.hls_sname() + "_" - if override_rtlsim_dict is not None: - io_dict = override_rtlsim_dict - else: - io_dict = { - "inputs": { - "in0": [0 for i in range(n_inps)], - }, - "outputs": {"out": []}, - } # extra dicts to keep track of cycle-by-cycle transaction behavior # note that we restrict key names to filter out weight streams etc @@ -452,6 +586,8 @@ def accumulate_char_fxn(chrc): all_txns_out = np.empty((len(txns_out.keys()), 2 * period), dtype=np.int32) all_pad_in = [] all_pad_out = [] + pad_in = 0 + pad_out = 0 for in_idx, in_strm_nm in enumerate(txns_in.keys()): txn_in = txns_in[in_strm_nm] if len(txn_in) < period: diff --git a/src/finn/custom_op/fpgadataflow/labelselect.py b/src/finn/custom_op/fpgadataflow/labelselect.py index 1e5b22c5ec..dd88e331a2 100644 --- a/src/finn/custom_op/fpgadataflow/labelselect.py +++ b/src/finn/custom_op/fpgadataflow/labelselect.py @@ -233,88 +233,3 @@ def characteristic_fx_output(self, txns, cycles, counter, kwargs): counter += 1 return txns, cycles, counter - - def derive_characteristic_fxns(self, period): - print("deriving ch") - n_inps = np.prod(self.get_folded_input_shape()[:-1]) - io_dict = { - "inputs": { - "in0": [0 for i in range(n_inps)], - }, - "outputs": {"out": []}, - } - - # RTL-based flow, 
uncomment and use instead of Analytical flow in case - # the analytical flow is too innacurate - - ignore = self.get_nodeattr("ipgen_ignore") - if ignore is False: # this node is being derived using RTLSIM - # RTL-based flow - print("rtl flow") - super().derive_characteristic_fxns(period, override_rtlsim_dict=io_dict) - return - - # Analytical flow - - txns_in = {key: [] for (key, value) in io_dict["inputs"].items() if "in" in key} - txns_out = {key: [] for (key, value) in io_dict["outputs"].items() if "out" in key} - - all_txns_in = np.empty((len(txns_in.keys()), 2 * period), dtype=np.int32) - all_txns_out = np.empty((len(txns_out.keys()), 2 * period), dtype=np.int32) - - self.set_nodeattr("io_chrc_period", period) - - txn_in = [] - txn_out = [] - - # INPUT - - counter = 0 - padding = 0 - - kwargs = self.prepare_kwargs_for_characteristic_fx() - - # first period - cycles = 0 - txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - - txn_in += [counter] * (period - cycles) - padding += period * -cycles - - # second period - cycles = period - txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - - # for i in range(cycles,period*2): - # txn_in.append(counter) - # pads = (period*2-cycles) - - txn_in += [counter] * (period * 2 - cycles) - padding += period * 2 - cycles - - # final assignments - all_txns_in[0, :] = np.array(txn_in) - self.set_nodeattr("io_chrc_in", all_txns_in) - self.set_nodeattr("io_chrc_pads_in", padding) - - # OUTPUT - - counter = 0 - cycles = 0 - padding = 0 - - txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) - - txn_out += [counter] * (period - cycles) - padding += period * -cycles - - cycles = period - - txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) - - txn_out += [counter] * (period * 2 - cycles) - padding += period * 2 - cycles - - all_txns_out[0, :] = np.array(txn_out) - 
self.set_nodeattr("io_chrc_out", all_txns_out) - self.set_nodeattr("io_chrc_pads_out", padding) diff --git a/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py b/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py index f9d52b1f4f..1cb3d2afd5 100644 --- a/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py +++ b/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py @@ -1017,7 +1017,9 @@ def characteristic_fx_output(self, txns, cycles, counter, kwargs): return txns, cycles, counter - def derive_characteristic_fxns(self, period): + def derive_characteristic_fxns( + self, model, period, strategy, fpga_part, clk_period, op_type, override_dict=None + ): n_inps = np.prod(self.get_folded_input_shape()[:-1]) io_dict = { "inputs": { @@ -1029,72 +1031,9 @@ def derive_characteristic_fxns(self, period): mem_mode = self.get_nodeattr("mem_mode") if mem_mode in ["internal_decoupled", "external"]: n_weight_inps = self.calc_wmem() - num_w_reps = np.prod(self.get_nodeattr("numInputVectors")) - io_dict["inputs"]["weights"] = [0 for i in range(num_w_reps * n_weight_inps)] + # num_w_reps = np.prod(self.get_nodeattr("numInputVectors")) + io_dict["inputs"]["weights"] = [0 for i in range(1 * n_weight_inps)] - ignore = self.get_nodeattr("ipgen_ignore") - if ignore is False: # this node is being derived using RTLSIM - # RTL-based flow - super().derive_characteristic_fxns(period, override_rtlsim_dict=io_dict) - return - - # Analytical flow - - txns_in = {key: [] for (key, value) in io_dict["inputs"].items() if "in" in key} - txns_out = {key: [] for (key, value) in io_dict["outputs"].items() if "out" in key} - - all_txns_in = np.empty((len(txns_in.keys()), 2 * period), dtype=np.int32) - all_txns_out = np.empty((len(txns_out.keys()), 2 * period), dtype=np.int32) - - self.set_nodeattr("io_chrc_period", period) - - txn_in = [] - txn_out = [] - - # INPUT - - counter = 0 - padding = 0 - - kwargs = self.prepare_kwargs_for_characteristic_fx() - - # first period - cycles = 0 - 
txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - - txn_in += [counter] * (period - cycles) - padding += period * -cycles - - # second period - cycles = period - txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - - txn_in += [counter] * (period * 2 - cycles) - padding += period * 2 - cycles - - # final assignments - all_txns_in[0, :] = np.array(txn_in) - self.set_nodeattr("io_chrc_in", all_txns_in) - self.set_nodeattr("io_chrc_pads_in", padding) - - # OUTPUT - - counter = 0 - cycles = 0 - padding = 0 - - txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) - - txn_out += [counter] * (period - cycles) - padding += period * -cycles - - cycles = period - - txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) - - txn_out += [counter] * (period * 2 - cycles) - padding += period * 2 - cycles - - all_txns_out[0, :] = np.array(txn_out) - self.set_nodeattr("io_chrc_out", all_txns_out) - self.set_nodeattr("io_chrc_pads_out", padding) + super().derive_characteristic_fxns( + model, period, strategy, fpga_part, clk_period, op_type, override_dict=io_dict + ) diff --git a/src/finn/custom_op/fpgadataflow/pool.py b/src/finn/custom_op/fpgadataflow/pool.py index 8dbf2c6e3b..b548548013 100644 --- a/src/finn/custom_op/fpgadataflow/pool.py +++ b/src/finn/custom_op/fpgadataflow/pool.py @@ -262,83 +262,3 @@ def characteristic_fx_output(self, txns, cycles, counter, kwargs): counter += 1 cycles += 1 return txns, cycles, counter - - def derive_characteristic_fxns(self, period): - n_inps = np.prod(self.get_folded_input_shape()[:-1]) - io_dict = { - "inputs": { - "in0": [0 for i in range(n_inps)], - }, - "outputs": {"out": []}, - } - - ignore = self.get_nodeattr("ipgen_ignore") - if ignore is False: # this node is being derived using RTLSIM - # RTL-based flow - super().derive_characteristic_fxns(period, override_rtlsim_dict=io_dict) - return 
- - # Analytical flow - - txns_in = {key: [] for (key, value) in io_dict["inputs"].items() if "in" in key} - txns_out = {key: [] for (key, value) in io_dict["outputs"].items() if "out" in key} - - all_txns_in = np.empty((len(txns_in.keys()), 2 * period), dtype=np.int32) - all_txns_out = np.empty((len(txns_out.keys()), 2 * period), dtype=np.int32) - - self.set_nodeattr("io_chrc_period", period) - - txn_in = [] - txn_out = [] - - # INPUT - - counter = 0 - padding = 0 - - kwargs = self.prepare_kwargs_for_characteristic_fx() - - # first period - cycles = 0 - txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - - txn_in += [counter] * (period - cycles) - padding += period * -cycles - - # second period - cycles = period - txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - - # for i in range(cycles,period*2): - # txn_in.append(counter) - # pads = (period*2-cycles) - - txn_in += [counter] * (period * 2 - cycles) - padding += period * 2 - cycles - - # final assignments - all_txns_in[0, :] = np.array(txn_in) - self.set_nodeattr("io_chrc_in", all_txns_in) - self.set_nodeattr("io_chrc_pads_in", padding) - - # OUTPUT - - counter = 0 - cycles = 0 - padding = 0 - - txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) - - txn_out += [counter] * (period - cycles) - padding += period * -cycles - - cycles = period - - txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) - - txn_out += [counter] * (period * 2 - cycles) - padding += period * 2 - cycles - - all_txns_out[0, :] = np.array(txn_out) - self.set_nodeattr("io_chrc_out", all_txns_out) - self.set_nodeattr("io_chrc_pads_out", padding) diff --git a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter.py b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter.py index 12faee9ef0..d9f07e822f 100644 --- a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter.py +++ 
b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter.py @@ -356,83 +356,3 @@ def characteristic_fx_output(self, txns, cycles, counter, kwargs): remainder -= outWidth return txns, cycles, counter - - def derive_characteristic_fxns(self, period): - n_inps = np.prod(self.get_folded_input_shape()[:-1]) - io_dict = { - "inputs": { - "in0": [0 for i in range(n_inps)], - }, - "outputs": {"out": []}, - } - - ignore = self.get_nodeattr("ipgen_ignore") - if ignore is False: # this node is being derived using RTLSIM - # RTL-based flow - super().derive_characteristic_fxns(period, override_rtlsim_dict=io_dict) - return - - # Analytical flow - - txns_in = {key: [] for (key, value) in io_dict["inputs"].items() if "in" in key} - txns_out = {key: [] for (key, value) in io_dict["outputs"].items() if "out" in key} - - all_txns_in = np.empty((len(txns_in.keys()), 2 * period), dtype=np.int32) - all_txns_out = np.empty((len(txns_out.keys()), 2 * period), dtype=np.int32) - - self.set_nodeattr("io_chrc_period", period) - - txn_in = [] - txn_out = [] - - # INPUT - - counter = 0 - padding = 0 - - kwargs = self.prepare_kwargs_for_characteristic_fx() - - # first period - cycles = 0 - txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - - txn_in += [counter] * (period - cycles) - padding += period * -cycles - - # second period - cycles = period - txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - - # for i in range(cycles,period*2): - # txn_in.append(counter) - # pads = (period*2-cycles) - - txn_in += [counter] * (period * 2 - cycles) - padding += period * 2 - cycles - - # final assignments - all_txns_in[0, :] = np.array(txn_in) - self.set_nodeattr("io_chrc_in", all_txns_in) - self.set_nodeattr("io_chrc_pads_in", padding) - - # OUTPUT - - counter = 0 - cycles = 0 - padding = 0 - - txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) - - txn_out += [counter] * (period - 
cycles) - padding += period * -cycles - - cycles = period - - txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) - - txn_out += [counter] * (period * 2 - cycles) - padding += period * 2 - cycles - - all_txns_out[0, :] = np.array(txn_out) - self.set_nodeattr("io_chrc_out", all_txns_out) - self.set_nodeattr("io_chrc_pads_out", padding) diff --git a/src/finn/custom_op/fpgadataflow/streamingmaxpool.py b/src/finn/custom_op/fpgadataflow/streamingmaxpool.py index 24e176c857..92c004d90a 100755 --- a/src/finn/custom_op/fpgadataflow/streamingmaxpool.py +++ b/src/finn/custom_op/fpgadataflow/streamingmaxpool.py @@ -257,7 +257,7 @@ def prepare_kwargs_for_characteristic_fx(self): # p+=1 bursts = int(read_delay + ImgDim / PoolDim) - read_tail_latency = 5 + read_tail_latency = 6 write_tail_latency = 14 kwargs = ( @@ -300,6 +300,7 @@ def characteristic_fx_input(self, txns, cycles, counter, kwargs): tracker = 0 maximum = int(ImgDim / PoolDim * PoolDim * ImgDim / PoolDim * PoolDim) + input_count = 0 if not is1d: # if i == 0: @@ -334,25 +335,47 @@ def characteristic_fx_input(self, txns, cycles, counter, kwargs): for z in range(0, int(ImgDim / PoolDim)): txns.append(counter) cycles += 1 + + # for k in range(0, int(PoolDim)): + # read loop tail end + for z in range(0, read_tail_latency - 2): + txns.append(counter) + cycles += 1 + else: # 1d case + + # initial buffer space + # for k in range(int(NumChannels / PE)): + # txns.append(counter) + # cycles += 1 + for i in range(output_size): for z in range(0, PoolDim): - for k in range(int(NumChannels / PE)): - txns.append(counter) - counter += 1 - cycles += 1 + if input_count < ImgDim: + for k in range(int(NumChannels / PE)): + txns.append(counter) + counter += 1 + cycles += 1 + input_count += 1 + txns.append(counter) + cycles += 1 - # for z in range(0,PoolDim): - # for k in range(0,read_tail_latency): - # txns.append(counter) - # cycles+=1 + # read loop tail end + # for z in range(0, 
read_tail_latency): + # txns.append(counter) + # cycles += 1 for k in range(int(NumChannels / PE)): txns.append(counter) cycles += 1 - for k in range(REMAINDER_PIXELS): + # read loop tail end + for z in range(0, write_tail_latency): + txns.append(counter) + cycles += 1 + + for k in range(int(REMAINDER_PIXELS * NumChannels / PE)): txns.append(counter) counter += 1 cycles += 1 @@ -377,16 +400,30 @@ def characteristic_fx_output(self, txns, cycles, counter, kwargs): txns.append(counter) cycles += 1 + tracker = 0 + maximum = int(ImgDim / PoolDim * PoolDim * ImgDim / PoolDim * PoolDim) if not is1d: + # if i == 0: + for z in range(0, 2): + txns.append(counter) + # counter += 1 + cycles += 1 + tracker += 1 + + if int(ImgDim / PoolDim) > 2: + txns.append(counter) + cycles += 1 + for j in range(0, int(ImgDim / PoolDim)): for k in range(0, int(PoolDim)): for z in range(0, int(ImgDim / PoolDim)): # actual read loop for x in range(0, PoolDim): - txns.append(counter) - # counter+=1 - cycles += 1 + if tracker < maximum: + txns.append(counter) + cycles += 1 + tracker += 1 for k in range(0, int(PoolDim)): # read loop tail end @@ -399,14 +436,30 @@ def characteristic_fx_output(self, txns, cycles, counter, kwargs): txns.append(counter) counter += 1 cycles += 1 + + # for k in range(0, int(PoolDim)): + # read loop tail end + for z in range(0, read_tail_latency - 2): + txns.append(counter) + cycles += 1 + else: # 1d case + # initial buffer space + # for k in range(int(NumChannels / PE)): + # txns.append(counter) + # cycles += 1 + for i in range(output_size): for z in range(0, PoolDim): for k in range(int(NumChannels / PE)): txns.append(counter) cycles += 1 + for z in range(0, read_tail_latency): + txns.append(counter) + cycles += 1 + for k in range(int(NumChannels / PE)): txns.append(counter) counter += 1 @@ -418,79 +471,3 @@ def characteristic_fx_output(self, txns, cycles, counter, kwargs): # cycles+=1 return txns, cycles, counter - - def derive_characteristic_fxns(self, period): - 
n_inps = np.prod(self.get_folded_input_shape()[:-1]) - io_dict = { - "inputs": { - "in0": [0 for i in range(n_inps)], - }, - "outputs": {"out": []}, - } - - ignore = self.get_nodeattr("ipgen_ignore") - if ignore is False: # this node is being derived using RTLSIM - # RTL-based flow - super().derive_characteristic_fxns(period, override_rtlsim_dict=io_dict) - return - - # Analytical flow - - txns_in = {key: [] for (key, value) in io_dict["inputs"].items() if "in" in key} - txns_out = {key: [] for (key, value) in io_dict["outputs"].items() if "out" in key} - - all_txns_in = np.empty((len(txns_in.keys()), 2 * period), dtype=np.int32) - all_txns_out = np.empty((len(txns_out.keys()), 2 * period), dtype=np.int32) - - self.set_nodeattr("io_chrc_period", period) - - txn_in = [] - txn_out = [] - - # INPUT - - counter = 0 - padding = 0 - - kwargs = self.prepare_kwargs_for_characteristic_fx() - - # first period - cycles = 0 - txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - - txn_in += [counter] * (period - cycles) - padding += period * -cycles - - # second period - cycles = period - txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - - txn_in += [counter] * (period * 2 - cycles) - padding += period * 2 - cycles - - # final assignments - all_txns_in[0, :] = np.array(txn_in) - self.set_nodeattr("io_chrc_in", all_txns_in) - self.set_nodeattr("io_chrc_pads_in", padding) - - # OUTPUT - - counter = 0 - cycles = 0 - padding = 0 - - txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) - - txn_out += [counter] * (period - cycles) - padding += period * -cycles - - cycles = period - - txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) - - txn_out += [counter] * (period * 2 - cycles) - padding += period * 2 - cycles - - all_txns_out[0, :] = np.array(txn_out) - self.set_nodeattr("io_chrc_out", all_txns_out) - 
self.set_nodeattr("io_chrc_pads_out", padding) diff --git a/src/finn/custom_op/fpgadataflow/thresholding.py b/src/finn/custom_op/fpgadataflow/thresholding.py index 9df8395bc0..e9b0b17d73 100644 --- a/src/finn/custom_op/fpgadataflow/thresholding.py +++ b/src/finn/custom_op/fpgadataflow/thresholding.py @@ -301,86 +301,3 @@ def characteristic_fx_output(self, txns, cycles, counter, kwargs): cycles += 1 return txns, cycles, counter - - def derive_characteristic_fxns(self, period): - n_inps = np.prod(self.get_folded_input_shape()[:-1]) - io_dict = { - "inputs": { - "in0": [0 for i in range(n_inps)], - }, - "outputs": {"out": []}, - } - - if self.onnx_node.op_type == "Thresholding_hls": - mem_mode = self.get_nodeattr("mem_mode") - if mem_mode in ["internal_decoupled", "external"]: - n_weight_inps = self.calc_tmem() - num_w_reps = np.prod(self.get_nodeattr("numInputVectors")) - io_dict["inputs"]["weights"] = [0 for i in range(num_w_reps * n_weight_inps)] - - ignore = self.get_nodeattr("ipgen_ignore") - if ignore is False: # this node is being derived using RTLSIM - # RTL-based flow - super().derive_characteristic_fxns(period, override_rtlsim_dict=io_dict) - return - - # Analytical flow - - txns_in = {key: [] for (key, value) in io_dict["inputs"].items() if "in" in key} - txns_out = {key: [] for (key, value) in io_dict["outputs"].items() if "out" in key} - - all_txns_in = np.empty((len(txns_in.keys()), 2 * period), dtype=np.int32) - all_txns_out = np.empty((len(txns_out.keys()), 2 * period), dtype=np.int32) - - self.set_nodeattr("io_chrc_period", period) - - txn_in = [] - txn_out = [] - - # INPUT - - counter = 0 - padding = 0 - - kwargs = self.prepare_kwargs_for_characteristic_fx() - - # first period - cycles = 0 - txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - - txn_in += [counter] * (period - cycles) - padding += period * -cycles - - # second period - cycles = period - txn_in, cycles, counter = 
self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - - txn_in += [counter] * (period * 2 - cycles) - padding += period * 2 - cycles - - # final assignments - all_txns_in[0, :] = np.array(txn_in) - self.set_nodeattr("io_chrc_in", all_txns_in) - self.set_nodeattr("io_chrc_pads_in", padding) - - # OUTPUT - - counter = 0 - cycles = 0 - padding = 0 - - txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) - - txn_out += [counter] * (period - cycles) - padding += period * -cycles - - cycles = period - - txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) - - txn_out += [counter] * (period * 2 - cycles) - padding += period * 2 - cycles - - all_txns_out[0, :] = np.array(txn_out) - self.set_nodeattr("io_chrc_out", all_txns_out) - self.set_nodeattr("io_chrc_pads_out", padding) diff --git a/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py b/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py index 3724b7081c..7f2a1bbfa5 100644 --- a/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py +++ b/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py @@ -974,8 +974,9 @@ def characteristic_fx_output(self, txns, cycles, counter, kwargs): return txns, cycles, counter - def derive_characteristic_fxns(self, period): - print("deriving characteristic func") + def derive_characteristic_fxns( + self, model, period, strategy, fpga_part, clk_period, op_type, override_dict=None + ): n_inps = np.prod(self.get_folded_input_shape()[:-1]) io_dict = { "inputs": { @@ -990,70 +991,6 @@ def derive_characteristic_fxns(self, period): # num_w_reps = np.prod(self.get_nodeattr("numInputVectors")) io_dict["inputs"]["weights"] = [0 for i in range(1 * n_weight_inps)] - ignore = self.get_nodeattr("ipgen_ignore") - if ignore is False: # this node is being derived using RTLSIM - # RTL-based flow - print("rtl flow") - super().derive_characteristic_fxns(period, override_rtlsim_dict=io_dict) - return - - # 
Analytical flow - - txns_in = {key: [] for (key, value) in io_dict["inputs"].items() if "in" in key} - txns_out = {key: [] for (key, value) in io_dict["outputs"].items() if "out" in key} - - all_txns_in = np.empty((len(txns_in.keys()), 2 * period), dtype=np.int32) - all_txns_out = np.empty((len(txns_out.keys()), 2 * period), dtype=np.int32) - - self.set_nodeattr("io_chrc_period", period) - - txn_in = [] - txn_out = [] - - # INPUT - - counter = 0 - padding = 0 - - kwargs = self.prepare_kwargs_for_characteristic_fx() - - # first period - cycles = 0 - txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - - txn_in += [counter] * (period - cycles) - padding += period * -cycles - - # second period - cycles = period - txn_in, cycles, counter = self.characteristic_fx_input(txn_in, cycles, counter, kwargs) - - txn_in += [counter] * (period * 2 - cycles) - padding += period * 2 - cycles - - # final assignments - all_txns_in[0, :] = np.array(txn_in) - self.set_nodeattr("io_chrc_in", all_txns_in) - self.set_nodeattr("io_chrc_pads_in", padding) - - # OUTPUT - - counter = 0 - cycles = 0 - padding = 0 - - txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) - - txn_out += [counter] * (period - cycles) - padding += period * -cycles - - cycles = period - - txn_out, cycles, counter = self.characteristic_fx_output(txn_out, cycles, counter, kwargs) - - txn_out += [counter] * (period * 2 - cycles) - padding += period * 2 - cycles - - all_txns_out[0, :] = np.array(txn_out) - self.set_nodeattr("io_chrc_out", all_txns_out) - self.set_nodeattr("io_chrc_pads_out", padding) + super().derive_characteristic_fxns( + model, period, strategy, fpga_part, clk_period, op_type, override_dict=io_dict + ) diff --git a/src/finn/transformation/fpgadataflow/derive_characteristic.py b/src/finn/transformation/fpgadataflow/derive_characteristic.py index 4819c74b52..460642d017 100644 --- 
a/src/finn/transformation/fpgadataflow/derive_characteristic.py +++ b/src/finn/transformation/fpgadataflow/derive_characteristic.py @@ -52,9 +52,15 @@ class DeriveCharacteristic(NodeLocalTransformation): NodeLocalTransformation for more details. """ - def __init__(self, period, num_workers=None, manual_bypass=False): + def __init__( + self, model, period, strategy, fpga_part, clk_period, num_workers=None, manual_bypass=False + ): super().__init__(num_workers=num_workers) + self.model = model self.period = period + self.strategy = strategy + self.fpga_part = fpga_part + self.clk_period = clk_period self.manual_bypass = manual_bypass def applyNodeLocal(self, node): @@ -63,8 +69,15 @@ def applyNodeLocal(self, node): try: # lookup op_type in registry of CustomOps inst = registry.getCustomOp(node) - print(inst.get_nodeattr("ipgen_ignore")) - inst.derive_characteristic_fxns(period=self.period) + + inst.derive_characteristic_fxns( + model=self.model, + period=self.period, + strategy=self.strategy, + fpga_part=self.fpga_part, + clk_period=self.clk_period, + op_type=op_type, + ) except KeyError: # exception if op_type is not supported raise Exception("Custom op_type %s is currently not supported." 
% op_type) diff --git a/src/finn/transformation/fpgadataflow/hlssynth_ip.py b/src/finn/transformation/fpgadataflow/hlssynth_ip.py index 2811b09415..5b901d9284 100644 --- a/src/finn/transformation/fpgadataflow/hlssynth_ip.py +++ b/src/finn/transformation/fpgadataflow/hlssynth_ip.py @@ -59,10 +59,6 @@ def applyNodeLocal(self, node): try: # lookup op_type in registry of CustomOps inst = registry.getCustomOp(node) - # find out if the node should be ignored - ignore = inst.get_nodeattr("ipgen_ignore") - if ignore: - return (node, False) # ensure that code is generated assert ( inst.get_nodeattr("code_gen_dir_ipgen") != "" diff --git a/src/finn/transformation/fpgadataflow/prepare_cppsim.py b/src/finn/transformation/fpgadataflow/prepare_cppsim.py index a00789b986..523cb020e4 100644 --- a/src/finn/transformation/fpgadataflow/prepare_cppsim.py +++ b/src/finn/transformation/fpgadataflow/prepare_cppsim.py @@ -47,10 +47,6 @@ def _codegen_single_node(node, model): # lookup op_type in registry of CustomOps inst = registry.getCustomOp(node) - # find out if the node should be ignored - ignore = inst.get_nodeattr("ipgen_ignore") - if ignore: - return # get the path of the code generation directory code_gen_dir = inst.get_nodeattr("code_gen_dir_cppsim") # ensure that there is a directory diff --git a/src/finn/transformation/fpgadataflow/prepare_ip.py b/src/finn/transformation/fpgadataflow/prepare_ip.py index 0e521be5ae..a74e0f7afc 100644 --- a/src/finn/transformation/fpgadataflow/prepare_ip.py +++ b/src/finn/transformation/fpgadataflow/prepare_ip.py @@ -44,10 +44,6 @@ def _codegen_single_node(node, model, fpgapart, clk): try: # lookup op_type in registry of CustomOps inst = registry.getCustomOp(node) - # find out if the node should be ignored - ignore = inst.get_nodeattr("ipgen_ignore") - if ignore: - return # get the path of the code generation directory code_gen_dir = inst.get_nodeattr("code_gen_dir_ipgen") # ensure that there is a directory diff --git 
a/src/finn/transformation/fpgadataflow/prepare_rtlsim.py b/src/finn/transformation/fpgadataflow/prepare_rtlsim.py index 1b500ccd7b..b8f45deb1d 100644 --- a/src/finn/transformation/fpgadataflow/prepare_rtlsim.py +++ b/src/finn/transformation/fpgadataflow/prepare_rtlsim.py @@ -68,10 +68,6 @@ def applyNodeLocal(self, node): try: # lookup op_type in registry of CustomOps inst = registry.getCustomOp(node) - # find out if the node should be ignored - ignore = inst.get_nodeattr("ipgen_ignore") - if ignore: - return (node, False) inst.prepare_rtlsim() # ensure that executable path is now set assert ( diff --git a/tests/fpgadataflow/test_fifosizing.py b/tests/fpgadataflow/test_fifosizing.py index 6641be1bb7..13dd45e5f5 100644 --- a/tests/fpgadataflow/test_fifosizing.py +++ b/tests/fpgadataflow/test_fifosizing.py @@ -77,6 +77,17 @@ def sort_thresholds_increasing(thresholds): return np.sort(thresholds, axis=1) +def compare_two_chr_funcs(a, b, relaxation): + # relaxation determines how much leeway we allow for the + # analytical implementation to be off from RTL ground truth + equal = True + for inp in range(len(a)): + for i in range(len(a[inp])): + if (a[inp][i] > (b[inp][i] + relaxation)) or (a[inp][i] < (b[inp][i] - relaxation)): + equal = False + return equal + + def make_single_fmpadding_modelwrapper(impl_style, idim, padding, num_ch, simd, idt): pad_h = padding[0] + padding[2] pad_w = padding[1] + padding[3] @@ -628,6 +639,8 @@ def test_fifosizing_linear(method, topology): @pytest.mark.slow @pytest.mark.vivado @pytest.mark.fpgadataflow +# whether we are testing input or output characterization +@pytest.mark.parametrize("direction", ["input", "output"]) @pytest.mark.parametrize( "node", [ @@ -649,10 +662,10 @@ def test_fifosizing_linear(method, topology): ( "MVAU", 5, - 1, + 5, 8, - 1, - [1, 4], + 8, + [1, 1], DataType["UINT2"], DataType["UINT2"], DataType["UINT2"], @@ -696,6 +709,8 @@ def test_fifosizing_linear(method, topology): ("FMPadding", [8, 8], [4, 0, 4, 0], 12, 
1, DataType["INT2"], "hls"), ("FMPadding", [8, 8], [0, 4, 0, 4], 5, 1, DataType["INT2"], "hls"), ("FMPadding", [2, 3], [0, 3, 0, 4], 5, 5, DataType["INT2"], "hls"), + ("FMPadding", [4, 8], [0, 4, 0, 2], 5, 5, DataType["INT2"], "hls"), + ("FMPadding", [2, 3], [0, 3, 0, 4], 5, 5, DataType["INT2"], "hls"), # idim, pad, num_ch,simd,idt ( "ChannelwiseOp", @@ -719,43 +734,15 @@ def test_fifosizing_linear(method, topology): [1], "hls", ), - ( - "ChannelwiseOp", - DataType["INT8"], - DataType["INT4"], - DataType["INT4"], - 1, - 16, - "add", - [1, 7, 7], - "hls", - ), # ,idt, act, pdt, nf, ich, func, vecs, impl_style # (Pdb) (ifm_dim,output_size,is1d, NumChannels,PoolDim,ImgDim,PE) - ("StreamingMaxPool", DataType["INT4"], True, 2, 32, 4, 1, 0, "hls"), ("StreamingMaxPool", DataType["INT4"], True, 1, 4, 1, 1, 0, "hls"), ("StreamingMaxPool", DataType["BIPOLAR"], False, 1, 10, 1, 1, 1, "hls"), - ("StreamingMaxPool", DataType["BIPOLAR"], False, 2, 10, 64, 1, 1, "hls"), - ("StreamingMaxPool", DataType["BIPOLAR"], False, 2, 28, 64, 1, 0, "hls"), + # ("StreamingMaxPool", DataType["INT4"], True, 2, 32, 4, 1, 0, "hls"), + # ("StreamingMaxPool", DataType["BIPOLAR"], False, 2, 28, 64, 1, 0, "hls"), + # ("StreamingMaxPool", DataType["BIPOLAR"], False, 2, 10, 64, 1, 1, "hls"), + # ("StreamingMaxPool", DataType["INT4"], True, 4, 10, 3, 3, 1, "hls"), # idt, dim_1d, k, ifm_dim, ifm_ch, pe, ceil_mode,impl_style - ("StreamingMaxPool", DataType["BIPOLAR"], False, 1, 10, 1, 1, 1, "hls"), - ("StreamingMaxPool", DataType["INT4"], True, 4, 10, 3, 3, 1, "hls"), - ( - "ConvolutionInputGenerator", - DataType["INT2"], - [6, 6], - [12, 12], - 8, - [4, 4], - [1, 1], - 2, - 0, - 0, - 1, - False, - 0, - "hls", - ), ( "ConvolutionInputGenerator", DataType["INT2"], @@ -774,54 +761,6 @@ def test_fifosizing_linear(method, topology): ), # idt,k, ifm_dim, ifm_ch,stride, dilation, # simd, dw, parallel_window, m, flip, is1d - ( - "ConvolutionInputGenerator", - DataType["INT2"], - [6, 1], - [12, 1], - 16, - [3, 
1], - [1, 1], - 2, - 0, - 0, - 1, - False, - 1, - "hls", - ), - ( - "ConvolutionInputGenerator", - DataType["INT2"], - [6, 1], - [12, 1], - 16, - [1, 1], - [1, 1], - 2, - 1, - 0, - 1, - False, - 1, - "hls", - ), - ( - "ConvolutionInputGenerator", - DataType["INT2"], - [6, 1], - [12, 1], - 16, - [2, 1], - [1, 1], - 2, - 1, - 0, - 1, - False, - 1, - "hls", - ), ( "ConvolutionInputGenerator", DataType["INT2"], @@ -886,6 +825,12 @@ def test_fifosizing_linear(method, topology): 1, "hls", ), + # ("ConvolutionInputGenerator", DataType["INT2"], + # [6, 6],[12, 12],8,[4, 4],[1, 1],2,0,0,1,False,0,"hls",), + # ("ConvolutionInputGenerator",DataType["INT2"], + # [6, 1],[12, 1],16,[2, 1],[1, 1],2,1,0,1,False,1,"hls",), + # ("ConvolutionInputGenerator",DataType["INT2"], + # [6, 1],[12, 1],16,[1, 1],[1, 1],2,1,0,1,False,1,"hls",), # idt,k, ifm_dim, ifm_ch,stride, dilation, simd, # dw, parallel_window, m, flip, is1d ( @@ -905,25 +850,36 @@ def test_fifosizing_linear(method, topology): "hls", ), ( - "VVAU", + "ChannelwiseOp", + DataType["INT8"], DataType["INT4"], DataType["INT4"], - None, - 3, - 3, - 10, - 10, - 3, - 3, - 3, - "internal_embedded", 1, - "rtl", + 16, + "add", + [1, 3, 3], + "hls", + ), + ( + "ConvolutionInputGenerator", + DataType["INT2"], + [6, 1], + [12, 1], + 16, + [3, 1], + [1, 1], + 2, + 0, + 0, + 1, + False, + 1, + "hls", + # ("Thresholding", [15, 3], True, True, "hls"), ), - ("Thresholding", [15, 3], True, True, "hls"), ], ) -def test_fifosizing_analytical_characterization(node): +def test_fifosizing_analytical_characterization(direction, node): test_rtl = True test_fpga_part = "xc7z020clg400-1" @@ -948,7 +904,7 @@ def test_fifosizing_analytical_characterization(node): cfg, narrow, per_tensor, impl_style = node[1:] ch = cfg[0] pe = cfg[1] - n_inp_vecs = [1, 2, 2] + n_inp_vecs = [1, 1, 1] hls_mem_mode = "internal_decoupled" act = DataType["INT4"] idt = DataType["INT16"] @@ -1041,7 +997,7 @@ def test_fifosizing_analytical_characterization(node): model = 
golden.transform(InferStreamingMaxPool()) model = model.transform(InferShapes()) - model0 = model.transform(SpecializeLayers("xczu3eg-sbva484-1-e")) + model0 = model.transform(SpecializeLayers(test_fpga_part)) # Ensure PE value is set streamingmaxpool_node = model0.get_nodes_by_op_type("StreamingMaxPool_hls")[0] @@ -1118,7 +1074,7 @@ def test_fifosizing_analytical_characterization(node): inst = getCustomOp(model.get_nodes_by_op_type("ConvolutionInputGenerator")[0]) inst.set_nodeattr("is1D", is1d) inst.set_nodeattr("preferred_impl_style", impl_style) - model = model.transform(SpecializeLayers("xc7z020clg400-1")) + model = model.transform(SpecializeLayers(test_fpga_part)) # set simd inst = getCustomOp(model.graph.node[0]) inst.set_nodeattr("SIMD", simd) @@ -1212,8 +1168,8 @@ def test_fifosizing_analytical_characterization(node): inst.set_nodeattr("resType", "dsp") inst.set_nodeattr("preferred_impl_style", impl_style) - model0 = model.transform(SpecializeLayers("xcvc")) - test_fpga_part = "xcvc" + model0 = model.transform(SpecializeLayers(test_fpga_part)) + # test_fpga_part = test_fpga_part outputs = [build_cfg.DataflowOutputType.ESTIMATE_REPORTS] model1 = copy.deepcopy(model0) @@ -1224,18 +1180,13 @@ def test_fifosizing_analytical_characterization(node): node_inst0 = getCustomOp(model0.graph.node[0]) node_inst1 = getCustomOp(model1.graph.node[0]) - # generate ip for node0 (RTL-based characterization) - node_inst0.set_nodeattr("ipgen_ignore", False) - - # do not generate ip for node0 (analytical characterization) - node_inst1.set_nodeattr("ipgen_ignore", True) - cfg = build_cfg.DataflowBuildConfig( output_dir=tmp_output_dir, synth_clk_period_ns=target_clk_ns, generate_outputs=outputs, fpga_part=test_fpga_part, - auto_fifo_strategy="characterize_analytic", + auto_fifo_strategy="characterize", + characteristic_function_strategy="analytical", auto_fifo_depths=True, split_large_fifos=False, ) @@ -1250,7 +1201,7 @@ def test_fifosizing_analytical_characterization(node): # 
rtlsim-based if test_rtl: - cfg.auto_fifo_strategy = "characterize" + cfg.characteristic_function_strategy = "rtlsim" if model_cache is None: inst = getCustomOp(model0.graph.node[0]) model0 = model0.transform(SpecializeLayers(test_fpga_part)) @@ -1274,9 +1225,16 @@ def test_fifosizing_analytical_characterization(node): continue if test_rtl: - assert np.array_equal( - node_inst0.get_nodeattr("io_chrc_in"), node_inst1.get_nodeattr("io_chrc_in") - ) - assert np.array_equal( - node_inst0.get_nodeattr("io_chrc_out"), node_inst1.get_nodeattr("io_chrc_out") - ) + test_relaxation = 5 + if direction == "input": + assert compare_two_chr_funcs( + node_inst0.get_nodeattr("io_chrc_in"), + node_inst1.get_nodeattr("io_chrc_in"), + test_relaxation, + ) + elif direction == "output": + assert compare_two_chr_funcs( + node_inst0.get_nodeattr("io_chrc_out"), + node_inst1.get_nodeattr("io_chrc_out"), + test_relaxation, + ) From 7494103a99acef128ab7777042564429f7800b31 Mon Sep 17 00:00:00 2001 From: Felix Jentzsch Date: Wed, 13 Nov 2024 15:42:34 +0000 Subject: [PATCH 08/12] Initial test for non-linear fifo sizing + fixes --- src/finn/custom_op/fpgadataflow/addstreams.py | 8 +- .../fpgadataflow/duplicatestreams.py | 33 +- src/finn/custom_op/fpgadataflow/hwcustomop.py | 6 +- tests/fpgadataflow/test_fifosizing.py | 289 +++++++++++++++++- 4 files changed, 317 insertions(+), 19 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/addstreams.py b/src/finn/custom_op/fpgadataflow/addstreams.py index ac61786ac1..4af2b64197 100644 --- a/src/finn/custom_op/fpgadataflow/addstreams.py +++ b/src/finn/custom_op/fpgadataflow/addstreams.py @@ -159,7 +159,9 @@ def get_verilog_top_module_intf_names(self): intf_names["s_axis"] = [(x + "_" + sname, swidth) for x in ["in0", "in1"]] return intf_names - def derive_characteristic_fxns(self, period): + def derive_characteristic_fxns( + self, model, period, strategy, fpga_part, clk_period, op_type, override_dict=None + ): n_inps = 
np.prod(self.get_folded_input_shape()[:-1]) io_dict = { "inputs": { @@ -168,4 +170,6 @@ def derive_characteristic_fxns(self, period): }, "outputs": {"out": []}, } - super().derive_characteristic_fxns(period, override_rtlsim_dict=io_dict) + super().derive_characteristic_fxns( + model, period, strategy, fpga_part, clk_period, op_type, override_dict=io_dict + ) diff --git a/src/finn/custom_op/fpgadataflow/duplicatestreams.py b/src/finn/custom_op/fpgadataflow/duplicatestreams.py index 891009eae8..ac59868f27 100644 --- a/src/finn/custom_op/fpgadataflow/duplicatestreams.py +++ b/src/finn/custom_op/fpgadataflow/duplicatestreams.py @@ -40,20 +40,25 @@ def __init__(self, onnx_node, **kwargs): super().__init__(onnx_node, **kwargs) def get_nodeattr_types(self): - my_attrs = { - "NumChannels": ("i", True, 0), - "PE": ("i", True, 0), - # how many duplicated output streams to create - "NumOutputStreams": ("i", True, 0), - # FINN DataTypes for input - "inputDataType": ("s", True, ""), - # number of input vectors, examples: - # [1] is a single vector (like a FC layer with batch=1) - # [4] is four vectors (like a FC layer with batch=4) - # [1, 4, 4] is four * four vectors (like a conv layer with batch=1) - "numInputVectors": ("ints", False, [1]), - } - my_attrs.update(super().get_nodeattr_types()) + my_attrs = super().get_nodeattr_types() + my_attrs.update( + { + "NumChannels": ("i", True, 0), + "PE": ("i", True, 0), + # how many duplicated output streams to create + "NumOutputStreams": ("i", True, 0), + # FINN DataTypes for input + "inputDataType": ("s", True, ""), + # number of input vectors, examples: + # [1] is a single vector (like a FC layer with batch=1) + # [4] is four vectors (like a FC layer with batch=4) + # [1, 4, 4] is four * four vectors (like a conv layer with batch=1) + "numInputVectors": ("ints", False, [1]), + # TODO: how to set a default value depending on NumOutputStreams? 
+ # transformations like set_fifo_depth expect this attribute for every i/o of every node + "outFIFODepths": ("ints", False, [2, 2]), + } + ) return my_attrs def get_num_output_streams(self): diff --git a/src/finn/custom_op/fpgadataflow/hwcustomop.py b/src/finn/custom_op/fpgadataflow/hwcustomop.py index f57b89ed2e..d0cafa85da 100644 --- a/src/finn/custom_op/fpgadataflow/hwcustomop.py +++ b/src/finn/custom_op/fpgadataflow/hwcustomop.py @@ -469,7 +469,9 @@ def derive_characteristic_fxns_rtlsim(self, model, period, fpga_part, clk_period code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") # ensure that there is a directory if code_gen_dir == "" or not os.path.isdir(code_gen_dir): - code_gen_dir = make_build_dir(prefix="code_gen_ipgen_" + str(self.name) + "_") + code_gen_dir = make_build_dir( + prefix="code_gen_ipgen_" + str(self.onnx_node.name) + "_" + ) self.set_nodeattr("code_gen_dir_ipgen", code_gen_dir) # ensure that there is generated code inside the dir self.code_generation_ipgen(model, fpga_part, clk_period) @@ -489,7 +491,7 @@ def derive_characteristic_fxns_rtlsim(self, model, period, fpga_part, clk_period # call the compilation function for this node self.ipgen_singlenode_code() else: - warnings.warn("Using pre-existing IP for %s" % self.name) + warnings.warn("Using pre-existing IP for %s" % self.onnx_node.name) # ensure that executable path is now set assert ( self.get_nodeattr("ipgen_path") != "" diff --git a/tests/fpgadataflow/test_fifosizing.py b/tests/fpgadataflow/test_fifosizing.py index 13dd45e5f5..df5ccd488e 100644 --- a/tests/fpgadataflow/test_fifosizing.py +++ b/tests/fpgadataflow/test_fifosizing.py @@ -42,9 +42,17 @@ from qonnx.custom_op.general.im2col import compute_conv_output_dim from qonnx.custom_op.general.maxpoolnhwc import compute_pool_output_dim from qonnx.custom_op.registry import getCustomOp -from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames +from qonnx.transformation.general import ( + 
GiveRandomTensorNames, + GiveReadableTensorNames, + GiveUniqueNodeNames, + GiveUniqueParameterTensors, +) +from qonnx.transformation.infer_data_layouts import InferDataLayouts from qonnx.transformation.infer_datatypes import InferDataTypes from qonnx.transformation.infer_shapes import InferShapes +from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul +from qonnx.transformation.merge_onnx_models import MergeONNXModels from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model import finn.builder.build_dataflow as build @@ -361,6 +369,199 @@ def make_single_dw_conv_modelwrapper(conv_config, idt, wdt): return model +def make_conv_building_block(ifm_dim, ch, kernel_size, simd, pe, parallel_window=0): + # hardcoded parameters + idt = DataType["UINT4"] + wdt = DataType["UINT4"] + odt = DataType["UINT4"] + tdt = DataType["UINT32"] + stride = 1 + in_ch = out_ch = ch # input channel = output channel for stacking + pad = int(np.floor(kernel_size / 2)) # pad so that input dim = output dim for stacking + + total_pad = 2 * pad + out_feature_dim = compute_conv_output_dim(ifm_dim, kernel_size, stride, total_pad) + weights_shape = [in_ch * kernel_size * kernel_size, out_ch] + thresholds_shape = [1, odt.get_num_possible_values() - 1] + input_shape = [1, ifm_dim, ifm_dim, in_ch] + padding_out_shape = [1, ifm_dim + total_pad, ifm_dim + total_pad, in_ch] + inpgen_out_shape = [1, out_feature_dim, out_feature_dim, in_ch * kernel_size * kernel_size] + output_shape = [1, out_feature_dim, out_feature_dim, out_ch] + + padding_config = {} + padding_config["domain"] = "finn.custom_op.fpgadataflow.rtl" + padding_config["backend"] = "fpgadataflow" + padding_config["ImgDim"] = [ifm_dim, ifm_dim] + padding_config["NumChannels"] = in_ch + padding_config["SIMD"] = simd + padding_config["Padding"] = [pad, pad, pad, pad] + padding_config["inputDataType"] = idt.name + + inpgen_config = {} + inpgen_config["domain"] = "finn.custom_op.fpgadataflow.rtl" + 
inpgen_config["backend"] = "fpgadataflow" + inpgen_config["ConvKernelDim"] = [kernel_size, kernel_size] + inpgen_config["IFMChannels"] = in_ch + inpgen_config["IFMDim"] = [ifm_dim + total_pad, ifm_dim + total_pad] + inpgen_config["OFMDim"] = [ifm_dim, ifm_dim] + inpgen_config["inputDataType"] = idt.name + inpgen_config["outputDataType"] = idt.name + inpgen_config["SIMD"] = simd + inpgen_config["parallel_window"] = parallel_window + inpgen_config["Stride"] = [stride, stride] + inpgen_config["Dilation"] = [1, 1] + + mvau_config = {} + mvau_config["domain"] = "finn.custom_op.fpgadataflow.hls" + mvau_config["backend"] = "fpgadataflow" + mvau_config["numInputVectors"] = [1, ifm_dim, ifm_dim] + mvau_config["MW"] = in_ch * kernel_size * kernel_size + mvau_config["MH"] = in_ch + mvau_config["SIMD"] = simd if parallel_window == 0 else simd * kernel_size * kernel_size + mvau_config["PE"] = pe + mvau_config["resType"] = "lut" + mvau_config["inputDataType"] = idt.name + mvau_config["weightDataType"] = wdt.name + mvau_config["outputDataType"] = odt.name + + top_in = helper.make_tensor_value_info("top_in", TensorProto.FLOAT, input_shape) + top_out = helper.make_tensor_value_info("top_out", TensorProto.FLOAT, output_shape) + value_info = [ + helper.make_tensor_value_info("weights", TensorProto.FLOAT, weights_shape), + helper.make_tensor_value_info("thresholds", TensorProto.FLOAT, thresholds_shape), + helper.make_tensor_value_info("padding_out", TensorProto.FLOAT, padding_out_shape), + helper.make_tensor_value_info("inpgen_out", TensorProto.FLOAT, inpgen_out_shape), + ] + + modelproto = qonnx_make_model( + helper.make_graph( + name="building_block", + inputs=[top_in], + outputs=[top_out], + value_info=value_info, + nodes=[ + helper.make_node("FMPadding_rtl", ["top_in"], ["padding_out"], **padding_config), + helper.make_node( + "ConvolutionInputGenerator_rtl", + ["padding_out"], + ["inpgen_out"], + **inpgen_config, + ), + helper.make_node( + "MVAU_hls", ["inpgen_out", "weights", 
"thresholds"], ["top_out"], **mvau_config + ), + ], + ) + ) + + model = ModelWrapper(modelproto) + model.set_tensor_datatype("top_in", idt) + model.set_tensor_layout("top_in", ["N", "H", "W", "C"]) + model.set_tensor_datatype("top_out", odt) + model.set_tensor_datatype("weights", wdt) + model.set_tensor_datatype("thresholds", tdt) + + weights = gen_finn_dt_tensor(wdt, weights_shape) + # TODO: thresholds are all the same + thresholds = generate_random_threshold_values( + tdt, out_ch, odt.get_num_possible_values() - 1, False, True + ) + thresholds = sort_thresholds_increasing(thresholds) + + model.set_initializer("weights", weights) + model.set_initializer("thresholds", thresholds) + + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(InferShapes()) + model = model.transform(InferDataTypes()) + + return model + + +def combine_blocks(lb, rb, ifm_dim, ch, pe): + # assumes left branch (lb) and right branch (rb) each have a single (dynamic) input/output with the same shape + # to avoid mix-ups, start by giving all tensors random names + lb = lb.transform(GiveRandomTensorNames()) + rb = rb.transform(GiveRandomTensorNames()) + # erase all node names to avoid conflict + for n in lb.graph.node: + n.name = "" + for n in rb.graph.node: + n.name = "" + + lb_input = lb.graph.input[0] + lb_output = lb.graph.output[0] + rb_input = rb.graph.input[0] + rb_output = rb.graph.output[0] + + top_in = helper.make_tensor_value_info("top_in", TensorProto.FLOAT, [1, ifm_dim, ifm_dim, ch]) + top_out = helper.make_tensor_value_info("top_out", TensorProto.FLOAT, [1, ifm_dim, ifm_dim, ch]) + + dup_config = {} + dup_config["domain"] = "finn.custom_op.fpgadataflow.hls" + dup_config["backend"] = "fpgadataflow" + dup_config["numInputVectors"] = [1, ifm_dim, ifm_dim] + dup_config["NumChannels"] = ch + dup_config["PE"] = pe + dup_config["NumOutputStreams"] = 2 + dup_config["inputDataType"] = lb.get_tensor_datatype(lb_input.name).name + + add_config = {} + add_config["domain"] = 
"finn.custom_op.fpgadataflow.hls" + add_config["backend"] = "fpgadataflow" + add_config["numInputVectors"] = [1, ifm_dim, ifm_dim] + add_config["NumChannels"] = ch + add_config["PE"] = pe + add_config["inputDataType"] = lb.get_tensor_datatype(lb_output.name).name + + nodes_lb = [node for node in lb.graph.node] + nodes_rb = [node for node in rb.graph.node] + nodes_new = ( + nodes_lb + + nodes_rb + + [ + helper.make_node( + "DuplicateStreams_hls", ["top_in"], [lb_input.name, rb_input.name], **dup_config + ), + helper.make_node( + "AddStreams_hls", [lb_output.name, rb_output.name], ["top_out"], **add_config + ), + ] + ) + + value_info_lb = [x for x in lb.graph.value_info] + value_info_rb = [x for x in rb.graph.value_info] + value_info_new = value_info_lb + value_info_rb + [lb_input, lb_output, rb_input, rb_output] + + initializer_lb = [x for x in lb.graph.initializer] + initializer_rb = [x for x in rb.graph.initializer] + initializer_new = initializer_lb + initializer_rb + modelproto = qonnx_make_model( + helper.make_graph( + name="branching_model", + inputs=[top_in], + outputs=[top_out], + value_info=value_info_new, + nodes=nodes_new, + ) + ) + + model = ModelWrapper(modelproto) + model.set_tensor_datatype("top_in", lb.get_tensor_datatype(lb_input.name)) + model.set_tensor_layout("top_in", lb.get_tensor_layout(lb_input.name)) + for i in initializer_new: + model.graph.initializer.append(i) + + # tidy-up + model = model.transform(InferShapes()) + model = model.transform(InferDataTypes()) + model = model.transform(InferDataLayouts()) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(GiveUniqueParameterTensors()) + model = model.transform(GiveReadableTensorNames()) + return model + + def _infer_sparse_weight_tensor(W_conv, k_h, k_w, channels): W_sparse = np.zeros((channels, channels, k_h, k_w), dtype=np.float32) for ch in range(channels): @@ -636,6 +837,92 @@ def test_fifosizing_linear(method, topology): shutil.rmtree(tmp_output_dir_cmp) 
+@pytest.mark.slow +@pytest.mark.vivado +@pytest.mark.fpgadataflow +@pytest.mark.parametrize("strategy", ["rtlsim"]) # rtlsim #analytical +@pytest.mark.parametrize("lb_num_layers", [1]) +@pytest.mark.parametrize("rb_num_layers", [4]) +def test_fifosizing_nonlinear(strategy, lb_num_layers, rb_num_layers): + np.random.seed(0) + tmp_output_dir = make_build_dir( + "build_fifosizing_nonlinear_%s_%s" % (lb_num_layers, rb_num_layers) + ) + + rtlsim_n = 10 + + dim = 16 + ch = 4 + + lb = None + for i in range(lb_num_layers): + new_block = make_conv_building_block( + dim, ch, kernel_size=3, simd=4, pe=4, parallel_window=1 + ) + lb = new_block if lb is None else lb.transform(MergeONNXModels(new_block)) + lb.save(tmp_output_dir + "/lb.onnx") + + rb = None + for i in range(rb_num_layers): + new_block = make_conv_building_block( + dim, ch, kernel_size=3, simd=4, pe=4, parallel_window=1 + ) + rb = new_block if rb is None else rb.transform(MergeONNXModels(new_block)) + rb.save(tmp_output_dir + "/rb.onnx") + + model = combine_blocks(lb, rb, dim, ch, pe=4) + model.save(tmp_output_dir + "/model.onnx") + + cfg = build_cfg.DataflowBuildConfig( + output_dir=tmp_output_dir, + # only works with characterization-based FIFO-sizing + auto_fifo_depths=True, + auto_fifo_strategy="characterize", + characteristic_function_strategy=strategy, + split_large_fifos=False, + # manual folding + target_fps=None, + # general rtlsim settings + force_python_rtlsim=False, + rtlsim_batch_size=rtlsim_n, + synth_clk_period_ns=10.0, + board="Pynq-Z1", + shell_flow_type=build_cfg.ShellFlowType.VIVADO_ZYNQ, + generate_outputs=[ + build_cfg.DataflowOutputType.ESTIMATE_REPORTS, + build_cfg.DataflowOutputType.STITCHED_IP, + build_cfg.DataflowOutputType.RTLSIM_PERFORMANCE, + ], + ) + + build.build_dataflow_cfg(tmp_output_dir + "/model.onnx", cfg) + + with open(tmp_output_dir + "/report/estimate_network_performance.json") as f: + est_data = json.load(f) + with open(tmp_output_dir + "/report/rtlsim_performance.json") 
as f: + sim_data = json.load(f) + + # check for deadlock + model_final = ModelWrapper(tmp_output_dir + "/intermediate_models/step_create_stitched_ip.onnx") + first_node = getCustomOp(model_final.find_consumer(model_final.graph.input[0].name)) + last_node = getCustomOp(model_final.find_producer(model_final.graph.output[0].name)) + input_txns_expected = np.prod(first_node.get_folded_input_shape()[:-1]) * rtlsim_n + output_txns_expected = np.prod(last_node.get_folded_output_shape()[:-1]) * rtlsim_n + assert sim_data["N_IN_TXNS"] == input_txns_expected + assert sim_data["N_OUT_TXNS"] == output_txns_expected + + # check rtlsim throughput + # TODO: how to determine N? Take throughput or stable_throughput? + # sim_data["stable_throughput[images/s]"] + assert ( + float(sim_data["throughput[images/s]"]) / float(est_data["estimated_throughput_fps"]) > 0.9 + ) + + # TODO: + # reduce (individual) FIFO sizes by x % and observe throughput drop or deadlock appear + # shutil.rmtree(tmp_output_dir) + + @pytest.mark.slow @pytest.mark.vivado @pytest.mark.fpgadataflow From bdc391211cc10ebc6a50a68ac6f303f4e6947576 Mon Sep 17 00:00:00 2001 From: Lukas Stasytis Date: Mon, 18 Nov 2024 13:18:33 +0000 Subject: [PATCH 09/12] moved fifo sizing tests to respective node tests, failing = thresholding and specific cases of streamingmaxpool and slidingwindow --- src/finn/builder/build_dataflow_steps.py | 25 - src/finn/custom_op/fpgadataflow/hwcustomop.py | 1 - src/finn/util/test.py | 74 +- tests/fpgadataflow/test_fifosizing.py | 1141 +---------------- .../test_fpgadataflow_channelwise_ops.py | 60 + .../test_fpgadataflow_convinputgenerator.py | 125 ++ .../test_fpgadataflow_fmpadding.py | 53 + .../test_fpgadataflow_labelselect.py | 57 +- tests/fpgadataflow/test_fpgadataflow_mvau.py | 79 ++ .../test_fpgadataflow_streamingmaxpool.py | 75 ++ .../test_fpgadataflow_thresholding.py | 154 +++ tests/fpgadataflow/test_fpgadataflow_vvau.py | 116 ++ 12 files changed, 804 insertions(+), 1156 deletions(-) diff 
--git a/src/finn/builder/build_dataflow_steps.py b/src/finn/builder/build_dataflow_steps.py index 33ff3d4483..3ad8ee9807 100644 --- a/src/finn/builder/build_dataflow_steps.py +++ b/src/finn/builder/build_dataflow_steps.py @@ -548,36 +548,11 @@ def step_set_fifo_depths(model: ModelWrapper, cfg: DataflowBuildConfig): `GiveUniqueNodeNames`. """ - print("ENTERED STEP FIFO DEPTHS") if cfg.auto_fifo_depths: if cfg.auto_fifo_strategy == "characterize": model = model.transform(InsertDWC()) model = model.transform(SpecializeLayers(cfg._resolve_fpga_part())) model = model.transform(GiveUniqueNodeNames()) - - """ - if cfg.characteristic_function_strategy == : - # RTL sim only the nodes which are not supported right now with - # analytic characteristic derivations. - # To do this, we first check if the characteristic - # function exists for each node. If yes, we make sure PrepareIP and HLSSynthIP - # do not generate code for them. We unset the flags afterwards - # so that a repeat call to SynthIP and PrepareIP will indeed generate the cpp code. 
- for node in model.graph.node: - node_inst = getCustomOp(node) - prepare_kwargs_for_characteristic_fx = getattr( - node_inst, "prepare_kwargs_for_characteristic_fx", None - ) - if callable(prepare_kwargs_for_characteristic_fx): - node_inst.set_nodeattr("ipgen_ignore", True) - - model = model.transform( - PrepareIP(cfg._resolve_fpga_part(), cfg._resolve_hls_clk_period()) - ) - model = model.transform(HLSSynthIP()) - model = model.transform(PrepareRTLSim()) - - """ model = model.transform(AnnotateCycles()) period = int(model.analysis(dataflow_performance)["max_cycles"] * 3 + 10) diff --git a/src/finn/custom_op/fpgadataflow/hwcustomop.py b/src/finn/custom_op/fpgadataflow/hwcustomop.py index f57b89ed2e..f7f8aac56e 100644 --- a/src/finn/custom_op/fpgadataflow/hwcustomop.py +++ b/src/finn/custom_op/fpgadataflow/hwcustomop.py @@ -367,7 +367,6 @@ def get_outstream_width_padded(self, ind=0): def derive_characteristic_fxns( self, model, period, strategy, fpga_part, clk_period, op_type, override_dict=None ): - print("deriving characteristic func") if override_dict is None: n_inps = np.prod(self.get_folded_input_shape()[:-1]) io_dict = { diff --git a/src/finn/util/test.py b/src/finn/util/test.py index 2115e058a8..ea402d1c89 100644 --- a/src/finn/util/test.py +++ b/src/finn/util/test.py @@ -39,11 +39,28 @@ from pkgutil import get_data from qonnx.core.modelwrapper import ModelWrapper from qonnx.custom_op.registry import getCustomOp +from qonnx.transformation.general import GiveUniqueNodeNames +from finn.analysis.fpgadataflow.dataflow_performance import dataflow_performance from finn.core.onnx_exec import execute_onnx +from finn.transformation.fpgadataflow.annotate_cycles import AnnotateCycles +from finn.transformation.fpgadataflow.derive_characteristic import DeriveCharacteristic from finn.transformation.fpgadataflow.make_zynq_proj import ZynqBuild +from finn.transformation.fpgadataflow.minimize_accumulator_width import ( + MinimizeAccumulatorWidth, +) +from 
finn.transformation.fpgadataflow.minimize_weight_bit_width import ( + MinimizeWeightBitWidth, +) +from finn.transformation.fpgadataflow.prepare_ip import PrepareIP +from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers from finn.transformation.fpgadataflow.vitis_build import VitisBuild, VitisOptStrategy -from finn.util.basic import alveo_default_platform, alveo_part_map, pynq_part_map +from finn.util.basic import ( + alveo_default_platform, + alveo_part_map, + make_build_dir, + pynq_part_map, +) # map of (wbits,abits) -> model example_map = { @@ -184,3 +201,58 @@ def resize_smaller_side(target_pixels, img): def crop_center(size, img): """Crop central size*size window out of a PIL image.""" return torchvision_util.center_crop(img, size) + + +def compare_two_chr_funcs(a, b, relaxation): + # relaxation determines how much leeway we allow for the + # analytical implementation to be off from RTL ground truth + equal = True + for inp in range(len(a)): + for i in range(len(a[inp])): + if (a[inp][i] > (b[inp][i] + relaxation)) or (a[inp][i] < (b[inp][i] - relaxation)): + equal = False + return equal + + +def get_characteristic_fnc(model, node, part, target_clk_ns, strategy): + # If set to True: attempt to cache a pre-existing variant of the model + # this is to avoid generating RTL multiple times during + # test debugging + caching = False + model_cache = None + + if strategy == "rtlsim" and caching: + build_dir = os.environ["FINN_BUILD_DIR"] + for x in os.listdir(build_dir): + if x.startswith(str(node)): + model_cache = f"{build_dir}/{x}/model.onnx" + + make_build_dir("build_fifosizing") + if model_cache is not None: + model = ModelWrapper(model_cache) + + if model_cache is None: + model = model.transform(SpecializeLayers(part)) + model = model.transform(MinimizeWeightBitWidth()) + model = model.transform(MinimizeAccumulatorWidth()) + model = model.transform(GiveUniqueNodeNames()) + if strategy == "rtlsim": + model = 
model.transform(PrepareIP(part, target_clk_ns)) + model = model.transform(AnnotateCycles()) + + period = int(model.analysis(dataflow_performance)["max_cycles"] * 3 + 10) + + model = model.transform( + DeriveCharacteristic( + model, + period, + strategy, + part, + target_clk_ns, + ) + ) + if caching: + tmp_caching_output_dir = make_build_dir(str(node)) + model.save(tmp_caching_output_dir + "/model.onnx") + + return getCustomOp(model.graph.node[0]) diff --git a/tests/fpgadataflow/test_fifosizing.py b/tests/fpgadataflow/test_fifosizing.py index 13dd45e5f5..ee88e2aba3 100644 --- a/tests/fpgadataflow/test_fifosizing.py +++ b/tests/fpgadataflow/test_fifosizing.py @@ -29,542 +29,19 @@ import pytest -import copy import json -import numpy as np -import os import shutil import torch from brevitas.export import export_qonnx -from onnx import TensorProto, helper -from qonnx.core.datatype import DataType from qonnx.core.modelwrapper import ModelWrapper -from qonnx.custom_op.general.im2col import compute_conv_output_dim -from qonnx.custom_op.general.maxpoolnhwc import compute_pool_output_dim from qonnx.custom_op.registry import getCustomOp -from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames -from qonnx.transformation.infer_datatypes import InferDataTypes -from qonnx.transformation.infer_shapes import InferShapes -from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model import finn.builder.build_dataflow as build import finn.builder.build_dataflow_config as build_cfg -import finn.transformation.fpgadataflow.convert_to_hw_layers as to_hw -from finn.builder.build_dataflow_steps import step_set_fifo_depths -from finn.transformation.fpgadataflow.convert_to_hw_layers import InferStreamingMaxPool -from finn.transformation.fpgadataflow.prepare_ip import PrepareIP -from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers from finn.util.basic import make_build_dir from finn.util.test import get_trained_network_and_ishape 
-def generate_random_threshold_values( - data_type, num_input_channels, num_steps, narrow=False, per_tensor=False -): - if per_tensor: - num_input_channels = 1 - if narrow: - num_steps -= 1 - - return np.random.randint( - data_type.min(), - data_type.max() + 1, - (num_input_channels, num_steps), - ).astype(np.float32) - - -def sort_thresholds_increasing(thresholds): - return np.sort(thresholds, axis=1) - - -def compare_two_chr_funcs(a, b, relaxation): - # relaxation determines how much leeway we allow for the - # analytical implementation to be off from RTL ground truth - equal = True - for inp in range(len(a)): - for i in range(len(a[inp])): - if (a[inp][i] > (b[inp][i] + relaxation)) or (a[inp][i] < (b[inp][i] - relaxation)): - equal = False - return equal - - -def make_single_fmpadding_modelwrapper(impl_style, idim, padding, num_ch, simd, idt): - pad_h = padding[0] + padding[2] - pad_w = padding[1] + padding[3] - idim_h, idim_w = idim - - assert pad_h > 0 or pad_w > 0, "Output dim should be greater than input dim" - odim_h = idim_h + pad_h - odim_w = idim_w + pad_w - - inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, idim_h, idim_w, num_ch]) - outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, odim_h, odim_w, num_ch]) - - FMPadding = helper.make_node( - "FMPadding", - ["inp"], - ["outp"], - domain="finn.custom_op.fpgadataflow", - backend="fpgadataflow", - ImgDim=idim, - Padding=padding, - NumChannels=num_ch, - inputDataType=str(idt.name), - numInputVectors=1, - SIMD=simd, - preferred_impl_style=impl_style, - ) - - graph = helper.make_graph( - nodes=[FMPadding], name="fmpadding_graph", inputs=[inp], outputs=[outp] - ) - - model = qonnx_make_model(graph, producer_name="fmpadding-model") - model = ModelWrapper(model) - - model.set_tensor_datatype("inp", idt) - model.set_tensor_datatype("outp", idt) - - return model - - -def make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T=None, tdt=None): - mw = W.shape[0] - mh = 
W.shape[1] - assert mh % pe == 0 - assert mw % simd == 0 - - # there are two ways to implement bipolar weights and inputs for - # MatrixVectorActivation: - # - specify their datatypes as such - # - specify their datatypes as BINARY as use binaryXnorMode - if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]: - # we'll internally convert weights/inputs to binary and specify the - # datatypes as such, and also set the binaryXnorMode attribute to 1 - export_wdt = DataType["BINARY"] - export_idt = DataType["BINARY"] - binary_xnor_mode = 1 - else: - export_wdt = wdt - export_idt = idt - binary_xnor_mode = 0 - - inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, mw]) - outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, mh]) - if T is not None: - no_act = 0 - node_inp_list = ["inp", "weights", "thresh"] - if odt == DataType["BIPOLAR"]: - actval = 0 - else: - actval = odt.min() - else: - # no thresholds - node_inp_list = ["inp", "weights"] - actval = 0 - no_act = 1 - FCLayer_node = helper.make_node( - "MVAU", - node_inp_list, - ["outp"], - domain="finn.custom_op.fpgadataflow", - backend="fpgadataflow", - MW=mw, - MH=mh, - SIMD=simd, - PE=pe, - inputDataType=export_idt.name, - weightDataType=export_wdt.name, - outputDataType=odt.name, - ActVal=actval, - binaryXnorMode=binary_xnor_mode, - noActivation=no_act, - ) - graph = helper.make_graph( - nodes=[FCLayer_node], name="fclayer_graph", inputs=[inp], outputs=[outp] - ) - - model = qonnx_make_model(graph, producer_name="fclayer-model") - model = ModelWrapper(model) - - model.set_tensor_datatype("inp", idt) - model.set_tensor_datatype("outp", odt) - model.set_tensor_datatype("weights", wdt) - if binary_xnor_mode: - # convert bipolar to binary - model.set_initializer("weights", (W + 1) / 2) - else: - model.set_initializer("weights", W) - if T is not None: - model.set_tensor_datatype("thresh", tdt) - model.set_initializer("thresh", T) - return model - - -def 
make_labelselect_modelwrapper(labels, pe, k, idt, impl_style): - inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, labels]) - outp = helper.make_tensor_value_info("outp", TensorProto.INT64, [1, k]) - - labelselect_node = helper.make_node( - "LabelSelect", - ["inp"], - ["outp"], - domain="finn.custom_op.fpgadataflow", - backend="fpgadataflow", - Labels=labels, - PE=pe, - K=k, - inputDataType=idt.name, - preferred_impl_style=impl_style, - ) - graph = helper.make_graph( - nodes=[labelselect_node], - name="graph", - inputs=[inp], - outputs=[outp], - ) - - model = qonnx_make_model(graph, producer_name="thresholding-model") - model = ModelWrapper(model) - - model.set_tensor_datatype("inp", idt) - odt = DataType.get_smallest_possible(labels - 1) - model.set_tensor_datatype("outp", odt) - - return model - - -def _make_single_vvau_modelwrapper( - W, - pe, - simd, - k_h, - k_w, - channels, - dim_h, - dim_w, - wdt, - idt, - odt, - T=None, - tdt=None, - mem_mode="internal_embedded", - impl_style="rtl", -): - in_shape = [1, dim_h, dim_w, k_h * k_w * channels] # [N, H, W, K*K*CH] - out_shape = [ - 1, - dim_h, - dim_w, - channels, - ] # [N, H, W, OFM_CH] (OFM_CH=IFM_CH because depthwise convolution) - - inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, in_shape) - outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, out_shape) - - if T is not None: - no_act = 0 - node_inp_list = ["inp", "weights", "thresh"] - if odt == DataType["BIPOLAR"]: - actval = 0 - else: - actval = odt.min() - else: - no_act = 1 - node_inp_list = ["inp", "weights"] - actval = 0 - - VVAU_node = helper.make_node( - "VVAU", - node_inp_list, - ["outp"], - domain="finn.custom_op.fpgadataflow", - backend="fpgadataflow", - PE=pe, - SIMD=simd, - Dim=[dim_h, dim_w], - Channels=channels, - Kernel=[k_h, k_w], - resType="lut", - ActVal=actval, - inputDataType=idt.name, - weightDataType=wdt.name, - outputDataType=odt.name, - noActivation=no_act, - mem_mode=mem_mode, - 
impl_style=impl_style, - ) - - graph = helper.make_graph(nodes=[VVAU_node], name="vvau_graph", inputs=[inp], outputs=[outp]) - - model = qonnx_make_model(graph, producer_name="vvau-model") - model = ModelWrapper(model) - - model.set_tensor_datatype("inp", idt) - model.set_tensor_datatype("outp", odt) - model.set_tensor_datatype("weights", wdt) - - model.set_initializer("weights", W) - model.set_tensor_shape("weights", (channels, 1, k_h, k_w)) - - if T is not None: - model.set_tensor_datatype("thresh", tdt) - model.set_initializer("thresh", T) - - model = model.transform(InferShapes()) - model = model.transform(InferDataTypes()) - - return model - - -def make_single_dw_conv_modelwrapper(conv_config, idt, wdt): - kernel_size, in_feature_dim, in_chn = conv_config - stride = 1 - pad = 0 - - out_feature_dim = compute_conv_output_dim(in_feature_dim, kernel_size, stride, pad) - group = out_chn = in_chn - - conv_param_shape = [out_chn, 1, kernel_size, kernel_size] - input_shape = [1, in_chn, in_feature_dim, in_feature_dim] - output_shape = [1, out_chn, out_feature_dim, out_feature_dim] - - conv_config = {} - conv_config["dilations"] = [1, 1] - conv_config["group"] = group - conv_config["kernel_shape"] = [kernel_size, kernel_size] - conv_config["pads"] = [pad, pad, pad, pad] - conv_config["strides"] = [stride, stride] - - ifm = helper.make_tensor_value_info("ifm", TensorProto.FLOAT, input_shape) - ofm = helper.make_tensor_value_info("ofm", TensorProto.FLOAT, output_shape) - weights = [helper.make_tensor_value_info("weights", TensorProto.FLOAT, conv_param_shape)] - - modelproto = qonnx_make_model( - helper.make_graph( - name="conv_test", - inputs=[ifm], - outputs=[ofm], - value_info=weights, - nodes=[helper.make_node("Conv", ["ifm", "weights"], ["ofm"], **conv_config)], - ) - ) - - model = ModelWrapper(modelproto) - model.set_tensor_datatype("ifm", idt) - model.set_tensor_datatype("weights", wdt) - model.set_initializer("weights", gen_finn_dt_tensor(wdt, conv_param_shape)) - 
- model = model.transform(InferShapes()) - model = model.transform(InferDataTypes()) - - return model - - -def _infer_sparse_weight_tensor(W_conv, k_h, k_w, channels): - W_sparse = np.zeros((channels, channels, k_h, k_w), dtype=np.float32) - for ch in range(channels): - W_sparse[ch][ch] = W_conv[ch][0] - W_conv = W_sparse.astype(np.float32) - W_matmul = W_conv.transpose(0, 2, 3, 1) - W_matmul = W_matmul.reshape(channels, channels * k_h * k_w) - W_matmul = W_matmul.T - - return W_matmul - - -def _calculate_dot_prod_range(dt_a, dt_b, len): - """Returns the (min,max) values a dot product between two (un)signed vectors of - types dt_a and dt_b of len elements can take.""" - min_prod = 2**30 - max_prod = -(2**30) - for a_val in [dt_a.min(), dt_a.max()]: - for b_val in [dt_b.min(), dt_b.max()]: - prod = a_val * b_val * len - if prod < min_prod: - min_prod = prod - if prod > max_prod: - max_prod = prod - return (min_prod, max_prod) - - -def make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt, ceil_mode): - k_h, k_w = k - ifm_dim_h, ifm_dim_w = ifm_dim - ofm_dim_h, ofm_dim_w = ofm_dim - odt = idt - inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, ifm_dim_h, ifm_dim_w, ifm_ch]) - outp = helper.make_tensor_value_info( - "outp", TensorProto.FLOAT, [1, ofm_dim_h, ofm_dim_w, ifm_ch] - ) - - mp_node = helper.make_node( - "MaxPoolNHWC", - ["inp"], - ["outp"], - domain="qonnx.custom_op.general", - kernel_shape=[k_h, k_w], - strides=[k_h, k_w], - ceil_mode=ceil_mode, - pads=[0, 0, 0, 0], - ) - graph = helper.make_graph(nodes=[mp_node], name="mp_graph", inputs=[inp], outputs=[outp]) - - model = qonnx_make_model(graph, producer_name="mp-model") - model = ModelWrapper(model) - - model.set_tensor_datatype("inp", idt) - model.set_tensor_datatype("outp", odt) - - return model - - -def make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw): - k_h, k_w = k - ifm_dim_h, ifm_dim_w = ifm_dim - stride_h, stride_w = stride - 
dilation_h, dilation_w = dilation - ofm_dim_h, ofm_dim_w = ofm_dim - - odt = idt - inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, ifm_dim_h, ifm_dim_w, ifm_ch]) - outp = helper.make_tensor_value_info( - "outp", TensorProto.FLOAT, [1, ofm_dim_h, ofm_dim_w, k_h * k_w * ifm_ch] - ) - - im2col_node = helper.make_node( - "Im2Col", - ["inp"], - ["outp"], - domain="finn.custom_op.general", - stride=[stride_h, stride_w], - kernel_size=[k_h, k_w], - input_shape=str((1, ifm_dim_h, ifm_dim_w, ifm_ch)), - dilations=[dilation_h, dilation_w], - pad_amount=[0, 0, 0, 0], - pad_value=0, - depthwise=dw, - ) - graph = helper.make_graph( - nodes=[im2col_node], name="im2col_graph", inputs=[inp], outputs=[outp] - ) - - model = qonnx_make_model(graph, producer_name="im2col-model") - model = ModelWrapper(model) - - model.set_tensor_datatype("inp", idt) - model.set_tensor_datatype("outp", odt) - - return model - - -def make_channelwise_modelwrapper(C, pe, idt, odt, pdt, func, vecs): - NumChannels = C.shape[0] - - inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, vecs + [NumChannels]) - outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, vecs + [NumChannels]) - - node_inp_list = ["inp", "const"] - - node = helper.make_node( - "ChannelwiseOp", - node_inp_list, - ["outp"], - domain="finn.custom_op.fpgadataflow", - backend="fpgadataflow", - NumChannels=NumChannels, - Func=func, - PE=pe, - inputDataType=idt.name, - outputDataType=odt.name, - paramDataType=pdt.name, - numInputVectors=vecs, - preferred_impl_style="hls", - ) - graph = helper.make_graph(nodes=[node], name="graph", inputs=[inp], outputs=[outp]) - - model = qonnx_make_model(graph, producer_name="model") - model = ModelWrapper(model) - - model.set_tensor_datatype("inp", idt) - model.set_tensor_datatype("outp", odt) - - model.set_tensor_datatype("const", idt) - model.set_initializer("const", C) - return model - - -def make_single_dwc_modelwrapper(in_shape, out_shape, inWidth, outWidth, 
finn_dtype, impl_style): - inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, in_shape) - outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, out_shape) - - optype = "StreamingDataWidthConverter" - - DWC_node = helper.make_node( - optype, - ["inp"], - ["outp"], - domain="finn.custom_op.fpgadataflow", - backend="fpgadataflow", - in_shape=in_shape, - out_shape=out_shape, - inWidth=inWidth, - outWidth=outWidth, - preferred_impl_style=impl_style, - generalized_variant=True, - dataType=str(finn_dtype.name), - ) - - graph = helper.make_graph(nodes=[DWC_node], name="dwc_graph", inputs=[inp], outputs=[outp]) - - model = qonnx_make_model(graph, producer_name="dwc-model") - model = ModelWrapper(model) - - model.set_tensor_datatype("inp", finn_dtype) - model.set_tensor_datatype("outp", finn_dtype) - - return model - - -def make_single_thresholding_modelwrapper(impl_style, T, idt, odt, actval, n_inp_vecs, num_ch): - inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, n_inp_vecs + [num_ch]) - outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, n_inp_vecs + [num_ch]) - - node_inp_list = ["inp", "thresh"] - - Thresholding_node = helper.make_node( - "Thresholding", - node_inp_list, - ["outp"], - domain="finn.custom_op.fpgadataflow", - backend="fpgadataflow", - NumChannels=num_ch, - numSteps=T.shape[1], - inputDataType=idt.name, - weightDataType=idt.name, # will be set by MinimizeAccumulatorWidth - outputDataType=odt.name, - ActVal=actval, - numInputVectors=n_inp_vecs, - preferred_impl_style=impl_style, - ) - graph = helper.make_graph( - nodes=[Thresholding_node], - name="thresholding_graph", - inputs=[inp], - outputs=[outp], - ) - - model = qonnx_make_model(graph, producer_name="thresholding-model") - model = ModelWrapper(model) - - model.set_tensor_datatype("inp", idt) - model.set_tensor_datatype("outp", odt) - - model.set_tensor_datatype("thresh", idt) - model.set_initializer("thresh", T) - return model - - def 
fetch_test_model(topology, wbits=2, abits=2): tmp_output_dir = make_build_dir("build_fifosizing_%s_" % topology) (model, ishape) = get_trained_network_and_ishape(topology, wbits, abits) @@ -577,17 +54,29 @@ def fetch_test_model(topology, wbits=2, abits=2): @pytest.mark.vivado @pytest.mark.fpgadataflow @pytest.mark.parametrize( - "method", ["largefifo_rtlsim_python", "largefifo_rtlsim_cpp", "characterize"] + "method", + [ + "largefifo_rtlsim_python", + "largefifo_rtlsim_cpp", + "characterize_analytic", + "characterize_rtl", + ], ) @pytest.mark.parametrize("topology", ["tfc", "cnv"]) def test_fifosizing_linear(method, topology): force_python_rtlsim = "python" in method method_key = "largefifo_rtlsim" if "largefifo_rtlsim" in method else "characterize" tmp_output_dir = fetch_test_model(topology) + if method == "characterize_analytic": + characterizatio_strategy_key = "analytic" + else: + characterizatio_strategy_key = "rtlsim" + cfg = build_cfg.DataflowBuildConfig( output_dir=tmp_output_dir, auto_fifo_depths=True, auto_fifo_strategy=method_key, + characteristic_function_strategy=characterizatio_strategy_key, target_fps=10000 if topology == "tfc" else 1000, force_python_rtlsim=force_python_rtlsim, synth_clk_period_ns=10.0, @@ -634,607 +123,3 @@ def test_fifosizing_linear(method, topology): shutil.rmtree(tmp_output_dir) shutil.rmtree(tmp_output_dir_cmp) - - -@pytest.mark.slow -@pytest.mark.vivado -@pytest.mark.fpgadataflow -# whether we are testing input or output characterization -@pytest.mark.parametrize("direction", ["input", "output"]) -@pytest.mark.parametrize( - "node", - [ - ("LabelSelect", 10, 1, 1, DataType["UINT8"], "hls"), - ("LabelSelect", 10, 1, 3, DataType["UINT8"], "hls"), - ("LabelSelect", 10, 2, 3, DataType["UINT8"], "hls"), - ( - "MVAU", - 5, - 1, - 8, - 1, - [1, 1], - DataType["UINT2"], - DataType["UINT2"], - DataType["UINT2"], - "hls", - ), - ( - "MVAU", - 5, - 5, - 8, - 8, - [1, 1], - DataType["UINT2"], - DataType["UINT2"], - DataType["UINT2"], - 
"hls", - ), - ( - "MVAU", - 10, - 5, - 20, - 4, - [1, 1], - DataType["UINT4"], - DataType["UINT8"], - DataType["UINT4"], - "hls", - ), - ( - "MVAU", - 48, - 1, - 4, - 1, - [1, 1], - DataType["UINT2"], - DataType["UINT2"], - DataType["UINT2"], - "hls", - ), - # generalized DWC-variant required - # ("StreamingDataWidthConverter",[1,4,1,40],[1,4,1,40],8,2,DataType["BIPOLAR"],"hls"), - # ("StreamingDataWidthConverter",[1,240],[1,240],12,2,DataType["BIPOLAR"],"hls"), - # ("StreamingDataWidthConverter",[1,36],[1,36],12,12,DataType["BIPOLAR"],"hls"), - # ("StreamingDataWidthConverter",[1,4,1,9],[1,4,1,18],3,9,DataType["UINT4"],"hls"), - # ("StreamingDataWidthConverter",[1,1,1,18],[1,1,1,30],9,3,DataType["BIPOLAR"],"hls"), - # ("StreamingDataWidthConverter",[1,90],[1,90],3,10,DataType["BIPOLAR"],"hls"), - # ("StreamingDataWidthConverter",[1,40],[1,30],10,3,DataType["BIPOLAR"],"hls"), - ("FMPadding", [8, 8], [1, 1, 1, 1], 2, 1, DataType["INT2"], "hls"), - ("FMPadding", [8, 8], [1, 1, 1, 1], 4, 1, DataType["INT2"], "hls"), - ("FMPadding", [8, 8], [1, 1, 1, 1], 12, 1, DataType["INT2"], "hls"), - ("FMPadding", [8, 8], [4, 0, 4, 0], 12, 1, DataType["INT2"], "hls"), - ("FMPadding", [8, 8], [0, 4, 0, 4], 5, 1, DataType["INT2"], "hls"), - ("FMPadding", [2, 3], [0, 3, 0, 4], 5, 5, DataType["INT2"], "hls"), - ("FMPadding", [4, 8], [0, 4, 0, 2], 5, 5, DataType["INT2"], "hls"), - ("FMPadding", [2, 3], [0, 3, 0, 4], 5, 5, DataType["INT2"], "hls"), - # idim, pad, num_ch,simd,idt - ( - "ChannelwiseOp", - DataType["INT8"], - DataType["INT4"], - DataType["INT4"], - 4, - 16, - "add", - [1, 4, 4], - "hls", - ), - ( - "ChannelwiseOp", - DataType["INT8"], - DataType["INT4"], - DataType["INT4"], - 2, - 16, - "add", - [1], - "hls", - ), - # ,idt, act, pdt, nf, ich, func, vecs, impl_style - # (Pdb) (ifm_dim,output_size,is1d, NumChannels,PoolDim,ImgDim,PE) - ("StreamingMaxPool", DataType["INT4"], True, 1, 4, 1, 1, 0, "hls"), - ("StreamingMaxPool", DataType["BIPOLAR"], False, 1, 10, 1, 1, 1, 
"hls"), - # ("StreamingMaxPool", DataType["INT4"], True, 2, 32, 4, 1, 0, "hls"), - # ("StreamingMaxPool", DataType["BIPOLAR"], False, 2, 28, 64, 1, 0, "hls"), - # ("StreamingMaxPool", DataType["BIPOLAR"], False, 2, 10, 64, 1, 1, "hls"), - # ("StreamingMaxPool", DataType["INT4"], True, 4, 10, 3, 3, 1, "hls"), - # idt, dim_1d, k, ifm_dim, ifm_ch, pe, ceil_mode,impl_style - ( - "ConvolutionInputGenerator", - DataType["INT2"], - [6, 1], - [12, 1], - 16, - [1, 1], - [1, 1], - 2, - 0, - 0, - 1, - False, - 1, - "hls", - ), - # idt,k, ifm_dim, ifm_ch,stride, dilation, - # simd, dw, parallel_window, m, flip, is1d - ( - "ConvolutionInputGenerator", - DataType["INT2"], - [4, 4], - [8, 8], - 6, - [4, 4], - [1, 1], - 2, - 1, - 0, - 1, - False, - 0, - "hls", - ), - ( - "ConvolutionInputGenerator", - DataType["INT2"], - [6, 6], - [10, 10], - 8, - [2, 2], - [1, 1], - 2, - 1, - 0, - 1, - False, - 0, - "hls", - ), - ( - "ConvolutionInputGenerator", - DataType["INT2"], - [4, 4], - [10, 10], - 16, - [2, 2], - [1, 1], - 2, - 1, - 0, - 1, - False, - 0, - "hls", - ), - ( - "ConvolutionInputGenerator", - DataType["INT2"], - [6, 1], - [8, 1], - 8, - [3, 1], - [1, 1], - 1, - 0, - 0, - 1, - False, - 1, - "hls", - ), - # ("ConvolutionInputGenerator", DataType["INT2"], - # [6, 6],[12, 12],8,[4, 4],[1, 1],2,0,0,1,False,0,"hls",), - # ("ConvolutionInputGenerator",DataType["INT2"], - # [6, 1],[12, 1],16,[2, 1],[1, 1],2,1,0,1,False,1,"hls",), - # ("ConvolutionInputGenerator",DataType["INT2"], - # [6, 1],[12, 1],16,[1, 1],[1, 1],2,1,0,1,False,1,"hls",), - # idt,k, ifm_dim, ifm_ch,stride, dilation, simd, - # dw, parallel_window, m, flip, is1d - ( - "VVAU", - DataType["INT4"], - DataType["INT4"], - DataType["INT4"], - 3, - 1, - 10, - 10, - 3, - 3, - 3, - "internal_embedded", - 0, - "hls", - ), - ( - "ChannelwiseOp", - DataType["INT8"], - DataType["INT4"], - DataType["INT4"], - 1, - 16, - "add", - [1, 3, 3], - "hls", - ), - ( - "ConvolutionInputGenerator", - DataType["INT2"], - [6, 1], - [12, 1], - 
16, - [3, 1], - [1, 1], - 2, - 0, - 0, - 1, - False, - 1, - "hls", - # ("Thresholding", [15, 3], True, True, "hls"), - ), - ], -) -def test_fifosizing_analytical_characterization(direction, node): - test_rtl = True - - test_fpga_part = "xc7z020clg400-1" - target_clk_ns = 4 - - # attempt to cache a pre-existing variant of the model - # this is to avoid generating RTL multiple times during - # test debugging - build_dir = os.environ["FINN_BUILD_DIR"] - model_cache = None - for x in os.listdir(build_dir): - if x.startswith(str(node)): - model_cache = f"{build_dir}/{x}/model.onnx" - - tmp_output_dir = make_build_dir("build_fifosizing") - - if node[0] == "LabelSelect": - labels, pe, k, idt, impl_style = node[1:] - model0 = make_labelselect_modelwrapper(labels, pe, k, idt, impl_style) - - elif node[0] == "Thresholding": - cfg, narrow, per_tensor, impl_style = node[1:] - ch = cfg[0] - pe = cfg[1] - n_inp_vecs = [1, 1, 1] - hls_mem_mode = "internal_decoupled" - act = DataType["INT4"] - idt = DataType["INT16"] - odt = act - n_steps = act.get_num_possible_values() - 1 - # Generate random thresholds and sort in ascending order - T = generate_random_threshold_values(idt, ch, n_steps, narrow, per_tensor) - - # provide non-decreasing/ascending thresholds - T = sort_thresholds_increasing(T) - - actval = act.min() - if narrow: - actval += 1 - - model = make_single_thresholding_modelwrapper( - impl_style, T, idt, odt, actval, n_inp_vecs, ch - ) - model = model.transform(SpecializeLayers(test_fpga_part)) - - # Make sure that specialize layer did not default to HLS implementation - assert model.graph.node[0].op_type == "Thresholding_" + str(impl_style) - - node_inst = model.get_nodes_by_op_type(f"Thresholding_{impl_style}")[0] - op_inst = getCustomOp(node_inst) - op_inst.set_nodeattr("PE", pe) - if impl_style == "hls": - op_inst.set_nodeattr("mem_mode", hls_mem_mode) - op_inst.set_nodeattr("runtime_writeable_weights", 1) - model0 = model - - elif node[0] == "MVAU": - mw, simd, mh, 
pe, numVectors, wdt, idt, odt, impl_style = node[1:] - W = gen_finn_dt_tensor(wdt, (mw, mh)) - model0 = make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T=None, tdt=None) - - getCustomOp(model0.graph.node[0]).set_nodeattr("numInputVectors", numVectors) - # model0 = make_labelselect_modelwrapper(labels, pe, k, idt, impl_style) - - elif node[0] == "ChannelwiseOp": - idt, act, pdt, nf, ich, func, vecs, impl_style = node[1:] - if nf == -1: - nf = ich - odt = act - pe = ich // nf - C = gen_finn_dt_tensor(pdt, (ich)) - - model0 = make_channelwise_modelwrapper(C, pe, idt, odt, pdt, func, vecs) - - elif node[0] == "FMPadding": - idim, pad, num_ch, simd, idt, impl_style = node[1:] - model0 = make_single_fmpadding_modelwrapper(impl_style, idim, pad, num_ch, simd, idt) - - elif node[0] == "StreamingDataWidthConverter": - in_shape, out_shape, in_width, out_width, dtype, impl_style = node[1:] - model0 = make_single_dwc_modelwrapper( - in_shape, out_shape, in_width, out_width, dtype, impl_style - ) - # model0 = make_labelselect_modelwrapper(labels, pe, k, idt, impl_style) - - elif node[0] == "StreamingMaxPool": - idt, dim_1d, k, ifm_dim, ifm_ch, pe, ceil_mode, impl_style = node[1:] - ifm_dim_h = ifm_dim - k_h = k - if dim_1d: - ifm_dim_w = 1 - k_w = 1 - else: - ifm_dim_w = ifm_dim_h - k_w = k_h - ifm_dim = (ifm_dim_h, ifm_dim_w) - k = (k_h, k_w) - - stride_h = k_h - stride_w = k_w - ofm_dim_h = compute_pool_output_dim(ifm_dim_h, k_h, stride_h, 0, ceil_mode) - ofm_dim_w = compute_pool_output_dim(ifm_dim_w, k_w, stride_w, 0, ceil_mode) - ofm_dim = (ofm_dim_h, ofm_dim_w) - # if idt == DataType["BIPOLAR"] and dim_1d: - # pytest.skip("Skipping binary StreamingMaxPool_1d (not implemented)") - if (ifm_dim_h % k_h != 0 or ifm_dim_w % k_w != 0) and (not dim_1d): - pytest.skip("StreamingMaxPool_2d test w/ ImgDim % PoolDim != 0 not implemented") - if pe > ifm_ch: - pytest.skip("PE cannot be larger than number of input channels") - # if pe > 1 and (not dim_1d): - # 
pytest.skip("PE>1 only supported for StreamingMaxPool_1d") - - golden = make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt, ceil_mode) - - model = golden.transform(InferStreamingMaxPool()) - model = model.transform(InferShapes()) - - model0 = model.transform(SpecializeLayers(test_fpga_part)) - - # Ensure PE value is set - streamingmaxpool_node = model0.get_nodes_by_op_type("StreamingMaxPool_hls")[0] - # assert True == False - if pe > 1 and (not dim_1d): - getCustomOp(streamingmaxpool_node).set_nodeattr("PE", 1) - else: - getCustomOp(streamingmaxpool_node).set_nodeattr("PE", pe) - - elif node[0] == "ConvolutionInputGenerator": - ( - idt, - k, - ifm_dim, - ifm_ch, - stride, - dilation, - simd, - dw, - parallel_window, - m, - flip, - is1d, - impl_style, - ) = node[1:] - if flip: - if ( - ifm_dim[0] == ifm_dim[1] - and k[0] == k[1] - and stride[0] == stride[1] - and dilation[0] == dilation[1] - ): - pytest.skip("Dimension flip would have no effect") - k = k[::-1] - ifm_dim = ifm_dim[::-1] - stride = stride[::-1] - dilation = dilation[::-1] - - k_h, k_w = k - ifm_dim_h, ifm_dim_w = ifm_dim - stride_h, stride_w = stride - dilation_h, dilation_w = dilation - - kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation - kernel_height = (k_h - 1) * dilation_h + 1 # incl. 
dilation - - if simd > ifm_ch: - pytest.skip("SIMD cannot be larger than number of input channels") - if ifm_ch % simd != 0: - pytest.skip("SIMD must divide number of input channels") - if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: - pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") - if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: - pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") - if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): - pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") - if ((stride_h > k_h) or (stride_w > k_w)) and not ( - parallel_window or (k_h == 1 and k_w == 1) - ): - pytest.skip("Not all combinations for stride > k edge case supported in default mode") - if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): - pytest.skip("Parallel window requires SIMD=C for non-depthwise case") - - ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) - ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) - ofm_dim = [ofm_dim_h, ofm_dim_w] - - model = make_single_im2col_modelwrapper( - k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw - ) - model = model.transform(to_hw.InferConvInpGen()) - - # set impl_style - inst = getCustomOp(model.get_nodes_by_op_type("ConvolutionInputGenerator")[0]) - inst.set_nodeattr("is1D", is1d) - inst.set_nodeattr("preferred_impl_style", impl_style) - model = model.transform(SpecializeLayers(test_fpga_part)) - # set simd - inst = getCustomOp(model.graph.node[0]) - inst.set_nodeattr("SIMD", simd) - optype = model.graph.node[0].op_type - if optype == "ConvolutionInputGenerator_rtl": - inst.set_nodeattr("parallel_window", parallel_window) - inst.set_nodeattr("M", m) - if optype == "ConvolutionInputGenerator_hls": - if inst.get_nodeattr("is1D"): - inst.set_nodeattr("parallel_window", parallel_window) - model0 = model - - elif node[0] 
== "VVAU": - ( - idt, - wdt, - act, - pe, - simd, - dim_h, - dim_w, - k_h, - k_w, - channels, - mem_mode, - no_act, - impl_style, - ) = node[1:] - - if dim_w == 1 and k_w != 1: - pytest.skip("1D image requires 1D kernel, skipping.") - - if channels % pe != 0: - pytest.skip("Requirement Channels divisable by PE is violated.") - - if (k_h * k_w) % simd != 0: - pytest.skip("Requirement kernel (k_h * k_w) divisable by SIMD is violated.") - - # Generate weights in expected shape for ONNX and HLS node - W = gen_finn_dt_tensor(wdt, (channels, 1, k_h, k_w)) # shape: [channels, 1, k, k] - # Generate inputs in expected format for ONNX and HLS node - x = gen_finn_dt_tensor(idt, (1, dim_h, dim_w, k_h * k_w * channels)) - x_vvau = x.reshape(1, dim_h, dim_w, k_h * k_w, channels // pe, pe) - x_vvau = x_vvau.transpose(0, 1, 2, 4, 3, 5) - x_vvau = x_vvau.reshape(1, dim_h, dim_w, channels * k_h * k_w) - - if act is None: - T = None - tdt = None - if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]: - odt = DataType["UINT32"] - else: - odt = DataType["INT32"] - else: - odt = act - (min_v, max_v) = _calculate_dot_prod_range(idt, wdt, k_h * k_w) - n_steps = act.get_num_possible_values() - 1 - T = np.random.randint(min_v, max_v - 1, (channels, n_steps)).astype(np.float32) - T = np.sort(T, axis=1) - if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]: - tdt = DataType["UINT32"] - # bias thresholds to be positive - T = np.ceil((T + (k_h * k_w)) / 2) - assert (T >= 0).all() - else: - tdt = DataType["INT32"] - - model = _make_single_vvau_modelwrapper( - W, - pe, - simd, - k_h, - k_w, - channels, - dim_h, - dim_w, - wdt, - idt, - odt, - T, - tdt, - mem_mode, - impl_style, - ) - model = model.transform(GiveUniqueNodeNames()) - model = model.transform(GiveReadableTensorNames()) - - inst = getCustomOp(model.graph.node[0]) - inst.set_nodeattr("noActivation", no_act) - if impl_style == "rtl": - inst.set_nodeattr("resType", "dsp") - inst.set_nodeattr("preferred_impl_style", 
impl_style) - - model0 = model.transform(SpecializeLayers(test_fpga_part)) - # test_fpga_part = test_fpga_part - - outputs = [build_cfg.DataflowOutputType.ESTIMATE_REPORTS] - model1 = copy.deepcopy(model0) - - if model_cache is not None: - model0 = ModelWrapper(model_cache) - - node_inst0 = getCustomOp(model0.graph.node[0]) - node_inst1 = getCustomOp(model1.graph.node[0]) - - cfg = build_cfg.DataflowBuildConfig( - output_dir=tmp_output_dir, - synth_clk_period_ns=target_clk_ns, - generate_outputs=outputs, - fpga_part=test_fpga_part, - auto_fifo_strategy="characterize", - characteristic_function_strategy="analytical", - auto_fifo_depths=True, - split_large_fifos=False, - ) - - # analytical - inst = getCustomOp(model1.graph.node[0]) - inst.set_nodeattr("preferred_impl_style", impl_style) - model1 = model1.transform(SpecializeLayers(test_fpga_part)) - model1 = model1.transform(GiveUniqueNodeNames()) - model1 = model1.transform(PrepareIP(test_fpga_part, target_clk_ns)) - model1 = step_set_fifo_depths(model1, cfg) - - # rtlsim-based - if test_rtl: - cfg.characteristic_function_strategy = "rtlsim" - if model_cache is None: - inst = getCustomOp(model0.graph.node[0]) - model0 = model0.transform(SpecializeLayers(test_fpga_part)) - model0 = model0.transform(GiveUniqueNodeNames()) - model0 = model0.transform(PrepareIP(test_fpga_part, target_clk_ns)) - model0 = step_set_fifo_depths(model0, cfg) - - tmp_caching_output_dir = make_build_dir(str(node)) - model0.save(tmp_caching_output_dir + "/model.onnx") - - # grab the last nodes of the model - if test_rtl: - for n in model0.graph.node: - if n.op_type.startswith(node[0]): - node_inst0 = getCustomOp(n) - continue - - for n in model1.graph.node: - if n.op_type.startswith(node[0]): - node_inst1 = getCustomOp(n) - continue - - if test_rtl: - test_relaxation = 5 - if direction == "input": - assert compare_two_chr_funcs( - node_inst0.get_nodeattr("io_chrc_in"), - node_inst1.get_nodeattr("io_chrc_in"), - test_relaxation, - ) - elif 
direction == "output": - assert compare_two_chr_funcs( - node_inst0.get_nodeattr("io_chrc_out"), - node_inst1.get_nodeattr("io_chrc_out"), - test_relaxation, - ) diff --git a/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py b/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py index 2ad49ae58b..c796ff0d77 100644 --- a/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py +++ b/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py @@ -29,6 +29,7 @@ import pytest +import copy import numpy as np from onnx import TensorProto, helper from qonnx.core.datatype import DataType @@ -47,6 +48,7 @@ from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers +from finn.util.test import compare_two_chr_funcs, get_characteristic_fnc def make_modelwrapper(C, pe, idt, odt, pdt, func, vecs): @@ -172,3 +174,61 @@ def test_fpgadataflow_channelwise_ops(idt, act, pdt, nf, ich, func, vecs, exec_m exp_cycles = exp_cycles_dict[node.name] assert np.isclose(exp_cycles, cycles_rtlsim, atol=10) assert exp_cycles != 0 + + +# which port to test +@pytest.mark.parametrize("direction", ["input", "output"]) +# activation: None or DataType +@pytest.mark.parametrize("act", [DataType["INT8"]]) +# input datatype +@pytest.mark.parametrize("idt", [DataType["INT4"]]) +# param datatype +@pytest.mark.parametrize("pdt", [DataType["INT4"]]) +# folding, -1 is maximum possible +@pytest.mark.parametrize("nf", [-1, 2]) +# number of input features +@pytest.mark.parametrize("ich", [16]) +# vecs +@pytest.mark.parametrize("vecs", [[1], [1, 7, 7]]) +# function +@pytest.mark.parametrize("func", ["add", "mul"]) +# execution mode +@pytest.mark.parametrize("exec_mode", ["rtlsim"]) +@pytest.mark.fpgadataflow +@pytest.mark.vivado +@pytest.mark.slow +def test_fpgadataflow_analytical_characterization_channelwise_ops( + direction, idt, act, pdt, nf, 
ich, func, vecs, exec_mode +): + if nf == -1: + nf = ich + pe = ich // nf + assert ich % pe == 0 + + # generate param data + C = gen_finn_dt_tensor(pdt, (ich)) + + odt = act + + # create model + model = make_modelwrapper(C, pe, idt, odt, pdt, func, vecs) + node_details = ("ChannelWiseOp", C, pe, idt, odt, pdt, func, "hls") + part = "xc7z020clg400-1" + target_clk_ns = 4 + allowed_chr_offset_positions = 5 + + model_rtl = copy.deepcopy(model) + node_analytical = get_characteristic_fnc(model, node_details, part, target_clk_ns, "analytical") + node_rtlsim = get_characteristic_fnc(model_rtl, node_details, part, target_clk_ns, "rtlsim") + if direction == "input": + assert compare_two_chr_funcs( + node_analytical.get_nodeattr("io_chrc_in"), + node_rtlsim.get_nodeattr("io_chrc_in"), + allowed_chr_offset_positions, + ) + elif direction == "output": + assert compare_two_chr_funcs( + node_analytical.get_nodeattr("io_chrc_out"), + node_rtlsim.get_nodeattr("io_chrc_out"), + allowed_chr_offset_positions, + ) diff --git a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py index dc5dc0c02a..8945d6c941 100644 --- a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py +++ b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py @@ -29,6 +29,7 @@ import pytest +import copy import numpy as np from onnx import TensorProto, helper from qonnx.core.datatype import DataType @@ -48,6 +49,7 @@ from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers +from finn.util.test import compare_two_chr_funcs, get_characteristic_fnc def make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw): @@ -237,3 +239,126 @@ def test_fpgadataflow_slidingwindow( assert exp_cycles != 0 else: assert model.graph.node[0].op_type == 
"ConvolutionInputGenerator_rtl" + + +# which port to test +@pytest.mark.parametrize("direction", ["input", "output"]) +# input datatype +@pytest.mark.parametrize("idt", [DataType["INT2"], DataType["UINT4"]]) +# kernel size +@pytest.mark.parametrize("k", [[2, 2], [3, 3], [1, 5]]) +# input dimension +@pytest.mark.parametrize("ifm_dim", [[8, 8], [1, 21]]) +# input channels +@pytest.mark.parametrize("ifm_ch", [2, 4]) +# Stride +@pytest.mark.parametrize("stride", [[1, 1], [2, 2], [2, 1]]) +# Dilation +@pytest.mark.parametrize("dilation", [[1, 1], [2, 2], [2, 1]]) +# execution mode +@pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) +# input channel parallelism ("SIMD") +@pytest.mark.parametrize("simd", [1, 2, 4]) +# depthwise +@pytest.mark.parametrize("dw", [0, 1]) +# parallel_window enable (MMV_out = M*K) +@pytest.mark.parametrize("parallel_window", [0, 1]) +# in/out MMV ("M") +@pytest.mark.parametrize("m", [1]) +# Flip dimensions +@pytest.mark.parametrize("flip", [False]) +# implementation style +@pytest.mark.parametrize("impl_style", ["rtl", "hls"]) +@pytest.mark.fpgadataflow +@pytest.mark.slow +@pytest.mark.vivado +def test_fpgadataflow_analytical_characterization_slidingwindow( + direction, + idt, + k, + ifm_dim, + ifm_ch, + stride, + dilation, + exec_mode, + simd, + dw, + parallel_window, + m, + flip, + impl_style, +): + if flip: + if ( + ifm_dim[0] == ifm_dim[1] + and k[0] == k[1] + and stride[0] == stride[1] + and dilation[0] == dilation[1] + ): + pytest.skip("Dimension flip would have no effect") + k = k[::-1] + ifm_dim = ifm_dim[::-1] + stride = stride[::-1] + dilation = dilation[::-1] + + k_h, k_w = k + ifm_dim_h, ifm_dim_w = ifm_dim + stride_h, stride_w = stride + dilation_h, dilation_w = dilation + + kernel_width = (k_w - 1) * dilation_w + 1 # incl. dilation + kernel_height = (k_h - 1) * dilation_h + 1 # incl. 
dilation + + if simd > ifm_ch: + pytest.skip("SIMD cannot be larger than number of input channels") + if ifm_ch % simd != 0: + pytest.skip("SIMD must divide number of input channels") + if kernel_height > ifm_dim_h or stride_h > ifm_dim_h: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if kernel_width > ifm_dim_w or stride_w > ifm_dim_w: + pytest.skip("Illegal convolution configuration: kernel or stride > FM dimension") + if (k_h == 1 and dilation_h != 1) or (k_w == 1 and dilation_w != 1): + pytest.skip("Illegal convolution configuration: dilation for unitary kernel dim") + if ((stride_h > k_h) or (stride_w > k_w)) and not (parallel_window or (k_h == 1 and k_w == 1)): + pytest.skip("Not all combinations for stride > k edge case supported in default mode") + if parallel_window and simd != ifm_ch and not (dw or (k_h == 1 and k_w == 1)): + pytest.skip("Parallel window requires SIMD=C for non-depthwise case") + + ofm_dim_h = compute_conv_output_dim(ifm_dim_h, k_h, stride_h, 0, dilation_h) + ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w) + ofm_dim = [ofm_dim_h, ofm_dim_w] + + model = make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt, dw) + + model = model.transform(to_hw.InferConvInpGen()) + node_details = ( + "ConvolutionInputGenerator", + k, + ifm_ch, + ifm_dim, + ofm_dim, + stride, + dilation, + idt, + dw, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + allowed_chr_offset_positions = 5 + + model_rtl = copy.deepcopy(model) + node_analytical = get_characteristic_fnc(model, node_details, part, target_clk_ns, "analytical") + node_rtlsim = get_characteristic_fnc(model_rtl, node_details, part, target_clk_ns, "rtlsim") + if direction == "input": + assert compare_two_chr_funcs( + node_analytical.get_nodeattr("io_chrc_in"), + node_rtlsim.get_nodeattr("io_chrc_in"), + allowed_chr_offset_positions, + ) + elif direction == "output": + assert compare_two_chr_funcs( 
+ node_analytical.get_nodeattr("io_chrc_out"), + node_rtlsim.get_nodeattr("io_chrc_out"), + allowed_chr_offset_positions, + ) diff --git a/tests/fpgadataflow/test_fpgadataflow_fmpadding.py b/tests/fpgadataflow/test_fpgadataflow_fmpadding.py index 87e3267186..cb14ae8507 100644 --- a/tests/fpgadataflow/test_fpgadataflow_fmpadding.py +++ b/tests/fpgadataflow/test_fpgadataflow_fmpadding.py @@ -29,6 +29,7 @@ import pytest +import copy import numpy as np import os from onnx import TensorProto, helper @@ -49,6 +50,7 @@ from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers from finn.util.basic import pynq_part_map +from finn.util.test import compare_two_chr_funcs, get_characteristic_fnc test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1") test_fpga_part = pynq_part_map[test_pynq_board] @@ -162,3 +164,54 @@ def test_fpgadataflow_fmpadding(idim, pad, num_ch, simd, idt, mode, impl_style): exp_cycles = exp_cycles_dict[node.name] assert np.isclose(exp_cycles, cycles_rtlsim, atol=10) assert exp_cycles != 0 + + +# which port to test +@pytest.mark.parametrize("direction", ["input", "output"]) +# input image dimension +@pytest.mark.parametrize("idim", [[8, 8], [10, 8]]) +# number of rows and number of cols to add +@pytest.mark.parametrize("pad", [[1, 1, 1, 1], [1, 1, 2, 2], [1, 3, 2, 3], [7, 0, 8, 0]]) +# number of channels +@pytest.mark.parametrize("num_ch", [2, 4]) +# Input parallelism +@pytest.mark.parametrize("simd", [1, 2]) +# FINN input datatype +@pytest.mark.parametrize("idt", [DataType["INT2"], DataType["INT4"]]) +# execution mode +@pytest.mark.parametrize("mode", ["rtlsim"]) +# implementation style +@pytest.mark.parametrize("impl_style", ["rtl", "hls"]) +@pytest.mark.fpgadataflow +@pytest.mark.slow +@pytest.mark.vivado +def test_fpgadataflow_analytical_characterization_fmpadding( + direction, idim, pad, num_ch, simd, idt, mode, impl_style +): + if num_ch % simd != 0: + 
pytest.skip(" num_ch % simd != 0, skipping") + + model = make_single_fmpadding_modelwrapper(impl_style, idim, pad, num_ch, simd, idt) + model = model.transform(InferShapes()) + model = model.transform(SetExecMode(mode)) + + node_details = ("FMPadding", idim, pad, num_ch, simd, idt, mode, impl_style) + part = "xc7z020clg400-1" + target_clk_ns = 4 + allowed_chr_offset_positions = 5 + + model_rtl = copy.deepcopy(model) + node_analytical = get_characteristic_fnc(model, node_details, part, target_clk_ns, "analytical") + node_rtlsim = get_characteristic_fnc(model_rtl, node_details, part, target_clk_ns, "rtlsim") + if direction == "input": + assert compare_two_chr_funcs( + node_analytical.get_nodeattr("io_chrc_in"), + node_rtlsim.get_nodeattr("io_chrc_in"), + allowed_chr_offset_positions, + ) + elif direction == "output": + assert compare_two_chr_funcs( + node_analytical.get_nodeattr("io_chrc_out"), + node_rtlsim.get_nodeattr("io_chrc_out"), + allowed_chr_offset_positions, + ) diff --git a/tests/fpgadataflow/test_fpgadataflow_labelselect.py b/tests/fpgadataflow/test_fpgadataflow_labelselect.py index 83ab2ddcaf..241ccdde28 100644 --- a/tests/fpgadataflow/test_fpgadataflow_labelselect.py +++ b/tests/fpgadataflow/test_fpgadataflow_labelselect.py @@ -29,6 +29,7 @@ import pytest +import copy import numpy as np from onnx import TensorProto, helper from qonnx.core.datatype import DataType @@ -44,7 +45,11 @@ from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers -from finn.util.test import soft_verify_topk +from finn.util.test import ( + compare_two_chr_funcs, + get_characteristic_fnc, + soft_verify_topk, +) def make_labelselect_modelwrapper(labels, pe, k, idt, impl_style): @@ -136,3 +141,53 @@ def test_fpgadataflow_labelselect(idt, labels, fold, k, exec_mode, impl_style): y = oxe.execute_onnx(model, 
input_dict)["outp"] assert soft_verify_topk(x, y, k), exec_mode + " failed" + + +# which port to test +@pytest.mark.parametrize("direction", ["input", "output"]) +@pytest.mark.parametrize("idt", [DataType["UINT8"], DataType["UINT16"], DataType["INT16"]]) +# labels +@pytest.mark.parametrize("labels", [10, 100]) +# folding +@pytest.mark.parametrize("fold", [-1, 2, 10]) +# number of top labels to select +@pytest.mark.parametrize("k", [1, 5]) +# impl style +@pytest.mark.parametrize("impl_style", ["hls"]) +@pytest.mark.fpgadataflow +@pytest.mark.vivado +@pytest.mark.slow +def test_fpgadataflow_analytical_characterization_labelselect( + direction, idt, labels, fold, k, impl_style +): + np.random.seed(0) + if fold == -1: + pe = 1 + else: + pe = labels // fold + assert labels % pe == 0 + + if k == -1: + k = labels + + model = make_labelselect_modelwrapper(labels, pe, k, idt, impl_style) + node_details = ("LabelSelect", idt, labels, fold, k, impl_style) + part = "xc7z020clg400-1" + target_clk_ns = 4 + allowed_chr_offset_positions = 5 + + model_rtl = copy.deepcopy(model) + node_analytical = get_characteristic_fnc(model, node_details, part, target_clk_ns, "analytical") + node_rtlsim = get_characteristic_fnc(model_rtl, node_details, part, target_clk_ns, "rtlsim") + if direction == "input": + assert compare_two_chr_funcs( + node_analytical.get_nodeattr("io_chrc_in"), + node_rtlsim.get_nodeattr("io_chrc_in"), + allowed_chr_offset_positions, + ) + elif direction == "output": + assert compare_two_chr_funcs( + node_analytical.get_nodeattr("io_chrc_out"), + node_rtlsim.get_nodeattr("io_chrc_out"), + allowed_chr_offset_positions, + ) diff --git a/tests/fpgadataflow/test_fpgadataflow_mvau.py b/tests/fpgadataflow/test_fpgadataflow_mvau.py index 1ec77f4eec..a497e5fc2a 100644 --- a/tests/fpgadataflow/test_fpgadataflow_mvau.py +++ b/tests/fpgadataflow/test_fpgadataflow_mvau.py @@ -28,6 +28,7 @@ import pytest +import copy import numpy as np import qonnx.custom_op.general.xnorpopcount as xp 
from onnx import TensorProto, helper @@ -67,6 +68,7 @@ from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode from finn.transformation.fpgadataflow.set_fifo_depths import InsertAndSetFIFODepths from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers +from finn.util.test import compare_two_chr_funcs, get_characteristic_fnc def make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T=None, tdt=None): @@ -730,3 +732,80 @@ def test_fpgadataflow_rtl_mvau(mh, mw, pe, simd, idt, wdt, part, clk_ns): assert ( output_matmul == output_mvau_rtl_stitch ).all(), "Output of ONNX model not matching output of stitched-IP RTL model!" + + +# which port to test +@pytest.mark.parametrize("direction", ["input", "output"]) +# mem_mode: internal_embedded or internal_decoupled +@pytest.mark.parametrize("mem_mode", ["internal_decoupled", "internal_embedded"]) +# activation: None or DataType +@pytest.mark.parametrize("act", [None, DataType["INT4"]]) +# weight datatype +@pytest.mark.parametrize("wdt", [DataType["INT4"]]) +# input datatype +@pytest.mark.parametrize("idt", [DataType["INT4"]]) +# neuron folding, -1 is maximum possible +@pytest.mark.parametrize("nf", [8]) +# synapse folding, -1 is maximum possible +@pytest.mark.parametrize("sf", [8]) +# HLS matrix width (input features) +@pytest.mark.parametrize("mw", [32]) +# HLS matrix height (output features) +@pytest.mark.parametrize("mh", [32]) +# Backend +@pytest.mark.parametrize("preferred_impl_style", ["hls", "rtl"]) +@pytest.mark.fpgadataflow +@pytest.mark.vivado +@pytest.mark.slow +def test_fpgadataflow_analytical_characterization_mvau( + direction, mem_mode, idt, wdt, act, nf, sf, mw, mh, preferred_impl_style +): + if preferred_impl_style == "rtl" and (mem_mode == "internal_embedded" or act is not None): + pytest.skip("RTL-MVAU doesn't support const mem mode or embedded activations") + if nf == -1: + nf = mh + if sf == -1: + sf = mw + pe = mh // nf + simd = mw // sf + assert mh % pe == 0 + 
assert mw % sf == 0 + # generate weights + W = gen_finn_dt_tensor(wdt, (mw, mh)) + + # no activation, produce accumulators + T = None + tdt = None + if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]: + odt = DataType["UINT32"] + else: + odt = DataType["INT32"] + + model = make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T, tdt) + for node in model.graph.node: + # lookup op_type in registry of CustomOps + inst = getCustomOp(node) + inst.set_nodeattr("mem_mode", mem_mode) + inst.set_nodeattr("resType", "auto") + inst.set_nodeattr("preferred_impl_style", preferred_impl_style) + + node_details = ("MVAU", mem_mode, idt, wdt, act, nf, sf, mw, mh, preferred_impl_style) + part = "xc7z020clg400-1" + target_clk_ns = 4 + allowed_chr_offset_positions = 5 + + model_rtl = copy.deepcopy(model) + node_analytical = get_characteristic_fnc(model, node_details, part, target_clk_ns, "analytical") + node_rtlsim = get_characteristic_fnc(model_rtl, node_details, part, target_clk_ns, "rtlsim") + if direction == "input": + assert compare_two_chr_funcs( + node_analytical.get_nodeattr("io_chrc_in"), + node_rtlsim.get_nodeattr("io_chrc_in"), + allowed_chr_offset_positions, + ) + elif direction == "output": + assert compare_two_chr_funcs( + node_analytical.get_nodeattr("io_chrc_out"), + node_rtlsim.get_nodeattr("io_chrc_out"), + allowed_chr_offset_positions, + ) diff --git a/tests/fpgadataflow/test_fpgadataflow_streamingmaxpool.py b/tests/fpgadataflow/test_fpgadataflow_streamingmaxpool.py index c520fb50fc..50d4ada783 100644 --- a/tests/fpgadataflow/test_fpgadataflow_streamingmaxpool.py +++ b/tests/fpgadataflow/test_fpgadataflow_streamingmaxpool.py @@ -29,6 +29,7 @@ import pytest +import copy from onnx import TensorProto, helper from qonnx.core.datatype import DataType from qonnx.core.modelwrapper import ModelWrapper @@ -48,6 +49,7 @@ from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim from finn.transformation.fpgadataflow.set_exec_mode import 
SetExecMode from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers +from finn.util.test import compare_two_chr_funcs, get_characteristic_fnc def make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt, ceil_mode): @@ -180,3 +182,76 @@ def test_fpgadataflow_streamingmaxpool(idt, dim_1d, k, ifm_dim, ifm_ch, pe, ceil # nested for-loops # assert np.isclose(exp_cycles, cycles_rtlsim, atol=15) assert exp_cycles != 0 + + +# which port to test +@pytest.mark.parametrize("direction", ["input", "output"]) +# input datatype +@pytest.mark.parametrize("idt", [DataType["BIPOLAR"], DataType["INT4"]]) +# 1d maxpool +@pytest.mark.parametrize("dim_1d", [False, True]) +# kernel size +@pytest.mark.parametrize("k", [2, 4]) +# input dimension +@pytest.mark.parametrize("ifm_dim", [4, 10]) +# input channels +@pytest.mark.parametrize("ifm_ch", [1, 3]) +# pe +@pytest.mark.parametrize("pe", [1, 3]) +# ceil mode +@pytest.mark.parametrize("ceil_mode", [1]) +# execution mode +@pytest.mark.parametrize("exec_mode", ["rtlsim"]) +@pytest.mark.fpgadataflow +@pytest.mark.slow +@pytest.mark.vivado +def test_fpgadataflow_analytical_characterization_streamingmaxpool( + direction, idt, dim_1d, k, ifm_dim, ifm_ch, pe, ceil_mode, exec_mode +): + ifm_dim_h = ifm_dim + k_h = k + if dim_1d: + ifm_dim_w = 1 + k_w = 1 + else: + ifm_dim_w = ifm_dim_h + k_w = k_h + ifm_dim = (ifm_dim_h, ifm_dim_w) + k = (k_h, k_w) + + stride_h = k_h + stride_w = k_w + ofm_dim_h = compute_pool_output_dim(ifm_dim_h, k_h, stride_h, 0, ceil_mode) + ofm_dim_w = compute_pool_output_dim(ifm_dim_w, k_w, stride_w, 0, ceil_mode) + ofm_dim = (ofm_dim_h, ofm_dim_w) + if idt == DataType["BIPOLAR"] and dim_1d: + pytest.skip("Skipping binary StreamingMaxPool_1d (not implemented)") + if (ifm_dim_h % k_h != 0 or ifm_dim_w % k_w != 0) and (not dim_1d): + pytest.skip("StreamingMaxPool_2d test w/ ImgDim % PoolDim != 0 not implemented") + if pe > ifm_ch: + pytest.skip("PE cannot be larger than number of input 
channels") + if pe > 1 and (not dim_1d): + pytest.skip("PE>1 only supported for StreamingMaxPool_1d") + + model = make_single_maxpoolnhwc_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt, ceil_mode) + model = model.transform(InferStreamingMaxPool()) + node_details = ("StreamingMaxPool", k, ifm_ch, ifm_dim, ofm_dim, idt, ceil_mode, "hls") + part = "xc7z020clg400-1" + target_clk_ns = 4 + allowed_chr_offset_positions = 5 + + model_rtl = copy.deepcopy(model) + node_analytical = get_characteristic_fnc(model, node_details, part, target_clk_ns, "analytical") + node_rtlsim = get_characteristic_fnc(model_rtl, node_details, part, target_clk_ns, "rtlsim") + if direction == "input": + assert compare_two_chr_funcs( + node_analytical.get_nodeattr("io_chrc_in"), + node_rtlsim.get_nodeattr("io_chrc_in"), + allowed_chr_offset_positions, + ) + elif direction == "output": + assert compare_two_chr_funcs( + node_analytical.get_nodeattr("io_chrc_out"), + node_rtlsim.get_nodeattr("io_chrc_out"), + allowed_chr_offset_positions, + ) diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding.py b/tests/fpgadataflow/test_fpgadataflow_thresholding.py index fe7ba3d9fb..81290527d9 100644 --- a/tests/fpgadataflow/test_fpgadataflow_thresholding.py +++ b/tests/fpgadataflow/test_fpgadataflow_thresholding.py @@ -28,6 +28,7 @@ import pytest +import copy import numpy as np from onnx import TensorProto, helper from qonnx.core.datatype import DataType @@ -49,6 +50,7 @@ from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers +from finn.util.test import compare_two_chr_funcs, get_characteristic_fnc test_fpga_part = "xczu3eg-sbva484-1-e" target_clk_ns = 5 @@ -263,3 +265,155 @@ def test_fpgadataflow_thresholding( exp_cycles = exp_cycles_dict[node.name] assert np.isclose(exp_cycles, cycles_rtlsim, atol=15) assert exp_cycles != 0 + + +# which 
port to test +@pytest.mark.parametrize("direction", ["input", "output"]) +@pytest.mark.parametrize("num_input_channels", [6, 16]) +@pytest.mark.parametrize( + "num_input_vecs", + [ + [1], + [1, 2, 2], + ], +) +@pytest.mark.parametrize("activation", [DataType["UINT4"], DataType["INT4"], DataType["BIPOLAR"]]) +@pytest.mark.parametrize( + "idt_tdt_cfg", + [ + (DataType["INT8"], DataType["INT8"]), + (DataType["INT8"], DataType["INT9"]), + (DataType["UINT5"], DataType["UINT5"]), + (DataType["UINT5"], DataType["UINT6"]), + ], +) +@pytest.mark.parametrize("fold", [-1, 1, 2]) +@pytest.mark.parametrize("narrow", [True, False]) +@pytest.mark.parametrize("per_tensor", [True, False]) +@pytest.mark.parametrize("impl_style", ["hls", "rtl"]) +@pytest.mark.parametrize("exec_mode", ["rtlsim"]) +@pytest.mark.parametrize("mem_mode", ["internal_embedded", "internal_decoupled"]) +@pytest.mark.fpgadataflow +@pytest.mark.vivado +@pytest.mark.slow +def test_fpgadataflow_analytical_characterization_thresholding( + direction, + num_input_channels, + num_input_vecs, + activation, + idt_tdt_cfg, + fold, + narrow, + per_tensor, + impl_style, + exec_mode, + mem_mode, +): + # the mem_mode parameter can only be used for the hls thresholding + # so the test will only be executed once for impl_style=rtl and once skipped + # when the mem_mode is varied. Otherwise, the same test configuration would always + # run twice. + if impl_style == "rtl" and mem_mode == "internal_decoupled": + pytest.skip( + "Skip, because test is identical to impl_style=rtl and mem_mode=internal_embedded" + ) + if narrow and activation == DataType["BIPOLAR"]: + pytest.skip("Narrow needs to be false with biploar activation.") + input_data_type, threshold_data_type = idt_tdt_cfg + num_steps = activation.get_num_possible_values() - 1 + + if fold == -1: + fold = num_input_channels + pe = num_input_channels // fold + if num_input_channels % pe != 0: + pytest.skip("Invalid folding configuration. 
Skipping test.") + + output_data_type = activation + if activation == DataType["BIPOLAR"]: + activation_bias = 0 + else: + activation_bias = activation.min() + if narrow and activation.signed(): + activation_bias += 1 + + # Generate random thresholds and sort in ascending order + thresholds = generate_random_threshold_values( + threshold_data_type, num_input_channels, num_steps, narrow, per_tensor + ) + + # provide non-decreasing/ascending thresholds + thresholds = sort_thresholds_increasing(thresholds) + + # Make a Multithreshold graph and convert to thresholding binary search node + model = make_single_multithresholding_modelwrapper( + thresholds, + input_data_type, + threshold_data_type, + output_data_type, + activation_bias, + num_input_vecs, + num_input_channels, + ) + + # calculate reference output + x = gen_finn_dt_tensor(input_data_type, tuple(num_input_vecs + [num_input_channels])) + + input_dict = {model.graph.input[0].name: x} + y_expected = oxe.execute_onnx(model, input_dict)[model.graph.output[0].name] + + if output_data_type == DataType["BIPOLAR"]: + # binary to bipolar + y_expected = 2 * y_expected - 1 + + model = model.transform(InferThresholdingLayer()) + + # Transform to the specified implementation style, either the + # RTL or HLS according to test parameters + node = model.get_nodes_by_op_type(model.graph.node[0].op_type)[0] + inst = getCustomOp(node) + inst.set_nodeattr("preferred_impl_style", impl_style) + model = model.transform(SpecializeLayers(test_fpga_part)) + model = model.transform(InferShapes()) + assert model.graph.node[0].op_type == "Thresholding_" + str(impl_style) + + node = model.get_nodes_by_op_type(model.graph.node[0].op_type)[0] + inst = getCustomOp(node) + inst.set_nodeattr("PE", pe) + model = model.transform(GiveUniqueNodeNames()) + + if impl_style == "hls": + inst.set_nodeattr("mem_mode", mem_mode) + + node_details = ( + "Thresholding", + thresholds, + input_data_type, + threshold_data_type, + output_data_type, + 
activation_bias, + num_input_vecs, + num_input_channels, + "hls", + ) + + allowed_chr_offset_positions = 5 + + model_rtl = copy.deepcopy(model) + node_analytical = get_characteristic_fnc( + model, node_details, test_fpga_part, target_clk_ns, "analytical" + ) + node_rtlsim = get_characteristic_fnc( + model_rtl, node_details, test_fpga_part, target_clk_ns, "rtlsim" + ) + if direction == "input": + assert compare_two_chr_funcs( + node_analytical.get_nodeattr("io_chrc_in"), + node_rtlsim.get_nodeattr("io_chrc_in"), + allowed_chr_offset_positions, + ) + elif direction == "output": + assert compare_two_chr_funcs( + node_analytical.get_nodeattr("io_chrc_out"), + node_rtlsim.get_nodeattr("io_chrc_out"), + allowed_chr_offset_positions, + ) diff --git a/tests/fpgadataflow/test_fpgadataflow_vvau.py b/tests/fpgadataflow/test_fpgadataflow_vvau.py index 236176faa6..6383d5c609 100644 --- a/tests/fpgadataflow/test_fpgadataflow_vvau.py +++ b/tests/fpgadataflow/test_fpgadataflow_vvau.py @@ -28,6 +28,7 @@ import pytest +import copy import numpy as np from onnx import TensorProto, helper from qonnx.core.datatype import DataType @@ -66,6 +67,7 @@ from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode from finn.transformation.fpgadataflow.set_fifo_depths import InsertAndSetFIFODepths from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers +from finn.util.test import compare_two_chr_funcs, get_characteristic_fnc def _infer_sparse_weight_tensor(W_conv, k_h, k_w, channels): @@ -468,3 +470,117 @@ def test_fpgadataflow_vvau_rtl(kernel_size, in_feature_dim, in_chn, idt, wdt, pa assert ( golden_out == output_vvau_stitched ).all(), "Output of ONNX model not matching output of stitched-IP RTL model!" 
+ + +# which port to test +@pytest.mark.parametrize("direction", ["input", "output"]) +# input datatype +@pytest.mark.parametrize("idt", [DataType["BIPOLAR"], DataType["UINT4"]]) +# weight datatype +@pytest.mark.parametrize("wdt", [DataType["BIPOLAR"], DataType["UINT4"]]) +# activation: None or DataType +@pytest.mark.parametrize("act", [DataType["BIPOLAR"], DataType["UINT4"], None]) +# PE +@pytest.mark.parametrize("pe", [1, 3, 6]) +# SIMD +@pytest.mark.parametrize("simd", [1, 9]) +# Input image shape +@pytest.mark.parametrize("dim_h", [10]) +@pytest.mark.parametrize("dim_w", [10, 1]) +# Kernel shape +@pytest.mark.parametrize("k_h", [3]) +@pytest.mark.parametrize("k_w", [3, 1]) +# Number of input and output channels +@pytest.mark.parametrize("channels", [3, 6]) +# memory mode +@pytest.mark.parametrize("mem_mode", ["internal_embedded", "internal_decoupled"]) +# execution mode +@pytest.mark.parametrize("exec_mode", ["rtlsim"]) +@pytest.mark.fpgadataflow +@pytest.mark.slow +@pytest.mark.vivado +def test_fpgadataflow_analytical_characterization_vvau( + direction, idt, wdt, act, pe, simd, dim_h, dim_w, k_h, k_w, channels, mem_mode, exec_mode +): + if dim_w == 1 and k_w != 1: + pytest.skip("1D image requires 1D kernel, skipping.") + + if channels % pe != 0: + pytest.skip("Requirement Channels divisable by PE is violated.") + + if (k_h * k_w) % simd != 0: + pytest.skip("Requirement kernel (k_h * k_w) divisable by SIMD is violated.") + + # Generate weights in expected shape for ONNX and HLS node + W = gen_finn_dt_tensor(wdt, (channels, 1, k_h, k_w)) # shape: [channels, 1, k, k] + + # Generate inputs in expected format for ONNX and HLS node + x = gen_finn_dt_tensor(idt, (1, dim_h, dim_w, k_h * k_w * channels)) + x_vvau = x.reshape(1, dim_h, dim_w, k_h * k_w, channels // pe, pe) + x_vvau = x_vvau.transpose(0, 1, 2, 4, 3, 5) + x_vvau = x_vvau.reshape(1, dim_h, dim_w, channels * k_h * k_w) + + if act is None: + T = None + tdt = None + if wdt == DataType["BIPOLAR"] and idt == 
DataType["BIPOLAR"]: + odt = DataType["UINT32"] + else: + odt = DataType["INT32"] + else: + odt = act + (min_v, max_v) = _calculate_dot_prod_range(idt, wdt, k_h * k_w) + n_steps = act.get_num_possible_values() - 1 + T = np.random.randint(min_v, max_v - 1, (channels, n_steps)).astype(np.float32) + T = np.sort(T, axis=1) + if wdt == DataType["BIPOLAR"] and idt == DataType["BIPOLAR"]: + tdt = DataType["UINT32"] + # bias thresholds to be positive + T = np.ceil((T + (k_h * k_w)) / 2) + assert (T >= 0).all() + else: + tdt = DataType["INT32"] + + model = _make_single_vvau_modelwrapper( + W, pe, simd, k_h, k_w, channels, dim_h, dim_w, wdt, idt, odt, T, tdt, mem_mode + ) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(GiveReadableTensorNames()) + + node_details = ( + "VVAU", + W, + pe, + simd, + k_h, + k_w, + channels, + dim_h, + dim_w, + wdt, + idt, + odt, + T, + tdt, + mem_mode, + "hls", + ) + part = "xc7z020clg400-1" + target_clk_ns = 4 + allowed_chr_offset_positions = 5 + + model_rtl = copy.deepcopy(model) + node_analytical = get_characteristic_fnc(model, node_details, part, target_clk_ns, "analytical") + node_rtlsim = get_characteristic_fnc(model_rtl, node_details, part, target_clk_ns, "rtlsim") + if direction == "input": + assert compare_two_chr_funcs( + node_analytical.get_nodeattr("io_chrc_in"), + node_rtlsim.get_nodeattr("io_chrc_in"), + allowed_chr_offset_positions, + ) + elif direction == "output": + assert compare_two_chr_funcs( + node_analytical.get_nodeattr("io_chrc_out"), + node_rtlsim.get_nodeattr("io_chrc_out"), + allowed_chr_offset_positions, + ) From 377fc68ea41151b67e20965e5df2ade34edadf70 Mon Sep 17 00:00:00 2001 From: Felix Jentzsch Date: Thu, 5 Dec 2024 18:31:10 +0000 Subject: [PATCH 10/12] Remove manual bypass, small fixes --- src/finn/custom_op/fpgadataflow/hwcustomop.py | 6 +++ .../fpgadataflow/matrixvectoractivation.py | 1 + .../fpgadataflow/derive_characteristic.py | 44 +------------------ 
tests/fpgadataflow/test_fifosizing.py | 3 +- 4 files changed, 10 insertions(+), 44 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/hwcustomop.py b/src/finn/custom_op/fpgadataflow/hwcustomop.py index d0cafa85da..777f2b380d 100644 --- a/src/finn/custom_op/fpgadataflow/hwcustomop.py +++ b/src/finn/custom_op/fpgadataflow/hwcustomop.py @@ -513,6 +513,12 @@ def derive_characteristic_fxns_rtlsim(self, model, period, fpga_part, clk_period except KeyError: # exception if op_type is not supported raise Exception("Custom op_type %s is currently not supported." % op_type) + else: + self.prepare_rtlsim() + # ensure that executable path is now set + assert ( + self.get_nodeattr("rtlsim_so") != "" + ), "Failed to prepare RTLSim, no rtlsim_so attribute found." # assert , "rtlsim not ready for " + self.onnx_node.name if self.get_nodeattr("io_chrc_period") > 0: diff --git a/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py b/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py index ac838fdad7..890ff1cda2 100644 --- a/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py +++ b/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py @@ -1034,6 +1034,7 @@ def derive_characteristic_fxns( if mem_mode in ["internal_decoupled", "external"]: n_weight_inps = self.calc_wmem() # num_w_reps = np.prod(self.get_nodeattr("numInputVectors")) + # TODO: Why is num_w_reps not considered here? 
io_dict["inputs"]["weights"] = [0 for i in range(1 * n_weight_inps)] super().derive_characteristic_fxns( diff --git a/src/finn/transformation/fpgadataflow/derive_characteristic.py b/src/finn/transformation/fpgadataflow/derive_characteristic.py index 460642d017..4c19db4d8b 100644 --- a/src/finn/transformation/fpgadataflow/derive_characteristic.py +++ b/src/finn/transformation/fpgadataflow/derive_characteristic.py @@ -53,7 +53,7 @@ class DeriveCharacteristic(NodeLocalTransformation): """ def __init__( - self, model, period, strategy, fpga_part, clk_period, num_workers=None, manual_bypass=False + self, model, period, strategy, fpga_part, clk_period, num_workers=None ): super().__init__(num_workers=num_workers) self.model = model @@ -61,7 +61,6 @@ def __init__( self.strategy = strategy self.fpga_part = fpga_part self.clk_period = clk_period - self.manual_bypass = manual_bypass def applyNodeLocal(self, node): op_type = node.op_type @@ -85,47 +84,6 @@ def applyNodeLocal(self, node): def apply(self, model: ModelWrapper): (model, run_again) = super().apply(model) - if not self.manual_bypass: - return (model, run_again) - # apply manual fix for DuplicateStreams and AddStreams for - # simple residual reconvergent paths with bypass - addstrm_nodes = model.get_nodes_by_op_type("AddStreams_hls") - for addstrm_node in addstrm_nodes: - # we currently only support the case where one branch is - # a bypass - b0 = model.find_producer(addstrm_node.input[0]) - b1 = model.find_producer(addstrm_node.input[1]) - if (b0 is None) or (b1 is None): - warnings.warn("Found unsupported AddStreams, skipping") - return (model, run_again) - b0_is_bypass = b0.op_type == "DuplicateStreams_hls" - b1_is_bypass = b1.op_type == "DuplicateStreams_hls" - if (not b0_is_bypass) and (not b1_is_bypass): - warnings.warn("Found unsupported AddStreams, skipping") - return (model, run_again) - ds_node = b0 if b0_is_bypass else b1 - comp_branch_last = b1 if b0_is_bypass else b0 - - ds_comp_bout = ds_node.output[0] 
if b0_is_bypass else ds_node.output[1] - comp_branch_first = model.find_consumer(ds_comp_bout) - if comp_branch_first is None or comp_branch_last is None: - warnings.warn("Found unsupported DuplicateStreams, skipping") - return (model, run_again) - comp_branch_last = registry.getCustomOp(comp_branch_last) - comp_branch_first = registry.getCustomOp(comp_branch_first) - # for DuplicateStreams, use comp_branch_first's input characterization - # for AddStreams, use comp_branch_last's output characterization - period = comp_branch_first.get_nodeattr("io_chrc_period") - comp_branch_first_f = comp_branch_first.get_nodeattr("io_characteristic")[: 2 * period] - comp_branch_last_f = comp_branch_last.get_nodeattr("io_characteristic")[2 * period :] - ds_node_inst = registry.getCustomOp(ds_node) - addstrm_node_inst = registry.getCustomOp(addstrm_node) - ds_node_inst.set_nodeattr("io_chrc_period", period) - ds_node_inst.set_nodeattr("io_characteristic", comp_branch_first_f * 2) - addstrm_node_inst.set_nodeattr("io_chrc_period", period) - addstrm_node_inst.set_nodeattr("io_characteristic", comp_branch_last_f * 2) - warnings.warn(f"Set {ds_node.name} chrc. from {comp_branch_first.onnx_node.name}") - warnings.warn(f"Set {addstrm_node.name} chrc. 
from {comp_branch_last.onnx_node.name}") return (model, run_again) diff --git a/tests/fpgadataflow/test_fifosizing.py b/tests/fpgadataflow/test_fifosizing.py index df5ccd488e..278e90c02b 100644 --- a/tests/fpgadataflow/test_fifosizing.py +++ b/tests/fpgadataflow/test_fifosizing.py @@ -420,6 +420,7 @@ def make_conv_building_block(ifm_dim, ch, kernel_size, simd, pe, parallel_window mvau_config["SIMD"] = simd if parallel_window == 0 else simd * kernel_size * kernel_size mvau_config["PE"] = pe mvau_config["resType"] = "lut" + mvau_config["mem_mode"] = "internal_embedded" # internal_decoupled mvau_config["inputDataType"] = idt.name mvau_config["weightDataType"] = wdt.name mvau_config["outputDataType"] = odt.name @@ -842,7 +843,7 @@ def test_fifosizing_linear(method, topology): @pytest.mark.fpgadataflow @pytest.mark.parametrize("strategy", ["rtlsim"]) # rtlsim #analytical @pytest.mark.parametrize("lb_num_layers", [1]) -@pytest.mark.parametrize("rb_num_layers", [4]) +@pytest.mark.parametrize("rb_num_layers", [3]) def test_fifosizing_nonlinear(strategy, lb_num_layers, rb_num_layers): np.random.seed(0) tmp_output_dir = make_build_dir( From abb96d6fd9edb6699f59a626d2bd4675d0eb17d3 Mon Sep 17 00:00:00 2001 From: Felix Jentzsch Date: Tue, 7 Jan 2025 17:48:06 +0000 Subject: [PATCH 11/12] Move build dir creation into test --- tests/brevitas/test_brevitas_fc.py | 3 +-- tests/transformation/streamline/test_streamline_cnv.py | 3 +-- tests/transformation/streamline/test_streamline_fc.py | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/tests/brevitas/test_brevitas_fc.py b/tests/brevitas/test_brevitas_fc.py index 842d099f57..a7a73a5ed4 100644 --- a/tests/brevitas/test_brevitas_fc.py +++ b/tests/brevitas/test_brevitas_fc.py @@ -45,8 +45,6 @@ from finn.util.basic import make_build_dir from finn.util.test import get_test_model_trained -export_onnx_path = make_build_dir("test_brevitas_fc_") - @pytest.mark.brevitas_export # act bits @@ -61,6 +59,7 @@ def 
test_brevitas_fc_onnx_export_and_exec(size, wbits, abits): if wbits > abits: pytest.skip("No wbits > abits cases at the moment") nname = "%s_%dW%dA" % (size, wbits, abits) + export_onnx_path = make_build_dir("test_brevitas_fc_") finn_onnx = export_onnx_path + "/%s.onnx" % nname fc = get_test_model_trained(size, wbits, abits) ishape = (1, 1, 28, 28) diff --git a/tests/transformation/streamline/test_streamline_cnv.py b/tests/transformation/streamline/test_streamline_cnv.py index 8a91a49278..9e206c843a 100644 --- a/tests/transformation/streamline/test_streamline_cnv.py +++ b/tests/transformation/streamline/test_streamline_cnv.py @@ -50,8 +50,6 @@ from finn.util.basic import make_build_dir from finn.util.test import get_test_model_trained -export_onnx_path = make_build_dir("test_streamline_cnv_") - @pytest.mark.streamline # act bits @@ -64,6 +62,7 @@ def test_streamline_cnv(size, wbits, abits): if wbits > abits: pytest.skip("No wbits > abits cases at the moment") nname = "%s_%dW%dA" % (size, wbits, abits) + export_onnx_path = make_build_dir("test_streamline_cnv_") finn_onnx = export_onnx_path + "/%s.onnx" % nname fc = get_test_model_trained(size, wbits, abits) export_qonnx(fc, torch.randn(1, 3, 32, 32), finn_onnx) diff --git a/tests/transformation/streamline/test_streamline_fc.py b/tests/transformation/streamline/test_streamline_fc.py index edc4a96fe2..9ce2f2ab65 100644 --- a/tests/transformation/streamline/test_streamline_fc.py +++ b/tests/transformation/streamline/test_streamline_fc.py @@ -52,8 +52,6 @@ from finn.util.basic import make_build_dir from finn.util.test import get_test_model_trained -export_onnx_path = make_build_dir("test_streamline_fc_") - @pytest.mark.streamline # act bits @@ -68,6 +66,7 @@ def test_streamline_fc(size, wbits, abits): if wbits > abits: pytest.skip("No wbits > abits cases at the moment") nname = "%s_%dW%dA" % (size, wbits, abits) + export_onnx_path = make_build_dir("test_streamline_fc_") finn_onnx = export_onnx_path + "/%s.onnx" % nname 
fc = get_test_model_trained(size, wbits, abits) export_qonnx(fc, torch.randn(1, 1, 28, 28), finn_onnx) From 0bd6dab1c6d97360f4dd0db1928e1cb65c37470c Mon Sep 17 00:00:00 2001 From: Felix Jentzsch Date: Wed, 8 Jan 2025 18:02:47 +0000 Subject: [PATCH 12/12] Extend unit test in preparation for benchmark --- tests/fpgadataflow/test_fifosizing.py | 166 ++++++++++++++++++++++---- 1 file changed, 146 insertions(+), 20 deletions(-) diff --git a/tests/fpgadataflow/test_fifosizing.py b/tests/fpgadataflow/test_fifosizing.py index 4ce2bc0931..31ebe96b33 100644 --- a/tests/fpgadataflow/test_fifosizing.py +++ b/tests/fpgadataflow/test_fifosizing.py @@ -31,8 +31,10 @@ import json import numpy as np +import os import shutil import torch +import copy from brevitas.export import export_qonnx from onnx import TensorProto, helper from qonnx.core.datatype import DataType @@ -81,7 +83,8 @@ def make_conv_building_block(ifm_dim, ch, kernel_size, simd, pe, parallel_window tdt = DataType["UINT32"] stride = 1 in_ch = out_ch = ch # input channel = output channel for stacking - pad = int(np.floor(kernel_size / 2)) # pad so that input dim = output dim for stacking + # pad so that input dim = output dim for stacking (only supports odd kernel_size for now) + pad = int(np.floor(kernel_size / 2)) total_pad = 2 * pad out_feature_dim = compute_conv_output_dim(ifm_dim, kernel_size, stride, total_pad) @@ -92,6 +95,8 @@ def make_conv_building_block(ifm_dim, ch, kernel_size, simd, pe, parallel_window inpgen_out_shape = [1, out_feature_dim, out_feature_dim, in_ch * kernel_size * kernel_size] output_shape = [1, out_feature_dim, out_feature_dim, out_ch] + assert input_shape == output_shape, "ERROR: Conv layer dimensions not stackable" + padding_config = {} padding_config["domain"] = "finn.custom_op.fpgadataflow.rtl" padding_config["backend"] = "fpgadataflow" @@ -339,7 +344,7 @@ def test_fifosizing_linear(method, topology): node0 = model0.graph.node[i] node1 = model1.graph.node[i] assert node0.op_type == 
node1.op_type - if node0.op_type == "StreamingFIFO": + if node0.op_type == "StreamingFIFO_rtl": node0_inst = getCustomOp(node0) node1_inst = getCustomOp(node1) assert node0_inst.get_nodeattr("depth") == node1_inst.get_nodeattr("depth") @@ -351,24 +356,63 @@ def test_fifosizing_linear(method, topology): @pytest.mark.slow @pytest.mark.vivado @pytest.mark.fpgadataflow -@pytest.mark.parametrize("strategy", ["rtlsim"]) # rtlsim #analytical +@pytest.mark.parametrize("conv_config", [ + (32, # dim + 5, # kernel_size + 4, # ch + 4, # simd + 4, # pe + 1 # parallel_window + ), + #(16, 4, 3, 4, 4, 1), + #(16, 4, 3, 4, 4, 1) + ]) @pytest.mark.parametrize("lb_num_layers", [1]) @pytest.mark.parametrize("rb_num_layers", [3]) -def test_fifosizing_nonlinear(strategy, lb_num_layers, rb_num_layers): +@pytest.mark.parametrize("strategy", ["analytical", "rtlsim"]) +def test_fifosizing_nonlinear(conv_config, lb_num_layers, rb_num_layers, strategy): np.random.seed(0) tmp_output_dir = make_build_dir( - "build_fifosizing_nonlinear_%s_%s" % (lb_num_layers, rb_num_layers) + "test_fifosizing_nonlinear_%s_%s" % (lb_num_layers, rb_num_layers) ) - + log = {} + + #TODO: generalize FIFO test so it can be used by other FIFO-related unit tests + #TODO: allow manual folding/fifo config as input + + #TODO: is a scenario possible where reducing depth of a single FIFO at a time is not sufficient for testing tightness? + # e.g. reducing > 1 FIFOs simultaneously does not cause a throughput drop while reducing a single FIFO does? + + # conv parameters + dim, kernel_size, ch, simd, pe, parallel_window = conv_config + log["stategy"] = strategy + log["lb_num_layers"] = lb_num_layers + log["rb_num_layers"] = rb_num_layers + log["dim"] = dim + log["kernel_size"] = kernel_size + log["ch"] = ch + log["simd"] = simd + log["pe"] = pe + log["parallel_window"] = parallel_window + + # test parameters + #TODO: make configurable + #TODO: how to determine rtlsim_n? 
rtlsim_n = 10 - - dim = 16 - ch = 4 + throughput_factor_threshold = 0.9 + fifo_reduction_skip_threshold = 32 # skip FIFO tightness test for shallow FIFOs at or below this depth + fifo_reduction_factor = 0.5 # controls tightness + fifo_reduction_throughput_drop_threshold = 0.01 + log["rtlsim_n"] = rtlsim_n + log["throughput_factor_threshold"] = throughput_factor_threshold + log["fifo_reduction_skip_threshold"] = fifo_reduction_skip_threshold + log["fifo_reduction_factor"] = fifo_reduction_factor + log["fifo_reduction_throughput_drop_threshold"] = fifo_reduction_throughput_drop_threshold lb = None for i in range(lb_num_layers): new_block = make_conv_building_block( - dim, ch, kernel_size=3, simd=4, pe=4, parallel_window=1 + dim, ch, kernel_size=kernel_size, simd=simd, pe=pe, parallel_window=parallel_window ) lb = new_block if lb is None else lb.transform(MergeONNXModels(new_block)) lb.save(tmp_output_dir + "/lb.onnx") @@ -376,7 +420,7 @@ def test_fifosizing_nonlinear(strategy, lb_num_layers, rb_num_layers): rb = None for i in range(rb_num_layers): new_block = make_conv_building_block( - dim, ch, kernel_size=3, simd=4, pe=4, parallel_window=1 + dim, ch, kernel_size=kernel_size, simd=simd, pe=pe, parallel_window=parallel_window ) rb = new_block if rb is None else rb.transform(MergeONNXModels(new_block)) rb.save(tmp_output_dir + "/rb.onnx") @@ -386,6 +430,7 @@ def test_fifosizing_nonlinear(strategy, lb_num_layers, rb_num_layers): cfg = build_cfg.DataflowBuildConfig( output_dir=tmp_output_dir, + verbose=True, # TODO: remove this? 
# only works with characterization-based FIFO-sizing auto_fifo_depths=True, auto_fifo_strategy="characterize", @@ -408,6 +453,7 @@ def test_fifosizing_nonlinear(strategy, lb_num_layers, rb_num_layers): build.build_dataflow_cfg(tmp_output_dir + "/model.onnx", cfg) + # load performance reports with open(tmp_output_dir + "/report/estimate_network_performance.json") as f: est_data = json.load(f) with open(tmp_output_dir + "/report/rtlsim_performance.json") as f: @@ -419,16 +465,96 @@ def test_fifosizing_nonlinear(strategy, lb_num_layers, rb_num_layers): last_node = getCustomOp(model_final.find_producer(model_final.graph.output[0].name)) input_txns_expected = np.prod(first_node.get_folded_input_shape()[:-1]) * rtlsim_n output_txns_expected = np.prod(last_node.get_folded_output_shape()[:-1]) * rtlsim_n - assert sim_data["N_IN_TXNS"] == input_txns_expected - assert sim_data["N_OUT_TXNS"] == output_txns_expected + deadlock = sim_data["N_IN_TXNS"] != input_txns_expected or sim_data["N_OUT_TXNS"] != output_txns_expected + log["deadlock"] = deadlock.tolist() # check rtlsim throughput - # TODO: how to determine N? Take throughput or stable_throughput? - # sim_data["stable_throughput[images/s]"] - assert ( - float(sim_data["throughput[images/s]"]) / float(est_data["estimated_throughput_fps"]) > 0.9 - ) + throughput = sim_data["throughput[images/s]"] + stable_throughput = sim_data["stable_throughput[images/s]"] + estimated_throughput = est_data["estimated_throughput_fps"] + throughput_factor = throughput / estimated_throughput + stable_throughput_factor = stable_throughput / estimated_throughput + + # TODO: Take throughput or stable_throughput? 
+ throughput_pass = throughput_factor > throughput_factor_threshold + + log["throughput_pass"] = throughput_pass + log["throughput"] = throughput + log["stable_throughput"] = stable_throughput + log["estimated_throughput"] = estimated_throughput + + # log FIFO sizes for easier inspection + log["fifo_sizes"] = {} + for node in model_final.get_nodes_by_op_type("StreamingFIFO_rtl"): + node_inst = getCustomOp(node) + log["fifo_sizes"][node.name] = node_inst.get_nodeattr("depth") + + # reduce individual FIFO sizes by some amount and observe throughput drop or deadlock appear + fifo_reduction_pass = [] + log["fifo_reduction_results"] = {} + model_orig = ModelWrapper(tmp_output_dir + "/intermediate_models/step_hw_ipgen.onnx") + for node_orig in model_orig.get_nodes_by_op_type("StreamingFIFO_rtl"): + model = copy.deepcopy(model_orig) + node = model.get_node_from_name(node_orig.name) + node_inst = getCustomOp(node) + + # skip shallow FIFOs + # TODO: do we need to consider rounding-up of FIFO depths for impl_style=vivado? 
+ if node_inst.get_nodeattr("depth") <= fifo_reduction_skip_threshold: + log["fifo_reduction_results"][node.name] = "skip" + continue + + # reduce depth of current FIFO and reset generated code + node_inst.set_nodeattr("depth", int(node_inst.get_nodeattr("depth") * fifo_reduction_factor)) + node_inst.set_nodeattr("code_gen_dir_ipgen", "") + node_inst.set_nodeattr("ip_path", "") + node_inst.set_nodeattr("ipgen_path", "") + + # save model variation + tmp_output_dir_var = tmp_output_dir + "/variations/" + node.name + os.makedirs(tmp_output_dir_var) + model.save(tmp_output_dir_var + "/model.onnx") + + # build again, only re-run necessary steps to save time + cfg.output_dir = tmp_output_dir_var + cfg.steps = ["step_hw_codegen", "step_create_stitched_ip", "step_measure_rtlsim_performance"] + build.build_dataflow_cfg(tmp_output_dir_var + "/model.onnx", cfg) + + # load performance report + with open(tmp_output_dir_var + "/report/rtlsim_performance.json") as f: + sim_data = json.load(f) + + # check for deadlock + model_final = ModelWrapper(tmp_output_dir_var + "/intermediate_models/step_create_stitched_ip.onnx") + first_node = getCustomOp(model_final.find_consumer(model_final.graph.input[0].name)) + last_node = getCustomOp(model_final.find_producer(model_final.graph.output[0].name)) + input_txns_expected = np.prod(first_node.get_folded_input_shape()[:-1]) * rtlsim_n + output_txns_expected = np.prod(last_node.get_folded_output_shape()[:-1]) * rtlsim_n + var_deadlock = sim_data["N_IN_TXNS"] != input_txns_expected or sim_data["N_OUT_TXNS"] != output_txns_expected + + # check rtlsim throughput + var_throughput = sim_data["throughput[images/s]"] + var_stable_throughput = sim_data["stable_throughput[images/s]"] + # TODO: take throughput or stable_throughput? 
+ throughput_drop = (throughput - var_throughput) / throughput + + if var_deadlock: + fifo_reduction_pass.append(True) + log["fifo_reduction_results"][node.name] = 1.0 + elif throughput_drop > fifo_reduction_throughput_drop_threshold: + fifo_reduction_pass.append(True) + log["fifo_reduction_results"][node.name] = throughput_drop + else: + fifo_reduction_pass.append(False) + log["fifo_reduction_results"][node.name] = "fail (no drop)" + + # log for debugging + with open(tmp_output_dir + "/debug.json", "w") as f: + json.dump(log, f, indent=4) - # TODO: - # reduce (individual) FIFO sizes by x % and observe throughput drop or deadlock appear # shutil.rmtree(tmp_output_dir) + + # pass/fail test + assert not deadlock, "Deadlock detected, FIFOs too small." + assert throughput_pass, "Throughput too low, FIFOs too small." + assert all(fifo_reduction_pass), "FIFO tightness test failed, FIFOs too large."