diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 02d716fe..420aa9d7 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -49,6 +49,7 @@ jobs: test2DRequantizedConv iSoftmax FloatAdder + testFloatGEMM generic-models: uses: ./.github/workflows/TestRunnerGeneric.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index c9660a41..4df97d14 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ ### Fixed - Fix broken softmax kernel for generic platform ([#2](https://github.com/pulp-platform/Deeploy/pull/2)). + ## Minor CI and Readme Improvements ### Added @@ -16,6 +17,7 @@ - Update the link of the Docker container used to run the CI with the Docker published by this repo instead of my fork. - Add a retry on timeout step for large network tests. This is a temporary fix to address the sporadic freeze happening at the compilation stage, see [this issue](https://github.com/pulp-platform/Deeploy/issues/9). + ## Floating Point Support ### Added @@ -26,6 +28,7 @@ - Extend `testType.py` with float tests - LIMITATION: Current LLVM compiler does not support bfp16 and fp16, these types are commented in the library header + ## Snitch Cluster Support ### Added @@ -38,6 +41,7 @@ ### Changed - Update the Banshee's commit to include a recent PR. + ## Snitch Cluster Tiling Support ### Added @@ -53,6 +57,7 @@ ### Changed - Add the possibility of changing the simulator when using the snitch-tiled test runner. 
+ ## GVSOC support for the Snitch Cluster Platform ### Added @@ -62,3 +67,11 @@ ### Changed - Add the RTL library to the snitch_cluster build process in the Makefile, required for GVSOC simulation + + +## Add Float Support & Float GEMM for Generic +- Float Support for ConstantBuffer +- Float GEMM on Generic +- Added FP GEMM to CI +- Fixed Float bug on Testslice, CMSIS TestUtil, DivInteger +- Fixed AbstractDataType Float Bugs diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 0a747640..e2a07cbf 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -6,3 +6,4 @@ All contributors have agreed to an open-source release of their work in the Deep * Luka Macan * Alberto Dequino * Francesco Conti +* Run Wang \ No newline at end of file diff --git a/Deeploy/AbstractDataTypes.py b/Deeploy/AbstractDataTypes.py index a94a1489..71c348b4 100644 --- a/Deeploy/AbstractDataTypes.py +++ b/Deeploy/AbstractDataTypes.py @@ -217,7 +217,7 @@ def partialOrderUpcast(cls, otherCls: Type[Immediate]) -> bool: return False @classmethod - def checkValue(cls, value: Union[int, Iterable[int]], ctxt: Optional[_NetworkContext] = None): + def checkValue(cls, value: Union[int, Iterable[int], np.ndarray], ctxt: Optional[_NetworkContext] = None): if isinstance(value, int): _max, _min = (value, value) @@ -238,6 +238,7 @@ def checkValue(cls, value: Union[int, Iterable[int]], ctxt: Optional[_NetworkCon class FloatImmediate(Immediate[Union[float, Iterable[float]], _ImmediateType]): typeMantissa: int #: int: Represents the number of bits reserved for the mantissa part typeExponent: int #: int: Represents the number of bits reserved for the exponent part + typeMin: float @_classproperty def typeExponentMax(cls) -> int: @@ -249,6 +250,10 @@ def typeExponentOffset(cls) -> int: # The offset added to the exponent return 2**(cls.typeExponent - 1) - 1 + @_classproperty + def typeMin(cls) -> float: + return -math.inf + @classmethod def partialOrderUpcast(cls, otherCls: Type[Immediate]) -> bool: if 
issubclass(otherCls, FloatImmediate): @@ -257,7 +262,7 @@ def partialOrderUpcast(cls, otherCls: Type[Immediate]) -> bool: return False @classmethod - def checkValue(cls, value: Union[float, Iterable[float]], ctxt: Optional[_NetworkContext] = None): + def checkValue(cls, value: Union[float, Iterable[float], np.ndarray], ctxt: Optional[_NetworkContext] = None): """ This method tries to manually cast standard python's standard immediate float precision values (64 bits) to an arbitrary FP representation and check if the new representation is close enough @@ -268,7 +273,7 @@ def checkValue(cls, value: Union[float, Iterable[float]], ctxt: Optional[_Networ if isinstance(value, float): _val_list.append(value) elif isinstance(value, np.ndarray): - _val_list = value.tolist() + _val_list = value.flatten().tolist() elif isinstance(value, Iterable): for i in value: _val_list.append(i) diff --git a/Deeploy/DeeployTypes.py b/Deeploy/DeeployTypes.py index c81da20e..08346d74 100644 --- a/Deeploy/DeeployTypes.py +++ b/Deeploy/DeeployTypes.py @@ -385,9 +385,9 @@ class ConstantBuffer(VariableBuffer): def __init__(self, name: str = '', shape = [1], values = [0]): super().__init__(name, shape) values = np.asarray(values) - intArray = values.astype(int) - assert (np.abs(values - intArray)).max() < 0.001, "Constant value {name} is NOT an integer!" - self.values = intArray #: np.array: Stores the underlying weights in Ptyhon-type representation + # intArray = values.astype(int) + # assert (np.abs(values - intArray)).max() < 0.001, "Constant value {name} is NOT an integer!" + self.values = values #: np.array: Stores the underlying weights in Python-type representation # Do not override - ConstantBuffers are assumed to be always live! 
self._live = True diff --git a/Deeploy/Targets/CortexM/Templates/CMSISUtils.py b/Deeploy/Targets/CortexM/Templates/CMSISUtils.py index 1c474c8e..d8f03597 100644 --- a/Deeploy/Targets/CortexM/Templates/CMSISUtils.py +++ b/Deeploy/Targets/CortexM/Templates/CMSISUtils.py @@ -191,10 +191,10 @@ def bindFCParams(ctxt, if isinstance(mul, str): __mul = ctxt.lookup(mul).values assert np.ndim(__mul) == 0, "Mul is not scalar!" - _mul = __mul.item() + _mul = int(__mul.item()) ctxt.lookup(mul)._deploy = False else: - _mul = mul + _mul = int(mul) if isinstance(shift, str): __shift = ctxt.lookup(shift).values diff --git a/Deeploy/Targets/Generic/Bindings.py b/Deeploy/Targets/Generic/Bindings.py index b6d3bb8c..252e084d 100644 --- a/Deeploy/Targets/Generic/Bindings.py +++ b/Deeploy/Targets/Generic/Bindings.py @@ -30,15 +30,16 @@ from Deeploy.AbstractDataTypes import PointerClass from Deeploy.CommonExtensions.CodeTransformationPasses.MemoryAllocation import ArgumentStructGeneration, \ MemoryManagementGeneration, MemoryPassthroughGeneration -from Deeploy.CommonExtensions.DataTypes import FloatDataTypes, IntegerDataTypes, SignedIntegerDataTypes, int8_t, \ - int32_t, uint8_t +from Deeploy.CommonExtensions.DataTypes import FloatDataTypes, IntegerDataTypes, SignedIntegerDataTypes, float32_t, \ + int8_t, int32_t, uint8_t from Deeploy.DeeployTypes import CodeTransformation, NodeBinding from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration from Deeploy.Targets.Generic.Templates import AddTemplate, ConcatTemplate, ConvTemplate, DebugPrintTemplate, \ - DummyTemplate, DWConvTemplate, FloatAddTemplate, GatherTemplate, GemmTemplate, IntegerDivTemplate, ITAMaxTemplate, \ - ITAPartialMaxTemplate, MatMulTemplate, MaxPoolTemplate, MulTemplate, PadTemplate, ReduceMeanTemplate, \ - ReduceSumTemplate, RequantShiftTemplate, ReshapeTemplate, RQIntegerDivTemplate, RQSiGELUTemplate, SliceTemplate, \ - TransposeTemplate, iGELUTemplate, iLayernormTemplate, 
iRMSNormTemplate, iSoftmaxTemplate + DummyTemplate, DWConvTemplate, FloatAddTemplate, FloatGemmTemplate, GatherTemplate, GemmTemplate, \ + IntegerDivTemplate, ITAMaxTemplate, ITAPartialMaxTemplate, MatMulTemplate, MaxPoolTemplate, MulTemplate, \ + PadTemplate, ReduceMeanTemplate, ReduceSumTemplate, RequantShiftTemplate, ReshapeTemplate, RQIntegerDivTemplate, \ + RQSiGELUTemplate, SliceTemplate, TransposeTemplate, iGELUTemplate, iLayernormTemplate, iRMSNormTemplate, \ + iSoftmaxTemplate from Deeploy.Targets.Generic.TypeCheckers import AddChecker, ConcatChecker, ConvChecker, DebugPrintChecker, \ DummyChecker, FloatAddChecker, GatherChecker, GELUChecker, GEMMChecker, IntegerDivChecker, MatMulChecker, \ MaxPoolChecker, MulChecker, PadChecker, ReduceMeanChecker, ReduceSumChecker, RequantShiftChecker, ReshapeChecker, \ @@ -96,10 +97,16 @@ BasicGELUBinding = NodeBinding(GELUChecker([PointerClass(int8_t)], [PointerClass(int32_t)]), iGELUTemplate.referenceTemplate, BasicTransformer) -BasicGEMMBinding = NodeBinding( - GEMMChecker( - [PointerClass(int8_t), PointerClass(int8_t), PointerClass(int32_t)], [PointerClass(int32_t)]), - GemmTemplate.referenceTemplate, BasicTransformer) +BasicGEMMBindings = [ + NodeBinding( + GEMMChecker([PointerClass(int8_t), PointerClass(int8_t), + PointerClass(int32_t)], [PointerClass(int32_t)]), GemmTemplate.referenceTemplate, BasicTransformer) +] + [ + NodeBinding( + GEMMChecker([PointerClass(float32_t), PointerClass(float32_t), + PointerClass(float32_t)], [PointerClass(float32_t)]), FloatGemmTemplate.referenceTemplate, + BasicTransformer) +] BasicIntegerDivBinding = NodeBinding( IntegerDivChecker([PointerClass(int32_t), PointerClass(int32_t)], [PointerClass(int32_t)]), diff --git a/Deeploy/Targets/Generic/Platform.py b/Deeploy/Targets/Generic/Platform.py index 9142b26c..597c6e1c 100644 --- a/Deeploy/Targets/Generic/Platform.py +++ b/Deeploy/Targets/Generic/Platform.py @@ -29,7 +29,7 @@ StructBuffer, TopologyOptimizer, TransientBuffer, 
VariableBuffer from Deeploy.Targets.Generic.Bindings import BasicAddBindings, BasicConv1DBinding, BasicConv2DBinding, \ BasicDebugPrintBindings, BasicDWConv1DBinding, BasicDWConv2DBinding, BasicGatherBindings, BasicGELUBinding, \ - BasicGEMMBinding, BasicIntegerDivBinding, BasicITAPartialSoftmaxBinding, BasicITASoftmaxBinding, \ + BasicGEMMBindings, BasicIntegerDivBinding, BasicITAPartialSoftmaxBinding, BasicITASoftmaxBinding, \ BasicLayerNormBinding, BasicMatMulBinding, BasicMaxPool2DBinding, BasicMulBindings, BasicPad1DBindings, \ BasicPad2DBindings, BasicReduceMeanBindings, BasicReduceSumBindings, BasicReshapeBindings, \ BasicRQIntegerDivBinding, BasicRQSBindings, BasicRQSGELUBinding, BasicSliceBindings, BasicSoftmaxBinding, \ @@ -56,7 +56,7 @@ FlattenMapper = NodeMapper(FlattenParser(), BasicReshapeBindings) GatherMapper = NodeMapper(GatherParser(), BasicGatherBindings) GELUMapper = NodeMapper(iGELUParser(), [BasicGELUBinding]) -GEMMMapper = NodeMapper(GenericGEMMParser(), [BasicGEMMBinding]) +GEMMMapper = NodeMapper(GenericGEMMParser(), BasicGEMMBindings) iLayerNormMapper = NodeMapper(iLayerNormParser(), [BasicLayerNormBinding]) IntegerDivMapper = NodeMapper(IntegerDivParser(), [BasicIntegerDivBinding]) ITAMaxMapper = NodeMapper(ITAMaxParser(), [BasicITASoftmaxBinding]) diff --git a/Deeploy/Targets/Generic/Templates/FloatGemmTemplate.py b/Deeploy/Targets/Generic/Templates/FloatGemmTemplate.py new file mode 100644 index 00000000..ee4f6168 --- /dev/null +++ b/Deeploy/Targets/Generic/Templates/FloatGemmTemplate.py @@ -0,0 +1,76 @@ +# ---------------------------------------------------------------------- +# +# File: FloatGemmTemplate.py +# +# Last edited: 05.01.2023 +# +# Copyright (C) 2023, ETH Zurich and University of Bologna. 
+# +# Author: Philip Wiese, ETH Zurich +# +# ---------------------------------------------------------------------- +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the License); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an AS IS BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Dict, List, Tuple + +from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation + + +class _FloatGemmTemplate(NodeTemplate): + + def __init__(self, templateStr): + super().__init__(templateStr) + + def alignToContext(self, ctxt: NetworkContext, + operatorRepresentation: OperatorRepresentation) -> Tuple[NetworkContext, Dict, List[str]]: + + A = ctxt.lookup(operatorRepresentation['A']) + B = ctxt.lookup(operatorRepresentation['B']) + C = ctxt.lookup(operatorRepresentation['C']) + Y = ctxt.lookup(operatorRepresentation['data_out']) + + operatorRepresentation['A_offset'] = 0 + operatorRepresentation['B_offset'] = 0 + operatorRepresentation['C_offset'] = 0 + operatorRepresentation['Y_offset'] = 0 + + return ctxt, operatorRepresentation, [] + + +referenceTemplate = _FloatGemmTemplate(""" +// GEMM float (Name: ${nodeName}, Op: ${nodeOp}) +BEGIN_SINGLE_CORE + ${A_type.typeName} ref_${data_out}_${A} = ${A}; + ${B_type.typeName} ref_${data_out}_${B} = ${B}; + ${C_type.typeName} ref_${data_out}_${C} = ${C}; + ${data_out_type.typeName} ref_${data_out}_${data_out} = ${data_out}; + + for(uint32_t i=0; i<${batch}; i++){ + for(uint32_t m=0; m<${M}; m++){ + for(uint32_t n=0; n<${O}; n++){ + ref_${data_out}_${data_out}[m* ${O} + n] = 
ref_${data_out}_${C}[m * ${O} + n]; + for(uint32_t k=0; k<${N}; k++){ + ref_${data_out}_${data_out}[m* ${O} + n] += ref_${data_out}_${A}[m * ${N} + k] * ref_${data_out}_${B}[k * ${O} + n]; + } + } + } + + ref_${data_out}_${A} += ${M} * ${N}; /* A is indexed [m * N + k]: M x N elements per batch */ + ref_${data_out}_${B} += ${N} * ${O}; /* B is indexed [k * O + n]: N x O elements per batch */ + ref_${data_out}_${C} += ${M} * ${O}; /* C is indexed [m * O + n]: M x O elements per batch */ + ref_${data_out}_${data_out} += ${M} * ${O}; /* output is indexed [m * O + n]: M x O elements per batch */ + } +END_SINGLE_CORE +""") diff --git a/Deeploy/Targets/Generic/Templates/SliceTemplate.py b/Deeploy/Targets/Generic/Templates/SliceTemplate.py index 0f04ded4..fa475e22 100644 --- a/Deeploy/Targets/Generic/Templates/SliceTemplate.py +++ b/Deeploy/Targets/Generic/Templates/SliceTemplate.py @@ -67,12 +67,12 @@ def alignToContext(self, ctxt: NetworkContext, for dim in data_in_shape[1:]: dimSteps.append(dimSteps[-1]//dim) %> -<% -transferSize = dimSteps[axes[-1]] +<% +transferSize = dimSteps[int(axes[-1])] %> <% -if axes[0] > 0: - preAxes = list(range(axes[0])) +if int(axes[0]) > 0: + preAxes = list(range(int(axes[0]))) else: preAxes = [] %> @@ -100,7 +100,7 @@ def alignToContext(self, ctxt: NetworkContext, % endfor memcpy(ref_${data_out}, ${data_in} + ${data_out}_offset_${axis}, ${transferSize* data_out_type.referencedType.typeWidth//8}); ref_${data_out} += ${transferSize}; -% for axis in range(axes[-1]+1): +% for axis in range(int(axes[-1])+1): } % endfor """) diff --git a/Deeploy/Targets/PULPOpen/Templates/UniformRequantShiftTemplate.py b/Deeploy/Targets/PULPOpen/Templates/UniformRequantShiftTemplate.py index 92def868..9592b0d3 100644 --- a/Deeploy/Targets/PULPOpen/Templates/UniformRequantShiftTemplate.py +++ b/Deeploy/Targets/PULPOpen/Templates/UniformRequantShiftTemplate.py @@ -77,8 +77,10 @@ def alignToContext(self, ctxt: NetworkContext, inSignage = "s" if signedI else "u" outSignage = "s" if signedO else "u" +mul_intimmediate = int(mul_immediate) +add_intimmediate = int(add_immediate) %> // UniformRequantShift (Name: ${nodeName}, Op: ${nodeOp}) 
-UniformRequantShift_${inSignage}${data_in_type.referencedType.typeWidth}_${outSignage}${data_out_type.referencedType.typeWidth}(${data_in}, ${size}, ${mul_immediate}, ${add_immediate}, ${data_out}, ${log2Dstring}, ${channel_width}, 0, 0 , ${output_min}, ${output_max}, 1); +UniformRequantShift_${inSignage}${data_in_type.referencedType.typeWidth}_${outSignage}${data_out_type.referencedType.typeWidth}(${data_in}, ${size}, ${mul_intimmediate}, ${add_intimmediate}, ${data_out}, ${log2Dstring}, ${channel_width}, 0, 0 , ${output_min}, ${output_max}, 1); """) diff --git a/DeeployTest/Platforms/Generic/main.c b/DeeployTest/Platforms/Generic/main.c index ca8c6655..b3635cf5 100644 --- a/DeeployTest/Platforms/Generic/main.c +++ b/DeeployTest/Platforms/Generic/main.c @@ -50,20 +50,21 @@ int main() { int32_t tot_err = 0; uint32_t tot = 0; - int32_t diff; - int32_t expected, actual; + float32_t diff; + float32_t expected, actual; for (uint32_t buf = 0; buf < DeeployNetwork_num_outputs; buf++) { - tot += DeeployNetwork_outputs_bytes[buf]; - for (uint32_t i = 0; i < DeeployNetwork_outputs_bytes[buf]; i++) { - expected = ((char *)testOutputVector[buf])[i]; - actual = ((char *)DeeployNetwork_outputs[buf])[i]; + tot += DeeployNetwork_outputs_bytes[buf] / sizeof(float32_t); + for (uint32_t i = 0; + i < DeeployNetwork_outputs_bytes[buf] / sizeof(float32_t); i++) { + expected = ((float32_t *)testOutputVector[buf])[i]; + actual = ((float32_t *)DeeployNetwork_outputs[buf])[i]; diff = expected - actual; - if (diff) { + if ((diff < 0 ? 
-diff : diff) > 1e-5) { tot_err += 1; - printf("Expected: %4d ", expected); - printf("Actual: %4d ", actual); - printf("Diff: %4d at Index %12u in Output %u\r\n", diff, i, buf); + printf("Expected: %10.6f ", expected); + printf("Actual: %10.6f ", actual); + printf("Diff: %10.6f at Index %12u in Output %u\r\n", diff, i, buf); } } } diff --git a/DeeployTest/Tests/testFloatGEMM/inputs.npz b/DeeployTest/Tests/testFloatGEMM/inputs.npz new file mode 100644 index 00000000..fae7083c Binary files /dev/null and b/DeeployTest/Tests/testFloatGEMM/inputs.npz differ diff --git a/DeeployTest/Tests/testFloatGEMM/network.onnx b/DeeployTest/Tests/testFloatGEMM/network.onnx new file mode 100644 index 00000000..371c5629 Binary files /dev/null and b/DeeployTest/Tests/testFloatGEMM/network.onnx differ diff --git a/DeeployTest/Tests/testFloatGEMM/outputs.npz b/DeeployTest/Tests/testFloatGEMM/outputs.npz new file mode 100644 index 00000000..83c13f9d Binary files /dev/null and b/DeeployTest/Tests/testFloatGEMM/outputs.npz differ