Skip to content

Commit

Permalink
Add Float Support & Float GEMM for Generic (#25)
Browse files Browse the repository at this point in the history
1. Float Support for Constbuffer
2. Float GEMM on Generic Target
3. Added FP GEMM to CI
4. Fixed Float bugs in Testslice, CMSIS TestUtil, DivInteger
5. Fixed AbstractDataType Float Bugs

Co-authored-by: Victor Jung <[email protected]>
  • Loading branch information
runwangdl and Victor-Jung authored Jan 16, 2025
1 parent 3c64bda commit bb7e56d
Show file tree
Hide file tree
Showing 15 changed files with 142 additions and 36 deletions.
1 change: 1 addition & 0 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ jobs:
test2DRequantizedConv
iSoftmax
FloatAdder
testFloatGEMM
generic-models:
uses: ./.github/workflows/TestRunnerGeneric.yml
Expand Down
13 changes: 13 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
### Fixed
- Fix broken softmax kernel for generic platform ([#2](https://github.com/pulp-platform/Deeploy/pull/2)).


## Minor CI and Readme Improvements

### Added
Expand All @@ -16,6 +17,7 @@
- Update the link of the Docker container used to run the CI with the Docker published by this repo instead of my fork.
- Add a retry on timeout step for large network tests. This is a temporary fix to address the sporadic freeze happening at the compilation stage, see [this issue](https://github.com/pulp-platform/Deeploy/issues/9).


## Floating Point Support

### Added
Expand All @@ -26,6 +28,7 @@
- Extend `testType.py` with float tests
- LIMITATION: Current LLVM compiler does not support bfp16 and fp16, these types are commented in the library header


## Snitch Cluster Support

### Added
Expand All @@ -38,6 +41,7 @@
### Changed
- Update the Banshee's commit to include a recent PR.


## Snitch Cluster Tiling Support

### Added
Expand All @@ -53,6 +57,7 @@
### Changed
- Add the possibility of changing the simulator when using the snitch-tiled test runner.


## GVSOC support for the Snitch Cluster Platform

### Added
Expand All @@ -62,3 +67,11 @@

### Changed
- Add the RTL library to the snitch_cluster build process in the Makefile, required for GVSOC simulation


## Add Float Support & Float GEMM for Generic
- Float Support for Constbuffer
- Float GEMM on Generic
- Added FP GEMM to CI
- Fixed Float bugs in Testslice, CMSIS TestUtil, DivInteger
- Fixed AbstractDataType Float Bugs
1 change: 1 addition & 0 deletions CONTRIBUTORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ All contributors have agreed to an open-source release of their work in the Deep
* Luka Macan
* Alberto Dequino
* Francesco Conti
* Run Wang
11 changes: 8 additions & 3 deletions Deeploy/AbstractDataTypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ def partialOrderUpcast(cls, otherCls: Type[Immediate]) -> bool:
return False

@classmethod
def checkValue(cls, value: Union[int, Iterable[int]], ctxt: Optional[_NetworkContext] = None):
def checkValue(cls, value: Union[int, Iterable[int], np.ndarray], ctxt: Optional[_NetworkContext] = None):

if isinstance(value, int):
_max, _min = (value, value)
Expand All @@ -238,6 +238,7 @@ def checkValue(cls, value: Union[int, Iterable[int]], ctxt: Optional[_NetworkCon
class FloatImmediate(Immediate[Union[float, Iterable[float]], _ImmediateType]):
typeMantissa: int #: int: Represents the number of bits reserved for the mantissa part
typeExponent: int #: int: Represents the number of bits reserved for the exponent part
typeMin: float

@_classproperty
def typeExponentMax(cls) -> int:
Expand All @@ -249,6 +250,10 @@ def typeExponentOffset(cls) -> int:
# The offset added to the exponent
return 2**(cls.typeExponent - 1) - 1

@_classproperty
def typeMin(cls) -> float:
return -math.inf

@classmethod
def partialOrderUpcast(cls, otherCls: Type[Immediate]) -> bool:
if issubclass(otherCls, FloatImmediate):
Expand All @@ -257,7 +262,7 @@ def partialOrderUpcast(cls, otherCls: Type[Immediate]) -> bool:
return False

@classmethod
def checkValue(cls, value: Union[float, Iterable[float]], ctxt: Optional[_NetworkContext] = None):
def checkValue(cls, value: Union[float, Iterable[float], np.ndarray], ctxt: Optional[_NetworkContext] = None):
"""
This method tries to manually cast standard python's standard immediate float precision values
(64 bits) to an arbitrary FP representation and check if the new representation is close enough
Expand All @@ -268,7 +273,7 @@ def checkValue(cls, value: Union[float, Iterable[float]], ctxt: Optional[_Networ
if isinstance(value, float):
_val_list.append(value)
elif isinstance(value, np.ndarray):
_val_list = value.tolist()
_val_list = value.flatten().tolist()
elif isinstance(value, Iterable):
for i in value:
_val_list.append(i)
Expand Down
6 changes: 3 additions & 3 deletions Deeploy/DeeployTypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,9 +385,9 @@ class ConstantBuffer(VariableBuffer):
def __init__(self, name: str = '', shape = [1], values = [0]):
super().__init__(name, shape)
values = np.asarray(values)
intArray = values.astype(int)
assert (np.abs(values - intArray)).max() < 0.001, "Constant value {name} is NOT an integer!"
self.values = intArray #: np.array: Stores the underlying weights in Ptyhon-type representation
# intArray = values.astype(int)
# assert (np.abs(values - intArray)).max() < 0.001, "Constant value {name} is NOT an integer!"
self.values = values #: np.array: Stores the underlying weights in Python-type representation

# Do not override - ConstantBuffers are assumed to be always live!
self._live = True
Expand Down
4 changes: 2 additions & 2 deletions Deeploy/Targets/CortexM/Templates/CMSISUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,10 +191,10 @@ def bindFCParams(ctxt,
if isinstance(mul, str):
__mul = ctxt.lookup(mul).values
assert np.ndim(__mul) == 0, "Mul is not scalar!"
_mul = __mul.item()
_mul = int(__mul.item())
ctxt.lookup(mul)._deploy = False
else:
_mul = mul
_mul = int(mul)

if isinstance(shift, str):
__shift = ctxt.lookup(shift).values
Expand Down
27 changes: 17 additions & 10 deletions Deeploy/Targets/Generic/Bindings.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,16 @@
from Deeploy.AbstractDataTypes import PointerClass
from Deeploy.CommonExtensions.CodeTransformationPasses.MemoryAllocation import ArgumentStructGeneration, \
MemoryManagementGeneration, MemoryPassthroughGeneration
from Deeploy.CommonExtensions.DataTypes import FloatDataTypes, IntegerDataTypes, SignedIntegerDataTypes, int8_t, \
int32_t, uint8_t
from Deeploy.CommonExtensions.DataTypes import FloatDataTypes, IntegerDataTypes, SignedIntegerDataTypes, float32_t, \
int8_t, int32_t, uint8_t
from Deeploy.DeeployTypes import CodeTransformation, NodeBinding
from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration
from Deeploy.Targets.Generic.Templates import AddTemplate, ConcatTemplate, ConvTemplate, DebugPrintTemplate, \
DummyTemplate, DWConvTemplate, FloatAddTemplate, GatherTemplate, GemmTemplate, IntegerDivTemplate, ITAMaxTemplate, \
ITAPartialMaxTemplate, MatMulTemplate, MaxPoolTemplate, MulTemplate, PadTemplate, ReduceMeanTemplate, \
ReduceSumTemplate, RequantShiftTemplate, ReshapeTemplate, RQIntegerDivTemplate, RQSiGELUTemplate, SliceTemplate, \
TransposeTemplate, iGELUTemplate, iLayernormTemplate, iRMSNormTemplate, iSoftmaxTemplate
DummyTemplate, DWConvTemplate, FloatAddTemplate, FloatGemmTemplate, GatherTemplate, GemmTemplate, \
IntegerDivTemplate, ITAMaxTemplate, ITAPartialMaxTemplate, MatMulTemplate, MaxPoolTemplate, MulTemplate, \
PadTemplate, ReduceMeanTemplate, ReduceSumTemplate, RequantShiftTemplate, ReshapeTemplate, RQIntegerDivTemplate, \
RQSiGELUTemplate, SliceTemplate, TransposeTemplate, iGELUTemplate, iLayernormTemplate, iRMSNormTemplate, \
iSoftmaxTemplate
from Deeploy.Targets.Generic.TypeCheckers import AddChecker, ConcatChecker, ConvChecker, DebugPrintChecker, \
DummyChecker, FloatAddChecker, GatherChecker, GELUChecker, GEMMChecker, IntegerDivChecker, MatMulChecker, \
MaxPoolChecker, MulChecker, PadChecker, ReduceMeanChecker, ReduceSumChecker, RequantShiftChecker, ReshapeChecker, \
Expand Down Expand Up @@ -96,10 +97,16 @@
BasicGELUBinding = NodeBinding(GELUChecker([PointerClass(int8_t)], [PointerClass(int32_t)]),
iGELUTemplate.referenceTemplate, BasicTransformer)

BasicGEMMBinding = NodeBinding(
GEMMChecker(
[PointerClass(int8_t), PointerClass(int8_t), PointerClass(int32_t)], [PointerClass(int32_t)]),
GemmTemplate.referenceTemplate, BasicTransformer)
# GEMM bindings offered by the Generic target, written as a single list literal.
# The order is preserved: the int8 x int8 + int32 -> int32 binding comes first,
# followed by the all-float32 binding.
BasicGEMMBindings = [
    # Integer GEMM: int8 A and B, int32 C, int32 output.
    NodeBinding(
        GEMMChecker(
            [PointerClass(int8_t), PointerClass(int8_t), PointerClass(int32_t)],
            [PointerClass(int32_t)],
        ),
        GemmTemplate.referenceTemplate,
        BasicTransformer,
    ),
    # Float GEMM: float32 A, B and C, float32 output.
    NodeBinding(
        GEMMChecker(
            [PointerClass(float32_t), PointerClass(float32_t), PointerClass(float32_t)],
            [PointerClass(float32_t)],
        ),
        FloatGemmTemplate.referenceTemplate,
        BasicTransformer,
    ),
]

BasicIntegerDivBinding = NodeBinding(
IntegerDivChecker([PointerClass(int32_t), PointerClass(int32_t)], [PointerClass(int32_t)]),
Expand Down
4 changes: 2 additions & 2 deletions Deeploy/Targets/Generic/Platform.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
StructBuffer, TopologyOptimizer, TransientBuffer, VariableBuffer
from Deeploy.Targets.Generic.Bindings import BasicAddBindings, BasicConv1DBinding, BasicConv2DBinding, \
BasicDebugPrintBindings, BasicDWConv1DBinding, BasicDWConv2DBinding, BasicGatherBindings, BasicGELUBinding, \
BasicGEMMBinding, BasicIntegerDivBinding, BasicITAPartialSoftmaxBinding, BasicITASoftmaxBinding, \
BasicGEMMBindings, BasicIntegerDivBinding, BasicITAPartialSoftmaxBinding, BasicITASoftmaxBinding, \
BasicLayerNormBinding, BasicMatMulBinding, BasicMaxPool2DBinding, BasicMulBindings, BasicPad1DBindings, \
BasicPad2DBindings, BasicReduceMeanBindings, BasicReduceSumBindings, BasicReshapeBindings, \
BasicRQIntegerDivBinding, BasicRQSBindings, BasicRQSGELUBinding, BasicSliceBindings, BasicSoftmaxBinding, \
Expand All @@ -56,7 +56,7 @@
FlattenMapper = NodeMapper(FlattenParser(), BasicReshapeBindings)
GatherMapper = NodeMapper(GatherParser(), BasicGatherBindings)
GELUMapper = NodeMapper(iGELUParser(), [BasicGELUBinding])
GEMMMapper = NodeMapper(GenericGEMMParser(), [BasicGEMMBinding])
GEMMMapper = NodeMapper(GenericGEMMParser(), BasicGEMMBindings)
iLayerNormMapper = NodeMapper(iLayerNormParser(), [BasicLayerNormBinding])
IntegerDivMapper = NodeMapper(IntegerDivParser(), [BasicIntegerDivBinding])
ITAMaxMapper = NodeMapper(ITAMaxParser(), [BasicITASoftmaxBinding])
Expand Down
76 changes: 76 additions & 0 deletions Deeploy/Targets/Generic/Templates/FloatGemmTemplate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# ----------------------------------------------------------------------
#
# File: FloatGemmTemplate.py
#
# Last edited: 05.01.2023
#
# Copyright (C) 2023, ETH Zurich and University of Bologna.
#
# Author: Philip Wiese, ETH Zurich
#
# ----------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Dict, List, Tuple

from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation


class _FloatGemmTemplate(NodeTemplate):
    """Node template for the float GEMM kernel.

    Besides forwarding the template string to ``NodeTemplate``, this class only
    injects zero-valued offset entries into the operator representation.
    """

    def __init__(self, templateStr):
        """Forward ``templateStr`` unchanged to the ``NodeTemplate`` constructor."""
        super().__init__(templateStr)

    def alignToContext(self, ctxt: NetworkContext,
                       operatorRepresentation: OperatorRepresentation) -> Tuple[NetworkContext, Dict, List[str]]:
        """Look up the operand buffers and set all operand offsets to zero.

        Args:
            ctxt: Network context used to look up the operand buffers.
            operatorRepresentation: Mapping that must contain the keys ``A``,
                ``B``, ``C`` and ``data_out``; it is updated in place with
                ``A_offset``, ``B_offset``, ``C_offset`` and ``Y_offset``.

        Returns:
            The unchanged context, the updated operator representation and an
            empty list.
        """
        # The lookup results are not used here; the calls are kept so that any
        # checking done by ctxt.lookup still happens
        # (NOTE(review): presumably it raises for unknown names - confirm).
        for operandKey in ('A', 'B', 'C', 'data_out'):
            ctxt.lookup(operatorRepresentation[operandKey])

        # Float operands carry no zero-point, so every offset is 0.
        for offsetPrefix in ('A', 'B', 'C', 'Y'):
            operatorRepresentation[f'{offsetPrefix}_offset'] = 0

        return ctxt, operatorRepresentation, []


# Reference C kernel for the float GEMM. For each of the ${batch} iterations it
# computes Y = C + A x B, where the inner loops index
#   A as an M x N matrix (m * N + k),
#   B as an N x O matrix (k * O + n),
#   C and the output as M x O matrices (m * O + n).
# After each batch, every working pointer must advance by the number of elements
# of the matrix it addresses: M*N for A, N*O for B, and M*O for C and the output.
# The previous strides (M*O for A, M*N for C and the output) only matched when
# N == O and otherwise addressed the wrong elements from the second batch on.
# Transposition and alpha/beta scaling are not applied by this kernel.
referenceTemplate = _FloatGemmTemplate("""
// GEMM float (Name: ${nodeName}, Op: ${nodeOp})
BEGIN_SINGLE_CORE
    ${A_type.typeName} ref_${data_out}_${A} = ${A};
    ${B_type.typeName} ref_${data_out}_${B} = ${B};
    ${C_type.typeName} ref_${data_out}_${C} = ${C};
    ${data_out_type.typeName} ref_${data_out}_${data_out} = ${data_out};
    for(uint32_t i=0; i<${batch}; i++){
        for(uint32_t m=0; m<${M}; m++){
            for(uint32_t n=0; n<${O}; n++){
                ref_${data_out}_${data_out}[m* ${O} + n] = ref_${data_out}_${C}[m * ${O} + n];
                for(uint32_t k=0; k<${N}; k++){
                    ref_${data_out}_${data_out}[m* ${O} + n] += ref_${data_out}_${A}[m * ${N} + k] * ref_${data_out}_${B}[k * ${O} + n];
                }
            }
        }
        ref_${data_out}_${A} += ${M} * ${N};
        ref_${data_out}_${B} += ${N} * ${O};
        ref_${data_out}_${C} += ${M} * ${O};
        ref_${data_out}_${data_out} += ${M} * ${O};
    }
END_SINGLE_CORE
""")
10 changes: 5 additions & 5 deletions Deeploy/Targets/Generic/Templates/SliceTemplate.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,12 @@ def alignToContext(self, ctxt: NetworkContext,
for dim in data_in_shape[1:]:
dimSteps.append(dimSteps[-1]//dim)
%>
<%
transferSize = dimSteps[axes[-1]]
<%
transferSize = dimSteps[int(axes[-1])]
%>
<%
if axes[0] > 0:
preAxes = list(range(axes[0]))
if int(axes[0]) > 0:
preAxes = list(range(int(axes[0])))
else:
preAxes = []
%>
Expand Down Expand Up @@ -100,7 +100,7 @@ def alignToContext(self, ctxt: NetworkContext,
% endfor
memcpy(ref_${data_out}, ${data_in} + ${data_out}_offset_${axis}, ${transferSize* data_out_type.referencedType.typeWidth//8});
ref_${data_out} += ${transferSize};
% for axis in range(axes[-1]+1):
% for axis in range(int(axes[-1])+1):
}
% endfor
""")
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,10 @@ def alignToContext(self, ctxt: NetworkContext,
inSignage = "s" if signedI else "u"
outSignage = "s" if signedO else "u"
mul_intimmediate = int(mul_immediate)
add_intimmediate = int(add_immediate)
%>
// UniformRequantShift (Name: ${nodeName}, Op: ${nodeOp})
UniformRequantShift_${inSignage}${data_in_type.referencedType.typeWidth}_${outSignage}${data_out_type.referencedType.typeWidth}(${data_in}, ${size}, ${mul_immediate}, ${add_immediate}, ${data_out}, ${log2Dstring}, ${channel_width}, 0, 0 , ${output_min}, ${output_max}, 1);
UniformRequantShift_${inSignage}${data_in_type.referencedType.typeWidth}_${outSignage}${data_out_type.referencedType.typeWidth}(${data_in}, ${size}, ${mul_intimmediate}, ${add_intimmediate}, ${data_out}, ${log2Dstring}, ${channel_width}, 0, 0 , ${output_min}, ${output_max}, 1);
""")
21 changes: 11 additions & 10 deletions DeeployTest/Platforms/Generic/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,20 +50,21 @@ int main() {

int32_t tot_err = 0;
uint32_t tot = 0;
int32_t diff;
int32_t expected, actual;
float32_t diff;
float32_t expected, actual;
for (uint32_t buf = 0; buf < DeeployNetwork_num_outputs; buf++) {
tot += DeeployNetwork_outputs_bytes[buf];
for (uint32_t i = 0; i < DeeployNetwork_outputs_bytes[buf]; i++) {
expected = ((char *)testOutputVector[buf])[i];
actual = ((char *)DeeployNetwork_outputs[buf])[i];
tot += DeeployNetwork_outputs_bytes[buf] / sizeof(float32_t);
for (uint32_t i = 0;
i < DeeployNetwork_outputs_bytes[buf] / sizeof(float32_t); i++) {
expected = ((float32_t *)testOutputVector[buf])[i];
actual = ((float32_t *)DeeployNetwork_outputs[buf])[i];
diff = expected - actual;

if (diff) {
if ((diff < 0 ? -diff : diff) > 1e-5) {
tot_err += 1;
printf("Expected: %4d ", expected);
printf("Actual: %4d ", actual);
printf("Diff: %4d at Index %12u in Output %u\r\n", diff, i, buf);
printf("Expected: %10.6f ", expected);
printf("Actual: %10.6f ", actual);
printf("Diff: %10.6f at Index %12u in Output %u\r\n", diff, i, buf);
}
}
}
Expand Down
Binary file added DeeployTest/Tests/testFloatGEMM/inputs.npz
Binary file not shown.
Binary file added DeeployTest/Tests/testFloatGEMM/network.onnx
Binary file not shown.
Binary file added DeeployTest/Tests/testFloatGEMM/outputs.npz
Binary file not shown.

0 comments on commit bb7e56d

Please sign in to comment.