From d6fdb1663e709ab4bc6580c03c3a7f7f9c23e989 Mon Sep 17 00:00:00 2001 From: samanthawangdl Date: Wed, 22 Jan 2025 23:59:58 +0000 Subject: [PATCH] Add Float Conv2D, LayerNorm, Div, Softmax, Reshape, Relu on Generic Platform Refactored: - BasicIntergerDivBinding -> BasicDivBindings - IntergerDivLayer -> DivLayer - iSoftmaxLayer -> SoftmaxLayer - iLayernormLayer -> LayernormLayer - xxx binding -> bindings - BasicIntegerDivBinding -> BasicDivBindings - IntegerDivChecker -> DivChecker - ilayernormchecker -> layernormchecker - isoftmaxchecker -> softmaxchecker (Note: nlevel, sign not used for float) Added: - 6 float templates - relumapper, relulayer, relubinding, reluchecker - softmaxparser - layernormparser - divparser - reluparser Deleted: - floataddchecker --- .github/workflows/CI.yml | 8 +- Deeploy/Targets/CortexM/Platform.py | 26 +++--- Deeploy/Targets/Generic/Bindings.py | 80 ++++++++++++++----- Deeploy/Targets/Generic/Layers.py | 14 +++- Deeploy/Targets/Generic/Parsers.py | 75 ++++++++++++++++- Deeploy/Targets/Generic/Platform.py | 54 +++++++------ .../Generic/Templates/FloatConvTemplate.py | 67 ++++++++++++++++ .../Generic/Templates/FloatDivTemplate.py | 31 +++++++ .../Generic/Templates/FloatGemmTemplate.py | 6 +- .../Templates/FloatLayernormTemplate.py | 48 +++++++++++ .../Generic/Templates/FloatReluTemplate.py | 38 +++++++++ .../Generic/Templates/FloatSoftmaxTemplate.py | 48 +++++++++++ Deeploy/Targets/Generic/TypeCheckers.py | 31 ++++--- Deeploy/Targets/MemPool/Platform.py | 30 +++---- Deeploy/Targets/PULPOpen/Bindings.py | 6 +- Deeploy/Targets/PULPOpen/Platform.py | 4 +- Deeploy/Targets/Snitch/Platform.py | 12 +-- DeeployTest/Platforms/Generic/main.c | 2 +- .../Generic/inc/kernel/Convolution.h | 6 ++ TargetLibraries/Generic/inc/kernel/Div.h | 2 + .../Generic/inc/kernel/Layernorm.h | 3 + TargetLibraries/Generic/inc/kernel/Softmax.h | 2 + .../Generic/src/Convolution_fp32.c | 60 ++++++++++++++ TargetLibraries/Generic/src/Div_fp32.c | 35 ++++++++ 
TargetLibraries/Generic/src/Layernorm_fp32.c | 56 +++++++++++++ TargetLibraries/Generic/src/Relu_fp32.c | 40 ++++++++++ TargetLibraries/Generic/src/Softmax_fp32.c | 54 +++++++++++++ 27 files changed, 724 insertions(+), 114 deletions(-) create mode 100644 Deeploy/Targets/Generic/Templates/FloatConvTemplate.py create mode 100644 Deeploy/Targets/Generic/Templates/FloatDivTemplate.py create mode 100644 Deeploy/Targets/Generic/Templates/FloatLayernormTemplate.py create mode 100644 Deeploy/Targets/Generic/Templates/FloatReluTemplate.py create mode 100644 Deeploy/Targets/Generic/Templates/FloatSoftmaxTemplate.py create mode 100644 TargetLibraries/Generic/src/Convolution_fp32.c create mode 100644 TargetLibraries/Generic/src/Div_fp32.c create mode 100644 TargetLibraries/Generic/src/Layernorm_fp32.c create mode 100644 TargetLibraries/Generic/src/Relu_fp32.c create mode 100644 TargetLibraries/Generic/src/Softmax_fp32.c diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 58a05335..06e54bbf 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -48,8 +48,12 @@ jobs: testRequantizedDWConv test2DRequantizedConv iSoftmax - FloatAdder + testFloatAdder testFloatGEMM + testFloat2DConvolution + testFloatLayerNorm + testFloatDiv + testFloatRelu generic-models: uses: ./.github/workflows/TestRunnerGeneric.yml @@ -199,7 +203,7 @@ jobs: Hardswish RQHardswish testBacktracking - FloatAdder + testFloatAdder testFloatGEMM num-cores: 8 diff --git a/Deeploy/Targets/CortexM/Platform.py b/Deeploy/Targets/CortexM/Platform.py index c9d65341..1b203848 100644 --- a/Deeploy/Targets/CortexM/Platform.py +++ b/Deeploy/Targets/CortexM/Platform.py @@ -35,15 +35,15 @@ CMSISDWConv1DParser, CMSISDWConv2DParser, CMSISGEMMParser, CMSISLinearAttentionParser, CMSISMaxPool2DParser from Deeploy.Targets.CortexM.TopologyOptimizationPasses.Passes import ConvRequantMergePass, GEMMRequantMergePass, \ LinearAttentionAlignmentPass, MatMulRequantMergePass, MHSAAlignmentPass -from 
Deeploy.Targets.Generic.Bindings import BasicAddBindings, BasicDebugPrintBindings, BasicGatherBindings, \ - BasicGELUBinding, BasicIntegerDivBinding, BasicLayerNormBinding, BasicMatMulBinding, BasicMulBindings, \ +from Deeploy.Targets.Generic.Bindings import BasicAddBindings, BasicDebugPrintBindings, BasicDivBindings, \ + BasicGatherBindings, BasicGELUBinding, BasicLayerNormBindings, BasicMatMulBinding, BasicMulBindings, \ BasicPad1DBindings, BasicPad2DBindings, BasicReduceMeanBindings, BasicReduceSumBindings, BasicReshapeBindings, \ - BasicRQIntegerDivBinding, BasicRQSBindings, BasicRQSGELUBinding, BasicSliceBindings, BasicSoftmaxBinding, \ + BasicRQIntegerDivBinding, BasicRQSBindings, BasicRQSGELUBinding, BasicSliceBindings, BasicSoftmaxBindings, \ BasicTransposeBindings, DummyBinding -from Deeploy.Targets.Generic.Layers import AddLayer, CLCALayer, DebugPrintLayer, GatherLayer, IntegerDivLayer, \ - LinearAttentionLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, ReduceMeanLayer, ReduceSumLayer, \ - RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, SliceLayer, TransposeLayer, iGELULayer, \ - iLayerNormLayer, iSoftmaxLayer +from Deeploy.Targets.Generic.Layers import AddLayer, CLCALayer, DebugPrintLayer, DivLayer, GatherLayer, \ + LayerNormLayer, LinearAttentionLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, ReduceMeanLayer, \ + ReduceSumLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, SliceLayer, SoftmaxLayer, \ + TransposeLayer, iGELULayer from Deeploy.Targets.Generic.Parsers import AddParser, DebugParser, DummyParser, FlattenParser, GatherParser, \ IntegerDivParser, MatMulParser, MulParser, Pad1DParser, Pad2DParser, ReduceMeanParser, ReduceSumParser, \ RequantShiftParser, ReshapeParser, RQIntegerDivParser, RQSiGELUParser, SliceParser, TransposeParser, \ @@ -63,8 +63,8 @@ GatherMapper = NodeMapper(GatherParser(), BasicGatherBindings) GELU_int8_Mapper = NodeMapper(iGELUParser(), [BasicGELUBinding]) GEMMMapper = 
NodeMapper(CMSISGEMMParser(), CMSISGEMMBindings) -iLayerNorm_int8_Mapper = NodeMapper(iLayerNormParser(), [BasicLayerNormBinding]) -IntegerDivMapper = NodeMapper(IntegerDivParser(), [BasicIntegerDivBinding]) +iLayerNorm_int8_Mapper = NodeMapper(iLayerNormParser(), BasicLayerNormBindings) +IntegerDivMapper = NodeMapper(IntegerDivParser(), BasicDivBindings) LinearAttention_int16_Mapper = NodeMapper(CMSISLinearAttentionParser(), [CMSISLinearAttentionBinding]) MatMulMapper = NodeMapper(MatMulParser(), [BasicMatMulBinding]) MaxPool2DMapper = NodeMapper(CMSISMaxPool2DParser(), [CMSISMaxPool2DBinding]) @@ -77,7 +77,7 @@ ReshapeMapper = NodeMapper(ReshapeParser(), BasicReshapeBindings) RQGELU_int8_Mapper = NodeMapper(RQSiGELUParser(), [BasicRQSGELUBinding]) RQIntegerDivMapper = NodeMapper(RQIntegerDivParser(), [BasicRQIntegerDivBinding]) -Softmax_int8_Mapper = NodeMapper(iSoftmaxParser(), [BasicSoftmaxBinding]) +Softmax_int8_Mapper = NodeMapper(iSoftmaxParser(), BasicSoftmaxBindings) TransposeMapper = NodeMapper(TransposeParser(), BasicTransposeBindings) UnsqueezeMapper = NodeMapper(UnsqueezeParser(), BasicReshapeBindings) @@ -94,10 +94,10 @@ 'Flatten': ReshapeLayer([FlattenMapper]), 'Gather': GatherLayer([GatherMapper]), 'iGELU': iGELULayer([GELU_int8_Mapper]), - 'iLayerNorm': iLayerNormLayer([iLayerNorm_int8_Mapper]), - 'IntegerDiv': IntegerDivLayer([IntegerDivMapper]), + 'iLayerNorm': LayerNormLayer([iLayerNorm_int8_Mapper]), + 'IntegerDiv': DivLayer([IntegerDivMapper]), 'IntegerMean': ReduceMeanLayer([ReduceMeanMapper]), - 'iSoftmax': iSoftmaxLayer([Softmax_int8_Mapper]), + 'iSoftmax': SoftmaxLayer([Softmax_int8_Mapper]), 'LinearAttention': LinearAttentionLayer([LinearAttention_int16_Mapper]), 'MatMul': MatMulLayer([MatMulMapper]), 'MaxPool': MaxPoolLayer([MaxPool2DMapper]), diff --git a/Deeploy/Targets/Generic/Bindings.py b/Deeploy/Targets/Generic/Bindings.py index 252e084d..4003cebe 100644 --- a/Deeploy/Targets/Generic/Bindings.py +++ 
b/Deeploy/Targets/Generic/Bindings.py @@ -30,20 +30,21 @@ from Deeploy.AbstractDataTypes import PointerClass from Deeploy.CommonExtensions.CodeTransformationPasses.MemoryAllocation import ArgumentStructGeneration, \ MemoryManagementGeneration, MemoryPassthroughGeneration -from Deeploy.CommonExtensions.DataTypes import FloatDataTypes, IntegerDataTypes, SignedIntegerDataTypes, float32_t, \ - int8_t, int32_t, uint8_t +from Deeploy.CommonExtensions.DataTypes import IntegerDataTypes, SignedIntegerDataTypes, float32_t, int8_t, int32_t, \ + uint8_t from Deeploy.DeeployTypes import CodeTransformation, NodeBinding from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration from Deeploy.Targets.Generic.Templates import AddTemplate, ConcatTemplate, ConvTemplate, DebugPrintTemplate, \ - DummyTemplate, DWConvTemplate, FloatAddTemplate, FloatGemmTemplate, GatherTemplate, GemmTemplate, \ - IntegerDivTemplate, ITAMaxTemplate, ITAPartialMaxTemplate, MatMulTemplate, MaxPoolTemplate, MulTemplate, \ - PadTemplate, ReduceMeanTemplate, ReduceSumTemplate, RequantShiftTemplate, ReshapeTemplate, RQIntegerDivTemplate, \ + DummyTemplate, DWConvTemplate, FloatAddTemplate, FloatConvTemplate, FloatDivTemplate, FloatGemmTemplate, \ + FloatLayernormTemplate, FloatReluTemplate, FloatSoftmaxTemplate, GatherTemplate, GemmTemplate, IntegerDivTemplate, \ + ITAMaxTemplate, ITAPartialMaxTemplate, MatMulTemplate, MaxPoolTemplate, MulTemplate, PadTemplate, \ + ReduceMeanTemplate, ReduceSumTemplate, RequantShiftTemplate, ReshapeTemplate, RQIntegerDivTemplate, \ RQSiGELUTemplate, SliceTemplate, TransposeTemplate, iGELUTemplate, iLayernormTemplate, iRMSNormTemplate, \ iSoftmaxTemplate from Deeploy.Targets.Generic.TypeCheckers import AddChecker, ConcatChecker, ConvChecker, DebugPrintChecker, \ - DummyChecker, FloatAddChecker, GatherChecker, GELUChecker, GEMMChecker, IntegerDivChecker, MatMulChecker, \ - MaxPoolChecker, MulChecker, PadChecker, ReduceMeanChecker, 
ReduceSumChecker, RequantShiftChecker, ReshapeChecker, \ - RQIntegerDivChecker, SliceChecker, SoftmaxChecker, TransposeChecker, iLayerNormChecker + DivChecker, DummyChecker, GatherChecker, GELUChecker, GEMMChecker, LayerNormChecker, MatMulChecker, \ + MaxPoolChecker, MulChecker, PadChecker, ReduceMeanChecker, ReduceSumChecker, ReluChecker, RequantShiftChecker, \ + ReshapeChecker, RQIntegerDivChecker, SliceChecker, SoftmaxChecker, TransposeChecker BasicTransformer = CodeTransformation([ArgumentStructGeneration(), MemoryManagementGeneration(), FutureGeneration()]) @@ -68,8 +69,8 @@ for type1 in IntegerDataTypes for type2 in IntegerDataTypes ] + [ - NodeBinding(FloatAddChecker([PointerClass(type), PointerClass(type)], [PointerClass(type)]), - FloatAddTemplate.referenceTemplate, BasicTransformer) for type in FloatDataTypes + NodeBinding(AddChecker([PointerClass(float32_t), PointerClass(float32_t)], [PointerClass(float32_t)]), + FloatAddTemplate.referenceTemplate, BasicTransformer) ] BasicConv1DBinding = NodeBinding(ConvChecker([PointerClass(int8_t), PointerClass(int8_t)], [PointerClass(int32_t)]), @@ -78,8 +79,15 @@ BasicDWConv1DBinding = NodeBinding(ConvChecker([PointerClass(int8_t), PointerClass(int8_t)], [PointerClass(int32_t)]), DWConvTemplate.reference1DTemplate, BasicTransformer) -BasicConv2DBinding = NodeBinding(ConvChecker([PointerClass(int8_t), PointerClass(int8_t)], [PointerClass(int32_t)]), - ConvTemplate.reference2DTemplate, BasicTransformer) +BasicConv2DBindings = [ + NodeBinding(ConvChecker([PointerClass(int8_t), PointerClass(int8_t)], [PointerClass(int32_t)]), + ConvTemplate.reference2DTemplate, BasicTransformer) +] + [ + NodeBinding( + ConvChecker([PointerClass(float32_t), PointerClass(float32_t), + PointerClass(float32_t)], [PointerClass(float32_t)]), FloatConvTemplate.reference2DTemplate, + BasicTransformer) +] BasicDWConv2DBinding = NodeBinding(ConvChecker([PointerClass(int8_t), PointerClass(int8_t)], [PointerClass(int32_t)]), 
DWConvTemplate.reference2DTemplate, BasicTransformer) @@ -108,9 +116,13 @@ BasicTransformer) ] -BasicIntegerDivBinding = NodeBinding( - IntegerDivChecker([PointerClass(int32_t), PointerClass(int32_t)], [PointerClass(int32_t)]), - IntegerDivTemplate.referenceTemplate, BasicTransformer) +BasicDivBindings = [ + NodeBinding(DivChecker([PointerClass(int32_t), PointerClass(int32_t)], [PointerClass(int32_t)]), + IntegerDivTemplate.referenceTemplate, BasicTransformer) +] + [ + NodeBinding(DivChecker([PointerClass(float32_t), PointerClass(float32_t)], [PointerClass(float32_t)]), + FloatDivTemplate.referenceTemplate, BasicTransformer) +] BasicITASoftmaxBinding = NodeBinding(SoftmaxChecker([PointerClass(int8_t)], [PointerClass(int8_t)]), ITAMaxTemplate.referenceTemplate, BasicTransformer) @@ -118,10 +130,18 @@ BasicITAPartialSoftmaxBinding = NodeBinding(SoftmaxChecker([PointerClass(int8_t)], [PointerClass(int8_t)]), ITAPartialMaxTemplate.referenceTemplate, BasicTransformer) -BasicLayerNormBinding = NodeBinding( - iLayerNormChecker([PointerClass(int8_t), PointerClass(int32_t), - PointerClass(int32_t)], [PointerClass(int8_t)]), iLayernormTemplate.referenceTemplate, - BasicTransformer) +BasicLayerNormBindings = [ + NodeBinding( + LayerNormChecker([PointerClass(int8_t), PointerClass(int32_t), + PointerClass(int32_t)], [PointerClass(int8_t)]), iLayernormTemplate.referenceTemplate, + BasicTransformer) +] + [ + NodeBinding( + LayerNormChecker( + [PointerClass(float32_t), PointerClass(float32_t), + PointerClass(float32_t)], [PointerClass(float32_t)]), FloatLayernormTemplate.referenceTemplate, + BasicTransformer) +] BasicMatMulBinding = NodeBinding(MatMulChecker([PointerClass(int8_t), PointerClass(int8_t)], [PointerClass(int32_t)]), MatMulTemplate.referenceTemplate, BasicTransformer) @@ -142,6 +162,11 @@ BasicPad2DBindings = [ NodeBinding(PadChecker([PointerClass(type)], [PointerClass(type)]), PadTemplate.reference2DTemplate, BasicTransformer) for type in SignedIntegerDataTypes +] + [ 
+ NodeBinding( + PadChecker([PointerClass(float32_t), PointerClass(float32_t), + PointerClass(float32_t)], [PointerClass(float32_t)]), PadTemplate.reference2DTemplate, + BasicTransformer) ] BasicReduceMeanBindings = [ @@ -154,9 +179,15 @@ BasicTransformer) for type in SignedIntegerDataTypes ] +BasicReluBinding = NodeBinding(ReluChecker([PointerClass(float32_t)], [PointerClass(float32_t)]), + FloatReluTemplate.referenceTemplate, BasicTransformer) + BasicReshapeBindings = [ NodeBinding(ReshapeChecker([PointerClass(type), PointerClass(int32_t)], [PointerClass(type)]), ReshapeTemplate.referenceTemplate, ReshapeSkipTransformer) for type in IntegerDataTypes +] + [ + NodeBinding(ReshapeChecker([PointerClass(float32_t), PointerClass(float32_t)], [PointerClass(float32_t)]), + ReshapeTemplate.referenceTemplate, ReshapeSkipTransformer) ] BasicRQSBindings = [ @@ -181,8 +212,13 @@ PointerClass(int32_t) ], [PointerClass(int8_t)]), RQIntegerDivTemplate.referenceTemplate, BasicTransformer) -BasicSoftmaxBinding = NodeBinding(SoftmaxChecker([PointerClass(int8_t)], [PointerClass(int8_t)]), - iSoftmaxTemplate.referenceTemplate, BasicTransformer) +BasicSoftmaxBindings = [ + NodeBinding(SoftmaxChecker([PointerClass(int8_t)], [PointerClass(int8_t)]), iSoftmaxTemplate.referenceTemplate, + BasicTransformer) +] + [ + NodeBinding(SoftmaxChecker([PointerClass(float32_t)], [PointerClass(float32_t)]), + FloatSoftmaxTemplate.referenceTemplate, BasicTransformer) +] BasicTransposeBindings = [ NodeBinding(TransposeChecker([PointerClass(type)], [PointerClass(type)]), TransposeTemplate.referenceTemplate, @@ -190,7 +226,7 @@ ] BasiciRMSNormBinding = NodeBinding( - iLayerNormChecker([PointerClass(int8_t), PointerClass(int32_t)], [PointerClass(int8_t)]), + LayerNormChecker([PointerClass(int8_t), PointerClass(int32_t)], [PointerClass(int8_t)]), iRMSNormTemplate.referenceTemplate, BasicTransformer) DummyBinding = NodeBinding(DummyChecker([PointerClass(int8_t)], [PointerClass(int8_t)]), diff --git 
a/Deeploy/Targets/Generic/Layers.py b/Deeploy/Targets/Generic/Layers.py index a714e6d4..2a43dcc2 100644 --- a/Deeploy/Targets/Generic/Layers.py +++ b/Deeploy/Targets/Generic/Layers.py @@ -114,7 +114,7 @@ def __init__(self, maps: List[NodeMapper]): super().__init__(maps) -class iSoftmaxLayer(ONNXLayer): +class SoftmaxLayer(ONNXLayer): def __init__(self, maps: List[NodeMapper]): super().__init__(maps) @@ -193,13 +193,13 @@ def computeOps(self): return matmul + rqs -class IntegerDivLayer(ONNXLayer): +class DivLayer(ONNXLayer): def __init__(self, maps: List[NodeMapper]): super().__init__(maps) -class RQIntegerDivLayer(IntegerDivLayer): +class RQIntegerDivLayer(DivLayer): def __init__(self, maps: List[NodeMapper]): super().__init__(maps) @@ -365,7 +365,13 @@ def computeShapes(self, inputShapes: Shape, outputShapes: Shape, operatorReprese return (inputShapes, outputShapes) -class iLayerNormLayer(ONNXLayer): +class ReluLayer(ONNXLayer): + + def __init__(self, maps: List[NodeMapper]): + super().__init__(maps) + + +class LayerNormLayer(ONNXLayer): def __init__(self, maps: List[NodeMapper]): super().__init__(maps) diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index 852e3233..cec6f990 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -550,9 +550,6 @@ def parseNode(self, node: gs.Node) -> bool: ret = all([len(node.inputs) == 1, len(node.outputs) == 1]) - if ret: - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels'].values) - return ret def parseNodeCtxt(self, @@ -591,6 +588,7 @@ def parseNode(self, node: gs.Node) -> bool: self.operatorRepresentation['coeffB'] = int(node.attrs['coeffB'].values) self.operatorRepresentation['coeffC'] = int(node.attrs['coeffC'].values) self.operatorRepresentation['log2'] = int(node.attrs['log2'].values) + self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels'].values) return wellFormed @@ -610,9 +608,10 @@ def __init__(self): 
super().__init__() def parseNode(self, node: gs.Node) -> bool: + wellFormed = super().parseNode(node) - ret = all(['n_levels' in node.attrs]) + ret = all(['n_levels' in node.attrs, len(node.inputs) == 1, len(node.outputs) == 1]) if ret and wellFormed: self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels'].values) @@ -923,6 +922,32 @@ def parseNodeCtxt(self, return ctxt, True +class ReluParser(NodeParser): + + def __init__(self): + super().__init__() + + def parseNode(self, node: gs.Node) -> (bool): + + ret = all([len(node.inputs) == 1, len(node.outputs) == 1]) + + return ret + + def parseNodeCtxt(self, + ctxt: NetworkContext, + node: gs.Node, + channels_first: bool = True) -> Tuple[NetworkContext, bool]: + + data_in = ctxt.lookup(node.inputs[0].name) + data_out = ctxt.lookup(node.outputs[0].name) + self.operatorRepresentation['data_in'] = data_in.name + self.operatorRepresentation['data_out'] = data_out.name + self.operatorRepresentation['size'] = np.prod(data_in.shape) + self.operatorRepresentation['lastDimLength'] = data_in.shape[-1] + + return ctxt, True + + class ReshapeParser(NodeParser): def __init__(self): @@ -1494,6 +1519,18 @@ def parseNodeCtxt(self, return ctxt, True +class LayerNormParser(iLayerNormParser): + + def parseNode(self, node: gs.Node) -> (bool): + + ret = all(['epsilon' in node.attrs, len(node.inputs) == 3, len(node.outputs) == 1]) + + if ret: + self.operatorRepresentation['epsilon'] = node.attrs['epsilon'] + + return ret + + class MatMulParser(NodeParser): def __init__(self, noBiasHoisting = True): @@ -1798,6 +1835,36 @@ def parseNodeCtxt(self, return ctxt, True +class DivParser(NodeParser): + + def __init__(self): + super().__init__() + + def parseNode(self, node: gs.Node) -> bool: + + ret = all([len(node.inputs) == 2, len(node.outputs) == 1]) + + return ret + + def parseNodeCtxt(self, + ctxt: NetworkContext, + node: gs.Node, + channels_first: bool = True) -> Tuple[NetworkContext, bool]: + + inputs = ["input1", "input2"] + 
outputs = ["output"] + for idx, inputNode in enumerate(node.inputs): + if idx < len(inputs): + self.operatorRepresentation[inputs[idx]] = ctxt.lookup(inputNode.name).name + for idx, outputNode in enumerate(node.outputs): + self.operatorRepresentation[outputs[idx]] = ctxt.lookup(outputNode.name).name + + self.operatorRepresentation['size'] = np.prod(ctxt.lookup(self.operatorRepresentation['input1']).shape) + self.operatorRepresentation['lastDimLength'] = ctxt.lookup(self.operatorRepresentation['input1']).shape[-1] + + return ctxt, True + + class RQIntegerDivParser(IntegerDivParser, RQSParserInterface): def __init__(self): diff --git a/Deeploy/Targets/Generic/Platform.py b/Deeploy/Targets/Generic/Platform.py index 597c6e1c..a018ff96 100644 --- a/Deeploy/Targets/Generic/Platform.py +++ b/Deeploy/Targets/Generic/Platform.py @@ -27,29 +27,30 @@ from Deeploy.DeeployTypes import ConstantBuffer, DeploymentEngine, DeploymentPlatform, NodeMapper, NodeTemplate, \ StructBuffer, TopologyOptimizer, TransientBuffer, VariableBuffer -from Deeploy.Targets.Generic.Bindings import BasicAddBindings, BasicConv1DBinding, BasicConv2DBinding, \ - BasicDebugPrintBindings, BasicDWConv1DBinding, BasicDWConv2DBinding, BasicGatherBindings, BasicGELUBinding, \ - BasicGEMMBindings, BasicIntegerDivBinding, BasicITAPartialSoftmaxBinding, BasicITASoftmaxBinding, \ - BasicLayerNormBinding, BasicMatMulBinding, BasicMaxPool2DBinding, BasicMulBindings, BasicPad1DBindings, \ - BasicPad2DBindings, BasicReduceMeanBindings, BasicReduceSumBindings, BasicReshapeBindings, \ - BasicRQIntegerDivBinding, BasicRQSBindings, BasicRQSGELUBinding, BasicSliceBindings, BasicSoftmaxBinding, \ +from Deeploy.Targets.Generic.Bindings import BasicAddBindings, BasicConv1DBinding, BasicConv2DBindings, \ + BasicDebugPrintBindings, BasicDivBindings, BasicDWConv1DBinding, BasicDWConv2DBinding, BasicGatherBindings, \ + BasicGELUBinding, BasicGEMMBindings, BasicITAPartialSoftmaxBinding, BasicITASoftmaxBinding, \ + 
BasicLayerNormBindings, BasicMatMulBinding, BasicMaxPool2DBinding, BasicMulBindings, BasicPad1DBindings, \ + BasicPad2DBindings, BasicReduceMeanBindings, BasicReduceSumBindings, BasicReluBinding, BasicReshapeBindings, \ + BasicRQIntegerDivBinding, BasicRQSBindings, BasicRQSGELUBinding, BasicSliceBindings, BasicSoftmaxBindings, \ BasicTransposeBindings, DummyBinding -from Deeploy.Targets.Generic.Layers import AddLayer, ConvLayer, DebugPrintLayer, GatherLayer, GEMMLayer, \ - IntegerDivLayer, ITAMaxLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, ReduceMeanLayer, ReduceSumLayer, \ - RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, SliceLayer, TransposeLayer, iGELULayer, \ - iLayerNormLayer, iSoftmaxLayer -from Deeploy.Targets.Generic.Parsers import AddParser, DebugParser, DummyParser, FlattenParser, GatherParser, \ - GenericConv1DParser, GenericConv2DParser, GenericDWConv1DParser, GenericDWConv2DParser, GenericGEMMParser, \ - GenericMaxPool2DParser, IntegerDivParser, ITAMaxParser, ITAPartialMaxParser, MatMulParser, MulParser, Pad1DParser, \ - Pad2DParser, ReduceMeanParser, ReduceSumParser, RequantShiftParser, ReshapeParser, RQIntegerDivParser, \ - RQSiGELUParser, SliceParser, TransposeParser, UnsqueezeParser, iGELUParser, iLayerNormParser, iSoftmaxParser +from Deeploy.Targets.Generic.Layers import AddLayer, ConvLayer, DebugPrintLayer, DivLayer, GatherLayer, GEMMLayer, \ + ITAMaxLayer, LayerNormLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, ReduceMeanLayer, ReduceSumLayer, \ + ReluLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, SliceLayer, SoftmaxLayer, \ + TransposeLayer, iGELULayer +from Deeploy.Targets.Generic.Parsers import AddParser, DebugParser, DivParser, DummyParser, FlattenParser, \ + GatherParser, GenericConv1DParser, GenericConv2DParser, GenericDWConv1DParser, GenericDWConv2DParser, \ + GenericGEMMParser, GenericMaxPool2DParser, IntegerDivParser, ITAMaxParser, ITAPartialMaxParser, LayerNormParser, \ + 
MatMulParser, MulParser, Pad1DParser, Pad2DParser, ReduceMeanParser, ReduceSumParser, ReluParser, \ + RequantShiftParser, ReshapeParser, RQIntegerDivParser, RQSiGELUParser, SliceParser, SoftmaxParser, \ + TransposeParser, UnsqueezeParser, iGELUParser, iLayerNormParser, iSoftmaxParser from Deeploy.Targets.Generic.Templates import AllocateTemplate, FreeTemplate from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import ExtractPaddingFromConvPass, \ ExtractPaddingFromPoolPass, MatMulAddMergePass, MergeConstAddAndRequantPass, iGELURequantMergePass AddMapper = NodeMapper(AddParser(), BasicAddBindings) Conv1DMapper = NodeMapper(GenericConv1DParser(), [BasicConv1DBinding]) -Conv2DMapper = NodeMapper(GenericConv2DParser(), [BasicConv2DBinding]) +Conv2DMapper = NodeMapper(GenericConv2DParser(), BasicConv2DBindings) DebugMapper = NodeMapper(DebugParser(), BasicDebugPrintBindings) DWConv1DMapper = NodeMapper(GenericDWConv1DParser(), [BasicDWConv1DBinding]) DWConv2DMapper = NodeMapper(GenericDWConv2DParser(), [BasicDWConv2DBinding]) @@ -57,8 +58,10 @@ GatherMapper = NodeMapper(GatherParser(), BasicGatherBindings) GELUMapper = NodeMapper(iGELUParser(), [BasicGELUBinding]) GEMMMapper = NodeMapper(GenericGEMMParser(), BasicGEMMBindings) -iLayerNormMapper = NodeMapper(iLayerNormParser(), [BasicLayerNormBinding]) -IntegerDivMapper = NodeMapper(IntegerDivParser(), [BasicIntegerDivBinding]) +LayerNormMapper = NodeMapper(LayerNormParser(), BasicLayerNormBindings) +iLayerNormMapper = NodeMapper(iLayerNormParser(), BasicLayerNormBindings) +DivMapper = NodeMapper(DivParser(), BasicDivBindings) +IntegerDivMapper = NodeMapper(IntegerDivParser(), BasicDivBindings) ITAMaxMapper = NodeMapper(ITAMaxParser(), [BasicITASoftmaxBinding]) ITAPartialMaxMapper = NodeMapper(ITAPartialMaxParser(), [BasicITAPartialSoftmaxBinding]) MatMulMapper = NodeMapper(MatMulParser(), [BasicMatMulBinding]) @@ -68,11 +71,13 @@ Pad2DMapper = NodeMapper(Pad2DParser(), BasicPad2DBindings) ReduceMeanMapper = 
NodeMapper(ReduceMeanParser(), BasicReduceMeanBindings) ReduceSumMapper = NodeMapper(ReduceSumParser(), BasicReduceSumBindings) +ReluMapper = NodeMapper(ReluParser(), [BasicReluBinding]) RequantShiftMapper = NodeMapper(RequantShiftParser(), BasicRQSBindings) ReshapeMapper = NodeMapper(ReshapeParser(), BasicReshapeBindings) RQGELUMapper = NodeMapper(RQSiGELUParser(), [BasicRQSGELUBinding]) RQIntegerDivMapper = NodeMapper(RQIntegerDivParser(), [BasicRQIntegerDivBinding]) -SoftmaxMapper = NodeMapper(iSoftmaxParser(), [BasicSoftmaxBinding]) +SoftmaxMapper = NodeMapper(SoftmaxParser(), BasicSoftmaxBindings) +iSoftmaxMapper = NodeMapper(iSoftmaxParser(), BasicSoftmaxBindings) TransposeMapper = NodeMapper(TransposeParser(), BasicTransposeBindings) UnsqueezeMapper = NodeMapper(UnsqueezeParser(), BasicReshapeBindings) @@ -86,15 +91,17 @@ 'Add': AddLayer([AddMapper]), 'Conv': ConvLayer([Conv2DMapper, DWConv2DMapper, Conv1DMapper, DWConv1DMapper]), 'DebugPrint': DebugPrintLayer([DebugMapper]), - 'Div': IntegerDivLayer([IntegerDivMapper]), + 'Div': DivLayer([DivMapper]), 'Flatten': ReshapeLayer([FlattenMapper]), 'Gather': GatherLayer([GatherMapper]), 'Gemm': GEMMLayer([GEMMMapper]), 'iGELU': iGELULayer([GELUMapper]), - 'iLayerNorm': iLayerNormLayer([iLayerNormMapper]), - 'IntegerDiv': IntegerDivLayer([IntegerDivMapper]), + 'LayerNormalization': LayerNormLayer([LayerNormMapper]), + 'iLayerNorm': LayerNormLayer([iLayerNormMapper]), + 'IntegerDiv': DivLayer([IntegerDivMapper]), 'IntegerMean': ReduceMeanLayer([ReduceMeanMapper]), - 'iSoftmax': iSoftmaxLayer([SoftmaxMapper]), + 'Softmax': SoftmaxLayer([SoftmaxMapper]), + 'iSoftmax': SoftmaxLayer([iSoftmaxMapper]), 'ITAMax': ITAMaxLayer([ITAMaxMapper]), 'ITAPartialMax': ITAMaxLayer([ITAPartialMaxMapper]), 'MatMul': GEMMLayer([MatMulMapper]), @@ -104,6 +111,7 @@ 'Pad': PadLayer([Pad1DMapper, Pad2DMapper]), 'ReduceMean': ReduceMeanLayer([ReduceMeanMapper]), 'ReduceSum': ReduceSumLayer([ReduceSumMapper]), + 'Relu': 
ReluLayer([ReluMapper]), 'RequantizediGELU': RQSiGELULayer([RQGELUMapper]), 'RequantShift': RequantShiftLayer([RequantShiftMapper]), 'Reshape': ReshapeLayer([ReshapeMapper]), diff --git a/Deeploy/Targets/Generic/Templates/FloatConvTemplate.py b/Deeploy/Targets/Generic/Templates/FloatConvTemplate.py new file mode 100644 index 00000000..22719fdb --- /dev/null +++ b/Deeploy/Targets/Generic/Templates/FloatConvTemplate.py @@ -0,0 +1,67 @@ +# ---------------------------------------------------------------------- +# +# File: FLoatConvTemplate.py +# +# Last edited: 23.01.2025 +# +# Copyright (C) 2023, ETH Zurich and University of Bologna. +# +# Author: Run Wang, ETH Zurich +# +# ---------------------------------------------------------------------- +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the License); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an AS IS BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from typing import Dict, List, Tuple + +from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation + + +class _FloatConvTemplate(NodeTemplate): + + def __init__(self, templateStr): + super().__init__(templateStr) + + def alignToContext(self, ctxt: NetworkContext, + operatorRepresentation: OperatorRepresentation) -> Tuple[NetworkContext, Dict, List[str]]: + + data_in = ctxt.lookup(operatorRepresentation['data_in']) + data_out = ctxt.lookup(operatorRepresentation['data_out']) + + return ctxt, operatorRepresentation, [] + + +reference2DTemplate = _FloatConvTemplate(""" +<% +batchOffsetIn = ch_im_in * dim_im_in_x * dim_im_in_y +batchOffsetOut = ch_im_out * dim_im_out_x * dim_im_out_y +%> + +// 2D FP Conv (Name: ${nodeName}, Op: ${nodeOp}) +BEGIN_SINGLE_CORE + ${data_in_type.typeName} ref_${data_out}_${data_in} = ${data_in}; + ${data_out_type.typeName} ref_${data_out}_${data_out} = ${data_out}; + + for (uint32_t n=0; n<${batch}; ++n) { + Conv2d_fp${data_in_type.referencedType.typeWidth}_fp${weight_type.referencedType.typeWidth}_fp${data_out_type.referencedType.typeWidth}_NCHW( + ref_${data_out}_${data_in}, ${ch_im_in}, ${dim_im_in_x}, ${dim_im_in_y}, + ${weight}, ${ch_im_out}, ${dim_kernel_x}, ${dim_kernel_y}, + ${stride_x}, ${stride_y}, + ref_${data_out}_${data_out} + ); + ref_${data_out}_${data_in} += ${batchOffsetIn}; + ref_${data_out}_${data_out} += ${batchOffsetOut}; + } +END_SINGLE_CORE +""") diff --git a/Deeploy/Targets/Generic/Templates/FloatDivTemplate.py b/Deeploy/Targets/Generic/Templates/FloatDivTemplate.py new file mode 100644 index 00000000..be713b3f --- /dev/null +++ b/Deeploy/Targets/Generic/Templates/FloatDivTemplate.py @@ -0,0 +1,31 @@ +# ---------------------------------------------------------------------- +# +# File: FloatDivTemplate.py +# +# Last edited: 23.01.2025 +# +# Copyright (C) 2023, ETH Zurich and University of Bologna. 
+# +# Author: Run Wang, ETH Zurich +# +# ---------------------------------------------------------------------- +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the License); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an AS IS BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from Deeploy.DeeployTypes import NodeTemplate + +referenceTemplate = NodeTemplate(""" +// Division (Name: ${nodeName}, Op: ${nodeOp}) +SINGLE_CORE Div_fp${input1_type.referencedType.typeWidth}_fp${input2_type.referencedType.typeWidth}_fp${output_type.referencedType.typeWidth}(${input1}, ${input2}, ${output}, ${size}); +""") diff --git a/Deeploy/Targets/Generic/Templates/FloatGemmTemplate.py b/Deeploy/Targets/Generic/Templates/FloatGemmTemplate.py index ee4f6168..8a07928c 100644 --- a/Deeploy/Targets/Generic/Templates/FloatGemmTemplate.py +++ b/Deeploy/Targets/Generic/Templates/FloatGemmTemplate.py @@ -1,12 +1,12 @@ # ---------------------------------------------------------------------- # -# File: GemmTemplate.py.py +# File: FloatGemmTemplate.py.py # -# Last edited: 05.01.2023 +# Last edited: 23.01.2025 # # Copyright (C) 2023, ETH Zurich and University of Bologna. 
 #
-# Author: Philip Wiese, ETH Zurich
+# Author: Run Wang, ETH Zurich
 #
 # ----------------------------------------------------------------------
 # SPDX-License-Identifier: Apache-2.0
diff --git a/Deeploy/Targets/Generic/Templates/FloatLayernormTemplate.py b/Deeploy/Targets/Generic/Templates/FloatLayernormTemplate.py
new file mode 100644
index 00000000..8a4b7b91
--- /dev/null
+++ b/Deeploy/Targets/Generic/Templates/FloatLayernormTemplate.py
@@ -0,0 +1,48 @@
+# ----------------------------------------------------------------------
+#
+# File: FloatLayernormTemplate.py
+#
+# Last edited: 23.01.2025
+#
+# Copyright (C) 2023, ETH Zurich and University of Bologna.
+#
+# Author: Run Wang, ETH Zurich
+#
+# ----------------------------------------------------------------------
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
from typing import Dict, List, Tuple

from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation


class _LayerNormTemplate(NodeTemplate):
    """NodeTemplate for the generic floating-point layer normalization kernel.

    ``alignToContext`` does not modify the context or the operator
    representation; it only checks that the operator's I/O buffers are
    registered in the network context.
    """

    def __init__(self, templateStr):
        super().__init__(templateStr)

    def alignToContext(self, ctxt: NetworkContext,
                       operatorRepresentation: OperatorRepresentation) -> Tuple[NetworkContext, Dict, List[str]]:
        # Fix: the method returns a 3-tuple, but was annotated as a 2-tuple
        # and `List` was missing from the typing import.
        # Existence checks only: `lookup` raises if the buffer is unknown.
        ctxt.lookup(operatorRepresentation['data_in'])
        ctxt.lookup(operatorRepresentation['data_out'])

        return ctxt, operatorRepresentation, []


referenceTemplate = _LayerNormTemplate("""
// FloatLayernorm (Name: ${nodeName}, Op: ${nodeOp})
SINGLE_CORE Layernorm_fp${data_in_type.referencedType.typeWidth}_fp${data_out_type.referencedType.typeWidth}(${data_in}, ${data_out}, ${weight}, ${bias}, ${epsilon}, ${size}, ${lastDimLength});
""")
from Deeploy.DeeployTypes import NodeTemplate


class _reluTemplate(NodeTemplate):
    # Plain pass-through template, kept as a distinct class for symmetry
    # with the other float templates. The previous explicit __init__ only
    # forwarded to the base class, so it is omitted here.
    pass


referenceTemplate = _reluTemplate("""
// Relu (Name: ${nodeName}, Op: ${nodeOp})
SINGLE_CORE Relu_fp${data_in_type.referencedType.typeWidth}_fp${data_out_type.referencedType.typeWidth}(${data_in}, ${data_out}, ${size}, ${lastDimLength});
""")
from typing import Dict, List, Tuple

from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation


class _SoftmaxTemplate(NodeTemplate):
    """NodeTemplate for the generic floating-point softmax kernel.

    ``alignToContext`` leaves both the context and the operator
    representation untouched; it merely verifies that the operator's
    I/O buffers are known to the network context.
    """

    def alignToContext(self, ctxt: NetworkContext,
                       operatorRepresentation: OperatorRepresentation) -> Tuple[NetworkContext, Dict, List[str]]:
        # `lookup` raises on an unknown buffer name; the return values are
        # deliberately discarded.
        ctxt.lookup(operatorRepresentation['data_in'])
        ctxt.lookup(operatorRepresentation['data_out'])

        return ctxt, operatorRepresentation, []


referenceTemplate = _SoftmaxTemplate("""
// Softmax (Name: ${nodeName}, Op: ${nodeOp})
SINGLE_CORE Softmax_fp${data_in_type.referencedType.typeWidth}_fp${data_out_type.referencedType.typeWidth}(${data_in}, ${data_out}, ${size}, ${lastDimLength});
""")
Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]): super().__init__(input_types, output_types) @@ -263,7 +249,7 @@ def _inferSignedness(self, inputs: List[VariableBuffer], return [False] -class IntegerDivChecker(SignPropTypeChecker): +class DivChecker(SignPropTypeChecker): def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]): super().__init__(input_types, output_types) @@ -378,6 +364,19 @@ def _inferSignedness(self, inputs: List[VariableBuffer], return [False] +class ReluChecker(SignPropTypeChecker): + + def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]): + super().__init__(input_types, output_types) + + def _inferNumLevels(self, inputs, operatorRepresentation): + return [2**(self.input_types[0].referencedType.typeWidth)] + + def _inferSignedness(self, inputs: List[VariableBuffer], + operatorRepresentation: OperatorRepresentation) -> List[bool]: + return [True] + + class SoftmaxChecker(SignPropTypeChecker): def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]): diff --git a/Deeploy/Targets/MemPool/Platform.py b/Deeploy/Targets/MemPool/Platform.py index e9125d91..d89e6e5a 100644 --- a/Deeploy/Targets/MemPool/Platform.py +++ b/Deeploy/Targets/MemPool/Platform.py @@ -30,15 +30,15 @@ from Deeploy.DeeployTypes import ConstantBuffer, DeploymentEngine, DeploymentPlatform, NodeMapper, NodeTemplate, \ StructBuffer, TopologyOptimizer, TransientBuffer, VariableBuffer -from Deeploy.Targets.Generic.Bindings import BasicAddBindings, BasicConv1DBinding, BasicConv2DBinding, \ - BasicDebugPrintBindings, BasicDWConv1DBinding, BasicDWConv2DBinding, BasicGatherBindings, BasicGELUBinding, \ - BasicIntegerDivBinding, BasicLayerNormBinding, BasicMulBindings, BasicPad1DBindings, BasicPad2DBindings, \ +from Deeploy.Targets.Generic.Bindings import BasicAddBindings, BasicConv1DBinding, BasicConv2DBindings, \ + BasicDebugPrintBindings, 
BasicDivBindings, BasicDWConv1DBinding, BasicDWConv2DBinding, BasicGatherBindings, \ + BasicGELUBinding, BasicLayerNormBindings, BasicMulBindings, BasicPad1DBindings, BasicPad2DBindings, \ BasicReduceMeanBindings, BasicReduceSumBindings, BasicReshapeBindings, BasicRQIntegerDivBinding, \ - BasicRQSGELUBinding, BasicSliceBindings, BasicSoftmaxBinding, BasicTransposeBindings, DummyBinding -from Deeploy.Targets.Generic.Layers import AddLayer, ConvLayer, DebugPrintLayer, GatherLayer, GEMMLayer, \ - IntegerDivLayer, ITAMaxLayer, MatMulLayer, MaxPoolLayer, MHSALayer, MulLayer, PadLayer, ReduceMeanLayer, \ + BasicRQSGELUBinding, BasicSliceBindings, BasicSoftmaxBindings, BasicTransposeBindings, DummyBinding +from Deeploy.Targets.Generic.Layers import AddLayer, ConvLayer, DebugPrintLayer, DivLayer, GatherLayer, GEMMLayer, \ + ITAMaxLayer, LayerNormLayer, MatMulLayer, MaxPoolLayer, MHSALayer, MulLayer, PadLayer, ReduceMeanLayer, \ ReduceSumLayer, RequantShiftLayer, ReshapeLayer, RQGEMMLayer, RQIntegerDivLayer, RQMatMulLayer, RQSiGELULayer, \ - SliceLayer, TransposeLayer, iGELULayer, iLayerNormLayer, iSoftmaxLayer + SliceLayer, SoftmaxLayer, TransposeLayer, iGELULayer from Deeploy.Targets.Generic.Parsers import AddParser, DebugParser, DummyParser, FlattenParser, GatherParser, \ GenericConv1DParser, GenericConv2DParser, GenericDWConv1DParser, GenericDWConv2DParser, GenericGEMMParser, \ GenericMaxPool2DParser, IntegerDivParser, ITAMaxParser, MatMulParser, MulParser, Pad1DParser, Pad2DParser, \ @@ -61,7 +61,7 @@ # (they support a wider range of attribute values) GenericConv1D_Mapper = NodeMapper(GenericConv1DParser(), [BasicConv1DBinding]) GenericDWConv1D_Mapper = NodeMapper(GenericDWConv1DParser(), [BasicDWConv1DBinding]) -GenericConv2D_Mapper = NodeMapper(GenericConv2DParser(), [BasicConv2DBinding]) +GenericConv2D_Mapper = NodeMapper(GenericConv2DParser(), BasicConv2DBindings) GenericDWConv2D_Mapper = NodeMapper(GenericDWConv2DParser(), [BasicDWConv2DBinding]) 
GenericConv_Mappers = [GenericConv2D_Mapper, GenericDWConv2D_Mapper, GenericConv1D_Mapper, GenericDWConv1D_Mapper] @@ -72,8 +72,8 @@ Flatten_Mapper = NodeMapper(FlattenParser(), BasicReshapeBindings) Gather_Mapper = NodeMapper(GatherParser(), BasicGatherBindings) GELU_Mapper = NodeMapper(iGELUParser(), [BasicGELUBinding]) -iLayerNorm_Mapper = NodeMapper(iLayerNormParser(), [BasicLayerNormBinding]) -IntegerDiv_Mapper = NodeMapper(IntegerDivParser(), [BasicIntegerDivBinding]) +iLayerNorm_Mapper = NodeMapper(iLayerNormParser(), BasicLayerNormBindings) +IntegerDiv_Mapper = NodeMapper(IntegerDivParser(), BasicDivBindings) ITAMaxMapper = NodeMapper(ITAMaxParser(), [MemPoolITASoftmaxBinding_8_8]) Mul_Mapper = NodeMapper(MulParser(), BasicMulBindings) Pad1D_Mapper = NodeMapper(Pad1DParser(), BasicPad1DBindings) @@ -84,7 +84,7 @@ Reshape_Mapper = NodeMapper(ReshapeParser(), BasicReshapeBindings) RQGELU_Mapper = NodeMapper(RQSiGELUParser(), [BasicRQSGELUBinding]) RQIntegerDiv_Mapper = NodeMapper(RQIntegerDivParser(), [BasicRQIntegerDivBinding]) -Softmax_Mapper = NodeMapper(iSoftmaxParser(), [BasicSoftmaxBinding]) +Softmax_Mapper = NodeMapper(iSoftmaxParser(), BasicSoftmaxBindings) Transpose_Mapper = NodeMapper(TransposeParser(), BasicTransposeBindings) Unsqueeze_Mapper = NodeMapper(UnsqueezeParser(), BasicReshapeBindings) @@ -116,15 +116,15 @@ 'Add': AddLayer([Add_Mapper]), 'Conv': ConvLayer(Conv_Mappers + GenericConv_Mappers), # Mapper with higher priority should be placed first! 
'DebugPrint': DebugPrintLayer([DebugPrint_Mapper]), - 'Div': IntegerDivLayer([IntegerDiv_Mapper]), + 'Div': DivLayer([IntegerDiv_Mapper]), 'Flatten': ReshapeLayer([Flatten_Mapper]), 'Gather': GatherLayer([Gather_Mapper]), 'Gemm': GEMMLayer([GEMM_Mapper]), 'iGELU': iGELULayer([GELU_Mapper]), - 'iLayerNorm': iLayerNormLayer([iLayerNorm_Mapper]), - 'IntegerDiv': IntegerDivLayer([IntegerDiv_Mapper]), + 'iLayerNorm': LayerNormLayer([iLayerNorm_Mapper]), + 'IntegerDiv': DivLayer([IntegerDiv_Mapper]), 'IntegerMean': ReduceMeanLayer([ReduceMean_Mapper]), - 'iSoftmax': iSoftmaxLayer([Softmax_Mapper]), + 'iSoftmax': SoftmaxLayer([Softmax_Mapper]), 'ITAMax': ITAMaxLayer([ITAMaxMapper]), 'MatMul': MatMulLayer([MatMul_Mapper]), 'MatMulInteger': MatMulLayer([MatMul_Mapper]), diff --git a/Deeploy/Targets/PULPOpen/Bindings.py b/Deeploy/Targets/PULPOpen/Bindings.py index ac61768f..366a863b 100644 --- a/Deeploy/Targets/PULPOpen/Bindings.py +++ b/Deeploy/Targets/PULPOpen/Bindings.py @@ -39,8 +39,8 @@ from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration from Deeploy.Targets.Generic.Templates import ConcatTemplate, FloatGemmTemplate, RQSiGELUTemplate, iHardswishTemplate from Deeploy.Targets.Generic.TypeCheckers import ConcatChecker, GELUChecker, GEMMChecker, HardswishChecker, \ - MatMulChecker, MulChecker, ReduceMeanChecker, RQAddChecker, RQHardswishChecker, SliceChecker, SoftmaxChecker, \ - TransposeChecker, iLayerNormChecker + LayerNormChecker, MatMulChecker, MulChecker, ReduceMeanChecker, RQAddChecker, RQHardswishChecker, SliceChecker, \ + SoftmaxChecker, TransposeChecker from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPClusterSynch import PULPSynchCoresPass from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPClusterTiling import PULPClusterTiling from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPL3Tiling import PULPL3Tiling @@ -284,7 +284,7 @@ ] PULPiRMSNormBindings = [ - 
NodeBinding(iLayerNormChecker([PointerClass(int8_t), PointerClass(int32_t)], [PointerClass(int8_t)]), + NodeBinding(LayerNormChecker([PointerClass(int8_t), PointerClass(int32_t)], [PointerClass(int8_t)]), iRMSNormTemplate.referenceTemplate, ForkTransformer) ] diff --git a/Deeploy/Targets/PULPOpen/Platform.py b/Deeploy/Targets/PULPOpen/Platform.py index 4c7cda84..8d9ef6ed 100644 --- a/Deeploy/Targets/PULPOpen/Platform.py +++ b/Deeploy/Targets/PULPOpen/Platform.py @@ -37,7 +37,7 @@ BasicReshapeBindings, BasicRQIntegerDivBinding from Deeploy.Targets.Generic.Layers import AddLayer, ConcatLayer, GatherLayer, GEMMLayer, MatMulLayer, MaxPoolLayer, \ MulLayer, PadLayer, ReduceMeanLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, \ - RQSiHardswishLayer, SliceLayer, TransposeLayer, iHardswishLayer, iRMSNormLayer, iSoftmaxLayer + RQSiHardswishLayer, SliceLayer, SoftmaxLayer, TransposeLayer, iHardswishLayer, iRMSNormLayer from Deeploy.Targets.Generic.Parsers import AddParser, ConcatParser, FlattenParser, GatherParser, GEMMParser, \ MatMulParser, MulParser, Pad1DParser, Pad2DParser, ReduceMeanParser, RequantShiftParser, ReshapeParser, \ RQAddParser, RQIntegerDivParser, RQSiGELUParser, RQSiHardswishParser, SliceParser, TransposeParser, \ @@ -112,7 +112,7 @@ 'RQIntegerDiv': RQIntegerDivLayer([RQIntegerDivMapper]), 'MatMul': MatMulLayer([MatMulMapper]), 'IntegerMean': ReduceMeanLayer([ReduceMeanMapper]), - 'iSoftmax': iSoftmaxLayer([Softmax_int8_Mapper]), + 'iSoftmax': SoftmaxLayer([Softmax_int8_Mapper]), 'ReduceMean': ReduceMeanLayer([ReduceMeanMapper]), 'RequantShift': RequantShiftLayer([UniformRequantShiftMapper, RequantShiftMapper]), 'Add': AddLayer([AddMapper]), diff --git a/Deeploy/Targets/Snitch/Platform.py b/Deeploy/Targets/Snitch/Platform.py index 3b45d9e2..3200f73f 100644 --- a/Deeploy/Targets/Snitch/Platform.py +++ b/Deeploy/Targets/Snitch/Platform.py @@ -30,10 +30,10 @@ from Deeploy.DeeployTypes import ConstantBuffer, DeploymentEngine, 
DeploymentPlatform, NodeMapper, NodeTemplate, \ StructBuffer, TopologyOptimizer, TransientBuffer, VariableBuffer -from Deeploy.Targets.Generic.Bindings import BasicGatherBindings, BasicLayerNormBinding, BasicMatMulBinding, \ +from Deeploy.Targets.Generic.Bindings import BasicGatherBindings, BasicLayerNormBindings, BasicMatMulBinding, \ BasicPad1DBindings, BasicPad2DBindings, BasicReshapeBindings, BasicRQIntegerDivBinding -from Deeploy.Targets.Generic.Layers import AddLayer, GatherLayer, GEMMLayer, MatMulLayer, PadLayer, ReshapeLayer, \ - RQGEMMLayer, RQIntegerDivLayer, iLayerNormLayer, iNoNormLayer, iSoftmaxLayer +from Deeploy.Targets.Generic.Layers import AddLayer, GatherLayer, GEMMLayer, LayerNormLayer, MatMulLayer, PadLayer, \ + ReshapeLayer, RQGEMMLayer, RQIntegerDivLayer, SoftmaxLayer, iNoNormLayer from Deeploy.Targets.Generic.Parsers import AddParser, GatherParser, MatMulParser, Pad1DParser, Pad2DParser, \ RQAddParser, RQIntegerDivParser, UnsqueezeParser, iLayerNormParser, iNoNormParser, iSoftmaxParser from Deeploy.Targets.Generic.Templates import AllocateTemplate as BasicAllocateTemplate @@ -59,7 +59,7 @@ RqGemmMapper = NodeMapper(SnitchRQGEMMParser(), SnitchRqGemmTilingReadyBindings) iSoftmaxMapper = NodeMapper(iSoftmaxParser(), SnitchiSoftmaxTilingReadyBindings) iNoNormMapper = NodeMapper(iNoNormParser(), SnitchiNoNormTilingReadyBindings) -iLayerNormMapper = NodeMapper(iLayerNormParser(), [BasicLayerNormBinding]) +iLayerNormMapper = NodeMapper(iLayerNormParser(), BasicLayerNormBindings) RQAddMapper = NodeMapper(RQAddParser(), SnitchRQAddTilingReadyBindings) AddMapper = NodeMapper(AddParser(), SnitchAddTileReadyBindings) @@ -71,9 +71,9 @@ 'MatMul': MatMulLayer([MatMulMapper]), 'Gemm': GEMMLayer([GemmMapper]), 'RQGemm': RQGEMMLayer([RqGemmMapper]), - 'iSoftmax': iSoftmaxLayer([iSoftmaxMapper]), + 'iSoftmax': SoftmaxLayer([iSoftmaxMapper]), 'iNoNorm': iNoNormLayer([iNoNormMapper]), - 'iLayerNorm': iLayerNormLayer([iLayerNormMapper]), + 'iLayerNorm': 
LayerNormLayer([iLayerNormMapper]), 'RequantizedAdd': AddLayer([RQAddMapper]), 'Add': AddLayer([AddMapper]), } diff --git a/DeeployTest/Platforms/Generic/main.c b/DeeployTest/Platforms/Generic/main.c index b3635cf5..2cdb3ef6 100644 --- a/DeeployTest/Platforms/Generic/main.c +++ b/DeeployTest/Platforms/Generic/main.c @@ -60,7 +60,7 @@ int main() { actual = ((float32_t *)DeeployNetwork_outputs[buf])[i]; diff = expected - actual; - if ((diff < 0 ? -diff : diff) > 1e-5) { + if ((diff < 0 ? -diff : diff) > 1e-4) { tot_err += 1; printf("Expected: %10.6f ", expected); printf("Actual: %10.6f ", actual); diff --git a/TargetLibraries/Generic/inc/kernel/Convolution.h b/TargetLibraries/Generic/inc/kernel/Convolution.h index 43c4a1ff..45ae07ca 100644 --- a/TargetLibraries/Generic/inc/kernel/Convolution.h +++ b/TargetLibraries/Generic/inc/kernel/Convolution.h @@ -59,4 +59,10 @@ void Conv2d_s8_s8_s32_NCHW(int8_t const *__restrict__ pSrcA, uint32_t C, int32_t *__restrict__ pDstC, int32_t input_offset, int32_t output_offset); +void Conv2d_fp32_fp32_fp32_NCHW(const float *__restrict__ pSrcA, uint32_t C, + uint32_t H_padded, uint32_t W_padded, + const float *__restrict__ pSrcB, uint32_t F, + uint32_t P, uint32_t Q, uint32_t SP, uint32_t SQ, + float *__restrict__ pDstC); + #endif //__DEEPLOY_BASIC_MATH_CONVOLUTION_KERNEL_HEADER_ diff --git a/TargetLibraries/Generic/inc/kernel/Div.h b/TargetLibraries/Generic/inc/kernel/Div.h index 672cff21..2b2b8e27 100644 --- a/TargetLibraries/Generic/inc/kernel/Div.h +++ b/TargetLibraries/Generic/inc/kernel/Div.h @@ -45,4 +45,6 @@ void Div_s32_s32(int32_t *data_in_nom, int32_t *data_in_denom, int32_t size_nom, int32_t size_denom, int32_t nomStep, int32_t denomStep, int32_t *data_out, int32_t Delta, int32_t eps, int32_t eta); +void Div_fp32_fp32_fp32(float32_t *data_in_1, float32_t *data_in_2, float32_t *data_out, int32_t size); + #endif //__DEEPLOY_BASIC_MATH_DIV_KERNEL_HEADER_ diff --git a/TargetLibraries/Generic/inc/kernel/Layernorm.h 
b/TargetLibraries/Generic/inc/kernel/Layernorm.h index 9539096a..6f4accba 100644 --- a/TargetLibraries/Generic/inc/kernel/Layernorm.h +++ b/TargetLibraries/Generic/inc/kernel/Layernorm.h @@ -45,4 +45,7 @@ void Layernorm_s8_s8(int8_t *data_in, int8_t *data_out, int32_t *weight, int32_t *bias, int32_t input_offset, int32_t size, int32_t lastDimLength, int32_t log2D); +void Layernorm_fp32_fp32(float *data_in, float *data_out, float *scale, + float *bias, float epsilon, int32_t size, int32_t lastDimLength); + #endif //__DEEPLOY_BASIC_MATH_LAYERNORM_KERNEL_HEADER_ diff --git a/TargetLibraries/Generic/inc/kernel/Softmax.h b/TargetLibraries/Generic/inc/kernel/Softmax.h index ebe48746..ed376754 100644 --- a/TargetLibraries/Generic/inc/kernel/Softmax.h +++ b/TargetLibraries/Generic/inc/kernel/Softmax.h @@ -88,4 +88,6 @@ void ITAPartialMax_s8(int8_t const *__restrict__ pSrcA, uint32_t lastDimLength, uint32_t group_width, uint32_t n_levels); +void Softmax_fp32_fp32(float32_t *input, float32_t *output, int32_t size, int32_t last_dim_length); + #endif //__DEEPLOY_BASIC_MATH_SOFTMAX_KERNEL_HEADER_ diff --git a/TargetLibraries/Generic/src/Convolution_fp32.c b/TargetLibraries/Generic/src/Convolution_fp32.c new file mode 100644 index 00000000..0a62b4d0 --- /dev/null +++ b/TargetLibraries/Generic/src/Convolution_fp32.c @@ -0,0 +1,60 @@ +/* ===================================================================== + * Title: Convolution_float32.c + * Description: Float32 version of Conv2D with NCHW format (pre-padded input) + * + * Date: 23.01.2025 + * + * ===================================================================== */ + +/* + * Copyright (C) 2023 ETH Zurich and University of Bologna. + * + * Authors: + * - Run Wang, ETH Zurich + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. 
/*
 * Float32 2D convolution in NCHW layout over a pre-padded input.
 *
 * pSrcA:  input  activations, C x H_padded x W_padded (padding already applied)
 * pSrcB:  filter weights,     F x C x P x Q
 * pDstC:  output activations, F x H_out x W_out
 * SP/SQ:  vertical / horizontal stride
 *
 * Output dimensions follow from the valid-convolution formula on the
 * padded input. Accumulation order (f, row, col, c, ky, kx) matches the
 * reference implementation, so floating-point results are bit-identical.
 */
void Conv2d_fp32_fp32_fp32_NCHW(const float *__restrict__ pSrcA, uint32_t C,
                                uint32_t H_padded, uint32_t W_padded,
                                const float *__restrict__ pSrcB, uint32_t F,
                                uint32_t P, uint32_t Q, uint32_t SP, uint32_t SQ,
                                float *__restrict__ pDstC) {
  const uint32_t H_out = (H_padded - P) / SP + 1;
  const uint32_t W_out = (W_padded - Q) / SQ + 1;

  for (uint32_t f = 0; f < F; ++f) {
    /* Hoist per-filter bases out of the spatial loops. */
    const float *filter = pSrcB + (size_t)f * C * P * Q;
    float *out = pDstC + (size_t)f * H_out * W_out;

    for (uint32_t oy = 0; oy < H_out; ++oy) {
      for (uint32_t ox = 0; ox < W_out; ++ox) {
        float acc = 0.0f;

        for (uint32_t c = 0; c < C; ++c) {
          const float *in_ch = pSrcA + (size_t)c * H_padded * W_padded;
          const float *w_ch = filter + (size_t)c * P * Q;

          for (uint32_t ky = 0; ky < P; ++ky) {
            for (uint32_t kx = 0; kx < Q; ++kx) {
              acc += in_ch[(oy * SP + ky) * W_padded + (ox * SQ + kx)] *
                     w_ch[ky * Q + kx];
            }
          }
        }

        out[oy * W_out + ox] = acc;
      }
    }
  }
}
+ * + * Authors: + * - Run Wang, ETH Zurich + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "DeeployBasicMath.h" + +void Div_fp32_fp32_fp32(float32_t *data_in_1, float32_t *data_in_2, float32_t *data_out, int32_t size){ + for (int i = 0; i < size; i++) { + data_out[i] = data_in_1[i] / data_in_2[i]; + } +} \ No newline at end of file diff --git a/TargetLibraries/Generic/src/Layernorm_fp32.c b/TargetLibraries/Generic/src/Layernorm_fp32.c new file mode 100644 index 00000000..2262d922 --- /dev/null +++ b/TargetLibraries/Generic/src/Layernorm_fp32.c @@ -0,0 +1,56 @@ +/* ===================================================================== + * Title: Layernorm_fp32.c + * Description: + * + * $Date: 22.01.2025 + * + * ===================================================================== */ +/* + * Copyright (C) 2022 ETH Zurich and University of Bologna. + * + * Authors: + * - Run Wang, ETH Zurich + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "DeeployBasicMath.h" + +void Layernorm_fp32_fp32(float32_t *data_in, float32_t *data_out, float32_t *scale, float32_t *bias, float32_t epsilon, int32_t size, int32_t lastDimLength) { + float32_t mean; + float32_t sum; + float32_t std; + float32_t temp; + + for (int i = 0; i < (size / lastDimLength); i++) { + sum = 0.0f; + mean = 0.0f; + for (int j = 0; j < lastDimLength; j++) { + mean += data_in[j + i * lastDimLength]; + } + mean = mean / lastDimLength; + for (int j = 0; j < lastDimLength; j++) { + temp = data_in[j + i * lastDimLength] - mean; + sum += temp * temp; + } + sum = sum / lastDimLength; + sum += epsilon; + std = sqrtf(sum); + + for (int j = 0; j < lastDimLength; j++) { + data_out[j + i * lastDimLength] = ((data_in[j + i * lastDimLength] - mean) / std) * scale[j] + bias[j]; + } + } +} \ No newline at end of file diff --git a/TargetLibraries/Generic/src/Relu_fp32.c b/TargetLibraries/Generic/src/Relu_fp32.c new file mode 100644 index 00000000..c7789b89 --- /dev/null +++ b/TargetLibraries/Generic/src/Relu_fp32.c @@ -0,0 +1,40 @@ +/* ===================================================================== + * Title: Softmax_fp8.c + * Description: + * + * $Date: 22.01.2025 + * + * ===================================================================== */ +/* + * Copyright (C) 2022 ETH Zurich and University of Bologna. + * + * - Run Wang, ETH Zurich + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "DeeployBasicMath.h" + + +void Relu_fp32_fp32(float32_t* input, float32_t* output, int32_t size, int32_t last_dim_length) { + + int32_t batch_size = size / last_dim_length; + + for (int b = 0; b < batch_size; b++) { + for (int i = 0; i < last_dim_length; i++) { + output[b * last_dim_length + i] = MAX(input[b * last_dim_length + i], 0.0f); + } + } +} \ No newline at end of file diff --git a/TargetLibraries/Generic/src/Softmax_fp32.c b/TargetLibraries/Generic/src/Softmax_fp32.c new file mode 100644 index 00000000..ae15158f --- /dev/null +++ b/TargetLibraries/Generic/src/Softmax_fp32.c @@ -0,0 +1,54 @@ +/* ===================================================================== + * Title: Softmax_fp8.c + * Description: + * + * $Date: 22.01.2025 + * + * ===================================================================== */ +/* + * Copyright (C) 2022 ETH Zurich and University of Bologna. + * + * - Run Wang, ETH Zurich + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "DeeployBasicMath.h" + + +void Softmax_fp32_fp32(float32_t* input, float32_t* output, int32_t size, int32_t last_dim_length) { + + int32_t batch_size = size / last_dim_length; + + for (int b = 0; b < batch_size; b++) { + float32_t max_val = *((float32_t*)&(uint32_t){0xFF800000}); + float sum = 0.0f; + + for (int i = 0; i < last_dim_length; i++) { + if (input[b * last_dim_length + i] > max_val) { + max_val = input[b * last_dim_length + i]; + } + } + + for (int i = 0; i < last_dim_length; i++) { + output[b * last_dim_length + i] = expf(input[b * last_dim_length + i] - max_val); + sum += output[b * last_dim_length + i]; + } + + for (int i = 0; i < last_dim_length; i++) { + output[b * last_dim_length + i] /= sum; + } + } +} \ No newline at end of file