From d6fdb1663e709ab4bc6580c03c3a7f7f9c23e989 Mon Sep 17 00:00:00 2001 From: samanthawangdl Date: Wed, 22 Jan 2025 23:59:58 +0000 Subject: [PATCH] Add Float Conv2D, LayerNorm, Div, Softmax, Reshape, Relu on Generic Platform Refactored: - BasicIntergerDivBinding -> BasicDivBindings - IntergerDivLayer -> DivLayer - iSoftmaxLayer -> SoftmaxLayer - iLayernormLayer -> LayernormLayer - xxx binding -> bindings - BasicIntegerDivBinding -> BasicDivBindings - IntegerDivChecker -> DivChecker - ilayernormchecker -> layernormchecker - isoftmaxchecker -> softmaxchecker (Note: nlevel, sign not used for float) Added: - 6 float templates - relumapper, relulayer, relubinding, reluchecker - softmaxparser - layernormparser - divparser - reluparser Deleted: - floataddchecker --- .github/workflows/CI.yml | 8 +- Deeploy/Targets/CortexM/Platform.py | 26 +++--- Deeploy/Targets/Generic/Bindings.py | 80 ++++++++++++++----- Deeploy/Targets/Generic/Layers.py | 14 +++- Deeploy/Targets/Generic/Parsers.py | 75 ++++++++++++++++- Deeploy/Targets/Generic/Platform.py | 54 +++++++------ .../Generic/Templates/FloatConvTemplate.py | 67 ++++++++++++++++ .../Generic/Templates/FloatDivTemplate.py | 31 +++++++ .../Generic/Templates/FloatGemmTemplate.py | 6 +- .../Templates/FloatLayernormTemplate.py | 48 +++++++++++ .../Generic/Templates/FloatReluTemplate.py | 38 +++++++++ .../Generic/Templates/FloatSoftmaxTemplate.py | 48 +++++++++++ Deeploy/Targets/Generic/TypeCheckers.py | 31 ++++--- Deeploy/Targets/MemPool/Platform.py | 30 +++---- Deeploy/Targets/PULPOpen/Bindings.py | 6 +- Deeploy/Targets/PULPOpen/Platform.py | 4 +- Deeploy/Targets/Snitch/Platform.py | 12 +-- DeeployTest/Platforms/Generic/main.c | 2 +- .../Generic/inc/kernel/Convolution.h | 6 ++ TargetLibraries/Generic/inc/kernel/Div.h | 2 + .../Generic/inc/kernel/Layernorm.h | 3 + TargetLibraries/Generic/inc/kernel/Softmax.h | 2 + .../Generic/src/Convolution_fp32.c | 60 ++++++++++++++ TargetLibraries/Generic/src/Div_fp32.c | 35 ++++++++ 
TargetLibraries/Generic/src/Layernorm_fp32.c | 56 +++++++++++++ TargetLibraries/Generic/src/Relu_fp32.c | 40 ++++++++++ TargetLibraries/Generic/src/Softmax_fp32.c | 54 +++++++++++++ 27 files changed, 724 insertions(+), 114 deletions(-) create mode 100644 Deeploy/Targets/Generic/Templates/FloatConvTemplate.py create mode 100644 Deeploy/Targets/Generic/Templates/FloatDivTemplate.py create mode 100644 Deeploy/Targets/Generic/Templates/FloatLayernormTemplate.py create mode 100644 Deeploy/Targets/Generic/Templates/FloatReluTemplate.py create mode 100644 Deeploy/Targets/Generic/Templates/FloatSoftmaxTemplate.py create mode 100644 TargetLibraries/Generic/src/Convolution_fp32.c create mode 100644 TargetLibraries/Generic/src/Div_fp32.c create mode 100644 TargetLibraries/Generic/src/Layernorm_fp32.c create mode 100644 TargetLibraries/Generic/src/Relu_fp32.c create mode 100644 TargetLibraries/Generic/src/Softmax_fp32.c diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 58a05335..06e54bbf 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -48,8 +48,12 @@ jobs: testRequantizedDWConv test2DRequantizedConv iSoftmax - FloatAdder + testFloatAdder testFloatGEMM + testFloat2DConvolution + testFloatLayerNorm + testFloatDiv + testFloatRelu generic-models: uses: ./.github/workflows/TestRunnerGeneric.yml @@ -199,7 +203,7 @@ jobs: Hardswish RQHardswish testBacktracking - FloatAdder + testFloatAdder testFloatGEMM num-cores: 8 diff --git a/Deeploy/Targets/CortexM/Platform.py b/Deeploy/Targets/CortexM/Platform.py index c9d65341..1b203848 100644 --- a/Deeploy/Targets/CortexM/Platform.py +++ b/Deeploy/Targets/CortexM/Platform.py @@ -35,15 +35,15 @@ CMSISDWConv1DParser, CMSISDWConv2DParser, CMSISGEMMParser, CMSISLinearAttentionParser, CMSISMaxPool2DParser from Deeploy.Targets.CortexM.TopologyOptimizationPasses.Passes import ConvRequantMergePass, GEMMRequantMergePass, \ LinearAttentionAlignmentPass, MatMulRequantMergePass, MHSAAlignmentPass -from 
Deeploy.Targets.Generic.Bindings import BasicAddBindings, BasicDebugPrintBindings, BasicGatherBindings, \ - BasicGELUBinding, BasicIntegerDivBinding, BasicLayerNormBinding, BasicMatMulBinding, BasicMulBindings, \ +from Deeploy.Targets.Generic.Bindings import BasicAddBindings, BasicDebugPrintBindings, BasicDivBindings, \ + BasicGatherBindings, BasicGELUBinding, BasicLayerNormBindings, BasicMatMulBinding, BasicMulBindings, \ BasicPad1DBindings, BasicPad2DBindings, BasicReduceMeanBindings, BasicReduceSumBindings, BasicReshapeBindings, \ - BasicRQIntegerDivBinding, BasicRQSBindings, BasicRQSGELUBinding, BasicSliceBindings, BasicSoftmaxBinding, \ + BasicRQIntegerDivBinding, BasicRQSBindings, BasicRQSGELUBinding, BasicSliceBindings, BasicSoftmaxBindings, \ BasicTransposeBindings, DummyBinding -from Deeploy.Targets.Generic.Layers import AddLayer, CLCALayer, DebugPrintLayer, GatherLayer, IntegerDivLayer, \ - LinearAttentionLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, ReduceMeanLayer, ReduceSumLayer, \ - RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, SliceLayer, TransposeLayer, iGELULayer, \ - iLayerNormLayer, iSoftmaxLayer +from Deeploy.Targets.Generic.Layers import AddLayer, CLCALayer, DebugPrintLayer, DivLayer, GatherLayer, \ + LayerNormLayer, LinearAttentionLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, ReduceMeanLayer, \ + ReduceSumLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, SliceLayer, SoftmaxLayer, \ + TransposeLayer, iGELULayer from Deeploy.Targets.Generic.Parsers import AddParser, DebugParser, DummyParser, FlattenParser, GatherParser, \ IntegerDivParser, MatMulParser, MulParser, Pad1DParser, Pad2DParser, ReduceMeanParser, ReduceSumParser, \ RequantShiftParser, ReshapeParser, RQIntegerDivParser, RQSiGELUParser, SliceParser, TransposeParser, \ @@ -63,8 +63,8 @@ GatherMapper = NodeMapper(GatherParser(), BasicGatherBindings) GELU_int8_Mapper = NodeMapper(iGELUParser(), [BasicGELUBinding]) GEMMMapper = 
NodeMapper(CMSISGEMMParser(), CMSISGEMMBindings) -iLayerNorm_int8_Mapper = NodeMapper(iLayerNormParser(), [BasicLayerNormBinding]) -IntegerDivMapper = NodeMapper(IntegerDivParser(), [BasicIntegerDivBinding]) +iLayerNorm_int8_Mapper = NodeMapper(iLayerNormParser(), BasicLayerNormBindings) +IntegerDivMapper = NodeMapper(IntegerDivParser(), BasicDivBindings) LinearAttention_int16_Mapper = NodeMapper(CMSISLinearAttentionParser(), [CMSISLinearAttentionBinding]) MatMulMapper = NodeMapper(MatMulParser(), [BasicMatMulBinding]) MaxPool2DMapper = NodeMapper(CMSISMaxPool2DParser(), [CMSISMaxPool2DBinding]) @@ -77,7 +77,7 @@ ReshapeMapper = NodeMapper(ReshapeParser(), BasicReshapeBindings) RQGELU_int8_Mapper = NodeMapper(RQSiGELUParser(), [BasicRQSGELUBinding]) RQIntegerDivMapper = NodeMapper(RQIntegerDivParser(), [BasicRQIntegerDivBinding]) -Softmax_int8_Mapper = NodeMapper(iSoftmaxParser(), [BasicSoftmaxBinding]) +Softmax_int8_Mapper = NodeMapper(iSoftmaxParser(), BasicSoftmaxBindings) TransposeMapper = NodeMapper(TransposeParser(), BasicTransposeBindings) UnsqueezeMapper = NodeMapper(UnsqueezeParser(), BasicReshapeBindings) @@ -94,10 +94,10 @@ 'Flatten': ReshapeLayer([FlattenMapper]), 'Gather': GatherLayer([GatherMapper]), 'iGELU': iGELULayer([GELU_int8_Mapper]), - 'iLayerNorm': iLayerNormLayer([iLayerNorm_int8_Mapper]), - 'IntegerDiv': IntegerDivLayer([IntegerDivMapper]), + 'iLayerNorm': LayerNormLayer([iLayerNorm_int8_Mapper]), + 'IntegerDiv': DivLayer([IntegerDivMapper]), 'IntegerMean': ReduceMeanLayer([ReduceMeanMapper]), - 'iSoftmax': iSoftmaxLayer([Softmax_int8_Mapper]), + 'iSoftmax': SoftmaxLayer([Softmax_int8_Mapper]), 'LinearAttention': LinearAttentionLayer([LinearAttention_int16_Mapper]), 'MatMul': MatMulLayer([MatMulMapper]), 'MaxPool': MaxPoolLayer([MaxPool2DMapper]), diff --git a/Deeploy/Targets/Generic/Bindings.py b/Deeploy/Targets/Generic/Bindings.py index 252e084d..4003cebe 100644 --- a/Deeploy/Targets/Generic/Bindings.py +++ 
b/Deeploy/Targets/Generic/Bindings.py @@ -30,20 +30,21 @@ from Deeploy.AbstractDataTypes import PointerClass from Deeploy.CommonExtensions.CodeTransformationPasses.MemoryAllocation import ArgumentStructGeneration, \ MemoryManagementGeneration, MemoryPassthroughGeneration -from Deeploy.CommonExtensions.DataTypes import FloatDataTypes, IntegerDataTypes, SignedIntegerDataTypes, float32_t, \ - int8_t, int32_t, uint8_t +from Deeploy.CommonExtensions.DataTypes import IntegerDataTypes, SignedIntegerDataTypes, float32_t, int8_t, int32_t, \ + uint8_t from Deeploy.DeeployTypes import CodeTransformation, NodeBinding from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration from Deeploy.Targets.Generic.Templates import AddTemplate, ConcatTemplate, ConvTemplate, DebugPrintTemplate, \ - DummyTemplate, DWConvTemplate, FloatAddTemplate, FloatGemmTemplate, GatherTemplate, GemmTemplate, \ - IntegerDivTemplate, ITAMaxTemplate, ITAPartialMaxTemplate, MatMulTemplate, MaxPoolTemplate, MulTemplate, \ - PadTemplate, ReduceMeanTemplate, ReduceSumTemplate, RequantShiftTemplate, ReshapeTemplate, RQIntegerDivTemplate, \ + DummyTemplate, DWConvTemplate, FloatAddTemplate, FloatConvTemplate, FloatDivTemplate, FloatGemmTemplate, \ + FloatLayernormTemplate, FloatReluTemplate, FloatSoftmaxTemplate, GatherTemplate, GemmTemplate, IntegerDivTemplate, \ + ITAMaxTemplate, ITAPartialMaxTemplate, MatMulTemplate, MaxPoolTemplate, MulTemplate, PadTemplate, \ + ReduceMeanTemplate, ReduceSumTemplate, RequantShiftTemplate, ReshapeTemplate, RQIntegerDivTemplate, \ RQSiGELUTemplate, SliceTemplate, TransposeTemplate, iGELUTemplate, iLayernormTemplate, iRMSNormTemplate, \ iSoftmaxTemplate from Deeploy.Targets.Generic.TypeCheckers import AddChecker, ConcatChecker, ConvChecker, DebugPrintChecker, \ - DummyChecker, FloatAddChecker, GatherChecker, GELUChecker, GEMMChecker, IntegerDivChecker, MatMulChecker, \ - MaxPoolChecker, MulChecker, PadChecker, ReduceMeanChecker, 
ReduceSumChecker, RequantShiftChecker, ReshapeChecker, \ - RQIntegerDivChecker, SliceChecker, SoftmaxChecker, TransposeChecker, iLayerNormChecker + DivChecker, DummyChecker, GatherChecker, GELUChecker, GEMMChecker, LayerNormChecker, MatMulChecker, \ + MaxPoolChecker, MulChecker, PadChecker, ReduceMeanChecker, ReduceSumChecker, ReluChecker, RequantShiftChecker, \ + ReshapeChecker, RQIntegerDivChecker, SliceChecker, SoftmaxChecker, TransposeChecker BasicTransformer = CodeTransformation([ArgumentStructGeneration(), MemoryManagementGeneration(), FutureGeneration()]) @@ -68,8 +69,8 @@ for type1 in IntegerDataTypes for type2 in IntegerDataTypes ] + [ - NodeBinding(FloatAddChecker([PointerClass(type), PointerClass(type)], [PointerClass(type)]), - FloatAddTemplate.referenceTemplate, BasicTransformer) for type in FloatDataTypes + NodeBinding(AddChecker([PointerClass(float32_t), PointerClass(float32_t)], [PointerClass(float32_t)]), + FloatAddTemplate.referenceTemplate, BasicTransformer) ] BasicConv1DBinding = NodeBinding(ConvChecker([PointerClass(int8_t), PointerClass(int8_t)], [PointerClass(int32_t)]), @@ -78,8 +79,15 @@ BasicDWConv1DBinding = NodeBinding(ConvChecker([PointerClass(int8_t), PointerClass(int8_t)], [PointerClass(int32_t)]), DWConvTemplate.reference1DTemplate, BasicTransformer) -BasicConv2DBinding = NodeBinding(ConvChecker([PointerClass(int8_t), PointerClass(int8_t)], [PointerClass(int32_t)]), - ConvTemplate.reference2DTemplate, BasicTransformer) +BasicConv2DBindings = [ + NodeBinding(ConvChecker([PointerClass(int8_t), PointerClass(int8_t)], [PointerClass(int32_t)]), + ConvTemplate.reference2DTemplate, BasicTransformer) +] + [ + NodeBinding( + ConvChecker([PointerClass(float32_t), PointerClass(float32_t), + PointerClass(float32_t)], [PointerClass(float32_t)]), FloatConvTemplate.reference2DTemplate, + BasicTransformer) +] BasicDWConv2DBinding = NodeBinding(ConvChecker([PointerClass(int8_t), PointerClass(int8_t)], [PointerClass(int32_t)]), 
DWConvTemplate.reference2DTemplate, BasicTransformer) @@ -108,9 +116,13 @@ BasicTransformer) ] -BasicIntegerDivBinding = NodeBinding( - IntegerDivChecker([PointerClass(int32_t), PointerClass(int32_t)], [PointerClass(int32_t)]), - IntegerDivTemplate.referenceTemplate, BasicTransformer) +BasicDivBindings = [ + NodeBinding(DivChecker([PointerClass(int32_t), PointerClass(int32_t)], [PointerClass(int32_t)]), + IntegerDivTemplate.referenceTemplate, BasicTransformer) +] + [ + NodeBinding(DivChecker([PointerClass(float32_t), PointerClass(float32_t)], [PointerClass(float32_t)]), + FloatDivTemplate.referenceTemplate, BasicTransformer) +] BasicITASoftmaxBinding = NodeBinding(SoftmaxChecker([PointerClass(int8_t)], [PointerClass(int8_t)]), ITAMaxTemplate.referenceTemplate, BasicTransformer) @@ -118,10 +130,18 @@ BasicITAPartialSoftmaxBinding = NodeBinding(SoftmaxChecker([PointerClass(int8_t)], [PointerClass(int8_t)]), ITAPartialMaxTemplate.referenceTemplate, BasicTransformer) -BasicLayerNormBinding = NodeBinding( - iLayerNormChecker([PointerClass(int8_t), PointerClass(int32_t), - PointerClass(int32_t)], [PointerClass(int8_t)]), iLayernormTemplate.referenceTemplate, - BasicTransformer) +BasicLayerNormBindings = [ + NodeBinding( + LayerNormChecker([PointerClass(int8_t), PointerClass(int32_t), + PointerClass(int32_t)], [PointerClass(int8_t)]), iLayernormTemplate.referenceTemplate, + BasicTransformer) +] + [ + NodeBinding( + LayerNormChecker( + [PointerClass(float32_t), PointerClass(float32_t), + PointerClass(float32_t)], [PointerClass(float32_t)]), FloatLayernormTemplate.referenceTemplate, + BasicTransformer) +] BasicMatMulBinding = NodeBinding(MatMulChecker([PointerClass(int8_t), PointerClass(int8_t)], [PointerClass(int32_t)]), MatMulTemplate.referenceTemplate, BasicTransformer) @@ -142,6 +162,11 @@ BasicPad2DBindings = [ NodeBinding(PadChecker([PointerClass(type)], [PointerClass(type)]), PadTemplate.reference2DTemplate, BasicTransformer) for type in SignedIntegerDataTypes +] + [ 
+ NodeBinding( + PadChecker([PointerClass(float32_t), PointerClass(float32_t), + PointerClass(float32_t)], [PointerClass(float32_t)]), PadTemplate.reference2DTemplate, + BasicTransformer) ] BasicReduceMeanBindings = [ @@ -154,9 +179,15 @@ BasicTransformer) for type in SignedIntegerDataTypes ] +BasicReluBinding = NodeBinding(ReluChecker([PointerClass(float32_t)], [PointerClass(float32_t)]), + FloatReluTemplate.referenceTemplate, BasicTransformer) + BasicReshapeBindings = [ NodeBinding(ReshapeChecker([PointerClass(type), PointerClass(int32_t)], [PointerClass(type)]), ReshapeTemplate.referenceTemplate, ReshapeSkipTransformer) for type in IntegerDataTypes +] + [ + NodeBinding(ReshapeChecker([PointerClass(float32_t), PointerClass(float32_t)], [PointerClass(float32_t)]), + ReshapeTemplate.referenceTemplate, ReshapeSkipTransformer) ] BasicRQSBindings = [ @@ -181,8 +212,13 @@ PointerClass(int32_t) ], [PointerClass(int8_t)]), RQIntegerDivTemplate.referenceTemplate, BasicTransformer) -BasicSoftmaxBinding = NodeBinding(SoftmaxChecker([PointerClass(int8_t)], [PointerClass(int8_t)]), - iSoftmaxTemplate.referenceTemplate, BasicTransformer) +BasicSoftmaxBindings = [ + NodeBinding(SoftmaxChecker([PointerClass(int8_t)], [PointerClass(int8_t)]), iSoftmaxTemplate.referenceTemplate, + BasicTransformer) +] + [ + NodeBinding(SoftmaxChecker([PointerClass(float32_t)], [PointerClass(float32_t)]), + FloatSoftmaxTemplate.referenceTemplate, BasicTransformer) +] BasicTransposeBindings = [ NodeBinding(TransposeChecker([PointerClass(type)], [PointerClass(type)]), TransposeTemplate.referenceTemplate, @@ -190,7 +226,7 @@ ] BasiciRMSNormBinding = NodeBinding( - iLayerNormChecker([PointerClass(int8_t), PointerClass(int32_t)], [PointerClass(int8_t)]), + LayerNormChecker([PointerClass(int8_t), PointerClass(int32_t)], [PointerClass(int8_t)]), iRMSNormTemplate.referenceTemplate, BasicTransformer) DummyBinding = NodeBinding(DummyChecker([PointerClass(int8_t)], [PointerClass(int8_t)]), diff --git 
a/Deeploy/Targets/Generic/Layers.py b/Deeploy/Targets/Generic/Layers.py index a714e6d4..2a43dcc2 100644 --- a/Deeploy/Targets/Generic/Layers.py +++ b/Deeploy/Targets/Generic/Layers.py @@ -114,7 +114,7 @@ def __init__(self, maps: List[NodeMapper]): super().__init__(maps) -class iSoftmaxLayer(ONNXLayer): +class SoftmaxLayer(ONNXLayer): def __init__(self, maps: List[NodeMapper]): super().__init__(maps) @@ -193,13 +193,13 @@ def computeOps(self): return matmul + rqs -class IntegerDivLayer(ONNXLayer): +class DivLayer(ONNXLayer): def __init__(self, maps: List[NodeMapper]): super().__init__(maps) -class RQIntegerDivLayer(IntegerDivLayer): +class RQIntegerDivLayer(DivLayer): def __init__(self, maps: List[NodeMapper]): super().__init__(maps) @@ -365,7 +365,13 @@ def computeShapes(self, inputShapes: Shape, outputShapes: Shape, operatorReprese return (inputShapes, outputShapes) -class iLayerNormLayer(ONNXLayer): +class ReluLayer(ONNXLayer): + + def __init__(self, maps: List[NodeMapper]): + super().__init__(maps) + + +class LayerNormLayer(ONNXLayer): def __init__(self, maps: List[NodeMapper]): super().__init__(maps) diff --git a/Deeploy/Targets/Generic/Parsers.py b/Deeploy/Targets/Generic/Parsers.py index 852e3233..cec6f990 100644 --- a/Deeploy/Targets/Generic/Parsers.py +++ b/Deeploy/Targets/Generic/Parsers.py @@ -550,9 +550,6 @@ def parseNode(self, node: gs.Node) -> bool: ret = all([len(node.inputs) == 1, len(node.outputs) == 1]) - if ret: - self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels'].values) - return ret def parseNodeCtxt(self, @@ -591,6 +588,7 @@ def parseNode(self, node: gs.Node) -> bool: self.operatorRepresentation['coeffB'] = int(node.attrs['coeffB'].values) self.operatorRepresentation['coeffC'] = int(node.attrs['coeffC'].values) self.operatorRepresentation['log2'] = int(node.attrs['log2'].values) + self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels'].values) return wellFormed @@ -610,9 +608,10 @@ def __init__(self): 
super().__init__() def parseNode(self, node: gs.Node) -> bool: + wellFormed = super().parseNode(node) - ret = all(['n_levels' in node.attrs]) + ret = all(['n_levels' in node.attrs, len(node.inputs) == 1, len(node.outputs) == 1]) if ret and wellFormed: self.operatorRepresentation['n_levels'] = int(node.attrs['n_levels'].values) @@ -923,6 +922,32 @@ def parseNodeCtxt(self, return ctxt, True +class ReluParser(NodeParser): + + def __init__(self): + super().__init__() + + def parseNode(self, node: gs.Node) -> (bool): + + ret = all([len(node.inputs) == 1, len(node.outputs) == 1]) + + return ret + + def parseNodeCtxt(self, + ctxt: NetworkContext, + node: gs.Node, + channels_first: bool = True) -> Tuple[NetworkContext, bool]: + + data_in = ctxt.lookup(node.inputs[0].name) + data_out = ctxt.lookup(node.outputs[0].name) + self.operatorRepresentation['data_in'] = data_in.name + self.operatorRepresentation['data_out'] = data_out.name + self.operatorRepresentation['size'] = np.prod(data_in.shape) + self.operatorRepresentation['lastDimLength'] = data_in.shape[-1] + + return ctxt, True + + class ReshapeParser(NodeParser): def __init__(self): @@ -1494,6 +1519,18 @@ def parseNodeCtxt(self, return ctxt, True +class LayerNormParser(iLayerNormParser): + + def parseNode(self, node: gs.Node) -> (bool): + + ret = all(['epsilon' in node.attrs, len(node.inputs) == 3, len(node.outputs) == 1]) + + if ret: + self.operatorRepresentation['epsilon'] = node.attrs['epsilon'] + + return ret + + class MatMulParser(NodeParser): def __init__(self, noBiasHoisting = True): @@ -1798,6 +1835,36 @@ def parseNodeCtxt(self, return ctxt, True +class DivParser(NodeParser): + + def __init__(self): + super().__init__() + + def parseNode(self, node: gs.Node) -> bool: + + ret = all([len(node.inputs) == 2, len(node.outputs) == 1]) + + return ret + + def parseNodeCtxt(self, + ctxt: NetworkContext, + node: gs.Node, + channels_first: bool = True) -> Tuple[NetworkContext, bool]: + + inputs = ["input1", "input2"] + 
outputs = ["output"] + for idx, inputNode in enumerate(node.inputs): + if idx < len(inputs): + self.operatorRepresentation[inputs[idx]] = ctxt.lookup(inputNode.name).name + for idx, outputNode in enumerate(node.outputs): + self.operatorRepresentation[outputs[idx]] = ctxt.lookup(outputNode.name).name + + self.operatorRepresentation['size'] = np.prod(ctxt.lookup(self.operatorRepresentation['input1']).shape) + self.operatorRepresentation['lastDimLength'] = ctxt.lookup(self.operatorRepresentation['input1']).shape[-1] + + return ctxt, True + + class RQIntegerDivParser(IntegerDivParser, RQSParserInterface): def __init__(self): diff --git a/Deeploy/Targets/Generic/Platform.py b/Deeploy/Targets/Generic/Platform.py index 597c6e1c..a018ff96 100644 --- a/Deeploy/Targets/Generic/Platform.py +++ b/Deeploy/Targets/Generic/Platform.py @@ -27,29 +27,30 @@ from Deeploy.DeeployTypes import ConstantBuffer, DeploymentEngine, DeploymentPlatform, NodeMapper, NodeTemplate, \ StructBuffer, TopologyOptimizer, TransientBuffer, VariableBuffer -from Deeploy.Targets.Generic.Bindings import BasicAddBindings, BasicConv1DBinding, BasicConv2DBinding, \ - BasicDebugPrintBindings, BasicDWConv1DBinding, BasicDWConv2DBinding, BasicGatherBindings, BasicGELUBinding, \ - BasicGEMMBindings, BasicIntegerDivBinding, BasicITAPartialSoftmaxBinding, BasicITASoftmaxBinding, \ - BasicLayerNormBinding, BasicMatMulBinding, BasicMaxPool2DBinding, BasicMulBindings, BasicPad1DBindings, \ - BasicPad2DBindings, BasicReduceMeanBindings, BasicReduceSumBindings, BasicReshapeBindings, \ - BasicRQIntegerDivBinding, BasicRQSBindings, BasicRQSGELUBinding, BasicSliceBindings, BasicSoftmaxBinding, \ +from Deeploy.Targets.Generic.Bindings import BasicAddBindings, BasicConv1DBinding, BasicConv2DBindings, \ + BasicDebugPrintBindings, BasicDivBindings, BasicDWConv1DBinding, BasicDWConv2DBinding, BasicGatherBindings, \ + BasicGELUBinding, BasicGEMMBindings, BasicITAPartialSoftmaxBinding, BasicITASoftmaxBinding, \ + 
BasicLayerNormBindings, BasicMatMulBinding, BasicMaxPool2DBinding, BasicMulBindings, BasicPad1DBindings, \ + BasicPad2DBindings, BasicReduceMeanBindings, BasicReduceSumBindings, BasicReluBinding, BasicReshapeBindings, \ + BasicRQIntegerDivBinding, BasicRQSBindings, BasicRQSGELUBinding, BasicSliceBindings, BasicSoftmaxBindings, \ BasicTransposeBindings, DummyBinding -from Deeploy.Targets.Generic.Layers import AddLayer, ConvLayer, DebugPrintLayer, GatherLayer, GEMMLayer, \ - IntegerDivLayer, ITAMaxLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, ReduceMeanLayer, ReduceSumLayer, \ - RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, SliceLayer, TransposeLayer, iGELULayer, \ - iLayerNormLayer, iSoftmaxLayer -from Deeploy.Targets.Generic.Parsers import AddParser, DebugParser, DummyParser, FlattenParser, GatherParser, \ - GenericConv1DParser, GenericConv2DParser, GenericDWConv1DParser, GenericDWConv2DParser, GenericGEMMParser, \ - GenericMaxPool2DParser, IntegerDivParser, ITAMaxParser, ITAPartialMaxParser, MatMulParser, MulParser, Pad1DParser, \ - Pad2DParser, ReduceMeanParser, ReduceSumParser, RequantShiftParser, ReshapeParser, RQIntegerDivParser, \ - RQSiGELUParser, SliceParser, TransposeParser, UnsqueezeParser, iGELUParser, iLayerNormParser, iSoftmaxParser +from Deeploy.Targets.Generic.Layers import AddLayer, ConvLayer, DebugPrintLayer, DivLayer, GatherLayer, GEMMLayer, \ + ITAMaxLayer, LayerNormLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, ReduceMeanLayer, ReduceSumLayer, \ + ReluLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, SliceLayer, SoftmaxLayer, \ + TransposeLayer, iGELULayer +from Deeploy.Targets.Generic.Parsers import AddParser, DebugParser, DivParser, DummyParser, FlattenParser, \ + GatherParser, GenericConv1DParser, GenericConv2DParser, GenericDWConv1DParser, GenericDWConv2DParser, \ + GenericGEMMParser, GenericMaxPool2DParser, IntegerDivParser, ITAMaxParser, ITAPartialMaxParser, LayerNormParser, \ + 
MatMulParser, MulParser, Pad1DParser, Pad2DParser, ReduceMeanParser, ReduceSumParser, ReluParser, \ + RequantShiftParser, ReshapeParser, RQIntegerDivParser, RQSiGELUParser, SliceParser, SoftmaxParser, \ + TransposeParser, UnsqueezeParser, iGELUParser, iLayerNormParser, iSoftmaxParser from Deeploy.Targets.Generic.Templates import AllocateTemplate, FreeTemplate from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import ExtractPaddingFromConvPass, \ ExtractPaddingFromPoolPass, MatMulAddMergePass, MergeConstAddAndRequantPass, iGELURequantMergePass AddMapper = NodeMapper(AddParser(), BasicAddBindings) Conv1DMapper = NodeMapper(GenericConv1DParser(), [BasicConv1DBinding]) -Conv2DMapper = NodeMapper(GenericConv2DParser(), [BasicConv2DBinding]) +Conv2DMapper = NodeMapper(GenericConv2DParser(), BasicConv2DBindings) DebugMapper = NodeMapper(DebugParser(), BasicDebugPrintBindings) DWConv1DMapper = NodeMapper(GenericDWConv1DParser(), [BasicDWConv1DBinding]) DWConv2DMapper = NodeMapper(GenericDWConv2DParser(), [BasicDWConv2DBinding]) @@ -57,8 +58,10 @@ GatherMapper = NodeMapper(GatherParser(), BasicGatherBindings) GELUMapper = NodeMapper(iGELUParser(), [BasicGELUBinding]) GEMMMapper = NodeMapper(GenericGEMMParser(), BasicGEMMBindings) -iLayerNormMapper = NodeMapper(iLayerNormParser(), [BasicLayerNormBinding]) -IntegerDivMapper = NodeMapper(IntegerDivParser(), [BasicIntegerDivBinding]) +LayerNormMapper = NodeMapper(LayerNormParser(), BasicLayerNormBindings) +iLayerNormMapper = NodeMapper(iLayerNormParser(), BasicLayerNormBindings) +DivMapper = NodeMapper(DivParser(), BasicDivBindings) +IntegerDivMapper = NodeMapper(IntegerDivParser(), BasicDivBindings) ITAMaxMapper = NodeMapper(ITAMaxParser(), [BasicITASoftmaxBinding]) ITAPartialMaxMapper = NodeMapper(ITAPartialMaxParser(), [BasicITAPartialSoftmaxBinding]) MatMulMapper = NodeMapper(MatMulParser(), [BasicMatMulBinding]) @@ -68,11 +71,13 @@ Pad2DMapper = NodeMapper(Pad2DParser(), BasicPad2DBindings) ReduceMeanMapper = 
NodeMapper(ReduceMeanParser(), BasicReduceMeanBindings) ReduceSumMapper = NodeMapper(ReduceSumParser(), BasicReduceSumBindings) +ReluMapper = NodeMapper(ReluParser(), [BasicReluBinding]) RequantShiftMapper = NodeMapper(RequantShiftParser(), BasicRQSBindings) ReshapeMapper = NodeMapper(ReshapeParser(), BasicReshapeBindings) RQGELUMapper = NodeMapper(RQSiGELUParser(), [BasicRQSGELUBinding]) RQIntegerDivMapper = NodeMapper(RQIntegerDivParser(), [BasicRQIntegerDivBinding]) -SoftmaxMapper = NodeMapper(iSoftmaxParser(), [BasicSoftmaxBinding]) +SoftmaxMapper = NodeMapper(SoftmaxParser(), BasicSoftmaxBindings) +iSoftmaxMapper = NodeMapper(iSoftmaxParser(), BasicSoftmaxBindings) TransposeMapper = NodeMapper(TransposeParser(), BasicTransposeBindings) UnsqueezeMapper = NodeMapper(UnsqueezeParser(), BasicReshapeBindings) @@ -86,15 +91,17 @@ 'Add': AddLayer([AddMapper]), 'Conv': ConvLayer([Conv2DMapper, DWConv2DMapper, Conv1DMapper, DWConv1DMapper]), 'DebugPrint': DebugPrintLayer([DebugMapper]), - 'Div': IntegerDivLayer([IntegerDivMapper]), + 'Div': DivLayer([DivMapper]), 'Flatten': ReshapeLayer([FlattenMapper]), 'Gather': GatherLayer([GatherMapper]), 'Gemm': GEMMLayer([GEMMMapper]), 'iGELU': iGELULayer([GELUMapper]), - 'iLayerNorm': iLayerNormLayer([iLayerNormMapper]), - 'IntegerDiv': IntegerDivLayer([IntegerDivMapper]), + 'LayerNormalization': LayerNormLayer([LayerNormMapper]), + 'iLayerNorm': LayerNormLayer([iLayerNormMapper]), + 'IntegerDiv': DivLayer([IntegerDivMapper]), 'IntegerMean': ReduceMeanLayer([ReduceMeanMapper]), - 'iSoftmax': iSoftmaxLayer([SoftmaxMapper]), + 'Softmax': SoftmaxLayer([SoftmaxMapper]), + 'iSoftmax': SoftmaxLayer([iSoftmaxMapper]), 'ITAMax': ITAMaxLayer([ITAMaxMapper]), 'ITAPartialMax': ITAMaxLayer([ITAPartialMaxMapper]), 'MatMul': GEMMLayer([MatMulMapper]), @@ -104,6 +111,7 @@ 'Pad': PadLayer([Pad1DMapper, Pad2DMapper]), 'ReduceMean': ReduceMeanLayer([ReduceMeanMapper]), 'ReduceSum': ReduceSumLayer([ReduceSumMapper]), + 'Relu': 
ReluLayer([ReluMapper]), 'RequantizediGELU': RQSiGELULayer([RQGELUMapper]), 'RequantShift': RequantShiftLayer([RequantShiftMapper]), 'Reshape': ReshapeLayer([ReshapeMapper]), diff --git a/Deeploy/Targets/Generic/Templates/FloatConvTemplate.py b/Deeploy/Targets/Generic/Templates/FloatConvTemplate.py new file mode 100644 index 00000000..22719fdb --- /dev/null +++ b/Deeploy/Targets/Generic/Templates/FloatConvTemplate.py @@ -0,0 +1,67 @@ +# ---------------------------------------------------------------------- +# +# File: FLoatConvTemplate.py +# +# Last edited: 23.01.2025 +# +# Copyright (C) 2023, ETH Zurich and University of Bologna. +# +# Author: Run Wang, ETH Zurich +# +# ---------------------------------------------------------------------- +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the License); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an AS IS BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from typing import Dict, List, Tuple + +from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation + + +class _FloatConvTemplate(NodeTemplate): + + def __init__(self, templateStr): + super().__init__(templateStr) + + def alignToContext(self, ctxt: NetworkContext, + operatorRepresentation: OperatorRepresentation) -> Tuple[NetworkContext, Dict, List[str]]: + + data_in = ctxt.lookup(operatorRepresentation['data_in']) + data_out = ctxt.lookup(operatorRepresentation['data_out']) + + return ctxt, operatorRepresentation, [] + + +reference2DTemplate = _FloatConvTemplate(""" +<% +batchOffsetIn = ch_im_in * dim_im_in_x * dim_im_in_y +batchOffsetOut = ch_im_out * dim_im_out_x * dim_im_out_y +%> + +// 2D FP Conv (Name: ${nodeName}, Op: ${nodeOp}) +BEGIN_SINGLE_CORE + ${data_in_type.typeName} ref_${data_out}_${data_in} = ${data_in}; + ${data_out_type.typeName} ref_${data_out}_${data_out} = ${data_out}; + + for (uint32_t n=0; n<${batch}; ++n) { + Conv2d_fp${data_in_type.referencedType.typeWidth}_fp${weight_type.referencedType.typeWidth}_fp${data_out_type.referencedType.typeWidth}_NCHW( + ref_${data_out}_${data_in}, ${ch_im_in}, ${dim_im_in_x}, ${dim_im_in_y}, + ${weight}, ${ch_im_out}, ${dim_kernel_x}, ${dim_kernel_y}, + ${stride_x}, ${stride_y}, + ref_${data_out}_${data_out} + ); + ref_${data_out}_${data_in} += ${batchOffsetIn}; + ref_${data_out}_${data_out} += ${batchOffsetOut}; + } +END_SINGLE_CORE +""") diff --git a/Deeploy/Targets/Generic/Templates/FloatDivTemplate.py b/Deeploy/Targets/Generic/Templates/FloatDivTemplate.py new file mode 100644 index 00000000..be713b3f --- /dev/null +++ b/Deeploy/Targets/Generic/Templates/FloatDivTemplate.py @@ -0,0 +1,31 @@ +# ---------------------------------------------------------------------- +# +# File: FloatDivTemplate.py +# +# Last edited: 23.01.2025 +# +# Copyright (C) 2023, ETH Zurich and University of Bologna. 
+# +# Author: Run Wang, ETH Zurich +# +# ---------------------------------------------------------------------- +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the License); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an AS IS BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from Deeploy.DeeployTypes import NodeTemplate + +referenceTemplate = NodeTemplate(""" +// Division (Name: ${nodeName}, Op: ${nodeOp}) +SINGLE_CORE Div_fp${input1_type.referencedType.typeWidth}_fp${input2_type.referencedType.typeWidth}_fp${output_type.referencedType.typeWidth}(${input1}, ${input2}, ${output}, ${size}); +""") diff --git a/Deeploy/Targets/Generic/Templates/FloatGemmTemplate.py b/Deeploy/Targets/Generic/Templates/FloatGemmTemplate.py index ee4f6168..8a07928c 100644 --- a/Deeploy/Targets/Generic/Templates/FloatGemmTemplate.py +++ b/Deeploy/Targets/Generic/Templates/FloatGemmTemplate.py @@ -1,12 +1,12 @@ # ---------------------------------------------------------------------- # -# File: GemmTemplate.py.py +# File: FloatGemmTemplate.py.py # -# Last edited: 05.01.2023 +# Last edited: 23.01.2025 # # Copyright (C) 2023, ETH Zurich and University of Bologna. 
 #
-# Author: Philip Wiese, ETH Zurich
+# Author: Run Wang, ETH Zurich
 #
 # ----------------------------------------------------------------------
 # SPDX-License-Identifier: Apache-2.0
diff --git a/Deeploy/Targets/Generic/Templates/FloatLayernormTemplate.py b/Deeploy/Targets/Generic/Templates/FloatLayernormTemplate.py
new file mode 100644
index 00000000..8a4b7b91
--- /dev/null
+++ b/Deeploy/Targets/Generic/Templates/FloatLayernormTemplate.py
@@ -0,0 +1,48 @@
+# ----------------------------------------------------------------------
+#
+# File: FloatLayernormTemplate.py
+#
+# Last edited: 23.01.2025
+#
+# Copyright (C) 2023, ETH Zurich and University of Bologna.
+#
+# Author: Run Wang, ETH Zurich
+#
+# ----------------------------------------------------------------------
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
from typing import Dict, List, Tuple

from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation


class _LayerNormTemplate(NodeTemplate):
    """NodeTemplate for the generic floating-point layer normalization kernel.

    ``alignToContext`` does not modify the context or the operator
    representation; it only checks that the operator's I/O buffers are
    registered in the network context.
    """

    def __init__(self, templateStr):
        super().__init__(templateStr)

    def alignToContext(self, ctxt: NetworkContext,
                       operatorRepresentation: OperatorRepresentation) -> Tuple[NetworkContext, Dict, List[str]]:
        # Fix: the method returns a 3-tuple, but was annotated as a 2-tuple
        # and `List` was missing from the typing import.
        # Existence checks only: `lookup` raises if the buffer is unknown.
        ctxt.lookup(operatorRepresentation['data_in'])
        ctxt.lookup(operatorRepresentation['data_out'])

        return ctxt, operatorRepresentation, []


referenceTemplate = _LayerNormTemplate("""
// FloatLayernorm (Name: ${nodeName}, Op: ${nodeOp})
SINGLE_CORE Layernorm_fp${data_in_type.referencedType.typeWidth}_fp${data_out_type.referencedType.typeWidth}(${data_in}, ${data_out}, ${weight}, ${bias}, ${epsilon}, ${size}, ${lastDimLength});
""")
from Deeploy.DeeployTypes import NodeTemplate


class _reluTemplate(NodeTemplate):
    # Plain pass-through template, kept as a distinct class for symmetry
    # with the other float templates. The previous explicit __init__ only
    # forwarded to the base class, so it is omitted here.
    pass


referenceTemplate = _reluTemplate("""
// Relu (Name: ${nodeName}, Op: ${nodeOp})
SINGLE_CORE Relu_fp${data_in_type.referencedType.typeWidth}_fp${data_out_type.referencedType.typeWidth}(${data_in}, ${data_out}, ${size}, ${lastDimLength});
""")
from typing import Dict, List, Tuple

from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation


class _SoftmaxTemplate(NodeTemplate):
    """NodeTemplate for the generic floating-point softmax kernel.

    ``alignToContext`` leaves both the context and the operator
    representation untouched; it merely verifies that the operator's
    I/O buffers are known to the network context.
    """

    def alignToContext(self, ctxt: NetworkContext,
                       operatorRepresentation: OperatorRepresentation) -> Tuple[NetworkContext, Dict, List[str]]:
        # `lookup` raises on an unknown buffer name; the return values are
        # deliberately discarded.
        ctxt.lookup(operatorRepresentation['data_in'])
        ctxt.lookup(operatorRepresentation['data_out'])

        return ctxt, operatorRepresentation, []


referenceTemplate = _SoftmaxTemplate("""
// Softmax (Name: ${nodeName}, Op: ${nodeOp})
SINGLE_CORE Softmax_fp${data_in_type.referencedType.typeWidth}_fp${data_out_type.referencedType.typeWidth}(${data_in}, ${data_out}, ${size}, ${lastDimLength});
""")
Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]): super().__init__(input_types, output_types) @@ -263,7 +249,7 @@ def _inferSignedness(self, inputs: List[VariableBuffer], return [False] -class IntegerDivChecker(SignPropTypeChecker): +class DivChecker(SignPropTypeChecker): def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]): super().__init__(input_types, output_types) @@ -378,6 +364,19 @@ def _inferSignedness(self, inputs: List[VariableBuffer], return [False] +class ReluChecker(SignPropTypeChecker): + + def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]): + super().__init__(input_types, output_types) + + def _inferNumLevels(self, inputs, operatorRepresentation): + return [2**(self.input_types[0].referencedType.typeWidth)] + + def _inferSignedness(self, inputs: List[VariableBuffer], + operatorRepresentation: OperatorRepresentation) -> List[bool]: + return [True] + + class SoftmaxChecker(SignPropTypeChecker): def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]): diff --git a/Deeploy/Targets/MemPool/Platform.py b/Deeploy/Targets/MemPool/Platform.py index e9125d91..d89e6e5a 100644 --- a/Deeploy/Targets/MemPool/Platform.py +++ b/Deeploy/Targets/MemPool/Platform.py @@ -30,15 +30,15 @@ from Deeploy.DeeployTypes import ConstantBuffer, DeploymentEngine, DeploymentPlatform, NodeMapper, NodeTemplate, \ StructBuffer, TopologyOptimizer, TransientBuffer, VariableBuffer -from Deeploy.Targets.Generic.Bindings import BasicAddBindings, BasicConv1DBinding, BasicConv2DBinding, \ - BasicDebugPrintBindings, BasicDWConv1DBinding, BasicDWConv2DBinding, BasicGatherBindings, BasicGELUBinding, \ - BasicIntegerDivBinding, BasicLayerNormBinding, BasicMulBindings, BasicPad1DBindings, BasicPad2DBindings, \ +from Deeploy.Targets.Generic.Bindings import BasicAddBindings, BasicConv1DBinding, BasicConv2DBindings, \ + BasicDebugPrintBindings, 
BasicDivBindings, BasicDWConv1DBinding, BasicDWConv2DBinding, BasicGatherBindings, \ + BasicGELUBinding, BasicLayerNormBindings, BasicMulBindings, BasicPad1DBindings, BasicPad2DBindings, \ BasicReduceMeanBindings, BasicReduceSumBindings, BasicReshapeBindings, BasicRQIntegerDivBinding, \ - BasicRQSGELUBinding, BasicSliceBindings, BasicSoftmaxBinding, BasicTransposeBindings, DummyBinding -from Deeploy.Targets.Generic.Layers import AddLayer, ConvLayer, DebugPrintLayer, GatherLayer, GEMMLayer, \ - IntegerDivLayer, ITAMaxLayer, MatMulLayer, MaxPoolLayer, MHSALayer, MulLayer, PadLayer, ReduceMeanLayer, \ + BasicRQSGELUBinding, BasicSliceBindings, BasicSoftmaxBindings, BasicTransposeBindings, DummyBinding +from Deeploy.Targets.Generic.Layers import AddLayer, ConvLayer, DebugPrintLayer, DivLayer, GatherLayer, GEMMLayer, \ + ITAMaxLayer, LayerNormLayer, MatMulLayer, MaxPoolLayer, MHSALayer, MulLayer, PadLayer, ReduceMeanLayer, \ ReduceSumLayer, RequantShiftLayer, ReshapeLayer, RQGEMMLayer, RQIntegerDivLayer, RQMatMulLayer, RQSiGELULayer, \ - SliceLayer, TransposeLayer, iGELULayer, iLayerNormLayer, iSoftmaxLayer + SliceLayer, SoftmaxLayer, TransposeLayer, iGELULayer from Deeploy.Targets.Generic.Parsers import AddParser, DebugParser, DummyParser, FlattenParser, GatherParser, \ GenericConv1DParser, GenericConv2DParser, GenericDWConv1DParser, GenericDWConv2DParser, GenericGEMMParser, \ GenericMaxPool2DParser, IntegerDivParser, ITAMaxParser, MatMulParser, MulParser, Pad1DParser, Pad2DParser, \ @@ -61,7 +61,7 @@ # (they support a wider range of attribute values) GenericConv1D_Mapper = NodeMapper(GenericConv1DParser(), [BasicConv1DBinding]) GenericDWConv1D_Mapper = NodeMapper(GenericDWConv1DParser(), [BasicDWConv1DBinding]) -GenericConv2D_Mapper = NodeMapper(GenericConv2DParser(), [BasicConv2DBinding]) +GenericConv2D_Mapper = NodeMapper(GenericConv2DParser(), BasicConv2DBindings) GenericDWConv2D_Mapper = NodeMapper(GenericDWConv2DParser(), [BasicDWConv2DBinding]) 
GenericConv_Mappers = [GenericConv2D_Mapper, GenericDWConv2D_Mapper, GenericConv1D_Mapper, GenericDWConv1D_Mapper] @@ -72,8 +72,8 @@ Flatten_Mapper = NodeMapper(FlattenParser(), BasicReshapeBindings) Gather_Mapper = NodeMapper(GatherParser(), BasicGatherBindings) GELU_Mapper = NodeMapper(iGELUParser(), [BasicGELUBinding]) -iLayerNorm_Mapper = NodeMapper(iLayerNormParser(), [BasicLayerNormBinding]) -IntegerDiv_Mapper = NodeMapper(IntegerDivParser(), [BasicIntegerDivBinding]) +iLayerNorm_Mapper = NodeMapper(iLayerNormParser(), BasicLayerNormBindings) +IntegerDiv_Mapper = NodeMapper(IntegerDivParser(), BasicDivBindings) ITAMaxMapper = NodeMapper(ITAMaxParser(), [MemPoolITASoftmaxBinding_8_8]) Mul_Mapper = NodeMapper(MulParser(), BasicMulBindings) Pad1D_Mapper = NodeMapper(Pad1DParser(), BasicPad1DBindings) @@ -84,7 +84,7 @@ Reshape_Mapper = NodeMapper(ReshapeParser(), BasicReshapeBindings) RQGELU_Mapper = NodeMapper(RQSiGELUParser(), [BasicRQSGELUBinding]) RQIntegerDiv_Mapper = NodeMapper(RQIntegerDivParser(), [BasicRQIntegerDivBinding]) -Softmax_Mapper = NodeMapper(iSoftmaxParser(), [BasicSoftmaxBinding]) +Softmax_Mapper = NodeMapper(iSoftmaxParser(), BasicSoftmaxBindings) Transpose_Mapper = NodeMapper(TransposeParser(), BasicTransposeBindings) Unsqueeze_Mapper = NodeMapper(UnsqueezeParser(), BasicReshapeBindings) @@ -116,15 +116,15 @@ 'Add': AddLayer([Add_Mapper]), 'Conv': ConvLayer(Conv_Mappers + GenericConv_Mappers), # Mapper with higher priority should be placed first! 
'DebugPrint': DebugPrintLayer([DebugPrint_Mapper]), - 'Div': IntegerDivLayer([IntegerDiv_Mapper]), + 'Div': DivLayer([IntegerDiv_Mapper]), 'Flatten': ReshapeLayer([Flatten_Mapper]), 'Gather': GatherLayer([Gather_Mapper]), 'Gemm': GEMMLayer([GEMM_Mapper]), 'iGELU': iGELULayer([GELU_Mapper]), - 'iLayerNorm': iLayerNormLayer([iLayerNorm_Mapper]), - 'IntegerDiv': IntegerDivLayer([IntegerDiv_Mapper]), + 'iLayerNorm': LayerNormLayer([iLayerNorm_Mapper]), + 'IntegerDiv': DivLayer([IntegerDiv_Mapper]), 'IntegerMean': ReduceMeanLayer([ReduceMean_Mapper]), - 'iSoftmax': iSoftmaxLayer([Softmax_Mapper]), + 'iSoftmax': SoftmaxLayer([Softmax_Mapper]), 'ITAMax': ITAMaxLayer([ITAMaxMapper]), 'MatMul': MatMulLayer([MatMul_Mapper]), 'MatMulInteger': MatMulLayer([MatMul_Mapper]), diff --git a/Deeploy/Targets/PULPOpen/Bindings.py b/Deeploy/Targets/PULPOpen/Bindings.py index ac61768f..366a863b 100644 --- a/Deeploy/Targets/PULPOpen/Bindings.py +++ b/Deeploy/Targets/PULPOpen/Bindings.py @@ -39,8 +39,8 @@ from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration from Deeploy.Targets.Generic.Templates import ConcatTemplate, FloatGemmTemplate, RQSiGELUTemplate, iHardswishTemplate from Deeploy.Targets.Generic.TypeCheckers import ConcatChecker, GELUChecker, GEMMChecker, HardswishChecker, \ - MatMulChecker, MulChecker, ReduceMeanChecker, RQAddChecker, RQHardswishChecker, SliceChecker, SoftmaxChecker, \ - TransposeChecker, iLayerNormChecker + LayerNormChecker, MatMulChecker, MulChecker, ReduceMeanChecker, RQAddChecker, RQHardswishChecker, SliceChecker, \ + SoftmaxChecker, TransposeChecker from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPClusterSynch import PULPSynchCoresPass from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPClusterTiling import PULPClusterTiling from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPL3Tiling import PULPL3Tiling @@ -284,7 +284,7 @@ ] PULPiRMSNormBindings = [ - 
NodeBinding(iLayerNormChecker([PointerClass(int8_t), PointerClass(int32_t)], [PointerClass(int8_t)]), + NodeBinding(LayerNormChecker([PointerClass(int8_t), PointerClass(int32_t)], [PointerClass(int8_t)]), iRMSNormTemplate.referenceTemplate, ForkTransformer) ] diff --git a/Deeploy/Targets/PULPOpen/Platform.py b/Deeploy/Targets/PULPOpen/Platform.py index 4c7cda84..8d9ef6ed 100644 --- a/Deeploy/Targets/PULPOpen/Platform.py +++ b/Deeploy/Targets/PULPOpen/Platform.py @@ -37,7 +37,7 @@ BasicReshapeBindings, BasicRQIntegerDivBinding from Deeploy.Targets.Generic.Layers import AddLayer, ConcatLayer, GatherLayer, GEMMLayer, MatMulLayer, MaxPoolLayer, \ MulLayer, PadLayer, ReduceMeanLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, \ - RQSiHardswishLayer, SliceLayer, TransposeLayer, iHardswishLayer, iRMSNormLayer, iSoftmaxLayer + RQSiHardswishLayer, SliceLayer, SoftmaxLayer, TransposeLayer, iHardswishLayer, iRMSNormLayer from Deeploy.Targets.Generic.Parsers import AddParser, ConcatParser, FlattenParser, GatherParser, GEMMParser, \ MatMulParser, MulParser, Pad1DParser, Pad2DParser, ReduceMeanParser, RequantShiftParser, ReshapeParser, \ RQAddParser, RQIntegerDivParser, RQSiGELUParser, RQSiHardswishParser, SliceParser, TransposeParser, \ @@ -112,7 +112,7 @@ 'RQIntegerDiv': RQIntegerDivLayer([RQIntegerDivMapper]), 'MatMul': MatMulLayer([MatMulMapper]), 'IntegerMean': ReduceMeanLayer([ReduceMeanMapper]), - 'iSoftmax': iSoftmaxLayer([Softmax_int8_Mapper]), + 'iSoftmax': SoftmaxLayer([Softmax_int8_Mapper]), 'ReduceMean': ReduceMeanLayer([ReduceMeanMapper]), 'RequantShift': RequantShiftLayer([UniformRequantShiftMapper, RequantShiftMapper]), 'Add': AddLayer([AddMapper]), diff --git a/Deeploy/Targets/Snitch/Platform.py b/Deeploy/Targets/Snitch/Platform.py index 3b45d9e2..3200f73f 100644 --- a/Deeploy/Targets/Snitch/Platform.py +++ b/Deeploy/Targets/Snitch/Platform.py @@ -30,10 +30,10 @@ from Deeploy.DeeployTypes import ConstantBuffer, DeploymentEngine, 
DeploymentPlatform, NodeMapper, NodeTemplate, \ StructBuffer, TopologyOptimizer, TransientBuffer, VariableBuffer -from Deeploy.Targets.Generic.Bindings import BasicGatherBindings, BasicLayerNormBinding, BasicMatMulBinding, \ +from Deeploy.Targets.Generic.Bindings import BasicGatherBindings, BasicLayerNormBindings, BasicMatMulBinding, \ BasicPad1DBindings, BasicPad2DBindings, BasicReshapeBindings, BasicRQIntegerDivBinding -from Deeploy.Targets.Generic.Layers import AddLayer, GatherLayer, GEMMLayer, MatMulLayer, PadLayer, ReshapeLayer, \ - RQGEMMLayer, RQIntegerDivLayer, iLayerNormLayer, iNoNormLayer, iSoftmaxLayer +from Deeploy.Targets.Generic.Layers import AddLayer, GatherLayer, GEMMLayer, LayerNormLayer, MatMulLayer, PadLayer, \ + ReshapeLayer, RQGEMMLayer, RQIntegerDivLayer, SoftmaxLayer, iNoNormLayer from Deeploy.Targets.Generic.Parsers import AddParser, GatherParser, MatMulParser, Pad1DParser, Pad2DParser, \ RQAddParser, RQIntegerDivParser, UnsqueezeParser, iLayerNormParser, iNoNormParser, iSoftmaxParser from Deeploy.Targets.Generic.Templates import AllocateTemplate as BasicAllocateTemplate @@ -59,7 +59,7 @@ RqGemmMapper = NodeMapper(SnitchRQGEMMParser(), SnitchRqGemmTilingReadyBindings) iSoftmaxMapper = NodeMapper(iSoftmaxParser(), SnitchiSoftmaxTilingReadyBindings) iNoNormMapper = NodeMapper(iNoNormParser(), SnitchiNoNormTilingReadyBindings) -iLayerNormMapper = NodeMapper(iLayerNormParser(), [BasicLayerNormBinding]) +iLayerNormMapper = NodeMapper(iLayerNormParser(), BasicLayerNormBindings) RQAddMapper = NodeMapper(RQAddParser(), SnitchRQAddTilingReadyBindings) AddMapper = NodeMapper(AddParser(), SnitchAddTileReadyBindings) @@ -71,9 +71,9 @@ 'MatMul': MatMulLayer([MatMulMapper]), 'Gemm': GEMMLayer([GemmMapper]), 'RQGemm': RQGEMMLayer([RqGemmMapper]), - 'iSoftmax': iSoftmaxLayer([iSoftmaxMapper]), + 'iSoftmax': SoftmaxLayer([iSoftmaxMapper]), 'iNoNorm': iNoNormLayer([iNoNormMapper]), - 'iLayerNorm': iLayerNormLayer([iLayerNormMapper]), + 'iLayerNorm': 
LayerNormLayer([iLayerNormMapper]), 'RequantizedAdd': AddLayer([RQAddMapper]), 'Add': AddLayer([AddMapper]), } diff --git a/DeeployTest/Platforms/Generic/main.c b/DeeployTest/Platforms/Generic/main.c index b3635cf5..2cdb3ef6 100644 --- a/DeeployTest/Platforms/Generic/main.c +++ b/DeeployTest/Platforms/Generic/main.c @@ -60,7 +60,7 @@ int main() { actual = ((float32_t *)DeeployNetwork_outputs[buf])[i]; diff = expected - actual; - if ((diff < 0 ? -diff : diff) > 1e-5) { + if ((diff < 0 ? -diff : diff) > 1e-4) { tot_err += 1; printf("Expected: %10.6f ", expected); printf("Actual: %10.6f ", actual); diff --git a/TargetLibraries/Generic/inc/kernel/Convolution.h b/TargetLibraries/Generic/inc/kernel/Convolution.h index 43c4a1ff..45ae07ca 100644 --- a/TargetLibraries/Generic/inc/kernel/Convolution.h +++ b/TargetLibraries/Generic/inc/kernel/Convolution.h @@ -59,4 +59,10 @@ void Conv2d_s8_s8_s32_NCHW(int8_t const *__restrict__ pSrcA, uint32_t C, int32_t *__restrict__ pDstC, int32_t input_offset, int32_t output_offset); +void Conv2d_fp32_fp32_fp32_NCHW(const float *__restrict__ pSrcA, uint32_t C, + uint32_t H_padded, uint32_t W_padded, + const float *__restrict__ pSrcB, uint32_t F, + uint32_t P, uint32_t Q, uint32_t SP, uint32_t SQ, + float *__restrict__ pDstC); + #endif //__DEEPLOY_BASIC_MATH_CONVOLUTION_KERNEL_HEADER_ diff --git a/TargetLibraries/Generic/inc/kernel/Div.h b/TargetLibraries/Generic/inc/kernel/Div.h index 672cff21..2b2b8e27 100644 --- a/TargetLibraries/Generic/inc/kernel/Div.h +++ b/TargetLibraries/Generic/inc/kernel/Div.h @@ -45,4 +45,6 @@ void Div_s32_s32(int32_t *data_in_nom, int32_t *data_in_denom, int32_t size_nom, int32_t size_denom, int32_t nomStep, int32_t denomStep, int32_t *data_out, int32_t Delta, int32_t eps, int32_t eta); +void Div_fp32_fp32_fp32(float32_t *data_in_1, float32_t *data_in_2, float32_t *data_out, int32_t size); + #endif //__DEEPLOY_BASIC_MATH_DIV_KERNEL_HEADER_ diff --git a/TargetLibraries/Generic/inc/kernel/Layernorm.h 
b/TargetLibraries/Generic/inc/kernel/Layernorm.h index 9539096a..6f4accba 100644 --- a/TargetLibraries/Generic/inc/kernel/Layernorm.h +++ b/TargetLibraries/Generic/inc/kernel/Layernorm.h @@ -45,4 +45,7 @@ void Layernorm_s8_s8(int8_t *data_in, int8_t *data_out, int32_t *weight, int32_t *bias, int32_t input_offset, int32_t size, int32_t lastDimLength, int32_t log2D); +void Layernorm_fp32_fp32(float *data_in, float *data_out, float *scale, + float *bias, float epsilon, int32_t size, int32_t lastDimLength); + #endif //__DEEPLOY_BASIC_MATH_LAYERNORM_KERNEL_HEADER_ diff --git a/TargetLibraries/Generic/inc/kernel/Softmax.h b/TargetLibraries/Generic/inc/kernel/Softmax.h index ebe48746..ed376754 100644 --- a/TargetLibraries/Generic/inc/kernel/Softmax.h +++ b/TargetLibraries/Generic/inc/kernel/Softmax.h @@ -88,4 +88,6 @@ void ITAPartialMax_s8(int8_t const *__restrict__ pSrcA, uint32_t lastDimLength, uint32_t group_width, uint32_t n_levels); +void Softmax_fp32_fp32(float32_t *input, float32_t *output, int32_t size, int32_t last_dim_length); + #endif //__DEEPLOY_BASIC_MATH_SOFTMAX_KERNEL_HEADER_ diff --git a/TargetLibraries/Generic/src/Convolution_fp32.c b/TargetLibraries/Generic/src/Convolution_fp32.c new file mode 100644 index 00000000..0a62b4d0 --- /dev/null +++ b/TargetLibraries/Generic/src/Convolution_fp32.c @@ -0,0 +1,60 @@ +/* ===================================================================== + * Title: Convolution_float32.c + * Description: Float32 version of Conv2D with NCHW format (pre-padded input) + * + * Date: 23.01.2025 + * + * ===================================================================== */ + +/* + * Copyright (C) 2023 ETH Zurich and University of Bologna. + * + * Authors: + * - Run Wang, ETH Zurich + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. 
/*
 * Float32 2D convolution in NCHW layout over a pre-padded input.
 *
 * pSrcA:  input  activations, C x H_padded x W_padded (padding already applied)
 * pSrcB:  filter weights,     F x C x P x Q
 * pDstC:  output activations, F x H_out x W_out
 * SP/SQ:  vertical / horizontal stride
 *
 * Output dimensions follow from the valid-convolution formula on the
 * padded input. Accumulation order (f, row, col, c, ky, kx) matches the
 * reference implementation, so floating-point results are bit-identical.
 */
void Conv2d_fp32_fp32_fp32_NCHW(const float *__restrict__ pSrcA, uint32_t C,
                                uint32_t H_padded, uint32_t W_padded,
                                const float *__restrict__ pSrcB, uint32_t F,
                                uint32_t P, uint32_t Q, uint32_t SP, uint32_t SQ,
                                float *__restrict__ pDstC) {
  const uint32_t H_out = (H_padded - P) / SP + 1;
  const uint32_t W_out = (W_padded - Q) / SQ + 1;

  for (uint32_t f = 0; f < F; ++f) {
    /* Hoist per-filter bases out of the spatial loops. */
    const float *filter = pSrcB + (size_t)f * C * P * Q;
    float *out = pDstC + (size_t)f * H_out * W_out;

    for (uint32_t oy = 0; oy < H_out; ++oy) {
      for (uint32_t ox = 0; ox < W_out; ++ox) {
        float acc = 0.0f;

        for (uint32_t c = 0; c < C; ++c) {
          const float *in_ch = pSrcA + (size_t)c * H_padded * W_padded;
          const float *w_ch = filter + (size_t)c * P * Q;

          for (uint32_t ky = 0; ky < P; ++ky) {
            for (uint32_t kx = 0; kx < Q; ++kx) {
              acc += in_ch[(oy * SP + ky) * W_padded + (ox * SQ + kx)] *
                     w_ch[ky * Q + kx];
            }
          }
        }

        out[oy * W_out + ox] = acc;
      }
    }
  }
}
+ * + * Authors: + * - Run Wang, ETH Zurich + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "DeeployBasicMath.h" + +void Div_fp32_fp32_fp32(float32_t *data_in_1, float32_t *data_in_2, float32_t *data_out, int32_t size){ + for (int i = 0; i < size; i++) { + data_out[i] = data_in_1[i] / data_in_2[i]; + } +} \ No newline at end of file diff --git a/TargetLibraries/Generic/src/Layernorm_fp32.c b/TargetLibraries/Generic/src/Layernorm_fp32.c new file mode 100644 index 00000000..2262d922 --- /dev/null +++ b/TargetLibraries/Generic/src/Layernorm_fp32.c @@ -0,0 +1,56 @@ +/* ===================================================================== + * Title: Layernorm_fp32.c + * Description: + * + * $Date: 22.01.2025 + * + * ===================================================================== */ +/* + * Copyright (C) 2022 ETH Zurich and University of Bologna. + * + * Authors: + * - Run Wang, ETH Zurich + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "DeeployBasicMath.h" + +void Layernorm_fp32_fp32(float32_t *data_in, float32_t *data_out, float32_t *scale, float32_t *bias, float32_t epsilon, int32_t size, int32_t lastDimLength) { + float32_t mean; + float32_t sum; + float32_t std; + float32_t temp; + + for (int i = 0; i < (size / lastDimLength); i++) { + sum = 0.0f; + mean = 0.0f; + for (int j = 0; j < lastDimLength; j++) { + mean += data_in[j + i * lastDimLength]; + } + mean = mean / lastDimLength; + for (int j = 0; j < lastDimLength; j++) { + temp = data_in[j + i * lastDimLength] - mean; + sum += temp * temp; + } + sum = sum / lastDimLength; + sum += epsilon; + std = sqrtf(sum); + + for (int j = 0; j < lastDimLength; j++) { + data_out[j + i * lastDimLength] = ((data_in[j + i * lastDimLength] - mean) / std) * scale[j] + bias[j]; + } + } +} \ No newline at end of file diff --git a/TargetLibraries/Generic/src/Relu_fp32.c b/TargetLibraries/Generic/src/Relu_fp32.c new file mode 100644 index 00000000..c7789b89 --- /dev/null +++ b/TargetLibraries/Generic/src/Relu_fp32.c @@ -0,0 +1,40 @@ +/* ===================================================================== + * Title: Softmax_fp8.c + * Description: + * + * $Date: 22.01.2025 + * + * ===================================================================== */ +/* + * Copyright (C) 2022 ETH Zurich and University of Bologna. + * + * - Run Wang, ETH Zurich + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "DeeployBasicMath.h" + + +void Relu_fp32_fp32(float32_t* input, float32_t* output, int32_t size, int32_t last_dim_length) { + + int32_t batch_size = size / last_dim_length; + + for (int b = 0; b < batch_size; b++) { + for (int i = 0; i < last_dim_length; i++) { + output[b * last_dim_length + i] = MAX(input[b * last_dim_length + i], 0.0f); + } + } +} \ No newline at end of file diff --git a/TargetLibraries/Generic/src/Softmax_fp32.c b/TargetLibraries/Generic/src/Softmax_fp32.c new file mode 100644 index 00000000..ae15158f --- /dev/null +++ b/TargetLibraries/Generic/src/Softmax_fp32.c @@ -0,0 +1,54 @@ +/* ===================================================================== + * Title: Softmax_fp8.c + * Description: + * + * $Date: 22.01.2025 + * + * ===================================================================== */ +/* + * Copyright (C) 2022 ETH Zurich and University of Bologna. + * + * - Run Wang, ETH Zurich + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "DeeployBasicMath.h" + + +void Softmax_fp32_fp32(float32_t* input, float32_t* output, int32_t size, int32_t last_dim_length) { + + int32_t batch_size = size / last_dim_length; + + for (int b = 0; b < batch_size; b++) { + float32_t max_val = *((float32_t*)&(uint32_t){0xFF800000}); + float sum = 0.0f; + + for (int i = 0; i < last_dim_length; i++) { + if (input[b * last_dim_length + i] > max_val) { + max_val = input[b * last_dim_length + i]; + } + } + + for (int i = 0; i < last_dim_length; i++) { + output[b * last_dim_length + i] = expf(input[b * last_dim_length + i] - max_val); + sum += output[b * last_dim_length + i]; + } + + for (int i = 0; i < last_dim_length; i++) { + output[b * last_dim_length + i] /= sum; + } + } +} \ No newline at end of file