diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD
index d0bf2cbc0a2..7b5ddc7b306 100644
--- a/tensorflow/lite/micro/kernels/BUILD
+++ b/tensorflow/lite/micro/kernels/BUILD
@@ -278,6 +278,7 @@ tflm_kernel_cc_library(
         "neg.cc",
         "pack.cc",
         "pad.cc",
+        "pad_common.cc",
         "pooling.cc",
         "pooling_common.cc",
         "prelu.cc",
@@ -311,6 +312,7 @@ tflm_kernel_cc_library(
         "svdf_common.cc",
         "tanh.cc",
         "transpose.cc",
+        "transpose_common.cc",
         "transpose_conv.cc",
         "unidirectional_sequence_lstm.cc",
         "unpack.cc",
@@ -347,6 +349,7 @@ tflm_kernel_cc_library(
         "strided_slice.h",
         "sub.h",
         "svdf.h",
+        "transpose.h",
         "transpose_conv.h",
         "unidirectional_sequence_lstm.h",
     ] + select({
diff --git a/tensorflow/lite/micro/kernels/cmsis_nn/pad.cc b/tensorflow/lite/micro/kernels/cmsis_nn/pad.cc
new file mode 100644
index 00000000000..bacba0b325b
--- /dev/null
+++ b/tensorflow/lite/micro/kernels/cmsis_nn/pad.cc
@@ -0,0 +1,145 @@
+/* Copyright 2025 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/kernels/internal/reference/pad.h"
+
+#include <limits>
+
+#include "Include/arm_nn_types.h"
+#include "Include/arm_nnfunctions.h"
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/kernels/pad.h"
+
+namespace tflite {
+namespace {
+
+// Int8 PAD kernel: arm_pad_s8 for rank-4 input, reference_ops::Pad otherwise.
+TfLiteStatus PadEvalInt8(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->user_data != nullptr);
+  const OpData* data = static_cast<const OpData*>(node->user_data);
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, /*index=*/0);
+  const TfLiteEvalTensor* constant_values =
+      NumInputs(node) == 3 ? tflite::micro::GetEvalInput(context, node, 2)
+                           : nullptr;
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, /*index=*/0);
+  // No fill tensor: pad with the output zero point saved by PadPrepare.
+  const int8_t pad_value =
+      constant_values == nullptr
+          ? static_cast<int8_t>(data->output_zero_point)
+          : *tflite::micro::GetTensorData<int8_t>(constant_values);
+  const int8_t* input_ptr = tflite::micro::GetTensorData<int8_t>(input);
+  int8_t* output_ptr = tflite::micro::GetTensorData<int8_t>(output);
+
+  const RuntimeShape d = tflite::micro::GetTensorShape(input);
+  if (d.DimensionsCount() != 4) {
+    // cmsis_nn_dims holds exactly four extents; other ranks use the reference.
+    reference_ops::Pad(data->params, d, input_ptr, &pad_value,
+                       tflite::micro::GetTensorShape(output), output_ptr);
+    return kTfLiteOk;
+  }
+  const cmsis_nn_dims input_size = {d.Dims(0), d.Dims(1), d.Dims(2), d.Dims(3)};
+  const PadParams& p = data->params;
+  const cmsis_nn_dims pre_pad = {p.left_padding[0], p.left_padding[1],
+                                 p.left_padding[2], p.left_padding[3]};
+  const cmsis_nn_dims post_pad = {p.right_padding[0], p.right_padding[1],
+                                  p.right_padding[2], p.right_padding[3]};
+  arm_pad_s8(input_ptr, output_ptr, pad_value, &input_size, &pre_pad,
+             &post_pad);
+  return kTfLiteOk;
+}
+
+// PAD/PADV2 eval: int8 is handled by PadEvalInt8 (CMSIS-NN); float32, int16
+// and int32 run the reference kernels. Any other type yields kTfLiteError.
+TfLiteStatus PadEval(TfLiteContext* context, TfLiteNode* node) {
+  TFLITE_DCHECK(node->user_data != nullptr);
+  const OpData* data = static_cast<const OpData*>(node->user_data);
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, /*index=*/0);
+  const TfLiteEvalTensor* constant_values =
+      NumInputs(node) == 3
+          ? tflite::micro::GetEvalInput(context, node, /*index=*/2)
+          : nullptr;
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, /*index=*/0);
+
+  switch (input->type) {
+    case kTfLiteFloat32: {
+      float pad_value =
+          constant_values == nullptr
+              ? 0.f
+              : *tflite::micro::GetTensorData<float>(constant_values);
+      if (data->params.resizing_category == ResizingCategory::kImageStyle) {
+        reference_ops::PadImageStyle(
+            data->params, tflite::micro::GetTensorShape(input),
+            tflite::micro::GetTensorData<float>(input), &pad_value,
+            tflite::micro::GetTensorShape(output),
+            tflite::micro::GetTensorData<float>(output));
+      } else {
+        reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
+                           tflite::micro::GetTensorData<float>(input),
+                           &pad_value, tflite::micro::GetTensorShape(output),
+                           tflite::micro::GetTensorData<float>(output));
+      }
+    } break;
+    case kTfLiteInt8:
+      // Hand back the int8 kernel's status rather than dropping it.
+      return PadEvalInt8(context, node);
+    case kTfLiteInt16: {
+      int16_t pad_value =
+          constant_values == nullptr
+              ? 0
+              : *tflite::micro::GetTensorData<int16_t>(constant_values);
+      reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
+                         tflite::micro::GetTensorData<int16_t>(input),
+                         &pad_value, tflite::micro::GetTensorShape(output),
+                         tflite::micro::GetTensorData<int16_t>(output));
+    } break;
+    case kTfLiteInt32: {
+      int32_t pad_value =
+          constant_values == nullptr
+              ? 0
+              : *tflite::micro::GetTensorData<int32_t>(constant_values);
+      reference_ops::Pad(data->params, tflite::micro::GetTensorShape(input),
+                         tflite::micro::GetTensorData<int32_t>(input),
+                         &pad_value, tflite::micro::GetTensorShape(output),
+                         tflite::micro::GetTensorData<int32_t>(output));
+    } break;
+    default:
+      MicroPrintf("Type %s not currently supported by Pad.",
+                  TfLiteTypeGetName(input->type));
+      return kTfLiteError;
+  }
+  return kTfLiteOk;
+}
+
+}  // namespace
+
+TFLMRegistration Register_PAD() {  // Dispatches on the input tensor type.
+  return tflite::micro::RegisterOp(PadInit, PadPrepare, PadEval);
+}
+
+// PADV2 is registered with the same init/prepare/eval callbacks as PAD.
+TFLMRegistration Register_PADV2() {
+  return tflite::micro::RegisterOp(PadInit, PadPrepare, PadEval);
+}
+
+TFLMRegistration Register_PAD_INT8() {  // Int8 only; type is not checked.
+  return tflite::micro::RegisterOp(PadInit, PadPrepare, PadEvalInt8);
+}
+
+}  // namespace tflite
diff --git a/tensorflow/lite/micro/kernels/cmsis_nn/transpose.cc b/tensorflow/lite/micro/kernels/cmsis_nn/transpose.cc
new file mode 100644
index 00000000000..8e2c3717da3
--- /dev/null
+++ b/tensorflow/lite/micro/kernels/cmsis_nn/transpose.cc
@@ -0,0 +1,112 @@
+/* Copyright 2025 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/kernels/internal/reference/transpose.h"
+
+#include "Include/arm_nnfunctions.h"
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/kernels/transpose.h"
+
+namespace tflite {
+namespace {
+
+// Int8 transpose via CMSIS-NN arm_transpose_s8; handles at most 4 dimensions.
+TfLiteStatus TransposeEvalInt8(TfLiteContext* context, TfLiteNode* node) {
+  const TfLiteEvalTensor* perm_tensor =
+      tflite::micro::GetEvalInput(context, node, kTransposePermTensor);
+  const int size = perm_tensor->dims->data[0];
+  TF_LITE_ENSURE(context, size <= 4);
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, kTransposeInputTensor);
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kTransposeOutputTensor);
+  const cmsis_nn_transpose_params transpose_params = {
+      size, reinterpret_cast<const uint32_t*>(perm_tensor->data.i32)};
+  // Extents are left-aligned; dimensions a tensor lacks are set to 1 instead
+  // of being read from beyond the shape's valid entries.
+  const auto dims4 = [](const RuntimeShape& s) {
+    int32_t d[4] = {1, 1, 1, 1};
+    for (int i = 0; i < s.DimensionsCount() && i < 4; ++i) d[i] = s.Dims(i);
+    return cmsis_nn_dims{d[0], d[1], d[2], d[3]};
+  };
+  const cmsis_nn_dims input_dims = dims4(tflite::micro::GetTensorShape(input));
+  const cmsis_nn_dims output_dims =
+      dims4(tflite::micro::GetTensorShape(output));
+  // Propagate kernel failures in every build instead of only DCHECK-ing.
+  const arm_cmsis_nn_status status = arm_transpose_s8(
+      tflite::micro::GetTensorData<int8_t>(input),
+      tflite::micro::GetTensorData<int8_t>(output), &input_dims, &output_dims,
+      &transpose_params);
+  TF_LITE_ENSURE_EQ(context, status, ARM_CMSIS_NN_SUCCESS);
+  return kTfLiteOk;
+}
+
+// Transpose eval: CMSIS-NN for int8 up to 4-D, reference kernel otherwise.
+TfLiteStatus TransposeEval(TfLiteContext* context, TfLiteNode* node) {
+  const TfLiteEvalTensor* perm_tensor =
+      tflite::micro::GetEvalInput(context, node, kTransposePermTensor);
+  const int size = perm_tensor->dims->data[0];
+  TransposeParams params;
+  params.perm_count = size;
+  for (int i = 0; i < size; ++i) {
+    params.perm[i] = perm_tensor->data.i32[i];
+  }
+
+  const TfLiteEvalTensor* input =
+      tflite::micro::GetEvalInput(context, node, kTransposeInputTensor);
+  TfLiteEvalTensor* output =
+      tflite::micro::GetEvalOutput(context, node, kTransposeOutputTensor);
+  const RuntimeShape in_shape = tflite::micro::GetTensorShape(input);
+  const RuntimeShape out_shape = tflite::micro::GetTensorShape(output);
+  switch (input->type) {
+    case kTfLiteFloat32:
+      reference_ops::Transpose(
+          params, in_shape, tflite::micro::GetTensorData<float>(input),
+          out_shape, tflite::micro::GetTensorData<float>(output));
+      break;
+    case kTfLiteInt8:
+      // arm_transpose_s8 is limited to 4 dimensions; return its status.
+      if (size <= 4) return TransposeEvalInt8(context, node);
+      reference_ops::Transpose(
+          params, in_shape, tflite::micro::GetTensorData<int8_t>(input),
+          out_shape, tflite::micro::GetTensorData<int8_t>(output));
+      break;
+    case kTfLiteInt16:
+      reference_ops::Transpose(
+          params, in_shape, tflite::micro::GetTensorData<int16_t>(input),
+          out_shape, tflite::micro::GetTensorData<int16_t>(output));
+      break;
+    default:
+      MicroPrintf(
+          "Type %s is currently not supported by Transpose. "
+          "Only float32, int8 and int16 is supported",
+          TfLiteTypeGetName(input->type));
+      return kTfLiteError;
+  }
+  return kTfLiteOk;
+}
+
+}  // namespace
+
+TFLMRegistration Register_TRANSPOSE() {  // Dispatches on input tensor type.
+  return tflite::micro::RegisterOp(nullptr, TransposePrepare, TransposeEval);
+}
+TFLMRegistration Register_TRANSPOSE_INT8() {  // Int8 only; type not checked.
+  return tflite::micro::RegisterOp(nullptr, TransposePrepare,
+                                   TransposeEvalInt8);
+}
+
+}  // namespace tflite
diff --git a/tensorflow/lite/micro/kernels/pad.cc b/tensorflow/lite/micro/kernels/pad.cc
index 29f08faa534..02931792543 100644
--- a/tensorflow/lite/micro/kernels/pad.cc
+++ b/tensorflow/lite/micro/kernels/pad.cc
@@ -1,4 +1,4 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2025 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -16,27 +16,14 @@ limitations under the License.
 
 #include <string.h>
 
-#include "tensorflow/lite/c/builtin_op_data.h"
 #include "tensorflow/lite/c/common.h"
-#include "tensorflow/lite/kernels/internal/types.h"
 #include "tensorflow/lite/kernels/kernel_util.h"
-#include "tensorflow/lite/kernels/op_macros.h"
 #include "tensorflow/lite/micro/kernels/kernel_util.h"
-#include "tensorflow/lite/micro/micro_log.h"
+#include "tensorflow/lite/micro/kernels/pad.h"
 
 namespace tflite {
 namespace {
 
-struct OpData {
-  PadParams params;
-  int32_t output_zero_point;
-};
-
-void* PadInit(TfLiteContext* context, const char* buffer, size_t length) {
-  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
-  return context->AllocatePersistentBuffer(context, sizeof(OpData));
-}
-
 TfLiteStatus PadEval(TfLiteContext* context, TfLiteNode* node) {
   TFLITE_DCHECK(node->user_data != nullptr);
   const OpData* data = static_cast<const OpData*>(node->user_data);
@@ -120,103 +107,6 @@ TfLiteStatus PadEval(TfLiteContext* context, TfLiteNode* node) {
 
 }  // namespace
 
-TfLiteStatus PadPrepare(TfLiteContext* context, TfLiteNode* node) {
-  MicroContext* micro_context = GetMicroContext(context);
-
-  TFLITE_DCHECK(node->user_data != nullptr);
-  OpData* data = static_cast<OpData*>(node->user_data);
-
-  TF_LITE_ENSURE(context, NumInputs(node) == 2 || NumInputs(node) == 3);
-  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
-
-  TfLiteTensor* input =
-      micro_context->AllocateTempInputTensor(node, /*index=*/0);
-  TF_LITE_ENSURE(context, input != nullptr);
-  TfLiteTensor* paddings =
-      micro_context->AllocateTempInputTensor(node, /*index=*/1);
-  TF_LITE_ENSURE(context, paddings != nullptr);
-  TfLiteTensor* constant_values =
-      NumInputs(node) == 3
-          ? micro_context->AllocateTempInputTensor(node, /*index=*/2)
-          : nullptr;
-  TfLiteTensor* output =
-      micro_context->AllocateTempOutputTensor(node, /*index=*/0);
-  TF_LITE_ENSURE(context, output != nullptr);
-
-  TF_LITE_ENSURE_EQ(context, input->type, output->type);
-
-  // Current implementations rely on the inputs being <= 4D.
-  TF_LITE_ENSURE(context, NumDimensions(input) <=
-                              reference_ops::PadKernelMaxDimensionCount());
-
-  if (constant_values != nullptr) {
-    TF_LITE_ENSURE_EQ(context, input->type, constant_values->type);
-    // Ensure that constant_values is a scalar.
-    TF_LITE_ENSURE_EQ(context, NumElements(constant_values), 1);
-  }
-
-  // There must be a pair of paddings for each output dimension.
-  TF_LITE_ENSURE_EQ(context, GetTensorShape(paddings).FlatSize(),
-                    output->dims->size * 2);
-
-  // On Micro, outputs must be properly sized by the converter.
-  // NOTE: This data is only available because the paddings buffer is stored in
-  // the flatbuffer:
-  TF_LITE_ENSURE(context, IsConstantTensor(paddings));
-  const int32_t* paddings_data = GetTensorData<int32_t>(paddings);
-  for (int i = 0; i < output->dims->size; i++) {
-    int output_dim = output->dims->data[i];
-    int expected_dim =
-        input->dims->data[i] + paddings_data[i * 2] + paddings_data[i * 2 + 1];
-    TF_LITE_ENSURE_EQ(context, output_dim, expected_dim);
-  }
-
-  // Calculate OpData:
-  data->params.resizing_category = ResizingCategory::kGenericResize;
-  const int paddings_total = GetTensorShape(paddings).FlatSize();
-  if (paddings_total == 8 && (paddings_data[0] == 0 && paddings_data[1] == 0) &&
-      (paddings_data[6] == 0 && paddings_data[7] == 0)) {
-    data->params.resizing_category = ResizingCategory::kImageStyle;
-  }
-
-  const int num_input_dimensions = NumDimensions(input);
-  data->params.left_padding_count = num_input_dimensions;
-  data->params.right_padding_count = num_input_dimensions;
-
-  for (int idx = num_input_dimensions - 1; idx >= 0; --idx) {
-    data->params.left_padding[idx] = paddings_data[idx * 2];
-    data->params.right_padding[idx] = paddings_data[idx * 2 + 1];
-  }
-
-  if (input->type == kTfLiteInt8) {
-    if (constant_values == nullptr) {
-      // Quantized Pad requires that 0 is represented in the quantized
-      // range.
-      TF_LITE_ENSURE(context, output->params.zero_point >=
-                                  std::numeric_limits<int8_t>::min());
-      TF_LITE_ENSURE(context, output->params.zero_point <=
-                                  std::numeric_limits<int8_t>::max());
-    } else {
-      // Quantized Pad requires that 'constant_values' is represented in the
-      // same quantized range as the input and output tensors.
-      TF_LITE_ENSURE_EQ(context, output->params.zero_point,
-                        constant_values->params.zero_point);
-      TF_LITE_ENSURE_EQ(context, static_cast<double>(output->params.scale),
-                        static_cast<double>(constant_values->params.scale));
-    }
-    data->output_zero_point = output->params.zero_point;
-  }
-
-  micro_context->DeallocateTempTfLiteTensor(input);
-  micro_context->DeallocateTempTfLiteTensor(paddings);
-  if (constant_values != nullptr) {
-    micro_context->DeallocateTempTfLiteTensor(constant_values);
-  }
-  micro_context->DeallocateTempTfLiteTensor(output);
-
-  return kTfLiteOk;
-}
-
 TFLMRegistration Register_PAD() {
   return tflite::micro::RegisterOp(PadInit, PadPrepare, PadEval);
 }
diff --git a/tensorflow/lite/micro/kernels/pad.h b/tensorflow/lite/micro/kernels/pad.h
index ad90890b131..069148a016b 100644
--- a/tensorflow/lite/micro/kernels/pad.h
+++ b/tensorflow/lite/micro/kernels/pad.h
@@ -1,4 +1,4 @@
-/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2025 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -17,11 +17,27 @@ limitations under the License.
 #define TENSORFLOW_LITE_MICRO_KERNELS_PAD_H_
 
 #include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"
 
 namespace tflite {
 
+struct OpData {  // Per-node PAD state: allocated in PadInit, set by PadPrepare.
+  PadParams params;           // Paddings per dimension and resizing category.
+  int32_t output_zero_point;  // Stored by PadPrepare for int8 inputs only.
+};
+
+void* PadInit(TfLiteContext* context, const char* buffer, size_t length);
 TfLiteStatus PadPrepare(TfLiteContext* context, TfLiteNode* node);
 
+TFLMRegistration Register_PAD();
+TFLMRegistration Register_PADV2();
+
+#if defined(CMSIS_NN)  // CMSIS-NN build: defined in cmsis_nn/pad.cc.
+TFLMRegistration Register_PAD_INT8();
+#else  // Otherwise the int8 registration aliases the generic PAD kernel.
+inline TFLMRegistration Register_PAD_INT8() { return Register_PAD(); }
+#endif  // defined(CMSIS_NN)
+
 }  // namespace tflite
 
 #endif  // TENSORFLOW_LITE_MICRO_KERNELS_PAD_H_
diff --git a/tensorflow/lite/micro/kernels/pad_common.cc b/tensorflow/lite/micro/kernels/pad_common.cc
new file mode 100644
index 00000000000..aceb861b946
--- /dev/null
+++ b/tensorflow/lite/micro/kernels/pad_common.cc
@@ -0,0 +1,127 @@
+/* Copyright 2025 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <limits>
+
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/kernels/internal/reference/pad.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/kernels/pad.h"
+
+namespace tflite {
+// Returns a persistent buffer sized for one OpData; buffer/length are unused.
+void* PadInit(TfLiteContext* context, const char* buffer, size_t length) {
+  TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
+  return context->AllocatePersistentBuffer(context, sizeof(OpData));
+}
+
+// Validates a PAD node (tensor counts, types, ranks, constant paddings that
+// match the output shape) and fills the node's OpData for evaluation.
+TfLiteStatus PadPrepare(TfLiteContext* context, TfLiteNode* node) {
+  MicroContext* micro_context = GetMicroContext(context);
+  TFLITE_DCHECK(node->user_data != nullptr);
+  OpData* data = static_cast<OpData*>(node->user_data);
+
+  TF_LITE_ENSURE(context, NumInputs(node) == 2 || NumInputs(node) == 3);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+
+  TfLiteTensor* input =
+      micro_context->AllocateTempInputTensor(node, /*index=*/0);
+  TF_LITE_ENSURE(context, input != nullptr);
+  TfLiteTensor* paddings =
+      micro_context->AllocateTempInputTensor(node, /*index=*/1);
+  TF_LITE_ENSURE(context, paddings != nullptr);
+  TfLiteTensor* constant_values =
+      NumInputs(node) == 3 ? micro_context->AllocateTempInputTensor(node, 2)
+                           : nullptr;
+  TfLiteTensor* output =
+      micro_context->AllocateTempOutputTensor(node, /*index=*/0);
+  TF_LITE_ENSURE(context, output != nullptr);
+
+  TF_LITE_ENSURE_EQ(context, input->type, output->type);
+  // The per-dimension loops below index input and output dims in lockstep.
+  TF_LITE_ENSURE_EQ(context, NumDimensions(input), NumDimensions(output));
+
+  // The kernels handle at most PadKernelMaxDimensionCount() dimensions.
+  TF_LITE_ENSURE(context, NumDimensions(input) <=
+                              reference_ops::PadKernelMaxDimensionCount());
+
+  if (constant_values != nullptr) {
+    TF_LITE_ENSURE_EQ(context, input->type, constant_values->type);
+    // Ensure that constant_values is a scalar.
+    TF_LITE_ENSURE_EQ(context, NumElements(constant_values), 1);
+  }
+
+  // There must be a pair of paddings for each output dimension.
+  TF_LITE_ENSURE_EQ(context, GetTensorShape(paddings).FlatSize(),
+                    output->dims->size * 2);
+
+  // On Micro, outputs must be properly sized by the converter; checking that
+  // needs the paddings at prepare time, so they must be constant int32 data.
+  TF_LITE_ENSURE(context, IsConstantTensor(paddings));
+  TF_LITE_ENSURE_TYPES_EQ(context, paddings->type, kTfLiteInt32);
+  const int32_t* paddings_data = GetTensorData<int32_t>(paddings);
+  for (int i = 0; i < output->dims->size; i++) {
+    int output_dim = output->dims->data[i];
+    int expected_dim =
+        input->dims->data[i] + paddings_data[i * 2] + paddings_data[i * 2 + 1];
+    TF_LITE_ENSURE_EQ(context, output_dim, expected_dim);
+  }
+
+  // Calculate OpData:
+  data->params.resizing_category = ResizingCategory::kGenericResize;
+  const int paddings_total = GetTensorShape(paddings).FlatSize();
+  if (paddings_total == 8 && (paddings_data[0] == 0 && paddings_data[1] == 0) &&
+      (paddings_data[6] == 0 && paddings_data[7] == 0)) {
+    data->params.resizing_category = ResizingCategory::kImageStyle;
+  }
+
+  const int num_input_dimensions = NumDimensions(input);
+  data->params.left_padding_count = num_input_dimensions;
+  data->params.right_padding_count = num_input_dimensions;
+  for (int idx = num_input_dimensions - 1; idx >= 0; --idx) {
+    data->params.left_padding[idx] = paddings_data[idx * 2];
+    data->params.right_padding[idx] = paddings_data[idx * 2 + 1];
+  }
+
+  if (input->type == kTfLiteInt8) {
+    if (constant_values == nullptr) {
+      // Quantized Pad requires that 0 is representable in the quantized range.
+      TF_LITE_ENSURE(context, output->params.zero_point >=
+                                  std::numeric_limits<int8_t>::min());
+      TF_LITE_ENSURE(context, output->params.zero_point <=
+                                  std::numeric_limits<int8_t>::max());
+    } else {
+      // Quantized Pad requires that 'constant_values' is represented in the
+      // same quantized range as the input and output tensors.
+      TF_LITE_ENSURE_EQ(context, output->params.zero_point,
+                        constant_values->params.zero_point);
+      TF_LITE_ENSURE_EQ(context, static_cast<double>(output->params.scale),
+                        static_cast<double>(constant_values->params.scale));
+    }
+    data->output_zero_point = output->params.zero_point;
+  }
+
+  micro_context->DeallocateTempTfLiteTensor(input);
+  micro_context->DeallocateTempTfLiteTensor(paddings);
+  if (constant_values != nullptr) {
+    micro_context->DeallocateTempTfLiteTensor(constant_values);
+  }
+  micro_context->DeallocateTempTfLiteTensor(output);
+
+  return kTfLiteOk;
+}
+
+}  // namespace tflite
diff --git a/tensorflow/lite/micro/kernels/transpose.cc b/tensorflow/lite/micro/kernels/transpose.cc
index 70d53e2c449..3a01a2c8fbc 100644
--- a/tensorflow/lite/micro/kernels/transpose.cc
+++ b/tensorflow/lite/micro/kernels/transpose.cc
@@ -15,66 +15,16 @@ limitations under the License.
 #include "tensorflow/lite/kernels/internal/reference/transpose.h"
 
 #include "tensorflow/lite/c/common.h"
-#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
-#include "tensorflow/lite/kernels/internal/types.h"
 #include "tensorflow/lite/kernels/kernel_util.h"
 #include "tensorflow/lite/micro/kernels/kernel_util.h"
-#include "tensorflow/lite/micro/micro_log.h"
+#include "tensorflow/lite/micro/kernels/transpose.h"
 
 namespace tflite {
 namespace {
 
-constexpr int kInputTensor = 0;
-constexpr int kPermTensor = 1;
-constexpr int kOutputTensor = 0;
-
-struct TransposeContext {
-  TransposeContext(TfLiteContext* context, TfLiteNode* node) {
-    micro_context = GetMicroContext(context);
-    input = micro_context->AllocateTempInputTensor(node, kInputTensor);
-    perm = micro_context->AllocateTempInputTensor(node, kPermTensor);
-    output = micro_context->AllocateTempOutputTensor(node, kOutputTensor);
-  }
-  ~TransposeContext() {
-    micro_context->DeallocateTempTfLiteTensor(input);
-    micro_context->DeallocateTempTfLiteTensor(perm);
-    micro_context->DeallocateTempTfLiteTensor(output);
-  }
-  MicroContext* micro_context;
-  TfLiteTensor* input;
-  TfLiteTensor* perm;
-  TfLiteTensor* output;
-};
-
-TfLiteStatus TransposePrepare(TfLiteContext* context, TfLiteNode* node) {
-  TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
-  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
-
-  TransposeContext op_context(context, node);
-
-  // Ensure validity of input tensor.
-  TF_LITE_ENSURE_MSG(context, NumDimensions(op_context.input) <= 5,
-                     "Transpose op only supports 1D-5D input arrays.");
-  TF_LITE_ENSURE_TYPES_EQ(context, op_context.input->type,
-                          op_context.output->type);
-
-  int dims = NumDimensions(op_context.input);
-  const int32_t* perm_data = GetTensorData<int32_t>(op_context.perm);
-
-  // Ensure validity of the permutations tensor as a 1D tensor.
-  TF_LITE_ENSURE_EQ(context, NumDimensions(op_context.perm), 1);
-  TF_LITE_ENSURE_EQ(context, op_context.perm->dims->data[0], dims);
-  for (int idx = 0; idx < dims; ++idx) {
-    TF_LITE_ENSURE_MSG(context, (perm_data[idx] >= 0 && perm_data[idx] < dims),
-                       "Transpose op permutations array is out of bounds.");
-  }
-
-  return kTfLiteOk;
-}
-
 TfLiteStatus TransposeEval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteEvalTensor* perm_tensor =
-      tflite::micro::GetEvalInput(context, node, kPermTensor);
+      tflite::micro::GetEvalInput(context, node, kTransposePermTensor);
   const int32_t* perm_data = perm_tensor->data.i32;
   const int size = perm_tensor->dims->data[0];
   TransposeParams params;
@@ -87,9 +37,9 @@ TfLiteStatus TransposeEval(TfLiteContext* context, TfLiteNode* node) {
   // on each cell. It's safe to implement per size of scalar type and this
   // trick keeps the total code size in a reasonable range.
   const TfLiteEvalTensor* input =
-      tflite::micro::GetEvalInput(context, node, kInputTensor);
+      tflite::micro::GetEvalInput(context, node, kTransposeInputTensor);
   TfLiteEvalTensor* output =
-      tflite::micro::GetEvalOutput(context, node, kOutputTensor);
+      tflite::micro::GetEvalOutput(context, node, kTransposeOutputTensor);
   switch (input->type) {
     case kTfLiteFloat32:
       reference_ops::Transpose(params, tflite::micro::GetTensorShape(input),
diff --git a/tensorflow/lite/micro/kernels/transpose.h b/tensorflow/lite/micro/kernels/transpose.h
new file mode 100644
index 00000000000..2e675c5a591
--- /dev/null
+++ b/tensorflow/lite/micro/kernels/transpose.h
@@ -0,0 +1,59 @@
+/* Copyright 2025 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_MICRO_KERNELS_TRANSPOSE_H_
+#define TENSORFLOW_LITE_MICRO_KERNELS_TRANSPOSE_H_
+
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"
+
+namespace tflite {
+
+constexpr int kTransposeInputTensor = 0;   // Input index: tensor to permute.
+constexpr int kTransposePermTensor = 1;    // Input index: permutation tensor.
+constexpr int kTransposeOutputTensor = 0;  // Output index: permuted result.
+
+struct TransposeContext {  // Holds a node's temp tensors for its own lifetime.
+  TransposeContext(TfLiteContext* context, TfLiteNode* node) {
+    micro_context = GetMicroContext(context);
+    input = micro_context->AllocateTempInputTensor(node, kTransposeInputTensor);
+    perm = micro_context->AllocateTempInputTensor(node, kTransposePermTensor);
+    output =
+        micro_context->AllocateTempOutputTensor(node, kTransposeOutputTensor);
+  }
+  ~TransposeContext() {  // Releases all three; allocation results unchecked.
+    micro_context->DeallocateTempTfLiteTensor(input);
+    micro_context->DeallocateTempTfLiteTensor(perm);
+    micro_context->DeallocateTempTfLiteTensor(output);
+  }
+  MicroContext* micro_context;  // NOTE(review): a copy would deallocate twice.
+  TfLiteTensor* input;   // Temp tensor for input kTransposeInputTensor.
+  TfLiteTensor* perm;    // Temp tensor for input kTransposePermTensor.
+  TfLiteTensor* output;  // Temp tensor for output kTransposeOutputTensor.
+};
+
+TfLiteStatus TransposePrepare(TfLiteContext* context, TfLiteNode* node);
+TFLMRegistration Register_TRANSPOSE();
+
+#if defined(CMSIS_NN)  // CMSIS-NN build: defined in cmsis_nn/transpose.cc.
+TFLMRegistration Register_TRANSPOSE_INT8();
+#else  // Otherwise the int8 registration aliases the generic kernel.
+inline TFLMRegistration Register_TRANSPOSE_INT8() {
+  return Register_TRANSPOSE();
+}
+#endif  // defined(CMSIS_NN)
+
+}  // namespace tflite
+#endif  // TENSORFLOW_LITE_MICRO_KERNELS_TRANSPOSE_H_
diff --git a/tensorflow/lite/micro/kernels/transpose_common.cc b/tensorflow/lite/micro/kernels/transpose_common.cc
new file mode 100644
index 00000000000..1a99f3b8b2c
--- /dev/null
+++ b/tensorflow/lite/micro/kernels/transpose_common.cc
@@ -0,0 +1,52 @@
+/* Copyright 2025 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/kernels/internal/reference/transpose.h"
+#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/kernels/kernel_util.h"
+#include "tensorflow/lite/micro/kernels/transpose.h"
+#include "tensorflow/lite/micro/micro_log.h"
+
+namespace tflite {
+
+// Checks tensor counts, input rank/type and the permutation for a transpose.
+TfLiteStatus TransposePrepare(TfLiteContext* context, TfLiteNode* node) {
+  TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+
+  TransposeContext op_context(context, node);
+
+  // Ensure validity of input tensor.
+  TF_LITE_ENSURE_MSG(context, NumDimensions(op_context.input) <= 5,
+                     "Transpose op only supports 1D-5D input arrays.");
+  TF_LITE_ENSURE_TYPES_EQ(context, op_context.input->type,
+                          op_context.output->type);
+  const int dims = NumDimensions(op_context.input);
+
+  // Ensure validity of the permutations tensor as a 1D int32 tensor.
+  TF_LITE_ENSURE_TYPES_EQ(context, op_context.perm->type, kTfLiteInt32);
+  TF_LITE_ENSURE_EQ(context, NumDimensions(op_context.perm), 1);
+  TF_LITE_ENSURE_EQ(context, op_context.perm->dims->data[0], dims);
+  const int32_t* perm_data = GetTensorData<int32_t>(op_context.perm);
+  for (int idx = 0; idx < dims; ++idx) {
+    TF_LITE_ENSURE_MSG(context, (perm_data[idx] >= 0 && perm_data[idx] < dims),
+                       "Transpose op permutations array is out of bounds.");
+  }
+  return kTfLiteOk;
+}
+
+}  // namespace tflite
diff --git a/tensorflow/lite/micro/tools/make/Makefile b/tensorflow/lite/micro/tools/make/Makefile
index 1b4f9d4bf2c..287661882f6 100644
--- a/tensorflow/lite/micro/tools/make/Makefile
+++ b/tensorflow/lite/micro/tools/make/Makefile
@@ -1,4 +1,4 @@
-# Copyright 2024 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2025 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -426,6 +426,7 @@ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/mul_common.cc \
 $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/neg.cc \
 $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/pack.cc \
 $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/pad.cc \
+$(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/pad_common.cc \
 $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/pooling.cc \
 $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/pooling_common.cc \
 $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/prelu.cc \
@@ -459,6 +460,7 @@ $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/svdf.cc \
 $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/svdf_common.cc \
 $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/tanh.cc \
 $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/transpose.cc \
+$(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/transpose_common.cc \
 $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/transpose_conv.cc \
 $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.cc \
 $(TENSORFLOW_ROOT)tensorflow/lite/micro/kernels/unpack.cc \
diff --git a/tensorflow/lite/micro/tools/make/ext_libs/cmsis_nn_download.sh b/tensorflow/lite/micro/tools/make/ext_libs/cmsis_nn_download.sh
index a211a2b38a3..47058187c2c 100755
--- a/tensorflow/lite/micro/tools/make/ext_libs/cmsis_nn_download.sh
+++ b/tensorflow/lite/micro/tools/make/ext_libs/cmsis_nn_download.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2024 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2025 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -38,9 +38,9 @@ source ${TENSORFLOW_ROOT}tensorflow/lite/micro/tools/make/bash_helpers.sh
 DOWNLOADS_DIR=${1}
 DOWNLOADED_CMSIS_NN_PATH=${DOWNLOADS_DIR}/cmsis_nn
 
-ZIP_PREFIX_NN="22080c68d040c98139e6cb1549473e3149735f4d"
+ZIP_PREFIX_NN="e096196a0c49f065abc03d943c583cd50de424ba"
 CMSIS_NN_URL="http://github.com/ARM-software/CMSIS-NN/archive/${ZIP_PREFIX_NN}.zip"
-CMSIS_NN_MD5="32aa69692541060a76b18bd5d2d98956"
+CMSIS_NN_MD5="d2a6bc4330fed5653c74fc5dae31fd3a"
 
 should_download=$(check_should_download ${DOWNLOADS_DIR})