diff --git a/backends/aoti/slim/c10/core/WrapDimMinimal.h b/backends/aoti/slim/c10/core/WrapDimMinimal.h
new file mode 100644
index 00000000000..0a3acc3f54f
--- /dev/null
+++ b/backends/aoti/slim/c10/core/WrapDimMinimal.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <utility>
+
+#include
+#include
+
+namespace executorch::backends::aoti::slim::c10 {
+
+namespace detail {
+
+/// Slow path for maybe_wrap_dim, taken when the dimension needs validation.
+template <typename T>
+inline T maybe_wrap_dim_slow(T dim, T dim_post_expr, bool wrap_scalar) {
+  ET_CHECK_MSG(
+      dim_post_expr >= 0,
+      "Rank cannot be negative but got %ld",
+      static_cast<long>(dim_post_expr));
+
+  if (dim_post_expr == 0) {
+    ET_CHECK_MSG(
+        wrap_scalar,
+        "Dimension specified as %ld but tensor has no dimensions",
+        static_cast<long>(dim));
+    // Treat the 0-d tensor as one-dimensional: valid dims are in [-1, 0]
+    if (dim >= 0 && dim < 1) {
+      return dim;
+    } else if (dim >= -1 && dim < 0) {
+      return dim + 1;
+    }
+    ET_CHECK_MSG(
+        false,
+        "Dimension out of range (expected to be in range of [-1, 0], but got %ld)",
+        static_cast<long>(dim));
+  }
+
+  T min = dim_post_expr * -1;
+  T max = dim_post_expr - 1;
+  ET_CHECK_MSG(
+      min <= dim && dim <= max,
+      "Dimension out of range (expected to be in range of [%ld, %ld], but got %ld)",
+      static_cast<long>(min),
+      static_cast<long>(max),
+      static_cast<long>(dim));
+
+  // Unreachable in practice: the slow path only runs for out-of-range dims
+  return dim < 0 ? dim + dim_post_expr : dim;
+}
+
+} // namespace detail
+
+/// Wraps a dimension index to handle negative indexing.
+/// For example, dim=-1 with dim_post_expr=3 returns 2.
+///
+/// @param dim The dimension index (may be negative).
+/// @param dim_post_expr The number of dimensions.
+/// @param wrap_scalar If true, allows wrapping for 0-dimensional tensors.
+/// @return The wrapped dimension index (always non-negative).
+template <typename T>
+inline T _maybe_wrap_dim(T dim, T dim_post_expr, bool wrap_scalar = true) {
+  // Inline the fast paths
+  if (SLIMTENSOR_LIKELY(dim_post_expr * -1 <= dim && dim < dim_post_expr)) {
+    if (dim < 0) {
+      return dim + dim_post_expr;
+    }
+    return dim;
+  }
+  // Check edge-cases out-of-line
+  return detail::maybe_wrap_dim_slow(
+      std::move(dim), std::move(dim_post_expr), wrap_scalar);
+}
+
+/// Wraps a dimension index for int64_t.
+inline int64_t
+maybe_wrap_dim(int64_t dim, int64_t dim_post_expr, bool wrap_scalar = true) {
+  return _maybe_wrap_dim(dim, dim_post_expr, wrap_scalar);
+}
+
+/// Wraps a dimension index for size_t.
+inline int64_t
+maybe_wrap_dim(int64_t dim, size_t dim_post_expr, bool wrap_scalar = true) {
+  return _maybe_wrap_dim(dim, static_cast<int64_t>(dim_post_expr), wrap_scalar);
+}
+
+} // namespace executorch::backends::aoti::slim::c10
diff --git a/backends/aoti/slim/c10/core/targets.bzl b/backends/aoti/slim/c10/core/targets.bzl
index 5a9b9558938..65c6aaa7707 100644
--- a/backends/aoti/slim/c10/core/targets.bzl
+++ b/backends/aoti/slim/c10/core/targets.bzl
@@ -67,6 +67,19 @@ def define_common_targets():
         ],
     )
 
+    # Header-only library for WrapDimMinimal
+    runtime.cxx_library(
+        name = "wrap_dim_minimal",
+        headers = [
+            "WrapDimMinimal.h",
+        ],
+        visibility = ["@EXECUTORCH_CLIENTS"],
+        exported_deps = [
+            "//executorch/backends/aoti/slim/c10/macros:macros",
+            "//executorch/runtime/platform:platform",
+        ],
+    )
+
     # Combined c10 core library
     runtime.cxx_library(
         name = "core",
@@ -77,5 +90,6 @@ def define_common_targets():
             ":device_type",
             ":scalar_type",
             ":sizes_and_strides",
+            ":wrap_dim_minimal",
         ],
     )
diff --git a/backends/aoti/slim/core/SlimTensor.h b/backends/aoti/slim/core/SlimTensor.h
index 92b34e8a3e8..0061b0e08b9 100644
--- a/backends/aoti/slim/core/SlimTensor.h
+++ b/backends/aoti/slim/core/SlimTensor.h
@@ -10,9 +10,12 @@
 
 #include
 #include
+#include
 #include
 #include
+#include
+
 #include
 #include
 #include
@@ -254,22 +257,113 @@ class SlimTensor {
   }
 
   /**
-   * Set sizes and strides together.
+   * Set sizes, strides, and storage offset together.
    */
-  void set_sizes_and_strides(IntArrayRef sizes, IntArrayRef strides) {
+  void set_sizes_and_strides(
+      IntArrayRef sizes,
+      IntArrayRef strides,
+      std::optional<int64_t> storage_offset = std::nullopt) {
+    const size_t new_dim = sizes.size();
     ET_CHECK_MSG(
-        sizes.size() == strides.size(),
-        "sizes (%zu) and strides (%zu) must have the same length",
-        sizes.size(),
+        new_dim == strides.size(),
+        "dimensionality of sizes (%zu) must match dimensionality of strides (%zu)",
+        new_dim,
         strides.size());
 
-    sizes_and_strides_.set_sizes(sizes);
-    sizes_and_strides_.set_strides(strides);
+    std::vector<int64_t> new_sizes = toVec(sizes);
+    std::vector<int64_t> new_strides = toVec(strides);
+
+    // Non-negative strides are kept; negative strides are computed as contiguous
+    bool overflowed = false;
+    if (new_dim > 0) {
+      for (int64_t dim = static_cast<int64_t>(new_dim) - 1; dim >= 0; dim--) {
+        if (strides[dim] >= 0) {
+          new_strides[dim] = strides[dim];
+        } else {
+          // for negative strides
+          if (dim == static_cast<int64_t>(new_dim) - 1) {
+            new_strides[dim] = 1;
+          } else {
+            overflowed |= ::c10::mul_overflows(
+                new_strides[dim + 1],
+                std::max<int64_t>(new_sizes[dim + 1], 1),
+                &new_strides[dim]);
+          }
+        }
+      }
+    }
+    ET_CHECK_MSG(!overflowed, "Stride calculation overflowed");
+
+    sizes_and_strides_.set_sizes(makeArrayRef(new_sizes));
+    sizes_and_strides_.set_strides(makeArrayRef(new_strides));
+    if (storage_offset.has_value()) {
+      storage_offset_ = *storage_offset;
+    }
 
     refresh_numel();
     refresh_contiguous();
   }
 
+  /**
+   * Set sizes to a contiguous layout (computes strides automatically).
+   */
+  void set_sizes_contiguous(IntArrayRef sizes) {
+    std::vector<int64_t> contig_strides = compute_contiguous_strides(sizes);
+    set_sizes_and_strides(sizes, makeArrayRef(contig_strides));
+  }
+
+  // =========================================================================
+  // View Operations
+  // =========================================================================
+
+  /**
+   * Returns a view of the tensor with the specified sizes, strides, and
+   * storage offset. The returned tensor shares the same underlying storage.
+   *
+   * @param sizes The sizes of the view.
+   * @param strides The strides of the view.
+   * @param storage_offset Offset into storage in number of elements.
+   * @return A new SlimTensor that is a view of this tensor.
+   */
+  inline SlimTensor as_strided(
+      IntArrayRef sizes,
+      IntArrayRef strides,
+      int64_t storage_offset) const;
+
+  /**
+   * Overload for initializer lists.
+   */
+  inline SlimTensor as_strided(
+      std::initializer_list<int64_t> sizes,
+      std::initializer_list<int64_t> strides,
+      int64_t storage_offset) const {
+    return as_strided(
+        makeArrayRef(sizes), makeArrayRef(strides), storage_offset);
+  }
+
+  /**
+   * Modifies this tensor in-place to have the specified sizes, strides, and
+   * storage offset. The underlying storage remains unchanged.
+   *
+   * @param sizes The new sizes.
+   * @param strides The new strides.
+   * @param storage_offset New offset into storage in number of elements.
+   * @return Reference to this tensor.
+   */
+  inline SlimTensor&
+  as_strided_(IntArrayRef sizes, IntArrayRef strides, int64_t storage_offset);
+
+  /**
+   * Overload for initializer lists.
+   */
+  inline SlimTensor& as_strided_(
+      std::initializer_list<int64_t> sizes,
+      std::initializer_list<int64_t> strides,
+      int64_t storage_offset) {
+    return as_strided_(
+        makeArrayRef(sizes), makeArrayRef(strides), storage_offset);
+  }
+
   // =========================================================================
   // Copy Operation
   // =========================================================================
@@ -278,7 +372,7 @@ class SlimTensor {
    * Copy data from another tensor to this tensor.
    *
    * Both tensors must have the same numel and dtype.
-   * Supports CPU-to-CPU and cross-device copies (CPU↔CUDA, CUDA↔CUDA).
+   * Currently only supports CPU-to-CPU copy (contiguous tensors only).
    *
    * @param other The source tensor to copy from
    * @return Reference to this tensor
@@ -371,3 +465,7 @@ class SlimTensor {
 };
 
 } // namespace executorch::backends::aoti::slim
+
+// Include the view operation implementations (must come after the SlimTensor
+// class definition)
+#include <executorch/backends/aoti/slim/core/SlimTensorView-incl.h>
diff --git a/backends/aoti/slim/core/SlimTensorView-incl.h b/backends/aoti/slim/core/SlimTensorView-incl.h
new file mode 100644
index 00000000000..f0ed8bc087c
--- /dev/null
+++ b/backends/aoti/slim/core/SlimTensorView-incl.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include
+#include
+
+namespace executorch::backends::aoti::slim {
+
+inline SlimTensor SlimTensor::as_strided(
+    IntArrayRef sizes,
+    IntArrayRef strides,
+    int64_t storage_offset) const {
+  SlimTensor result = *this;
+  result.as_strided_(sizes, strides, storage_offset);
+  return result;
+}
+
+inline SlimTensor& SlimTensor::as_strided_(
+    IntArrayRef sizes,
+    IntArrayRef strides,
+    int64_t storage_offset) {
+  ET_CHECK_MSG(
+      sizes.size() == strides.size(),
+      "as_strided: number of sizes (%zu) must equal number of strides (%zu)",
+      sizes.size(),
+      strides.size());
+
+  for (size_t i = 0; i < sizes.size(); ++i) {
+    ET_CHECK_MSG(
+        sizes[i] >= 0,
+        "as_strided: size at dimension %zu is negative: %ld",
+        i,
+        static_cast<long>(sizes[i]));
+  }
+
+  ET_CHECK_MSG(
+      storage_offset >= 0,
+      "as_strided: storage_offset must be non-negative, got: %ld",
+      static_cast<long>(storage_offset));
+
+  this->set_sizes_and_strides(sizes, strides, storage_offset);
+  return *this;
+}
+
+} // namespace executorch::backends::aoti::slim
diff --git a/backends/aoti/slim/core/targets.bzl b/backends/aoti/slim/core/targets.bzl
index cc74b01b444..408738edd35 100644
--- a/backends/aoti/slim/core/targets.bzl
+++ b/backends/aoti/slim/core/targets.bzl
@@ -26,6 +26,7 @@ def define_common_targets():
         name = "slimtensor",
         headers = [
             "SlimTensor.h",
+            "SlimTensorView-incl.h",
         ],
         visibility = ["@EXECUTORCH_CLIENTS"],
         exported_deps = [
@@ -34,9 +35,10 @@ def define_common_targets():
             "//executorch/backends/aoti/slim/c10/core:device",
             "//executorch/backends/aoti/slim/c10/core:scalar_type",
            "//executorch/backends/aoti/slim/c10/core:sizes_and_strides",
+            "//executorch/backends/aoti/slim/c10/core:wrap_dim_minimal",
             "//executorch/backends/aoti/slim/util:array_ref_util",
             "//executorch/backends/aoti/slim/util:size_util",
-            "//executorch/backends/aoti/slim/c10/cuda:exception",
             "//executorch/runtime/platform:platform",
+            "//executorch/backends/aoti/slim/c10/cuda:exception",
         ],
     )
diff --git a/backends/aoti/slim/core/test/targets.bzl b/backends/aoti/slim/core/test/targets.bzl
index d0991708c7f..e2bd116ffc9 100644
--- a/backends/aoti/slim/core/test/targets.bzl
+++ b/backends/aoti/slim/core/test/targets.bzl
@@ -7,8 +7,17 @@ def get_backend_mode():
 def define_common_targets():
     """Define test targets for SlimTensor core module."""
 
+    runtime.cxx_test(
+        name = "test_slimtensor_dtypes",
+        srcs = [
+            "test_slimtensor_dtypes.cpp",
+        ],
+        deps = [
+            "//executorch/backends/aoti/slim/factory:empty",
+        ],
+    )
 
-    # GPU storage test with CUDA support
+    # Backend mode specific tests
     for backend_mode in get_backend_mode():
         backend_suffix = "_" + backend_mode if backend_mode == "cuda" else ""
 
@@ -57,12 +66,14 @@ def define_common_targets():
             **backend_kwargs
         )
 
-    runtime.cxx_test(
-        name = "test_slimtensor_dtypes",
-        srcs = [
-            "test_slimtensor_dtypes.cpp",
-        ],
-        deps = [
-            "//executorch/backends/aoti/slim/factory:empty",
-        ],
-    )
+        runtime.cxx_test(
+            name = "test_as_strided" + backend_suffix,
+            srcs = [
+                "test_as_strided.cpp",
+            ],
+            deps = [
+                "//executorch/backends/aoti/slim/core:slimtensor",
+                "//executorch/backends/aoti/slim/factory:empty",
+            ],
+            **backend_kwargs
+        )
diff --git a/backends/aoti/slim/core/test/test_as_strided.cpp b/backends/aoti/slim/core/test/test_as_strided.cpp
new file mode 100644
index 00000000000..f73104b5ba0
--- /dev/null
+++ b/backends/aoti/slim/core/test/test_as_strided.cpp
@@ -0,0 +1,388 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <gtest/gtest.h>
+
+#include
+#include
+#include
+
+#ifdef CUDA_AVAILABLE
+#include
+#endif
+
+namespace executorch::backends::aoti::slim {
+
+// =============================================================================
+// Device trait for parameterized tests
+// =============================================================================
+
+struct CPUDevice {
+  static c10::Device device() {
+    return CPU_DEVICE;
+  }
+  static constexpr bool is_cuda = false;
+};
+
+#ifdef CUDA_AVAILABLE
+struct CUDADevice {
+  static c10::Device device() {
+    return DEFAULT_CUDA_DEVICE;
+  }
+  static constexpr bool is_cuda = true;
+};
+#endif
+
+// =============================================================================
+// Test fixture for parameterized device tests
+// =============================================================================
+
+template <typename DeviceTrait>
+class AsStridedDeviceTest : public ::testing::Test {
+ protected:
+  static c10::Device device() {
+    return DeviceTrait::device();
+  }
+
+  SlimTensor make_tensor(
+      std::initializer_list<int64_t> sizes,
+      c10::ScalarType dtype = c10::ScalarType::Float) {
+    return empty(sizes, dtype, device());
+  }
+
+  // Helper to initialize tensor data from CPU (handles both CPU and CUDA)
+  template <typename T>
+  void fill_sequential(SlimTensor& tensor, size_t count) {
+    if constexpr (DeviceTrait::is_cuda) {
+#ifdef CUDA_AVAILABLE
+      std::vector<T> cpu_data(count);
+      for (size_t i = 0; i < count; ++i) {
+        cpu_data[i] = static_cast<T>(i);
+      }
+      DeviceTraits::memcpy(
+          tensor.data_ptr(),
+          cpu_data.data(),
+          count * sizeof(T),
+          DEFAULT_CUDA_DEVICE,
+          CPU_DEVICE);
+#endif
+    } else {
+      T* data = static_cast<T*>(tensor.data_ptr());
+      for (size_t i = 0; i < count; ++i) {
+        data[i] = static_cast<T>(i);
+      }
+    }
+  }
+
+  // Helper to read a value from tensor (handles both CPU and CUDA)
+  template <typename T>
+  T read_value(void* ptr, size_t offset = 0) {
+    if constexpr (DeviceTrait::is_cuda) {
+#ifdef CUDA_AVAILABLE
+      T value;
+      DeviceTraits::memcpy(
+          &value,
+          static_cast<T*>(ptr) + offset,
+          sizeof(T),
+          CPU_DEVICE,
+          DEFAULT_CUDA_DEVICE);
+      return value;
+#else
+      return T{};
+#endif
+    } else {
+      return *(static_cast<T*>(ptr) + offset);
+    }
+  }
+
+  // Helper to write a value to tensor (handles both CPU and CUDA)
+  template <typename T>
+  void write_value(void* ptr, T value, size_t offset = 0) {
+    if constexpr (DeviceTrait::is_cuda) {
+#ifdef CUDA_AVAILABLE
+      DeviceTraits::memcpy(
+          static_cast<T*>(ptr) + offset,
+          &value,
+          sizeof(T),
+          DEFAULT_CUDA_DEVICE,
+          CPU_DEVICE);
+#endif
+    } else {
+      *(static_cast<T*>(ptr) + offset) = value;
+    }
+  }
+};
+
+// Type list for parameterized tests
+using DeviceTypes = ::testing::Types<
+    CPUDevice
+#ifdef CUDA_AVAILABLE
+    ,
+    CUDADevice
+#endif
+    >;
+
+TYPED_TEST_SUITE(AsStridedDeviceTest, DeviceTypes);
+
+// =============================================================================
+// as_strided Basic Tests
+// =============================================================================
+
+TYPED_TEST(AsStridedDeviceTest, BasicView) {
+  SlimTensor tensor = this->make_tensor({4, 4});
+  this->template fill_sequential<float>(tensor, 16);
+
+  SlimTensor view = tensor.as_strided({2, 2}, {4, 1}, 0);
+
+  EXPECT_EQ(view.size(0), 2);
+  EXPECT_EQ(view.size(1), 2);
+  EXPECT_EQ(view.stride(0), 4);
+  EXPECT_EQ(view.stride(1), 1);
+  EXPECT_EQ(view.storage_offset(), 0);
+  EXPECT_EQ(view.numel(), 4);
+
+  // View should share storage
+  EXPECT_EQ(view.storage().get(), tensor.storage().get());
+
+  // Verify data access through view
+  EXPECT_FLOAT_EQ(this->template read_value<float>(view.data_ptr(), 0), 0.0f);
+  EXPECT_FLOAT_EQ(this->template read_value<float>(view.data_ptr(), 1), 1.0f);
+  EXPECT_FLOAT_EQ(this->template read_value<float>(tensor.data_ptr(), 4), 4.0f);
+}
+
+TYPED_TEST(AsStridedDeviceTest, WithStorageOffset) {
+  SlimTensor tensor = this->make_tensor({4, 4});
+  this->template fill_sequential<float>(tensor, 16);
+
+  SlimTensor view = tensor.as_strided({2, 3}, {4, 1}, 5);
+
+  EXPECT_EQ(view.storage_offset(), 5);
+  EXPECT_EQ(view.numel(), 6);
+
+  EXPECT_FLOAT_EQ(this->template read_value<float>(view.data_ptr(), 0), 5.0f);
+}
+
+TYPED_TEST(AsStridedDeviceTest, NonContiguousStrides) {
+  SlimTensor tensor = this->make_tensor({6});
+  this->template fill_sequential<float>(tensor, 6);
+
+  SlimTensor view = tensor.as_strided({3}, {2}, 0);
+
+  EXPECT_EQ(view.size(0), 3);
+  EXPECT_EQ(view.stride(0), 2);
+  EXPECT_EQ(view.numel(), 3);
+  EXPECT_FALSE(view.is_contiguous());
+
+  // Access values through stride (stride=2, so indices 0, 2, 4)
+  EXPECT_FLOAT_EQ(
+      this->template read_value<float>(view.data_ptr(), 0 * 2), 0.0f);
+  EXPECT_FLOAT_EQ(
+      this->template read_value<float>(view.data_ptr(), 1 * 2), 2.0f);
+  EXPECT_FLOAT_EQ(
+      this->template read_value<float>(view.data_ptr(), 2 * 2), 4.0f);
+}
+
+TYPED_TEST(AsStridedDeviceTest, TransposeView) {
+  SlimTensor tensor = this->make_tensor({3, 4});
+  this->template fill_sequential<float>(tensor, 12);
+
+  // Create transposed view (4x3) by swapping sizes and strides
+  SlimTensor transposed = tensor.as_strided({4, 3}, {1, 4}, 0);
+
+  EXPECT_EQ(transposed.size(0), 4);
+  EXPECT_EQ(transposed.size(1), 3);
+  EXPECT_EQ(transposed.stride(0), 1);
+  EXPECT_EQ(transposed.stride(1), 4);
+  EXPECT_FALSE(transposed.is_contiguous());
+}
+
+TYPED_TEST(AsStridedDeviceTest, SharedStorageModification) {
+  SlimTensor tensor = this->make_tensor({4});
+  this->template fill_sequential<float>(tensor, 4);
+
+  SlimTensor view = tensor.as_strided({2}, {1}, 1);
+
+  // Modify through view
+  this->template write_value<float>(view.data_ptr(), 100.0f, 0);
+  this->template write_value<float>(view.data_ptr(), 200.0f, 1);
+
+  // Changes should be visible in original tensor
+  EXPECT_FLOAT_EQ(
+      this->template read_value<float>(tensor.data_ptr(), 1), 100.0f);
+  EXPECT_FLOAT_EQ(
+      this->template read_value<float>(tensor.data_ptr(), 2), 200.0f);
+}
+
+// =============================================================================
+// as_strided_ In-Place Tests
+// =============================================================================
+
+TYPED_TEST(AsStridedDeviceTest, InPlaceModification) {
+  SlimTensor tensor = this->make_tensor({4, 4});
+  void* original_data = tensor.data_ptr();
+
+  tensor.as_strided_({2, 8}, {8, 1}, 0);
+
+  EXPECT_EQ(tensor.size(0), 2);
+  EXPECT_EQ(tensor.size(1), 8);
+  EXPECT_EQ(tensor.stride(0), 8);
+  EXPECT_EQ(tensor.stride(1), 1);
+  EXPECT_EQ(tensor.numel(), 16);
+  EXPECT_TRUE(tensor.is_contiguous());
+
+  EXPECT_EQ(tensor.data_ptr(), original_data);
+}
+
+TYPED_TEST(AsStridedDeviceTest, InPlaceWithOffset) {
+  SlimTensor tensor = this->make_tensor({16});
+
+  tensor.as_strided_({4}, {1}, 4);
+
+  EXPECT_EQ(tensor.size(0), 4);
+  EXPECT_EQ(tensor.storage_offset(), 4);
+  EXPECT_EQ(tensor.numel(), 4);
+}
+
+// =============================================================================
+// as_strided Edge Cases
+// =============================================================================
+
+TYPED_TEST(AsStridedDeviceTest, ZeroDimView) {
+  SlimTensor tensor = this->make_tensor({4});
+  this->template write_value<float>(tensor.data_ptr(), 42.0f, 2);
+
+  SlimTensor scalar_view = tensor.as_strided({}, {}, 2);
+
+  EXPECT_EQ(scalar_view.dim(), 0);
+  EXPECT_EQ(scalar_view.numel(), 1);
+  EXPECT_EQ(scalar_view.storage_offset(), 2);
+
+  EXPECT_FLOAT_EQ(
+      this->template read_value<float>(scalar_view.data_ptr(), 0), 42.0f);
+}
+
+TYPED_TEST(AsStridedDeviceTest, SingleElementView) {
+  SlimTensor tensor = this->make_tensor({3, 3});
+  this->template fill_sequential<float>(tensor, 9);
+
+  SlimTensor view = tensor.as_strided({1, 1}, {3, 1}, 4);
+
+  EXPECT_EQ(view.numel(), 1);
+
+  EXPECT_FLOAT_EQ(this->template read_value<float>(view.data_ptr(), 0), 4.0f);
+}
+
+TYPED_TEST(AsStridedDeviceTest, ZeroStridesBroadcast) {
+  SlimTensor tensor = this->make_tensor({4});
+  this->template write_value<float>(tensor.data_ptr(), 42.0f, 0);
+
+  SlimTensor broadcast = tensor.as_strided({3, 3}, {0, 0}, 0);
+
+  EXPECT_EQ(broadcast.size(0), 3);
+  EXPECT_EQ(broadcast.size(1), 3);
+  EXPECT_EQ(broadcast.stride(0), 0);
+  EXPECT_EQ(broadcast.stride(1), 0);
+  EXPECT_EQ(broadcast.numel(), 9);
+
+  EXPECT_FLOAT_EQ(
+      this->template read_value<float>(broadcast.data_ptr(), 0), 42.0f);
+}
+
+// =============================================================================
+// as_strided with Different DTypes
+// =============================================================================
+
+TYPED_TEST(AsStridedDeviceTest, Int64View) {
+  SlimTensor tensor = this->make_tensor({8}, c10::ScalarType::Long);
+
+  // Fill with values multiplied by 10
+  if constexpr (TypeParam::is_cuda) {
+#ifdef CUDA_AVAILABLE
+    std::vector<int64_t> cpu_data(8);
+    for (size_t i = 0; i < 8; ++i) {
+      cpu_data[i] = static_cast<int64_t>(i * 10);
+    }
+    DeviceTraits::memcpy(
+        tensor.data_ptr(),
+        cpu_data.data(),
+        8 * sizeof(int64_t),
+        DEFAULT_CUDA_DEVICE,
+        CPU_DEVICE);
+#endif
+  } else {
+    int64_t* data = static_cast<int64_t*>(tensor.data_ptr());
+    for (size_t i = 0; i < 8; ++i) {
+      data[i] = static_cast<int64_t>(i * 10);
+    }
+  }
+
+  SlimTensor view = tensor.as_strided({2, 3}, {3, 1}, 1);
+
+  EXPECT_EQ(view.dtype(), c10::ScalarType::Long);
+  EXPECT_EQ(this->template read_value<int64_t>(view.data_ptr(), 0), 10);
+}
+
+TYPED_TEST(AsStridedDeviceTest, Int8View) {
+  SlimTensor tensor = this->make_tensor({16}, c10::ScalarType::Char);
+
+  if constexpr (TypeParam::is_cuda) {
+#ifdef CUDA_AVAILABLE
+    std::vector<int8_t> cpu_data(16);
+    for (size_t i = 0; i < 16; ++i) {
+      cpu_data[i] = static_cast<int8_t>(i);
+    }
+    DeviceTraits::memcpy(
+        tensor.data_ptr(),
+        cpu_data.data(),
+        16 * sizeof(int8_t),
+        DEFAULT_CUDA_DEVICE,
+        CPU_DEVICE);
+#endif
+  } else {
+    int8_t* data = static_cast<int8_t*>(tensor.data_ptr());
+    for (size_t i = 0; i < 16; ++i) {
+      data[i] = static_cast<int8_t>(i);
+    }
+  }
+
+  SlimTensor view = tensor.as_strided({4, 2}, {4, 1}, 2);
+
+  EXPECT_EQ(view.dtype(), c10::ScalarType::Char);
+  EXPECT_EQ(view.itemsize(), 1);
+  EXPECT_EQ(this->template read_value<int8_t>(view.data_ptr(), 0), 2);
+}
+
+// =============================================================================
+// Multiple Views Share Storage
+// =============================================================================
+
+TYPED_TEST(AsStridedDeviceTest, MultipleViews) {
+  SlimTensor tensor = this->make_tensor({12});
+  this->template fill_sequential<float>(tensor, 12);
+
+  SlimTensor view1 = tensor.as_strided({4}, {1}, 0);
+  SlimTensor view2 = tensor.as_strided({4}, {1}, 4);
+  SlimTensor view3 = tensor.as_strided({4}, {1}, 8);
+
+  EXPECT_EQ(view1.storage().get(), tensor.storage().get());
+  EXPECT_EQ(view2.storage().get(), tensor.storage().get());
+  EXPECT_EQ(view3.storage().get(), tensor.storage().get());
+
+  EXPECT_FLOAT_EQ(this->template read_value<float>(view1.data_ptr(), 0), 0.0f);
+  EXPECT_FLOAT_EQ(this->template read_value<float>(view2.data_ptr(), 0), 4.0f);
+  EXPECT_FLOAT_EQ(this->template read_value<float>(view3.data_ptr(), 0), 8.0f);
+
+  // Modify through one view
+  this->template write_value<float>(view2.data_ptr(), 100.0f, 0);
+
+  // Visible in original
+  EXPECT_FLOAT_EQ(
+      this->template read_value<float>(tensor.data_ptr(), 4), 100.0f);
+}
+
+} // namespace executorch::backends::aoti::slim
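For reference, a minimal usage sketch of the view API this patch introduces (not part of the diff). It assumes the empty() factory, c10::ScalarType, and CPU_DEVICE used by test_as_strided.cpp above; the factory's include path is not shown in this diff, so only the SlimTensor.h include from this patch is spelled out.

    #include <executorch/backends/aoti/slim/core/SlimTensor.h>
    // empty() comes from //executorch/backends/aoti/slim/factory:empty
    // (header path not shown in this diff).

    using namespace executorch::backends::aoti::slim;

    void as_strided_sketch() {
      // 2x3 row-major tensor: sizes {2, 3}, contiguous strides {3, 1}.
      SlimTensor t = empty({2, 3}, c10::ScalarType::Float, CPU_DEVICE);

      // Transposed 3x2 view: swap sizes and strides, keep storage offset 0.
      // The view aliases t's storage and is not contiguous, so writes through
      // it are visible in t.
      SlimTensor tt = t.as_strided({3, 2}, {1, 3}, 0);
      (void)tt;

      // In-place variant: restride t itself to a 1-D, 2-element slice that
      // starts at element 1 of the underlying storage.
      t.as_strided_({2}, {1}, 1);
    }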