diff --git a/esp-rtos/esp_config.yml b/esp-rtos/esp_config.yml index 638419b6adc..d3af1c57d9c 100644 --- a/esp-rtos/esp_config.yml +++ b/esp-rtos/esp_config.yml @@ -8,3 +8,13 @@ options: constraints: - type: validator: positive_integer + + - name: sw-task-overflow-detection + description: 'Enable software-based stack overflow detection. The stack guard value and offset are based on esp-hal configuration.' + default: + - value: false + + - name: hw-task-overflow-detection + description: 'Enable hardware-based stack overflow detection. The stack watermark is based on the esp-hal stack-guard-offset configuration.' + default: + - value: true diff --git a/esp-rtos/src/lib.rs b/esp-rtos/src/lib.rs index 7d537a8ca33..d120a519d02 100644 --- a/esp-rtos/src/lib.rs +++ b/esp-rtos/src/lib.rs @@ -222,20 +222,23 @@ pub fn start_with_idle_hook( SCHEDULER.with(move |scheduler| { scheduler.setup(TimeDriver::new(timer.timer()), idle_hook); - // Allocate the default task. The stack bottom is set to the stack guard's address as the - // rest of the stack is unusable anyway. + // Allocate the default task. unsafe extern "C" { static _stack_start_cpu0: u32; - static __stack_chk_guard: u32; + static _stack_end_cpu0: u32; } let stack_top = &raw const _stack_start_cpu0; - let stack_bottom = (&raw const __stack_chk_guard).cast::<MaybeUninit<u32>>(); + let stack_bottom = (&raw const _stack_end_cpu0).cast::<MaybeUninit<u32>>(); let stack_slice = core::ptr::slice_from_raw_parts_mut( stack_bottom.cast_mut(), stack_top as usize - stack_bottom as usize, ); - task::allocate_main_task(scheduler, stack_slice); + task::allocate_main_task( + scheduler, + stack_slice, + esp_config::esp_config_int!(usize, "ESP_HAL_CONFIG_STACK_GUARD_OFFSET"), + ); task::setup_multitasking( #[cfg(riscv)] @@ -317,7 +320,14 @@ pub fn start_second_core_with_stack_guard_offset( scheduler.time_driver.is_some(), "The scheduler must be started on the first core first."
); - task::allocate_main_task(scheduler, ptrs.stack); + task::allocate_main_task( + scheduler, + ptrs.stack, + stack_guard_offset.unwrap_or(esp_config::esp_config_int!( + usize, + "ESP_HAL_CONFIG_STACK_GUARD_OFFSET" + )), + ); task::yield_task(); trace!("Second core scheduler initialized"); }); diff --git a/esp-rtos/src/scheduler.rs b/esp-rtos/src/scheduler.rs index 50ac7023ff6..a3765e4e87e 100644 --- a/esp-rtos/src/scheduler.rs +++ b/esp-rtos/src/scheduler.rs @@ -66,6 +66,8 @@ impl CpuSchedulerState { thread_semaphore: None, state: TaskState::Ready, stack: core::ptr::slice_from_raw_parts_mut(core::ptr::null_mut(), 0), + #[cfg(any(hw_task_overflow_detection, sw_task_overflow_detection))] + stack_guard: core::ptr::null_mut(), current_queue: None, priority: 0, #[cfg(multi_core)] @@ -274,6 +276,8 @@ impl SchedulerState { let next_context = if let Some(next) = next_task { priority = Some(next.priority(&mut self.run_queue)); + unsafe { next.as_ref().set_up_stack_watchpoint() }; + unsafe { &raw mut (*next.as_ptr()).cpu_context } } else { // If there is no next task, set up and return to the idle hook. @@ -284,6 +288,9 @@ impl SchedulerState { let idle_sp = if current_context.is_null() || current_context == &raw mut self.per_cpu[current_cpu].main_task.cpu_context { + // We're using the current task's stack, for which the watchpoint is already set + // up. + let current_sp; cfg_if::cfg_if! { if #[cfg(xtensa)] { @@ -294,6 +301,11 @@ impl SchedulerState { } current_sp } else { + // We're using the main task's stack. + self.per_cpu[current_cpu] + .main_task + .set_up_stack_watchpoint(); + cfg_if::cfg_if! 
{ if #[cfg(xtensa)] { self.per_cpu[current_cpu].main_task.cpu_context.A1 diff --git a/esp-rtos/src/task/mod.rs b/esp-rtos/src/task/mod.rs index d8a58f5674d..6a11aca9e2b 100644 --- a/esp-rtos/src/task/mod.rs +++ b/esp-rtos/src/task/mod.rs @@ -335,6 +335,8 @@ pub(crate) struct Task { pub thread_semaphore: Option, pub state: TaskState, pub stack: *mut [MaybeUninit<u32>], + #[cfg(any(hw_task_overflow_detection, sw_task_overflow_detection))] + pub stack_guard: *mut u32, pub priority: usize, #[cfg(multi_core)] pub pinned_to: Option<Cpu>, @@ -362,6 +364,7 @@ pub(crate) struct Task { pub(crate) heap_allocated: bool, } +#[cfg(sw_task_overflow_detection)] const STACK_CANARY: u32 = const { esp_config::esp_config_int!(u32, "ESP_HAL_CONFIG_STACK_GUARD_VALUE") }; @@ -386,14 +389,17 @@ impl Task { name, task_fn, param, task_stack_size, priority, pinned_to ); - let extra_stack = if cfg!(debug_build) { - // This is a lot, but debug builds fail in different ways without. - 6 * 1024 + // Make sure the stack guard doesn't eat into the stack size. + let extra_stack = if cfg!(any(hw_task_overflow_detection, sw_task_overflow_detection)) { + 4 + esp_config::esp_config_int!(usize, "ESP_HAL_CONFIG_STACK_GUARD_OFFSET") } else { - // Make sure the stack guard doesn't eat into the stack size. - 4 + 0 }; + #[cfg(debug_build)] + // This is a lot, but debug builds fail in different ways without. + let extra_stack = extra_stack.max(6 * 1024); + let task_stack_size = task_stack_size + extra_stack; // Make sure stack size is also aligned to 16 bytes.
@@ -413,17 +419,21 @@ impl Task { let stack_bottom = stack.cast::<MaybeUninit<u32>>(); let stack_len_bytes = layout.size(); - unsafe { stack_bottom.write(MaybeUninit::new(STACK_CANARY)) }; + + let stack_guard_offset = + esp_config::esp_config_int!(usize, "ESP_HAL_CONFIG_STACK_GUARD_OFFSET"); let stack_words = core::ptr::slice_from_raw_parts_mut(stack_bottom, stack_len_bytes / 4); let stack_top = unsafe { stack_bottom.add(stack_words.len()).cast() }; - Task { + let mut task = Task { cpu_context: new_task_context(task_fn, param, stack_top), #[cfg(feature = "esp-radio")] thread_semaphore: None, state: TaskState::Ready, stack: stack_words, + #[cfg(any(hw_task_overflow_detection, sw_task_overflow_detection))] + stack_guard: stack_words.cast(), current_queue: None, priority, #[cfg(multi_core)] @@ -438,19 +448,49 @@ impl Task { #[cfg(feature = "alloc")] heap_allocated: false, + }; + + task.set_up_stack_guard(stack_guard_offset); + + task + } + + fn set_up_stack_guard(&mut self, offset: usize) { + let stack_bottom = self.stack.cast::<MaybeUninit<u32>>(); + let stack_guard = unsafe { stack_bottom.byte_add(offset) }; + + #[cfg(sw_task_overflow_detection)] + unsafe { + // avoid touching the main stack's canary on the first core + if stack_guard.read().assume_init() != STACK_CANARY { + stack_guard.write(MaybeUninit::new(STACK_CANARY)); + } + } + + #[cfg(any(hw_task_overflow_detection, sw_task_overflow_detection))] + { + self.stack_guard = stack_guard.cast(); } } pub(crate) fn ensure_no_stack_overflow(&self) { + #[cfg(sw_task_overflow_detection)] assert_eq!( // This cast is safe to do from MaybeUninit because this is the word we've written // during initialization.
- unsafe { self.stack.cast::<u32>().read() }, + unsafe { self.stack_guard.read() }, STACK_CANARY, "Stack overflow detected in {:?}", self as *const Task ); } + + pub(crate) fn set_up_stack_watchpoint(&self) { + #[cfg(hw_task_overflow_detection)] + unsafe { + esp_hal::debugger::set_stack_watchpoint(self.stack_guard as usize); + } + } } impl Drop for Task { @@ -471,18 +511,20 @@ impl Drop for Task { } } -pub(super) fn allocate_main_task(scheduler: &mut SchedulerState, stack: *mut [MaybeUninit<u32>]) { +pub(super) fn allocate_main_task( + scheduler: &mut SchedulerState, + stack: *mut [MaybeUninit<u32>], + stack_guard_offset: usize, +) { let cpu = Cpu::current(); let current_cpu = cpu as usize; - unsafe { - // avoid touching the main stack's canary on the first core - if stack.cast::<MaybeUninit<u32>>().read().assume_init() != STACK_CANARY { - stack - .cast::<MaybeUninit<u32>>() - .write(MaybeUninit::new(STACK_CANARY)); - } - } + debug_assert!( + !scheduler.per_cpu[current_cpu].initialized, + "Tried to allocate main task multiple times" + ); + + scheduler.per_cpu[current_cpu].initialized = true; // Reset main task properties. The rest should be cleared when the task is deleted. scheduler.per_cpu[current_cpu].main_task.priority = 0; @@ -493,12 +535,13 @@ pub(super) fn allocate_main_task(scheduler: &mut SchedulerState, stack: *mut [Ma scheduler.per_cpu[current_cpu].main_task.pinned_to = Some(cpu); } - debug_assert!( - !scheduler.per_cpu[current_cpu].initialized, - "Tried to allocate main task multiple times" - ); + scheduler.per_cpu[current_cpu] + .main_task + .set_up_stack_guard(stack_guard_offset); - scheduler.per_cpu[current_cpu].initialized = true; + scheduler.per_cpu[current_cpu] + .main_task + .set_up_stack_watchpoint(); // This is slightly questionable as we don't ensure SchedulerState is pinned, but it's always // part of a static object so taking the pointer is fine.