diff --git a/esp-rtos/esp_config.yml b/esp-rtos/esp_config.yml index 638419b6adc..d3af1c57d9c 100644 --- a/esp-rtos/esp_config.yml +++ b/esp-rtos/esp_config.yml @@ -8,3 +8,13 @@ options: constraints: - type: validator: positive_integer + + - name: sw-task-overflow-detection + description: 'Enable software-based stack overflow detection. The stack guard value and offset are based on esp-hal configuration.' + default: + - value: false + + - name: hw-task-overflow-detection + description: 'Enable hardware-based stack overflow detection. The stack watermark is based on the esp-hal stack-guard-offset configuration.' + default: + - value: true diff --git a/esp-rtos/src/lib.rs b/esp-rtos/src/lib.rs index 7d537a8ca33..d120a519d02 100644 --- a/esp-rtos/src/lib.rs +++ b/esp-rtos/src/lib.rs @@ -222,20 +222,23 @@ pub fn start_with_idle_hook( SCHEDULER.with(move |scheduler| { scheduler.setup(TimeDriver::new(timer.timer()), idle_hook); - // Allocate the default task. The stack bottom is set to the stack guard's address as the - // rest of the stack is unusable anyway. + // Allocate the default task. unsafe extern "C" { static _stack_start_cpu0: u32; - static __stack_chk_guard: u32; + static _stack_end_cpu0: u32; } let stack_top = &raw const _stack_start_cpu0; - let stack_bottom = (&raw const __stack_chk_guard).cast::<MaybeUninit<u32>>(); + let stack_bottom = (&raw const _stack_end_cpu0).cast::<MaybeUninit<u32>>(); let stack_slice = core::ptr::slice_from_raw_parts_mut( stack_bottom.cast_mut(), stack_top as usize - stack_bottom as usize, ); - task::allocate_main_task(scheduler, stack_slice); + task::allocate_main_task( + scheduler, + stack_slice, + esp_config::esp_config_int!(usize, "ESP_HAL_CONFIG_STACK_GUARD_OFFSET"), + ); task::setup_multitasking( #[cfg(riscv)] @@ -317,7 +320,14 @@ pub fn start_second_core_with_stack_guard_offset( scheduler.time_driver.is_some(), "The scheduler must be started on the first core first."
); - task::allocate_main_task(scheduler, ptrs.stack); + task::allocate_main_task( + scheduler, + ptrs.stack, + stack_guard_offset.unwrap_or(esp_config::esp_config_int!( + usize, + "ESP_HAL_CONFIG_STACK_GUARD_OFFSET" + )), + ); task::yield_task(); trace!("Second core scheduler initialized"); }); diff --git a/esp-rtos/src/scheduler.rs b/esp-rtos/src/scheduler.rs index 50ac7023ff6..a3765e4e87e 100644 --- a/esp-rtos/src/scheduler.rs +++ b/esp-rtos/src/scheduler.rs @@ -66,6 +66,8 @@ impl CpuSchedulerState { thread_semaphore: None, state: TaskState::Ready, stack: core::ptr::slice_from_raw_parts_mut(core::ptr::null_mut(), 0), + #[cfg(any(hw_task_overflow_detection, sw_task_overflow_detection))] + stack_guard: core::ptr::null_mut(), current_queue: None, priority: 0, #[cfg(multi_core)] @@ -274,6 +276,8 @@ impl SchedulerState { let next_context = if let Some(next) = next_task { priority = Some(next.priority(&mut self.run_queue)); + unsafe { next.as_ref().set_up_stack_watchpoint() }; + unsafe { &raw mut (*next.as_ptr()).cpu_context } } else { // If there is no next task, set up and return to the idle hook. @@ -284,6 +288,9 @@ impl SchedulerState { let idle_sp = if current_context.is_null() || current_context == &raw mut self.per_cpu[current_cpu].main_task.cpu_context { + // We're using the current task's stack, for which the watchpoint is already set + // up. + let current_sp; cfg_if::cfg_if! { if #[cfg(xtensa)] { @@ -294,6 +301,11 @@ impl SchedulerState { } current_sp } else { + // We're using the main task's stack. + self.per_cpu[current_cpu] + .main_task + .set_up_stack_watchpoint(); + cfg_if::cfg_if! 
{ if #[cfg(xtensa)] { self.per_cpu[current_cpu].main_task.cpu_context.A1 diff --git a/esp-rtos/src/task/mod.rs b/esp-rtos/src/task/mod.rs index d8a58f5674d..6a11aca9e2b 100644 --- a/esp-rtos/src/task/mod.rs +++ b/esp-rtos/src/task/mod.rs @@ -335,6 +335,8 @@ pub(crate) struct Task { pub thread_semaphore: Option, pub state: TaskState, pub stack: *mut [MaybeUninit<u32>], + #[cfg(any(hw_task_overflow_detection, sw_task_overflow_detection))] + pub stack_guard: *mut u32, pub priority: usize, #[cfg(multi_core)] pub pinned_to: Option<Cpu>, @@ -362,6 +364,7 @@ pub(crate) struct Task { pub(crate) heap_allocated: bool, } +#[cfg(sw_task_overflow_detection)] const STACK_CANARY: u32 = const { esp_config::esp_config_int!(u32, "ESP_HAL_CONFIG_STACK_GUARD_VALUE") }; @@ -386,14 +389,17 @@ impl Task { name, task_fn, param, task_stack_size, priority, pinned_to ); - let extra_stack = if cfg!(debug_build) { - // This is a lot, but debug builds fail in different ways without. - 6 * 1024 + // Make sure the stack guard doesn't eat into the stack size. + let extra_stack = if cfg!(any(hw_task_overflow_detection, sw_task_overflow_detection)) { + 4 + esp_config::esp_config_int!(usize, "ESP_HAL_CONFIG_STACK_GUARD_OFFSET") } else { - // Make sure the stack guard doesn't eat into the stack size. - 4 + 0 }; + #[cfg(debug_build)] + // This is a lot, but debug builds fail in different ways without. + let extra_stack = extra_stack.max(6 * 1024); + let task_stack_size = task_stack_size + extra_stack; // Make sure stack size is also aligned to 16 bytes.
@@ -413,17 +419,21 @@ impl Task { let stack_bottom = stack.cast::<MaybeUninit<u32>>(); let stack_len_bytes = layout.size(); - unsafe { stack_bottom.write(MaybeUninit::new(STACK_CANARY)) }; + + let stack_guard_offset = + esp_config::esp_config_int!(usize, "ESP_HAL_CONFIG_STACK_GUARD_OFFSET"); let stack_words = core::ptr::slice_from_raw_parts_mut(stack_bottom, stack_len_bytes / 4); let stack_top = unsafe { stack_bottom.add(stack_words.len()).cast() }; - Task { + let mut task = Task { cpu_context: new_task_context(task_fn, param, stack_top), #[cfg(feature = "esp-radio")] thread_semaphore: None, state: TaskState::Ready, stack: stack_words, + #[cfg(any(hw_task_overflow_detection, sw_task_overflow_detection))] + stack_guard: stack_words.cast(), current_queue: None, priority, #[cfg(multi_core)] @@ -438,19 +448,49 @@ impl Task { #[cfg(feature = "alloc")] heap_allocated: false, + }; + + task.set_up_stack_guard(stack_guard_offset); + + task + } + + fn set_up_stack_guard(&mut self, offset: usize) { + let stack_bottom = self.stack.cast::<MaybeUninit<u32>>(); + let stack_guard = unsafe { stack_bottom.byte_add(offset) }; + + #[cfg(sw_task_overflow_detection)] + unsafe { + // avoid touching the main stack's canary on the first core + if stack_guard.read().assume_init() != STACK_CANARY { + stack_guard.write(MaybeUninit::new(STACK_CANARY)); + } + } + + #[cfg(any(hw_task_overflow_detection, sw_task_overflow_detection))] + { + self.stack_guard = stack_guard.cast(); } } pub(crate) fn ensure_no_stack_overflow(&self) { + #[cfg(sw_task_overflow_detection)] assert_eq!( // This cast is safe to do from MaybeUninit because this is the word we've written // during initialization.
- unsafe { self.stack.cast::<u32>().read() }, + unsafe { self.stack_guard.read() }, STACK_CANARY, "Stack overflow detected in {:?}", self as *const Task ); } + + pub(crate) fn set_up_stack_watchpoint(&self) { + #[cfg(hw_task_overflow_detection)] + unsafe { + esp_hal::debugger::set_stack_watchpoint(self.stack_guard as usize); + } + } } impl Drop for Task { @@ -471,18 +511,20 @@ impl Drop for Task { } } -pub(super) fn allocate_main_task(scheduler: &mut SchedulerState, stack: *mut [MaybeUninit<u32>]) { +pub(super) fn allocate_main_task( + scheduler: &mut SchedulerState, + stack: *mut [MaybeUninit<u32>], + stack_guard_offset: usize, +) { let cpu = Cpu::current(); let current_cpu = cpu as usize; - unsafe { - // avoid touching the main stack's canary on the first core - if stack.cast::<MaybeUninit<u32>>().read().assume_init() != STACK_CANARY { - stack - .cast::<MaybeUninit<u32>>() - .write(MaybeUninit::new(STACK_CANARY)); - } - } + debug_assert!( + !scheduler.per_cpu[current_cpu].initialized, + "Tried to allocate main task multiple times" + ); + + scheduler.per_cpu[current_cpu].initialized = true; // Reset main task properties. The rest should be cleared when the task is deleted. scheduler.per_cpu[current_cpu].main_task.priority = 0; @@ -493,12 +535,13 @@ pub(super) fn allocate_main_task(scheduler: &mut SchedulerState, stack: *mut [Ma scheduler.per_cpu[current_cpu].main_task.pinned_to = Some(cpu); } - debug_assert!( - !scheduler.per_cpu[current_cpu].initialized, - "Tried to allocate main task multiple times" - ); + scheduler.per_cpu[current_cpu] + .main_task + .set_up_stack_guard(stack_guard_offset); - scheduler.per_cpu[current_cpu].initialized = true; + scheduler.per_cpu[current_cpu] + .main_task + .set_up_stack_watchpoint(); // This is slightly questionable as we don't ensure SchedulerState is pinned, but it's always // part of a static object so taking the pointer is fine.