From 52d460e8354a3549fa4b9f66a20797c5f2e81fd3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Metrich?= <frederic.metrich@live.fr>
Date: Thu, 8 Jan 2026 17:29:33 +0100
Subject: [PATCH 1/4] [Util] Optimize conversion functions for Cortex-M0+

Use shift-based operations instead of software division.

Performance comparison (1000 iterations x test values):

| Function  | Original   | Optimized  | vs stdlib        |
|-----------|------------|------------|------------------|
| utilItoa  | 89,795 us  | 33,784 us  | 2.0x faster      |
| utilAtoi  | 33,768 us  | 12,643 us  | 5.1x faster      |
| utilFtoa  | 84,628 us  | 63,126 us  | N/A              |
| utilAtof  | 159,728 us | 95,508 us  | ~4% slower*      |

*utilAtof supports comma as decimal separator (European locales)

Key optimizations:
- fastDiv10() using shifts/adds instead of software division
- Process strings left-to-right to eliminate reversal
- Multiply by 10 using (x<<3)+(x<<1) instead of *10
- utilAtoi/utilAtof no longer modify input buffer

Also adds benchmark test for measuring performance on target.

Co-Authored-By: Claude <noreply@anthropic.com>
---
 .gitignore                     |   1 +
 src/util.c                     | 203 +++++++++++++++++------------
 tests/Makefile.bench           | 114 ++++++++++++++++
 tests/test_util_bench_target.c | 230 +++++++++++++++++++++++++++++++++
 4 files changed, 464 insertions(+), 84 deletions(-)
 create mode 100644 tests/Makefile.bench
 create mode 100644 tests/test_util_bench_target.c

diff --git a/.gitignore b/.gitignore
index 86a218e..19eadfa 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,7 @@
 *.tgz
 bin/emon32*
 build/
+build_bench/
 cm-test-sine.csv
 compile_commands.json
 emon32_build_info.c
diff --git a/src/util.c b/src/util.c
index 1d75e75..3dbe772 100644
--- a/src/util.c
+++ b/src/util.c
@@ -33,94 +33,111 @@ uint32_t utilStrlen(const char *pBuf) {
   return charCnt;
 }
 
+/* Fast divide by 10 using shifts/adds only (no hardware divide) */
+static inline uint32_t fastDiv10(uint32_t n) {
+  uint32_t q = (n >> 1) + (n >> 2);
+  q          = q + (q >> 4);
+  q          = q + (q >> 8);
+  q          = q + (q >> 16);
+  q          = q >> 3;
+  uint32_t r = n - ((q << 3) + (q << 1)); /* r = n - q*10 */
+  return q + ((r + 6) >> 4);
+}
+
 uint32_t utilItoa(char *pBuf, int32_t val, ITOA_BASE_t base) {
-  uint32_t    charCnt    = 0;
-  bool        isNegative = false;
-  char *const pBase      = pBuf;
+  char     buf[12]; /* -2147483648 = 11 chars + null */
+  char    *p = &buf[11];
+  uint32_t uval;
+  bool     neg = false;
+
+  *p = '\0';
 
   /* Handle 0 explicitly */
   if (0 == val) {
-    *pBuf++ = '0';
-    *pBuf   = '\0';
+    pBuf[0] = '0';
+    pBuf[1] = '\0';
     return 2u;
   }
 
-  /* Base 10 can be signed, and has a divide in */
   if (ITOA_BASE10 == base) {
     if (val < 0) {
-      isNegative = true;
-      val        = -val;
+      neg  = true;
+      uval = 0u - (uint32_t)val; /* unsigned negate: defined for INT32_MIN */
+    } else {
+      uval = (uint32_t)val;
     }
 
-    while (0 != val) {
-      *pBuf++ = (val % 10u) + '0';
-      val     = val / 10u;
-      charCnt++;
+    while (uval != 0) {
+      uint32_t q = fastDiv10(uval);
+      *--p = (char)('0' + (uval - ((q << 3) + (q << 1)))); /* uval - q*10 */
+      uval = q;
     }
 
-    if (isNegative) {
-      *pBuf++ = '-';
-      charCnt++;
+    if (neg) {
+      *--p = '-';
     }
   } else {
-    const char itohex[] = "0123456789abcdef";
-    uint32_t   val_u    = (uint32_t)val;
+    static const char itohex[] = "0123456789abcdef";
+    uint32_t          val_u    = (uint32_t)val;
 
     while (0 != val_u) {
-      *pBuf++ = itohex[(val_u & 0xFu)];
+      *--p = itohex[val_u & 0xFu];
       val_u >>= 4;
-      charCnt++;
     }
   }
 
-  /* Terminate and return */
-  *pBuf = '\0';
-  charCnt++;
+  /* Copy to output buffer */
+  char    *dst = pBuf;
+  uint32_t len = 0;
+  while (*p) {
+    *dst++ = *p++;
+    len++;
+  }
+  *dst = '\0';
 
-  utilStrReverse(pBase, charCnt - 1u);
-  return charCnt;
+  return len + 1u;
 }
 
 ConvInt_t utilAtoi(char *pBuf, ITOA_BASE_t base) {
   bool      isNegative = false;
-  uint32_t  len;
-  uint32_t  mulCnt = 1;
-  ConvInt_t conv   = {false, 0};
+  uint32_t  result     = 0;
+  ConvInt_t conv       = {false, 0};
 
   if ('-' == *pBuf) {
     isNegative = true;
     pBuf++;
   }
 
-  /* Reverse string and convert */
-  len = utilStrlen(pBuf);
-  utilStrReverse(pBuf, len);
-
+  /* Process left-to-right, no string reversal needed */
   if (ITOA_BASE10 == base) {
     while (*pBuf) {
       if (!isnumeric(*pBuf)) {
         return conv;
       }
-      conv.val += ((*pBuf++) - '0') * mulCnt;
-      mulCnt *= 10;
-    }
-    if (isNegative) {
-      conv.val = -conv.val;
+      /* result = result * 10 + digit, using shifts for *10 */
+      result = (result << 3) + (result << 1) + (uint32_t)(*pBuf - '0');
+      pBuf++;
     }
   } else {
     while (*pBuf) {
-      if (('a' <= *pBuf) && ('f' >= *pBuf)) {
-        conv.val += ((*pBuf) - 'a' + 10u) * mulCnt;
-      } else if (isnumeric(*pBuf)) {
-        conv.val += ((*pBuf) - '0') * mulCnt;
+      char     c = *pBuf;
+      uint32_t digit;
+      if (('a' <= c) && ('f' >= c)) {
+        digit = (uint32_t)(c - 'a' + 10);
+      } else if (('A' <= c) && ('F' >= c)) {
+        digit = (uint32_t)(c - 'A' + 10);
+      } else if (isnumeric(c)) {
+        digit = (uint32_t)(c - '0');
       } else {
         return conv;
       }
+      /* result = result * 16 + digit */
+      result = (result << 4) + digit;
       pBuf++;
-      mulCnt *= 16;
     }
   }
 
+  conv.val   = isNegative ? -(int32_t)result : (int32_t)result;
   conv.valid = true;
   return conv;
 }
@@ -131,81 +148,99 @@ bool utilCharPrintable(const char c) {
 }
 
 uint32_t utilFtoa(char *pBuf, float val) {
-  uint32_t    charCnt    = 0;
-  bool        isNegative = false;
-  char *const pBase      = pBuf;
+  char     buf[16]; /* Enough for -2147483648.99 + null */
+  char    *p = &buf[15];
+  uint32_t units;
+  uint32_t decimals;
+  bool     neg = false;
 
-  uint16_t decimals;
-  int32_t  units;
+  *p = '\0';
 
   if (val < 0.0f) {
-    isNegative = true;
-    val        = qfp_fmul(val, -1.0f);
+    neg = true;
+    val = qfp_fmul(val, -1.0f);
   }
-  decimals = qfp_float2int_z(qfp_fmul(val, 100.0f)) % 100;
-  units    = qfp_float2int_z(val);
 
-  charCnt += 3u;
-  *pBuf++  = (decimals % 10) + '0';
-  decimals = decimals / 10;
-  *pBuf++  = (decimals % 10) + '0';
-  *pBuf++  = '.';
-
-  if (0 == units) {
-    *pBuf++ = '0';
-    charCnt++;
+  /* Extract integer and fractional parts */
+  units           = (uint32_t)qfp_float2int_z(val);
+  /* decimals = (val * 100) - (units * 100), using shifts for *100 */
+  uint32_t val100 = (uint32_t)qfp_float2int_z(qfp_fmul(val, 100.0f));
+  uint32_t units100 =
+      (units << 6) + (units << 5) + (units << 2); /* units * 100 */
+  decimals = val100 - units100;
+
+  /* Write decimals (always 2 digits) using fast division */
+  uint32_t q = fastDiv10(decimals);
+  *--p = (char)('0' + (decimals - ((q << 3) + (q << 1)))); /* decimals % 10 */
+  *--p = (char)('0' + q);                                  /* tens digit */
+  *--p = '.';
+
+  /* Write integer part */
+  if (units == 0) {
+    *--p = '0';
+  } else {
+    while (units != 0) {
+      q     = fastDiv10(units);
+      *--p  = (char)('0' + (units - ((q << 3) + (q << 1)))); /* units % 10 */
+      units = q;
+    }
   }
 
-  while (0 != units) {
-    *pBuf++ = (units % 10) + '0';
-    units   = units / 10;
-    charCnt++;
+  if (neg) {
+    *--p = '-';
   }
 
-  if (isNegative) {
-    *pBuf++ = '-';
-    charCnt++;
+  /* Copy to output buffer */
+  char    *dst = pBuf;
+  uint32_t len = 0;
+  while (*p) {
+    *dst++ = *p++;
+    len++;
   }
+  *dst = '\0';
 
-  /* Terminate and return */
-  *pBuf = '\0';
-  charCnt++;
-
-  utilStrReverse(pBase, charCnt - 1u);
-  return charCnt;
+  return len + 1u;
 }
 
 ConvFloat_t utilAtof(char *pBuf) {
   bool        isNegative = false;
-  uint32_t    len        = 0;
-  uint32_t    mulCnt     = 1u;
-  uint32_t    fraction   = 0u;
+  uint32_t    intPart    = 0;
+  uint32_t    fracPart   = 0;
+  uint32_t    fracDiv    = 1;
+  bool        inFraction = false;
   ConvFloat_t conv       = {false, 0.0f};
 
   if ('-' == *pBuf) {
     isNegative = true;
     pBuf++;
   }
-  len = utilStrlen(pBuf);
-  utilStrReverse(pBuf, len);
 
+  /* Process left-to-right, no string reversal needed */
   while (*pBuf) {
     const char c = *pBuf++;
-    /* Allow period/comma delimit, divide down if found */
     if (('.' == c) || (',' == c)) {
-      fraction = mulCnt;
+      inFraction = true;
     } else if (isnumeric(c)) {
-      const float toAdd = qfp_uint2float((c - '0') * mulCnt);
-      conv.val          = qfp_fadd(conv.val, toAdd);
-      mulCnt *= 10;
+      uint32_t digit = (uint32_t)(c - '0');
+      if (inFraction) {
+        /* fracPart = fracPart * 10 + digit */
+        fracPart = (fracPart << 3) + (fracPart << 1) + digit;
+        fracDiv  = (fracDiv << 3) + (fracDiv << 1);
+      } else {
+        /* intPart = intPart * 10 + digit */
+        intPart = (intPart << 3) + (intPart << 1) + digit;
+      }
     } else {
       /* Invalid character found */
       return conv;
     }
   }
 
-  if (0 != fraction) {
-    conv.val = qfp_fdiv(conv.val, qfp_uint2float(fraction));
+  /* Convert to float only at the end */
+  conv.val = qfp_uint2float(intPart);
+  if (fracDiv > 1u) {
+    conv.val = qfp_fadd(
+        conv.val, qfp_fdiv(qfp_uint2float(fracPart), qfp_uint2float(fracDiv)));
   }
 
   if (isNegative) {
diff --git a/tests/Makefile.bench b/tests/Makefile.bench
new file mode 100644
index 0000000..b4aef7b
--- /dev/null
+++ b/tests/Makefile.bench
@@ -0,0 +1,114 @@
+##############################################################################
+# Makefile for util benchmark test on ARM Cortex-M0+ target
+#
+# Uses IDENTICAL compiler/linker settings as main Makefile for fair comparison.
+# Only addition: --specs=nosys.specs for syscall stubs (sprintf/strtol need them)
+#
+# Usage: make -f tests/Makefile.bench
+##############################################################################
+BUILD = build_bench
+BIN = util_bench
+OUT = bin
+##############################################################################
+.PHONY: all directory clean size
+
+# Path to toolchain, e.g. /path/to/bin/ Leave empty if already on path.
+TC_PATH =
+CC = $(TC_PATH)arm-none-eabi-gcc
+OBJCOPY = $(TC_PATH)arm-none-eabi-objcopy
+SIZE = $(TC_PATH)arm-none-eabi-size
+
+ifeq ($(OS), Windows_NT)
+  MKDIR = gmkdir
+else
+  MKDIR = mkdir
+endif
+
+# === IDENTICAL CFLAGS as main Makefile ===
+CFLAGS += -W -Wall -Wextra -Wpedantic --std=c17 -Os -g3
+CFLAGS += -fno-diagnostics-show-caret -fno-common
+CFLAGS += -fdata-sections -ffunction-sections
+CFLAGS += -funsigned-char -funsigned-bitfields
+CFLAGS += -Wuninitialized
+CFLAGS += -Wshadow -Wdouble-promotion -Wundef
+CFLAGS += -mcpu=cortex-m0plus -mthumb
+CFLAGS += -MD -MP -MT $(BUILD)/$(*F).o -MF $(BUILD)/$(@F).d
+
+# === IDENTICAL LDFLAGS as main Makefile + nosys.specs for syscall stubs ===
+LDFLAGS += -mcpu=cortex-m0plus -mthumb
+LDFLAGS += -Wl,--gc-sections
+LDFLAGS += -Wl,--print-memory-usage
+LDFLAGS += -Wl,--script=./linker/samd21j17.ld
+LDFLAGS += --specs=nosys.specs
+
+INCLUDES += \
+  -I./include/samd21 \
+  -I./third_party/printf \
+  -I./third_party/qfplib \
+  -I./third_party/tinyusb/src \
+  -I./src/
+
+# Minimal set of source files needed for the benchmark
+SRCS += \
+  ./tests/test_util_bench_target.c \
+  ./src/startup_samd21.c \
+  ./src/board_def.c \
+  ./src/driver_CLK.c \
+  ./src/driver_DMAC.c \
+  ./src/driver_PORT.c \
+  ./src/driver_SAMD.c \
+  ./src/driver_SERCOM.c \
+  ./src/driver_TIME.c \
+  ./src/util.c \
+  ./third_party/printf/printf.c
+
+# === IDENTICAL DEFINES as main Makefile ===
+DEFINES += \
+  -D__SAMD21J17A__ \
+  -DDONT_USE_CMSIS_INIT \
+  -DCFG_TUSB_MCU=OPT_MCU_SAMD21
+
+CFLAGS += $(INCLUDES) $(DEFINES)
+
+OBJS = $(addprefix $(BUILD)/, $(notdir $(subst .c,.o, $(SRCS))))
+OBJS += $(BUILD)/qfplib-m0-full.o
+
+all: directory $(BUILD)/$(BIN).elf $(BUILD)/$(BIN).hex $(BUILD)/$(BIN).bin $(BUILD)/$(BIN).uf2 size
+
+$(BUILD)/$(BIN).elf: $(OBJS)
+	@echo LD $@
+	@$(CC) $(LDFLAGS) $(OBJS) $(LIBS) -o $@
+
+$(BUILD)/$(BIN).hex: $(BUILD)/$(BIN).elf
+	@echo OBJCOPY $@
+	@$(OBJCOPY) -O ihex $^ $@
+
+$(BUILD)/$(BIN).bin: $(BUILD)/$(BIN).elf
+	@echo OBJCOPY $@
+	@$(OBJCOPY) -O binary $^ $@
+
+$(BUILD)/$(BIN).uf2: $(BUILD)/$(BIN).bin
+	@echo BIN_TO_UF2 $@
+	@python3 ./scripts/bin_to_uf2.py $(BUILD)/$(BIN).bin $(BUILD)/$(BIN).uf2
+
+$(BUILD)/qfplib-m0-full.o:
+	@echo AS $@
+	@$(CC) $(CFLAGS) third_party/qfplib/qfplib-m0-full.s -c -o $@
+
+%.o:
+	@echo CC $@
+	@$(CC) $(CFLAGS) $(filter %/$(subst .o,.c,$(notdir $@)), $(SRCS)) -c -o $@
+
+directory:
+	@$(MKDIR) -p $(BUILD)
+	@$(MKDIR) -p $(OUT)
+
+size: $(BUILD)/$(BIN).elf
+	@echo size:
+	@$(SIZE) -t $^
+
+clean:
+	@echo clean
+	@-rm -rf $(BUILD)
+
+-include $(wildcard $(BUILD)/*.d)
diff --git a/tests/test_util_bench_target.c b/tests/test_util_bench_target.c
new file mode 100644
index 0000000..cb716c6
--- /dev/null
+++ b/tests/test_util_bench_target.c
@@ -0,0 +1,230 @@
+/*
+ * Benchmark for util functions on ARM Cortex-M0+ target
+ *
+ * This is a standalone test firmware that replaces emon32.c main().
+ * Build with: make -f tests/Makefile.bench
+ *
+ * Results are output via serial at 115200 baud.
+ */
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "emon32_samd.h"
+
+#include "board_def.h"
+#include "driver_CLK.h"
+#include "driver_PORT.h"
+#include "driver_SAMD.h"
+#include "driver_SERCOM.h"
+#include "driver_TIME.h"
+#include "emon32.h"
+
+#include "util.h"
+
+#include "printf.h"
+#include "qfplib-m0-full.h"
+
+/* itoa is non-standard but provided by newlib */
+extern char *itoa(int value, char *str, int base);
+
+/* Number of iterations for each test */
+#define ITERATIONS 1000
+
+/*************************************
+ * Stubs for unused dependencies
+ *************************************/
+
+/* Stub for event system (used by timer interrupt) */
+void emon32EventSet(const EVTSRC_t evt) { (void)evt; }
+
+/* Stub for sbrk (heap allocation) - we don't use malloc */
+void *_sbrk(int incr) {
+  (void)incr;
+  return (void *)-1;
+}
+
+/*************************************
+ * putchar_ for printf
+ *************************************/
+void putchar_(char c) { uartPutcBlocking(SERCOM_UART, c); }
+
+/*************************************
+ * Benchmark functions
+ *************************************/
+
+static void benchItoa(void) {
+  char          buf[32];
+  uint32_t      tStart, tEnd;
+  int32_t       testVals[] = {0, 1, -1, 123, -456, 999999, -999999};
+  const int32_t numVals    = sizeof(testVals) / sizeof(testVals[0]);
+
+  printf_("\r\n=== ITOA Benchmark ===\r\n");
+
+  /* utilItoa */
+  tStart = timerMicros();
+  for (int32_t i = 0; i < ITERATIONS; i++) {
+    for (int32_t v = 0; v < numVals; v++) {
+      utilItoa(buf, testVals[v], ITOA_BASE10);
+    }
+  }
+  tEnd = timerMicrosDelta(tStart);
+  printf_("utilItoa:  %" PRIu32 " us\r\n", tEnd);
+
+  /* itoa (newlib) */
+  tStart = timerMicros();
+  for (int32_t i = 0; i < ITERATIONS; i++) {
+    for (int32_t v = 0; v < numVals; v++) {
+      itoa(testVals[v], buf, 10);
+    }
+  }
+  tEnd = timerMicrosDelta(tStart);
+  printf_("itoa:      %" PRIu32 " us\r\n", tEnd);
+}
+
+static void benchFtoa(void) {
+  char          buf[32];
+  uint32_t      tStart, tEnd;
+  float         testVals[] = {0.0f, 1.0f, -1.0f, 123.45f, -456.78f};
+  const int32_t numVals    = sizeof(testVals) / sizeof(testVals[0]);
+
+  printf_("\r\n=== FTOA Benchmark ===\r\n");
+
+  /* utilFtoa */
+  tStart = timerMicros();
+  for (int32_t i = 0; i < ITERATIONS; i++) {
+    for (int32_t v = 0; v < numVals; v++) {
+      utilFtoa(buf, testVals[v]);
+    }
+  }
+  tEnd = timerMicrosDelta(tStart);
+  printf_("utilFtoa:  %" PRIu32 " us\r\n", tEnd);
+
+  /* No standard ftoa() exists */
+  printf_("ftoa:      N/A (no standard function)\r\n");
+}
+
+static void benchAtoi(void) {
+  uint32_t      tStart, tEnd;
+  const char   *testStrs[] = {"0", "1", "-1", "123", "-456", "999999"};
+  const int32_t numStrs    = sizeof(testStrs) / sizeof(testStrs[0]);
+
+  printf_("\r\n=== ATOI Benchmark ===\r\n");
+
+  /* utilAtoi (no longer modifies buffer) */
+  tStart = timerMicros();
+  for (int32_t i = 0; i < ITERATIONS; i++) {
+    for (int32_t v = 0; v < numStrs; v++) {
+      utilAtoi((char *)testStrs[v], ITOA_BASE10);
+    }
+  }
+  tEnd = timerMicrosDelta(tStart);
+  printf_("utilAtoi:  %" PRIu32 " us\r\n", tEnd);
+
+  /* atoi */
+  tStart = timerMicros();
+  for (int32_t i = 0; i < ITERATIONS; i++) {
+    for (int32_t v = 0; v < numStrs; v++) {
+      atoi(testStrs[v]);
+    }
+  }
+  tEnd = timerMicrosDelta(tStart);
+  printf_("atoi:      %" PRIu32 " us\r\n", tEnd);
+}
+
+static void benchAtof(void) {
+  char          buf[32];
+  uint32_t      tStart, tEnd;
+  const char   *testStrs[] = {"0.0", "1.0", "-1.0", "123.45", "-456.78"};
+  const int32_t numStrs    = sizeof(testStrs) / sizeof(testStrs[0]);
+
+  printf_("\r\n=== ATOF Benchmark ===\r\n");
+
+  /* utilAtof (no longer modifies buffer) */
+  tStart = timerMicros();
+  for (int32_t i = 0; i < ITERATIONS; i++) {
+    for (int32_t v = 0; v < numStrs; v++) {
+      /* Cast away const - utilAtof no longer modifies input */
+      utilAtof((char *)testStrs[v]);
+    }
+  }
+  tEnd = timerMicrosDelta(tStart);
+  printf_("utilAtof:  %" PRIu32 " us\r\n", tEnd);
+
+  /* atof */
+  tStart = timerMicros();
+  for (int32_t i = 0; i < ITERATIONS; i++) {
+    for (int32_t v = 0; v < numStrs; v++) {
+      atof(testStrs[v]);
+    }
+  }
+  tEnd = timerMicrosDelta(tStart);
+  printf_("atof:      %" PRIu32 " us\r\n", tEnd);
+
+  (void)buf; /* Unused now */
+}
+
+static void benchCorrectness(void) {
+  char buf1[32], buf2[32];
+
+  printf_("\r\n=== Correctness Check ===\r\n");
+
+  /* ITOA */
+  int32_t testInts[] = {0, 1, -1, 123, -456, 999999};
+  printf_("ITOA:\r\n");
+  for (size_t i = 0; i < sizeof(testInts) / sizeof(testInts[0]); i++) {
+    utilItoa(buf1, testInts[i], ITOA_BASE10);
+    itoa(testInts[i], buf2, 10);
+    printf_("  %ld: util='%s' itoa='%s' %s\r\n", (long)testInts[i], buf1, buf2,
+            strcmp(buf1, buf2) == 0 ? "OK" : "MISMATCH");
+  }
+
+  /* FTOA - no standard comparison available */
+  float testFloats[] = {0.0f, 1.0f, -1.0f, 123.45f, -456.78f};
+  printf_("FTOA (no standard comparison):\r\n");
+  for (size_t i = 0; i < sizeof(testFloats) / sizeof(testFloats[0]); i++) {
+    utilFtoa(buf1, testFloats[i]);
+    printf_("  input=%.2f: util='%s'\r\n", (double)testFloats[i], buf1);
+  }
+}
+
+/*************************************
+ * Main entry point
+ *************************************/
+
+int main(void) {
+  /* Initialize clocks, timer, ports, and UART (TX only is enabled) */
+  clkSetup();
+  timerSetup();
+  portSetup();
+  sercomSetup();
+  uartEnableTx(SERCOM_UART);
+
+  /* Wait for UART to stabilize */
+  timerDelay_ms(100);
+
+  printf_("\r\n\r\n");
+  printf_("================================\r\n");
+  printf_("  Util Functions Benchmark\r\n");
+  printf_("  ARM Cortex-M0+ Target Test\r\n");
+  printf_("================================\r\n");
+  printf_("Iterations: %d per value\r\n", ITERATIONS);
+
+  benchItoa();
+  benchFtoa();
+  benchAtoi();
+  benchAtof();
+  benchCorrectness();
+
+  printf_("\r\n=== Benchmark Complete ===\r\n");
+  printf_("Note: utilAtoi/utilAtof do not modify the input buffer.\r\n");
+
+  /* All results have been printed: idle forever */
+  for (;;) {
+    samdSleepIdle();
+  }
+}

From 48b6c135f42d27097fe23e606230dd2b00529669 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Metrich?= <frederic.metrich@live.fr>
Date: Thu, 8 Jan 2026 17:38:08 +0100
Subject: [PATCH 2/4] Add const qualifier to read-only string parameters in
 util functions

Add const correctness to utilAtof() and utilAtoi() functions since they
only read from their string buffer parameters and never modify them.
This improves API safety by preventing accidental modifications and
allows these functions to accept string literals and const char*
arguments without requiring casts.

Co-Authored-By: Claude <noreply@anthropic.com>
---
 src/util.c | 4 ++--
 src/util.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/util.c b/src/util.c
index 3dbe772..35902f7 100644
--- a/src/util.c
+++ b/src/util.c
@@ -98,7 +98,7 @@ uint32_t utilItoa(char *pBuf, int32_t val, ITOA_BASE_t base) {
   return len + 1u;
 }
 
-ConvInt_t utilAtoi(char *pBuf, ITOA_BASE_t base) {
+ConvInt_t utilAtoi(const char *pBuf, ITOA_BASE_t base) {
   bool      isNegative = false;
   uint32_t  result     = 0;
   ConvInt_t conv       = {false, 0};
@@ -202,7 +202,7 @@ uint32_t utilFtoa(char *pBuf, float val) {
   return len + 1u;
 }
 
-ConvFloat_t utilAtof(char *pBuf) {
+ConvFloat_t utilAtof(const char *pBuf) {
   bool        isNegative = false;
   uint32_t    intPart    = 0;
   uint32_t    fracPart   = 0;
diff --git a/src/util.h b/src/util.h
index 5da49de..b4f6a5d 100644
--- a/src/util.h
+++ b/src/util.h
@@ -19,14 +19,14 @@ typedef struct ConvInt_ {
  *  @param [in] pBuf : pointer to string buffer
  *  @return converted float and status
  */
-ConvFloat_t utilAtof(char *pBuf);
+ConvFloat_t utilAtof(const char *pBuf);
 
 /*! @brief Convert null terminated string to integer, returns the value.
  *  @param [in] pBuf : pointer to string buffer
  *  @param [in] base : select base 10 or base 16 conversion
  *  @return converted integer and status
  */
-ConvInt_t utilAtoi(char *pBuf, ITOA_BASE_t base);
+ConvInt_t utilAtoi(const char *pBuf, ITOA_BASE_t base);
 
 /*! @brief Indicate if a character is printable
  *  @param [in] c : character to check

From 7deb87a800e6cdeebdf8dbc23c9bce849161eb61 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Metrich?= <frederic.metrich@live.fr>
Date: Thu, 8 Jan 2026 18:59:46 +0100
Subject: [PATCH 3/4] Use * 10 instead of shifts for multiplication

SAMD21 has a single-cycle hardware multiplier, so the compiler
optimizes `* 10` to a single `muls` instruction. The shift-based
approach ((x << 3) + (x << 1)) was not faster and hurt readability.

Co-Authored-By: Claude <noreply@anthropic.com>
---
 src/util.c | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/src/util.c b/src/util.c
index 35902f7..2003921 100644
--- a/src/util.c
+++ b/src/util.c
@@ -40,7 +40,7 @@ static inline uint32_t fastDiv10(uint32_t n) {
   q          = q + (q >> 8);
   q          = q + (q >> 16);
   q          = q >> 3;
-  uint32_t r = n - ((q << 3) + (q << 1)); /* r = n - q*10 */
+  uint32_t r = n - q * 10;
   return q + ((r + 6) >> 4);
 }
 
@@ -69,8 +69,8 @@ uint32_t utilItoa(char *pBuf, int32_t val, ITOA_BASE_t base) {
 
     while (uval != 0) {
       uint32_t q = fastDiv10(uval);
-      *--p = (char)('0' + (uval - ((q << 3) + (q << 1)))); /* uval - q*10 */
-      uval = q;
+      *--p       = (char)('0' + (uval - q * 10));
+      uval       = q;
     }
 
     if (neg) {
@@ -114,8 +114,7 @@ ConvInt_t utilAtoi(const char *pBuf, ITOA_BASE_t base) {
       if (!isnumeric(*pBuf)) {
         return conv;
       }
-      /* result = result * 10 + digit, using shifts for *10 */
-      result = (result << 3) + (result << 1) + (uint32_t)(*pBuf - '0');
+      result = result * 10 + (uint32_t)(*pBuf - '0');
       pBuf++;
     }
   } else {
@@ -171,9 +170,9 @@ uint32_t utilFtoa(char *pBuf, float val) {
 
   /* Write decimals (always 2 digits) using fast division */
   uint32_t q = fastDiv10(decimals);
-  *--p = (char)('0' + (decimals - ((q << 3) + (q << 1)))); /* decimals % 10 */
-  *--p = (char)('0' + q);                                  /* tens digit */
-  *--p = '.';
+  *--p       = (char)('0' + (decimals - q * 10));
+  *--p       = (char)('0' + q);
+  *--p       = '.';
 
   /* Write integer part */
   if (units == 0) {
@@ -181,7 +180,7 @@ uint32_t utilFtoa(char *pBuf, float val) {
   } else {
     while (units != 0) {
       q     = fastDiv10(units);
-      *--p  = (char)('0' + (units - ((q << 3) + (q << 1)))); /* units % 10 */
+      *--p  = (char)('0' + (units - q * 10));
       units = q;
     }
   }
@@ -223,12 +222,10 @@ ConvFloat_t utilAtof(const char *pBuf) {
     } else if (isnumeric(c)) {
       uint32_t digit = (uint32_t)(c - '0');
       if (inFraction) {
-        /* fracPart = fracPart * 10 + digit */
-        fracPart = (fracPart << 3) + (fracPart << 1) + digit;
-        fracDiv  = (fracDiv << 3) + (fracDiv << 1);
+        fracPart = fracPart * 10 + digit;
+        fracDiv  = fracDiv * 10;
       } else {
-        /* intPart = intPart * 10 + digit */
-        intPart = (intPart << 3) + (intPart << 1) + digit;
+        intPart = intPart * 10 + digit;
       }
     } else {
       /* Invalid character found */

From 8abbacc2e771c84e06f14585f7e16a8bac0d22f7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Metrich?= <frederic.metrich@live.fr>
Date: Sun, 11 Jan 2026 22:45:58 +0100
Subject: [PATCH 4/4] Simplify utilFtoa: use direct multiply instead of shifts

Replace shift-based *100 with direct multiplication.
The SAMD21 has a single-cycle 32-bit multiplier, making shifts unnecessary.

Co-Authored-By: Claude <noreply@anthropic.com>
---
 src/util.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/src/util.c b/src/util.c
index 2003921..5af52a8 100644
--- a/src/util.c
+++ b/src/util.c
@@ -161,12 +161,8 @@ uint32_t utilFtoa(char *pBuf, float val) {
   }
 
   /* Extract integer and fractional parts */
-  units           = (uint32_t)qfp_float2int_z(val);
-  /* decimals = (val * 100) - (units * 100), using shifts for *100 */
-  uint32_t val100 = (uint32_t)qfp_float2int_z(qfp_fmul(val, 100.0f));
-  uint32_t units100 =
-      (units << 6) + (units << 5) + (units << 2); /* units * 100 */
-  decimals = val100 - units100;
+  units    = (uint32_t)qfp_float2int_z(val);
+  decimals = (uint32_t)qfp_float2int_z(qfp_fmul(val, 100.0f)) - (units * 100);
 
   /* Write decimals (always 2 digits) using fast division */
   uint32_t q = fastDiv10(decimals);