diff --git a/.gitignore b/.gitignore index 86a218e..19eadfa 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ *.tgz bin/emon32* build/ +build_bench/ cm-test-sine.csv compile_commands.json emon32_build_info.c diff --git a/src/util.c b/src/util.c index 1d75e75..5af52a8 100644 --- a/src/util.c +++ b/src/util.c @@ -33,94 +33,110 @@ uint32_t utilStrlen(const char *pBuf) { return charCnt; } +/* Fast divide by 10 using shifts/adds only (no hardware divide) */ +static inline uint32_t fastDiv10(uint32_t n) { + uint32_t q = (n >> 1) + (n >> 2); + q = q + (q >> 4); + q = q + (q >> 8); + q = q + (q >> 16); + q = q >> 3; + uint32_t r = n - q * 10; + return q + ((r + 6) >> 4); +} + uint32_t utilItoa(char *pBuf, int32_t val, ITOA_BASE_t base) { - uint32_t charCnt = 0; - bool isNegative = false; - char *const pBase = pBuf; + char buf[12]; /* -2147483648 = 11 chars + null */ + char *p = &buf[11]; + uint32_t uval; + bool neg = false; + + *p = '\0'; /* Handle 0 explicitly */ if (0 == val) { - *pBuf++ = '0'; - *pBuf = '\0'; + pBuf[0] = '0'; + pBuf[1] = '\0'; return 2u; } - /* Base 10 can be signed, and has a divide in */ if (ITOA_BASE10 == base) { if (val < 0) { - isNegative = true; - val = -val; + neg = true; + uval = (uint32_t)(-val); + } else { + uval = (uint32_t)val; } - while (0 != val) { - *pBuf++ = (val % 10u) + '0'; - val = val / 10u; - charCnt++; + while (uval != 0) { + uint32_t q = fastDiv10(uval); + *--p = (char)('0' + (uval - q * 10)); + uval = q; } - if (isNegative) { - *pBuf++ = '-'; - charCnt++; + if (neg) { + *--p = '-'; } } else { - const char itohex[] = "0123456789abcdef"; - uint32_t val_u = (uint32_t)val; + static const char itohex[] = "0123456789abcdef"; + uint32_t val_u = (uint32_t)val; while (0 != val_u) { - *pBuf++ = itohex[(val_u & 0xFu)]; + *--p = itohex[val_u & 0xFu]; val_u >>= 4; - charCnt++; } } - /* Terminate and return */ - *pBuf = '\0'; - charCnt++; + /* Copy to output buffer */ + char *dst = pBuf; + uint32_t len = 0; + while (*p) { + *dst++ = 
*p++; + len++; + } + *dst = '\0'; - utilStrReverse(pBase, charCnt - 1u); - return charCnt; + return len + 1u; } -ConvInt_t utilAtoi(char *pBuf, ITOA_BASE_t base) { +ConvInt_t utilAtoi(const char *pBuf, ITOA_BASE_t base) { bool isNegative = false; - uint32_t len; - uint32_t mulCnt = 1; - ConvInt_t conv = {false, 0}; + uint32_t result = 0; + ConvInt_t conv = {false, 0}; if ('-' == *pBuf) { isNegative = true; pBuf++; } - /* Reverse string and convert */ - len = utilStrlen(pBuf); - utilStrReverse(pBuf, len); - + /* Process left-to-right, no string reversal needed */ if (ITOA_BASE10 == base) { while (*pBuf) { if (!isnumeric(*pBuf)) { return conv; } - conv.val += ((*pBuf++) - '0') * mulCnt; - mulCnt *= 10; - } - if (isNegative) { - conv.val = -conv.val; + result = result * 10 + (uint32_t)(*pBuf - '0'); + pBuf++; } } else { while (*pBuf) { - if (('a' <= *pBuf) && ('f' >= *pBuf)) { - conv.val += ((*pBuf) - 'a' + 10u) * mulCnt; - } else if (isnumeric(*pBuf)) { - conv.val += ((*pBuf) - '0') * mulCnt; + char c = *pBuf; + uint32_t digit; + if (('a' <= c) && ('f' >= c)) { + digit = (uint32_t)(c - 'a' + 10); + } else if (('A' <= c) && ('F' >= c)) { + digit = (uint32_t)(c - 'A' + 10); + } else if (isnumeric(c)) { + digit = (uint32_t)(c - '0'); } else { return conv; } + /* result = result * 16 + digit */ + result = (result << 4) + digit; pBuf++; - mulCnt *= 16; } } + conv.val = isNegative ? 
-(int32_t)result : (int32_t)result; conv.valid = true; return conv; } @@ -131,81 +147,93 @@ bool utilCharPrintable(const char c) { } uint32_t utilFtoa(char *pBuf, float val) { - uint32_t charCnt = 0; - bool isNegative = false; - char *const pBase = pBuf; + char buf[16]; /* Enough for -2147483648.99 + null */ + char *p = &buf[15]; + uint32_t units; + uint32_t decimals; + bool neg = false; - uint16_t decimals; - int32_t units; + *p = '\0'; if (val < 0.0f) { - isNegative = true; - val = qfp_fmul(val, -1.0f); + neg = true; + val = qfp_fmul(val, -1.0f); } - decimals = qfp_float2int_z(qfp_fmul(val, 100.0f)) % 100; - units = qfp_float2int_z(val); - charCnt += 3u; - *pBuf++ = (decimals % 10) + '0'; - decimals = decimals / 10; - *pBuf++ = (decimals % 10) + '0'; - *pBuf++ = '.'; + /* Extract integer and fractional parts */ + units = (uint32_t)qfp_float2int_z(val); + decimals = (uint32_t)qfp_float2int_z(qfp_fmul(val, 100.0f)) - (units * 100); - if (0 == units) { - *pBuf++ = '0'; - charCnt++; - } + /* Write decimals (always 2 digits) using fast division */ + uint32_t q = fastDiv10(decimals); + *--p = (char)('0' + (decimals - q * 10)); + *--p = (char)('0' + q); + *--p = '.'; - while (0 != units) { - *pBuf++ = (units % 10) + '0'; - units = units / 10; - charCnt++; + /* Write integer part */ + if (units == 0) { + *--p = '0'; + } else { + while (units != 0) { + q = fastDiv10(units); + *--p = (char)('0' + (units - q * 10)); + units = q; + } } - if (isNegative) { - *pBuf++ = '-'; - charCnt++; + if (neg) { + *--p = '-'; } - /* Terminate and return */ - *pBuf = '\0'; - charCnt++; + /* Copy to output buffer */ + char *dst = pBuf; + uint32_t len = 0; + while (*p) { + *dst++ = *p++; + len++; + } + *dst = '\0'; - utilStrReverse(pBase, charCnt - 1u); - return charCnt; + return len + 1u; } -ConvFloat_t utilAtof(char *pBuf) { +ConvFloat_t utilAtof(const char *pBuf) { bool isNegative = false; - uint32_t len = 0; - uint32_t mulCnt = 1u; - uint32_t fraction = 0u; + uint32_t intPart = 0; + 
uint32_t fracPart = 0; + uint32_t fracDiv = 1; + bool inFraction = false; ConvFloat_t conv = {false, 0.0f}; if ('-' == *pBuf) { isNegative = true; pBuf++; } - len = utilStrlen(pBuf); - utilStrReverse(pBuf, len); + /* Process left-to-right, no string reversal needed */ while (*pBuf) { const char c = *pBuf++; - /* Allow period/comma delimit, divide down if found */ if (('.' == c) || (',' == c)) { - fraction = mulCnt; + inFraction = true; } else if (isnumeric(c)) { - const float toAdd = qfp_uint2float((c - '0') * mulCnt); - conv.val = qfp_fadd(conv.val, toAdd); - mulCnt *= 10; + uint32_t digit = (uint32_t)(c - '0'); + if (inFraction) { + fracPart = fracPart * 10 + digit; + fracDiv = fracDiv * 10; + } else { + intPart = intPart * 10 + digit; + } } else { /* Invalid character found */ return conv; } } - if (0 != fraction) { - conv.val = qfp_fdiv(conv.val, qfp_uint2float(fraction)); + /* Convert to float only at the end */ + conv.val = qfp_uint2float(intPart); + if (fracDiv > 1u) { + conv.val = qfp_fadd( + conv.val, qfp_fdiv(qfp_uint2float(fracPart), qfp_uint2float(fracDiv))); } if (isNegative) { diff --git a/src/util.h b/src/util.h index 5da49de..b4f6a5d 100644 --- a/src/util.h +++ b/src/util.h @@ -19,14 +19,14 @@ typedef struct ConvInt_ { * @param [in] pBuf : pointer to string buffer * @return converted float and status */ -ConvFloat_t utilAtof(char *pBuf); +ConvFloat_t utilAtof(const char *pBuf); /*! @brief Convert null terminated string to integer, returns the value. * @param [in] pBuf : pointer to string buffer * @param [in] base : select base 10 or base 16 conversion * @return converted integer and status */ -ConvInt_t utilAtoi(char *pBuf, ITOA_BASE_t base); +ConvInt_t utilAtoi(const char *pBuf, ITOA_BASE_t base); /*! 
@brief Indicate if a character is printable * @param [in] c : character to check diff --git a/tests/Makefile.bench b/tests/Makefile.bench new file mode 100644 index 0000000..b4aef7b --- /dev/null +++ b/tests/Makefile.bench @@ -0,0 +1,114 @@ +############################################################################## +# Makefile for util benchmark test on ARM Cortex-M0+ target +# +# Uses IDENTICAL compiler/linker settings as main Makefile for fair comparison. +# Only addition: --specs=nosys.specs for syscall stubs (sprintf/strtol need them) +# +# Usage: make -f tests/Makefile.bench +############################################################################## +BUILD = build_bench +BIN = util_bench +OUT = bin +############################################################################## +.PHONY: all directory clean size + +# Path to toolchain, e.g. /path/to/bin/ Leave empty if already on path. +TC_PATH = +CC = $(TC_PATH)arm-none-eabi-gcc +OBJCOPY = $(TC_PATH)arm-none-eabi-objcopy +SIZE = $(TC_PATH)arm-none-eabi-size + +ifeq ($(OS), Windows_NT) + MKDIR = gmkdir +else + MKDIR = mkdir +endif + +# === IDENTICAL CFLAGS as main Makefile === +CFLAGS += -W -Wall -Wextra -Wpedantic --std=c17 -Os -g3 +CFLAGS += -fno-diagnostics-show-caret -fno-common +CFLAGS += -fdata-sections -ffunction-sections +CFLAGS += -funsigned-char -funsigned-bitfields +CFLAGS += -Wuninitialized +CFLAGS += -Wshadow -Wdouble-promotion -Wundef +CFLAGS += -mcpu=cortex-m0plus -mthumb +CFLAGS += -MD -MP -MT $(BUILD)/$(*F).o -MF $(BUILD)/$(@F).d + +# === IDENTICAL LDFLAGS as main Makefile + nosys.specs for syscall stubs === +LDFLAGS += -mcpu=cortex-m0plus -mthumb +LDFLAGS += -Wl,--gc-sections +LDFLAGS += -Wl,--print-memory-usage +LDFLAGS += -Wl,--script=./linker/samd21j17.ld +LDFLAGS += --specs=nosys.specs + +INCLUDES += \ + -I./include/samd21 \ + -I./third_party/printf \ + -I./third_party/qfplib \ + -I./third_party/tinyusb/src \ + -I./src/ + +# Minimal set of source files needed for the benchmark +SRCS 
+= \ + ./tests/test_util_bench_target.c \ + ./src/startup_samd21.c \ + ./src/board_def.c \ + ./src/driver_CLK.c \ + ./src/driver_DMAC.c \ + ./src/driver_PORT.c \ + ./src/driver_SAMD.c \ + ./src/driver_SERCOM.c \ + ./src/driver_TIME.c \ + ./src/util.c \ + ./third_party/printf/printf.c + +# === IDENTICAL DEFINES as main Makefile === +DEFINES += \ + -D__SAMD21J17A__ \ + -DDONT_USE_CMSIS_INIT \ + -DCFG_TUSB_MCU=OPT_MCU_SAMD21 + +CFLAGS += $(INCLUDES) $(DEFINES) + +OBJS = $(addprefix $(BUILD)/, $(notdir $(subst .c,.o, $(SRCS)))) +OBJS += $(BUILD)/qfplib-m0-full.o + +all: directory $(BUILD)/$(BIN).elf $(BUILD)/$(BIN).hex $(BUILD)/$(BIN).bin $(BUILD)/$(BIN).uf2 size + +$(BUILD)/$(BIN).elf: $(OBJS) + @echo LD $@ + @$(CC) $(LDFLAGS) $(OBJS) $(LIBS) -o $@ + +$(BUILD)/$(BIN).hex: $(BUILD)/$(BIN).elf + @echo OBJCOPY $@ + @$(OBJCOPY) -O ihex $^ $@ + +$(BUILD)/$(BIN).bin: $(BUILD)/$(BIN).elf + @echo OBJCOPY $@ + @$(OBJCOPY) -O binary $^ $@ + +$(BUILD)/$(BIN).uf2: $(BUILD)/$(BIN).bin + @echo BIN_TO_UF2 $@ + @python3 ./scripts/bin_to_uf2.py $(BUILD)/$(BIN).bin $(BUILD)/$(BIN).uf2 + +$(BUILD)/qfplib-m0-full.o: + @echo AS $@ + @$(CC) $(CFLAGS) third_party/qfplib/qfplib-m0-full.s -c -o $@ + +%.o: + @echo CC $@ + @$(CC) $(CFLAGS) $(filter %/$(subst .o,.c,$(notdir $@)), $(SRCS)) -c -o $@ + +directory: + @$(MKDIR) -p $(BUILD) + @$(MKDIR) -p $(OUT) + +size: $(BUILD)/$(BIN).elf + @echo size: + @$(SIZE) -t $^ + +clean: + @echo clean + @-rm -rf $(BUILD) + +-include $(wildcard $(BUILD)/*.d) diff --git a/tests/test_util_bench_target.c b/tests/test_util_bench_target.c new file mode 100644 index 0000000..cb716c6 --- /dev/null +++ b/tests/test_util_bench_target.c @@ -0,0 +1,230 @@ +/* + * Benchmark for util functions on ARM Cortex-M0+ target + * + * This is a standalone test firmware that replaces emon32.c main(). + * Build with: make -f tests/Makefile.bench + * + * Results are output via serial at 115200 baud.
+ */ + +#include +#include +#include +#include +#include +#include + +#include "emon32_samd.h" + +#include "board_def.h" +#include "driver_CLK.h" +#include "driver_PORT.h" +#include "driver_SAMD.h" +#include "driver_SERCOM.h" +#include "driver_TIME.h" +#include "emon32.h" + +#include "util.h" + +#include "printf.h" +#include "qfplib-m0-full.h" + +/* itoa is non-standard but provided by newlib */ +extern char *itoa(int value, char *str, int base); + +/* Number of iterations for each test */ +#define ITERATIONS 1000 + +/************************************* + * Stubs for unused dependencies + *************************************/ + +/* Stub for event system (used by timer interrupt) */ +void emon32EventSet(const EVTSRC_t evt) { (void)evt; } + +/* Stub for sbrk (heap allocation) - we don't use malloc */ +void *_sbrk(int incr) { + (void)incr; + return (void *)-1; +} + +/************************************* + * putchar_ for printf + *************************************/ +void putchar_(char c) { uartPutcBlocking(SERCOM_UART, c); } + +/************************************* + * Benchmark functions + *************************************/ + +static void benchItoa(void) { + char buf[32]; + uint32_t tStart, tEnd; + int32_t testVals[] = {0, 1, -1, 123, -456, 999999, -999999}; + const int32_t numVals = sizeof(testVals) / sizeof(testVals[0]); + + printf_("\r\n=== ITOA Benchmark ===\r\n"); + + /* utilItoa */ + tStart = timerMicros(); + for (int32_t i = 0; i < ITERATIONS; i++) { + for (int32_t v = 0; v < numVals; v++) { + utilItoa(buf, testVals[v], ITOA_BASE10); + } + } + tEnd = timerMicrosDelta(tStart); + printf_("utilItoa: %" PRIu32 " us\r\n", tEnd); + + /* itoa (newlib) */ + tStart = timerMicros(); + for (int32_t i = 0; i < ITERATIONS; i++) { + for (int32_t v = 0; v < numVals; v++) { + itoa(testVals[v], buf, 10); + } + } + tEnd = timerMicrosDelta(tStart); + printf_("itoa: %" PRIu32 " us\r\n", tEnd); +} + +static void benchFtoa(void) { + char buf[32]; + uint32_t tStart, tEnd; 
+ float testVals[] = {0.0f, 1.0f, -1.0f, 123.45f, -456.78f}; + const int32_t numVals = sizeof(testVals) / sizeof(testVals[0]); + + printf_("\r\n=== FTOA Benchmark ===\r\n"); + + /* utilFtoa */ + tStart = timerMicros(); + for (int32_t i = 0; i < ITERATIONS; i++) { + for (int32_t v = 0; v < numVals; v++) { + utilFtoa(buf, testVals[v]); + } + } + tEnd = timerMicrosDelta(tStart); + printf_("utilFtoa: %" PRIu32 " us\r\n", tEnd); + + /* No standard ftoa() exists */ + printf_("ftoa: N/A (no standard function)\r\n"); +} + +static void benchAtoi(void) { + uint32_t tStart, tEnd; + const char *testStrs[] = {"0", "1", "-1", "123", "-456", "999999"}; + const int32_t numStrs = sizeof(testStrs) / sizeof(testStrs[0]); + + printf_("\r\n=== ATOI Benchmark ===\r\n"); + + /* utilAtoi (no longer modifies buffer) */ + tStart = timerMicros(); + for (int32_t i = 0; i < ITERATIONS; i++) { + for (int32_t v = 0; v < numStrs; v++) { + utilAtoi((char *)testStrs[v], ITOA_BASE10); + } + } + tEnd = timerMicrosDelta(tStart); + printf_("utilAtoi: %" PRIu32 " us\r\n", tEnd); + + /* atoi */ + tStart = timerMicros(); + for (int32_t i = 0; i < ITERATIONS; i++) { + for (int32_t v = 0; v < numStrs; v++) { + atoi(testStrs[v]); + } + } + tEnd = timerMicrosDelta(tStart); + printf_("atoi: %" PRIu32 " us\r\n", tEnd); +} + +static void benchAtof(void) { + char buf[32]; + uint32_t tStart, tEnd; + const char *testStrs[] = {"0.0", "1.0", "-1.0", "123.45", "-456.78"}; + const int32_t numStrs = sizeof(testStrs) / sizeof(testStrs[0]); + + printf_("\r\n=== ATOF Benchmark ===\r\n"); + + /* utilAtof (no longer modifies buffer) */ + tStart = timerMicros(); + for (int32_t i = 0; i < ITERATIONS; i++) { + for (int32_t v = 0; v < numStrs; v++) { + /* Cast away const - utilAtof no longer modifies input */ + utilAtof((char *)testStrs[v]); + } + } + tEnd = timerMicrosDelta(tStart); + printf_("utilAtof: %" PRIu32 " us\r\n", tEnd); + + /* atof */ + tStart = timerMicros(); + for (int32_t i = 0; i < ITERATIONS; i++) { + for 
(int32_t v = 0; v < numStrs; v++) { + atof(testStrs[v]); + } + } + tEnd = timerMicrosDelta(tStart); + printf_("atof: %" PRIu32 " us\r\n", tEnd); + + (void)buf; /* Unused now */ +} + +static void benchCorrectness(void) { + char buf1[32], buf2[32]; + + printf_("\r\n=== Correctness Check ===\r\n"); + + /* ITOA */ + int32_t testInts[] = {0, 1, -1, 123, -456, 999999}; + printf_("ITOA:\r\n"); + for (size_t i = 0; i < sizeof(testInts) / sizeof(testInts[0]); i++) { + utilItoa(buf1, testInts[i], ITOA_BASE10); + itoa(testInts[i], buf2, 10); + printf_(" %ld: util='%s' itoa='%s' %s\r\n", (long)testInts[i], buf1, buf2, + strcmp(buf1, buf2) == 0 ? "OK" : "MISMATCH"); + } + + /* FTOA - no standard comparison available */ + float testFloats[] = {0.0f, 1.0f, -1.0f, 123.45f, -456.78f}; + printf_("FTOA (no standard comparison):\r\n"); + for (size_t i = 0; i < sizeof(testFloats) / sizeof(testFloats[0]); i++) { + utilFtoa(buf1, testFloats[i]); + printf_(" input=%.2f: util='%s'\r\n", (double)testFloats[i], buf1); + } +} + +/************************************* + * Main entry point + *************************************/ + +int main(void) { + /* Initialize clocks, ports, and UART */ + clkSetup(); + timerSetup(); + portSetup(); + sercomSetup(); + uartEnableTx(SERCOM_UART); + + /* Wait for UART to stabilize */ + timerDelay_ms(100); + + printf_("\r\n\r\n"); + printf_("================================\r\n"); + printf_(" Util Functions Benchmark\r\n"); + printf_(" ARM Cortex-M0+ Target Test\r\n"); + printf_("================================\r\n"); + printf_("Iterations: %d per value\r\n", ITERATIONS); + + benchItoa(); + benchFtoa(); + benchAtoi(); + benchAtof(); + benchCorrectness(); + + printf_("\r\n=== Benchmark Complete ===\r\n"); + printf_("Note: utilAtoi/utilAtof modify input buffer.\r\n"); + + /* Infinite loop */ + for (;;) { + samdSleepIdle(); + } +}