From a8a1dd1c1f5854274bf743841e67b1fe977d75dc Mon Sep 17 00:00:00 2001 From: Omri Sarig Date: Mon, 14 Jul 2025 10:55:07 +0200 Subject: [PATCH 1/3] scripts/logging: Add support for numbered %ll and %hh Python does not support values of long-long (ll) or short-short (hh) in string-formatting. The current parser is correctly removing the first "h" or "l" from the format, to make it possible to correctly parse these strings when they are in the dictionary. However, the current implementation does not correctly handle the case of having a number before the "ll" or "hh" argument - which is supported by both C and Python. This commit updates the parser to correctly handle these cases, by changing the simple replace operator with a regex replace, which is able to correctly handle this case. Before this commit, the string of: "This is my long variable: %08llX" will stay the same, which will throw a ValueError when parsing this dictionary message: "ValueError: unsupported format character 'h' (0x68) at index ". After this commit, the string will be correctly fixed to "This is my long variable: %08lX" which is parsed correctly. 
Signed-off-by: Omri Sarig --- scripts/logging/dictionary/dictionary_parser/log_parser.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/logging/dictionary/dictionary_parser/log_parser.py b/scripts/logging/dictionary/dictionary_parser/log_parser.py index 4df5dbb0d05e7..ee37dda4f0652 100644 --- a/scripts/logging/dictionary/dictionary_parser/log_parser.py +++ b/scripts/logging/dictionary/dictionary_parser/log_parser.py @@ -9,6 +9,7 @@ """ import abc +import re from colorama import Fore @@ -36,13 +37,13 @@ def formalize_fmt_string(fmt_str): for spec in ['d', 'i', 'o', 'u', 'x', 'X']: # Python doesn't support %ll for integer specifiers, so remove extra 'l' - new_str = new_str.replace("%ll" + spec, "%l" + spec) + new_str = re.sub(r'%(\#?\d*)ll' + spec, r'%\1l' + spec, new_str) if spec in ['x', 'X']: - new_str = new_str.replace("%#ll" + spec, "%#l" + spec) + new_str = re.sub(r'%\#(\d*)ll' + spec, r'%#\1l' + spec, new_str) # Python doesn't support %hh for integer specifiers, so remove extra 'h' - new_str = new_str.replace("%hh" + spec, "%h" + spec) + new_str = re.sub(r'%(\#?\d*)hh' + spec, r'%\1h' + spec, new_str) # No %p for pointer either, so use %x new_str = new_str.replace("%p", "0x%x") From 14bed32439fe7ac95b67b46734263de86037f2f4 Mon Sep 17 00:00:00 2001 From: Omri Sarig Date: Mon, 14 Jul 2025 13:19:45 +0200 Subject: [PATCH 2/3] scripts/logging: Handle partly read packets The current implementation of the serial log parser is running in a loop - every 2 seconds it checks if there is any information ready to be read, and in this case, it reads this information and parses it. In case the last packet of the information is not fully available, the script will read only the first part of the packet, and will fail when parsing, ending the run with an error. This should not be the case, as it is not an error to have only part of the packet in the buffer. 
This commit fixes this problem - now, instead of failing because the parser does not have enough information, the parser will parse all the full packets it has, and keep the last, partial packet in the queue, to be parsed together with the next chunk of data. This is done by updating the log parsers to return the amount of parsed data when trying to parse the information, and updating the calling scripts to correctly handle this new return value. Additionally, the parser now fails quietly in case of having a partial message, and throws an exception for any other kind of error in the parsing (instead of returning a boolean return code). In addition to the partial packet handling, the current commit also makes the following minor improvements: * parserlib now fails by throwing an exception, instead of exiting - this is done to make it possible to choose a different handling for the errors from the calling code, if needed. * The dictionary and parser are now created before the parse operation. This makes the uart parser more efficient, and also removes the previously duplicated messages of build id, target architecture and endianness (which were printed every time new information was received from the UART). 
Signed-off-by: Omri Sarig --- .../dictionary_parser/log_parser_v1.py | 70 +++++++++++++----- .../dictionary_parser/log_parser_v3.py | 73 ++++++++++++++----- scripts/logging/dictionary/log_parser.py | 10 ++- scripts/logging/dictionary/log_parser_uart.py | 8 +- scripts/logging/dictionary/parserlib.py | 42 ++++++----- 5 files changed, 144 insertions(+), 59 deletions(-) diff --git a/scripts/logging/dictionary/dictionary_parser/log_parser_v1.py b/scripts/logging/dictionary/dictionary_parser/log_parser_v1.py index e73f1cd36db7a..bb62ad2fa385e 100644 --- a/scripts/logging/dictionary/dictionary_parser/log_parser_v1.py +++ b/scripts/logging/dictionary/dictionary_parser/log_parser_v1.py @@ -255,6 +255,18 @@ def print_hexdump(hex_data, prefix_len, color): print(f"{color}%s%s%s|%s{Fore.RESET}" % ((" " * prefix_len), hex_vals, hex_padding, chr_vals)) + def get_full_msg_hdr_size(self): + """Get the size of the full message header""" + return struct.calcsize(self.fmt_msg_type) + \ + struct.calcsize(self.fmt_msg_hdr) + \ + struct.calcsize(self.fmt_msg_timestamp) + + def get_normal_msg_size(self, logdata, offset): + """Get the needed size of the normal log message at offset""" + log_desc, _ = struct.unpack_from(self.fmt_msg_hdr, logdata, offset) + pkg_len = (log_desc >> 6) & int(math.pow(2, 10) - 1) + data_len = (log_desc >> 16) & int(math.pow(2, 12) - 1) + return self.get_full_msg_hdr_size() + pkg_len + data_len def parse_one_normal_msg(self, logdata, offset): """Parse one normal log message and print the encoded message""" @@ -341,33 +353,53 @@ def parse_one_normal_msg(self, logdata, offset): # Point to next message return next_msg_offset + def parse_one_msg(self, logdata, offset): + if offset + struct.calcsize(self.fmt_msg_type) > len(logdata): + return False, offset - def parse_log_data(self, logdata, debug=False): - """Parse binary log data and print the encoded log messages""" - offset = 0 + # Get message type + msg_type = struct.unpack_from(self.fmt_msg_type, logdata, 
offset)[0] - while offset < len(logdata): - # Get message type - msg_type = struct.unpack_from(self.fmt_msg_type, logdata, offset)[0] + if msg_type == MSG_TYPE_DROPPED: + + if offset + struct.calcsize(self.fmt_dropped_cnt) > len(logdata): + return False, offset offset += struct.calcsize(self.fmt_msg_type) - if msg_type == MSG_TYPE_DROPPED: - num_dropped = struct.unpack_from(self.fmt_dropped_cnt, logdata, offset) - offset += struct.calcsize(self.fmt_dropped_cnt) + num_dropped = struct.unpack_from(self.fmt_dropped_cnt, logdata, offset) + offset += struct.calcsize(self.fmt_dropped_cnt) - print(f"--- {num_dropped} messages dropped ---") + print(f"--- {num_dropped} messages dropped ---") - elif msg_type == MSG_TYPE_NORMAL: - ret = self.parse_one_normal_msg(logdata, offset) - if ret is None: - return False + elif msg_type == MSG_TYPE_NORMAL: - offset = ret + if ((offset + self.get_full_msg_hdr_size() > len(logdata)) or + (offset + self.get_normal_msg_size(logdata, offset) > len(logdata))): + return False, offset - else: - logger.error("------ Unknown message type: %s", msg_type) - return False + offset += struct.calcsize(self.fmt_msg_type) + + ret = self.parse_one_normal_msg(logdata, offset) + if ret is None: + raise ValueError("Error parsing normal log message") + + offset = ret + + else: + logger.error("------ Unknown message type: %s", msg_type) + raise ValueError(f"Unknown message type: {msg_type}") + + return True, offset + + def parse_log_data(self, logdata, debug=False): + """Parse binary log data and print the encoded log messages""" + offset = 0 + still_parsing = True + + while offset < len(logdata) and still_parsing: + still_parsing, offset = self.parse_one_msg(logdata, offset) + + return offset - return True colorama.init() diff --git a/scripts/logging/dictionary/dictionary_parser/log_parser_v3.py b/scripts/logging/dictionary/dictionary_parser/log_parser_v3.py index 954d9622ee696..69fe8469a9cda 100644 --- 
a/scripts/logging/dictionary/dictionary_parser/log_parser_v3.py +++ b/scripts/logging/dictionary/dictionary_parser/log_parser_v3.py @@ -260,6 +260,20 @@ def print_hexdump(hex_data, prefix_len, color): print(f"{color}%s%s%s|%s{Fore.RESET}" % ((" " * prefix_len), hex_vals, hex_padding, chr_vals)) + def get_full_msg_hdr_size(self): + """Get the size of the full message header""" + return struct.calcsize(self.fmt_msg_type) + \ + struct.calcsize(self.fmt_msg_hdr) + \ + struct.calcsize(self.fmt_msg_timestamp) + + def get_normal_msg_size(self, logdata, offset): + """Get the needed size of the normal log message at offset""" + _, pkg_len, data_len, _ = struct.unpack_from( + self.fmt_msg_hdr, + logdata, + offset + struct.calcsize(self.fmt_msg_type), + ) + return self.get_full_msg_hdr_size() + pkg_len + data_len def parse_one_normal_msg(self, logdata, offset): """Parse one normal log message and print the encoded message""" @@ -350,33 +364,54 @@ def parse_one_normal_msg(self, logdata, offset): # Point to next message return next_msg_offset + def parse_one_msg(self, logdata, offset): + if offset + struct.calcsize(self.fmt_msg_type) > len(logdata): + return False, offset - def parse_log_data(self, logdata, debug=False): - """Parse binary log data and print the encoded log messages""" - offset = 0 + # Get message type + msg_type = struct.unpack_from(self.fmt_msg_type, logdata, offset)[0] + + if msg_type == MSG_TYPE_DROPPED: + + if offset + struct.calcsize(self.fmt_dropped_cnt) > len(logdata): + return False, offset - while offset < len(logdata): - # Get message type - msg_type = struct.unpack_from(self.fmt_msg_type, logdata, offset)[0] offset += struct.calcsize(self.fmt_msg_type) - if msg_type == MSG_TYPE_DROPPED: - num_dropped = struct.unpack_from(self.fmt_dropped_cnt, logdata, offset) - offset += struct.calcsize(self.fmt_dropped_cnt) + num_dropped = struct.unpack_from(self.fmt_dropped_cnt, logdata, offset) + offset += struct.calcsize(self.fmt_dropped_cnt) - print(f"--- 
{num_dropped} messages dropped ---") + print(f"--- {num_dropped} messages dropped ---") - elif msg_type == MSG_TYPE_NORMAL: - ret = self.parse_one_normal_msg(logdata, offset) - if ret is None: - return False + elif msg_type == MSG_TYPE_NORMAL: - offset = ret + if ((offset + self.get_full_msg_hdr_size() > len(logdata)) or + (offset + self.get_normal_msg_size(logdata, offset) > len(logdata))): + return False, offset - else: - logger.error("------ Unknown message type: %s", msg_type) - return False + offset += struct.calcsize(self.fmt_msg_type) + + ret = self.parse_one_normal_msg(logdata, offset) + if ret is None: + raise ValueError("Error parsing normal log message") + + offset = ret + + else: + logger.error("------ Unknown message type: %s", msg_type) + raise ValueError(f"Unknown message type: {msg_type}") + + return True, offset + + def parse_log_data(self, logdata, debug=False): + """Parse binary log data and print the encoded log messages""" + offset = 0 + still_parsing = True + + while offset < len(logdata) and still_parsing: + still_parsing, offset = self.parse_one_msg(logdata, offset) + + return offset - return True colorama.init() diff --git a/scripts/logging/dictionary/log_parser.py b/scripts/logging/dictionary/log_parser.py index 8d640e8c38ce2..b282c04098fa9 100755 --- a/scripts/logging/dictionary/log_parser.py +++ b/scripts/logging/dictionary/log_parser.py @@ -109,12 +109,20 @@ def main(): else: logger.setLevel(logging.INFO) + log_parser = parserlib.get_log_parser(args.dbfile, logger) + logdata = read_log_file(args) if logdata is None: logger.error("ERROR: cannot read log from file: %s, exiting...", args.logfile) sys.exit(1) - parserlib.parser(logdata, args.dbfile, logger) + parsed_data_offset = parserlib.parser(logdata, log_parser, logger) + if parsed_data_offset != len(logdata): + logger.error( + 'ERROR: Not all data was parsed, %d bytes left unparsed', + len(logdata) - parsed_data_offset, + ) + sys.exit(1) if __name__ == "__main__": main() diff --git 
a/scripts/logging/dictionary/log_parser_uart.py b/scripts/logging/dictionary/log_parser_uart.py index 28fabc128ea17..2dd3b437f5030 100755 --- a/scripts/logging/dictionary/log_parser_uart.py +++ b/scripts/logging/dictionary/log_parser_uart.py @@ -50,14 +50,18 @@ def main(): else: logger.setLevel(logging.INFO) + log_parser = parserlib.get_log_parser(args.dbfile, logger) + # Parse the log every second from serial port + data = b'' with serial.Serial(args.serialPort, args.baudrate) as ser: ser.timeout = 2 while True: size = ser.inWaiting() if size: - data = ser.read(size) - parserlib.parser(data, args.dbfile, logger) + data += ser.read(size) + parsed_data_offset = parserlib.parser(data, log_parser, logger) + data = data[parsed_data_offset:] time.sleep(1) if __name__ == "__main__": diff --git a/scripts/logging/dictionary/parserlib.py b/scripts/logging/dictionary/parserlib.py index ad6c4073ff813..cd9bb5b5afc20 100755 --- a/scripts/logging/dictionary/parserlib.py +++ b/scripts/logging/dictionary/parserlib.py @@ -13,29 +13,23 @@ """ import logging -import sys import dictionary_parser from dictionary_parser.log_database import LogDatabase -def parser(logdata, dbfile, logger): - """function of serial parser""" - # Read from database file - database = LogDatabase.read_json_database(dbfile) +def get_log_parser(dbfile, logger): + """Get the log parser for the given database. - if not isinstance(logger, logging.Logger): - raise ValueError("Invalid logger instance. Please configure the logger!") + In addition to creating the parser, the function prints general information about the parser. 
+ """ + database = LogDatabase.read_json_database(dbfile) if database is None: logger.error("ERROR: Cannot open database file: exiting...") - sys.exit(1) - - if logdata is None: - logger.error("ERROR: cannot read log from file: exiting...") - sys.exit(1) - + raise ValueError(f"Cannot open database file: {dbfile}") log_parser = dictionary_parser.get_parser(database) + if log_parser is not None: logger.debug("# Build ID: %s", database.get_build_id()) logger.debug("# Target: %s, %d-bit", database.get_arch(), database.get_tgt_bits()) @@ -43,10 +37,22 @@ def parser(logdata, dbfile, logger): logger.debug("# Endianness: Little") else: logger.debug("# Endianness: Big") - - ret = log_parser.parse_log_data(logdata) - if not ret: - logger.error("ERROR: there were error(s) parsing log data") - sys.exit(1) else: logger.error("ERROR: Cannot find a suitable parser matching database version!") + raise ValueError("Cannot create parser.") + + return log_parser + + +def parser(logdata, log_parser, logger): + """function of serial parser""" + + if not isinstance(logger, logging.Logger): + raise ValueError("Invalid logger instance. Please configure the logger!") + + if logdata is None: + logger.error("ERROR: cannot read log from file: exiting...") + raise ValueError("Cannot read log data.") + + ret = log_parser.parse_log_data(logdata) + return ret From 3766f9b3776b89e6cd364dce3ec0d794e19e82ec Mon Sep 17 00:00:00 2001 From: Omri Sarig Date: Mon, 14 Jul 2025 15:17:20 +0200 Subject: [PATCH 3/3] scripts/logging: Create live_log_parser Create a new, generic parser, that is able to parse dictionary logs live - showing the messages as they are received from the device. This functionality previously existed only for reading the log from a UART. With the new script, the functionality is extended to be able to also read the log from a file or stdin. Additionally, the script is written in an extend-able way, making it simple to support more input sources in the future. 
The new script contains a better version of the live functionality than the old script of log_parser_uart script, therefore, the old script has been deprecated. The UART script is still available, and will work by invoking the new implementation with relevant arguments translation, however, it should ideally not be used any longer, and should be removed from a future release. Signed-off-by: Omri Sarig --- doc/releases/migration-guide-4.3.rst | 9 ++ scripts/logging/dictionary/live_log_parser.py | 140 ++++++++++++++++++ scripts/logging/dictionary/log_parser_uart.py | 48 +++--- 3 files changed, 169 insertions(+), 28 deletions(-) create mode 100755 scripts/logging/dictionary/live_log_parser.py diff --git a/doc/releases/migration-guide-4.3.rst b/doc/releases/migration-guide-4.3.rst index f668d9ef6f561..4182353d6be27 100644 --- a/doc/releases/migration-guide-4.3.rst +++ b/doc/releases/migration-guide-4.3.rst @@ -41,6 +41,15 @@ Networking Other subsystems **************** +Logging +======= + +* The UART dictionary log parsing script + :zephyr_file:`scripts/logging/dictionary/log_parser_uart.py` has been deprecated. Instead, the + more generic script of :zephyr_file:`scripts/logging/dictionary/live_log_parser.py` should be + used. The new script supports the same functionality (and more), but requires different command + line arguments when invoked. + Modules ******* diff --git a/scripts/logging/dictionary/live_log_parser.py b/scripts/logging/dictionary/live_log_parser.py new file mode 100755 index 0000000000000..5821a24022789 --- /dev/null +++ b/scripts/logging/dictionary/live_log_parser.py @@ -0,0 +1,140 @@ +#!/usr/bin/env python3 +# +# Copyright (c) 2024 Nordic Semiconductor ASA +# +# SPDX-License-Identifier: Apache-2.0 + +""" +Log Parser for Dictionary-based Logging + +This uses the JSON database file to decode the binary +log data taken directly from input serialport and print +the log messages. 
+""" + +import argparse +import contextlib +import logging +import os +import select +import sys + +import parserlib +import serial + +LOGGER_FORMAT = "%(message)s" +logger = logging.getLogger("parser") + + +class SerialReader: + """Class to read data from serial port and parse it""" + + def __init__(self, serial_port, baudrate): + self.serial_port = serial_port + self.baudrate = baudrate + self.serial = None + + @contextlib.contextmanager + def open(self): + try: + self.serial = serial.Serial(self.serial_port, self.baudrate) + yield + finally: + self.serial.close() + + def fileno(self): + return self.serial.fileno() + + def read_non_blocking(self): + size = self.serial.in_waiting + return self.serial.read(size) + + +class FileReader: + """Class to read data from serial port and parse it""" + + def __init__(self, filepath): + self.filepath = filepath + self.file = None + + @contextlib.contextmanager + def open(self): + if self.filepath is not None: + with open(self.filepath, 'rb') as f: + self.file = f + yield + else: + sys.stdin = os.fdopen(sys.stdin.fileno(), 'rb', 0) + self.file = sys.stdin + yield + + def fileno(self): + return self.file.fileno() + + def read_non_blocking(self): + # Read available data using a reasonable buffer size (without buffer size, this blocks + # forever, but with buffer size it returns even when less data than the buffer read was + # available). 
+ return self.file.read(1024) + + +def parse_args(): + """Parse command line arguments""" + parser = argparse.ArgumentParser(allow_abbrev=False) + + parser.add_argument("dbfile", help="Dictionary Logging Database file") + parser.add_argument("--debug", action="store_true", help="Print extra debugging information") + + # Create subparsers for different input modes + subparsers = parser.add_subparsers(dest="mode", required=True, help="Input source mode") + + # Serial subparser + serial_parser = subparsers.add_parser("serial", help="Read from serial port") + serial_parser.add_argument("port", help="Serial port") + serial_parser.add_argument("baudrate", type=int, help="Baudrate") + + # File subparser + file_parser = subparsers.add_parser("file", help="Read from file") + file_parser.add_argument("filepath", nargs="?", default=None, + help="Input file path, leave empty for stdin") + + return parser.parse_args() + + +def main(): + """function of serial parser""" + args = parse_args() + + if args.dbfile is None or '.json' not in args.dbfile: + logger.error("ERROR: invalid log database path: %s, exiting...", args.dbfile) + sys.exit(1) + + logging.basicConfig(format=LOGGER_FORMAT) + + if args.debug: + logger.setLevel(logging.DEBUG) + else: + logger.setLevel(logging.INFO) + + log_parser = parserlib.get_log_parser(args.dbfile, logger) + + data = b'' + + if args.mode == "serial": + reader = SerialReader(args.port, args.baudrate) + elif args.mode == "file": + reader = FileReader(args.filepath) + else: + raise ValueError("Invalid mode selected. 
Use 'serial' or 'file'.") + + with reader.open(): + while True: + ready, _, _ = select.select([reader], [], []) + if ready: + data += reader.read_non_blocking() + parsed_data_offset = parserlib.parser(data, log_parser, logger) + data = data[parsed_data_offset:] + + +if __name__ == "__main__": + main() diff --git a/scripts/logging/dictionary/log_parser_uart.py b/scripts/logging/dictionary/log_parser_uart.py index 2dd3b437f5030..1d4f6847c89de 100755 --- a/scripts/logging/dictionary/log_parser_uart.py +++ b/scripts/logging/dictionary/log_parser_uart.py @@ -13,15 +13,10 @@ """ import argparse -import logging import sys -import time -import parserlib -import serial +import live_log_parser -LOGGER_FORMAT = "%(message)s" -logger = logging.getLogger("parser") def parse_args(): """Parse command line arguments""" @@ -35,34 +30,31 @@ def parse_args(): return argparser.parse_args() + def main(): """function of serial parser""" - args = parse_args() - if args.dbfile is None or '.json' not in args.dbfile: - logger.error("ERROR: invalid log database path: %s, exiting...", args.dbfile) - sys.exit(1) + print("This script is deprecated. Use 'live_log_parser.py' instead.", + file=sys.stderr) - logging.basicConfig(format=LOGGER_FORMAT) + # Convert the arguments to the format expected by live_log_parser, and invoke it directly. 
+ args = parse_args() + sys.argv = [ + 'live_log_parser.py', + args.dbfile, + ] if args.debug: - logger.setLevel(logging.DEBUG) - else: - logger.setLevel(logging.INFO) - - log_parser = parserlib.get_log_parser(args.dbfile, logger) - - # Parse the log every second from serial port - data = b'' - with serial.Serial(args.serialPort, args.baudrate) as ser: - ser.timeout = 2 - while True: - size = ser.inWaiting() - if size: - data += ser.read(size) - parsed_data_offset = parserlib.parser(data, log_parser, logger) - data = data[parsed_data_offset:] - time.sleep(1) + sys.argv.append('--debug') + + sys.argv += [ + 'serial', + args.serialPort, + args.baudrate + ] + + live_log_parser.main() + if __name__ == "__main__": main()