From b6d7b795811b604d0944ec69b8ede1c3c99202c5 Mon Sep 17 00:00:00 2001 From: David Hewitt Date: Tue, 21 Apr 2026 12:41:10 +0100 Subject: [PATCH 1/3] fix more panics --- crates/monty/src/builtins/chr.rs | 11 ++++++++- crates/monty/src/builtins/pow.rs | 14 ++++------- crates/monty/src/bytecode/vm/exceptions.rs | 5 ++-- crates/monty/src/types/range.rs | 28 ++++++++++++---------- crates/monty/test_cases/range__ops.py | 4 ++++ 5 files changed, 37 insertions(+), 25 deletions(-) diff --git a/crates/monty/src/builtins/chr.rs b/crates/monty/src/builtins/chr.rs index cace9fdd0..7620d4bea 100644 --- a/crates/monty/src/builtins/chr.rs +++ b/crates/monty/src/builtins/chr.rs @@ -22,7 +22,16 @@ pub fn builtin_chr(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) - Value::Int(n) => { if *n < 0 || *n > 0x0010_FFFF { Err(SimpleException::new_msg(ExcType::ValueError, "chr() arg not in range(0x110000)").into()) - } else if let Some(c) = char::from_u32(u32::try_from(*n).expect("chr() range check failed")) { + } else if let Some(c) = char::from_u32( + #[expect( + clippy::cast_possible_truncation, + clippy::cast_sign_loss, + reason = "range of n already validated in the first if condition" + )] + { + *n as u32 + }, + ) { Ok(allocate_char(c, vm.heap)?) } else { // This shouldn't happen for valid Unicode range, but handle it diff --git a/crates/monty/src/builtins/pow.rs b/crates/monty/src/builtins/pow.rs index 22b63cfd8..bc0e320fa 100644 --- a/crates/monty/src/builtins/pow.rs +++ b/crates/monty/src/builtins/pow.rs @@ -38,20 +38,16 @@ pub fn builtin_pow(vm: &mut VM<'_, '_, impl ResourceTracker>, args: ArgValues) - (Value::Int(b), Value::Int(e), Value::Int(m_val)) => { if *m_val == 0 { Err(SimpleException::new_msg(ExcType::ValueError, "pow() 3rd argument cannot be 0").into()) - } else if *e < 0 { + } else if let Ok(e) = u64::try_from(*e) { + // Use modular exponentiation + Ok(Value::Int(mod_pow(*b, e, *m_val))) + } else { + debug_assert!(*e < 0, "i64 -> u64 succeeds for all non-negative values"); Err(SimpleException::new_msg( ExcType::ValueError, "pow() 2nd argument cannot be negative when 3rd argument specified", ) .into()) - } else { - // Use modular exponentiation - let result = mod_pow( - *b, - u64::try_from(*e).expect("pow exponent >= 0 but failed u64 conversion"), - *m_val, - ); - Ok(Value::Int(result)) } } _ => Err(SimpleException::new_msg( diff --git a/crates/monty/src/bytecode/vm/exceptions.rs b/crates/monty/src/bytecode/vm/exceptions.rs index 07d9b99c6..f45da7426 100644 --- a/crates/monty/src/bytecode/vm/exceptions.rs +++ b/crates/monty/src/bytecode/vm/exceptions.rs @@ -158,9 +158,8 @@ impl VM<'_, '_, T> { let target_stack_depth = frame.stack_base + frame.locals_count as usize + entry.stack_depth() as usize; // Unwind stack to target depth (drop excess values) - while this.stack.len() > target_stack_depth { - let value = this.stack.pop().unwrap(); - value.drop_with_heap(this); + for value in this.stack.drain(target_stack_depth..).rev() { + value.drop_with_heap(this.heap); } // Push exception value onto stack (handler expects it) diff --git a/crates/monty/src/types/range.rs b/crates/monty/src/types/range.rs index 45fb4f910..7ba022207 100644 --- a/crates/monty/src/types/range.rs +++ b/crates/monty/src/types/range.rs @@ -67,15 +67,17 @@ impl Range { /// Returns the length of the range (number of elements it will yield). #[must_use] pub fn len(&self) -> usize { + self.len_i128().try_into().unwrap_or(usize::MAX) + } + + fn len_i128(&self) -> i128 { // self.stop - self.start could be up to i64::MAX - i64::MIN, which overflows i64, - // so we use i128 for the calculation to avoid overflow. The result then saturates at - // usize boundaries + // so we use i128 for the calculation to avoid overflow. let start = i128::from(self.start); let stop = i128::from(self.stop); let step = i128::from(self.step); - let len = div_ceil(stop - start, step); - len.max(0).try_into().unwrap_or(usize::MAX) + div_ceil(stop - start, step).max(0) } #[must_use] @@ -195,11 +197,14 @@ impl<'h> PyTrait<'h> for HeapRead<'h, Range> { let range = *self.get(vm.heap); + // Calculate in i128 space to avoid overflow issues with large ranges and indices. + // Extract integer index, accepting Int, Bool (True=1, False=0), and LongInt - let index = key.as_index(vm, Type::Range)?; + let index = i128::from(key.as_index(vm, Type::Range)?); // Get range length for normalization - let len = i64::try_from(range.len()).expect("range length exceeds i64::MAX"); + let len = range.len_i128(); + let normalized = if index < 0 { index + len } else { index }; // Bounds check @@ -208,12 +213,11 @@ impl<'h> PyTrait<'h> for HeapRead<'h, Range> { } // Calculate: start + normalized * step - // Use checked arithmetic to avoid overflow in intermediate calculations - let offset = normalized - .checked_mul(range.step) - .and_then(|v| range.start.checked_add(v)) - .expect("range element calculation overflowed"); - Ok(Value::Int(offset)) + let offset = i128::from(range.start) + (normalized * i128::from(range.step)); + + // because start / stop / step are i64, the result must always fit in i64 as well + let offset_i64 = offset.try_into().expect("calculated range index should fit in i64"); + Ok(Value::Int(offset_i64)) } fn py_eq(&self, other: &Self, vm: &mut VM<'h, '_, impl ResourceTracker>) -> Result { diff --git a/crates/monty/test_cases/range__ops.py b/crates/monty/test_cases/range__ops.py index 938a2b3eb..4fc627146 100644 --- a/crates/monty/test_cases/range__ops.py +++ b/crates/monty/test_cases/range__ops.py @@ -240,3 +240,7 @@ assert True in range(5), 'True in range(5)' assert False in range(5), 'False in range(5)' assert True not in range(0), 'True not in empty range' + +# Large ranges which can hit monty's range i64 limits should not panic +assert range(-(2**63), 2**63 - 1)[0] == -(2**63), 'range with len exceeding i64::MAX get first item' +assert range(-(2**63), 2**63 - 1, 2**63 - 1)[2] == 2**63 - 2, 'range with step exceeding i64::MAX get last item' From 0384c0d1d5455265975dc36e42206e8462687b13 Mon Sep 17 00:00:00 2001 From: David Hewitt Date: Wed, 22 Apr 2026 10:28:01 +0100 Subject: [PATCH 2/3] add more test cases for format strings --- crates/monty/test_cases/fstring__all.py | 105 ++++++++++++++++++++++++ 1 file changed, 105 insertions(+) diff --git a/crates/monty/test_cases/fstring__all.py b/crates/monty/test_cases/fstring__all.py index ce632b222..c316a53ce 100644 --- a/crates/monty/test_cases/fstring__all.py +++ b/crates/monty/test_cases/fstring__all.py @@ -98,6 +98,42 @@ # sign-aware padding assert f'{-23:=5d}' == '- 23', 'sign-aware padding' +# i64::MIN: formatting must not overflow when taking abs of the minimum int +assert f'{-9223372036854775808:d}' == '-9223372036854775808', 'i64 min :d' +assert f'{-9223372036854775808:+d}' == '-9223372036854775808', 'i64 min with sign' +assert f'{-9223372036854775808:=22d}' == '- 9223372036854775808', 'i64 min sign-aware padding' + +# integer fill character with alignment +assert f'{42:*>10d}' == '********42', 'int fill right' +assert f'{42:*<10d}' == '42********', 'int fill left' +assert f'{42:*^10d}' == '****42****', 'int fill center' + +# === Integer non-decimal bases === +# binary +assert f'{10:b}' == '1010', 'binary positive' +assert f'{-10:b}' == '-1010', 'binary negative' +assert f'{0:b}' == '0', 'binary zero' + +# octal +assert f'{8:o}' == '10', 'octal positive' +assert f'{-8:o}' == '-10', 'octal negative' + +# hexadecimal (lower and upper) +assert f'{255:x}' == 'ff', 'hex lowercase' +assert f'{-255:x}' == '-ff', 'hex lowercase negative' +assert f'{255:X}' == 'FF', 'hex uppercase' +assert f'{-255:X}' == '-FF', 'hex uppercase negative' + +# === Integer as Unicode character (:c) === +assert f'{65:c}' == 'A', 'char ascii' +assert f'{0x4E2D:c}' == '中', 'char BMP unicode' + +# === Bool with format spec === +# bool is a subclass of int, so :d works +assert f'{True:d}' == '1', 'bool True as int' +assert f'{False:d}' == '0', 'bool False as int' +assert f'{True:04d}' == '0001', 'bool with zero-pad' + # === Float formatting === # basic float assert f'{3.14159}' == '3.14159', 'basic float' @@ -135,6 +171,69 @@ assert f'{0.25:.1%}' == '25.0%', 'percentage with precision' assert f'{0.125:.0%}' == '12%', 'percentage zero precision' +# zero precision rounds (banker's/half-even style per Python) +assert f'{3.7:.0f}' == '4', 'zero precision rounds up' +assert f'{3.4:.0f}' == '3', 'zero precision rounds down' +assert f'{1234.5:.0e}' == '1e+03', 'zero precision exponential' + +# uppercase exponential +assert f'{1234.5:E}' == '1.234500E+03', 'uppercase E' + +# float fill character with alignment + precision +assert f'{3.14:*>10.2f}' == '******3.14', 'float fill right' +assert f'{3.14:*<10.2f}' == '3.14******', 'float fill left' +assert f'{3.14:*^10.2f}' == '***3.14***', 'float fill center' + +# large and small magnitude exponents +assert f'{1e100:.3e}' == '1.000e+100', 'very large exponent' +assert f'{1e-100:.3e}' == '1.000e-100', 'very small exponent' + +# high precision reveals f64 representation +assert f'{0.1:.20f}' == '0.10000000000000000555', 'high precision float' + +# === Large dynamic precision === +# Precision > u16::MAX (65535) must not overflow Rust's `format!` precision +# argument. Each of these exercises a different internal format code path. +assert f'{1:.{10**6}f}' == '1.' + '0' * 10**6, 'huge precision :f' +assert f'{1:.{10**6}e}' == '1.' + '0' * 10**6 + 'e+00', 'huge precision :e' +assert f'{1:.{10**6}E}' == '1.' + '0' * 10**6 + 'E+00', 'huge precision :E' +assert f'{0.5:.{10**6}%}' == '50.' + '0' * 10**6 + '%', 'huge precision :%' +# :g strips trailing zeros, so the visible result is short, but the +# underlying format call still uses the full precision internally. +assert f'{1.5:.{10**6}g}' == '1.5', 'huge precision :g fixed branch' +assert f'{1e-10:.{10**6}g}' == '1.0000000000000000364321973154977415791655470655996396089904010295867919921875e-10', ( + 'huge precision :g exponential branch' +) + +# === Large static width/precision === +# Static format specs are parsed at parse time and packed into 16 bits for +# width and precision; values >= 65536 (or 65535 for precision, which is +# reserved as the "no precision" marker) must still round-trip correctly. +assert len(f'{1.5:.65535f}') == 65537, 'static precision 65535' +assert len(f'{1.5:.65536f}') == 65538, 'static precision 65536' +assert len(f'{42:65536d}') == 65536, 'static width 65536' + +# === Integer with float format types === +# Python allows formatting integers with float types +assert f'{42:f}' == '42.000000', 'int as :f' +assert f'{42:.2f}' == '42.00', 'int as :.2f' +assert f'{42:.2e}' == '4.20e+01', 'int as :.2e' +assert f'{1234:g}' == '1234', 'int as :g' +assert f'{5:%}' == '500.000000%', 'int as :%' + +# === Negative zero preserves sign === +assert f'{-0.0}' == '-0.0', 'negative zero default' +assert f'{-0.0:f}' == '-0.000000', 'negative zero :f' +assert f'{-0.0:+.2f}' == '-0.00', 'negative zero with sign' + +# === Infinity formatting across format codes === +# inf bypasses precision/width-pad zero rules and renders as 'inf' +assert f'{float("inf"):f}' == 'inf', 'inf :f' +assert f'{float("inf"):e}' == 'inf', 'inf :e' +assert f'{float("inf"):.3f}' == 'inf', 'inf with precision' +assert f'{float("inf"):+f}' == '+inf', 'inf with sign' +assert f'{float("-inf"):f}' == '-inf', 'negative inf' + # === Nested format specs === width = 10 assert f'{"hi":{width}}' == 'hi ', 'nested format spec width' @@ -204,6 +303,12 @@ def format_num(n, w): # no extra whitespace handling needed, width handles it assert f'{"x":5}' == 'x ', 'single char width' +# === Empty format spec with various types === +# trailing `:` with no spec behaves like no spec +assert f'{42:}' == '42', 'empty spec int' +assert f'{3.14:}' == '3.14', 'empty spec float' +assert f'{"hi":}' == 'hi', 'empty spec string' + # === Unicode character counting in padding === x = 'café' assert f'{x:_<10}' == 'café______' From 870e9fe971d8b457da4400bddb2ea104884a3333 Mon Sep 17 00:00:00 2001 From: Samuel Colvin Date: Mon, 27 Apr 2026 17:57:24 +0100 Subject: [PATCH 3/3] wip fixes --- crates/monty/src/bytecode/builder.rs | 8 ++ crates/monty/src/bytecode/compiler.rs | 24 +++- crates/monty/src/fstring.rs | 166 ++++++++++++++++++++++---- 3 files changed, 168 insertions(+), 30 deletions(-) diff --git a/crates/monty/src/bytecode/builder.rs b/crates/monty/src/bytecode/builder.rs index db4bb751e..c6741fef3 100644 --- a/crates/monty/src/bytecode/builder.rs +++ b/crates/monty/src/bytecode/builder.rs @@ -160,6 +160,11 @@ impl CodeBuilder { // pops obj + args, pushes result: 1 - (1 + arg_count) = -arg_count self.adjust_stack(-i16::from(operand2)); } + Opcode::CallAttrExtended => { + // Pops obj + args_tuple (+ kwargs_dict if flag set), pushes result. + // Flag=0 (no kwargs): pops 2, pushes 1 -> -1. Flag=1 (kwargs): pops 3, pushes 1 -> -2. + self.adjust_stack(-1 - i16::from(operand2 & 1)); + } _ => { if let Some(effect) = op.stack_effect() { self.adjust_stack(effect); @@ -528,6 +533,9 @@ impl CodeBuilder { let effect: i16 = match op { // CallFunction pops (callable + args), pushes result: -(1 + arg_count) + 1 = -arg_count Opcode::CallFunction => -i16::from(operand), + // CallFunctionExtended pops callable + args_tuple (+ kwargs_dict if flag set), pushes result. + // Flag=0 (no kwargs): pops 2, pushes 1 -> -1. Flag=1 (kwargs): pops 3, pushes 1 -> -2. + Opcode::CallFunctionExtended => -1 - i16::from(operand & 1), // UnpackSequence pops 1, pushes n: n - 1 Opcode::UnpackSequence => i16::from(operand) - 1, // ListAppend/SetAdd pop value: -1 (depth operand doesn't affect stack count) diff --git a/crates/monty/src/bytecode/compiler.rs b/crates/monty/src/bytecode/compiler.rs index 0f3450fb0..25b366dc3 100644 --- a/crates/monty/src/bytecode/compiler.rs +++ b/crates/monty/src/bytecode/compiler.rs @@ -24,7 +24,10 @@ use crate::{ AssignTarget, Callable, CmpOperator, Comprehension, DictItem, Expr, ExprLoc, Identifier, Literal, NameScope, Node, Operator, PreparedFunctionDef, PreparedNode, SequenceItem, UnpackTarget, }, - fstring::{ConversionFlag, FStringPart, FormatSpec, ParsedFormatSpec, encode_format_spec}, + fstring::{ + ConversionFlag, FStringPart, FormatSpec, MAX_ENCODED_PRECISION, MAX_ENCODED_WIDTH, ParsedFormatSpec, + encode_format_spec, + }, function::Function, intern::{Interns, StringId}, modules::StandardLib, @@ -2744,7 +2747,7 @@ impl<'a> Compiler<'a> { // Static format spec - push a marker constant with the parsed spec info // We store this as a special format spec value in the constant pool // The VM will recognize this and use the pre-parsed spec - let const_idx = self.add_format_spec_const(parsed); + let const_idx = self.add_format_spec_const(parsed)?; self.code.emit_u16(Opcode::LoadConst, const_idx); Ok(conv_bits | 0x04) // has format spec on stack } @@ -2764,14 +2767,23 @@ impl<'a> Compiler<'a> { /// Adds a format spec to the constant pool as an encoded integer. /// /// Uses the encoding from `fstring::encode_format_spec` and stores it as - /// a negative integer to distinguish from regular ints. - fn add_format_spec_const(&mut self, spec: &ParsedFormatSpec) -> u16 { - let encoded = encode_format_spec(spec); + /// a negative integer to distinguish from regular ints. Returns a + /// `CompileError` if the width or precision is larger than the compact + /// encoding can represent — far beyond any realistic format spec. + fn add_format_spec_const(&mut self, spec: &ParsedFormatSpec) -> Result { + let encoded = encode_format_spec(spec).ok_or_else(|| { + CompileError::new( + format!( + "format specifier width or precision exceeds supported limits (max width {MAX_ENCODED_WIDTH}, max precision {MAX_ENCODED_PRECISION})" + ), + CodeRange::default(), + ) + })?; // Use negative to distinguish from regular ints (format spec marker) // We negate and subtract 1 to ensure it's negative and recoverable let encoded_i64 = i64::try_from(encoded).expect("format spec encoding exceeds i64::MAX"); let marker = -(encoded_i64 + 1); - self.code.add_const(Value::Int(marker)) + Ok(self.code.add_const(Value::Int(marker))) } // ======================================================================== diff --git a/crates/monty/src/fstring.rs b/crates/monty/src/fstring.rs index f8df24b21..cda87c372 100644 --- a/crates/monty/src/fstring.rs +++ b/crates/monty/src/fstring.rs @@ -6,7 +6,11 @@ //! F-strings can contain literal text and interpolated expressions with optional //! conversion flags (`!s`, `!r`, `!a`) and format specifications. -use std::{fmt, iter, str::FromStr}; +use std::{ + fmt::{self, Write as _}, + iter, + str::FromStr, +}; use crate::{ bytecode::VM, @@ -111,6 +115,40 @@ pub struct ParsedFormatSpec { pub type_char: Option, } +impl fmt::Display for ParsedFormatSpec { + /// Renders the spec back into the string form `from_str` would parse. + /// + /// Used as a fallback when `encode_format_spec` can't fit the spec into the + /// compact bytecode constant encoding (e.g. precision or width larger than + /// the encoding reserves): we serialize the spec to a string constant and + /// let the VM parse it dynamically. + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(align) = self.align { + // Only emit fill when it's non-default; otherwise ambiguity with sign chars. + if self.fill != ' ' { + f.write_char(self.fill)?; + } + f.write_char(align)?; + } + if let Some(sign) = self.sign { + f.write_char(sign)?; + } + if self.zero_pad { + f.write_char('0')?; + } + if self.width > 0 { + write!(f, "{}", self.width)?; + } + if let Some(precision) = self.precision { + write!(f, ".{precision}")?; + } + if let Some(type_char) = self.type_char { + f.write_char(type_char)?; + } + Ok(()) + } +} + impl FromStr for ParsedFormatSpec { type Err = String; @@ -311,17 +349,37 @@ pub fn format_with_spec( } } +/// Maximum width that fits in the 20-bit width field of the encoded format spec. +pub const MAX_ENCODED_WIDTH: usize = (1 << 20) - 1; + +/// Maximum precision that fits in the 21-bit precision field of the encoded format +/// spec. One slot (the zero value) is reserved to mean "no precision", so the +/// usable range for an explicit precision is `0..=MAX_ENCODED_PRECISION`. +pub const MAX_ENCODED_PRECISION: usize = (1 << 21) - 2; + /// Encodes a ParsedFormatSpec into a u64 for storage in bytecode constants. /// -/// Encoding layout (fits in 48 bits): +/// Returns `None` if any field exceeds the encoding's capacity — the caller +/// should fall back to a dynamic (string-based) format spec in that case. +/// +/// Encoding layout (fits in 60 bits, so the result round-trips through `i64`): /// - bits 0-7: fill character (as ASCII, default space=32) /// - bits 8-10: align (0=none, 1='<', 2='>', 3='^', 4='=') /// - bits 11-12: sign (0=none, 1='+', 2='-', 3=' ') /// - bit 13: zero_pad -/// - bits 14-29: width (16 bits, max 65535) -/// - bits 30-45: precision (16 bits, using 0xFFFF as "no precision") -/// - bits 46-50: type_char (0=none, 1-15=explicit type mapping: b,c,d,e,E,f,F,g,G,n,o,s,x,X,%) -pub fn encode_format_spec(spec: &ParsedFormatSpec) -> u64 { +/// - bits 14-33: width (20 bits, max `MAX_ENCODED_WIDTH`) +/// - bits 34-54: precision+1 (21 bits; 0 = no precision) +/// - bits 55-59: type_char (0=none, 1-15=explicit type mapping: b,c,d,e,E,f,F,g,G,n,o,s,x,X,%) +pub fn encode_format_spec(spec: &ParsedFormatSpec) -> Option { + if spec.width > MAX_ENCODED_WIDTH { + return None; + } + if let Some(p) = spec.precision + && p > MAX_ENCODED_PRECISION + { + return None; + } + let fill = spec.fill as u64; let align = match spec.align { None => 0u64, @@ -340,7 +398,9 @@ pub fn encode_format_spec(spec: &ParsedFormatSpec) -> u64 { }; let zero_pad = u64::from(spec.zero_pad); let width = spec.width as u64; - let precision = spec.precision.map_or(0xFFFFu64, |p| p as u64); + // Store precision as `p + 1`, reserving 0 for the "no precision" marker. + // `p + 1` fits in u64 because `p <= MAX_ENCODED_PRECISION` was checked above. + let precision = spec.precision.map_or(0u64, |p| p as u64 + 1); let type_char = spec.type_char.map_or(0u64, |c| match c { 'b' => 1, 'c' => 2, @@ -360,7 +420,7 @@ pub fn encode_format_spec(spec: &ParsedFormatSpec) -> u64 { _ => 0, }); - fill | (align << 8) | (sign << 11) | (zero_pad << 13) | (width << 14) | (precision << 30) | (type_char << 46) + Some(fill | (align << 8) | (sign << 11) | (zero_pad << 13) | (width << 14) | (precision << 34) | (type_char << 55)) } /// Decodes a u64 back into a ParsedFormatSpec. @@ -373,9 +433,9 @@ pub fn decode_format_spec(encoded: u64) -> ParsedFormatSpec { let align_bits = (encoded >> 8) & 0x07; let sign_bits = (encoded >> 11) & 0x03; let zero_pad = ((encoded >> 13) & 0x01) != 0; - let width = ((encoded >> 14) & 0xFFFF) as usize; - let precision_raw = ((encoded >> 30) & 0xFFFF) as usize; - let type_bits = ((encoded >> 46) & 0x1F) as u8; + let width = ((encoded >> 14) & 0xF_FFFF) as usize; + let precision_raw = ((encoded >> 34) & 0x1F_FFFF) as usize; + let type_bits = ((encoded >> 55) & 0x1F) as u8; let align = match align_bits { 1 => Some('<'), @@ -392,10 +452,11 @@ pub fn decode_format_spec(encoded: u64) -> ParsedFormatSpec { _ => None, }; - let precision = if precision_raw == 0xFFFF { + // Encoding stores `precision + 1`, so 0 means "no precision". + let precision = if precision_raw == 0 { None } else { - Some(precision_raw) + Some(precision_raw - 1) }; let type_char = match type_bits { @@ -467,7 +528,8 @@ pub fn format_string(value: &str, spec: &ParsedFormatSpec) -> Result String { let is_negative = n < 0; - let abs_str = n.abs().to_string(); + // Use unsigned_abs() to avoid overflow panic on i64::MIN + let abs_str = n.unsigned_abs().to_string(); // Build the sign prefix let sign = if is_negative { @@ -549,7 +611,7 @@ pub fn format_float_f(f: f64, spec: &ParsedFormatSpec) -> String { let is_negative = f.is_sign_negative() && !f.is_nan(); let abs_val = f.abs(); - let abs_str = format!("{abs_val:.precision$}"); + let abs_str = fmt_float_fixed(abs_val, precision); let sign = if is_negative { "-" @@ -589,11 +651,7 @@ pub fn format_float_e(f: f64, spec: &ParsedFormatSpec, uppercase: bool) -> Strin let is_negative = f.is_sign_negative() && !f.is_nan(); let abs_val = f.abs(); - let abs_str = if uppercase { - format!("{abs_val:.precision$E}") - } else { - format!("{abs_val:.precision$e}") - }; + let abs_str = fmt_float_exp(abs_val, precision, uppercase); // Fix exponent format to match Python (e+03 not e3) let abs_str = fix_exp_format(&abs_str); @@ -639,14 +697,17 @@ pub fn format_float_g(f: f64, spec: &ParsedFormatSpec) -> String { let abs_str = if exp < -4 || exp >= prec_i32 { // Use exponential notation let exp_prec = precision.saturating_sub(1); - let formatted = format!("{abs_val:.exp_prec$e}"); + // Cap Rust precision; trailing zeros are stripped so padding isn't needed. + let formatted = fmt_float_exp(abs_val, exp_prec.min(MAX_FMT_PRECISION_EXP), false); // Python strips trailing zeros from the mantissa strip_trailing_zeros_exp(&formatted) } else { // Use fixed notation - result is non-negative due to .max(0) let sig_digits_i32 = (prec_i32 - exp - 1).max(0); let sig_digits = usize::try_from(sig_digits_i32).expect("sig_digits guaranteed non-negative"); - let formatted = format!("{abs_val:.sig_digits$}"); + // Cap Rust precision; trailing zeros are stripped so padding isn't needed. + let cap = sig_digits.min(MAX_FMT_PRECISION); + let formatted = format!("{abs_val:.cap$}"); strip_trailing_zeros(&formatted) }; @@ -670,7 +731,6 @@ pub fn format_float_g(f: f64, spec: &ParsedFormatSpec) -> String { /// Used for the `!a` conversion flag in f-strings. Takes a string (typically a repr) /// and escapes all non-ASCII characters using `\xNN`, `\uNNNN`, or `\UNNNNNNNN`. pub fn ascii_escape(s: &str) -> String { - use std::fmt::Write; let mut result = String::new(); for c in s.chars() { if c.is_ascii() { @@ -700,7 +760,7 @@ pub fn format_float_percent(f: f64, spec: &ParsedFormatSpec) -> String { let is_negative = percent_val.is_sign_negative() && !percent_val.is_nan(); let abs_val = percent_val.abs(); - let abs_str = format!("{abs_val:.precision$}%"); + let abs_str = format!("{}%", fmt_float_fixed(abs_val, precision)); let sign = if is_negative { "-" @@ -721,6 +781,64 @@ pub fn format_float_percent(f: f64, spec: &ParsedFormatSpec) -> String { // Helper functions // ============================================================================ +/// Maximum precision Rust's `format!` accepts for fixed-point float formatting +/// before it panics with "Formatting argument out of range" (i.e. `u16::MAX`). +/// +/// Python allows arbitrary precision in f-strings (e.g. `.{10**6}f`), so +/// we cap at this limit and pad manually with zeros beyond it. +const MAX_FMT_PRECISION: usize = u16::MAX as usize; + +/// Maximum precision Rust's `format!` accepts for exponential (`e`/`E`) float +/// formatting. One less than `MAX_FMT_PRECISION` because Rust's internal +/// `to_exact_exp_str` uses `ndigits = precision + 1`, which would overflow +/// `u16::MAX` and hit an `ndigits > 0` assertion at exactly `u16::MAX`. +const MAX_FMT_PRECISION_EXP: usize = (u16::MAX as usize) - 1; + +/// Formats a float in fixed-point notation at an arbitrary precision. +/// +/// Rust's `format!` panics if precision exceeds `u16::MAX`. For non-finite +/// values (NaN/inf) precision is ignored entirely, matching Rust's behavior. +/// For finite values beyond the native limit we format at `MAX_FMT_PRECISION` +/// and append trailing zeros — f64 precision bottoms out long before this, so +/// every additional digit Python would emit is a zero anyway. +fn fmt_float_fixed(abs_val: f64, precision: usize) -> String { + if precision <= MAX_FMT_PRECISION || !abs_val.is_finite() { + return format!("{abs_val:.precision$}"); + } + let mut s = format!("{abs_val:.MAX_FMT_PRECISION$}"); + s.extend(iter::repeat_n('0', precision - MAX_FMT_PRECISION)); + s +} + +/// Formats a float in exponential notation at an arbitrary precision. +/// +/// Same precision-capping strategy as `fmt_float_fixed`, but trailing zeros +/// are injected into the mantissa (before the exponent marker) rather than +/// appended to the end. +fn fmt_float_exp(abs_val: f64, precision: usize, uppercase: bool) -> String { + if precision <= MAX_FMT_PRECISION_EXP || !abs_val.is_finite() { + return if uppercase { + format!("{abs_val:.precision$E}") + } else { + format!("{abs_val:.precision$e}") + }; + } + let base = if uppercase { + format!("{abs_val:.MAX_FMT_PRECISION_EXP$E}") + } else { + format!("{abs_val:.MAX_FMT_PRECISION_EXP$e}") + }; + let extra = precision - MAX_FMT_PRECISION_EXP; + // Inject padding zeros immediately before the exponent marker. + if let Some(e_pos) = base.find(['e', 'E']) { + let (mantissa, exp_part) = base.split_at(e_pos); + let zeros: String = iter::repeat_n('0', extra).collect(); + format!("{mantissa}{zeros}{exp_part}") + } else { + base + } +} + /// Pads a string to a given width with alignment. /// /// Alignment options: