diff --git a/crates/ruff_python_parser/src/error.rs b/crates/ruff_python_parser/src/error.rs
index e2353816aa018..6d823739638c4 100644
--- a/crates/ruff_python_parser/src/error.rs
+++ b/crates/ruff_python_parser/src/error.rs
@@ -200,6 +200,9 @@ pub enum ParseErrorType {
     TStringError(InterpolatedStringErrorType),
     /// Parser encountered an error during lexing.
     Lexical(LexicalErrorType),
+
+    /// Parser aborted because [`crate::ParseOptions::max_recursion_depth`] was exceeded.
+    RecursionLimitExceeded,
 }
 
 impl ParseErrorType {
@@ -329,6 +332,7 @@ impl std::fmt::Display for ParseErrorType {
             ParseErrorType::UnexpectedExpressionToken => {
                 write!(f, "Unexpected token at the end of an expression")
             }
+            ParseErrorType::RecursionLimitExceeded => f.write_str("Source is too deeply nested"),
         }
     }
 }
diff --git a/crates/ruff_python_parser/src/parser/expression.rs b/crates/ruff_python_parser/src/parser/expression.rs
index 7c0d169b3a091..586931cdcfdb2 100644
--- a/crates/ruff_python_parser/src/parser/expression.rs
+++ b/crates/ruff_python_parser/src/parser/expression.rs
@@ -327,6 +327,29 @@ impl<'src> Parser<'src> {
         &mut self,
         left_precedence: OperatorPrecedence,
         context: ExpressionContext,
+    ) -> ParsedExpr {
+        let range = self.current_token_range();
+        if self.enter_recursion(range) {
+            let result = self.parse_lhs_expression_inner(left_precedence, context);
+            self.leave_recursion();
+            result
+        } else {
+            // Returns a trivial placeholder expression to stand in for a real
+            // expression when the recursion limit has been exceeded.
+            ParsedExpr {
+                expr: Expr::EllipsisLiteral(ast::ExprEllipsisLiteral {
+                    range,
+                    node_index: AtomicNodeIndex::NONE,
+                }),
+                is_parenthesized: false,
+            }
+        }
+    }
+
+    fn parse_lhs_expression_inner(
+        &mut self,
+        left_precedence: OperatorPrecedence,
+        context: ExpressionContext,
     ) -> ParsedExpr {
         let start = self.node_start();
         let token = self.current_token_kind();
@@ -1797,11 +1820,17 @@ impl<'src> Parser<'src> {
 
         let format_spec = if self.eat(TokenKind::Colon) {
             let spec_start = self.node_start();
-            let elements = self.parse_interpolated_string_elements(
-                flags,
-                InterpolatedStringElementsKind::FormatSpec(string_kind),
-                string_kind,
-            );
+            let elements = if self.enter_recursion(self.current_token_range()) {
+                let elements = self.parse_interpolated_string_elements(
+                    flags,
+                    InterpolatedStringElementsKind::FormatSpec(string_kind),
+                    string_kind,
+                );
+                self.leave_recursion();
+                elements
+            } else {
+                ast::InterpolatedStringElements::from(vec![])
+            };
             Some(Box::new(ast::InterpolatedStringFormatSpec {
                 range: self.node_range(spec_start),
                 elements,
diff --git a/crates/ruff_python_parser/src/parser/mod.rs b/crates/ruff_python_parser/src/parser/mod.rs
index b826b561a9e07..c4e492443fe94 100644
--- a/crates/ruff_python_parser/src/parser/mod.rs
+++ b/crates/ruff_python_parser/src/parser/mod.rs
@@ -56,6 +56,9 @@ pub(crate) struct Parser<'src> {
 
     /// The start offset in the source code from which to start parsing at.
     start_offset: TextSize,
+
+    /// Current parser recursion depth remaining before the depth limit is exceeded.
+    depth_remaining: u16,
 }
 
 impl<'src> Parser<'src> {
@@ -72,6 +75,7 @@ impl<'src> Parser<'src> {
     ) -> Self {
         let tokens = TokenSource::from_source(source, options.mode, start_offset);
 
+        let depth_remaining = options.max_recursion_depth;
         Parser {
             options,
             source,
@@ -82,9 +86,39 @@ impl<'src> Parser<'src> {
             prev_token_end: TextSize::new(0),
             start_offset,
             current_token_id: TokenId::default(),
+            depth_remaining,
+        }
+    }
+
+    /// Call at the top of every recursive parsing function. Returns `false` if
+    /// the caller must abort and return a placeholder instead of recursing
+    /// further.
+    ///
+    /// Every successful call must be paired with a matching [`Parser::leave_recursion`].
+    #[must_use]
+    fn enter_recursion<R: Ranged>(&mut self, ranged: R) -> bool {
+        if let Some(depth_remaining) = self.depth_remaining.checked_sub(1) {
+            self.depth_remaining = depth_remaining;
+            true
+        } else {
+            self.add_error(ParseErrorType::RecursionLimitExceeded, ranged);
+            // Skip to end-of-file so outer parser frames unwind quickly
+            // and our `ParserProgress` infinite-loop guards don't fire
+            // when they see the same `(` / `[` etc. that this frame
+            // failed to consume.
+            while self.current_token_kind() != TokenKind::EndOfFile {
+                self.bump_any();
+            }
+            false
         }
     }
 
+    /// Must be called at the end of every recursive parsing function whose
+    /// matching [`Parser::enter_recursion`] returned `true`.
+    fn leave_recursion(&mut self) {
+        self.depth_remaining += 1;
+    }
+
     /// Consumes the [`Parser`] and returns the parsed [`Parsed`].
     pub(crate) fn parse(mut self) -> Parsed<Mod> {
         let syntax = match self.options.mode {
diff --git a/crates/ruff_python_parser/src/parser/options.rs b/crates/ruff_python_parser/src/parser/options.rs
index ec87a72d1e481..023593e1b4e9c 100644
--- a/crates/ruff_python_parser/src/parser/options.rs
+++ b/crates/ruff_python_parser/src/parser/options.rs
@@ -2,6 +2,19 @@ use ruff_python_ast::{PySourceType, PythonVersion};
 
 use crate::{AsMode, Mode};
 
+/// The default maximum recursion depth used by the parser.
+///
+/// Real-world Python rarely nests more than a handful of levels deep; this cap
+/// exists to keep the parser from overflowing the stack on adversarial or
+/// machine-generated input. The value is intentionally modest because each
+/// "depth unit" corresponds to several real stack frames on the parser's
+/// descent (for a parenthesised expression: ~8 frames, each a few KB in a
+/// debug build), so one depth unit is roughly 15–30 KB of actual stack. The
+/// default has to fit comfortably within the tightest stacks we care about:
+/// Rust's default 2 MB worker-thread stack (used by `std::thread`, tokio,
+/// `cargo test`, …) and Windows' 1 MB main-thread stack.
+const DEFAULT_MAX_RECURSION_DEPTH: u16 = 200;
+
 /// Options for controlling how a source file is parsed.
 ///
 /// You can construct a [`ParseOptions`] directly from a [`Mode`]:
@@ -26,6 +39,11 @@ pub struct ParseOptions {
     pub(crate) mode: Mode,
     /// Target version for detecting version-related syntax errors.
     pub(crate) target_version: PythonVersion,
+    /// Maximum recursion depth for the parser. The parser aborts with a
+    /// [`crate::ParseErrorType::RecursionLimitExceeded`] error once this many
+    /// nested expression / statement / pattern nodes are on the parser's call
+    /// stack. Defaults to [`DEFAULT_MAX_RECURSION_DEPTH`].
+    pub(crate) max_recursion_depth: u16,
 }
 
 impl ParseOptions {
@@ -38,6 +56,17 @@ impl ParseOptions {
     pub fn target_version(&self) -> PythonVersion {
         self.target_version
     }
+
+    /// Set the maximum recursion depth for the parser.
+    #[must_use]
+    pub fn with_max_recursion_depth(mut self, depth: u16) -> Self {
+        self.max_recursion_depth = depth;
+        self
+    }
+
+    pub fn max_recursion_depth(&self) -> u16 {
+        self.max_recursion_depth
+    }
 }
 
 impl From<Mode> for ParseOptions {
@@ -45,6 +74,7 @@ impl From<Mode> for ParseOptions {
         Self {
             mode,
             target_version: PythonVersion::default(),
+            max_recursion_depth: DEFAULT_MAX_RECURSION_DEPTH,
         }
     }
 }
@@ -54,6 +84,7 @@ impl From<PySourceType> for ParseOptions {
         Self {
             mode: source_type.as_mode(),
             target_version: PythonVersion::default(),
+            max_recursion_depth: DEFAULT_MAX_RECURSION_DEPTH,
         }
     }
 }
diff --git a/crates/ruff_python_parser/src/parser/pattern.rs b/crates/ruff_python_parser/src/parser/pattern.rs
index f28dc237c9065..0740bdf547d0a 100644
--- a/crates/ruff_python_parser/src/parser/pattern.rs
+++ b/crates/ruff_python_parser/src/parser/pattern.rs
@@ -88,6 +88,23 @@ impl Parser<'_> {
     ///
     /// See:
     fn parse_match_pattern(&mut self, allow_star_pattern: AllowStarPattern) -> Pattern {
+        let range = self.current_token_range();
+        if self.enter_recursion(range) {
+            let result = self.parse_match_pattern_inner(allow_star_pattern);
+            self.leave_recursion();
+            result
+        } else {
+            // Wildcard-style placeholder so the caller always gets a valid `Pattern`.
+            Pattern::MatchAs(ast::PatternMatchAs {
+                range,
+                name: None,
+                pattern: None,
+                node_index: AtomicNodeIndex::NONE,
+            })
+        }
+    }
+
+    fn parse_match_pattern_inner(&mut self, allow_star_pattern: AllowStarPattern) -> Pattern {
         let start = self.node_start();
 
         // We don't yet know if it's an or pattern or an as pattern, so use whatever
diff --git a/crates/ruff_python_parser/src/parser/statement.rs b/crates/ruff_python_parser/src/parser/statement.rs
index b52d527b5e349..1cc733d1c4f37 100644
--- a/crates/ruff_python_parser/src/parser/statement.rs
+++ b/crates/ruff_python_parser/src/parser/statement.rs
@@ -111,6 +111,23 @@ impl<'src> Parser<'src> {
     /// -
     /// -
     pub(super) fn parse_statement(&mut self) -> Stmt {
+        let range = self.current_token_range();
+        if self.enter_recursion(range) {
+            let stmt = self.parse_statement_inner();
+            self.leave_recursion();
+            stmt
+        } else {
+            // `enter_recursion` already recorded a `RecursionLimitExceeded`
+            // error, so the returned `Parsed` is already in a failed state;
+            // this placeholder is only here so the parser can keep unwinding.
+            Stmt::Pass(ast::StmtPass {
+                range,
+                node_index: AtomicNodeIndex::NONE,
+            })
+        }
+    }
+
+    fn parse_statement_inner(&mut self) -> Stmt {
         let start = self.node_start();
 
         match self.current_token_kind() {
diff --git a/crates/ruff_python_parser/src/parser/tests.rs b/crates/ruff_python_parser/src/parser/tests.rs
index 20cc04c7011cf..05b8f29158c9f 100644
--- a/crates/ruff_python_parser/src/parser/tests.rs
+++ b/crates/ruff_python_parser/src/parser/tests.rs
@@ -1,4 +1,4 @@
-use crate::{Mode, ParseOptions, parse, parse_expression, parse_module};
+use crate::{Mode, ParseErrorType, ParseOptions, parse, parse_expression, parse_module};
 
 #[test]
 fn test_modes() {
@@ -179,3 +179,125 @@ fn test_tstring_fstring_middle_fuzzer() {
 
     insta::assert_debug_snapshot!(error);
 }
+
+#[test]
+fn recursion_limit_nested_parens() {
+    let src = format!("{}1{}", "(".repeat(1_000), ")".repeat(1_000));
+    let opts = ParseOptions::from(Mode::Module).with_max_recursion_depth(100);
+    let err = parse(&src, opts).unwrap_err();
+    assert!(matches!(err.error, ParseErrorType::RecursionLimitExceeded));
+}
+
+#[test]
+fn recursion_limit_normal_python_unaffected() {
+    // 50 levels is well above what real-world Python ever produces and well
+    // below the default cap — the point is to confirm the default doesn't
+    // reject ordinary input.
+    let src = format!("x = {}1{}", "(".repeat(50), ")".repeat(50));
+    parse_module(&src).unwrap();
+}
+
+#[test]
+fn recursion_limit_nested_def_blocks() {
+    // Nested function definitions exercise instrumentation on
+    // `parse_statement` rather than `parse_lhs_expression`. Each level
+    // needs one more leading tab to make indentation valid.
+    let depth = 400;
+    let mut src = String::new();
+    for i in 0..depth {
+        src.push_str(&"\t".repeat(i));
+        src.push_str("def f():\n");
+    }
+    src.push_str(&"\t".repeat(depth));
+    src.push_str("pass\n");
+    let opts = ParseOptions::from(Mode::Module).with_max_recursion_depth(100);
+    let err = parse(&src, opts).unwrap_err();
+    assert!(matches!(err.error, ParseErrorType::RecursionLimitExceeded));
+}
+
+#[test]
+fn recursion_limit_nested_lists() {
+    let src = format!("{}1{}", "[".repeat(1_000), "]".repeat(1_000));
+    let opts = ParseOptions::from(Mode::Module).with_max_recursion_depth(100);
+    let err = parse(&src, opts).unwrap_err();
+    assert!(matches!(err.error, ParseErrorType::RecursionLimitExceeded));
+}
+
+#[test]
+fn recursion_limit_nested_match_patterns() {
+    // Deeply parenthesised match patterns — exercises pattern-parsing
+    // instrumentation in addition to statement / expression paths.
+    let mut src = String::from("match x:\n case ");
+    for _ in 0..600 {
+        src.push('(');
+    }
+    src.push('y');
+    for _ in 0..600 {
+        src.push(')');
+    }
+    src.push_str(": pass\n");
+    let opts = ParseOptions::from(Mode::Module).with_max_recursion_depth(100);
+    let err = parse(&src, opts).unwrap_err();
+    assert!(matches!(err.error, ParseErrorType::RecursionLimitExceeded));
+}
+
+#[test]
+fn recursion_limit_binary_paren_interplay() {
+    // `1+(1+(1+(1+...)))` — each level alternates a binary operator and a
+    // parenthesised sub-expression, exactly like the pattern described in
+    // the tracking issue.
+    let depth = 2_000;
+    let mut src = String::new();
+    for _ in 0..depth {
+        src.push_str("1+(");
+    }
+    src.push('1');
+    for _ in 0..depth {
+        src.push(')');
+    }
+    let opts = ParseOptions::from(Mode::Module).with_max_recursion_depth(100);
+    let err = parse(&src, opts).unwrap_err();
+    assert!(matches!(err.error, ParseErrorType::RecursionLimitExceeded));
+}
+
+#[test]
+fn recursion_limit_first_error_is_recursion_not_noise() {
+    // When the limit is hit the outer parser frames will emit secondary
+    // errors as they unwind. Callers read the first error via `into_result`
+    // / `Parsed::errors()`, so `RecursionLimitExceeded` must come first, and
+    // the drain-to-EOF inside `enter_recursion` should keep the total count
+    // small rather than producing one noisy error per unwound frame.
+    let src = format!("{}1{}", "(".repeat(2_000), ")".repeat(2_000));
+    let opts = ParseOptions::from(Mode::Module).with_max_recursion_depth(50);
+    let parsed = crate::parse_unchecked(&src, opts);
+    let errors = parsed.errors();
+    let first = errors.first().expect("expected at least one error");
+    assert!(matches!(
+        first.error,
+        ParseErrorType::RecursionLimitExceeded
+    ));
+    // Exactly one `RecursionLimitExceeded` — guards against a regression
+    // where the unwind loops and re-triggers the limit check.
+    let recursion_errors = errors
+        .iter()
+        .filter(|e| matches!(e.error, ParseErrorType::RecursionLimitExceeded))
+        .count();
+    assert_eq!(recursion_errors, 1);
+    // Small, bounded tail of follow-up errors from the unwinding frames.
+    // Today this is 0; the generous cap is a regression gate, not a spec.
+    assert!(
+        errors.len() <= 8,
+        "expected a small number of errors, got {}: {errors:?}",
+        errors.len(),
+    );
+}
+
+#[test]
+fn recursion_limit_default_set() {
+    let opts = ParseOptions::from(Mode::Module);
+    // Guards against someone accidentally unsetting the default. Real-world
+    // Python never approaches this depth, and the value must stay within the
+    // threading stack's capacity — see the const's docs in `options.rs`.
+    assert!(opts.max_recursion_depth() >= 200);
+    assert!(opts.max_recursion_depth() <= 2000);
+}
diff --git a/crates/ruff_python_parser/src/string.rs b/crates/ruff_python_parser/src/string.rs
index 4b750865a29b4..651dca7523973 100644
--- a/crates/ruff_python_parser/src/string.rs
+++ b/crates/ruff_python_parser/src/string.rs
@@ -527,7 +527,10 @@ mod tests {
     use ruff_python_ast::Suite;
 
     use crate::error::LexicalErrorType;
-    use crate::{InterpolatedStringErrorType, ParseError, ParseErrorType, Parsed, parse_module};
+    use crate::{
+        InterpolatedStringErrorType, Mode, ParseError, ParseErrorType, ParseOptions, Parsed, parse,
+        parse_module,
+    };
 
     const WINDOWS_EOL: &str = "\r\n";
     const MAC_EOL: &str = "\r";
@@ -537,6 +540,25 @@ mod tests {
         parse_module(source).map(Parsed::into_suite)
     }
 
+    fn parse_suite_with_recursion_limit(
+        source: &str,
+        max_recursion_depth: u16,
+    ) -> Result<Suite, ParseError> {
+        parse(
+            source,
+            ParseOptions::from(Mode::Module).with_max_recursion_depth(max_recursion_depth),
+        )
+        .map(|parsed| parsed.try_into_module().unwrap().into_suite())
+    }
+
+    fn nested_format_spec(prefix: char, depth: usize) -> String {
+        let mut replacement_field = String::from("{spec}");
+        for _ in 0..depth {
+            replacement_field = format!("{{foo:{replacement_field}}}");
+        }
+        format!(r#"{prefix}"{replacement_field}""#)
+    }
+
     fn string_parser_escaped_eol(eol: &str) -> Suite {
         let source = format!(r"'text \{eol}more text'");
         parse_suite(&source).unwrap()
@@ -574,6 +596,14 @@ mod tests {
         insta::assert_debug_snapshot!(suite);
     }
 
+    #[test]
+    fn test_parse_fstring_nested_spec_recursion_limit() {
+        assert!(parse_suite_with_recursion_limit(r#"f"{foo:{spec}}""#, 8).is_ok());
+
+        let err = parse_suite_with_recursion_limit(&nested_format_spec('f', 200), 8).unwrap_err();
+        assert!(matches!(err.error, ParseErrorType::RecursionLimitExceeded));
+    }
+
     #[test]
     fn test_parse_fstring_not_nested_spec() {
         let source = r#"f"{foo:spec}""#;
@@ -686,6 +716,14 @@ mod tests {
         insta::assert_debug_snapshot!(suite);
     }
 
+    #[test]
+    fn test_parse_tstring_nested_spec_recursion_limit() {
+        assert!(parse_suite_with_recursion_limit(r#"t"{foo:{spec}}""#, 8).is_ok());
+
+        let err = parse_suite_with_recursion_limit(&nested_format_spec('t', 200), 8).unwrap_err();
+        assert!(matches!(err.error, ParseErrorType::RecursionLimitExceeded));
+    }
+
     #[test]
     fn test_parse_tstring_not_nested_spec() {
         let source = r#"t"{foo:spec}""#;