Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions crates/ruff_python_parser/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,9 @@ pub enum ParseErrorType {
TStringError(InterpolatedStringErrorType),
/// Parser encountered an error during lexing.
Lexical(LexicalErrorType),

/// Parser aborted because [`crate::ParseOptions::max_recursion_depth`] was exceeded.
RecursionLimitExceeded,
}

impl ParseErrorType {
Expand Down Expand Up @@ -329,6 +332,7 @@ impl std::fmt::Display for ParseErrorType {
ParseErrorType::UnexpectedExpressionToken => {
write!(f, "Unexpected token at the end of an expression")
}
ParseErrorType::RecursionLimitExceeded => f.write_str("Source is too deeply nested"),
}
}
}
Expand Down
39 changes: 34 additions & 5 deletions crates/ruff_python_parser/src/parser/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,29 @@ impl<'src> Parser<'src> {
&mut self,
left_precedence: OperatorPrecedence,
context: ExpressionContext,
) -> ParsedExpr {
let range = self.current_token_range();
if self.enter_recursion(range) {
let result = self.parse_lhs_expression_inner(left_precedence, context);
self.leave_recursion();
result
} else {
// Returns a trivial placeholder expression to stand in for a real
// expression when the recursion limit has been exceeded.
ParsedExpr {
expr: Expr::EllipsisLiteral(ast::ExprEllipsisLiteral {
range,
node_index: AtomicNodeIndex::NONE,
}),
is_parenthesized: false,
}
}
}

fn parse_lhs_expression_inner(
&mut self,
left_precedence: OperatorPrecedence,
context: ExpressionContext,
) -> ParsedExpr {
let start = self.node_start();
let token = self.current_token_kind();
Expand Down Expand Up @@ -1797,11 +1820,17 @@ impl<'src> Parser<'src> {

let format_spec = if self.eat(TokenKind::Colon) {
let spec_start = self.node_start();
let elements = self.parse_interpolated_string_elements(
flags,
InterpolatedStringElementsKind::FormatSpec(string_kind),
string_kind,
);
let elements = if self.enter_recursion(self.current_token_range()) {
let elements = self.parse_interpolated_string_elements(
flags,
InterpolatedStringElementsKind::FormatSpec(string_kind),
string_kind,
);
self.leave_recursion();
elements
} else {
ast::InterpolatedStringElements::from(vec![])
};
Some(Box::new(ast::InterpolatedStringFormatSpec {
range: self.node_range(spec_start),
elements,
Expand Down
34 changes: 34 additions & 0 deletions crates/ruff_python_parser/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ pub(crate) struct Parser<'src> {

/// The start offset in the source code from which to start parsing at.
start_offset: TextSize,

/// Number of additional nested recursive parse calls still allowed before the recursion limit is exceeded.
depth_remaining: u16,
}

impl<'src> Parser<'src> {
Expand All @@ -72,6 +75,7 @@ impl<'src> Parser<'src> {
) -> Self {
let tokens = TokenSource::from_source(source, options.mode, start_offset);

let depth_remaining = options.max_recursion_depth;
Parser {
options,
source,
Expand All @@ -82,9 +86,39 @@ impl<'src> Parser<'src> {
prev_token_end: TextSize::new(0),
start_offset,
current_token_id: TokenId::default(),
depth_remaining,
}
}

/// Reserves one unit of recursion depth for a recursive parsing function.
///
/// Returns `true` when the caller may recurse. Returns `false` when no depth
/// is left; in that case a [`ParseErrorType::RecursionLimitExceeded`] error
/// has been recorded at `ranged`, every remaining token has been skipped, and
/// the caller must return a placeholder node instead of recursing.
///
/// Each call that returns `true` has to be balanced by exactly one
/// [`Parser::leave_recursion`].
#[must_use]
fn enter_recursion<R: Ranged>(&mut self, ranged: R) -> bool {
    if self.depth_remaining == 0 {
        self.add_error(ParseErrorType::RecursionLimitExceeded, ranged);
        // Fast-forward to end-of-file. The outer frames then unwind
        // quickly, and the `ParserProgress` infinite-loop guards never
        // see the `(` / `[` etc. that this frame left unconsumed.
        while self.current_token_kind() != TokenKind::EndOfFile {
            self.bump_any();
        }
        return false;
    }

    self.depth_remaining -= 1;
    true
}

/// Hands back the unit of recursion depth taken by a successful
/// [`Parser::enter_recursion`].
///
/// Must be called at the end of every recursive parsing function whose
/// matching [`Parser::enter_recursion`] returned `true`, and must not be
/// called when it returned `false` (no depth was taken in that case).
fn leave_recursion(&mut self) {
    // `depth_remaining` starts at the configured maximum and only drops on a
    // successful `enter_recursion`, so it must be strictly below the maximum
    // here. Anything else means an unpaired call, which would silently raise
    // the effective limit.
    debug_assert!(
        self.depth_remaining < self.options.max_recursion_depth,
        "`leave_recursion` called without a matching successful `enter_recursion`"
    );
    self.depth_remaining += 1;
}

/// Consumes the [`Parser`] and returns the parsed [`Parsed`].
pub(crate) fn parse(mut self) -> Parsed<Mod> {
let syntax = match self.options.mode {
Expand Down
31 changes: 31 additions & 0 deletions crates/ruff_python_parser/src/parser/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,19 @@ use ruff_python_ast::{PySourceType, PythonVersion};

use crate::{AsMode, Mode};

/// The default maximum recursion depth used by the parser.
///
/// Real-world Python rarely nests more than a handful of levels deep; this cap
/// exists to keep the parser from overflowing the stack on adversarial or
/// machine-generated input. The value is intentionally modest because each
/// "depth unit" corresponds to several real stack frames on the parser's
/// descent (for a parenthesised expression: ~8 frames, each a few KB in a
/// debug build), so one depth unit is roughly 15–30 KB of actual stack. The
/// default has to fit comfortably within the tightest stacks we care about:
/// Rust's default 2 MB worker-thread stack (used by `std::thread`, tokio,
/// `cargo test`, …) and Windows' 1 MB main-thread stack.
///
/// NOTE(review): taken at face value, 200 depth units × 15–30 KB is 3–6 MB,
/// which is more than the 1–2 MB stacks named above. Either the per-unit
/// estimate is a debug-only worst case or the default is too high for those
/// stacks — TODO confirm with a measurement and adjust the text or the value.
const DEFAULT_MAX_RECURSION_DEPTH: u16 = 200;

/// Options for controlling how a source file is parsed.
///
/// You can construct a [`ParseOptions`] directly from a [`Mode`]:
Expand All @@ -26,6 +39,11 @@ pub struct ParseOptions {
pub(crate) mode: Mode,
/// Target version for detecting version-related syntax errors.
pub(crate) target_version: PythonVersion,
/// Maximum recursion depth for the parser. The parser aborts with a
/// [`crate::ParseErrorType::RecursionLimitExceeded`] error once this many
/// nested expression / statement / pattern nodes are on the parser's call
/// stack. Defaults to [`DEFAULT_MAX_RECURSION_DEPTH`].
pub(crate) max_recursion_depth: u16,
}

impl ParseOptions {
Expand All @@ -38,13 +56,25 @@ impl ParseOptions {
pub fn target_version(&self) -> PythonVersion {
self.target_version
}

/// Set the maximum recursion depth for the parser.
#[must_use]
pub fn with_max_recursion_depth(mut self, depth: u16) -> Self {
self.max_recursion_depth = depth;
self
}

/// Returns the maximum recursion depth configured for the parser.
pub fn max_recursion_depth(&self) -> u16 {
self.max_recursion_depth
}
}

impl From<Mode> for ParseOptions {
    /// Builds options for `mode`, using the default target Python version and
    /// the default recursion cap.
    fn from(mode: Mode) -> Self {
        let target_version = PythonVersion::default();
        let max_recursion_depth = DEFAULT_MAX_RECURSION_DEPTH;
        Self {
            mode,
            target_version,
            max_recursion_depth,
        }
    }
}
Expand All @@ -54,6 +84,7 @@ impl From<PySourceType> for ParseOptions {
Self {
mode: source_type.as_mode(),
target_version: PythonVersion::default(),
max_recursion_depth: DEFAULT_MAX_RECURSION_DEPTH,
}
}
}
17 changes: 17 additions & 0 deletions crates/ruff_python_parser/src/parser/pattern.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,23 @@ impl Parser<'_> {
///
/// See: <https://docs.python.org/3/reference/compound_stmts.html#grammar-token-python-grammar-pattern>
fn parse_match_pattern(&mut self, allow_star_pattern: AllowStarPattern) -> Pattern {
    // Captured before the depth check so the placeholder below can reuse it.
    let range = self.current_token_range();

    if !self.enter_recursion(range) {
        // Out of recursion depth: return a wildcard-style `MatchAs` so the
        // caller still receives a well-formed `Pattern`.
        return Pattern::MatchAs(ast::PatternMatchAs {
            range,
            name: None,
            pattern: None,
            node_index: AtomicNodeIndex::NONE,
        });
    }

    let pattern = self.parse_match_pattern_inner(allow_star_pattern);
    self.leave_recursion();
    pattern
}

fn parse_match_pattern_inner(&mut self, allow_star_pattern: AllowStarPattern) -> Pattern {
let start = self.node_start();

// We don't yet know if it's an or pattern or an as pattern, so use whatever
Expand Down
17 changes: 17 additions & 0 deletions crates/ruff_python_parser/src/parser/statement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,23 @@ impl<'src> Parser<'src> {
/// - <https://docs.python.org/3/reference/compound_stmts.html>
/// - <https://docs.python.org/3/reference/simple_stmts.html>
pub(super) fn parse_statement(&mut self) -> Stmt {
    // Captured before the depth check so the placeholder below can reuse it.
    let range = self.current_token_range();

    if !self.enter_recursion(range) {
        // `enter_recursion` has already recorded `RecursionLimitExceeded`,
        // so the parse result is in a failed state regardless. The `pass`
        // node only gives the unwinding callers something to hold.
        return Stmt::Pass(ast::StmtPass {
            range,
            node_index: AtomicNodeIndex::NONE,
        });
    }

    let stmt = self.parse_statement_inner();
    self.leave_recursion();
    stmt
}

fn parse_statement_inner(&mut self) -> Stmt {
let start = self.node_start();

match self.current_token_kind() {
Expand Down
124 changes: 123 additions & 1 deletion crates/ruff_python_parser/src/parser/tests.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::{Mode, ParseOptions, parse, parse_expression, parse_module};
use crate::{Mode, ParseErrorType, ParseOptions, parse, parse_expression, parse_module};

#[test]
fn test_modes() {
Expand Down Expand Up @@ -179,3 +179,125 @@ fn test_tstring_fstring_middle_fuzzer() {

insta::assert_debug_snapshot!(error);
}

#[test]
fn recursion_limit_nested_parens() {
    // `((( … 1 … )))` nested ten times deeper than the configured cap.
    let nesting = 1_000;
    let mut src = "(".repeat(nesting);
    src.push('1');
    src.push_str(&")".repeat(nesting));

    let opts = ParseOptions::from(Mode::Module).with_max_recursion_depth(100);
    let err = parse(&src, opts).unwrap_err();
    assert!(matches!(err.error, ParseErrorType::RecursionLimitExceeded));
}

#[test]
fn recursion_limit_normal_python_unaffected() {
    // Fifty levels of parentheses is deeper than ordinary Python code gets,
    // yet still under the default cap, so parsing with default options
    // must succeed.
    let nesting = 50;
    let mut src = String::from("x = ");
    src.push_str(&"(".repeat(nesting));
    src.push('1');
    src.push_str(&")".repeat(nesting));

    parse_module(&src).unwrap();
}

#[test]
fn recursion_limit_nested_def_blocks() {
    // Nested `def`s drive the limit through `parse_statement` instead of
    // `parse_lhs_expression`. Level `n` is indented with `n` tabs so the
    // indentation stays valid, and the innermost body is a single `pass`.
    let depth = 400;
    let mut src: String = (0..depth)
        .map(|level| format!("{}def f():\n", "\t".repeat(level)))
        .collect();
    src.push_str(&"\t".repeat(depth));
    src.push_str("pass\n");

    let opts = ParseOptions::from(Mode::Module).with_max_recursion_depth(100);
    let err = parse(&src, opts).unwrap_err();
    assert!(matches!(err.error, ParseErrorType::RecursionLimitExceeded));
}

#[test]
fn recursion_limit_nested_lists() {
    // `[[[ … 1 … ]]]` nested ten times deeper than the configured cap.
    let nesting = 1_000;
    let mut src = "[".repeat(nesting);
    src.push('1');
    src.push_str(&"]".repeat(nesting));

    let opts = ParseOptions::from(Mode::Module).with_max_recursion_depth(100);
    let err = parse(&src, opts).unwrap_err();
    assert!(matches!(err.error, ParseErrorType::RecursionLimitExceeded));
}

#[test]
fn recursion_limit_nested_match_patterns() {
    // A `case` pattern wrapped in 600 pairs of parentheses, so the limit is
    // reached on the pattern-parsing path as well as the statement and
    // expression paths.
    let nesting = 600;
    let src = format!(
        "match x:\n case {}y{}: pass\n",
        "(".repeat(nesting),
        ")".repeat(nesting)
    );

    let opts = ParseOptions::from(Mode::Module).with_max_recursion_depth(100);
    let err = parse(&src, opts).unwrap_err();
    assert!(matches!(err.error, ParseErrorType::RecursionLimitExceeded));
}

#[test]
fn recursion_limit_binary_paren_interplay() {
    // `1+(1+(1+(1+...)))`: every level pairs a binary operator with a
    // parenthesised operand, the shape described in the tracking issue.
    let depth = 2_000;
    let src = format!("{}1{}", "1+(".repeat(depth), ")".repeat(depth));

    let opts = ParseOptions::from(Mode::Module).with_max_recursion_depth(100);
    let err = parse(&src, opts).unwrap_err();
    assert!(matches!(err.error, ParseErrorType::RecursionLimitExceeded));
}

#[test]
fn recursion_limit_first_error_is_recursion_not_noise() {
    // Once the limit trips, the outer parser frames may report follow-up
    // errors while unwinding. Callers look at the first error (through
    // `into_result` / `Parsed::errors()`), so `RecursionLimitExceeded` has to
    // lead the list. The drain-to-EOF in `enter_recursion` should also keep
    // the list short instead of adding one error per unwound frame.
    let nesting = 2_000;
    let mut src = "(".repeat(nesting);
    src.push('1');
    src.push_str(&")".repeat(nesting));

    let opts = ParseOptions::from(Mode::Module).with_max_recursion_depth(50);
    let parsed = crate::parse_unchecked(&src, opts);
    let errors = parsed.errors();

    let head = errors.first().expect("expected at least one error");
    assert!(matches!(
        head.error,
        ParseErrorType::RecursionLimitExceeded
    ));

    // The limit must be reported exactly once; more would mean the unwind
    // loops back into the depth check and trips it again.
    assert_eq!(
        errors
            .iter()
            .filter(|error| matches!(error.error, ParseErrorType::RecursionLimitExceeded))
            .count(),
        1
    );

    // Follow-up errors from unwinding must stay few. Today there are none;
    // the loose bound is a regression gate rather than a specification.
    assert!(
        errors.len() <= 8,
        "expected a small number of errors, got {}: {errors:?}",
        errors.len(),
    );
}

#[test]
fn recursion_limit_default_set() {
    // Protects the default from being dropped or inflated by accident. It
    // has to be high enough that real-world Python never reaches it and low
    // enough to stay within a thread's stack — see the docs on the const in
    // `options.rs`.
    let default_depth = ParseOptions::from(Mode::Module).max_recursion_depth();
    assert!(default_depth >= 200);
    assert!(default_depth <= 2000);
}
Loading
Loading