Skip to content

Commit a1ac559

Browse files
lysnikolaou, FFY00, and serhiy-storchaka
authored
gh-107450: Check for overflow in the tokenizer and fix overflow test (#110832)
Co-authored-by: Filipe Laíns <[email protected]>
Co-authored-by: Serhiy Storchaka <[email protected]>
1 parent b3c9faf commit a1ac559

File tree

4 files changed

+40
-22
lines changed

4 files changed

+40
-22
lines changed

Include/errcode.h

+19-18
Original file line numberDiff line numberDiff line change
@@ -19,24 +19,25 @@
1919
extern "C" {
2020
#endif
2121

22-
#define E_OK 10 /* No error */
23-
#define E_EOF 11 /* End Of File */
24-
#define E_INTR 12 /* Interrupted */
25-
#define E_TOKEN 13 /* Bad token */
26-
#define E_SYNTAX 14 /* Syntax error */
27-
#define E_NOMEM 15 /* Ran out of memory */
28-
#define E_DONE 16 /* Parsing complete */
29-
#define E_ERROR 17 /* Execution error */
30-
#define E_TABSPACE 18 /* Inconsistent mixing of tabs and spaces */
31-
#define E_OVERFLOW 19 /* Node had too many children */
32-
#define E_TOODEEP 20 /* Too many indentation levels */
33-
#define E_DEDENT 21 /* No matching outer block for dedent */
34-
#define E_DECODE 22 /* Error in decoding into Unicode */
35-
#define E_EOFS 23 /* EOF in triple-quoted string */
36-
#define E_EOLS 24 /* EOL in single-quoted string */
37-
#define E_LINECONT 25 /* Unexpected characters after a line continuation */
38-
#define E_BADSINGLE 27 /* Ill-formed single statement input */
39-
#define E_INTERACT_STOP 28 /* Interactive mode stopped tokenization */
22+
#define E_OK 10 /* No error */
23+
#define E_EOF 11 /* End Of File */
24+
#define E_INTR 12 /* Interrupted */
25+
#define E_TOKEN 13 /* Bad token */
26+
#define E_SYNTAX 14 /* Syntax error */
27+
#define E_NOMEM 15 /* Ran out of memory */
28+
#define E_DONE 16 /* Parsing complete */
29+
#define E_ERROR 17 /* Execution error */
30+
#define E_TABSPACE 18 /* Inconsistent mixing of tabs and spaces */
31+
#define E_OVERFLOW 19 /* Node had too many children */
32+
#define E_TOODEEP 20 /* Too many indentation levels */
33+
#define E_DEDENT 21 /* No matching outer block for dedent */
34+
#define E_DECODE 22 /* Error in decoding into Unicode */
35+
#define E_EOFS 23 /* EOF in triple-quoted string */
36+
#define E_EOLS 24 /* EOL in single-quoted string */
37+
#define E_LINECONT 25 /* Unexpected characters after a line continuation */
38+
#define E_BADSINGLE 27 /* Ill-formed single statement input */
39+
#define E_INTERACT_STOP 28 /* Interactive mode stopped tokenization */
40+
#define E_COLUMNOVERFLOW 29 /* Column offset overflow */
4041

4142
#ifdef __cplusplus
4243
}

Lib/test/test_exceptions.py

+12-4
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,12 @@
1818
from test.support.warnings_helper import check_warnings
1919
from test import support
2020

21+
try:
22+
from _testcapi import INT_MAX
23+
except ImportError:
24+
INT_MAX = 2**31 - 1
25+
26+
2127

2228
class NaiveException(Exception):
2329
def __init__(self, x):
@@ -318,11 +324,13 @@ def baz():
318324
check('(yield i) = 2', 1, 2)
319325
check('def f(*):\n pass', 1, 7)
320326

327+
@unittest.skipIf(INT_MAX >= sys.maxsize, "Downcasting to int is safe for col_offset")
321328
@support.requires_resource('cpu')
322-
@support.bigmemtest(support._2G, memuse=1.5)
323-
def testMemoryErrorBigSource(self, _size):
324-
with self.assertRaises(OverflowError):
325-
exec(f"if True:\n {' ' * 2**31}print('hello world')")
329+
@support.bigmemtest(INT_MAX, memuse=2, dry_run=False)
330+
def testMemoryErrorBigSource(self, size):
331+
src = b"if True:\n%*s" % (size, b"pass")
332+
with self.assertRaisesRegex(OverflowError, "Parser column offset overflow"):
333+
compile(src, '<fragment>', 'exec')
326334

327335
@cpython_only
328336
def testSettingException(self):

Parser/lexer/lexer.c

+4
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,10 @@ tok_nextc(struct tok_state *tok)
5959
int rc;
6060
for (;;) {
6161
if (tok->cur != tok->inp) {
62+
if ((unsigned int) tok->col_offset >= (unsigned int) INT_MAX) {
63+
tok->done = E_COLUMNOVERFLOW;
64+
return EOF;
65+
}
6266
tok->col_offset++;
6367
return Py_CHARMASK(*tok->cur++); /* Fast path */
6468
}

Parser/pegen_errors.c

+5
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ _Pypegen_tokenizer_error(Parser *p)
6868
const char *msg = NULL;
6969
PyObject* errtype = PyExc_SyntaxError;
7070
Py_ssize_t col_offset = -1;
71+
p->error_indicator = 1;
7172
switch (p->tok->done) {
7273
case E_TOKEN:
7374
msg = "invalid token";
@@ -103,6 +104,10 @@ _Pypegen_tokenizer_error(Parser *p)
103104
msg = "unexpected character after line continuation character";
104105
break;
105106
}
107+
case E_COLUMNOVERFLOW:
108+
PyErr_SetString(PyExc_OverflowError,
109+
"Parser column offset overflow - source line is too big");
110+
return -1;
106111
default:
107112
msg = "unknown parsing error";
108113
}

0 commit comments

Comments
 (0)