Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions Lib/test/test_tokenize.py
Original file line number Diff line number Diff line change
Expand Up @@ -3188,6 +3188,16 @@ def get_tokens(string):
with self.subTest(case=case):
self.assertRaises(tokenize.TokenError, get_tokens, case)

def test_tstring_multiline_bang_underflow(self):
    # gh-149183: a t-string whose '{' and the following '!' end up on
    # different source lines made the lexer's strlen-based expression
    # size tracking go negative; the negative length was cast to a huge
    # size_t, so tokenizing raised MemoryError instead of TokenError.
    stream = BytesIO(b't"{!\n!x')
    with self.assertRaises(tokenize.TokenError):
        list(tokenize.tokenize(stream.readline))

@support.skip_wasi_stack_overflow()
def test_max_indent(self):
MAXINDENT = 100
Expand Down
23 changes: 16 additions & 7 deletions Parser/lexer/lexer.c
Original file line number Diff line number Diff line change
Expand Up @@ -121,12 +121,22 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
}
PyObject *res = NULL;

Py_ssize_t expr_len = tok_mode->last_expr_size - tok_mode->last_expr_end;
if (expr_len < 0) {
Comment on lines +124 to +125
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Put this check before the declaration of res

/* last_expr_end > last_expr_size: happens when '{' and the closing
delimiter span different source lines, causing the strlen-based
size tracking to underflow. Treat as a tokenizer error rather
than passing a negative length (cast to huge size_t) to malloc or
PyUnicode_DecodeUTF8. */
return -1;
}

// Look for a # character outside of string literals
int hash_detected = 0;
int in_string = 0;
char quote_char = 0;

for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
for (Py_ssize_t i = 0; i < expr_len; i++) {
char ch = tok_mode->last_expr_buffer[i];

// Skip escaped characters
Expand Down Expand Up @@ -163,7 +173,7 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
// If we found a # character in the expression, we need to handle comments
if (hash_detected) {
// Allocate buffer for processed result
char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size - tok_mode->last_expr_end + 1) * sizeof(char));
char *result = (char *)PyMem_Malloc((expr_len + 1) * sizeof(char));
if (!result) {
return -1;
}
Expand All @@ -174,7 +184,7 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
quote_char = 0; // Current string quote char

// Process each character
while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
while (i < expr_len) {
char ch = tok_mode->last_expr_buffer[i];

// Handle string quotes
Expand All @@ -190,11 +200,10 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
}
// Skip comments
else if (ch == '#' && !in_string) {
while (i < tok_mode->last_expr_size - tok_mode->last_expr_end &&
tok_mode->last_expr_buffer[i] != '\n') {
while (i < expr_len && tok_mode->last_expr_buffer[i] != '\n') {
i++;
}
if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
if (i < expr_len) {
result[j++] = '\n';
}
}
Expand All @@ -211,7 +220,7 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
} else {
res = PyUnicode_DecodeUTF8(
tok_mode->last_expr_buffer,
tok_mode->last_expr_size - tok_mode->last_expr_end,
expr_len,
NULL
);
}
Expand Down
Loading