From 05f856f3142698a6d34f9879897849a5d3a9f329 Mon Sep 17 00:00:00 2001
From: Edouard CHIN <chin.edouard@gmail.com>
Date: Wed, 25 Mar 2026 13:28:57 +0100
Subject: [PATCH 01/12] [ruby/rubygems] This new suite isn't running on
 windows:

- This new suite needs to be added to a group in order to be picked up.

  I saw the warning on CI

  ./spec/bundler/errors_spec.rb is not assigned to any Windows runner group. see spec/support/windows_tag_group.rb for details.

https://github.com/ruby/rubygems/commit/ed87214b37
---
 spec/bundler/support/windows_tag_group.rb | 1 +
 1 file changed, 1 insertion(+)

diff --git a/spec/bundler/support/windows_tag_group.rb b/spec/bundler/support/windows_tag_group.rb
index bd6acb9d55ca2a..f1a78f23e8d37e 100644
--- a/spec/bundler/support/windows_tag_group.rb
+++ b/spec/bundler/support/windows_tag_group.rb
@@ -33,6 +33,7 @@ module WindowsTagGroup
         "spec/bundler/source/git/git_proxy_spec.rb",
         "spec/bundler/source_list_spec.rb",
         "spec/bundler/plugin/installer_spec.rb",
+        "spec/bundler/errors_spec.rb",
         "spec/bundler/friendly_errors_spec.rb",
         "spec/resolver/platform_spec.rb",
         "spec/bundler/fetcher/downloader_spec.rb",

From d88aca8aca2be9da86da4f42c4b5d5e2bbb5363a Mon Sep 17 00:00:00 2001
From: Earlopain <14981592+Earlopain@users.noreply.github.com>
Date: Wed, 25 Mar 2026 09:35:49 +0100
Subject: [PATCH 02/12] [ruby/prism] Add `keyword_loc` to `ForwardingSuperNode`

A `ForwardingSuperNode` can take a block, so the keyword location is not always the entirety of the node.
You can get it by constructing a location that is just the first 5 characters,
but why not have it be provided by prism?

https://github.com/ruby/prism/commit/878d79eb8c
---
 prism/config.yml | 8 ++++++++
 prism/prism.c    | 1 +
 2 files changed, 9 insertions(+)

diff --git a/prism/config.yml b/prism/config.yml
index ef970eba9d356d..7c283741d3b52c 100644
--- a/prism/config.yml
+++ b/prism/config.yml
@@ -2627,6 +2627,14 @@ nodes:
           end
   - name: ForwardingSuperNode
     fields:
+      - name: keyword_loc
+        type: location
+        comment: |
+          super
+          ^^^^^
+
+          super { 123 }
+          ^^^^^
       - name: block
         type: node?
         kind: BlockNode
diff --git a/prism/prism.c b/prism/prism.c
index d68c83f1e34c18..3b475657808805 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -4379,6 +4379,7 @@ pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm
         ++parser->node_id,
         0,
         (block == NULL) ? PM_LOCATION_INIT_TOKEN(parser, token) : PM_LOCATION_INIT_TOKEN_NODE(parser, token, block),
+        PM_LOCATION_INIT_TOKEN(parser, token),
         block
     );
 }

From 3a5536aaf7aa511cda485ccbd45b2373c1403b49 Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Tue, 24 Mar 2026 11:53:12 -0400
Subject: [PATCH 03/12] [ruby/prism] Pull out parse_def from
 parse_expression_prefix

https://github.com/ruby/prism/commit/f77dc15864
---
 prism/prism.c | 735 +++++++++++++++++++++++++-------------------------
 1 file changed, 368 insertions(+), 367 deletions(-)

diff --git a/prism/prism.c b/prism/prism.c
index 3b475657808805..743683f1ec29b6 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -17769,6 +17769,372 @@ pm_block_call_p(const pm_node_t *node) {
     return false;
 }
 
+/**
+ * Parse a method definition expression (the `def` keyword).
+ */
+static pm_node_t *
+parse_def(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, uint16_t depth) {
+    pm_node_list_t current_block_exits = { 0 };
+    pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
+
+    pm_token_t def_keyword = parser->current;
+    size_t opening_newline_index = token_newline_index(parser);
+
+    pm_node_t *receiver = NULL;
+    pm_token_t operator = { 0 };
+    pm_token_t name;
+
+    /* This context is necessary for lexing `...` in bare params correctly. It
+     * must be pushed before lexing the first param, so it is pushed here. */
+    context_push(parser, PM_CONTEXT_DEF_PARAMS);
+    parser_lex(parser);
+
+    /* This will be false if the method name is not a valid identifier but could
+     * be followed by an operator. */
+    bool valid_name = true;
+
+    switch (parser->current.type) {
+        case PM_CASE_OPERATOR:
+            pm_parser_scope_push(parser, true);
+            lex_state_set(parser, PM_LEX_STATE_ENDFN);
+            parser_lex(parser);
+
+            name = parser->previous;
+            break;
+        case PM_TOKEN_IDENTIFIER: {
+            parser_lex(parser);
+
+            if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
+                receiver = parse_variable_call(parser);
+
+                pm_parser_scope_push(parser, true);
+                lex_state_set(parser, PM_LEX_STATE_FNAME);
+                parser_lex(parser);
+
+                operator = parser->previous;
+                name = parse_method_definition_name(parser);
+            } else {
+                pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous));
+                pm_parser_scope_push(parser, true);
+
+                name = parser->previous;
+            }
+
+            break;
+        }
+        case PM_TOKEN_INSTANCE_VARIABLE:
+        case PM_TOKEN_CLASS_VARIABLE:
+        case PM_TOKEN_GLOBAL_VARIABLE:
+            valid_name = false;
+            PRISM_FALLTHROUGH
+        case PM_TOKEN_CONSTANT:
+        case PM_TOKEN_KEYWORD_NIL:
+        case PM_TOKEN_KEYWORD_SELF:
+        case PM_TOKEN_KEYWORD_TRUE:
+        case PM_TOKEN_KEYWORD_FALSE:
+        case PM_TOKEN_KEYWORD___FILE__:
+        case PM_TOKEN_KEYWORD___LINE__:
+        case PM_TOKEN_KEYWORD___ENCODING__: {
+            pm_parser_scope_push(parser, true);
+            parser_lex(parser);
+
+            pm_token_t identifier = parser->previous;
+
+            if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
+                lex_state_set(parser, PM_LEX_STATE_FNAME);
+                parser_lex(parser);
+                operator = parser->previous;
+
+                switch (identifier.type) {
+                    case PM_TOKEN_CONSTANT:
+                        receiver = UP(pm_constant_read_node_create(parser, &identifier));
+                        break;
+                    case PM_TOKEN_INSTANCE_VARIABLE:
+                        receiver = UP(pm_instance_variable_read_node_create(parser, &identifier));
+                        break;
+                    case PM_TOKEN_CLASS_VARIABLE:
+                        receiver = UP(pm_class_variable_read_node_create(parser, &identifier));
+                        break;
+                    case PM_TOKEN_GLOBAL_VARIABLE:
+                        receiver = UP(pm_global_variable_read_node_create(parser, &identifier));
+                        break;
+                    case PM_TOKEN_KEYWORD_NIL:
+                        receiver = UP(pm_nil_node_create(parser, &identifier));
+                        break;
+                    case PM_TOKEN_KEYWORD_SELF:
+                        receiver = UP(pm_self_node_create(parser, &identifier));
+                        break;
+                    case PM_TOKEN_KEYWORD_TRUE:
+                        receiver = UP(pm_true_node_create(parser, &identifier));
+                        break;
+                    case PM_TOKEN_KEYWORD_FALSE:
+                        receiver = UP(pm_false_node_create(parser, &identifier));
+                        break;
+                    case PM_TOKEN_KEYWORD___FILE__:
+                        receiver = UP(pm_source_file_node_create(parser, &identifier));
+                        break;
+                    case PM_TOKEN_KEYWORD___LINE__:
+                        receiver = UP(pm_source_line_node_create(parser, &identifier));
+                        break;
+                    case PM_TOKEN_KEYWORD___ENCODING__:
+                        receiver = UP(pm_source_encoding_node_create(parser, &identifier));
+                        break;
+                    default:
+                        break;
+                }
+
+                name = parse_method_definition_name(parser);
+            } else {
+                if (!valid_name) {
+                    PM_PARSER_ERR_TOKEN_FORMAT(parser, &identifier, PM_ERR_DEF_NAME, pm_token_str(identifier.type));
+                }
+
+                name = identifier;
+            }
+            break;
+        }
+        case PM_TOKEN_PARENTHESIS_LEFT: {
+            /* The current context is `PM_CONTEXT_DEF_PARAMS`, however the inner
+             * expression of this parenthesis should not be processed under this
+             * context. Thus, the context is popped here. */
+            context_pop(parser);
+            parser_lex(parser);
+
+            pm_token_t lparen = parser->previous;
+            pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_DEF_RECEIVER, (uint16_t) (depth + 1));
+
+            accept1(parser, PM_TOKEN_NEWLINE);
+            expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
+            pm_token_t rparen = parser->previous;
+
+            lex_state_set(parser, PM_LEX_STATE_FNAME);
+            expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM);
+
+            operator = parser->previous;
+            receiver = UP(pm_parentheses_node_create(parser, &lparen, expression, &rparen, 0));
+
+            /* `PM_CONTEXT_DEF_PARAMS` is pushed again here for the same reason
+             * as described above. */
+            pm_parser_scope_push(parser, true);
+            context_push(parser, PM_CONTEXT_DEF_PARAMS);
+            name = parse_method_definition_name(parser);
+            break;
+        }
+        default:
+            pm_parser_scope_push(parser, true);
+            name = parse_method_definition_name(parser);
+            break;
+    }
+
+    pm_token_t lparen = { 0 };
+    pm_token_t rparen = { 0 };
+    pm_parameters_node_t *params;
+
+    bool accept_endless_def = true;
+    switch (parser->current.type) {
+        case PM_TOKEN_PARENTHESIS_LEFT: {
+            parser_lex(parser);
+            lparen = parser->previous;
+
+            if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
+                params = NULL;
+            } else {
+                /* https://bugs.ruby-lang.org/issues/19107 */
+                bool allow_trailing_comma = parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1;
+                params = parse_parameters(
+                    parser,
+                    PM_BINDING_POWER_DEFINED,
+                    true,
+                    allow_trailing_comma,
+                    true,
+                    true,
+                    false,
+                    PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
+                    (uint16_t) (depth + 1)
+                );
+            }
+
+            lex_state_set(parser, PM_LEX_STATE_BEG);
+            parser->command_start = true;
+
+            context_pop(parser);
+            if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
+                PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_str(parser->current.type));
+                parser->previous.start = parser->previous.end;
+                parser->previous.type = 0;
+            }
+
+            rparen = parser->previous;
+            break;
+        }
+        case PM_CASE_PARAMETER: {
+            /* If we're about to lex a label, we need to add the label state to
+             * make sure the next newline is ignored. */
+            if (parser->current.type == PM_TOKEN_LABEL) {
+                lex_state_set(parser, parser->lex_state | PM_LEX_STATE_LABEL);
+            }
+
+            params = parse_parameters(
+                parser,
+                PM_BINDING_POWER_DEFINED,
+                false,
+                false,
+                true,
+                true,
+                false,
+                PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
+                (uint16_t) (depth + 1)
+            );
+
+            /* Reject `def * = 1` and similar. We have to specifically check for
+             * them because they create ambiguity with optional arguments. */
+            accept_endless_def = false;
+
+            context_pop(parser);
+            break;
+        }
+        default: {
+            params = NULL;
+            context_pop(parser);
+            break;
+        }
+    }
+
+    pm_node_t *statements = NULL;
+    pm_token_t equal = { 0 };
+    pm_token_t end_keyword = { 0 };
+
+    if (accept1(parser, PM_TOKEN_EQUAL)) {
+        if (token_is_setter_name(&name)) {
+            pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER);
+        }
+        if (!accept_endless_def) {
+            pm_parser_err_previous(parser, PM_ERR_DEF_ENDLESS_PARAMETERS);
+        }
+        if (
+            parser->current_context->context == PM_CONTEXT_DEFAULT_PARAMS &&
+            parser->current_context->prev->context == PM_CONTEXT_BLOCK_PARAMETERS
+        ) {
+            PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_START(parser, &def_keyword), PM_TOKENS_LENGTH(&def_keyword, &parser->previous), PM_ERR_UNEXPECTED_PARAMETER_DEFAULT_VALUE, "endless method definition");
+        }
+        equal = parser->previous;
+
+        context_push(parser, PM_CONTEXT_DEF);
+        pm_do_loop_stack_push(parser, false);
+        statements = UP(pm_statements_node_create(parser));
+
+        uint8_t allow_flags;
+        if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) {
+            allow_flags = flags & PM_PARSE_ACCEPTS_COMMAND_CALL;
+        } else {
+            /* Allow `def foo = puts "Hello"` but not
+             * `private def foo = puts "Hello"` */
+            allow_flags = (binding_power == PM_BINDING_POWER_ASSIGNMENT || binding_power < PM_BINDING_POWER_COMPOSITION) ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0;
+        }
+
+        /* Inside a def body, we push true onto the accepts_block_stack so that
+         * `do` is lexed as PM_TOKEN_KEYWORD_DO (which can only start a block
+         * for primary-level constructs, not commands). During command argument
+         * parsing, the stack is pushed to false, causing `do` to be lexed as
+         * PM_TOKEN_KEYWORD_DO_BLOCK, which is not consumed inside the endless
+         * def body and instead left for the outer context. */
+        pm_accepts_block_stack_push(parser, true);
+        pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, allow_flags | PM_PARSE_IN_ENDLESS_DEF, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1));
+        pm_accepts_block_stack_pop(parser);
+
+        /* If an unconsumed PM_TOKEN_KEYWORD_DO follows the body, it is an error
+         * (e.g., `def f = 1 do end`). PM_TOKEN_KEYWORD_DO_BLOCK is
+         * intentionally not caught here — it should bubble up to the outer
+         * context (e.g., `private def f = puts "Hello" do end` where the block
+         * attaches to `private`). */
+        if (accept1(parser, PM_TOKEN_KEYWORD_DO)) {
+            pm_block_node_t *block = parse_block(parser, (uint16_t) (depth + 1));
+            pm_parser_err_node(parser, UP(block), PM_ERR_DEF_ENDLESS_DO_BLOCK);
+        }
+
+        if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
+            context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
+
+            pm_token_t rescue_keyword = parser->previous;
+
+            /* In the Ruby grammar, the rescue value of an endless method
+             * command excludes `and`/`or` and `in`/`=>`. */
+            pm_node_t *value = parse_expression(parser, PM_BINDING_POWER_MATCH + 1, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
+            context_pop(parser);
+
+            statement = UP(pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value));
+        }
+
+        /* A nested endless def whose body is a command call (e.g.,
+         * `def f = def g = foo bar`) is a command assignment and cannot appear
+         * as a def body. */
+        if (PM_NODE_TYPE_P(statement, PM_DEF_NODE) && pm_command_call_value_p(statement)) {
+            PM_PARSER_ERR_NODE_FORMAT(parser, statement, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
+        }
+
+        pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false);
+        pm_do_loop_stack_pop(parser);
+        context_pop(parser);
+    } else {
+        if (lparen.start == NULL) {
+            lex_state_set(parser, PM_LEX_STATE_BEG);
+            parser->command_start = true;
+            expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM);
+        } else {
+            accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
+        }
+
+        pm_accepts_block_stack_push(parser, true);
+        pm_do_loop_stack_push(parser, false);
+
+        if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
+            pm_accepts_block_stack_push(parser, true);
+            statements = UP(parse_statements(parser, PM_CONTEXT_DEF, (uint16_t) (depth + 1)));
+            pm_accepts_block_stack_pop(parser);
+        }
+
+        if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
+            assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
+            statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &def_keyword, def_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_DEF, (uint16_t) (depth + 1)));
+        } else {
+            parser_warn_indentation_mismatch(parser, opening_newline_index, &def_keyword, false, false);
+        }
+
+        pm_accepts_block_stack_pop(parser);
+        pm_do_loop_stack_pop(parser);
+
+        expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM, &def_keyword);
+        end_keyword = parser->previous;
+    }
+
+    pm_constant_id_list_t locals;
+    pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
+    pm_parser_scope_pop(parser);
+
+    /* If the final character is `@` as is the case when defining methods to
+     * override the unary operators, we should ignore the @ in the same way we
+     * do for symbols. */
+    pm_constant_id_t name_id = pm_parser_constant_id_raw(parser, name.start, parse_operator_symbol_name(&name));
+
+    flush_block_exits(parser, previous_block_exits);
+
+    return UP(pm_def_node_create(
+        parser,
+        name_id,
+        &name,
+        receiver,
+        params,
+        statements,
+        &locals,
+        &def_keyword,
+        NTOK2PTR(operator),
+        NTOK2PTR(lparen),
+        NTOK2PTR(rparen),
+        NTOK2PTR(equal),
+        NTOK2PTR(end_keyword)
+    ));
+}
+
 /**
  * Parse an expression that begins with the previous node that we just lexed.
  */
@@ -18988,373 +19354,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, u
             pop_block_exits(parser, previous_block_exits);
             return UP(pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, NTOK2PTR(inheritance_operator), superclass, statements, &parser->previous));
         }
-        case PM_TOKEN_KEYWORD_DEF: {
-            pm_node_list_t current_block_exits = { 0 };
-            pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
-
-            pm_token_t def_keyword = parser->current;
-            size_t opening_newline_index = token_newline_index(parser);
-
-            pm_node_t *receiver = NULL;
-            pm_token_t operator = { 0 };
-            pm_token_t name;
-
-            // This context is necessary for lexing `...` in a bare params
-            // correctly. It must be pushed before lexing the first param, so it
-            // is here.
-            context_push(parser, PM_CONTEXT_DEF_PARAMS);
-            parser_lex(parser);
-
-            // This will be false if the method name is not a valid identifier
-            // but could be followed by an operator.
-            bool valid_name = true;
-
-            switch (parser->current.type) {
-                case PM_CASE_OPERATOR:
-                    pm_parser_scope_push(parser, true);
-                    lex_state_set(parser, PM_LEX_STATE_ENDFN);
-                    parser_lex(parser);
-
-                    name = parser->previous;
-                    break;
-                case PM_TOKEN_IDENTIFIER: {
-                    parser_lex(parser);
-
-                    if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
-                        receiver = parse_variable_call(parser);
-
-                        pm_parser_scope_push(parser, true);
-                        lex_state_set(parser, PM_LEX_STATE_FNAME);
-                        parser_lex(parser);
-
-                        operator = parser->previous;
-                        name = parse_method_definition_name(parser);
-                    } else {
-                        pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous));
-                        pm_parser_scope_push(parser, true);
-
-                        name = parser->previous;
-                    }
-
-                    break;
-                }
-                case PM_TOKEN_INSTANCE_VARIABLE:
-                case PM_TOKEN_CLASS_VARIABLE:
-                case PM_TOKEN_GLOBAL_VARIABLE:
-                    valid_name = false;
-                    PRISM_FALLTHROUGH
-                case PM_TOKEN_CONSTANT:
-                case PM_TOKEN_KEYWORD_NIL:
-                case PM_TOKEN_KEYWORD_SELF:
-                case PM_TOKEN_KEYWORD_TRUE:
-                case PM_TOKEN_KEYWORD_FALSE:
-                case PM_TOKEN_KEYWORD___FILE__:
-                case PM_TOKEN_KEYWORD___LINE__:
-                case PM_TOKEN_KEYWORD___ENCODING__: {
-                    pm_parser_scope_push(parser, true);
-                    parser_lex(parser);
-
-                    pm_token_t identifier = parser->previous;
-
-                    if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
-                        lex_state_set(parser, PM_LEX_STATE_FNAME);
-                        parser_lex(parser);
-                        operator = parser->previous;
-
-                        switch (identifier.type) {
-                            case PM_TOKEN_CONSTANT:
-                                receiver = UP(pm_constant_read_node_create(parser, &identifier));
-                                break;
-                            case PM_TOKEN_INSTANCE_VARIABLE:
-                                receiver = UP(pm_instance_variable_read_node_create(parser, &identifier));
-                                break;
-                            case PM_TOKEN_CLASS_VARIABLE:
-                                receiver = UP(pm_class_variable_read_node_create(parser, &identifier));
-                                break;
-                            case PM_TOKEN_GLOBAL_VARIABLE:
-                                receiver = UP(pm_global_variable_read_node_create(parser, &identifier));
-                                break;
-                            case PM_TOKEN_KEYWORD_NIL:
-                                receiver = UP(pm_nil_node_create(parser, &identifier));
-                                break;
-                            case PM_TOKEN_KEYWORD_SELF:
-                                receiver = UP(pm_self_node_create(parser, &identifier));
-                                break;
-                            case PM_TOKEN_KEYWORD_TRUE:
-                                receiver = UP(pm_true_node_create(parser, &identifier));
-                                break;
-                            case PM_TOKEN_KEYWORD_FALSE:
-                                receiver = UP(pm_false_node_create(parser, &identifier));
-                                break;
-                            case PM_TOKEN_KEYWORD___FILE__:
-                                receiver = UP(pm_source_file_node_create(parser, &identifier));
-                                break;
-                            case PM_TOKEN_KEYWORD___LINE__:
-                                receiver = UP(pm_source_line_node_create(parser, &identifier));
-                                break;
-                            case PM_TOKEN_KEYWORD___ENCODING__:
-                                receiver = UP(pm_source_encoding_node_create(parser, &identifier));
-                                break;
-                            default:
-                                break;
-                        }
-
-                        name = parse_method_definition_name(parser);
-                    } else {
-                        if (!valid_name) {
-                            PM_PARSER_ERR_TOKEN_FORMAT(parser, &identifier, PM_ERR_DEF_NAME, pm_token_str(identifier.type));
-                        }
-
-                        name = identifier;
-                    }
-                    break;
-                }
-                case PM_TOKEN_PARENTHESIS_LEFT: {
-                    // The current context is `PM_CONTEXT_DEF_PARAMS`, however
-                    // the inner expression of this parenthesis should not be
-                    // processed under this context. Thus, the context is popped
-                    // here.
-                    context_pop(parser);
-                    parser_lex(parser);
-
-                    pm_token_t lparen = parser->previous;
-                    pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_DEF_RECEIVER, (uint16_t) (depth + 1));
-
-                    accept1(parser, PM_TOKEN_NEWLINE);
-                    expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
-                    pm_token_t rparen = parser->previous;
-
-                    lex_state_set(parser, PM_LEX_STATE_FNAME);
-                    expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM);
-
-                    operator = parser->previous;
-                    receiver = UP(pm_parentheses_node_create(parser, &lparen, expression, &rparen, 0));
-
-                    // To push `PM_CONTEXT_DEF_PARAMS` again is for the same
-                    // reason as described the above.
-                    pm_parser_scope_push(parser, true);
-                    context_push(parser, PM_CONTEXT_DEF_PARAMS);
-                    name = parse_method_definition_name(parser);
-                    break;
-                }
-                default:
-                    pm_parser_scope_push(parser, true);
-                    name = parse_method_definition_name(parser);
-                    break;
-            }
-
-            pm_token_t lparen = { 0 };
-            pm_token_t rparen = { 0 };
-            pm_parameters_node_t *params;
-
-            bool accept_endless_def = true;
-            switch (parser->current.type) {
-                case PM_TOKEN_PARENTHESIS_LEFT: {
-                    parser_lex(parser);
-                    lparen = parser->previous;
-
-                    if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
-                        params = NULL;
-                    } else {
-                        // https://bugs.ruby-lang.org/issues/19107
-                        bool allow_trailing_comma = parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1;
-                        params = parse_parameters(
-                            parser,
-                            PM_BINDING_POWER_DEFINED,
-                            true,
-                            allow_trailing_comma,
-                            true,
-                            true,
-                            false,
-                            PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
-                            (uint16_t) (depth + 1)
-                        );
-                    }
-
-                    lex_state_set(parser, PM_LEX_STATE_BEG);
-                    parser->command_start = true;
-
-                    context_pop(parser);
-                    if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
-                        PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_str(parser->current.type));
-                        parser->previous.start = parser->previous.end;
-                        parser->previous.type = 0;
-                    }
-
-                    rparen = parser->previous;
-                    break;
-                }
-                case PM_CASE_PARAMETER: {
-                    // If we're about to lex a label, we need to add the label
-                    // state to make sure the next newline is ignored.
-                    if (parser->current.type == PM_TOKEN_LABEL) {
-                        lex_state_set(parser, parser->lex_state | PM_LEX_STATE_LABEL);
-                    }
-
-                    params = parse_parameters(
-                        parser,
-                        PM_BINDING_POWER_DEFINED,
-                        false,
-                        false,
-                        true,
-                        true,
-                        false,
-                        PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
-                        (uint16_t) (depth + 1)
-                    );
-
-                    // Reject `def * = 1` and similar. We have to specifically check
-                    // for them because they create ambiguity with optional arguments.
-                    accept_endless_def = false;
-
-                    context_pop(parser);
-                    break;
-                }
-                default: {
-                    params = NULL;
-                    context_pop(parser);
-                    break;
-                }
-            }
-
-            pm_node_t *statements = NULL;
-            pm_token_t equal = { 0 };
-            pm_token_t end_keyword = { 0 };
-
-            if (accept1(parser, PM_TOKEN_EQUAL)) {
-                if (token_is_setter_name(&name)) {
-                    pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER);
-                }
-                if (!accept_endless_def) {
-                    pm_parser_err_previous(parser, PM_ERR_DEF_ENDLESS_PARAMETERS);
-                }
-                if (
-                    parser->current_context->context == PM_CONTEXT_DEFAULT_PARAMS &&
-                    parser->current_context->prev->context == PM_CONTEXT_BLOCK_PARAMETERS
-                ) {
-                    PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_START(parser, &def_keyword), PM_TOKENS_LENGTH(&def_keyword, &parser->previous), PM_ERR_UNEXPECTED_PARAMETER_DEFAULT_VALUE, "endless method definition");
-                }
-                equal = parser->previous;
-
-                context_push(parser, PM_CONTEXT_DEF);
-                pm_do_loop_stack_push(parser, false);
-                statements = UP(pm_statements_node_create(parser));
-
-                uint8_t allow_flags;
-                if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) {
-                    allow_flags = flags & PM_PARSE_ACCEPTS_COMMAND_CALL;
-                } else {
-                    // Allow `def foo = puts "Hello"` but not `private def foo = puts "Hello"`
-                    allow_flags = (binding_power == PM_BINDING_POWER_ASSIGNMENT || binding_power < PM_BINDING_POWER_COMPOSITION) ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0;
-                }
-
-                // Inside a def body, we push true onto the
-                // accepts_block_stack so that `do` is lexed as
-                // PM_TOKEN_KEYWORD_DO (which can only start a block for
-                // primary-level constructs, not commands). During command
-                // argument parsing, the stack is pushed to false, causing
-                // `do` to be lexed as PM_TOKEN_KEYWORD_DO_BLOCK, which
-                // is not consumed inside the endless def body and instead
-                // left for the outer context.
-                pm_accepts_block_stack_push(parser, true);
-                pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, allow_flags | PM_PARSE_IN_ENDLESS_DEF, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1));
-                pm_accepts_block_stack_pop(parser);
-
-                // If an unconsumed PM_TOKEN_KEYWORD_DO follows the body,
-                // it is an error (e.g., `def f = 1 do end`).
-                // PM_TOKEN_KEYWORD_DO_BLOCK is intentionally not caught
-                // here — it should bubble up to the outer context (e.g.,
-                // `private def f = puts "Hello" do end` where the block
-                // attaches to `private`).
-                if (accept1(parser, PM_TOKEN_KEYWORD_DO)) {
-                    pm_block_node_t *block = parse_block(parser, (uint16_t) (depth + 1));
-                    pm_parser_err_node(parser, UP(block), PM_ERR_DEF_ENDLESS_DO_BLOCK);
-                }
-
-                if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
-                    context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
-
-                    pm_token_t rescue_keyword = parser->previous;
-
-                    // In the Ruby grammar, the rescue value of an endless
-                    // method command excludes and/or and in/=>.
-                    pm_node_t *value = parse_expression(parser, PM_BINDING_POWER_MATCH + 1, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
-                    context_pop(parser);
-
-                    statement = UP(pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value));
-                }
-
-                // A nested endless def whose body is a command call (e.g.,
-                // `def f = def g = foo bar`) is a command assignment and
-                // cannot appear as a def body.
-                if (PM_NODE_TYPE_P(statement, PM_DEF_NODE) && pm_command_call_value_p(statement)) {
-                    PM_PARSER_ERR_NODE_FORMAT(parser, statement, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
-                }
-
-                pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false);
-                pm_do_loop_stack_pop(parser);
-                context_pop(parser);
-            } else {
-                if (lparen.start == NULL) {
-                    lex_state_set(parser, PM_LEX_STATE_BEG);
-                    parser->command_start = true;
-                    expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM);
-                } else {
-                    accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
-                }
-
-                pm_accepts_block_stack_push(parser, true);
-                pm_do_loop_stack_push(parser, false);
-
-                if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
-                    pm_accepts_block_stack_push(parser, true);
-                    statements = UP(parse_statements(parser, PM_CONTEXT_DEF, (uint16_t) (depth + 1)));
-                    pm_accepts_block_stack_pop(parser);
-                }
-
-                if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
-                    assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
-                    statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &def_keyword, def_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_DEF, (uint16_t) (depth + 1)));
-                } else {
-                    parser_warn_indentation_mismatch(parser, opening_newline_index, &def_keyword, false, false);
-                }
-
-                pm_accepts_block_stack_pop(parser);
-                pm_do_loop_stack_pop(parser);
-
-                expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM, &def_keyword);
-                end_keyword = parser->previous;
-            }
-
-            pm_constant_id_list_t locals;
-            pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
-            pm_parser_scope_pop(parser);
-
-            /**
-             * If the final character is `@` as is the case when defining
-             * methods to override the unary operators, we should ignore
-             * the @ in the same way we do for symbols.
-             */
-            pm_constant_id_t name_id = pm_parser_constant_id_raw(parser, name.start, parse_operator_symbol_name(&name));
-
-            flush_block_exits(parser, previous_block_exits);
-
-            return UP(pm_def_node_create(
-                parser,
-                name_id,
-                &name,
-                receiver,
-                params,
-                statements,
-                &locals,
-                &def_keyword,
-                NTOK2PTR(operator),
-                NTOK2PTR(lparen),
-                NTOK2PTR(rparen),
-                NTOK2PTR(equal),
-                NTOK2PTR(end_keyword)
-            ));
-        }
+        case PM_TOKEN_KEYWORD_DEF:
+            return parse_def(parser, binding_power, flags, depth);
         case PM_TOKEN_KEYWORD_DEFINED: {
             parser_lex(parser);
 

From 5fcdaff8a986ccbc81c9a84870e08be34f6abf0a Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Tue, 24 Mar 2026 11:57:30 -0400
Subject: [PATCH 04/12] [ruby/prism] Pull out parse_case from
 parse_expression_prefix

https://github.com/ruby/prism/commit/410a623cdf
---
 prism/prism.c | 433 +++++++++++++++++++++++++-------------------------
 1 file changed, 220 insertions(+), 213 deletions(-)

diff --git a/prism/prism.c b/prism/prism.c
index 743683f1ec29b6..3016d704ca4f8f 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -17769,6 +17769,224 @@ pm_block_call_p(const pm_node_t *node) {
     return false;
 }
 
+/**
+ * Parse a case expression (the `case` keyword). This handles both case-when and
+ * case-in (pattern matching) forms. Expects the current token to be `case`.
+ */
+static pm_node_t *
+parse_case(pm_parser_t *parser, uint8_t flags, uint16_t depth) {
+    size_t opening_newline_index = token_newline_index(parser); /* used by the indentation warnings below */
+    parser_lex(parser);
+
+    pm_token_t case_keyword = parser->previous; /* the `case` token that was just lexed past */
+    pm_node_t *predicate = NULL;
+
+    pm_node_list_t current_block_exits = { 0 };
+    pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits); /* popped again on every return path */
+
+    if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
+        while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)); /* skip any further terminators */
+        predicate = NULL; /* `case` directly followed by a terminator: no predicate */
+    } else if (match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_END)) {
+        predicate = NULL; /* `case` directly followed by when/in/end: no predicate */
+     } else if (!token_begins_expression_p(parser->current.type)) {
+        predicate = NULL; /* token_begins_expression_p rejected the current token: no predicate */
+    } else {
+        predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_CASE_EXPRESSION_AFTER_CASE, (uint16_t) (depth + 1));
+        while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)); /* skip terminators after the predicate */
+    }
+
+    if (match1(parser, PM_TOKEN_KEYWORD_END)) { /* `end` reached with no when/in clauses */
+        parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
+        parser_lex(parser);
+        pop_block_exits(parser, previous_block_exits);
+        pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
+        return UP(pm_case_node_create(parser, &case_keyword, predicate, &parser->previous)); /* parser->previous is the `end` just lexed past */
+    }
+
+    /* At this point we can create a case node, though we don't yet know if it
+     * is a case-in or case-when node. */
+    pm_node_t *node;
+
+    if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
+        pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, NULL);
+        pm_static_literals_t literals = { 0 }; /* shared by every when clause; freed after the loop */
+
+        /* At this point we've seen a when keyword, so we know this is a
+         * case-when node. We will continue to parse the when nodes until we hit
+         * the end of the list. */
+        while (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
+            parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
+            parser_lex(parser);
+
+            pm_token_t when_keyword = parser->previous;
+            pm_when_node_t *when_node = pm_when_node_create(parser, &when_keyword);
+
+            do { /* one iteration per comma-separated condition */
+                if (accept1(parser, PM_TOKEN_USTAR)) { /* splatted condition */
+                    pm_token_t operator = parser->previous;
+                    pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
+
+                    pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression);
+                    pm_when_node_conditions_append(parser->arena, when_node, UP(splat_node));
+
+                    if (PM_NODE_TYPE_P(expression, PM_ERROR_RECOVERY_NODE)) break; /* stop looping on an error recovery node */
+                } else {
+                    pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, (uint16_t) (depth + 1));
+                    pm_when_node_conditions_append(parser->arena, when_node, condition);
+
+                    /* If we found an error recovery node, then this is a syntax
+                     * error and we should stop looping. */
+                    if (PM_NODE_TYPE_P(condition, PM_ERROR_RECOVERY_NODE)) break;
+
+                    /* If this is a string node, then we need to mark it as
+                     * frozen because when clause strings are frozen. */
+                    if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
+                        pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
+                    } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
+                        pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
+                    }
+
+                    pm_when_clause_static_literals_add(parser, &literals, condition);
+                }
+            } while (accept1(parser, PM_TOKEN_COMMA));
+
+            if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
+                if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) { /* `then` is optional after a terminator */
+                    pm_when_node_then_keyword_loc_set(parser, when_node, &parser->previous);
+                }
+            } else { /* no terminator, so `then` is required */
+                expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
+                pm_when_node_then_keyword_loc_set(parser, when_node, &parser->previous);
+            }
+
+            if (!match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) { /* clause has a body */
+                pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_CASE_WHEN, (uint16_t) (depth + 1));
+                if (statements != NULL) {
+                    pm_when_node_statements_set(when_node, statements);
+                }
+            }
+
+            pm_case_node_condition_append(parser->arena, case_node, UP(when_node));
+        }
+
+        /* If we didn't parse any conditions (in or when) then we need to
+         * indicate that we have an error. */
+        if (case_node->conditions.size == 0) {
+            pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
+        }
+
+        pm_static_literals_free(&literals);
+        node = UP(case_node);
+    } else {
+        pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate);
+
+        /* If this is a case-match node (i.e., it is a pattern matching case
+         * statement) then we must have a predicate. */
+        if (predicate == NULL) {
+            pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE);
+        }
+
+        /* At this point we expect that we're parsing a case-in node. We will
+         * continue to parse the in nodes until we hit the end of the list. */
+        while (match1(parser, PM_TOKEN_KEYWORD_IN)) {
+            parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
+
+            bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
+            parser->pattern_matching_newlines = true; /* restored once the pattern has been parsed */
+
+            lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL); /* set before lexing past `in` */
+            parser->command_start = false;
+            parser_lex(parser);
+
+            pm_token_t in_keyword = parser->previous;
+
+            pm_constant_id_list_t captures = { 0 };
+            pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
+
+            parser->pattern_matching_newlines = previous_pattern_matching_newlines;
+
+            /* Since we're in the top-level of the case-in node we need to
+             * check for guard clauses in the form of `if` or `unless`
+             * statements. */
+            if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
+                pm_token_t keyword = parser->previous;
+                pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
+                pattern = UP(pm_if_node_modifier_create(parser, pattern, &keyword, predicate)); /* the guard wraps the pattern */
+            } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
+                pm_token_t keyword = parser->previous;
+                pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
+                pattern = UP(pm_unless_node_modifier_create(parser, pattern, &keyword, predicate)); /* the guard wraps the pattern */
+            }
+
+            /* Now we need to check for the terminator of the in node's pattern.
+             * It can be a newline or semicolon optionally followed by a `then`
+             * keyword. */
+            pm_token_t then_keyword = { 0 }; /* stays zeroed when no `then` keyword is present */
+            if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
+                if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
+                    then_keyword = parser->previous;
+                }
+            } else { /* no terminator, so `then` is required */
+                expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
+                then_keyword = parser->previous;
+            }
+
+            /* Now we can actually parse the statements associated with the in
+             * node. */
+            pm_statements_node_t *statements;
+            if (match3(parser, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
+                statements = NULL; /* clause has no body */
+            } else {
+                statements = parse_statements(parser, PM_CONTEXT_CASE_IN, (uint16_t) (depth + 1));
+            }
+
+            /* Now that we have the full pattern and statements, we can create
+             * the node and attach it to the case node. */
+            pm_node_t *condition = UP(pm_in_node_create(parser, pattern, statements, &in_keyword, NTOK2PTR(then_keyword)));
+            pm_case_match_node_condition_append(parser->arena, case_node, condition);
+        }
+
+        /* If we didn't parse any conditions (in or when) then we need to
+         * indicate that we have an error. */
+        if (case_node->conditions.size == 0) {
+            pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
+        }
+
+        node = UP(case_node);
+    }
+
+    accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON); /* optional terminator before `else`/`end` */
+    if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) {
+        pm_token_t else_keyword = parser->previous;
+        pm_else_node_t *else_node; /* closed with parser->current: `end` is only consumed further below */
+
+        if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
+            else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1)), &parser->current);
+        } else {
+            else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current);
+        }
+
+        if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
+            pm_case_node_else_clause_set((pm_case_node_t *) node, else_node);
+        } else {
+            pm_case_match_node_else_clause_set((pm_case_match_node_t *) node, else_node);
+        }
+    }
+
+    parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
+    expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM, &case_keyword);
+
+    if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) { /* record parser->previous as the end keyword location */
+        pm_case_node_end_keyword_loc_set(parser, (pm_case_node_t *) node, &parser->previous);
+    } else {
+        pm_case_match_node_end_keyword_loc_set(parser, (pm_case_match_node_t *) node, &parser->previous);
+    }
+
+    pop_block_exits(parser, previous_block_exits);
+    return node;
+}
+
 /**
  * Parse a method definition expression (the `def` keyword).
  */
@@ -18895,219 +19113,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, u
                     return UP(pm_alias_method_node_create(parser, &keyword, new_name, old_name));
             }
         }
-        case PM_TOKEN_KEYWORD_CASE: {
-            size_t opening_newline_index = token_newline_index(parser);
-            parser_lex(parser);
-
-            pm_token_t case_keyword = parser->previous;
-            pm_node_t *predicate = NULL;
-
-            pm_node_list_t current_block_exits = { 0 };
-            pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
-
-            if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
-                while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
-                predicate = NULL;
-            } else if (match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_END)) {
-                predicate = NULL;
-             } else if (!token_begins_expression_p(parser->current.type)) {
-                predicate = NULL;
-            } else {
-                predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_CASE_EXPRESSION_AFTER_CASE, (uint16_t) (depth + 1));
-                while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
-            }
-
-            if (match1(parser, PM_TOKEN_KEYWORD_END)) {
-                parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
-                parser_lex(parser);
-                pop_block_exits(parser, previous_block_exits);
-                pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
-                return UP(pm_case_node_create(parser, &case_keyword, predicate, &parser->previous));
-            }
-
-            // At this point we can create a case node, though we don't yet know
-            // if it is a case-in or case-when node.
-            pm_node_t *node;
-
-            if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
-                pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, NULL);
-                pm_static_literals_t literals = { 0 };
-
-                // At this point we've seen a when keyword, so we know this is a
-                // case-when node. We will continue to parse the when nodes
-                // until we hit the end of the list.
-                while (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
-                    parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
-                    parser_lex(parser);
-
-                    pm_token_t when_keyword = parser->previous;
-                    pm_when_node_t *when_node = pm_when_node_create(parser, &when_keyword);
-
-                    do {
-                        if (accept1(parser, PM_TOKEN_USTAR)) {
-                            pm_token_t operator = parser->previous;
-                            pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
-
-                            pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression);
-                            pm_when_node_conditions_append(parser->arena, when_node, UP(splat_node));
-
-                            if (PM_NODE_TYPE_P(expression, PM_ERROR_RECOVERY_NODE)) break;
-                        } else {
-                            pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, (uint16_t) (depth + 1));
-                            pm_when_node_conditions_append(parser->arena, when_node, condition);
-
-                            // If we found a missing node, then this is a syntax
-                            // error and we should stop looping.
-                            if (PM_NODE_TYPE_P(condition, PM_ERROR_RECOVERY_NODE)) break;
-
-                            // If this is a string node, then we need to mark it
-                            // as frozen because when clause strings are frozen.
-                            if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
-                                pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
-                            } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
-                                pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
-                            }
-
-                            pm_when_clause_static_literals_add(parser, &literals, condition);
-                        }
-                    } while (accept1(parser, PM_TOKEN_COMMA));
-
-                    if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
-                        if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
-                            pm_when_node_then_keyword_loc_set(parser, when_node, &parser->previous);
-                        }
-                    } else {
-                        expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
-                        pm_when_node_then_keyword_loc_set(parser, when_node, &parser->previous);
-                    }
-
-                    if (!match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
-                        pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_CASE_WHEN, (uint16_t) (depth + 1));
-                        if (statements != NULL) {
-                            pm_when_node_statements_set(when_node, statements);
-                        }
-                    }
-
-                    pm_case_node_condition_append(parser->arena, case_node, UP(when_node));
-                }
-
-                // If we didn't parse any conditions (in or when) then we need
-                // to indicate that we have an error.
-                if (case_node->conditions.size == 0) {
-                    pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
-                }
-
-                pm_static_literals_free(&literals);
-                node = UP(case_node);
-            } else {
-                pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate);
-
-                // If this is a case-match node (i.e., it is a pattern matching
-                // case statement) then we must have a predicate.
-                if (predicate == NULL) {
-                    pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE);
-                }
-
-                // At this point we expect that we're parsing a case-in node. We
-                // will continue to parse the in nodes until we hit the end of
-                // the list.
-                while (match1(parser, PM_TOKEN_KEYWORD_IN)) {
-                    parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
-
-                    bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
-                    parser->pattern_matching_newlines = true;
-
-                    lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
-                    parser->command_start = false;
-                    parser_lex(parser);
-
-                    pm_token_t in_keyword = parser->previous;
-
-                    pm_constant_id_list_t captures = { 0 };
-                    pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
-
-                    parser->pattern_matching_newlines = previous_pattern_matching_newlines;
-
-                    // Since we're in the top-level of the case-in node we need
-                    // to check for guard clauses in the form of `if` or
-                    // `unless` statements.
-                    if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
-                        pm_token_t keyword = parser->previous;
-                        pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
-                        pattern = UP(pm_if_node_modifier_create(parser, pattern, &keyword, predicate));
-                    } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
-                        pm_token_t keyword = parser->previous;
-                        pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
-                        pattern = UP(pm_unless_node_modifier_create(parser, pattern, &keyword, predicate));
-                    }
-
-                    // Now we need to check for the terminator of the in node's
-                    // pattern. It can be a newline or semicolon optionally
-                    // followed by a `then` keyword.
-                    pm_token_t then_keyword = { 0 };
-                    if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
-                        if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
-                            then_keyword = parser->previous;
-                        }
-                    } else {
-                        expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
-                        then_keyword = parser->previous;
-                    }
-
-                    // Now we can actually parse the statements associated with
-                    // the in node.
-                    pm_statements_node_t *statements;
-                    if (match3(parser, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
-                        statements = NULL;
-                    } else {
-                        statements = parse_statements(parser, PM_CONTEXT_CASE_IN, (uint16_t) (depth + 1));
-                    }
-
-                    // Now that we have the full pattern and statements, we can
-                    // create the node and attach it to the case node.
-                    pm_node_t *condition = UP(pm_in_node_create(parser, pattern, statements, &in_keyword, NTOK2PTR(then_keyword)));
-                    pm_case_match_node_condition_append(parser->arena, case_node, condition);
-                }
-
-                // If we didn't parse any conditions (in or when) then we need
-                // to indicate that we have an error.
-                if (case_node->conditions.size == 0) {
-                    pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
-                }
-
-                node = UP(case_node);
-            }
-
-            accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
-            if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) {
-                pm_token_t else_keyword = parser->previous;
-                pm_else_node_t *else_node;
-
-                if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
-                    else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1)), &parser->current);
-                } else {
-                    else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current);
-                }
-
-                if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
-                    pm_case_node_else_clause_set((pm_case_node_t *) node, else_node);
-                } else {
-                    pm_case_match_node_else_clause_set((pm_case_match_node_t *) node, else_node);
-                }
-            }
-
-            parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
-            expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM, &case_keyword);
-
-            if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
-                pm_case_node_end_keyword_loc_set(parser, (pm_case_node_t *) node, &parser->previous);
-            } else {
-                pm_case_match_node_end_keyword_loc_set(parser, (pm_case_match_node_t *) node, &parser->previous);
-            }
-
-            pop_block_exits(parser, previous_block_exits);
-            return node;
-        }
+        case PM_TOKEN_KEYWORD_CASE:
+            return parse_case(parser, flags, depth);
         case PM_TOKEN_KEYWORD_BEGIN: {
             size_t opening_newline_index = token_newline_index(parser);
             parser_lex(parser);

From 2c01a495147f708aef9e99b62935290c3a53f317 Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Tue, 24 Mar 2026 12:04:13 -0400
Subject: [PATCH 05/12] [ruby/prism] Pull out parse_parentheses from
 parse_expression_prefix

https://github.com/ruby/prism/commit/3349087895
---
 prism/prism.c | 409 +++++++++++++++++++++++++-------------------------
 1 file changed, 207 insertions(+), 202 deletions(-)

diff --git a/prism/prism.c b/prism/prism.c
index 3016d704ca4f8f..ac1f7415b2517e 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -18353,6 +18353,211 @@ parse_def(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags,
     ));
 }
 
+/**
+ * Parse a parenthesized expression starting at the current token: a grouping,
+ * multi-target assignment targets, or a set of statements (possibly empty).
+ */
+static pm_node_t *
+parse_parentheses(pm_parser_t *parser, pm_binding_power_t binding_power, uint16_t depth) {
+    pm_token_t opening = parser->current;
+    pm_node_flags_t paren_flags = 0;
+
+    pm_node_list_t current_block_exits = { 0 };
+    pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
+
+    parser_lex(parser);
+    while (true) {
+        if (accept1(parser, PM_TOKEN_SEMICOLON)) {
+            paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
+        } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
+            break;
+        }
+    }
+
+    /* Only terminators were found before `)` or the end of the file: return an
+     * empty parentheses node. At EOF the expect1 call adds the error for `)`. */
+    if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_EOF)) {
+        expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
+        pop_block_exits(parser, previous_block_exits);
+        return UP(pm_parentheses_node_create(parser, &opening, NULL, &parser->previous, paren_flags));
+    }
+
+    /* Otherwise, we're going to parse the first statement in the list of
+     * statements within the parentheses. */
+    pm_accepts_block_stack_push(parser, true);
+    context_push(parser, PM_CONTEXT_PARENS);
+    pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
+    context_pop(parser);
+
+    /* Determine if this statement is followed by a terminator. In the case of a
+     * single statement, this is fine. But in the case of multiple statements
+     * it's required. */
+    bool terminator_found = false;
+
+    if (accept1(parser, PM_TOKEN_SEMICOLON)) {
+        terminator_found = true;
+        paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
+    } else if (accept1(parser, PM_TOKEN_NEWLINE)) {
+        terminator_found = true;
+    }
+
+    if (terminator_found) {
+        while (true) {
+            if (accept1(parser, PM_TOKEN_SEMICOLON)) {
+                paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
+            } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
+                break;
+            }
+        }
+    }
+
+    /* If we hit a right parenthesis, then we're done parsing the parentheses
+     * node, and we can check which kind of node we should return. */
+    if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
+        if (opening.type == PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES) {
+            lex_state_set(parser, PM_LEX_STATE_ENDARG);
+        }
+
+        parser_lex(parser);
+        pm_accepts_block_stack_pop(parser);
+        pop_block_exits(parser, previous_block_exits);
+
+        if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
+            /* A lone splat or multi target inside the parentheses is treated as
+             * a multi target. Reuse the node if it has no parentheses of its
+             * own yet (empty lparen_loc); otherwise wrap it in a new one. */
+            pm_multi_target_node_t *multi_target;
+
+            if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.length == 0) {
+                multi_target = (pm_multi_target_node_t *) statement;
+            } else {
+                multi_target = pm_multi_target_node_create(parser);
+                pm_multi_target_node_targets_append(parser, multi_target, statement);
+            }
+
+            multi_target->lparen_loc = TOK2LOC(parser, &opening);
+            multi_target->rparen_loc = TOK2LOC(parser, &parser->previous);
+            PM_NODE_START_SET_TOKEN(parser, multi_target, &opening);
+            PM_NODE_LENGTH_SET_TOKEN(parser, multi_target, &parser->previous);
+
+            pm_node_t *result;
+            if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) {
+                result = parse_targets(parser, UP(multi_target), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
+                accept1(parser, PM_TOKEN_NEWLINE);
+            } else {
+                result = UP(multi_target);
+            }
+
+            if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
+                /* All set, this is explicitly allowed by the parent context. */
+            } else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) {
+                /* All set, we're inside a for loop and we're parsing multiple
+                 * targets. */
+            } else if (binding_power != PM_BINDING_POWER_STATEMENT) {
+                /* Multi targets are not allowed when it's not a statement
+                 * level. */
+                pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
+            } else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
+                /* Multi targets must be followed by an equal sign in order to
+                 * be valid (or a right parenthesis if they are nested). */
+                pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
+            }
+
+            return result;
+        }
+
+        /* If we have a single statement and are ending on a right parenthesis
+         * and we didn't return a multiple assignment node, then we can return a
+         * regular parentheses node now. */
+        pm_statements_node_t *statements = pm_statements_node_create(parser);
+        pm_statements_node_body_append(parser, statements, statement, true);
+
+        return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, paren_flags));
+    }
+
+    /* If we have more than one statement in the set of parentheses, then we are
+     * going to parse all of them as a list of statements. We'll do that here.
+     */
+    context_push(parser, PM_CONTEXT_PARENS);
+    paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
+
+    pm_statements_node_t *statements = pm_statements_node_create(parser);
+    pm_statements_node_body_append(parser, statements, statement, true);
+
+    /* If we didn't find a terminator and we didn't find a right parenthesis,
+     * then this is a syntax error. */
+    if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
+        PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
+    }
+
+    /* Parse each statement within the parentheses. */
+    while (true) {
+        pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
+        pm_statements_node_body_append(parser, statements, node, true);
+
+        /* If we're recovering from a syntax error, then we need to stop parsing
+         * the statements now. */
+        if (parser->recovering) {
+            /* If this is the level of context where the recovery has happened,
+             * then we can mark the parser as done recovering. */
+            if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) parser->recovering = false;
+            break;
+        }
+
+        /* If we couldn't parse an expression at all, then we need to bail out
+         * of the loop. */
+        if (PM_NODE_TYPE_P(node, PM_ERROR_RECOVERY_NODE)) break;
+
+        /* If we successfully parsed a statement, then we are going to need a
+         * terminator to delimit them. */
+        if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
+            while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
+            if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
+        } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
+            break;
+        } else if (!match1(parser, PM_TOKEN_EOF)) {
+            /* If we're at the end of the file, then we're going to add an error
+             * after this for the ) anyway. */
+            PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
+        }
+    }
+
+    context_pop(parser);
+    pm_accepts_block_stack_pop(parser);
+    expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
+
+    /* Multi targets at the statement level are allowed to be followed by a
+     * right parenthesis, which can only happen for the final statement here.
+     * Reject that: turn it into a multi write whose value is an error recovery
+     * node and report PM_ERR_WRITE_TARGET_UNEXPECTED on it. */
+    {
+        pm_node_t *statement = statements->body.nodes[statements->body.size - 1];
+
+        if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
+            pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
+            pm_multi_target_node_targets_append(parser, multi_target, statement);
+
+            statement = UP(multi_target);
+            statements->body.nodes[statements->body.size - 1] = statement;
+        }
+
+        if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
+            const uint8_t *offset = parser->start + PM_NODE_END(statement);
+            pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset };
+            pm_node_t *value = UP(pm_error_recovery_node_create(parser, PM_NODE_END(statement), 0));
+
+            statement = UP(pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value));
+            statements->body.nodes[statements->body.size - 1] = statement;
+
+            pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED);
+        }
+    }
+
+    pop_block_exits(parser, previous_block_exits);
+    pm_void_statements_check(parser, statements, true);
+    return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, paren_flags));
+}
+
 /**
  * Parse an expression that begins with the previous node that we just lexed.
  */
@@ -18473,208 +18678,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, u
             return UP(array);
         }
         case PM_TOKEN_PARENTHESIS_LEFT:
-        case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
-            pm_token_t opening = parser->current;
-            pm_node_flags_t paren_flags = 0;
-
-            pm_node_list_t current_block_exits = { 0 };
-            pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
-
-            parser_lex(parser);
-            while (true) {
-                if (accept1(parser, PM_TOKEN_SEMICOLON)) {
-                    paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
-                } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
-                    break;
-                }
-            }
-
-            // If this is the end of the file or we match a right parenthesis, then
-            // we have an empty parentheses node, and we can immediately return.
-            if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_EOF)) {
-                expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
-                pop_block_exits(parser, previous_block_exits);
-                return UP(pm_parentheses_node_create(parser, &opening, NULL, &parser->previous, paren_flags));
-            }
-
-            // Otherwise, we're going to parse the first statement in the list
-            // of statements within the parentheses.
-            pm_accepts_block_stack_push(parser, true);
-            context_push(parser, PM_CONTEXT_PARENS);
-            pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
-            context_pop(parser);
-
-            // Determine if this statement is followed by a terminator. In the
-            // case of a single statement, this is fine. But in the case of
-            // multiple statements it's required.
-            bool terminator_found = false;
-
-            if (accept1(parser, PM_TOKEN_SEMICOLON)) {
-                terminator_found = true;
-                paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
-            } else if (accept1(parser, PM_TOKEN_NEWLINE)) {
-                terminator_found = true;
-            }
-
-            if (terminator_found) {
-                while (true) {
-                    if (accept1(parser, PM_TOKEN_SEMICOLON)) {
-                        paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
-                    } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
-                        break;
-                    }
-                }
-            }
-
-            // If we hit a right parenthesis, then we're done parsing the
-            // parentheses node, and we can check which kind of node we should
-            // return.
-            if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
-                if (opening.type == PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES) {
-                    lex_state_set(parser, PM_LEX_STATE_ENDARG);
-                }
-
-                parser_lex(parser);
-                pm_accepts_block_stack_pop(parser);
-                pop_block_exits(parser, previous_block_exits);
-
-                if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
-                    // If we have a single statement and are ending on a right
-                    // parenthesis, then we need to check if this is possibly a
-                    // multiple target node.
-                    pm_multi_target_node_t *multi_target;
-
-                    if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.length == 0) {
-                        multi_target = (pm_multi_target_node_t *) statement;
-                    } else {
-                        multi_target = pm_multi_target_node_create(parser);
-                        pm_multi_target_node_targets_append(parser, multi_target, statement);
-                    }
-
-                    multi_target->lparen_loc = TOK2LOC(parser, &opening);
-                    multi_target->rparen_loc = TOK2LOC(parser, &parser->previous);
-                    PM_NODE_START_SET_TOKEN(parser, multi_target, &opening);
-                    PM_NODE_LENGTH_SET_TOKEN(parser, multi_target, &parser->previous);
-
-                    pm_node_t *result;
-                    if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) {
-                        result = parse_targets(parser, UP(multi_target), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
-                        accept1(parser, PM_TOKEN_NEWLINE);
-                    } else {
-                        result = UP(multi_target);
-                    }
-
-                    if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
-                        // All set, this is explicitly allowed by the parent
-                        // context.
-                    } else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) {
-                        // All set, we're inside a for loop and we're parsing
-                        // multiple targets.
-                    } else if (binding_power != PM_BINDING_POWER_STATEMENT) {
-                        // Multi targets are not allowed when it's not a
-                        // statement level.
-                        pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
-                    } else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
-                        // Multi targets must be followed by an equal sign in
-                        // order to be valid (or a right parenthesis if they are
-                        // nested).
-                        pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
-                    }
-
-                    return result;
-                }
-
-                // If we have a single statement and are ending on a right parenthesis
-                // and we didn't return a multiple assignment node, then we can return a
-                // regular parentheses node now.
-                pm_statements_node_t *statements = pm_statements_node_create(parser);
-                pm_statements_node_body_append(parser, statements, statement, true);
-
-                return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, paren_flags));
-            }
-
-            // If we have more than one statement in the set of parentheses,
-            // then we are going to parse all of them as a list of statements.
-            // We'll do that here.
-            context_push(parser, PM_CONTEXT_PARENS);
-            paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
-
-            pm_statements_node_t *statements = pm_statements_node_create(parser);
-            pm_statements_node_body_append(parser, statements, statement, true);
-
-            // If we didn't find a terminator and we didn't find a right
-            // parenthesis, then this is a syntax error.
-            if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
-                PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
-            }
-
-            // Parse each statement within the parentheses.
-            while (true) {
-                pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
-                pm_statements_node_body_append(parser, statements, node, true);
-
-                // If we're recovering from a syntax error, then we need to stop
-                // parsing the statements now.
-                if (parser->recovering) {
-                    // If this is the level of context where the recovery has
-                    // happened, then we can mark the parser as done recovering.
-                    if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) parser->recovering = false;
-                    break;
-                }
-
-                // If we couldn't parse an expression at all, then we need to
-                // bail out of the loop.
-                if (PM_NODE_TYPE_P(node, PM_ERROR_RECOVERY_NODE)) break;
-
-                // If we successfully parsed a statement, then we are going to
-                // need terminator to delimit them.
-                if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
-                    while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
-                    if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
-                } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
-                    break;
-                } else if (!match1(parser, PM_TOKEN_EOF)) {
-                    // If we're at the end of the file, then we're going to add
-                    // an error after this for the ) anyway.
-                    PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
-                }
-            }
-
-            context_pop(parser);
-            pm_accepts_block_stack_pop(parser);
-            expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
-
-            // When we're parsing multi targets, we allow them to be followed by
-            // a right parenthesis if they are at the statement level. This is
-            // only possible if they are the final statement in a parentheses.
-            // We need to explicitly reject that here.
-            {
-                pm_node_t *statement = statements->body.nodes[statements->body.size - 1];
-
-                if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
-                    pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
-                    pm_multi_target_node_targets_append(parser, multi_target, statement);
-
-                    statement = UP(multi_target);
-                    statements->body.nodes[statements->body.size - 1] = statement;
-                }
-
-                if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
-                    const uint8_t *offset = parser->start + PM_NODE_END(statement);
-                    pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset };
-                    pm_node_t *value = UP(pm_error_recovery_node_create(parser, PM_NODE_END(statement), 0));
-
-                    statement = UP(pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value));
-                    statements->body.nodes[statements->body.size - 1] = statement;
-
-                    pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED);
-                }
-            }
-
-            pop_block_exits(parser, previous_block_exits);
-            pm_void_statements_check(parser, statements, true);
-            return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, paren_flags));
-        }
+        case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES:
+            return parse_parentheses(parser, binding_power, depth);
         case PM_TOKEN_BRACE_LEFT: {
             // If we were passed a current_hash_keys via the parser, then that
             // means we're already parsing a hash and we want to share the set

From 9a24716e51e80a871dd8057de623c6217221835b Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Tue, 24 Mar 2026 12:28:34 -0400
Subject: [PATCH 06/12] [ruby/prism] Pull out parse_*_array from
 parse_expression_prefix

https://github.com/ruby/prism/commit/80220a9d6a
---
 prism/prism.c | 593 ++++++++++++++++++++++++++------------------------
 1 file changed, 304 insertions(+), 289 deletions(-)

diff --git a/prism/prism.c b/prism/prism.c
index ac1f7415b2517e..0fa6f1009e706f 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -18353,6 +18353,306 @@ parse_def(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags,
     ));
 }
 
+/**
+ * Parse a `%W[...]` literal into an array node; current token is the opener.
+ */
+static pm_node_t *
+parse_string_array(pm_parser_t *parser, uint16_t depth) {
+    parser_lex(parser);
+    pm_token_t opening = parser->previous;
+    pm_array_node_t *array = pm_array_node_create(parser, &opening);
+
+    /* The element currently being built: NULL between elements, otherwise
+     * either a string node or an interpolated string node. */
+    pm_node_t *current = NULL;
+
+    while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
+        switch (parser->current.type) {
+            case PM_TOKEN_WORDS_SEP: {
+                /* Reset the explicit encoding if we hit a separator since each
+                 * element can have its own encoding. */
+                parser->explicit_encoding = NULL;
+
+                if (current == NULL) {
+                    /* If we hit a separator before we have any content, then we
+                     * don't need to do anything. */
+                } else {
+                    /* If we hit a separator after we've hit content, then we
+                     * need to append that content to the list and reset the
+                     * current node. */
+                    pm_array_node_elements_append(parser->arena, array, current);
+                    current = NULL;
+                }
+
+                parser_lex(parser);
+                break;
+            }
+            case PM_TOKEN_STRING_CONTENT: {
+                pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
+                pm_node_flag_set(string, parse_unescaped_encoding(parser));
+                parser_lex(parser);
+
+                if (current == NULL) {
+                    /* If we hit content and the current node is NULL, then this
+                     * is the first string content we've seen for this element.
+                     * In that case the string node we just created becomes the
+                     * current node. */
+                    current = string;
+                } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
+                    /* If we hit string content and the current node is an
+                     * interpolated string, then we need to append the string
+                     * content to the list of child nodes. */
+                    pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, string);
+                } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
+                    /* If we hit string content and the current node is a string
+                     * node, then we need to convert the current node into an
+                     * interpolated string and add the string content to the
+                     * list of child nodes. */
+                    pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
+                    pm_interpolated_string_node_append(parser, interpolated, current);
+                    pm_interpolated_string_node_append(parser, interpolated, string);
+                    current = UP(interpolated);
+                } else {
+                    assert(false && "unreachable");
+                }
+
+                break;
+            }
+            case PM_TOKEN_EMBVAR: {
+                if (current == NULL) {
+                    /* If we hit an embedded variable and the current node is
+                     * NULL, then this is the start of a new string. We'll set
+                     * the current node to a new interpolated string. */
+                    current = UP(pm_interpolated_string_node_create(parser, NULL, NULL, NULL));
+                } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
+                    /* If we hit an embedded variable and the current node is a
+                     * string node, then we'll convert the current into an
+                     * interpolated string and add the string node to the list
+                     * of parts. */
+                    pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
+                    pm_interpolated_string_node_append(parser, interpolated, current);
+                    current = UP(interpolated);
+                } else {
+                    /* If we hit an embedded variable and the current node is an
+                     * interpolated string, then we'll just add the embedded
+                     * variable. */
+                }
+
+                pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
+                pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, part);
+                break;
+            }
+            case PM_TOKEN_EMBEXPR_BEGIN: {
+                if (current == NULL) {
+                    /* If we hit an embedded expression and the current node is
+                     * NULL, then this is the start of a new string. We'll set
+                     * the current node to a new interpolated string. */
+                    current = UP(pm_interpolated_string_node_create(parser, NULL, NULL, NULL));
+                } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
+                    /* If we hit an embedded expression and the current node is
+                     * a string node, then we'll convert the current into an
+                     * interpolated string and add the string node to the list
+                     * of parts. */
+                    pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
+                    pm_interpolated_string_node_append(parser, interpolated, current);
+                    current = UP(interpolated);
+                } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
+                    /* If we hit an embedded expression and the current node is
+                     * an interpolated string, then we'll just continue on. */
+                } else {
+                    assert(false && "unreachable");
+                }
+
+                pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
+                pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, part);
+                break;
+            }
+            default:
+                expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_UPPER_ELEMENT);
+                parser_lex(parser);
+                break;
+        }
+    }
+
+    /* If we have a current node, then we need to append it to the list. */
+    if (current) {
+        pm_array_node_elements_append(parser->arena, array, current);
+    }
+
+    pm_token_t closing = parser->current;
+    if (match1(parser, PM_TOKEN_EOF)) {
+        pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM);
+        closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
+    } else {
+        expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
+    }
+
+    pm_array_node_close_set(parser, array, &closing);
+    return UP(array);
+}
+
+/**
+ * Parse an interpolated symbol array literal (`%I[...]`).
+ */
+static pm_node_t *
+parse_symbol_array(pm_parser_t *parser, uint16_t depth) {
+    parser_lex(parser);
+    pm_token_t opening = parser->previous;
+    pm_array_node_t *array = pm_array_node_create(parser, &opening);
+
+    /* This is the current node that we are parsing that will be added to the
+     * list of elements. */
+    pm_node_t *current = NULL;
+
+    while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
+        switch (parser->current.type) {
+            case PM_TOKEN_WORDS_SEP: {
+                if (current == NULL) {
+                    /* If we hit a separator before we have any content, then we
+                     * don't need to do anything. */
+                } else {
+                    /* If we hit a separator after we've hit content, then we
+                     * need to append that content to the list and reset the
+                     * current node. */
+                    pm_array_node_elements_append(parser->arena, array, current);
+                    current = NULL;
+                }
+
+                parser_lex(parser);
+                break;
+            }
+            case PM_TOKEN_STRING_CONTENT: {
+                if (current == NULL) {
+                    /* If we hit content and the current node is NULL, then this
+                     * is the first string content we've seen. In that case
+                     * we're going to create a new string node and set that to
+                     * the current. */
+                    current = UP(pm_symbol_node_create_current_string(parser, NULL, &parser->current, NULL));
+                    parser_lex(parser);
+                } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
+                    /* If we hit string content and the current node is an
+                     * interpolated string, then we need to append the string
+                     * content to the list of child nodes. */
+                    pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
+                    parser_lex(parser);
+
+                    pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, string);
+                } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
+                    /* If we hit string content and the current node is a symbol
+                     * node, then we need to convert the current node into an
+                     * interpolated string and add the string content to the
+                     * list of child nodes. */
+                    pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
+                    pm_token_t content = {
+                        .type = PM_TOKEN_STRING_CONTENT,
+                        .start = parser->start + cast->value_loc.start,
+                        .end = parser->start + cast->value_loc.start + cast->value_loc.length
+                    };
+
+                    pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &cast->unescaped));
+                    pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, NULL, &parser->previous, NULL));
+                    parser_lex(parser);
+
+                    pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
+                    pm_interpolated_symbol_node_append(parser->arena, interpolated, first_string);
+                    pm_interpolated_symbol_node_append(parser->arena, interpolated, second_string);
+
+                    current = UP(interpolated);
+                } else {
+                    assert(false && "unreachable");
+                }
+
+                break;
+            }
+            case PM_TOKEN_EMBVAR: {
+                bool start_location_set = false;
+                if (current == NULL) {
+                    /* If we hit an embedded variable and the current node is
+                     * NULL, then this is the start of a new string. We'll set
+                     * the current node to a new interpolated string. */
+                    current = UP(pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL));
+                } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
+                    /* If we hit an embedded variable and the current node is a
+                     * string node, then we'll convert the current into an
+                     * interpolated string and add the string node to the list
+                     * of parts. */
+                    pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
+
+                    current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current));
+                    pm_interpolated_symbol_node_append(parser->arena, interpolated, current);
+                    PM_NODE_START_SET_NODE(interpolated, current);
+                    start_location_set = true;
+                    current = UP(interpolated);
+                } else {
+                    /* If we hit an embedded variable and the current node is an
+                     * interpolated string, then we'll just add the embedded
+                     * variable. */
+                }
+
+                pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
+                pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, part);
+                if (!start_location_set) {
+                    PM_NODE_START_SET_NODE(current, part);
+                }
+                break;
+            }
+            case PM_TOKEN_EMBEXPR_BEGIN: {
+                bool start_location_set = false;
+                if (current == NULL) {
+                    /* If we hit an embedded expression and the current node is
+                     * NULL, then this is the start of a new string. We'll set
+                     * the current node to a new interpolated string. */
+                    current = UP(pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL));
+                } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
+                    /* If we hit an embedded expression and the current node is
+                     * a string node, then we'll convert the current into an
+                     * interpolated string and add the string node to the list
+                     * of parts. */
+                    pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
+
+                    current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current));
+                    pm_interpolated_symbol_node_append(parser->arena, interpolated, current);
+                    PM_NODE_START_SET_NODE(interpolated, current);
+                    start_location_set = true;
+                    current = UP(interpolated);
+                } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
+                    /* If we hit an embedded expression and the current node is
+                     * an interpolated string, then we'll just continue on. */
+                } else {
+                    assert(false && "unreachable");
+                }
+
+                pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
+                pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, part);
+                if (!start_location_set) {
+                    PM_NODE_START_SET_NODE(current, part);
+                }
+                break;
+            }
+            default:
+                expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_UPPER_ELEMENT);
+                parser_lex(parser);
+                break;
+        }
+    }
+
+    /* If we have a current node, then we need to append it to the list. */
+    if (current) {
+        pm_array_node_elements_append(parser->arena, array, current);
+    }
+
+    pm_token_t closing = parser->current;
+    if (match1(parser, PM_TOKEN_EOF)) {
+        pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM);
+        closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
+    } else {
+        expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
+    }
+    pm_array_node_close_set(parser, array, &closing);
+
+    return UP(array);
+}
+
 /**
  * Parse a parenthesized expression, which could be a grouping, a multi-target
  * assignment, or a set of statements.
@@ -19801,159 +20101,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, u
 
             return UP(array);
         }
-        case PM_TOKEN_PERCENT_UPPER_I: {
-            parser_lex(parser);
-            pm_token_t opening = parser->previous;
-            pm_array_node_t *array = pm_array_node_create(parser, &opening);
-
-            // This is the current node that we are parsing that will be added to the
-            // list of elements.
-            pm_node_t *current = NULL;
-
-            while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
-                switch (parser->current.type) {
-                    case PM_TOKEN_WORDS_SEP: {
-                        if (current == NULL) {
-                            // If we hit a separator before we have any content, then we don't
-                            // need to do anything.
-                        } else {
-                            // If we hit a separator after we've hit content, then we need to
-                            // append that content to the list and reset the current node.
-                            pm_array_node_elements_append(parser->arena, array, current);
-                            current = NULL;
-                        }
-
-                        parser_lex(parser);
-                        break;
-                    }
-                    case PM_TOKEN_STRING_CONTENT: {
-                        if (current == NULL) {
-                            // If we hit content and the current node is NULL, then this is
-                            // the first string content we've seen. In that case we're going
-                            // to create a new string node and set that to the current.
-                            current = UP(pm_symbol_node_create_current_string(parser, NULL, &parser->current, NULL));
-                            parser_lex(parser);
-                        } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
-                            // If we hit string content and the current node is an
-                            // interpolated string, then we need to append the string content
-                            // to the list of child nodes.
-                            pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
-                            parser_lex(parser);
-
-                            pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, string);
-                        } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
-                            // If we hit string content and the current node is a symbol node,
-                            // then we need to convert the current node into an interpolated
-                            // string and add the string content to the list of child nodes.
-                            pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
-                            pm_token_t content = {
-                                .type = PM_TOKEN_STRING_CONTENT,
-                                .start = parser->start + cast->value_loc.start,
-                                .end = parser->start + cast->value_loc.start + cast->value_loc.length
-                            };
-
-                            pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &cast->unescaped));
-                            pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, NULL, &parser->previous, NULL));
-                            parser_lex(parser);
-
-                            pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
-                            pm_interpolated_symbol_node_append(parser->arena, interpolated, first_string);
-                            pm_interpolated_symbol_node_append(parser->arena, interpolated, second_string);
-
-                            // current is arena-allocated so no explicit free is needed.
-                            current = UP(interpolated);
-                        } else {
-                            assert(false && "unreachable");
-                        }
-
-                        break;
-                    }
-                    case PM_TOKEN_EMBVAR: {
-                        bool start_location_set = false;
-                        if (current == NULL) {
-                            // If we hit an embedded variable and the current node is NULL,
-                            // then this is the start of a new string. We'll set the current
-                            // node to a new interpolated string.
-                            current = UP(pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL));
-                        } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
-                            // If we hit an embedded variable and the current node is a string
-                            // node, then we'll convert the current into an interpolated
-                            // string and add the string node to the list of parts.
-                            pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
-
-                            current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current));
-                            pm_interpolated_symbol_node_append(parser->arena, interpolated, current);
-                            PM_NODE_START_SET_NODE(interpolated, current);
-                            start_location_set = true;
-                            current = UP(interpolated);
-                        } else {
-                            // If we hit an embedded variable and the current node is an
-                            // interpolated string, then we'll just add the embedded variable.
-                        }
-
-                        pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
-                        pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, part);
-                        if (!start_location_set) {
-                            PM_NODE_START_SET_NODE(current, part);
-                        }
-                        break;
-                    }
-                    case PM_TOKEN_EMBEXPR_BEGIN: {
-                        bool start_location_set = false;
-                        if (current == NULL) {
-                            // If we hit an embedded expression and the current node is NULL,
-                            // then this is the start of a new string. We'll set the current
-                            // node to a new interpolated string.
-                            current = UP(pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL));
-                        } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
-                            // If we hit an embedded expression and the current node is a
-                            // string node, then we'll convert the current into an
-                            // interpolated string and add the string node to the list of
-                            // parts.
-                            pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
-
-                            current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current));
-                            pm_interpolated_symbol_node_append(parser->arena, interpolated, current);
-                            PM_NODE_START_SET_NODE(interpolated, current);
-                            start_location_set = true;
-                            current = UP(interpolated);
-                        } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
-                            // If we hit an embedded expression and the current node is an
-                            // interpolated string, then we'll just continue on.
-                        } else {
-                            assert(false && "unreachable");
-                        }
-
-                        pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
-                        pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, part);
-                        if (!start_location_set) {
-                            PM_NODE_START_SET_NODE(current, part);
-                        }
-                        break;
-                    }
-                    default:
-                        expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_UPPER_ELEMENT);
-                        parser_lex(parser);
-                        break;
-                }
-            }
-
-            // If we have a current node, then we need to append it to the list.
-            if (current) {
-                pm_array_node_elements_append(parser->arena, array, current);
-            }
-
-            pm_token_t closing = parser->current;
-            if (match1(parser, PM_TOKEN_EOF)) {
-                pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM);
-                closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
-            } else {
-                expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
-            }
-            pm_array_node_close_set(parser, array, &closing);
-
-            return UP(array);
-        }
+        case PM_TOKEN_PERCENT_UPPER_I:
+            return parse_symbol_array(parser, depth);
         case PM_TOKEN_PERCENT_LOWER_W: {
             parser_lex(parser);
             pm_token_t opening = parser->previous;
@@ -20004,142 +20153,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, u
             pm_array_node_close_set(parser, array, &closing);
             return UP(array);
         }
-        case PM_TOKEN_PERCENT_UPPER_W: {
-            parser_lex(parser);
-            pm_token_t opening = parser->previous;
-            pm_array_node_t *array = pm_array_node_create(parser, &opening);
-
-            // This is the current node that we are parsing that will be added
-            // to the list of elements.
-            pm_node_t *current = NULL;
-
-            while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
-                switch (parser->current.type) {
-                    case PM_TOKEN_WORDS_SEP: {
-                        // Reset the explicit encoding if we hit a separator
-                        // since each element can have its own encoding.
-                        parser->explicit_encoding = NULL;
-
-                        if (current == NULL) {
-                            // If we hit a separator before we have any content,
-                            // then we don't need to do anything.
-                        } else {
-                            // If we hit a separator after we've hit content,
-                            // then we need to append that content to the list
-                            // and reset the current node.
-                            pm_array_node_elements_append(parser->arena, array, current);
-                            current = NULL;
-                        }
-
-                        parser_lex(parser);
-                        break;
-                    }
-                    case PM_TOKEN_STRING_CONTENT: {
-                        pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
-                        pm_node_flag_set(string, parse_unescaped_encoding(parser));
-                        parser_lex(parser);
-
-                        if (current == NULL) {
-                            // If we hit content and the current node is NULL,
-                            // then this is the first string content we've seen.
-                            // In that case we're going to create a new string
-                            // node and set that to the current.
-                            current = string;
-                        } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
-                            // If we hit string content and the current node is
-                            // an interpolated string, then we need to append
-                            // the string content to the list of child nodes.
-                            pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, string);
-                        } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
-                            // If we hit string content and the current node is
-                            // a string node, then we need to convert the
-                            // current node into an interpolated string and add
-                            // the string content to the list of child nodes.
-                            pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
-                            pm_interpolated_string_node_append(parser, interpolated, current);
-                            pm_interpolated_string_node_append(parser, interpolated, string);
-                            current = UP(interpolated);
-                        } else {
-                            assert(false && "unreachable");
-                        }
-
-                        break;
-                    }
-                    case PM_TOKEN_EMBVAR: {
-                        if (current == NULL) {
-                            // If we hit an embedded variable and the current
-                            // node is NULL, then this is the start of a new
-                            // string. We'll set the current node to a new
-                            // interpolated string.
-                            current = UP(pm_interpolated_string_node_create(parser, NULL, NULL, NULL));
-                        } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
-                            // If we hit an embedded variable and the current
-                            // node is a string node, then we'll convert the
-                            // current into an interpolated string and add the
-                            // string node to the list of parts.
-                            pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
-                            pm_interpolated_string_node_append(parser, interpolated, current);
-                            current = UP(interpolated);
-                        } else {
-                            // If we hit an embedded variable and the current
-                            // node is an interpolated string, then we'll just
-                            // add the embedded variable.
-                        }
-
-                        pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
-                        pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, part);
-                        break;
-                    }
-                    case PM_TOKEN_EMBEXPR_BEGIN: {
-                        if (current == NULL) {
-                            // If we hit an embedded expression and the current
-                            // node is NULL, then this is the start of a new
-                            // string. We'll set the current node to a new
-                            // interpolated string.
-                            current = UP(pm_interpolated_string_node_create(parser, NULL, NULL, NULL));
-                        } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
-                            // If we hit an embedded expression and the current
-                            // node is a string node, then we'll convert the
-                            // current into an interpolated string and add the
-                            // string node to the list of parts.
-                            pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
-                            pm_interpolated_string_node_append(parser, interpolated, current);
-                            current = UP(interpolated);
-                        } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
-                            // If we hit an embedded expression and the current
-                            // node is an interpolated string, then we'll just
-                            // continue on.
-                        } else {
-                            assert(false && "unreachable");
-                        }
-
-                        pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
-                        pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, part);
-                        break;
-                    }
-                    default:
-                        expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_UPPER_ELEMENT);
-                        parser_lex(parser);
-                        break;
-                }
-            }
-
-            // If we have a current node, then we need to append it to the list.
-            if (current) {
-                pm_array_node_elements_append(parser->arena, array, current);
-            }
-
-            pm_token_t closing = parser->current;
-            if (match1(parser, PM_TOKEN_EOF)) {
-                pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM);
-                closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
-            } else {
-                expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
-            }
-
-            pm_array_node_close_set(parser, array, &closing);
-            return UP(array);
-        }
+        case PM_TOKEN_PERCENT_UPPER_W:
+            return parse_string_array(parser, depth);
         case PM_TOKEN_REGEXP_BEGIN: {
             pm_token_t opening = parser->current;
             parser_lex(parser);

From a3eca0c9b87c99447b62ca5faff945cdadd535de Mon Sep 17 00:00:00 2001
From: Kevin Newton <kddnewton@gmail.com>
Date: Tue, 24 Mar 2026 21:32:03 -0400
Subject: [PATCH 07/12] [ruby/prism] Pull out parse_class and parse_module

https://github.com/ruby/prism/commit/781ecf4338
---
 prism/prism.c | 375 ++++++++++++++++++++++++++------------------------
 1 file changed, 195 insertions(+), 180 deletions(-)

diff --git a/prism/prism.c b/prism/prism.c
index 0fa6f1009e706f..0e798fdce88305 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -17987,6 +17987,122 @@ parse_case(pm_parser_t *parser, uint8_t flags, uint16_t depth) {
     return node;
 }
 
+/**
+ * Parse a class definition expression (the `class` keyword). This handles both
+ * regular class definitions and singleton class definitions (`class << expr`).
+ */
+static pm_node_t *
+parse_class(pm_parser_t *parser, uint8_t flags, uint16_t depth) {
+    size_t opening_newline_index = token_newline_index(parser); /* taken before the keyword is consumed; passed to the indentation checks below */
+    parser_lex(parser); /* consume the `class` keyword */
+
+    pm_token_t class_keyword = parser->previous;
+    pm_do_loop_stack_push(parser, false); /* popped again on both return paths below */
+
+    pm_node_list_t current_block_exits = { 0 };
+    pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
+
+    if (accept1(parser, PM_TOKEN_LESS_LESS)) { /* singleton class form: `class << expr` */
+        pm_token_t operator = parser->previous;
+        pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS, (uint16_t) (depth + 1));
+
+        pm_parser_scope_push(parser, true);
+        if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { /* the expression must be followed by a newline or `;` */
+            PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_str(parser->current.type));
+        }
+
+        pm_node_t *statements = NULL;
+        if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) { /* otherwise there is no plain body and statements stays NULL */
+            pm_accepts_block_stack_push(parser, true);
+            statements = UP(parse_statements(parser, PM_CONTEXT_SCLASS, (uint16_t) (depth + 1)));
+            pm_accepts_block_stack_pop(parser);
+        }
+
+        if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) { /* body has rescue/ensure clauses; statements is replaced by the helper's result */
+            assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
+            statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_SCLASS, (uint16_t) (depth + 1)));
+        } else {
+            parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
+        }
+
+        expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword);
+
+        pm_constant_id_list_t locals;
+        pm_locals_order(parser, &parser->current_scope->locals, &locals, false); /* read the scope's locals before the scope is popped */
+
+        pm_parser_scope_pop(parser);
+        pm_do_loop_stack_pop(parser);
+
+        flush_block_exits(parser, previous_block_exits); /* NOTE(review): this path flushes while the regular class path calls pop_block_exits; the difference is not visible here - confirm it is intended */
+        return UP(pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous));
+    }
+
+    pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CLASS_NAME, (uint16_t) (depth + 1));
+    pm_token_t name = parser->previous; /* most recent token consumed while parsing the class name expression */
+    if (name.type != PM_TOKEN_CONSTANT) {
+        pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
+    }
+
+    pm_token_t inheritance_operator = { 0 }; /* stays zeroed (start == NULL) unless a `<` is seen below */
+    pm_node_t *superclass;
+
+    if (match1(parser, PM_TOKEN_LESS)) {
+        inheritance_operator = parser->current;
+        lex_state_set(parser, PM_LEX_STATE_BEG);
+
+        parser->command_start = true;
+        parser_lex(parser); /* consume the `<` after the lexer state has been adjusted */
+
+        superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1));
+    } else {
+        superclass = NULL;
+    }
+
+    pm_parser_scope_push(parser, true);
+
+    if (inheritance_operator.start != NULL) { /* a superclass was parsed, so a newline or `;` is required */
+        expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
+    } else {
+        accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON); /* without a superclass the terminator is optional */
+    }
+    pm_node_t *statements = NULL;
+
+    if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) { /* otherwise there is no plain body and statements stays NULL */
+        pm_accepts_block_stack_push(parser, true);
+        statements = UP(parse_statements(parser, PM_CONTEXT_CLASS, (uint16_t) (depth + 1)));
+        pm_accepts_block_stack_pop(parser);
+    }
+
+    if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) { /* body has rescue/ensure clauses; statements is replaced by the helper's result */
+        assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
+        statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_CLASS, (uint16_t) (depth + 1)));
+    } else {
+        parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
+    }
+
+    expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword);
+
+    if (context_def_p(parser)) { /* presumably true when inside a method definition; reported as PM_ERR_CLASS_IN_METHOD. Only checked on this (non-singleton) path */
+        pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
+    }
+
+    pm_constant_id_list_t locals;
+    pm_locals_order(parser, &parser->current_scope->locals, &locals, false); /* read the scope's locals before the scope is popped */
+
+    pm_parser_scope_pop(parser);
+    pm_do_loop_stack_pop(parser);
+
+    if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) { /* only constant reads and constant paths are accepted as class names */
+        pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
+        if (!PM_NODE_TYPE_P(constant_path, PM_ERROR_RECOVERY_NODE)) { /* replace the invalid name with an error recovery node unless it already is one */
+            constant_path = UP(pm_error_recovery_node_create_unexpected(parser, constant_path));
+        }
+    }
+
+    pop_block_exits(parser, previous_block_exits);
+    return UP(pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, NTOK2PTR(inheritance_operator), superclass, statements, &parser->previous)); /* NOTE(review): NTOK2PTR presumably yields NULL for the zeroed token when there is no superclass - confirm */
+}
+
 /**
  * Parse a method definition expression (the `def` keyword).
  */
@@ -18353,6 +18469,81 @@ parse_def(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags,
     ));
 }
 
+/**
+ * Parse a module definition expression (the `module` keyword).
+ */
+static pm_node_t *
+parse_module(pm_parser_t *parser, uint8_t flags, uint16_t depth) {
+    pm_node_list_t current_block_exits = { 0 };
+    pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
+
+    size_t opening_newline_index = token_newline_index(parser);
+    parser_lex(parser);
+    pm_token_t module_keyword = parser->previous;
+
+    pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_MODULE_NAME, (uint16_t) (depth + 1));
+    pm_token_t name;
+
+    /* If we can recover from a syntax error that occurred while parsing the
+     * name of the module, then we'll handle that here. */
+    if (PM_NODE_TYPE_P(constant_path, PM_ERROR_RECOVERY_NODE)) {
+        pop_block_exits(parser, previous_block_exits);
+
+        pm_token_t missing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
+        return UP(pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing));
+    }
+
+    while (accept1(parser, PM_TOKEN_COLON_COLON)) {
+        pm_token_t double_colon = parser->previous;
+
+        expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
+        constant_path = UP(pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous));
+    }
+
+    /* Here we retrieve the name of the module. If it wasn't a constant, then
+     * it's possible that `module foo` was passed, which is a syntax error. We
+     * handle that here as well. */
+    name = parser->previous;
+    if (name.type != PM_TOKEN_CONSTANT) {
+        pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
+    }
+
+    if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE) && !PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !PM_NODE_TYPE_P(constant_path, PM_ERROR_RECOVERY_NODE)) {
+        constant_path = UP(pm_error_recovery_node_create_unexpected(parser, constant_path));
+    }
+
+    pm_parser_scope_push(parser, true);
+    accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
+    pm_node_t *statements = NULL;
+
+    if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
+        pm_accepts_block_stack_push(parser, true);
+        statements = UP(parse_statements(parser, PM_CONTEXT_MODULE, (uint16_t) (depth + 1)));
+        pm_accepts_block_stack_pop(parser);
+    }
+
+    if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
+        assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
+        statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &module_keyword, module_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE, (uint16_t) (depth + 1)));
+    } else {
+        parser_warn_indentation_mismatch(parser, opening_newline_index, &module_keyword, false, false);
+    }
+
+    pm_constant_id_list_t locals;
+    pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
+
+    pm_parser_scope_pop(parser);
+    expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM, &module_keyword);
+
+    if (context_def_p(parser)) {
+        pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD);
+    }
+
+    pop_block_exits(parser, previous_block_exits);
+
+    return UP(pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous));
+}
+
 /**
  * Parse an interpolated word array literal (`%W[...]`).
  */
@@ -19556,116 +19747,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, u
 
             return node;
         }
-        case PM_TOKEN_KEYWORD_CLASS: {
-            size_t opening_newline_index = token_newline_index(parser);
-            parser_lex(parser);
-
-            pm_token_t class_keyword = parser->previous;
-            pm_do_loop_stack_push(parser, false);
-
-            pm_node_list_t current_block_exits = { 0 };
-            pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
-
-            if (accept1(parser, PM_TOKEN_LESS_LESS)) {
-                pm_token_t operator = parser->previous;
-                pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS, (uint16_t) (depth + 1));
-
-                pm_parser_scope_push(parser, true);
-                if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
-                    PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_str(parser->current.type));
-                }
-
-                pm_node_t *statements = NULL;
-                if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
-                    pm_accepts_block_stack_push(parser, true);
-                    statements = UP(parse_statements(parser, PM_CONTEXT_SCLASS, (uint16_t) (depth + 1)));
-                    pm_accepts_block_stack_pop(parser);
-                }
-
-                if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
-                    assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
-                    statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_SCLASS, (uint16_t) (depth + 1)));
-                } else {
-                    parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
-                }
-
-                expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword);
-
-                pm_constant_id_list_t locals;
-                pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
-
-                pm_parser_scope_pop(parser);
-                pm_do_loop_stack_pop(parser);
-
-                flush_block_exits(parser, previous_block_exits);
-                return UP(pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous));
-            }
-
-            pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CLASS_NAME, (uint16_t) (depth + 1));
-            pm_token_t name = parser->previous;
-            if (name.type != PM_TOKEN_CONSTANT) {
-                pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
-            }
-
-            pm_token_t inheritance_operator = { 0 };
-            pm_node_t *superclass;
-
-            if (match1(parser, PM_TOKEN_LESS)) {
-                inheritance_operator = parser->current;
-                lex_state_set(parser, PM_LEX_STATE_BEG);
-
-                parser->command_start = true;
-                parser_lex(parser);
-
-                superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1));
-            } else {
-                superclass = NULL;
-            }
-
-            pm_parser_scope_push(parser, true);
-
-            if (inheritance_operator.start != NULL) {
-                expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
-            } else {
-                accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
-            }
-            pm_node_t *statements = NULL;
-
-            if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
-                pm_accepts_block_stack_push(parser, true);
-                statements = UP(parse_statements(parser, PM_CONTEXT_CLASS, (uint16_t) (depth + 1)));
-                pm_accepts_block_stack_pop(parser);
-            }
-
-            if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
-                assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
-                statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_CLASS, (uint16_t) (depth + 1)));
-            } else {
-                parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
-            }
-
-            expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword);
-
-            if (context_def_p(parser)) {
-                pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
-            }
-
-            pm_constant_id_list_t locals;
-            pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
-
-            pm_parser_scope_pop(parser);
-            pm_do_loop_stack_pop(parser);
-
-            if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
-                pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
-                if (!PM_NODE_TYPE_P(constant_path, PM_ERROR_RECOVERY_NODE)) {
-                    constant_path = UP(pm_error_recovery_node_create_unexpected(parser, constant_path));
-                }
-            }
-
-            pop_block_exits(parser, previous_block_exits);
-            return UP(pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, NTOK2PTR(inheritance_operator), superclass, statements, &parser->previous));
-        }
+        case PM_TOKEN_KEYWORD_CLASS:
+            return parse_class(parser, flags, depth);
         case PM_TOKEN_KEYWORD_DEF:
             return parse_def(parser, binding_power, flags, depth);
         case PM_TOKEN_KEYWORD_DEFINED: {
@@ -19882,76 +19965,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, u
 
             return parse_conditional(parser, PM_CONTEXT_UNLESS, opening_newline_index, false, (uint16_t) (depth + 1));
         }
-        case PM_TOKEN_KEYWORD_MODULE: {
-            pm_node_list_t current_block_exits = { 0 };
-            pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
-
-            size_t opening_newline_index = token_newline_index(parser);
-            parser_lex(parser);
-            pm_token_t module_keyword = parser->previous;
-
-            pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_MODULE_NAME, (uint16_t) (depth + 1));
-            pm_token_t name;
-
-            // If we can recover from a syntax error that occurred while parsing
-            // the name of the module, then we'll handle that here.
-            if (PM_NODE_TYPE_P(constant_path, PM_ERROR_RECOVERY_NODE)) {
-                pop_block_exits(parser, previous_block_exits);
-
-                pm_token_t missing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
-                return UP(pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing));
-            }
-
-            while (accept1(parser, PM_TOKEN_COLON_COLON)) {
-                pm_token_t double_colon = parser->previous;
-
-                expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
-                constant_path = UP(pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous));
-            }
-
-            // Here we retrieve the name of the module. If it wasn't a constant,
-            // then it's possible that `module foo` was passed, which is a
-            // syntax error. We handle that here as well.
-            name = parser->previous;
-            if (name.type != PM_TOKEN_CONSTANT) {
-                pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
-            }
-
-            if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE) && !PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !PM_NODE_TYPE_P(constant_path, PM_ERROR_RECOVERY_NODE)) {
-                constant_path = UP(pm_error_recovery_node_create_unexpected(parser, constant_path));
-            }
-
-            pm_parser_scope_push(parser, true);
-            accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
-            pm_node_t *statements = NULL;
-
-            if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
-                pm_accepts_block_stack_push(parser, true);
-                statements = UP(parse_statements(parser, PM_CONTEXT_MODULE, (uint16_t) (depth + 1)));
-                pm_accepts_block_stack_pop(parser);
-            }
-
-            if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
-                assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
-                statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &module_keyword, module_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE, (uint16_t) (depth + 1)));
-            } else {
-                parser_warn_indentation_mismatch(parser, opening_newline_index, &module_keyword, false, false);
-            }
-
-            pm_constant_id_list_t locals;
-            pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
-
-            pm_parser_scope_pop(parser);
-            expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM, &module_keyword);
-
-            if (context_def_p(parser)) {
-                pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD);
-            }
-
-            pop_block_exits(parser, previous_block_exits);
-
-            return UP(pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous));
-        }
+        case PM_TOKEN_KEYWORD_MODULE:
+            return parse_module(parser, flags, depth);
         case PM_TOKEN_KEYWORD_NIL:
             parser_lex(parser);
             return UP(pm_nil_node_create(parser, &parser->previous));

From ff89d56b56932940225ebad7a4d1e9aa04c3ae47 Mon Sep 17 00:00:00 2001
From: Takashi Kokubun <takashikkbn@gmail.com>
Date: Wed, 25 Mar 2026 09:44:01 -0700
Subject: [PATCH 08/12] Pass reg_cfp instead of iseq to vm_search_method and
 related functions (#16541)

Change vm_search_method_fastpath, vm_search_method, vm_method_cfunc_is,
opt_equality, vm_opt_neq, vm_opt_nil_p, vm_opt_not, and vm_objtostring
to take a CFP instead of an iseq. The iseq is now read lazily from
reg_cfp->iseq only when the slowpath is actually needed.

This is a preparatory refactoring for lightweight JIT frames where the
iseq may not always be written eagerly to the CFP.

ZJIT's rb_zjit_vm_search_method and rb_vm_method_cfunc_is, which are
called with a compile-time iseq rather than a live CFP, now call the
slowpath directly.
---
 insns.def           | 12 ++++-----
 vm_insnhelper.c     | 63 ++++++++++++++++++++++++++-------------------
 zjit/src/codegen.rs |  4 +--
 zjit/src/cruby.rs   |  2 +-
 4 files changed, 45 insertions(+), 36 deletions(-)

diff --git a/insns.def b/insns.def
index 92ae7c181628b9..df4147efdeacf8 100644
--- a/insns.def
+++ b/insns.def
@@ -926,7 +926,7 @@ opt_new
     // The bookkeeping slot should be empty.
     RUBY_ASSERT(TOPN(argc + 1) == Qnil);
 
-    if (vm_method_cfunc_is(GET_ISEQ(), cd, val, rb_class_new_instance_pass_kw)) {
+    if (vm_method_cfunc_is(GET_CFP(), cd, val, rb_class_new_instance_pass_kw)) {
         RB_DEBUG_COUNTER_INC(opt_new_hit);
         val = rb_obj_alloc(val);
         TOPN(argc) = val;
@@ -947,7 +947,7 @@ objtostring
 // attr bool leaf = false;
 // attr bool zjit_profile = true;
 {
-    val = vm_objtostring(GET_ISEQ(), recv, cd);
+    val = vm_objtostring(GET_CFP(), recv, cd);
 
     if (UNDEF_P(val)) {
         CALL_SIMPLE_METHOD();
@@ -1006,7 +1006,7 @@ opt_nil_p
 (VALUE val)
 // attr bool zjit_profile = true;
 {
-    val = vm_opt_nil_p(GET_ISEQ(), cd, recv);
+    val = vm_opt_nil_p(GET_CFP(), cd, recv);
 
     if (UNDEF_P(val)) {
         CALL_SIMPLE_METHOD();
@@ -1435,7 +1435,7 @@ opt_eq
 (VALUE val)
 // attr bool zjit_profile = true;
 {
-    val = opt_equality(GET_ISEQ(), recv, obj, cd);
+    val = opt_equality(GET_CFP(), recv, obj, cd);
 
     if (UNDEF_P(val)) {
         CALL_SIMPLE_METHOD();
@@ -1450,7 +1450,7 @@ opt_neq
 (VALUE val)
 // attr bool zjit_profile = true;
 {
-    val = vm_opt_neq(GET_ISEQ(), cd, cd_eq, recv, obj);
+    val = vm_opt_neq(GET_CFP(), cd, cd_eq, recv, obj);
 
     if (UNDEF_P(val)) {
         CALL_SIMPLE_METHOD();
@@ -1672,7 +1672,7 @@ opt_not
 (VALUE val)
 // attr bool zjit_profile = true;
 {
-    val = vm_opt_not(GET_ISEQ(), cd, recv);
+    val = vm_opt_not(GET_CFP(), cd, recv);
 
     if (UNDEF_P(val)) {
         CALL_SIMPLE_METHOD();
diff --git a/vm_insnhelper.c b/vm_insnhelper.c
index 4e4ec36eb6729b..7a4f0cf54a6e2c 100644
--- a/vm_insnhelper.c
+++ b/vm_insnhelper.c
@@ -2352,9 +2352,9 @@ vm_search_method_slowpath0(VALUE cd_owner, struct rb_call_data *cd, VALUE klass)
     return cc;
 }
 
-ALWAYS_INLINE(static const struct rb_callcache *vm_search_method_fastpath(VALUE cd_owner, struct rb_call_data *cd, VALUE klass));
+ALWAYS_INLINE(static const struct rb_callcache *vm_search_method_fastpath(const struct rb_control_frame_struct *reg_cfp, struct rb_call_data *cd, VALUE klass));
 static const struct rb_callcache *
-vm_search_method_fastpath(VALUE cd_owner, struct rb_call_data *cd, VALUE klass)
+vm_search_method_fastpath(const struct rb_control_frame_struct *reg_cfp, struct rb_call_data *cd, VALUE klass)
 {
     const struct rb_callcache *cc = cd->cc;
 
@@ -2376,24 +2376,28 @@ vm_search_method_fastpath(VALUE cd_owner, struct rb_call_data *cd, VALUE klass)
     }
 #endif
 
-    return vm_search_method_slowpath0(cd_owner, cd, klass);
+    return vm_search_method_slowpath0((VALUE)reg_cfp->iseq, cd, klass);
 }
 
 static const struct rb_callable_method_entry_struct *
-vm_search_method(VALUE cd_owner, struct rb_call_data *cd, VALUE recv)
+vm_search_method(struct rb_control_frame_struct *reg_cfp, struct rb_call_data *cd, VALUE recv)
 {
     VALUE klass = CLASS_OF(recv);
     VM_ASSERT(klass != Qfalse);
     VM_ASSERT(RBASIC_CLASS(klass) == 0 || rb_obj_is_kind_of(klass, rb_cClass));
 
-    const struct rb_callcache *cc = vm_search_method_fastpath(cd_owner, cd, klass);
+    const struct rb_callcache *cc = vm_search_method_fastpath(reg_cfp, cd, klass);
     return vm_cc_cme(cc);
 }
 
 const struct rb_callable_method_entry_struct *
 rb_zjit_vm_search_method(VALUE cd_owner, struct rb_call_data *cd, VALUE recv)
 {
-    return vm_search_method(cd_owner, cd, recv);
+    // Called from ZJIT with the compile-time iseq, which may differ from
+    // the iseq on the current CFP. Use the slowpath to avoid stale caches.
+    VALUE klass = CLASS_OF(recv);
+    const struct rb_callcache *cc = vm_search_method_slowpath0(cd_owner, cd, klass);
+    return vm_cc_cme(cc);
 }
 
 #if __has_attribute(transparent_union)
@@ -2453,10 +2457,10 @@ check_method_basic_definition(const rb_callable_method_entry_t *me)
 }
 
 static inline int
-vm_method_cfunc_is(const rb_iseq_t *iseq, CALL_DATA cd, VALUE recv, cfunc_type func)
+vm_method_cfunc_is(struct rb_control_frame_struct *reg_cfp, CALL_DATA cd, VALUE recv, cfunc_type func)
 {
-    VM_ASSERT(iseq != NULL);
-    const struct rb_callable_method_entry_struct *cme = vm_search_method((VALUE)iseq, cd, recv);
+    VM_ASSERT(reg_cfp != NULL);
+    const struct rb_callable_method_entry_struct *cme = vm_search_method(reg_cfp, cd, recv);
     return check_cfunc(cme, func);
 }
 
@@ -2469,11 +2473,16 @@ rb_zjit_cme_is_cfunc(const rb_callable_method_entry_t *me, const cfunc_type func
 int
 rb_vm_method_cfunc_is(const rb_iseq_t *iseq, CALL_DATA cd, VALUE recv, cfunc_type func)
 {
-    return vm_method_cfunc_is(iseq, cd, recv, func);
+    // Called from ZJIT with the compile-time iseq, which may differ from
+    // the iseq on the current CFP. Use the slowpath to avoid stale caches.
+    VALUE klass = CLASS_OF(recv);
+    const struct rb_callcache *cc = vm_search_method_slowpath0((VALUE)iseq, cd, klass);
+    const struct rb_callable_method_entry_struct *cme = vm_cc_cme(cc);
+    return check_cfunc(cme, func);
 }
 
 #define check_cfunc(me, func) check_cfunc(me, make_cfunc_type(func))
-#define vm_method_cfunc_is(iseq, cd, recv, func) vm_method_cfunc_is(iseq, cd, recv, make_cfunc_type(func))
+#define vm_method_cfunc_is(reg_cfp, cd, recv, func) vm_method_cfunc_is(reg_cfp, cd, recv, make_cfunc_type(func))
 
 #define EQ_UNREDEFINED_P(t) BASIC_OP_UNREDEFINED_P(BOP_EQ, t##_REDEFINED_OP_FLAG)
 
@@ -2542,14 +2551,14 @@ opt_equality_specialized(VALUE recv, VALUE obj)
 }
 
 static VALUE
-opt_equality(const rb_iseq_t *cd_owner, VALUE recv, VALUE obj, CALL_DATA cd)
+opt_equality(struct rb_control_frame_struct *reg_cfp, VALUE recv, VALUE obj, CALL_DATA cd)
 {
-    VM_ASSERT(cd_owner != NULL);
+    VM_ASSERT(reg_cfp != NULL);
 
     VALUE val = opt_equality_specialized(recv, obj);
     if (!UNDEF_P(val)) return val;
 
-    if (!vm_method_cfunc_is(cd_owner, cd, recv, rb_obj_equal)) {
+    if (!vm_method_cfunc_is(reg_cfp, cd, recv, rb_obj_equal)) {
         return Qundef;
     }
     else {
@@ -5171,7 +5180,7 @@ vm_search_super_method(const rb_control_frame_t *reg_cfp, struct rb_call_data *c
         RB_OBJ_WRITE(reg_cfp->iseq, &cd->cc, cc);
     }
     else {
-        cc = vm_search_method_fastpath((VALUE)reg_cfp->iseq, cd, klass);
+        cc = vm_search_method_fastpath(reg_cfp, cd, klass);
         const rb_callable_method_entry_t *cached_cme = vm_cc_cme(cc);
 
         // define_method can cache for different method id
@@ -6123,7 +6132,7 @@ vm_sendish(
 
     switch (method_explorer) {
       case mexp_search_method:
-        calling.cc = cc = vm_search_method_fastpath((VALUE)reg_cfp->iseq, cd, CLASS_OF(recv));
+        calling.cc = cc = vm_search_method_fastpath(reg_cfp, cd, CLASS_OF(recv));
         val = vm_cc_call(cc)(ec, GET_CFP(), &calling);
         break;
       case mexp_search_super:
@@ -6230,14 +6239,14 @@ VALUE rb_mod_to_s(VALUE);
 VALUE rb_mod_name(VALUE);
 
 static VALUE
-vm_objtostring(const rb_iseq_t *iseq, VALUE recv, CALL_DATA cd)
+vm_objtostring(struct rb_control_frame_struct *reg_cfp, VALUE recv, CALL_DATA cd)
 {
     int type = TYPE(recv);
     if (type == T_STRING) {
         return recv;
     }
 
-    const struct rb_callable_method_entry_struct *cme = vm_search_method((VALUE)iseq, cd, recv);
+    const struct rb_callable_method_entry_struct *cme = vm_search_method(reg_cfp, cd, recv);
 
     switch (type) {
       case T_SYMBOL:
@@ -6288,9 +6297,9 @@ vm_objtostring(const rb_iseq_t *iseq, VALUE recv, CALL_DATA cd)
 // ZJIT implementation is using the C function
 // and needs to call a non-static function
 VALUE
-rb_vm_objtostring(const rb_iseq_t *iseq, VALUE recv, CALL_DATA cd)
+rb_vm_objtostring(struct rb_control_frame_struct *reg_cfp, VALUE recv, CALL_DATA cd)
 {
-    return vm_objtostring(iseq, recv, cd);
+    return vm_objtostring(reg_cfp, recv, cd);
 }
 
 static VALUE
@@ -6841,10 +6850,10 @@ vm_opt_mod(VALUE recv, VALUE obj)
 }
 
 static VALUE
-vm_opt_neq(const rb_iseq_t *iseq, CALL_DATA cd, CALL_DATA cd_eq, VALUE recv, VALUE obj)
+vm_opt_neq(struct rb_control_frame_struct *reg_cfp, CALL_DATA cd, CALL_DATA cd_eq, VALUE recv, VALUE obj)
 {
-    if (vm_method_cfunc_is(iseq, cd, recv, rb_obj_not_equal)) {
-        VALUE val = opt_equality(iseq, recv, obj, cd_eq);
+    if (vm_method_cfunc_is(reg_cfp, cd, recv, rb_obj_not_equal)) {
+        VALUE val = opt_equality(reg_cfp, recv, obj, cd_eq);
 
         if (!UNDEF_P(val)) {
             return RBOOL(!RTEST(val));
@@ -7096,13 +7105,13 @@ vm_opt_empty_p(VALUE recv)
 VALUE rb_false(VALUE obj);
 
 static VALUE
-vm_opt_nil_p(const rb_iseq_t *iseq, CALL_DATA cd, VALUE recv)
+vm_opt_nil_p(struct rb_control_frame_struct *reg_cfp, CALL_DATA cd, VALUE recv)
 {
     if (NIL_P(recv) &&
         BASIC_OP_UNREDEFINED_P(BOP_NIL_P, NIL_REDEFINED_OP_FLAG)) {
         return Qtrue;
     }
-    else if (vm_method_cfunc_is(iseq, cd, recv, rb_false)) {
+    else if (vm_method_cfunc_is(reg_cfp, cd, recv, rb_false)) {
         return Qfalse;
     }
     else {
@@ -7158,9 +7167,9 @@ vm_opt_succ(VALUE recv)
 }
 
 static VALUE
-vm_opt_not(const rb_iseq_t *iseq, CALL_DATA cd, VALUE recv)
+vm_opt_not(struct rb_control_frame_struct *reg_cfp, CALL_DATA cd, VALUE recv)
 {
-    if (vm_method_cfunc_is(iseq, cd, recv, rb_obj_not)) {
+    if (vm_method_cfunc_is(reg_cfp, cd, recv, rb_obj_not)) {
         return RBOOL(!RTEST(recv));
     }
     else {
diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs
index b473bc69a630c1..d2d5c6abd6b1b8 100644
--- a/zjit/src/codegen.rs
+++ b/zjit/src/codegen.rs
@@ -742,8 +742,8 @@ fn gen_get_ep(asm: &mut Assembler, level: u32) -> Opnd {
 fn gen_objtostring(jit: &mut JITState, asm: &mut Assembler, val: Opnd, cd: *const rb_call_data, state: &FrameState) -> Opnd {
     gen_prepare_non_leaf_call(jit, asm, state);
     // TODO: Specialize for immediate types
-    // Call rb_vm_objtostring(iseq, recv, cd)
-    let ret = asm_ccall!(asm, rb_vm_objtostring, VALUE::from(jit.iseq).into(), val, Opnd::const_ptr(cd));
+    // Call rb_vm_objtostring(cfp, recv, cd)
+    let ret = asm_ccall!(asm, rb_vm_objtostring, CFP, val, Opnd::const_ptr(cd));
 
     // TODO: Call `to_s` on the receiver if rb_vm_objtostring returns Qundef
     // Need to replicate what CALL_SIMPLE_METHOD does
diff --git a/zjit/src/cruby.rs b/zjit/src/cruby.rs
index e1343f9b40dfdf..b92df55d48dbfe 100644
--- a/zjit/src/cruby.rs
+++ b/zjit/src/cruby.rs
@@ -165,7 +165,7 @@ unsafe extern "C" {
     pub fn rb_vm_stack_canary() -> VALUE;
     pub fn rb_vm_push_cfunc_frame(cme: *const rb_callable_method_entry_t, recv_idx: c_int);
     pub fn rb_obj_class(klass: VALUE) -> VALUE;
-    pub fn rb_vm_objtostring(iseq: IseqPtr, recv: VALUE, cd: *const rb_call_data) -> VALUE;
+    pub fn rb_vm_objtostring(reg_cfp: CfpPtr, recv: VALUE, cd: *const rb_call_data) -> VALUE;
 }
 
 // Renames

From 2ca2865aa194cf832b205f530b4fb440166aea68 Mon Sep 17 00:00:00 2001
From: Takashi Kokubun <takashikkbn@gmail.com>
Date: Wed, 25 Mar 2026 09:44:30 -0700
Subject: [PATCH 09/12] ZJIT: Remove eager nil-fill of locals in JIT-to-JIT
 calls (#16544)

The caller in gen_send_iseq_direct was eagerly writing Qnil to all
non-parameter local slots of the callee's frame before every JIT-to-JIT
call. This is unnecessary because compile_jit_entry_state already
initializes non-parameter locals to Const(Qnil) in the JIT entry block,
and these values are propagated to the target block via branch edges.
Before any non-leaf call (including eval/binding), gen_spill_locals
writes these nil values from the FrameState to the stack, ensuring
that eval can correctly read uninitialized locals as nil.

The nil-fill in function_stub_hit's prepare_for_exit is kept because
that path handles compilation failures where JIT code never runs.
---
 zjit/src/codegen.rs | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs
index d2d5c6abd6b1b8..178b122ee6e5e6 100644
--- a/zjit/src/codegen.rs
+++ b/zjit/src/codegen.rs
@@ -1619,16 +1619,6 @@ fn gen_send_iseq_direct(
         0
     };
 
-    // Fill non-parameter locals with nil (they may be read by eval before being written)
-    let num_params = params.size.to_usize();
-    if local_size > num_params {
-        asm_comment!(asm, "initialize non-parameter locals to nil");
-        for local_idx in num_params..local_size {
-            let offset = local_size_and_idx_to_bp_offset(local_size, local_idx);
-            asm.store(Opnd::mem(64, SP, -offset * SIZEOF_VALUE_I32), Qnil.into());
-        }
-    }
-
     // Make a method call. The target address will be rewritten once compiled.
     let iseq_call = IseqCall::new(iseq, num_optionals_passed);
     let dummy_ptr = cb.get_write_ptr().raw_ptr(cb);
@@ -2876,7 +2866,7 @@ c_callable! {
             let pc = unsafe { rb_iseq_pc_at_idx(iseq, entry_insn_idxs[iseq_call.jit_entry_idx.to_usize()]) };
             unsafe { rb_set_cfp_pc(cfp, pc) };
 
-            // Successful JIT-to-JIT calls fill nils to non-parameter locals in generated code.
+            // JIT-to-JIT calls don't eagerly fill nils to non-parameter locals.
             // If we side-exit from function_stub_hit (before JIT code runs), we need to set them here.
             fn prepare_for_exit(iseq: IseqPtr, cfp: CfpPtr, sp: *mut VALUE, compile_error: &CompileError) {
                 unsafe {

From 17cd9bffd37710dbcd746a62b3d49ea7213095ed Mon Sep 17 00:00:00 2001
From: Max Bernstein <ruby@bernsteinbear.com>
Date: Wed, 25 Mar 2026 09:35:56 -0400
Subject: [PATCH 10/12] ZJIT: Support negative array indices

Previously we would side-exit if the index was negative. Instead, adjust
the index to be in-bounds.
---
 zjit/src/codegen.rs       |   9 +++
 zjit/src/cruby_methods.rs |   1 +
 zjit/src/hir.rs           |  27 ++++++++
 zjit/src/hir/opt_tests.rs | 126 ++++++++++++++++++++++----------------
 4 files changed, 109 insertions(+), 54 deletions(-)

diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs
index 178b122ee6e5e6..af6e881bd02510 100644
--- a/zjit/src/codegen.rs
+++ b/zjit/src/codegen.rs
@@ -561,6 +561,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
         Insn::NewRange { low, high, flag, state } => gen_new_range(jit, asm, opnd!(low), opnd!(high), *flag, &function.frame_state(*state)),
         Insn::NewRangeFixnum { low, high, flag, state } => gen_new_range_fixnum(asm, opnd!(low), opnd!(high), *flag, &function.frame_state(*state)),
         Insn::ArrayDup { val, state } => gen_array_dup(asm, opnd!(val), &function.frame_state(*state)),
+        Insn::AdjustBounds { index, length } => gen_adjust_bounds(asm, opnd!(index), opnd!(length)),
         Insn::ArrayAref { array, index, .. } => gen_array_aref(asm, opnd!(array), opnd!(index)),
         Insn::ArrayAset { array, index, val } => {
             no_output!(gen_array_aset(asm, opnd!(array), opnd!(index), opnd!(val)))
@@ -1781,6 +1782,14 @@ fn gen_new_array(
     }
 }
 
+/// If `index` is negative, return `index + length`; otherwise return `index` unchanged. A result that is
+/// still negative indicates the index is out-of-bounds.
+fn gen_adjust_bounds(asm: &mut Assembler, index: Opnd, length: Opnd) -> lir::Opnd {
+    let adjusted = asm.add(index, length);
+    asm.test(index, index);
+    asm.csel_l(adjusted, index)
+}
+
 /// Compile array access (`array[index]`)
 fn gen_array_aref(
     asm: &mut Assembler,
diff --git a/zjit/src/cruby_methods.rs b/zjit/src/cruby_methods.rs
index d39f38028743c5..bd4310e0845bd7 100644
--- a/zjit/src/cruby_methods.rs
+++ b/zjit/src/cruby_methods.rs
@@ -354,6 +354,7 @@ fn inline_array_aref(fun: &mut hir::Function, block: hir::BlockId, recv: hir::In
             let index = fun.push_insn(block, hir::Insn::UnboxFixnum { val: index });
             let length = fun.push_insn(block, hir::Insn::ArrayLength { array: recv });
             let index = fun.push_insn(block, hir::Insn::GuardLess { left: index, right: length, state });
+            let index = fun.push_insn(block, hir::Insn::AdjustBounds { index, length });
             let zero = fun.push_insn(block, hir::Insn::Const { val: hir::Const::CInt64(0) });
             use crate::hir::SideExitReason;
             let index = fun.push_insn(block, hir::Insn::GuardGreaterEq { left: index, right: zero, reason: SideExitReason::GuardGreaterEq, state });
diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs
index b14298c935e981..f2e02181175a26 100644
--- a/zjit/src/hir.rs
+++ b/zjit/src/hir.rs
@@ -806,6 +806,9 @@ pub enum Insn {
     ArrayPop { array: InsnId, state: InsnId },
     /// Return the length of the array as a C `long` ([`types::CInt64`])
     ArrayLength { array: InsnId },
+    /// Adjust potentially-negative index by the given length, returning the adjusted index. If
+    /// still negative, return a negative number, which indicates the index is still out-of-bounds.
+    AdjustBounds { index: InsnId, length: InsnId },
 
     HashAref { hash: InsnId, key: InsnId, state: InsnId },
     HashAset { hash: InsnId, key: InsnId, val: InsnId, state: InsnId },
@@ -1270,6 +1273,10 @@ macro_rules! for_each_operand_impl {
             Insn::ArrayLength { array } => {
                 $visit_one!(array);
             }
+            Insn::AdjustBounds { index, length } => {
+                $visit_one!(index);
+                $visit_one!(length);
+            }
             Insn::HashAref { hash, key, state } => {
                 $visit_one!(hash);
                 $visit_one!(key);
@@ -1472,6 +1479,7 @@ impl Insn {
             Insn::ArrayAset { .. } => effects::Any,
             Insn::ArrayPop { ..  } => effects::Any,
             Insn::ArrayLength { .. } => Effect::write(abstract_heaps::Empty),
+            Insn::AdjustBounds { .. } => effects::Empty,
             Insn::HashAref { .. } => effects::Any,
             Insn::HashAset { .. } => effects::Any,
             Insn::HashDup { .. } => allocates,
@@ -1713,6 +1721,9 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> {
             Insn::ArrayLength { array } => {
                 write!(f, "ArrayLength {array}")
             }
+            Insn::AdjustBounds { index, length } => {
+                write!(f, "AdjustBounds {index}, {length}")
+            }
             Insn::NewHash { elements, .. } => {
                 write!(f, "NewHash")?;
                 let mut prefix = " ";
@@ -2806,6 +2817,7 @@ impl Function {
             &ArrayAset { array, index, val } => ArrayAset { array: find!(array), index: find!(index), val: find!(val) },
             &ArrayPop { array, state } => ArrayPop { array: find!(array), state: find!(state) },
             &ArrayLength { array } => ArrayLength { array: find!(array) },
+            &AdjustBounds { index, length } => AdjustBounds { index: find!(index), length: find!(length) },
             &ArrayMax { ref elements, state } => ArrayMax { elements: find_vec!(elements), state: find!(state) },
             &ArrayInclude { ref elements, target, state } => ArrayInclude { elements: find_vec!(elements), target: find!(target), state: find!(state) },
             &ArrayPackBuffer { ref elements, fmt, buffer, state } => ArrayPackBuffer { elements: find_vec!(elements), fmt: find!(fmt), buffer: find!(buffer), state: find!(state) },
@@ -2923,6 +2935,7 @@ impl Function {
             Insn::ArrayAref { .. } => types::BasicObject,
             Insn::ArrayPop { .. } => types::BasicObject,
             Insn::ArrayLength { .. } => types::CInt64,
+            Insn::AdjustBounds { .. } => types::CInt64,
             Insn::HashAref { .. } => types::BasicObject,
             Insn::NewHash { .. } => types::HashExact,
             Insn::HashDup { .. } => types::HashExact,
@@ -5281,6 +5294,16 @@ impl Function {
                             _ => insn_id,
                         }
                     }
+                    Insn::AdjustBounds { index, .. } => {
+                        // If index is known nonnegative, then we don't need to adjust bounds.
+                        if self.type_of(index).cint64_value().filter(|&i| i >= 0).is_some() {
+                            self.make_equal_to(insn_id, index);
+                            // Don't bother re-inferring the type of index; we already know it.
+                            continue;
+                        } else {
+                            insn_id
+                        }
+                    }
                     Insn::Test { val } if self.type_of(val).is_known_falsy() => {
                         self.new_insn(Insn::Const { val: Const::CBool(false) })
                     }
@@ -6063,6 +6086,10 @@ impl Function {
                 self.assert_subtype(insn_id, array, types::ArrayExact)?;
                 self.assert_subtype(insn_id, index, types::CInt64)
             }
+            Insn::AdjustBounds { index, length } => {
+                self.assert_subtype(insn_id, index, types::CInt64)?;
+                self.assert_subtype(insn_id, length, types::CInt64)
+            }
             // Instructions with Hash operands
             Insn::HashAref { hash, .. }
             | Insn::HashAset { hash, .. } => self.assert_subtype(insn_id, hash, types::HashExact),
diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs
index fb9e92b5bf0c45..db3889449f149a 100644
--- a/zjit/src/hir/opt_tests.rs
+++ b/zjit/src/hir/opt_tests.rs
@@ -1014,12 +1014,12 @@ mod hir_opt_tests {
           PatchPoint NoSingletonClass(Array@0x1008)
           PatchPoint MethodRedefined(Array@0x1008, []@0x1010, cme:0x1018)
           v27:ArrayExact = GuardType v10, ArrayExact
-          v34:CInt64[0] = Const CInt64(0)
+          v35:CInt64[0] = Const CInt64(0)
           v29:CInt64 = ArrayLength v27
-          v30:CInt64[0] = GuardLess v34, v29
-          v33:BasicObject = ArrayAref v27, v30
+          v30:CInt64[0] = GuardLess v35, v29
+          v34:BasicObject = ArrayAref v27, v30
           CheckInterrupts
-          Return v33
+          Return v34
         ");
     }
 
@@ -1047,12 +1047,12 @@ mod hir_opt_tests {
           PatchPoint NoSingletonClass(Array@0x1008)
           PatchPoint MethodRedefined(Array@0x1008, []@0x1010, cme:0x1018)
           v27:ArrayExact = GuardType v10, ArrayExact
-          v34:CInt64[0] = Const CInt64(0)
+          v35:CInt64[0] = Const CInt64(0)
           v29:CInt64 = ArrayLength v27
-          v30:CInt64[0] = GuardLess v34, v29
-          v33:BasicObject = ArrayAref v27, v30
+          v30:CInt64[0] = GuardLess v35, v29
+          v34:BasicObject = ArrayAref v27, v30
           CheckInterrupts
-          Return v33
+          Return v34
         ");
     }
 
@@ -1077,10 +1077,15 @@ mod hir_opt_tests {
           v13:Fixnum[-10] = Const Value(-10)
           PatchPoint NoSingletonClass(Array@0x1008)
           PatchPoint MethodRedefined(Array@0x1008, []@0x1010, cme:0x1018)
-          v31:CInt64[-10] = Const CInt64(-10)
+          v32:CInt64[-10] = Const CInt64(-10)
           v26:CInt64 = ArrayLength v11
-          v27:CInt64[-10] = GuardLess v31, v26
-          SideExit GuardGreaterEq
+          v27:CInt64[-10] = GuardLess v32, v26
+          v28:CInt64 = AdjustBounds v27, v26
+          v29:CInt64[0] = Const CInt64(0)
+          v30:CInt64 = GuardGreaterEq v28, v29
+          v31:BasicObject = ArrayAref v11, v30
+          CheckInterrupts
+          Return v31
         ");
     }
 
@@ -2343,12 +2348,12 @@ mod hir_opt_tests {
           PatchPoint NoSingletonClass(Array@0x1008)
           PatchPoint MethodRedefined(Array@0x1008, []@0x1010, cme:0x1018)
           v27:ArrayExact = GuardType v10, ArrayExact
-          v34:CInt64[0] = Const CInt64(0)
+          v35:CInt64[0] = Const CInt64(0)
           v29:CInt64 = ArrayLength v27
-          v30:CInt64[0] = GuardLess v34, v29
-          v33:BasicObject = ArrayAref v27, v30
+          v30:CInt64[0] = GuardLess v35, v29
+          v34:BasicObject = ArrayAref v27, v30
           CheckInterrupts
-          Return v33
+          Return v34
         ");
         assert_snapshot!(inspect("test [1,2,3]"), @"1");
     }
@@ -6055,12 +6060,12 @@ mod hir_opt_tests {
           v12:Fixnum[0] = Const Value(0)
           PatchPoint NoSingletonClass(Array@0x1010)
           PatchPoint MethodRedefined(Array@0x1010, []@0x1018, cme:0x1020)
-          v33:CInt64[0] = Const CInt64(0)
+          v34:CInt64[0] = Const CInt64(0)
           v28:CInt64 = ArrayLength v23
-          v29:CInt64[0] = GuardLess v33, v28
-          v32:BasicObject = ArrayAref v23, v29
+          v29:CInt64[0] = GuardLess v34, v28
+          v33:BasicObject = ArrayAref v23, v29
           CheckInterrupts
-          Return v32
+          Return v33
         ");
        // TODO(max): Check the result of `S[0] = 5; test` using `inspect` to make sure that we
        // actually do the load at run-time.
@@ -6087,12 +6092,12 @@ mod hir_opt_tests {
           v13:Fixnum[1] = Const Value(1)
           PatchPoint NoSingletonClass(Array@0x1008)
           PatchPoint MethodRedefined(Array@0x1008, []@0x1010, cme:0x1018)
-          v31:CInt64[1] = Const CInt64(1)
+          v32:CInt64[1] = Const CInt64(1)
           v26:CInt64 = ArrayLength v11
-          v27:CInt64[1] = GuardLess v31, v26
-          v32:Fixnum[5] = Const Value(5)
+          v27:CInt64[1] = GuardLess v32, v26
+          v33:Fixnum[5] = Const Value(5)
           CheckInterrupts
-          Return v32
+          Return v33
         ");
     }
 
@@ -6117,10 +6122,15 @@ mod hir_opt_tests {
           v13:Fixnum[-3] = Const Value(-3)
           PatchPoint NoSingletonClass(Array@0x1008)
           PatchPoint MethodRedefined(Array@0x1008, []@0x1010, cme:0x1018)
-          v31:CInt64[-3] = Const CInt64(-3)
+          v32:CInt64[-3] = Const CInt64(-3)
           v26:CInt64 = ArrayLength v11
-          v27:CInt64[-3] = GuardLess v31, v26
-          SideExit GuardGreaterEq
+          v27:CInt64[-3] = GuardLess v32, v26
+          v28:CInt64 = AdjustBounds v27, v26
+          v29:CInt64[0] = Const CInt64(0)
+          v30:CInt64 = GuardGreaterEq v28, v29
+          v31:BasicObject = ArrayAref v11, v30
+          CheckInterrupts
+          Return v31
         ");
     }
 
@@ -6145,10 +6155,15 @@ mod hir_opt_tests {
           v13:Fixnum[-10] = Const Value(-10)
           PatchPoint NoSingletonClass(Array@0x1008)
           PatchPoint MethodRedefined(Array@0x1008, []@0x1010, cme:0x1018)
-          v31:CInt64[-10] = Const CInt64(-10)
+          v32:CInt64[-10] = Const CInt64(-10)
           v26:CInt64 = ArrayLength v11
-          v27:CInt64[-10] = GuardLess v31, v26
-          SideExit GuardGreaterEq
+          v27:CInt64[-10] = GuardLess v32, v26
+          v28:CInt64 = AdjustBounds v27, v26
+          v29:CInt64[0] = Const CInt64(0)
+          v30:CInt64 = GuardGreaterEq v28, v29
+          v31:BasicObject = ArrayAref v11, v30
+          CheckInterrupts
+          Return v31
         ");
     }
 
@@ -6173,12 +6188,12 @@ mod hir_opt_tests {
           v13:Fixnum[10] = Const Value(10)
           PatchPoint NoSingletonClass(Array@0x1008)
           PatchPoint MethodRedefined(Array@0x1008, []@0x1010, cme:0x1018)
-          v31:CInt64[10] = Const CInt64(10)
+          v32:CInt64[10] = Const CInt64(10)
           v26:CInt64 = ArrayLength v11
-          v27:CInt64[10] = GuardLess v31, v26
-          v32:NilClass = Const Value(nil)
+          v27:CInt64[10] = GuardLess v32, v26
+          v33:NilClass = Const Value(nil)
           CheckInterrupts
-          Return v32
+          Return v33
         ");
     }
 
@@ -8665,12 +8680,12 @@ mod hir_opt_tests {
           v19:Fixnum[0] = Const Value(0)
           PatchPoint NoSingletonClass(Array@0x1008)
           PatchPoint MethodRedefined(Array@0x1008, []@0x1010, cme:0x1018)
-          v37:CInt64[0] = Const CInt64(0)
+          v38:CInt64[0] = Const CInt64(0)
           v32:CInt64 = ArrayLength v14
-          v33:CInt64[0] = GuardLess v37, v32
-          v36:BasicObject = ArrayAref v14, v33
+          v33:CInt64[0] = GuardLess v38, v32
+          v37:BasicObject = ArrayAref v14, v33
           CheckInterrupts
-          Return v36
+          Return v37
         ");
     }
 
@@ -8705,11 +8720,12 @@ mod hir_opt_tests {
           v31:CInt64 = UnboxFixnum v30
           v32:CInt64 = ArrayLength v29
           v33:CInt64 = GuardLess v31, v32
-          v34:CInt64[0] = Const CInt64(0)
-          v35:CInt64 = GuardGreaterEq v33, v34
-          v36:BasicObject = ArrayAref v29, v35
+          v34:CInt64 = AdjustBounds v33, v32
+          v35:CInt64[0] = Const CInt64(0)
+          v36:CInt64 = GuardGreaterEq v34, v35
+          v37:BasicObject = ArrayAref v29, v36
           CheckInterrupts
-          Return v36
+          Return v37
         ");
     }
 
@@ -8745,11 +8761,12 @@ mod hir_opt_tests {
           v31:CInt64 = UnboxFixnum v30
           v32:CInt64 = ArrayLength v29
           v33:CInt64 = GuardLess v31, v32
-          v34:CInt64[0] = Const CInt64(0)
-          v35:CInt64 = GuardGreaterEq v33, v34
-          v36:BasicObject = ArrayAref v29, v35
+          v34:CInt64 = AdjustBounds v33, v32
+          v35:CInt64[0] = Const CInt64(0)
+          v36:CInt64 = GuardGreaterEq v34, v35
+          v37:BasicObject = ArrayAref v29, v36
           CheckInterrupts
-          Return v36
+          Return v37
         ");
     }
 
@@ -9340,21 +9357,22 @@ mod hir_opt_tests {
           v25:CallableMethodEntry[VALUE(0x1048)] = GuardBitEquals v24, Value(VALUE(0x1048))
           v26:RubyValue = LoadField v23, :_ep_specval@0x1050
           v27:FalseClass = GuardBitEquals v26, Value(false)
-          v37:CPtr = GetEP 0
-          v38:RubyValue = LoadField v37, :_ep_method_entry@0x1040
-          v39:CallableMethodEntry[VALUE(0x1048)] = GuardBitEquals v38, Value(VALUE(0x1048))
-          v40:RubyValue = LoadField v37, :_ep_specval@0x1050
-          v41:FalseClass = GuardBitEquals v40, Value(false)
+          v38:CPtr = GetEP 0
+          v39:RubyValue = LoadField v38, :_ep_method_entry@0x1040
+          v40:CallableMethodEntry[VALUE(0x1048)] = GuardBitEquals v39, Value(VALUE(0x1048))
+          v41:RubyValue = LoadField v38, :_ep_specval@0x1050
+          v42:FalseClass = GuardBitEquals v41, Value(false)
           v28:Array = GuardType v9, Array
           v29:Fixnum = GuardType v10, Fixnum
           v30:CInt64 = UnboxFixnum v29
           v31:CInt64 = ArrayLength v28
           v32:CInt64 = GuardLess v30, v31
-          v33:CInt64[0] = Const CInt64(0)
-          v34:CInt64 = GuardGreaterEq v32, v33
-          v35:BasicObject = ArrayAref v28, v34
+          v33:CInt64 = AdjustBounds v32, v31
+          v34:CInt64[0] = Const CInt64(0)
+          v35:CInt64 = GuardGreaterEq v33, v34
+          v36:BasicObject = ArrayAref v28, v35
           CheckInterrupts
-          Return v35
+          Return v36
         ");
     }
 

From be783db2c2bb73456f808291aa4f72fa02861641 Mon Sep 17 00:00:00 2001
From: Max Bernstein <ruby@bernsteinbear.com>
Date: Wed, 25 Mar 2026 09:59:07 -0400
Subject: [PATCH 11/12] ZJIT: Support negative indices in more places

---
 zjit/src/cruby_methods.rs |  3 +++
 zjit/src/hir/opt_tests.rs | 47 ++++++++++++++++++++++-----------------
 2 files changed, 29 insertions(+), 21 deletions(-)

diff --git a/zjit/src/cruby_methods.rs b/zjit/src/cruby_methods.rs
index bd4310e0845bd7..767f6499e80606 100644
--- a/zjit/src/cruby_methods.rs
+++ b/zjit/src/cruby_methods.rs
@@ -379,6 +379,7 @@ fn inline_array_aset(fun: &mut hir::Function, block: hir::BlockId, recv: hir::In
             let index = fun.push_insn(block, hir::Insn::UnboxFixnum { val: index });
             let length = fun.push_insn(block, hir::Insn::ArrayLength { array: recv });
             let index = fun.push_insn(block, hir::Insn::GuardLess { left: index, right: length, state });
+            let index = fun.push_insn(block, hir::Insn::AdjustBounds { index, length });
             let zero = fun.push_insn(block, hir::Insn::Const { val: hir::Const::CInt64(0) });
             use crate::hir::SideExitReason;
             let index = fun.push_insn(block, hir::Insn::GuardGreaterEq { left: index, right: zero, reason: SideExitReason::GuardGreaterEq, state });
@@ -476,6 +477,7 @@ fn inline_string_getbyte(fun: &mut hir::Function, block: hir::BlockId, recv: hir
         //
         // This is unlike most other guards.
         let unboxed_index = fun.push_insn(block, hir::Insn::GuardLess { left: unboxed_index, right: len, state });
+        let unboxed_index = fun.push_insn(block, hir::Insn::AdjustBounds { index: unboxed_index, length: len });
         let zero = fun.push_insn(block, hir::Insn::Const { val: hir::Const::CInt64(0) });
         use crate::hir::SideExitReason;
         let _ = fun.push_insn(block, hir::Insn::GuardGreaterEq { left: unboxed_index, right: zero, reason: SideExitReason::GuardGreaterEq, state });
@@ -499,6 +501,7 @@ fn inline_string_setbyte(fun: &mut hir::Function, block: hir::BlockId, recv: hir
             return_type: types::CInt64,
         });
         let unboxed_index = fun.push_insn(block, hir::Insn::GuardLess { left: unboxed_index, right: len, state });
+        let unboxed_index = fun.push_insn(block, hir::Insn::AdjustBounds { index: unboxed_index, length: len });
         let zero = fun.push_insn(block, hir::Insn::Const { val: hir::Const::CInt64(0) });
         use crate::hir::SideExitReason;
         let _ = fun.push_insn(block, hir::Insn::GuardGreaterEq { left: unboxed_index, right: zero, reason: SideExitReason::GuardGreaterEq, state });
diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs
index db3889449f149a..af438c361b8af9 100644
--- a/zjit/src/hir/opt_tests.rs
+++ b/zjit/src/hir/opt_tests.rs
@@ -9062,9 +9062,9 @@ mod hir_opt_tests {
           v34:CUInt64 = LoadField v33, :_rbasic_flags@0x1040
           v35:CUInt64 = GuardNoBitsSet v34, RUBY_FL_FREEZE=CUInt64(2048)
           v37:CUInt64 = GuardNoBitsSet v34, RUBY_ELTS_SHARED=CUInt64(4096)
-          v45:CInt64[1] = Const CInt64(1)
+          v46:CInt64[1] = Const CInt64(1)
           v39:CInt64 = ArrayLength v33
-          v40:CInt64[1] = GuardLess v45, v39
+          v40:CInt64[1] = GuardLess v46, v39
           ArrayAset v33, v40, v19
           WriteBarrier v33, v19
           CheckInterrupts
@@ -9108,9 +9108,10 @@ mod hir_opt_tests {
           v43:CInt64 = UnboxFixnum v38
           v44:CInt64 = ArrayLength v37
           v45:CInt64 = GuardLess v43, v44
-          v46:CInt64[0] = Const CInt64(0)
-          v47:CInt64 = GuardGreaterEq v45, v46
-          ArrayAset v37, v47, v16
+          v46:CInt64 = AdjustBounds v45, v44
+          v47:CInt64[0] = Const CInt64(0)
+          v48:CInt64 = GuardGreaterEq v46, v47
+          ArrayAset v37, v48, v16
           WriteBarrier v37, v16
           CheckInterrupts
           Return v16
@@ -9533,11 +9534,12 @@ mod hir_opt_tests {
           v30:CInt64 = UnboxFixnum v29
           v31:CInt64 = LoadField v28, :len@0x1040
           v32:CInt64 = GuardLess v30, v31
-          v33:CInt64[0] = Const CInt64(0)
-          v34:CInt64 = GuardGreaterEq v32, v33
-          v35:Fixnum = StringGetbyte v28, v32
+          v33:CInt64 = AdjustBounds v32, v31
+          v34:CInt64[0] = Const CInt64(0)
+          v35:CInt64 = GuardGreaterEq v33, v34
+          v36:Fixnum = StringGetbyte v28, v33
           CheckInterrupts
-          Return v35
+          Return v36
         ");
     }
 
@@ -9573,8 +9575,9 @@ mod hir_opt_tests {
           v34:CInt64 = UnboxFixnum v33
           v35:CInt64 = LoadField v32, :len@0x1040
           v36:CInt64 = GuardLess v34, v35
-          v37:CInt64[0] = Const CInt64(0)
-          v38:CInt64 = GuardGreaterEq v36, v37
+          v37:CInt64 = AdjustBounds v36, v35
+          v38:CInt64[0] = Const CInt64(0)
+          v39:CInt64 = GuardGreaterEq v37, v38
           v23:Fixnum[5] = Const Value(5)
           CheckInterrupts
           Return v23
@@ -9615,11 +9618,12 @@ mod hir_opt_tests {
           v35:CInt64 = UnboxFixnum v33
           v36:CInt64 = LoadField v32, :len@0x1040
           v37:CInt64 = GuardLess v35, v36
-          v38:CInt64[0] = Const CInt64(0)
-          v39:CInt64 = GuardGreaterEq v37, v38
-          v40:CUInt64 = LoadField v32, :_rbasic_flags@0x1041
-          v41:CUInt64 = GuardNoBitsSet v40, RUBY_FL_FREEZE=CUInt64(2048)
-          v42:Fixnum = StringSetbyteFixnum v32, v33, v34
+          v38:CInt64 = AdjustBounds v37, v36
+          v39:CInt64[0] = Const CInt64(0)
+          v40:CInt64 = GuardGreaterEq v38, v39
+          v41:CUInt64 = LoadField v32, :_rbasic_flags@0x1041
+          v42:CUInt64 = GuardNoBitsSet v41, RUBY_FL_FREEZE=CUInt64(2048)
+          v43:Fixnum = StringSetbyteFixnum v32, v33, v34
           CheckInterrupts
           Return v34
         ");
@@ -9661,11 +9665,12 @@ mod hir_opt_tests {
           v35:CInt64 = UnboxFixnum v33
           v36:CInt64 = LoadField v32, :len@0x1040
           v37:CInt64 = GuardLess v35, v36
-          v38:CInt64[0] = Const CInt64(0)
-          v39:CInt64 = GuardGreaterEq v37, v38
-          v40:CUInt64 = LoadField v32, :_rbasic_flags@0x1041
-          v41:CUInt64 = GuardNoBitsSet v40, RUBY_FL_FREEZE=CUInt64(2048)
-          v42:Fixnum = StringSetbyteFixnum v32, v33, v34
+          v38:CInt64 = AdjustBounds v37, v36
+          v39:CInt64[0] = Const CInt64(0)
+          v40:CInt64 = GuardGreaterEq v38, v39
+          v41:CUInt64 = LoadField v32, :_rbasic_flags@0x1041
+          v42:CUInt64 = GuardNoBitsSet v41, RUBY_FL_FREEZE=CUInt64(2048)
+          v43:Fixnum = StringSetbyteFixnum v32, v33, v34
           CheckInterrupts
           Return v34
         ");

From 8514166d54eec6ecb5cb79d56d4ea6cf74d4a390 Mon Sep 17 00:00:00 2001
From: Max Bernstein <ruby@bernsteinbear.com>
Date: Wed, 25 Mar 2026 09:59:31 -0400
Subject: [PATCH 12/12] ZJIT: Adjust Type API for checking signedness

---
 zjit/src/hir.rs          |  2 +-
 zjit/src/hir_type/mod.rs | 13 +++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs
index f2e02181175a26..0891a59fa2c2b6 100644
--- a/zjit/src/hir.rs
+++ b/zjit/src/hir.rs
@@ -5296,7 +5296,7 @@ impl Function {
                     }
                     Insn::AdjustBounds { index, .. } => {
                         // If index is known nonnegative, then we don't need to adjust bounds.
-                        if self.type_of(index).cint64_value().filter(|&i| i >= 0).is_some() {
+                        if self.type_of(index).known_nonnegative() {
                             self.make_equal_to(insn_id, index);
                             // Don't bother re-inferring the type of index; we already know it.
                             continue;
diff --git a/zjit/src/hir_type/mod.rs b/zjit/src/hir_type/mod.rs
index e1e2c1a8104d51..1e6c0d2df7f8b8 100644
--- a/zjit/src/hir_type/mod.rs
+++ b/zjit/src/hir_type/mod.rs
@@ -411,6 +411,19 @@ impl Type {
         }
     }
 
+    /// Return the integer specialization cast to `i64`, or `None` when the Type has no integer
+    /// specialization. Panics if the Type is not a subtype of `CSigned`.
+    fn int_spec_signed(&self) -> Option<i64> {
+        assert!(self.is_subtype(types::CSigned), "int_spec_signed() only makes sense for signed integer types");
+        if let Specialization::Int(bits) = self.spec { return Some(bits as i64); }
+        None
+    }
+
+    /// Return true if the Type is a known integer >= 0; panics (via `int_spec_signed`) unless it is a `CSigned` subtype.
+    pub fn known_nonnegative(&self) -> bool {
+        matches!(self.int_spec_signed(), Some(val) if val >= 0)
+    }
+
     /// Return true if the Type has object specialization and false otherwise.
     pub fn ruby_object_known(&self) -> bool {
         matches!(self.spec, Specialization::Object(_))