diff --git a/src/prism.c b/src/prism.c index fe1ad17d31..d0d6d6dcfc 100644 --- a/src/prism.c +++ b/src/prism.c @@ -17761,1591 +17761,1987 @@ pm_block_call_p(const pm_node_t *node) { } /** - * Parse an expression that begins with the previous node that we just lexed. + * Parse a case expression (the `case` keyword). This handles both case-when and + * case-in (pattern matching) forms. */ -static PRISM_INLINE pm_node_t * -parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) { - switch (parser->current.type) { - case PM_TOKEN_BRACKET_LEFT_ARRAY: { - parser_lex(parser); +static pm_node_t * +parse_case(pm_parser_t *parser, uint8_t flags, uint16_t depth) { + size_t opening_newline_index = token_newline_index(parser); + parser_lex(parser); - pm_array_node_t *array = pm_array_node_create(parser, &parser->previous); - pm_accepts_block_stack_push(parser, true); - bool parsed_bare_hash = false; + pm_token_t case_keyword = parser->previous; + pm_node_t *predicate = NULL; - while (!match2(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_EOF)) { - bool accepted_newline = accept1(parser, PM_TOKEN_NEWLINE); + pm_node_list_t current_block_exits = { 0 }; + pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits); - // Handle the case where we don't have a comma and we have a - // newline followed by a right bracket. 
- if (accepted_newline && match1(parser, PM_TOKEN_BRACKET_RIGHT)) { - break; - } + if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { + while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)); + predicate = NULL; + } else if (match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_END)) { + predicate = NULL; + } else if (!token_begins_expression_p(parser->current.type)) { + predicate = NULL; + } else { + predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_CASE_EXPRESSION_AFTER_CASE, (uint16_t) (depth + 1)); + while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)); + } - // Ensure that we have a comma between elements in the array. - if (array->elements.size > 0) { - if (accept1(parser, PM_TOKEN_COMMA)) { - // If there was a comma but we also accepts a newline, - // then this is a syntax error. - if (accepted_newline) { - pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA); - } - } else { - // If there was no comma, then we need to add a syntax - // error. - PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_END(parser, &parser->previous), 0, PM_ERR_ARRAY_SEPARATOR, pm_token_str(parser->current.type)); - parser->previous.start = parser->previous.end; - parser->previous.type = 0; - } - } + if (match1(parser, PM_TOKEN_KEYWORD_END)) { + parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false); + parser_lex(parser); + pop_block_exits(parser, previous_block_exits); + pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS); + return UP(pm_case_node_create(parser, &case_keyword, predicate, &parser->previous)); + } - // If we have a right bracket immediately following a comma, - // this is allowed since it's a trailing comma. In this case we - // can break out of the loop. 
- if (match1(parser, PM_TOKEN_BRACKET_RIGHT)) break; + /* At this point we can create a case node, though we don't yet know if it + * is a case-in or case-when node. */ + pm_node_t *node; - pm_node_t *element; + if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) { + pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, NULL); + pm_static_literals_t literals = { 0 }; - if (accept1(parser, PM_TOKEN_USTAR)) { - pm_token_t operator = parser->previous; - pm_node_t *expression = NULL; + /* At this point we've seen a when keyword, so we know this is a + * case-when node. We will continue to parse the when nodes until we hit + * the end of the list. */ + while (match1(parser, PM_TOKEN_KEYWORD_WHEN)) { + parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true); + parser_lex(parser); - if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) { - pm_parser_scope_forwarding_positionals_check(parser, &operator); - } else { - expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1)); - } + pm_token_t when_keyword = parser->previous; + pm_when_node_t *when_node = pm_when_node_create(parser, &when_keyword); - element = UP(pm_splat_node_create(parser, &operator, expression)); - } else if (match2(parser, PM_TOKEN_LABEL, PM_TOKEN_USTAR_STAR)) { - if (parsed_bare_hash) { - pm_parser_err_current(parser, PM_ERR_EXPRESSION_BARE_HASH); - } + do { + if (accept1(parser, PM_TOKEN_USTAR)) { + pm_token_t operator = parser->previous; + pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1)); - element = UP(pm_keyword_hash_node_create(parser)); - pm_static_literals_t hash_keys = { 0 }; + pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression); + 
pm_when_node_conditions_append(parser->arena, when_node, UP(splat_node)); - if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_DO_BLOCK, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) { - parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1)); + if (PM_NODE_TYPE_P(expression, PM_ERROR_RECOVERY_NODE)) break; + } else { + pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, (uint16_t) (depth + 1)); + pm_when_node_conditions_append(parser->arena, when_node, condition); + + /* If we found a missing node, then this is a syntax error + * and we should stop looping. */ + if (PM_NODE_TYPE_P(condition, PM_ERROR_RECOVERY_NODE)) break; + + /* If this is a string node, then we need to mark it as + * frozen because when clause strings are frozen. */ + if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) { + pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL); + } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) { + pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL); } - pm_static_literals_free(&hash_keys); - parsed_bare_hash = true; - } else { - element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_LABEL, PM_ERR_ARRAY_EXPRESSION, (uint16_t) (depth + 1)); + pm_when_clause_static_literals_add(parser, &literals, condition); + } + } while (accept1(parser, PM_TOKEN_COMMA)); - if (pm_symbol_node_label_p(parser, element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) { - if (parsed_bare_hash) { - pm_parser_err_previous(parser, PM_ERR_EXPRESSION_BARE_HASH); - } + if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { + if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) { + pm_when_node_then_keyword_loc_set(parser, when_node, &parser->previous); + } + } else { + expect1(parser, 
PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER); + pm_when_node_then_keyword_loc_set(parser, when_node, &parser->previous); + } - pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser); - pm_static_literals_t hash_keys = { 0 }; - pm_hash_key_static_literals_add(parser, &hash_keys, element); + if (!match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) { + pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_CASE_WHEN, (uint16_t) (depth + 1)); + if (statements != NULL) { + pm_when_node_statements_set(when_node, statements); + } + } - pm_token_t operator = { 0 }; - if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) { - operator = parser->previous; - } + pm_case_node_condition_append(parser->arena, case_node, UP(when_node)); + } - pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1)); - pm_node_t *assoc = UP(pm_assoc_node_create(parser, element, NTOK2PTR(operator), value)); - pm_keyword_hash_node_elements_append(parser->arena, hash, assoc); + /* If we didn't parse any conditions (in or when) then we need to + * indicate that we have an error. */ + if (case_node->conditions.size == 0) { + pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS); + } - element = UP(hash); - if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) { - parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1)); - } + pm_static_literals_free(&literals); + node = UP(case_node); + } else { + pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate); - pm_static_literals_free(&hash_keys); - parsed_bare_hash = true; - } - } + /* If this is a case-match node (i.e., it is a pattern matching case + * statement) then we must have a predicate. 
*/ + if (predicate == NULL) { + pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE); + } - pm_array_node_elements_append(parser->arena, array, element); - if (PM_NODE_TYPE_P(element, PM_ERROR_RECOVERY_NODE)) break; - } + /* At this point we expect that we're parsing a case-in node. We will + * continue to parse the in nodes until we hit the end of the list. */ + while (match1(parser, PM_TOKEN_KEYWORD_IN)) { + parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true); - accept1(parser, PM_TOKEN_NEWLINE); + bool previous_pattern_matching_newlines = parser->pattern_matching_newlines; + parser->pattern_matching_newlines = true; - if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARRAY_TERM, pm_token_str(parser->current.type)); - parser->previous.start = parser->previous.end; - parser->previous.type = 0; - } + lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL); + parser->command_start = false; + parser_lex(parser); - pm_array_node_close_set(parser, array, &parser->previous); - pm_accepts_block_stack_pop(parser); + pm_token_t in_keyword = parser->previous; - return UP(array); - } - case PM_TOKEN_PARENTHESIS_LEFT: - case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: { - pm_token_t opening = parser->current; - pm_node_flags_t paren_flags = 0; + pm_constant_id_list_t captures = { 0 }; + pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1)); - pm_node_list_t current_block_exits = { 0 }; - pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits); + parser->pattern_matching_newlines = previous_pattern_matching_newlines; - parser_lex(parser); - while (true) { - if (accept1(parser, PM_TOKEN_SEMICOLON)) { - paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS; - } else if (!accept1(parser, PM_TOKEN_NEWLINE)) { - break; + 
/* Since we're in the top-level of the case-in node we need to + * check for guard clauses in the form of `if` or `unless` + * statements. */ + if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) { + pm_token_t keyword = parser->previous; + pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1)); + pattern = UP(pm_if_node_modifier_create(parser, pattern, &keyword, predicate)); + } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) { + pm_token_t keyword = parser->previous; + pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1)); + pattern = UP(pm_unless_node_modifier_create(parser, pattern, &keyword, predicate)); + } + + /* Now we need to check for the terminator of the in node's pattern. + * It can be a newline or semicolon optionally followed by a `then` + * keyword. */ + pm_token_t then_keyword = { 0 }; + if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { + if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) { + then_keyword = parser->previous; } + } else { + expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER); + then_keyword = parser->previous; } - // If this is the end of the file or we match a right parenthesis, then - // we have an empty parentheses node, and we can immediately return. - if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_EOF)) { - expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN); - pop_block_exits(parser, previous_block_exits); - return UP(pm_parentheses_node_create(parser, &opening, NULL, &parser->previous, paren_flags)); + /* Now we can actually parse the statements associated with the in + * node. 
*/ + pm_statements_node_t *statements; + if (match3(parser, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) { + statements = NULL; + } else { + statements = parse_statements(parser, PM_CONTEXT_CASE_IN, (uint16_t) (depth + 1)); } - // Otherwise, we're going to parse the first statement in the list - // of statements within the parentheses. - pm_accepts_block_stack_push(parser, true); - context_push(parser, PM_CONTEXT_PARENS); - pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1)); - context_pop(parser); + /* Now that we have the full pattern and statements, we can create + * the node and attach it to the case node. */ + pm_node_t *condition = UP(pm_in_node_create(parser, pattern, statements, &in_keyword, NTOK2PTR(then_keyword))); + pm_case_match_node_condition_append(parser->arena, case_node, condition); + } - // Determine if this statement is followed by a terminator. In the - // case of a single statement, this is fine. But in the case of - // multiple statements it's required. - bool terminator_found = false; + /* If we didn't parse any conditions (in or when) then we need to + * indicate that we have an error. 
*/ + if (case_node->conditions.size == 0) { + pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS); + } - if (accept1(parser, PM_TOKEN_SEMICOLON)) { - terminator_found = true; - paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS; - } else if (accept1(parser, PM_TOKEN_NEWLINE)) { - terminator_found = true; - } + node = UP(case_node); + } - if (terminator_found) { - while (true) { - if (accept1(parser, PM_TOKEN_SEMICOLON)) { - paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS; - } else if (!accept1(parser, PM_TOKEN_NEWLINE)) { - break; - } - } - } + accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON); + if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) { + pm_token_t else_keyword = parser->previous; + pm_else_node_t *else_node; - // If we hit a right parenthesis, then we're done parsing the - // parentheses node, and we can check which kind of node we should - // return. - if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) { - if (opening.type == PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES) { - lex_state_set(parser, PM_LEX_STATE_ENDARG); - } + if (!match1(parser, PM_TOKEN_KEYWORD_END)) { + else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1)), &parser->current); + } else { + else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current); + } - parser_lex(parser); - pm_accepts_block_stack_pop(parser); - pop_block_exits(parser, previous_block_exits); + if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) { + pm_case_node_else_clause_set((pm_case_node_t *) node, else_node); + } else { + pm_case_match_node_else_clause_set((pm_case_match_node_t *) node, else_node); + } + } - if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) { - // If we have a single statement and are ending on a right - // parenthesis, then we need to check if this is possibly a - // multiple target node. 
- pm_multi_target_node_t *multi_target; + parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false); + expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM, &case_keyword); - if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.length == 0) { - multi_target = (pm_multi_target_node_t *) statement; - } else { - multi_target = pm_multi_target_node_create(parser); - pm_multi_target_node_targets_append(parser, multi_target, statement); - } + if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) { + pm_case_node_end_keyword_loc_set(parser, (pm_case_node_t *) node, &parser->previous); + } else { + pm_case_match_node_end_keyword_loc_set(parser, (pm_case_match_node_t *) node, &parser->previous); + } - multi_target->lparen_loc = TOK2LOC(parser, &opening); - multi_target->rparen_loc = TOK2LOC(parser, &parser->previous); - PM_NODE_START_SET_TOKEN(parser, multi_target, &opening); - PM_NODE_LENGTH_SET_TOKEN(parser, multi_target, &parser->previous); + pop_block_exits(parser, previous_block_exits); + return node; +} - pm_node_t *result; - if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) { - result = parse_targets(parser, UP(multi_target), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1)); - accept1(parser, PM_TOKEN_NEWLINE); - } else { - result = UP(multi_target); - } +/** + * Parse a class definition expression (the `class` keyword). This handles both + * regular class definitions and singleton class definitions (`class << expr`). + */ +static pm_node_t * +parse_class(pm_parser_t *parser, uint8_t flags, uint16_t depth) { + size_t opening_newline_index = token_newline_index(parser); + parser_lex(parser); - if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) { - // All set, this is explicitly allowed by the parent - // context. 
- } else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) { - // All set, we're inside a for loop and we're parsing - // multiple targets. - } else if (binding_power != PM_BINDING_POWER_STATEMENT) { - // Multi targets are not allowed when it's not a - // statement level. - pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED); - } else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) { - // Multi targets must be followed by an equal sign in - // order to be valid (or a right parenthesis if they are - // nested). - pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED); - } + pm_token_t class_keyword = parser->previous; + pm_do_loop_stack_push(parser, false); - return result; - } + pm_node_list_t current_block_exits = { 0 }; + pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits); - // If we have a single statement and are ending on a right parenthesis - // and we didn't return a multiple assignment node, then we can return a - // regular parentheses node now. 
- pm_statements_node_t *statements = pm_statements_node_create(parser); - pm_statements_node_body_append(parser, statements, statement, true); + if (accept1(parser, PM_TOKEN_LESS_LESS)) { + pm_token_t operator = parser->previous; + pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS, (uint16_t) (depth + 1)); - return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, paren_flags)); - } + pm_parser_scope_push(parser, true); + if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_str(parser->current.type)); + } - // If we have more than one statement in the set of parentheses, - // then we are going to parse all of them as a list of statements. - // We'll do that here. - context_push(parser, PM_CONTEXT_PARENS); - paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS; + pm_node_t *statements = NULL; + if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) { + pm_accepts_block_stack_push(parser, true); + statements = UP(parse_statements(parser, PM_CONTEXT_SCLASS, (uint16_t) (depth + 1))); + pm_accepts_block_stack_pop(parser); + } - pm_statements_node_t *statements = pm_statements_node_create(parser); - pm_statements_node_body_append(parser, statements, statement, true); + if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) { + assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE)); + statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_SCLASS, (uint16_t) (depth + 1))); + } else { + parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false); 
+ } - // If we didn't find a terminator and we didn't find a right - // parenthesis, then this is a syntax error. - if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type)); - } + expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword); - // Parse each statement within the parentheses. - while (true) { - pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1)); - pm_statements_node_body_append(parser, statements, node, true); + pm_constant_id_list_t locals; + pm_locals_order(parser, &parser->current_scope->locals, &locals, false); - // If we're recovering from a syntax error, then we need to stop - // parsing the statements now. - if (parser->recovering) { - // If this is the level of context where the recovery has - // happened, then we can mark the parser as done recovering. - if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) parser->recovering = false; - break; - } + pm_parser_scope_pop(parser); + pm_do_loop_stack_pop(parser); - // If we couldn't parse an expression at all, then we need to - // bail out of the loop. - if (PM_NODE_TYPE_P(node, PM_ERROR_RECOVERY_NODE)) break; + flush_block_exits(parser, previous_block_exits); + return UP(pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous)); + } - // If we successfully parsed a statement, then we are going to - // need terminator to delimit them. 
- if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { - while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)); - if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break; - } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) { - break; - } else if (!match1(parser, PM_TOKEN_EOF)) { - // If we're at the end of the file, then we're going to add - // an error after this for the ) anyway. - PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type)); - } - } + pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CLASS_NAME, (uint16_t) (depth + 1)); + pm_token_t name = parser->previous; + if (name.type != PM_TOKEN_CONSTANT) { + pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME); + } - context_pop(parser); - pm_accepts_block_stack_pop(parser); - expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN); + pm_token_t inheritance_operator = { 0 }; + pm_node_t *superclass; - // When we're parsing multi targets, we allow them to be followed by - // a right parenthesis if they are at the statement level. This is - // only possible if they are the final statement in a parentheses. - // We need to explicitly reject that here. 
- { - pm_node_t *statement = statements->body.nodes[statements->body.size - 1]; + if (match1(parser, PM_TOKEN_LESS)) { + inheritance_operator = parser->current; + lex_state_set(parser, PM_LEX_STATE_BEG); - if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) { - pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser); - pm_multi_target_node_targets_append(parser, multi_target, statement); + parser->command_start = true; + parser_lex(parser); - statement = UP(multi_target); - statements->body.nodes[statements->body.size - 1] = statement; - } + superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1)); + } else { + superclass = NULL; + } - if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) { - const uint8_t *offset = parser->start + PM_NODE_END(statement); - pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset }; - pm_node_t *value = UP(pm_error_recovery_node_create(parser, PM_NODE_END(statement), 0)); + pm_parser_scope_push(parser, true); - statement = UP(pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value)); - statements->body.nodes[statements->body.size - 1] = statement; + if (inheritance_operator.start != NULL) { + expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END); + } else { + accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON); + } + pm_node_t *statements = NULL; - pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED); - } - } + if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) { + pm_accepts_block_stack_push(parser, true); + statements = UP(parse_statements(parser, PM_CONTEXT_CLASS, (uint16_t) (depth + 1))); + pm_accepts_block_stack_pop(parser); + } - pop_block_exits(parser, previous_block_exits); - pm_void_statements_check(parser, 
statements, true); - return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, paren_flags)); - } - case PM_TOKEN_BRACE_LEFT: { - // If we were passed a current_hash_keys via the parser, then that - // means we're already parsing a hash and we want to share the set - // of hash keys with this inner hash we're about to parse for the - // sake of warnings. We'll set it to NULL after we grab it to make - // sure subsequent expressions don't use it. Effectively this is a - // way of getting around passing it to every call to - // parse_expression. - pm_static_literals_t *current_hash_keys = parser->current_hash_keys; - parser->current_hash_keys = NULL; + if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) { + assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE)); + statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_CLASS, (uint16_t) (depth + 1))); + } else { + parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false); + } - pm_accepts_block_stack_push(parser, true); - parser_lex(parser); + expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword); - pm_token_t opening = parser->previous; - pm_hash_node_t *node = pm_hash_node_create(parser, &opening); + if (context_def_p(parser)) { + pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD); + } - if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) { - if (current_hash_keys != NULL) { - parse_assocs(parser, current_hash_keys, UP(node), (uint16_t) (depth + 1)); - } else { - pm_static_literals_t hash_keys = { 0 }; - parse_assocs(parser, &hash_keys, UP(node), (uint16_t) (depth + 1)); - pm_static_literals_free(&hash_keys); - } + pm_constant_id_list_t locals; + pm_locals_order(parser, &parser->current_scope->locals, &locals, false); - accept1(parser, PM_TOKEN_NEWLINE); - } 
+ pm_parser_scope_pop(parser); + pm_do_loop_stack_pop(parser); - pm_accepts_block_stack_pop(parser); - expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM, &opening); - pm_hash_node_closing_loc_set(parser, node, &parser->previous); - - return UP(node); + if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) { + pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME); + if (!PM_NODE_TYPE_P(constant_path, PM_ERROR_RECOVERY_NODE)) { + constant_path = UP(pm_error_recovery_node_create_unexpected(parser, constant_path)); } - case PM_TOKEN_CHARACTER_LITERAL: { - pm_node_t *node = UP(pm_string_node_create_current_string( - parser, - &(pm_token_t) { - .type = PM_TOKEN_STRING_BEGIN, - .start = parser->current.start, - .end = parser->current.start + 1 - }, - &(pm_token_t) { - .type = PM_TOKEN_STRING_CONTENT, - .start = parser->current.start + 1, - .end = parser->current.end - }, - NULL - )); + } - pm_node_flag_set(node, parse_unescaped_encoding(parser)); + pop_block_exits(parser, previous_block_exits); + return UP(pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, NTOK2PTR(inheritance_operator), superclass, statements, &parser->previous)); +} - // Skip past the character literal here, since now we have handled - // parser->explicit_encoding correctly. +/** + * Parse a method definition expression (the `def` keyword). + */ +static pm_node_t * +parse_def(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, uint16_t depth) { + pm_node_list_t current_block_exits = { 0 }; + pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits); + + pm_token_t def_keyword = parser->current; + size_t opening_newline_index = token_newline_index(parser); + + pm_node_t *receiver = NULL; + pm_token_t operator = { 0 }; + pm_token_t name; + + /* This context is necessary for lexing `...` in a bare params correctly. 
It + * must be pushed before lexing the first param, so it is here. */ + context_push(parser, PM_CONTEXT_DEF_PARAMS); + parser_lex(parser); + + /* This will be false if the method name is not a valid identifier but could + * be followed by an operator. */ + bool valid_name = true; + + switch (parser->current.type) { + case PM_CASE_OPERATOR: + pm_parser_scope_push(parser, true); + lex_state_set(parser, PM_LEX_STATE_ENDFN); parser_lex(parser); - // Characters can be followed by strings in which case they are - // automatically concatenated. - if (match1(parser, PM_TOKEN_STRING_BEGIN)) { - return parse_strings(parser, node, false, (uint16_t) (depth + 1)); + name = parser->previous; + break; + case PM_TOKEN_IDENTIFIER: { + parser_lex(parser); + + if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) { + receiver = parse_variable_call(parser); + + pm_parser_scope_push(parser, true); + lex_state_set(parser, PM_LEX_STATE_FNAME); + parser_lex(parser); + + operator = parser->previous; + name = parse_method_definition_name(parser); + } else { + pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)); + pm_parser_scope_push(parser, true); + + name = parser->previous; } - return node; + break; } - case PM_TOKEN_CLASS_VARIABLE: { + case PM_TOKEN_INSTANCE_VARIABLE: + case PM_TOKEN_CLASS_VARIABLE: + case PM_TOKEN_GLOBAL_VARIABLE: + valid_name = false; + PRISM_FALLTHROUGH + case PM_TOKEN_CONSTANT: + case PM_TOKEN_KEYWORD_NIL: + case PM_TOKEN_KEYWORD_SELF: + case PM_TOKEN_KEYWORD_TRUE: + case PM_TOKEN_KEYWORD_FALSE: + case PM_TOKEN_KEYWORD___FILE__: + case PM_TOKEN_KEYWORD___LINE__: + case PM_TOKEN_KEYWORD___ENCODING__: { + pm_parser_scope_push(parser, true); parser_lex(parser); - pm_node_t *node = UP(pm_class_variable_read_node_create(parser, &parser->previous)); - if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) { - node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, 
(uint16_t) (depth + 1)); - } + pm_token_t identifier = parser->previous; - return node; + if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) { + lex_state_set(parser, PM_LEX_STATE_FNAME); + parser_lex(parser); + operator = parser->previous; + + switch (identifier.type) { + case PM_TOKEN_CONSTANT: + receiver = UP(pm_constant_read_node_create(parser, &identifier)); + break; + case PM_TOKEN_INSTANCE_VARIABLE: + receiver = UP(pm_instance_variable_read_node_create(parser, &identifier)); + break; + case PM_TOKEN_CLASS_VARIABLE: + receiver = UP(pm_class_variable_read_node_create(parser, &identifier)); + break; + case PM_TOKEN_GLOBAL_VARIABLE: + receiver = UP(pm_global_variable_read_node_create(parser, &identifier)); + break; + case PM_TOKEN_KEYWORD_NIL: + receiver = UP(pm_nil_node_create(parser, &identifier)); + break; + case PM_TOKEN_KEYWORD_SELF: + receiver = UP(pm_self_node_create(parser, &identifier)); + break; + case PM_TOKEN_KEYWORD_TRUE: + receiver = UP(pm_true_node_create(parser, &identifier)); + break; + case PM_TOKEN_KEYWORD_FALSE: + receiver = UP(pm_false_node_create(parser, &identifier)); + break; + case PM_TOKEN_KEYWORD___FILE__: + receiver = UP(pm_source_file_node_create(parser, &identifier)); + break; + case PM_TOKEN_KEYWORD___LINE__: + receiver = UP(pm_source_line_node_create(parser, &identifier)); + break; + case PM_TOKEN_KEYWORD___ENCODING__: + receiver = UP(pm_source_encoding_node_create(parser, &identifier)); + break; + default: + break; + } + + name = parse_method_definition_name(parser); + } else { + if (!valid_name) { + PM_PARSER_ERR_TOKEN_FORMAT(parser, &identifier, PM_ERR_DEF_NAME, pm_token_str(identifier.type)); + } + + name = identifier; + } + break; } - case PM_TOKEN_CONSTANT: { + case PM_TOKEN_PARENTHESIS_LEFT: { + /* The current context is `PM_CONTEXT_DEF_PARAMS`, however the inner + * expression of this parenthesis should not be processed under this + * context. Thus, the context is popped here. 
*/ + context_pop(parser); parser_lex(parser); - pm_token_t constant = parser->previous; - // If a constant is immediately followed by parentheses, then this is in - // fact a method call, not a constant read. - if ( - match1(parser, PM_TOKEN_PARENTHESIS_LEFT) || - ((flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) || - (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) || - match1(parser, PM_TOKEN_BRACE_LEFT) - ) { - pm_arguments_t arguments = { 0 }; - parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1)); - return UP(pm_call_node_fcall_create(parser, &constant, &arguments)); - } + pm_token_t lparen = parser->previous; + pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_DEF_RECEIVER, (uint16_t) (depth + 1)); - pm_node_t *node = UP(pm_constant_read_node_create(parser, &parser->previous)); + accept1(parser, PM_TOKEN_NEWLINE); + expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN); + pm_token_t rparen = parser->previous; - if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) { - // If we get here, then we have a comma immediately following a - // constant, so we're going to parse this as a multiple assignment. - node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1)); - } + lex_state_set(parser, PM_LEX_STATE_FNAME); + expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM); - return node; + operator = parser->previous; + receiver = UP(pm_parentheses_node_create(parser, &lparen, expression, &rparen, 0)); + + /* To push `PM_CONTEXT_DEF_PARAMS` again is for the same reason as + * described the above. 
*/ + pm_parser_scope_push(parser, true); + context_push(parser, PM_CONTEXT_DEF_PARAMS); + name = parse_method_definition_name(parser); + break; } - case PM_TOKEN_UCOLON_COLON: { + default: + pm_parser_scope_push(parser, true); + name = parse_method_definition_name(parser); + break; + } + + pm_token_t lparen = { 0 }; + pm_token_t rparen = { 0 }; + pm_parameters_node_t *params; + + bool accept_endless_def = true; + switch (parser->current.type) { + case PM_TOKEN_PARENTHESIS_LEFT: { parser_lex(parser); - pm_token_t delimiter = parser->previous; + lparen = parser->previous; - expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT); - pm_node_t *node = UP(pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous)); + if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) { + params = NULL; + } else { + /* https://bugs.ruby-lang.org/issues/19107 */ + bool allow_trailing_comma = parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1; + params = parse_parameters( + parser, + PM_BINDING_POWER_DEFINED, + true, + allow_trailing_comma, + true, + true, + false, + PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES, + (uint16_t) (depth + 1) + ); + } - if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) { - node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1)); + lex_state_set(parser, PM_LEX_STATE_BEG); + parser->command_start = true; + + context_pop(parser); + if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) { + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_str(parser->current.type)); + parser->previous.start = parser->previous.end; + parser->previous.type = 0; } - return node; + rparen = parser->previous; + break; } - case PM_TOKEN_UDOT_DOT: - case PM_TOKEN_UDOT_DOT_DOT: { - pm_token_t operator = parser->current; - parser_lex(parser); + case PM_CASE_PARAMETER: { + /* If we're about to lex a label, we need to add the label state to + * make sure the 
next newline is ignored. */ + if (parser->current.type == PM_TOKEN_LABEL) { + lex_state_set(parser, parser->lex_state | PM_LEX_STATE_LABEL); + } - pm_node_t *right = parse_expression(parser, pm_binding_powers[operator.type].left, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); + params = parse_parameters( + parser, + PM_BINDING_POWER_DEFINED, + false, + false, + true, + true, + false, + PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES, + (uint16_t) (depth + 1) + ); - // Unary .. and ... are special because these are non-associative - // operators that can also be unary operators. In this case we need - // to explicitly reject code that has a .. or ... that follows this - // expression. - if (match2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) { - pm_parser_err_current(parser, PM_ERR_UNEXPECTED_RANGE_OPERATOR); - } + /* Reject `def * = 1` and similar. We have to specifically check for + * them because they create ambiguity with optional arguments. 
*/ + accept_endless_def = false; - return UP(pm_range_node_create(parser, NULL, &operator, right)); + context_pop(parser); + break; } - case PM_TOKEN_FLOAT: - parser_lex(parser); - return UP(pm_float_node_create(parser, &parser->previous)); - case PM_TOKEN_FLOAT_IMAGINARY: - parser_lex(parser); - return UP(pm_float_node_imaginary_create(parser, &parser->previous)); - case PM_TOKEN_FLOAT_RATIONAL: - parser_lex(parser); - return UP(pm_float_node_rational_create(parser, &parser->previous)); - case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY: - parser_lex(parser); - return UP(pm_float_node_rational_imaginary_create(parser, &parser->previous)); - case PM_TOKEN_NUMBERED_REFERENCE: { - parser_lex(parser); - pm_node_t *node = UP(pm_numbered_reference_read_node_create(parser, &parser->previous)); + default: { + params = NULL; + context_pop(parser); + break; + } + } - if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) { - node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1)); - } + pm_node_t *statements = NULL; + pm_token_t equal = { 0 }; + pm_token_t end_keyword = { 0 }; - return node; + if (accept1(parser, PM_TOKEN_EQUAL)) { + if (token_is_setter_name(&name)) { + pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER); } - case PM_TOKEN_GLOBAL_VARIABLE: { - parser_lex(parser); - pm_node_t *node = UP(pm_global_variable_read_node_create(parser, &parser->previous)); + if (!accept_endless_def) { + pm_parser_err_previous(parser, PM_ERR_DEF_ENDLESS_PARAMETERS); + } + if ( + parser->current_context->context == PM_CONTEXT_DEFAULT_PARAMS && + parser->current_context->prev->context == PM_CONTEXT_BLOCK_PARAMETERS + ) { + PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_START(parser, &def_keyword), PM_TOKENS_LENGTH(&def_keyword, &parser->previous), PM_ERR_UNEXPECTED_PARAMETER_DEFAULT_VALUE, "endless method definition"); + } + equal = parser->previous; - if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, 
PM_TOKEN_COMMA)) { - node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1)); - } + context_push(parser, PM_CONTEXT_DEF); + pm_do_loop_stack_push(parser, false); + statements = UP(pm_statements_node_create(parser)); - return node; + uint8_t allow_flags; + if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) { + allow_flags = flags & PM_PARSE_ACCEPTS_COMMAND_CALL; + } else { + /* Allow `def foo = puts "Hello"` but not + * `private def foo = puts "Hello"` */ + allow_flags = (binding_power == PM_BINDING_POWER_ASSIGNMENT || binding_power < PM_BINDING_POWER_COMPOSITION) ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0; } - case PM_TOKEN_BACK_REFERENCE: { - parser_lex(parser); - pm_node_t *node = UP(pm_back_reference_read_node_create(parser, &parser->previous)); - if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) { - node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1)); - } + /* Inside a def body, we push true onto the accepts_block_stack so that + * `do` is lexed as PM_TOKEN_KEYWORD_DO (which can only start a block + * for primary-level constructs, not commands). During command argument + * parsing, the stack is pushed to false, causing `do` to be lexed as + * PM_TOKEN_KEYWORD_DO_BLOCK, which is not consumed inside the endless + * def body and instead left for the outer context. */ + pm_accepts_block_stack_push(parser, true); + pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, allow_flags | PM_PARSE_IN_ENDLESS_DEF, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1)); + pm_accepts_block_stack_pop(parser); - return node; + /* If an unconsumed PM_TOKEN_KEYWORD_DO follows the body, it is an error + * (e.g., `def f = 1 do end`). PM_TOKEN_KEYWORD_DO_BLOCK is + * intentionally not caught here — it should bubble up to the outer + * context (e.g., `private def f = puts "Hello" do end` where the block + * attaches to `private`). 
*/ + if (accept1(parser, PM_TOKEN_KEYWORD_DO)) { + pm_block_node_t *block = parse_block(parser, (uint16_t) (depth + 1)); + pm_parser_err_node(parser, UP(block), PM_ERR_DEF_ENDLESS_DO_BLOCK); } - case PM_TOKEN_IDENTIFIER: - case PM_TOKEN_METHOD_NAME: { - parser_lex(parser); - pm_token_t identifier = parser->previous; - pm_node_t *node = parse_variable_call(parser); - if (PM_NODE_TYPE_P(node, PM_CALL_NODE)) { - // If parse_variable_call returned with a call node, then we - // know the identifier is not in the local table. In that case - // we need to check if there are arguments following the - // identifier. - pm_call_node_t *call = (pm_call_node_t *) node; - pm_arguments_t arguments = { 0 }; + if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) { + context_push(parser, PM_CONTEXT_RESCUE_MODIFIER); - if (parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1))) { - // Since we found arguments, we need to turn off the - // variable call bit in the flags. - pm_node_flag_unset(UP(call), PM_CALL_NODE_FLAGS_VARIABLE_CALL); + pm_token_t rescue_keyword = parser->previous; - call->opening_loc = arguments.opening_loc; - call->arguments = arguments.arguments; - call->closing_loc = arguments.closing_loc; - call->block = arguments.block; + /* In the Ruby grammar, the rescue value of an endless method + * command excludes and/or and in/=>. */ + pm_node_t *value = parse_expression(parser, PM_BINDING_POWER_MATCH + 1, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1)); + context_pop(parser); - const pm_location_t *end = pm_arguments_end(&arguments); - if (end == NULL) { - PM_NODE_LENGTH_SET_LOCATION(call, &call->message_loc); - } else { - PM_NODE_LENGTH_SET_LOCATION(call, end); - } - } - } else { - // Otherwise, we know the identifier is in the local table. This - // can still be a method call if it is followed by arguments or - // a block, so we need to check for that here. 
- if ( - ((flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) || - (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) || - match1(parser, PM_TOKEN_BRACE_LEFT) - ) { - pm_arguments_t arguments = { 0 }; - parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1)); - pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments); + statement = UP(pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value)); + } - if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) { - // If we're about to convert an 'it' implicit local - // variable read into a method call, we need to remove - // it from the list of implicit local variables. - pm_node_unreference(parser, node); - } else { - // Otherwise, we're about to convert a regular local - // variable read into a method call, in which case we - // need to indicate that this was not a read for the - // purposes of warnings. - assert(PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)); + /* A nested endless def whose body is a command call (e.g., + * `def f = def g = foo bar`) is a command assignment and cannot appear + * as a def body. 
*/ + if (PM_NODE_TYPE_P(statement, PM_DEF_NODE) && pm_command_call_value_p(statement)) { + PM_PARSER_ERR_NODE_FORMAT(parser, statement, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type)); + } - if (pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &identifier), PM_TOKEN_LENGTH(&identifier))) { - pm_node_unreference(parser, node); - } else { - pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node; - pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name); - } - } + pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false); + pm_do_loop_stack_pop(parser); + context_pop(parser); + } else { + if (lparen.start == NULL) { + lex_state_set(parser, PM_LEX_STATE_BEG); + parser->command_start = true; + expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM); + } else { + accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON); + } - return UP(fcall); - } - } + pm_accepts_block_stack_push(parser, true); + pm_do_loop_stack_push(parser, false); - if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) { - node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1)); - } + if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) { + pm_accepts_block_stack_push(parser, true); + statements = UP(parse_statements(parser, PM_CONTEXT_DEF, (uint16_t) (depth + 1))); + pm_accepts_block_stack_pop(parser); + } - return node; + if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) { + assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE)); + statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &def_keyword, def_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_DEF, (uint16_t) (depth + 1))); + } else { + parser_warn_indentation_mismatch(parser, 
opening_newline_index, &def_keyword, false, false); } - case PM_TOKEN_HEREDOC_START: { - // Here we have found a heredoc. We'll parse it and add it to the - // list of strings. - assert(parser->lex_modes.current->mode == PM_LEX_HEREDOC); - pm_heredoc_lex_mode_t lex_mode = parser->lex_modes.current->as.heredoc.base; - size_t common_whitespace = (size_t) -1; - parser->lex_modes.current->as.heredoc.common_whitespace = &common_whitespace; + pm_accepts_block_stack_pop(parser); + pm_do_loop_stack_pop(parser); - parser_lex(parser); - pm_token_t opening = parser->previous; + expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM, &def_keyword); + end_keyword = parser->previous; + } - pm_node_t *node; - pm_node_t *part; + pm_constant_id_list_t locals; + pm_locals_order(parser, &parser->current_scope->locals, &locals, false); + pm_parser_scope_pop(parser); - if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) { - // If we get here, then we have an empty heredoc. We'll create - // an empty content token and return an empty string node. - expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length); - pm_token_t content = parse_strings_empty_content(parser->previous.start); + /* If the final character is `@` as is the case when defining methods to + * override the unary operators, we should ignore the @ in the same way we + * do for symbols. 
*/ + pm_constant_id_t name_id = pm_parser_constant_id_raw(parser, name.start, parse_operator_symbol_name(&name)); - if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) { - node = UP(pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY)); + flush_block_exits(parser, previous_block_exits); + + return UP(pm_def_node_create( + parser, + name_id, + &name, + receiver, + params, + statements, + &locals, + &def_keyword, + NTOK2PTR(operator), + NTOK2PTR(lparen), + NTOK2PTR(rparen), + NTOK2PTR(equal), + NTOK2PTR(end_keyword) + )); +} + +/** + * Parse a module definition expression (the `module` keyword). + */ +static pm_node_t * +parse_module(pm_parser_t *parser, uint8_t flags, uint16_t depth) { + pm_node_list_t current_block_exits = { 0 }; + pm_node_list_t *previous_block_exits = push_block_exits(parser, ¤t_block_exits); + + size_t opening_newline_index = token_newline_index(parser); + parser_lex(parser); + pm_token_t module_keyword = parser->previous; + + pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_MODULE_NAME, (uint16_t) (depth + 1)); + pm_token_t name; + + /* If we can recover from a syntax error that occurred while parsing the + * name of the module, then we'll handle that here. */ + if (PM_NODE_TYPE_P(constant_path, PM_ERROR_RECOVERY_NODE)) { + pop_block_exits(parser, previous_block_exits); + + pm_token_t missing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end }; + return UP(pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing)); + } + + while (accept1(parser, PM_TOKEN_COLON_COLON)) { + pm_token_t double_colon = parser->previous; + + expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT); + constant_path = UP(pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous)); + } + + /* Here we retrieve the name of the module. 
If it wasn't a constant, then + * it's possible that `module foo` was passed, which is a syntax error. We + * handle that here as well. */ + name = parser->previous; + if (name.type != PM_TOKEN_CONSTANT) { + pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME); + } + + if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE) && !PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !PM_NODE_TYPE_P(constant_path, PM_ERROR_RECOVERY_NODE)) { + constant_path = UP(pm_error_recovery_node_create_unexpected(parser, constant_path)); + } + + pm_parser_scope_push(parser, true); + accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE); + pm_node_t *statements = NULL; + + if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) { + pm_accepts_block_stack_push(parser, true); + statements = UP(parse_statements(parser, PM_CONTEXT_MODULE, (uint16_t) (depth + 1))); + pm_accepts_block_stack_pop(parser); + } + + if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) { + assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE)); + statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &module_keyword, module_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE, (uint16_t) (depth + 1))); + } else { + parser_warn_indentation_mismatch(parser, opening_newline_index, &module_keyword, false, false); + } + + pm_constant_id_list_t locals; + pm_locals_order(parser, &parser->current_scope->locals, &locals, false); + + pm_parser_scope_pop(parser); + expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM, &module_keyword); + + if (context_def_p(parser)) { + pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD); + } + + pop_block_exits(parser, previous_block_exits); + + return UP(pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous)); +} + +/** + * Parse an 
interpolated word array literal (`%W[...]`). + */ +static pm_node_t * +parse_string_array(pm_parser_t *parser, uint16_t depth) { + parser_lex(parser); + pm_token_t opening = parser->previous; + pm_array_node_t *array = pm_array_node_create(parser, &opening); + + /* This is the current node that we are parsing that will be added to the + * list of elements. */ + pm_node_t *current = NULL; + + while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) { + switch (parser->current.type) { + case PM_TOKEN_WORDS_SEP: { + /* Reset the explicit encoding if we hit a separator since each + * element can have its own encoding. */ + parser->explicit_encoding = NULL; + + if (current == NULL) { + /* If we hit a separator before we have any content, then we + * don't need to do anything. */ } else { - node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY)); + /* If we hit a separator after we've hit content, then we + * need to append that content to the list and reset the + * current node. */ + pm_array_node_elements_append(parser->arena, array, current); + current = NULL; } - PM_NODE_LENGTH_SET_TOKEN(parser, node, &opening); - } else if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) == NULL) { - // If we get here, then we tried to find something in the - // heredoc but couldn't actually parse anything, so we'll just - // return a missing node. - // - // parse_string_part handles its own errors, so there is no need - // for us to add one here. - node = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous))); - } else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) { - // If we get here, then the part that we parsed was plain string - // content and we're at the end of the heredoc, so we can return - // just a string node with the heredoc opening and closing as - // its opening and closing. 
- pm_node_flag_set(part, parse_unescaped_encoding(parser)); - pm_string_node_t *cast = (pm_string_node_t *) part; + parser_lex(parser); + break; + } + case PM_TOKEN_STRING_CONTENT: { + pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL)); + pm_node_flag_set(string, parse_unescaped_encoding(parser)); + parser_lex(parser); - cast->opening_loc = TOK2LOC(parser, &opening); - cast->closing_loc = TOK2LOC(parser, &parser->current); - cast->base.location = cast->opening_loc; + if (current == NULL) { + /* If we hit content and the current node is NULL, then this + * is the first string content we've seen. In that case + * we're going to create a new string node and set that to + * the current. */ + current = string; + } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) { + /* If we hit string content and the current node is an + * interpolated string, then we need to append the string + * content to the list of child nodes. */ + pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, string); + } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) { + /* If we hit string content and the current node is a string + * node, then we need to convert the current node into an + * interpolated string and add the string content to the + * list of child nodes. 
*/ + pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL); + pm_interpolated_string_node_append(parser, interpolated, current); + pm_interpolated_string_node_append(parser, interpolated, string); + current = UP(interpolated); + } else { + assert(false && "unreachable"); + } - if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) { - assert(sizeof(pm_string_node_t) == sizeof(pm_x_string_node_t)); - cast->base.type = PM_X_STRING_NODE; + break; + } + case PM_TOKEN_EMBVAR: { + if (current == NULL) { + /* If we hit an embedded variable and the current node is + * NULL, then this is the start of a new string. We'll set + * the current node to a new interpolated string. */ + current = UP(pm_interpolated_string_node_create(parser, NULL, NULL, NULL)); + } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) { + /* If we hit an embedded variable and the current node is a + * string node, then we'll convert the current into an + * interpolated string and add the string node to the list + * of parts. */ + pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL); + pm_interpolated_string_node_append(parser, interpolated, current); + current = UP(interpolated); + } else { + /* If we hit an embedded variable and the current node is an + * interpolated string, then we'll just add the embedded + * variable. */ } - if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) { - parse_heredoc_dedent_string(parser->arena, &cast->unescaped, common_whitespace); + pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1)); + pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, part); + break; + } + case PM_TOKEN_EMBEXPR_BEGIN: { + if (current == NULL) { + /* If we hit an embedded expression and the current node is + * NULL, then this is the start of a new string. 
We'll set + * the current node to a new interpolated string. */ + current = UP(pm_interpolated_string_node_create(parser, NULL, NULL, NULL)); + } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) { + /* If we hit an embedded expression and the current node is + * a string node, then we'll convert the current into an + * interpolated string and add the string node to the list + * of parts. */ + pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL); + pm_interpolated_string_node_append(parser, interpolated, current); + current = UP(interpolated); + } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) { + /* If we hit an embedded expression and the current node is + * an interpolated string, then we'll just continue on. */ + } else { + assert(false && "unreachable"); } - node = UP(cast); - expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length); - } else { - // If we get here, then we have multiple parts in the heredoc, - // so we'll need to create an interpolated string node to hold - // them all. - pm_node_list_t parts = { 0 }; - pm_node_list_append(parser->arena, &parts, part); + pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1)); + pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, part); + break; + } + default: + expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_UPPER_ELEMENT); + parser_lex(parser); + break; + } + } - while (!match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) { - if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) { - pm_node_list_append(parser->arena, &parts, part); - } - } + /* If we have a current node, then we need to append it to the list. */ + if (current) { + pm_array_node_elements_append(parser->arena, array, current); + } - // Now that we have all of the parts, create the correct type of - // interpolated node. 
- if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) { - pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening); - cast->parts = parts; + pm_token_t closing = parser->current; + if (match1(parser, PM_TOKEN_EOF)) { + pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM); + closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end }; + } else { + expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM); + } - expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length); - pm_interpolated_xstring_node_closing_set(parser, cast, &parser->previous); + pm_array_node_close_set(parser, array, &closing); + return UP(array); +} - cast->base.location = cast->opening_loc; - node = UP(cast); - } else { - pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening); +/** + * Parse an interpolated symbol array literal (`%I[...]`). + */ +static pm_node_t * +parse_symbol_array(pm_parser_t *parser, uint16_t depth) { + parser_lex(parser); + pm_token_t opening = parser->previous; + pm_array_node_t *array = pm_array_node_create(parser, &opening); - expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length); - pm_interpolated_string_node_closing_set(parser, cast, &parser->previous); + /* This is the current node that we are parsing that will be added to the + * list of elements. */ + pm_node_t *current = NULL; - cast->base.location = cast->opening_loc; - node = UP(cast); + while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) { + switch (parser->current.type) { + case PM_TOKEN_WORDS_SEP: { + if (current == NULL) { + /* If we hit a separator before we have any content, then we + * don't need to do anything. */ + } else { + /* If we hit a separator after we've hit content, then we + * need to append that content to the list and reset the + * current node. 
*/ + pm_array_node_elements_append(parser->arena, array, current); + current = NULL; } - // If this is a heredoc that is indented with a ~, then we need - // to dedent each line by the common leading whitespace. - if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) { - pm_node_list_t *nodes; - if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) { - nodes = &((pm_interpolated_x_string_node_t *) node)->parts; - } else { - nodes = &((pm_interpolated_string_node_t *) node)->parts; - } + parser_lex(parser); + break; + } + case PM_TOKEN_STRING_CONTENT: { + if (current == NULL) { + /* If we hit content and the current node is NULL, then this + * is the first string content we've seen. In that case + * we're going to create a new string node and set that to + * the current. */ + current = UP(pm_symbol_node_create_current_string(parser, NULL, &parser->current, NULL)); + parser_lex(parser); + } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) { + /* If we hit string content and the current node is an + * interpolated string, then we need to append the string + * content to the list of child nodes. */ + pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL)); + parser_lex(parser); - parse_heredoc_dedent(parser, nodes, common_whitespace); + pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, string); + } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) { + /* If we hit string content and the current node is a symbol + * node, then we need to convert the current node into an + * interpolated string and add the string content to the + * list of child nodes. 
*/ + pm_symbol_node_t *cast = (pm_symbol_node_t *) current; + pm_token_t content = { + .type = PM_TOKEN_STRING_CONTENT, + .start = parser->start + cast->value_loc.start, + .end = parser->start + cast->value_loc.start + cast->value_loc.length + }; + + pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &cast->unescaped)); + pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, NULL, &parser->previous, NULL)); + parser_lex(parser); + + pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL); + pm_interpolated_symbol_node_append(parser->arena, interpolated, first_string); + pm_interpolated_symbol_node_append(parser->arena, interpolated, second_string); + + current = UP(interpolated); + } else { + assert(false && "unreachable"); } + + break; } + case PM_TOKEN_EMBVAR: { + bool start_location_set = false; + if (current == NULL) { + /* If we hit an embedded variable and the current node is + * NULL, then this is the start of a new string. We'll set + * the current node to a new interpolated string. */ + current = UP(pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL)); + } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) { + /* If we hit an embedded variable and the current node is a + * string node, then we'll convert the current into an + * interpolated string and add the string node to the list + * of parts. */ + pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL); + + current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current)); + pm_interpolated_symbol_node_append(parser->arena, interpolated, current); + PM_NODE_START_SET_NODE(interpolated, current); + start_location_set = true; + current = UP(interpolated); + } else { + /* If we hit an embedded variable and the current node is an + * interpolated string, then we'll just add the embedded + * variable. 
*/ + } - if (match1(parser, PM_TOKEN_STRING_BEGIN)) { - return parse_strings(parser, node, false, (uint16_t) (depth + 1)); + pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1)); + pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, part); + if (!start_location_set) { + PM_NODE_START_SET_NODE(current, part); + } + break; } + case PM_TOKEN_EMBEXPR_BEGIN: { + bool start_location_set = false; + if (current == NULL) { + /* If we hit an embedded expression and the current node is + * NULL, then this is the start of a new string. We'll set + * the current node to a new interpolated string. */ + current = UP(pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL)); + } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) { + /* If we hit an embedded expression and the current node is + * a string node, then we'll convert the current into an + * interpolated string and add the string node to the list + * of parts. */ + pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL); + + current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current)); + pm_interpolated_symbol_node_append(parser->arena, interpolated, current); + PM_NODE_START_SET_NODE(interpolated, current); + start_location_set = true; + current = UP(interpolated); + } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) { + /* If we hit an embedded expression and the current node is + * an interpolated string, then we'll just continue on. 
*/ + } else { + assert(false && "unreachable"); + } - return node; + pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1)); + pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, part); + if (!start_location_set) { + PM_NODE_START_SET_NODE(current, part); + } + break; + } + default: + expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_UPPER_ELEMENT); + parser_lex(parser); + break; } - case PM_TOKEN_INSTANCE_VARIABLE: { - parser_lex(parser); - pm_node_t *node = UP(pm_instance_variable_read_node_create(parser, &parser->previous)); + } - if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) { - node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1)); - } + /* If we have a current node, then we need to append it to the list. */ + if (current) { + pm_array_node_elements_append(parser->arena, array, current); + } - return node; - } - case PM_TOKEN_INTEGER: { - pm_node_flags_t base = parser->integer.base; - parser_lex(parser); - return UP(pm_integer_node_create(parser, base, &parser->previous)); - } - case PM_TOKEN_INTEGER_IMAGINARY: { - pm_node_flags_t base = parser->integer.base; - parser_lex(parser); - return UP(pm_integer_node_imaginary_create(parser, base, &parser->previous)); - } - case PM_TOKEN_INTEGER_RATIONAL: { - pm_node_flags_t base = parser->integer.base; - parser_lex(parser); - return UP(pm_integer_node_rational_create(parser, base, &parser->previous)); - } - case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: { - pm_node_flags_t base = parser->integer.base; - parser_lex(parser); - return UP(pm_integer_node_rational_imaginary_create(parser, base, &parser->previous)); + pm_token_t closing = parser->current; + if (match1(parser, PM_TOKEN_EOF)) { + pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM); + closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end }; + } else { + expect1(parser, 
PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM); + } + pm_array_node_close_set(parser, array, &closing); + + return UP(array); +} + +/** + * Parse a parenthesized expression, which could be a grouping, a multi-target + * assignment, or a set of statements. + */ +static pm_node_t * +parse_parentheses(pm_parser_t *parser, pm_binding_power_t binding_power, uint16_t depth) { + pm_token_t opening = parser->current; + pm_node_flags_t paren_flags = 0; + + pm_node_list_t current_block_exits = { 0 }; + pm_node_list_t *previous_block_exits = push_block_exits(parser, ¤t_block_exits); + + parser_lex(parser); + while (true) { + if (accept1(parser, PM_TOKEN_SEMICOLON)) { + paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS; + } else if (!accept1(parser, PM_TOKEN_NEWLINE)) { + break; } - case PM_TOKEN_KEYWORD___ENCODING__: - parser_lex(parser); - return UP(pm_source_encoding_node_create(parser, &parser->previous)); - case PM_TOKEN_KEYWORD___FILE__: - parser_lex(parser); - return UP(pm_source_file_node_create(parser, &parser->previous)); - case PM_TOKEN_KEYWORD___LINE__: - parser_lex(parser); - return UP(pm_source_line_node_create(parser, &parser->previous)); - case PM_TOKEN_KEYWORD_ALIAS: { - if (binding_power != PM_BINDING_POWER_STATEMENT) { - pm_parser_err_current(parser, PM_ERR_STATEMENT_ALIAS); - } + } - parser_lex(parser); - pm_token_t keyword = parser->previous; + /* If this is the end of the file or we match a right parenthesis, then we + * have an empty parentheses node, and we can immediately return. 
*/ + if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_EOF)) { + expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN); + pop_block_exits(parser, previous_block_exits); + return UP(pm_parentheses_node_create(parser, &opening, NULL, &parser->previous, paren_flags)); + } - pm_node_t *new_name = parse_alias_argument(parser, true, (uint16_t) (depth + 1)); - pm_node_t *old_name = parse_alias_argument(parser, false, (uint16_t) (depth + 1)); + /* Otherwise, we're going to parse the first statement in the list of + * statements within the parentheses. */ + pm_accepts_block_stack_push(parser, true); + context_push(parser, PM_CONTEXT_PARENS); + pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1)); + context_pop(parser); - switch (PM_NODE_TYPE(new_name)) { - case PM_BACK_REFERENCE_READ_NODE: - case PM_NUMBERED_REFERENCE_READ_NODE: - case PM_GLOBAL_VARIABLE_READ_NODE: { - if (PM_NODE_TYPE_P(old_name, PM_BACK_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_GLOBAL_VARIABLE_READ_NODE)) { - if (PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE)) { - pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE); - } - } else if (!PM_NODE_TYPE_P(old_name, PM_ERROR_RECOVERY_NODE)) { - pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT); - old_name = UP(pm_error_recovery_node_create_unexpected(parser, old_name)); - } + /* Determine if this statement is followed by a terminator. In the case of a + * single statement, this is fine. But in the case of multiple statements + * it's required. 
*/ + bool terminator_found = false; - return UP(pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name)); - } - case PM_SYMBOL_NODE: - case PM_INTERPOLATED_SYMBOL_NODE: { - if (!PM_NODE_TYPE_P(old_name, PM_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_INTERPOLATED_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_ERROR_RECOVERY_NODE)) { - pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT); - old_name = UP(pm_error_recovery_node_create_unexpected(parser, old_name)); - } - } - PRISM_FALLTHROUGH - default: - return UP(pm_alias_method_node_create(parser, &keyword, new_name, old_name)); + if (accept1(parser, PM_TOKEN_SEMICOLON)) { + terminator_found = true; + paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS; + } else if (accept1(parser, PM_TOKEN_NEWLINE)) { + terminator_found = true; + } + + if (terminator_found) { + while (true) { + if (accept1(parser, PM_TOKEN_SEMICOLON)) { + paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS; + } else if (!accept1(parser, PM_TOKEN_NEWLINE)) { + break; } } - case PM_TOKEN_KEYWORD_CASE: { - size_t opening_newline_index = token_newline_index(parser); - parser_lex(parser); + } + + /* If we hit a right parenthesis, then we're done parsing the parentheses + * node, and we can check which kind of node we should return. 
*/ + if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) { + if (opening.type == PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES) { + lex_state_set(parser, PM_LEX_STATE_ENDARG); + } - pm_token_t case_keyword = parser->previous; - pm_node_t *predicate = NULL; + parser_lex(parser); + pm_accepts_block_stack_pop(parser); + pop_block_exits(parser, previous_block_exits); - pm_node_list_t current_block_exits = { 0 }; - pm_node_list_t *previous_block_exits = push_block_exits(parser, ¤t_block_exits); + if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) { + /* If we have a single statement and are ending on a right + * parenthesis, then we need to check if this is possibly a multiple + * target node. */ + pm_multi_target_node_t *multi_target; - if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { - while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)); - predicate = NULL; - } else if (match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_END)) { - predicate = NULL; - } else if (!token_begins_expression_p(parser->current.type)) { - predicate = NULL; + if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.length == 0) { + multi_target = (pm_multi_target_node_t *) statement; } else { - predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_CASE_EXPRESSION_AFTER_CASE, (uint16_t) (depth + 1)); - while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)); + multi_target = pm_multi_target_node_create(parser); + pm_multi_target_node_targets_append(parser, multi_target, statement); } - if (match1(parser, PM_TOKEN_KEYWORD_END)) { - parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false); - parser_lex(parser); - pop_block_exits(parser, previous_block_exits); - pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS); 
- return UP(pm_case_node_create(parser, &case_keyword, predicate, &parser->previous)); + multi_target->lparen_loc = TOK2LOC(parser, &opening); + multi_target->rparen_loc = TOK2LOC(parser, &parser->previous); + PM_NODE_START_SET_TOKEN(parser, multi_target, &opening); + PM_NODE_LENGTH_SET_TOKEN(parser, multi_target, &parser->previous); + + pm_node_t *result; + if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) { + result = parse_targets(parser, UP(multi_target), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1)); + accept1(parser, PM_TOKEN_NEWLINE); + } else { + result = UP(multi_target); } - // At this point we can create a case node, though we don't yet know - // if it is a case-in or case-when node. - pm_node_t *node; + if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) { + /* All set, this is explicitly allowed by the parent context. */ + } else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) { + /* All set, we're inside a for loop and we're parsing multiple + * targets. */ + } else if (binding_power != PM_BINDING_POWER_STATEMENT) { + /* Multi targets are not allowed when it's not a statement + * level. */ + pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED); + } else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) { + /* Multi targets must be followed by an equal sign in order to + * be valid (or a right parenthesis if they are nested). */ + pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED); + } - if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) { - pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, NULL); - pm_static_literals_t literals = { 0 }; + return result; + } - // At this point we've seen a when keyword, so we know this is a - // case-when node. We will continue to parse the when nodes - // until we hit the end of the list. 
- while (match1(parser, PM_TOKEN_KEYWORD_WHEN)) { - parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true); - parser_lex(parser); + /* If we have a single statement and are ending on a right parenthesis + * and we didn't return a multiple assignment node, then we can return a + * regular parentheses node now. */ + pm_statements_node_t *statements = pm_statements_node_create(parser); + pm_statements_node_body_append(parser, statements, statement, true); - pm_token_t when_keyword = parser->previous; - pm_when_node_t *when_node = pm_when_node_create(parser, &when_keyword); + return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, paren_flags)); + } - do { - if (accept1(parser, PM_TOKEN_USTAR)) { - pm_token_t operator = parser->previous; - pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1)); + /* If we have more than one statement in the set of parentheses, then we are + * going to parse all of them as a list of statements. We'll do that here. + */ + context_push(parser, PM_CONTEXT_PARENS); + paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS; - pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression); - pm_when_node_conditions_append(parser->arena, when_node, UP(splat_node)); + pm_statements_node_t *statements = pm_statements_node_create(parser); + pm_statements_node_body_append(parser, statements, statement, true); - if (PM_NODE_TYPE_P(expression, PM_ERROR_RECOVERY_NODE)) break; - } else { - pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, (uint16_t) (depth + 1)); - pm_when_node_conditions_append(parser->arena, when_node, condition); - - // If we found a missing node, then this is a syntax - // error and we should stop looping. 
- if (PM_NODE_TYPE_P(condition, PM_ERROR_RECOVERY_NODE)) break; - - // If this is a string node, then we need to mark it - // as frozen because when clause strings are frozen. - if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) { - pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL); - } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) { - pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL); - } + /* If we didn't find a terminator and we didn't find a right parenthesis, + * then this is a syntax error. */ + if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) { + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type)); + } - pm_when_clause_static_literals_add(parser, &literals, condition); - } - } while (accept1(parser, PM_TOKEN_COMMA)); + /* Parse each statement within the parentheses. */ + while (true) { + pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1)); + pm_statements_node_body_append(parser, statements, node, true); - if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { - if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) { - pm_when_node_then_keyword_loc_set(parser, when_node, &parser->previous); - } - } else { - expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER); - pm_when_node_then_keyword_loc_set(parser, when_node, &parser->previous); - } + /* If we're recovering from a syntax error, then we need to stop parsing + * the statements now. */ + if (parser->recovering) { + /* If this is the level of context where the recovery has happened, + * then we can mark the parser as done recovering. 
*/ + if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) parser->recovering = false; + break; + } - if (!match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) { - pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_CASE_WHEN, (uint16_t) (depth + 1)); - if (statements != NULL) { - pm_when_node_statements_set(when_node, statements); - } - } + /* If we couldn't parse an expression at all, then we need to bail out + * of the loop. */ + if (PM_NODE_TYPE_P(node, PM_ERROR_RECOVERY_NODE)) break; - pm_case_node_condition_append(parser->arena, case_node, UP(when_node)); - } + /* If we successfully parsed a statement, then we are going to need a + * terminator to delimit them. */ + if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { + while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)); + if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break; + } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) { + break; + } else if (!match1(parser, PM_TOKEN_EOF)) { + /* If we're at the end of the file, then we're going to add an error + * after this for the ) anyway. */ + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type)); + } + } - // If we didn't parse any conditions (in or when) then we need - // to indicate that we have an error. - if (case_node->conditions.size == 0) { - pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS); - } + context_pop(parser); + pm_accepts_block_stack_pop(parser); + expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN); - pm_static_literals_free(&literals); - node = UP(case_node); - } else { - pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate); + /* When we're parsing multi targets, we allow them to be followed by a right + * parenthesis if they are at the statement level. This is only possible if + * they are the final statement in a parentheses. 
We need to explicitly + * reject that here. */ + { + pm_node_t *statement = statements->body.nodes[statements->body.size - 1]; - // If this is a case-match node (i.e., it is a pattern matching - // case statement) then we must have a predicate. - if (predicate == NULL) { - pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE); - } + if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) { + pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser); + pm_multi_target_node_targets_append(parser, multi_target, statement); - // At this point we expect that we're parsing a case-in node. We - // will continue to parse the in nodes until we hit the end of - // the list. - while (match1(parser, PM_TOKEN_KEYWORD_IN)) { - parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true); + statement = UP(multi_target); + statements->body.nodes[statements->body.size - 1] = statement; + } - bool previous_pattern_matching_newlines = parser->pattern_matching_newlines; - parser->pattern_matching_newlines = true; + if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) { + const uint8_t *offset = parser->start + PM_NODE_END(statement); + pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset }; + pm_node_t *value = UP(pm_error_recovery_node_create(parser, PM_NODE_END(statement), 0)); - lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL); - parser->command_start = false; - parser_lex(parser); + statement = UP(pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value)); + statements->body.nodes[statements->body.size - 1] = statement; - pm_token_t in_keyword = parser->previous; + pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED); + } + } - pm_constant_id_list_t captures = { 0 }; - pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1)); + 
pop_block_exits(parser, previous_block_exits); + pm_void_statements_check(parser, statements, true); + return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, paren_flags)); +} - parser->pattern_matching_newlines = previous_pattern_matching_newlines; +/** + * Parse an expression that begins with the previous node that we just lexed. + */ +static PRISM_INLINE pm_node_t * +parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) { + switch (parser->current.type) { + case PM_TOKEN_BRACKET_LEFT_ARRAY: { + parser_lex(parser); - // Since we're in the top-level of the case-in node we need - // to check for guard clauses in the form of `if` or - // `unless` statements. - if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) { - pm_token_t keyword = parser->previous; - pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1)); - pattern = UP(pm_if_node_modifier_create(parser, pattern, &keyword, predicate)); - } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) { - pm_token_t keyword = parser->previous; - pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1)); - pattern = UP(pm_unless_node_modifier_create(parser, pattern, &keyword, predicate)); - } + pm_array_node_t *array = pm_array_node_create(parser, &parser->previous); + pm_accepts_block_stack_push(parser, true); + bool parsed_bare_hash = false; + + while (!match2(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_EOF)) { + bool accepted_newline = accept1(parser, PM_TOKEN_NEWLINE); + + // Handle the case where we don't have a comma and we have a + // newline followed by a right bracket. 
+ if (accepted_newline && match1(parser, PM_TOKEN_BRACKET_RIGHT)) { + break; + } - // Now we need to check for the terminator of the in node's - // pattern. It can be a newline or semicolon optionally - // followed by a `then` keyword. - pm_token_t then_keyword = { 0 }; - if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { - if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) { - then_keyword = parser->previous; + // Ensure that we have a comma between elements in the array. + if (array->elements.size > 0) { + if (accept1(parser, PM_TOKEN_COMMA)) { + // If there was a comma but we also accepts a newline, + // then this is a syntax error. + if (accepted_newline) { + pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA); } } else { - expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER); - then_keyword = parser->previous; + // If there was no comma, then we need to add a syntax + // error. + PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_END(parser, &parser->previous), 0, PM_ERR_ARRAY_SEPARATOR, pm_token_str(parser->current.type)); + parser->previous.start = parser->previous.end; + parser->previous.type = 0; } + } + + // If we have a right bracket immediately following a comma, + // this is allowed since it's a trailing comma. In this case we + // can break out of the loop. + if (match1(parser, PM_TOKEN_BRACKET_RIGHT)) break; + + pm_node_t *element; - // Now we can actually parse the statements associated with - // the in node. 
- pm_statements_node_t *statements; - if (match3(parser, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) { - statements = NULL; + if (accept1(parser, PM_TOKEN_USTAR)) { + pm_token_t operator = parser->previous; + pm_node_t *expression = NULL; + + if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) { + pm_parser_scope_forwarding_positionals_check(parser, &operator); } else { - statements = parse_statements(parser, PM_CONTEXT_CASE_IN, (uint16_t) (depth + 1)); + expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1)); } - // Now that we have the full pattern and statements, we can - // create the node and attach it to the case node. - pm_node_t *condition = UP(pm_in_node_create(parser, pattern, statements, &in_keyword, NTOK2PTR(then_keyword))); - pm_case_match_node_condition_append(parser->arena, case_node, condition); - } - - // If we didn't parse any conditions (in or when) then we need - // to indicate that we have an error. 
- if (case_node->conditions.size == 0) { - pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS); - } + element = UP(pm_splat_node_create(parser, &operator, expression)); + } else if (match2(parser, PM_TOKEN_LABEL, PM_TOKEN_USTAR_STAR)) { + if (parsed_bare_hash) { + pm_parser_err_current(parser, PM_ERR_EXPRESSION_BARE_HASH); + } - node = UP(case_node); - } + element = UP(pm_keyword_hash_node_create(parser)); + pm_static_literals_t hash_keys = { 0 }; - accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON); - if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) { - pm_token_t else_keyword = parser->previous; - pm_else_node_t *else_node; + if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_DO_BLOCK, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) { + parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1)); + } - if (!match1(parser, PM_TOKEN_KEYWORD_END)) { - else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1)), &parser->current); + pm_static_literals_free(&hash_keys); + parsed_bare_hash = true; } else { - else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current); - } + element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_LABEL, PM_ERR_ARRAY_EXPRESSION, (uint16_t) (depth + 1)); - if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) { - pm_case_node_else_clause_set((pm_case_node_t *) node, else_node); - } else { - pm_case_match_node_else_clause_set((pm_case_match_node_t *) node, else_node); + if (pm_symbol_node_label_p(parser, element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) { + if (parsed_bare_hash) { + pm_parser_err_previous(parser, PM_ERR_EXPRESSION_BARE_HASH); + } + + pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser); + pm_static_literals_t hash_keys = { 0 }; + 
pm_hash_key_static_literals_add(parser, &hash_keys, element); + + pm_token_t operator = { 0 }; + if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) { + operator = parser->previous; + } + + pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1)); + pm_node_t *assoc = UP(pm_assoc_node_create(parser, element, NTOK2PTR(operator), value)); + pm_keyword_hash_node_elements_append(parser->arena, hash, assoc); + + element = UP(hash); + if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) { + parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1)); + } + + pm_static_literals_free(&hash_keys); + parsed_bare_hash = true; + } } + + pm_array_node_elements_append(parser->arena, array, element); + if (PM_NODE_TYPE_P(element, PM_ERROR_RECOVERY_NODE)) break; } - parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false); - expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM, &case_keyword); + accept1(parser, PM_TOKEN_NEWLINE); - if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) { - pm_case_node_end_keyword_loc_set(parser, (pm_case_node_t *) node, &parser->previous); - } else { - pm_case_match_node_end_keyword_loc_set(parser, (pm_case_match_node_t *) node, &parser->previous); + if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) { + PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARRAY_TERM, pm_token_str(parser->current.type)); + parser->previous.start = parser->previous.end; + parser->previous.type = 0; } - pop_block_exits(parser, previous_block_exits); - return node; + pm_array_node_close_set(parser, array, &parser->previous); + pm_accepts_block_stack_pop(parser); + + return UP(array); } - case PM_TOKEN_KEYWORD_BEGIN: { - size_t opening_newline_index = token_newline_index(parser); + case PM_TOKEN_PARENTHESIS_LEFT: + case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: + return parse_parentheses(parser, 
binding_power, depth); + case PM_TOKEN_BRACE_LEFT: { + // If we were passed a current_hash_keys via the parser, then that + // means we're already parsing a hash and we want to share the set + // of hash keys with this inner hash we're about to parse for the + // sake of warnings. We'll set it to NULL after we grab it to make + // sure subsequent expressions don't use it. Effectively this is a + // way of getting around passing it to every call to + // parse_expression. + pm_static_literals_t *current_hash_keys = parser->current_hash_keys; + parser->current_hash_keys = NULL; + + pm_accepts_block_stack_push(parser, true); parser_lex(parser); - pm_token_t begin_keyword = parser->previous; - accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON); + pm_token_t opening = parser->previous; + pm_hash_node_t *node = pm_hash_node_create(parser, &opening); - pm_node_list_t current_block_exits = { 0 }; - pm_node_list_t *previous_block_exits = push_block_exits(parser, ¤t_block_exits); - pm_statements_node_t *begin_statements = NULL; + if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) { + if (current_hash_keys != NULL) { + parse_assocs(parser, current_hash_keys, UP(node), (uint16_t) (depth + 1)); + } else { + pm_static_literals_t hash_keys = { 0 }; + parse_assocs(parser, &hash_keys, UP(node), (uint16_t) (depth + 1)); + pm_static_literals_free(&hash_keys); + } - if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) { - pm_accepts_block_stack_push(parser, true); - begin_statements = parse_statements(parser, PM_CONTEXT_BEGIN, (uint16_t) (depth + 1)); - pm_accepts_block_stack_pop(parser); - accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON); + accept1(parser, PM_TOKEN_NEWLINE); } - pm_begin_node_t *begin_node = pm_begin_node_create(parser, &begin_keyword, begin_statements); - parse_rescues(parser, opening_newline_index, &begin_keyword, begin_node, PM_RESCUES_BEGIN, (uint16_t) (depth + 1)); - 
expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM, &begin_keyword); + pm_accepts_block_stack_pop(parser); + expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM, &opening); + pm_hash_node_closing_loc_set(parser, node, &parser->previous); - PM_NODE_LENGTH_SET_TOKEN(parser, begin_node, &parser->previous); - pm_begin_node_end_keyword_set(parser, begin_node, &parser->previous); - pop_block_exits(parser, previous_block_exits); - return UP(begin_node); + return UP(node); } - case PM_TOKEN_KEYWORD_BEGIN_UPCASE: { - pm_node_list_t current_block_exits = { 0 }; - pm_node_list_t *previous_block_exits = push_block_exits(parser, ¤t_block_exits); + case PM_TOKEN_CHARACTER_LITERAL: { + pm_node_t *node = UP(pm_string_node_create_current_string( + parser, + &(pm_token_t) { + .type = PM_TOKEN_STRING_BEGIN, + .start = parser->current.start, + .end = parser->current.start + 1 + }, + &(pm_token_t) { + .type = PM_TOKEN_STRING_CONTENT, + .start = parser->current.start + 1, + .end = parser->current.end + }, + NULL + )); - if (binding_power != PM_BINDING_POWER_STATEMENT) { - pm_parser_err_current(parser, PM_ERR_STATEMENT_PREEXE_BEGIN); - } + pm_node_flag_set(node, parse_unescaped_encoding(parser)); + // Skip past the character literal here, since now we have handled + // parser->explicit_encoding correctly. parser_lex(parser); - pm_token_t keyword = parser->previous; - - expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_BEGIN_UPCASE_BRACE); - pm_token_t opening = parser->previous; - pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_PREEXE, (uint16_t) (depth + 1)); - expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM, &opening); - pm_context_t context = parser->current_context->context; - if ((context != PM_CONTEXT_MAIN) && (context != PM_CONTEXT_PREEXE)) { - pm_parser_err_token(parser, &keyword, PM_ERR_BEGIN_UPCASE_TOPLEVEL); + // Characters can be followed by strings in which case they are + // automatically concatenated. 
+ if (match1(parser, PM_TOKEN_STRING_BEGIN)) { + return parse_strings(parser, node, false, (uint16_t) (depth + 1)); } - flush_block_exits(parser, previous_block_exits); - return UP(pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous)); + return node; } - case PM_TOKEN_KEYWORD_BREAK: - case PM_TOKEN_KEYWORD_NEXT: - case PM_TOKEN_KEYWORD_RETURN: { + case PM_TOKEN_CLASS_VARIABLE: { parser_lex(parser); + pm_node_t *node = UP(pm_class_variable_read_node_create(parser, &parser->previous)); - pm_token_t keyword = parser->previous; - pm_arguments_t arguments = { 0 }; - - if ( - token_begins_expression_p(parser->current.type) || - match2(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR) - ) { - pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left; - - if (binding_power == PM_BINDING_POWER_UNSET || binding_power >= PM_BINDING_POWER_RANGE) { - pm_token_t next = parser->current; - parse_arguments(parser, &arguments, false, PM_TOKEN_EOF, flags, (uint16_t) (depth + 1)); - - // Reject `foo && return bar`. 
- if (!(flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && arguments.arguments != NULL) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, &next, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(next.type)); - } - } + if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) { + node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1)); } - switch (keyword.type) { - case PM_TOKEN_KEYWORD_BREAK: { - pm_node_t *node = UP(pm_break_node_create(parser, &keyword, arguments.arguments)); - if (!parser->partial_script) parse_block_exit(parser, node); - return node; - } - case PM_TOKEN_KEYWORD_NEXT: { - pm_node_t *node = UP(pm_next_node_create(parser, &keyword, arguments.arguments)); - if (!parser->partial_script) parse_block_exit(parser, node); - return node; - } - case PM_TOKEN_KEYWORD_RETURN: { - pm_node_t *node = UP(pm_return_node_create(parser, &keyword, arguments.arguments)); - parse_return(parser, node); - return node; - } - default: - assert(false && "unreachable"); - return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous))); - } + return node; } - case PM_TOKEN_KEYWORD_SUPER: { + case PM_TOKEN_CONSTANT: { parser_lex(parser); + pm_token_t constant = parser->previous; - pm_token_t keyword = parser->previous; - pm_arguments_t arguments = { 0 }; - parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1)); - + // If a constant is immediately followed by parentheses, then this is in + // fact a method call, not a constant read. 
if ( - arguments.opening_loc.length == 0 && - arguments.arguments == NULL && - ((arguments.block == NULL) || PM_NODE_TYPE_P(arguments.block, PM_BLOCK_NODE)) + match1(parser, PM_TOKEN_PARENTHESIS_LEFT) || + ((flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) || + (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) || + match1(parser, PM_TOKEN_BRACE_LEFT) ) { - return UP(pm_forwarding_super_node_create(parser, &keyword, &arguments)); + pm_arguments_t arguments = { 0 }; + parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1)); + return UP(pm_call_node_fcall_create(parser, &constant, &arguments)); } - return UP(pm_super_node_create(parser, &keyword, &arguments)); - } - case PM_TOKEN_KEYWORD_YIELD: { - parser_lex(parser); - - pm_token_t keyword = parser->previous; - pm_arguments_t arguments = { 0 }; - parse_arguments_list(parser, &arguments, false, flags, (uint16_t) (depth + 1)); + pm_node_t *node = UP(pm_constant_read_node_create(parser, &parser->previous)); - // It's possible that we've parsed a block argument through our - // call to parse_arguments_list. If we found one, we should mark it - // as invalid and destroy it, as we don't have a place for it on the - // yield node. - if (arguments.block != NULL) { - pm_parser_err_node(parser, arguments.block, PM_ERR_UNEXPECTED_BLOCK_ARGUMENT); - pm_node_unreference(parser, arguments.block); - arguments.block = NULL; + if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) { + // If we get here, then we have a comma immediately following a + // constant, so we're going to parse this as a multiple assignment. 
+ node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1)); } - pm_node_t *node = UP(pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc)); - if (!parser->parsing_eval && !parser->partial_script) parse_yield(parser, node); - return node; } - case PM_TOKEN_KEYWORD_CLASS: { - size_t opening_newline_index = token_newline_index(parser); + case PM_TOKEN_UCOLON_COLON: { parser_lex(parser); + pm_token_t delimiter = parser->previous; - pm_token_t class_keyword = parser->previous; - pm_do_loop_stack_push(parser, false); - - pm_node_list_t current_block_exits = { 0 }; - pm_node_list_t *previous_block_exits = push_block_exits(parser, ¤t_block_exits); + expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT); + pm_node_t *node = UP(pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous)); - if (accept1(parser, PM_TOKEN_LESS_LESS)) { - pm_token_t operator = parser->previous; - pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS, (uint16_t) (depth + 1)); + if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) { + node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1)); + } - pm_parser_scope_push(parser, true); - if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_str(parser->current.type)); - } + return node; + } + case PM_TOKEN_UDOT_DOT: + case PM_TOKEN_UDOT_DOT_DOT: { + pm_token_t operator = parser->current; + parser_lex(parser); - pm_node_t *statements = NULL; - if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) { - pm_accepts_block_stack_push(parser, true); - 
statements = UP(parse_statements(parser, PM_CONTEXT_SCLASS, (uint16_t) (depth + 1))); - pm_accepts_block_stack_pop(parser); - } + pm_node_t *right = parse_expression(parser, pm_binding_powers[operator.type].left, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1)); - if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) { - assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE)); - statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_SCLASS, (uint16_t) (depth + 1))); - } else { - parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false); - } + // Unary .. and ... are special because these are non-associative + // operators that can also be unary operators. In this case we need + // to explicitly reject code that has a .. or ... that follows this + // expression. 
+ if (match2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) { + pm_parser_err_current(parser, PM_ERR_UNEXPECTED_RANGE_OPERATOR); + } - expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword); + return UP(pm_range_node_create(parser, NULL, &operator, right)); + } + case PM_TOKEN_FLOAT: + parser_lex(parser); + return UP(pm_float_node_create(parser, &parser->previous)); + case PM_TOKEN_FLOAT_IMAGINARY: + parser_lex(parser); + return UP(pm_float_node_imaginary_create(parser, &parser->previous)); + case PM_TOKEN_FLOAT_RATIONAL: + parser_lex(parser); + return UP(pm_float_node_rational_create(parser, &parser->previous)); + case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY: + parser_lex(parser); + return UP(pm_float_node_rational_imaginary_create(parser, &parser->previous)); + case PM_TOKEN_NUMBERED_REFERENCE: { + parser_lex(parser); + pm_node_t *node = UP(pm_numbered_reference_read_node_create(parser, &parser->previous)); - pm_constant_id_list_t locals; - pm_locals_order(parser, &parser->current_scope->locals, &locals, false); + if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) { + node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1)); + } - pm_parser_scope_pop(parser); - pm_do_loop_stack_pop(parser); + return node; + } + case PM_TOKEN_GLOBAL_VARIABLE: { + parser_lex(parser); + pm_node_t *node = UP(pm_global_variable_read_node_create(parser, &parser->previous)); - flush_block_exits(parser, previous_block_exits); - return UP(pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous)); + if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) { + node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1)); } - pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CLASS_NAME, (uint16_t) (depth + 1)); - 
pm_token_t name = parser->previous; - if (name.type != PM_TOKEN_CONSTANT) { - pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME); - } + return node; + } + case PM_TOKEN_BACK_REFERENCE: { + parser_lex(parser); + pm_node_t *node = UP(pm_back_reference_read_node_create(parser, &parser->previous)); - pm_token_t inheritance_operator = { 0 }; - pm_node_t *superclass; + if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) { + node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1)); + } - if (match1(parser, PM_TOKEN_LESS)) { - inheritance_operator = parser->current; - lex_state_set(parser, PM_LEX_STATE_BEG); + return node; + } + case PM_TOKEN_IDENTIFIER: + case PM_TOKEN_METHOD_NAME: { + parser_lex(parser); + pm_token_t identifier = parser->previous; + pm_node_t *node = parse_variable_call(parser); - parser->command_start = true; - parser_lex(parser); + if (PM_NODE_TYPE_P(node, PM_CALL_NODE)) { + // If parse_variable_call returned with a call node, then we + // know the identifier is not in the local table. In that case + // we need to check if there are arguments following the + // identifier. + pm_call_node_t *call = (pm_call_node_t *) node; + pm_arguments_t arguments = { 0 }; - superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1)); - } else { - superclass = NULL; - } + if (parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1))) { + // Since we found arguments, we need to turn off the + // variable call bit in the flags. 
+ pm_node_flag_unset(UP(call), PM_CALL_NODE_FLAGS_VARIABLE_CALL); - pm_parser_scope_push(parser, true); + call->opening_loc = arguments.opening_loc; + call->arguments = arguments.arguments; + call->closing_loc = arguments.closing_loc; + call->block = arguments.block; - if (inheritance_operator.start != NULL) { - expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END); + const pm_location_t *end = pm_arguments_end(&arguments); + if (end == NULL) { + PM_NODE_LENGTH_SET_LOCATION(call, &call->message_loc); + } else { + PM_NODE_LENGTH_SET_LOCATION(call, end); + } + } } else { - accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON); - } - pm_node_t *statements = NULL; - - if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) { - pm_accepts_block_stack_push(parser, true); - statements = UP(parse_statements(parser, PM_CONTEXT_CLASS, (uint16_t) (depth + 1))); - pm_accepts_block_stack_pop(parser); - } + // Otherwise, we know the identifier is in the local table. This + // can still be a method call if it is followed by arguments or + // a block, so we need to check for that here. 
+ if ( + ((flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) || + (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) || + match1(parser, PM_TOKEN_BRACE_LEFT) + ) { + pm_arguments_t arguments = { 0 }; + parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1)); + pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments); - if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) { - assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE)); - statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_CLASS, (uint16_t) (depth + 1))); - } else { - parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false); - } + if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) { + // If we're about to convert an 'it' implicit local + // variable read into a method call, we need to remove + // it from the list of implicit local variables. + pm_node_unreference(parser, node); + } else { + // Otherwise, we're about to convert a regular local + // variable read into a method call, in which case we + // need to indicate that this was not a read for the + // purposes of warnings. 
+ assert(PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)); - expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword); + if (pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &identifier), PM_TOKEN_LENGTH(&identifier))) { + pm_node_unreference(parser, node); + } else { + pm_local_variable_read_node_t *cast = (pm_local_variable_read_node_t *) node; + pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name); + } + } - if (context_def_p(parser)) { - pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD); + return UP(fcall); + } } - pm_constant_id_list_t locals; - pm_locals_order(parser, &parser->current_scope->locals, &locals, false); - - pm_parser_scope_pop(parser); - pm_do_loop_stack_pop(parser); - - if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) { - pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME); - if (!PM_NODE_TYPE_P(constant_path, PM_ERROR_RECOVERY_NODE)) { - constant_path = UP(pm_error_recovery_node_create_unexpected(parser, constant_path)); - } + if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) { + node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1)); } - pop_block_exits(parser, previous_block_exits); - return UP(pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, NTOK2PTR(inheritance_operator), superclass, statements, &parser->previous)); + return node; } - case PM_TOKEN_KEYWORD_DEF: { - pm_node_list_t current_block_exits = { 0 }; - pm_node_list_t *previous_block_exits = push_block_exits(parser, ¤t_block_exits); - - pm_token_t def_keyword = parser->current; - size_t opening_newline_index = token_newline_index(parser); + case PM_TOKEN_HEREDOC_START: { + // Here we have found a heredoc. We'll parse it and add it to the + // list of strings. 
+ assert(parser->lex_modes.current->mode == PM_LEX_HEREDOC); + pm_heredoc_lex_mode_t lex_mode = parser->lex_modes.current->as.heredoc.base; - pm_node_t *receiver = NULL; - pm_token_t operator = { 0 }; - pm_token_t name; + size_t common_whitespace = (size_t) -1; + parser->lex_modes.current->as.heredoc.common_whitespace = &common_whitespace; - // This context is necessary for lexing `...` in a bare params - // correctly. It must be pushed before lexing the first param, so it - // is here. - context_push(parser, PM_CONTEXT_DEF_PARAMS); parser_lex(parser); + pm_token_t opening = parser->previous; - // This will be false if the method name is not a valid identifier - // but could be followed by an operator. - bool valid_name = true; + pm_node_t *node; + pm_node_t *part; - switch (parser->current.type) { - case PM_CASE_OPERATOR: - pm_parser_scope_push(parser, true); - lex_state_set(parser, PM_LEX_STATE_ENDFN); - parser_lex(parser); + if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) { + // If we get here, then we have an empty heredoc. We'll create + // an empty content token and return an empty string node. 
+ expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length); + pm_token_t content = parse_strings_empty_content(parser->previous.start); - name = parser->previous; - break; - case PM_TOKEN_IDENTIFIER: { - parser_lex(parser); + if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) { + node = UP(pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY)); + } else { + node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY)); + } - if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) { - receiver = parse_variable_call(parser); + PM_NODE_LENGTH_SET_TOKEN(parser, node, &opening); + } else if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) == NULL) { + // If we get here, then we tried to find something in the + // heredoc but couldn't actually parse anything, so we'll just + // return a missing node. + // + // parse_string_part handles its own errors, so there is no need + // for us to add one here. + node = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous))); + } else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) { + // If we get here, then the part that we parsed was plain string + // content and we're at the end of the heredoc, so we can return + // just a string node with the heredoc opening and closing as + // its opening and closing. 
+ pm_node_flag_set(part, parse_unescaped_encoding(parser)); + pm_string_node_t *cast = (pm_string_node_t *) part; - pm_parser_scope_push(parser, true); - lex_state_set(parser, PM_LEX_STATE_FNAME); - parser_lex(parser); + cast->opening_loc = TOK2LOC(parser, &opening); + cast->closing_loc = TOK2LOC(parser, &parser->current); + cast->base.location = cast->opening_loc; - operator = parser->previous; - name = parse_method_definition_name(parser); - } else { - pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)); - pm_parser_scope_push(parser, true); + if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) { + assert(sizeof(pm_string_node_t) == sizeof(pm_x_string_node_t)); + cast->base.type = PM_X_STRING_NODE; + } - name = parser->previous; - } + if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) { + parse_heredoc_dedent_string(parser->arena, &cast->unescaped, common_whitespace); + } - break; + node = UP(cast); + expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length); + } else { + // If we get here, then we have multiple parts in the heredoc, + // so we'll need to create an interpolated string node to hold + // them all. 
+ pm_node_list_t parts = { 0 }; + pm_node_list_append(parser->arena, &parts, part); + + while (!match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) { + if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) { + pm_node_list_append(parser->arena, &parts, part); + } } - case PM_TOKEN_INSTANCE_VARIABLE: - case PM_TOKEN_CLASS_VARIABLE: - case PM_TOKEN_GLOBAL_VARIABLE: - valid_name = false; - PRISM_FALLTHROUGH - case PM_TOKEN_CONSTANT: - case PM_TOKEN_KEYWORD_NIL: - case PM_TOKEN_KEYWORD_SELF: - case PM_TOKEN_KEYWORD_TRUE: - case PM_TOKEN_KEYWORD_FALSE: - case PM_TOKEN_KEYWORD___FILE__: - case PM_TOKEN_KEYWORD___LINE__: - case PM_TOKEN_KEYWORD___ENCODING__: { - pm_parser_scope_push(parser, true); - parser_lex(parser); - pm_token_t identifier = parser->previous; + // Now that we have all of the parts, create the correct type of + // interpolated node. + if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) { + pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening); + cast->parts = parts; - if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) { - lex_state_set(parser, PM_LEX_STATE_FNAME); - parser_lex(parser); - operator = parser->previous; + expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length); + pm_interpolated_xstring_node_closing_set(parser, cast, &parser->previous); - switch (identifier.type) { - case PM_TOKEN_CONSTANT: - receiver = UP(pm_constant_read_node_create(parser, &identifier)); - break; - case PM_TOKEN_INSTANCE_VARIABLE: - receiver = UP(pm_instance_variable_read_node_create(parser, &identifier)); - break; - case PM_TOKEN_CLASS_VARIABLE: - receiver = UP(pm_class_variable_read_node_create(parser, &identifier)); - break; - case PM_TOKEN_GLOBAL_VARIABLE: - receiver = UP(pm_global_variable_read_node_create(parser, &identifier)); - break; - case PM_TOKEN_KEYWORD_NIL: - receiver = UP(pm_nil_node_create(parser, &identifier)); - break; - case PM_TOKEN_KEYWORD_SELF: - receiver = 
UP(pm_self_node_create(parser, &identifier)); - break; - case PM_TOKEN_KEYWORD_TRUE: - receiver = UP(pm_true_node_create(parser, &identifier)); - break; - case PM_TOKEN_KEYWORD_FALSE: - receiver = UP(pm_false_node_create(parser, &identifier)); - break; - case PM_TOKEN_KEYWORD___FILE__: - receiver = UP(pm_source_file_node_create(parser, &identifier)); - break; - case PM_TOKEN_KEYWORD___LINE__: - receiver = UP(pm_source_line_node_create(parser, &identifier)); - break; - case PM_TOKEN_KEYWORD___ENCODING__: - receiver = UP(pm_source_encoding_node_create(parser, &identifier)); - break; - default: - break; - } + cast->base.location = cast->opening_loc; + node = UP(cast); + } else { + pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening); - name = parse_method_definition_name(parser); - } else { - if (!valid_name) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, &identifier, PM_ERR_DEF_NAME, pm_token_str(identifier.type)); - } + expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length); + pm_interpolated_string_node_closing_set(parser, cast, &parser->previous); - name = identifier; - } - break; + cast->base.location = cast->opening_loc; + node = UP(cast); } - case PM_TOKEN_PARENTHESIS_LEFT: { - // The current context is `PM_CONTEXT_DEF_PARAMS`, however - // the inner expression of this parenthesis should not be - // processed under this context. Thus, the context is popped - // here. - context_pop(parser); - parser_lex(parser); - pm_token_t lparen = parser->previous; - pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_DEF_RECEIVER, (uint16_t) (depth + 1)); + // If this is a heredoc that is indented with a ~, then we need + // to dedent each line by the common leading whitespace. 
+ if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) { + pm_node_list_t *nodes; + if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) { + nodes = &((pm_interpolated_x_string_node_t *) node)->parts; + } else { + nodes = &((pm_interpolated_string_node_t *) node)->parts; + } - accept1(parser, PM_TOKEN_NEWLINE); - expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN); - pm_token_t rparen = parser->previous; + parse_heredoc_dedent(parser, nodes, common_whitespace); + } + } - lex_state_set(parser, PM_LEX_STATE_FNAME); - expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM); + if (match1(parser, PM_TOKEN_STRING_BEGIN)) { + return parse_strings(parser, node, false, (uint16_t) (depth + 1)); + } - operator = parser->previous; - receiver = UP(pm_parentheses_node_create(parser, &lparen, expression, &rparen, 0)); + return node; + } + case PM_TOKEN_INSTANCE_VARIABLE: { + parser_lex(parser); + pm_node_t *node = UP(pm_instance_variable_read_node_create(parser, &parser->previous)); - // To push `PM_CONTEXT_DEF_PARAMS` again is for the same - // reason as described the above. 
- pm_parser_scope_push(parser, true); - context_push(parser, PM_CONTEXT_DEF_PARAMS); - name = parse_method_definition_name(parser); - break; - } - default: - pm_parser_scope_push(parser, true); - name = parse_method_definition_name(parser); - break; + if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) { + node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1)); } - pm_token_t lparen = { 0 }; - pm_token_t rparen = { 0 }; - pm_parameters_node_t *params; - - bool accept_endless_def = true; - switch (parser->current.type) { - case PM_TOKEN_PARENTHESIS_LEFT: { - parser_lex(parser); - lparen = parser->previous; + return node; + } + case PM_TOKEN_INTEGER: { + pm_node_flags_t base = parser->integer.base; + parser_lex(parser); + return UP(pm_integer_node_create(parser, base, &parser->previous)); + } + case PM_TOKEN_INTEGER_IMAGINARY: { + pm_node_flags_t base = parser->integer.base; + parser_lex(parser); + return UP(pm_integer_node_imaginary_create(parser, base, &parser->previous)); + } + case PM_TOKEN_INTEGER_RATIONAL: { + pm_node_flags_t base = parser->integer.base; + parser_lex(parser); + return UP(pm_integer_node_rational_create(parser, base, &parser->previous)); + } + case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: { + pm_node_flags_t base = parser->integer.base; + parser_lex(parser); + return UP(pm_integer_node_rational_imaginary_create(parser, base, &parser->previous)); + } + case PM_TOKEN_KEYWORD___ENCODING__: + parser_lex(parser); + return UP(pm_source_encoding_node_create(parser, &parser->previous)); + case PM_TOKEN_KEYWORD___FILE__: + parser_lex(parser); + return UP(pm_source_file_node_create(parser, &parser->previous)); + case PM_TOKEN_KEYWORD___LINE__: + parser_lex(parser); + return UP(pm_source_line_node_create(parser, &parser->previous)); + case PM_TOKEN_KEYWORD_ALIAS: { + if (binding_power != PM_BINDING_POWER_STATEMENT) { + pm_parser_err_current(parser, PM_ERR_STATEMENT_ALIAS); + } - if 
(match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) { - params = NULL; - } else { - // https://bugs.ruby-lang.org/issues/19107 - bool allow_trailing_comma = parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1; - params = parse_parameters( - parser, - PM_BINDING_POWER_DEFINED, - true, - allow_trailing_comma, - true, - true, - false, - PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES, - (uint16_t) (depth + 1) - ); - } + parser_lex(parser); + pm_token_t keyword = parser->previous; - lex_state_set(parser, PM_LEX_STATE_BEG); - parser->command_start = true; + pm_node_t *new_name = parse_alias_argument(parser, true, (uint16_t) (depth + 1)); + pm_node_t *old_name = parse_alias_argument(parser, false, (uint16_t) (depth + 1)); - context_pop(parser); - if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) { - PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_str(parser->current.type)); - parser->previous.start = parser->previous.end; - parser->previous.type = 0; + switch (PM_NODE_TYPE(new_name)) { + case PM_BACK_REFERENCE_READ_NODE: + case PM_NUMBERED_REFERENCE_READ_NODE: + case PM_GLOBAL_VARIABLE_READ_NODE: { + if (PM_NODE_TYPE_P(old_name, PM_BACK_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_GLOBAL_VARIABLE_READ_NODE)) { + if (PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE)) { + pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE); + } + } else if (!PM_NODE_TYPE_P(old_name, PM_ERROR_RECOVERY_NODE)) { + pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT); + old_name = UP(pm_error_recovery_node_create_unexpected(parser, old_name)); } - rparen = parser->previous; - break; + return UP(pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name)); } - case PM_CASE_PARAMETER: { - // If we're about to lex a label, we need to add the label - // state to make sure the next newline is ignored. 
- if (parser->current.type == PM_TOKEN_LABEL) { - lex_state_set(parser, parser->lex_state | PM_LEX_STATE_LABEL); + case PM_SYMBOL_NODE: + case PM_INTERPOLATED_SYMBOL_NODE: { + if (!PM_NODE_TYPE_P(old_name, PM_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_INTERPOLATED_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_ERROR_RECOVERY_NODE)) { + pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT); + old_name = UP(pm_error_recovery_node_create_unexpected(parser, old_name)); } + } + PRISM_FALLTHROUGH + default: + return UP(pm_alias_method_node_create(parser, &keyword, new_name, old_name)); + } + } + case PM_TOKEN_KEYWORD_CASE: + return parse_case(parser, flags, depth); + case PM_TOKEN_KEYWORD_BEGIN: { + size_t opening_newline_index = token_newline_index(parser); + parser_lex(parser); - params = parse_parameters( - parser, - PM_BINDING_POWER_DEFINED, - false, - false, - true, - true, - false, - PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES, - (uint16_t) (depth + 1) - ); + pm_token_t begin_keyword = parser->previous; + accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON); - // Reject `def * = 1` and similar. We have to specifically check - // for them because they create ambiguity with optional arguments. 
- accept_endless_def = false; + pm_node_list_t current_block_exits = { 0 }; + pm_node_list_t *previous_block_exits = push_block_exits(parser, ¤t_block_exits); + pm_statements_node_t *begin_statements = NULL; - context_pop(parser); - break; - } - default: { - params = NULL; - context_pop(parser); - break; - } + if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) { + pm_accepts_block_stack_push(parser, true); + begin_statements = parse_statements(parser, PM_CONTEXT_BEGIN, (uint16_t) (depth + 1)); + pm_accepts_block_stack_pop(parser); + accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON); } - pm_node_t *statements = NULL; - pm_token_t equal = { 0 }; - pm_token_t end_keyword = { 0 }; + pm_begin_node_t *begin_node = pm_begin_node_create(parser, &begin_keyword, begin_statements); + parse_rescues(parser, opening_newline_index, &begin_keyword, begin_node, PM_RESCUES_BEGIN, (uint16_t) (depth + 1)); + expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM, &begin_keyword); + + PM_NODE_LENGTH_SET_TOKEN(parser, begin_node, &parser->previous); + pm_begin_node_end_keyword_set(parser, begin_node, &parser->previous); + pop_block_exits(parser, previous_block_exits); + return UP(begin_node); + } + case PM_TOKEN_KEYWORD_BEGIN_UPCASE: { + pm_node_list_t current_block_exits = { 0 }; + pm_node_list_t *previous_block_exits = push_block_exits(parser, ¤t_block_exits); - if (accept1(parser, PM_TOKEN_EQUAL)) { - if (token_is_setter_name(&name)) { - pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER); - } - if (!accept_endless_def) { - pm_parser_err_previous(parser, PM_ERR_DEF_ENDLESS_PARAMETERS); - } - if ( - parser->current_context->context == PM_CONTEXT_DEFAULT_PARAMS && - parser->current_context->prev->context == PM_CONTEXT_BLOCK_PARAMETERS - ) { - PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_START(parser, &def_keyword), PM_TOKENS_LENGTH(&def_keyword, &parser->previous), 
PM_ERR_UNEXPECTED_PARAMETER_DEFAULT_VALUE, "endless method definition"); - } - equal = parser->previous; + if (binding_power != PM_BINDING_POWER_STATEMENT) { + pm_parser_err_current(parser, PM_ERR_STATEMENT_PREEXE_BEGIN); + } - context_push(parser, PM_CONTEXT_DEF); - pm_do_loop_stack_push(parser, false); - statements = UP(pm_statements_node_create(parser)); + parser_lex(parser); + pm_token_t keyword = parser->previous; - uint8_t allow_flags; - if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) { - allow_flags = flags & PM_PARSE_ACCEPTS_COMMAND_CALL; - } else { - // Allow `def foo = puts "Hello"` but not `private def foo = puts "Hello"` - allow_flags = (binding_power == PM_BINDING_POWER_ASSIGNMENT || binding_power < PM_BINDING_POWER_COMPOSITION) ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0; - } - - // Inside a def body, we push true onto the - // accepts_block_stack so that `do` is lexed as - // PM_TOKEN_KEYWORD_DO (which can only start a block for - // primary-level constructs, not commands). During command - // argument parsing, the stack is pushed to false, causing - // `do` to be lexed as PM_TOKEN_KEYWORD_DO_BLOCK, which - // is not consumed inside the endless def body and instead - // left for the outer context. - pm_accepts_block_stack_push(parser, true); - pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, allow_flags | PM_PARSE_IN_ENDLESS_DEF, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1)); - pm_accepts_block_stack_pop(parser); + expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_BEGIN_UPCASE_BRACE); + pm_token_t opening = parser->previous; + pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_PREEXE, (uint16_t) (depth + 1)); - // If an unconsumed PM_TOKEN_KEYWORD_DO follows the body, - // it is an error (e.g., `def f = 1 do end`). 
- // PM_TOKEN_KEYWORD_DO_BLOCK is intentionally not caught - // here — it should bubble up to the outer context (e.g., - // `private def f = puts "Hello" do end` where the block - // attaches to `private`). - if (accept1(parser, PM_TOKEN_KEYWORD_DO)) { - pm_block_node_t *block = parse_block(parser, (uint16_t) (depth + 1)); - pm_parser_err_node(parser, UP(block), PM_ERR_DEF_ENDLESS_DO_BLOCK); - } + expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM, &opening); + pm_context_t context = parser->current_context->context; + if ((context != PM_CONTEXT_MAIN) && (context != PM_CONTEXT_PREEXE)) { + pm_parser_err_token(parser, &keyword, PM_ERR_BEGIN_UPCASE_TOPLEVEL); + } - if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) { - context_push(parser, PM_CONTEXT_RESCUE_MODIFIER); + flush_block_exits(parser, previous_block_exits); + return UP(pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous)); + } + case PM_TOKEN_KEYWORD_BREAK: + case PM_TOKEN_KEYWORD_NEXT: + case PM_TOKEN_KEYWORD_RETURN: { + parser_lex(parser); - pm_token_t rescue_keyword = parser->previous; + pm_token_t keyword = parser->previous; + pm_arguments_t arguments = { 0 }; - // In the Ruby grammar, the rescue value of an endless - // method command excludes and/or and in/=>. 
- pm_node_t *value = parse_expression(parser, PM_BINDING_POWER_MATCH + 1, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1)); - context_pop(parser); + if ( + token_begins_expression_p(parser->current.type) || + match2(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR) + ) { + pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left; - statement = UP(pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value)); - } + if (binding_power == PM_BINDING_POWER_UNSET || binding_power >= PM_BINDING_POWER_RANGE) { + pm_token_t next = parser->current; + parse_arguments(parser, &arguments, false, PM_TOKEN_EOF, flags, (uint16_t) (depth + 1)); - // A nested endless def whose body is a command call (e.g., - // `def f = def g = foo bar`) is a command assignment and - // cannot appear as a def body. - if (PM_NODE_TYPE_P(statement, PM_DEF_NODE) && pm_command_call_value_p(statement)) { - PM_PARSER_ERR_NODE_FORMAT(parser, statement, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type)); + // Reject `foo && return bar`. 
+ if (!(flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && arguments.arguments != NULL) { + PM_PARSER_ERR_TOKEN_FORMAT(parser, &next, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(next.type)); + } } + } - pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false); - pm_do_loop_stack_pop(parser); - context_pop(parser); - } else { - if (lparen.start == NULL) { - lex_state_set(parser, PM_LEX_STATE_BEG); - parser->command_start = true; - expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM); - } else { - accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON); + switch (keyword.type) { + case PM_TOKEN_KEYWORD_BREAK: { + pm_node_t *node = UP(pm_break_node_create(parser, &keyword, arguments.arguments)); + if (!parser->partial_script) parse_block_exit(parser, node); + return node; } - - pm_accepts_block_stack_push(parser, true); - pm_do_loop_stack_push(parser, false); - - if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) { - pm_accepts_block_stack_push(parser, true); - statements = UP(parse_statements(parser, PM_CONTEXT_DEF, (uint16_t) (depth + 1))); - pm_accepts_block_stack_pop(parser); + case PM_TOKEN_KEYWORD_NEXT: { + pm_node_t *node = UP(pm_next_node_create(parser, &keyword, arguments.arguments)); + if (!parser->partial_script) parse_block_exit(parser, node); + return node; } - - if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) { - assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE)); - statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &def_keyword, def_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_DEF, (uint16_t) (depth + 1))); - } else { - parser_warn_indentation_mismatch(parser, opening_newline_index, &def_keyword, false, false); + case PM_TOKEN_KEYWORD_RETURN: { + pm_node_t *node = UP(pm_return_node_create(parser, &keyword, 
arguments.arguments)); + parse_return(parser, node); + return node; } + default: + assert(false && "unreachable"); + return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous))); + } + } + case PM_TOKEN_KEYWORD_SUPER: { + parser_lex(parser); - pm_accepts_block_stack_pop(parser); - pm_do_loop_stack_pop(parser); + pm_token_t keyword = parser->previous; + pm_arguments_t arguments = { 0 }; + parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1)); - expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM, &def_keyword); - end_keyword = parser->previous; + if ( + arguments.opening_loc.length == 0 && + arguments.arguments == NULL && + ((arguments.block == NULL) || PM_NODE_TYPE_P(arguments.block, PM_BLOCK_NODE)) + ) { + return UP(pm_forwarding_super_node_create(parser, &keyword, &arguments)); } - pm_constant_id_list_t locals; - pm_locals_order(parser, &parser->current_scope->locals, &locals, false); - pm_parser_scope_pop(parser); + return UP(pm_super_node_create(parser, &keyword, &arguments)); + } + case PM_TOKEN_KEYWORD_YIELD: { + parser_lex(parser); - /** - * If the final character is `@` as is the case when defining - * methods to override the unary operators, we should ignore - * the @ in the same way we do for symbols. - */ - pm_constant_id_t name_id = pm_parser_constant_id_raw(parser, name.start, parse_operator_symbol_name(&name)); + pm_token_t keyword = parser->previous; + pm_arguments_t arguments = { 0 }; + parse_arguments_list(parser, &arguments, false, flags, (uint16_t) (depth + 1)); - flush_block_exits(parser, previous_block_exits); + // It's possible that we've parsed a block argument through our + // call to parse_arguments_list. If we found one, we should mark it + // as invalid and destroy it, as we don't have a place for it on the + // yield node. 
+ if (arguments.block != NULL) { + pm_parser_err_node(parser, arguments.block, PM_ERR_UNEXPECTED_BLOCK_ARGUMENT); + pm_node_unreference(parser, arguments.block); + arguments.block = NULL; + } - return UP(pm_def_node_create( - parser, - name_id, - &name, - receiver, - params, - statements, - &locals, - &def_keyword, - NTOK2PTR(operator), - NTOK2PTR(lparen), - NTOK2PTR(rparen), - NTOK2PTR(equal), - NTOK2PTR(end_keyword) - )); + pm_node_t *node = UP(pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc)); + if (!parser->parsing_eval && !parser->partial_script) parse_yield(parser, node); + + return node; } + case PM_TOKEN_KEYWORD_CLASS: + return parse_class(parser, flags, depth); + case PM_TOKEN_KEYWORD_DEF: + return parse_def(parser, binding_power, flags, depth); case PM_TOKEN_KEYWORD_DEFINED: { parser_lex(parser); @@ -19560,76 +19956,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, u return parse_conditional(parser, PM_CONTEXT_UNLESS, opening_newline_index, false, (uint16_t) (depth + 1)); } - case PM_TOKEN_KEYWORD_MODULE: { - pm_node_list_t current_block_exits = { 0 }; - pm_node_list_t *previous_block_exits = push_block_exits(parser, ¤t_block_exits); - - size_t opening_newline_index = token_newline_index(parser); - parser_lex(parser); - pm_token_t module_keyword = parser->previous; - - pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_MODULE_NAME, (uint16_t) (depth + 1)); - pm_token_t name; - - // If we can recover from a syntax error that occurred while parsing - // the name of the module, then we'll handle that here. 
- if (PM_NODE_TYPE_P(constant_path, PM_ERROR_RECOVERY_NODE)) { - pop_block_exits(parser, previous_block_exits); - - pm_token_t missing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end }; - return UP(pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing)); - } - - while (accept1(parser, PM_TOKEN_COLON_COLON)) { - pm_token_t double_colon = parser->previous; - - expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT); - constant_path = UP(pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous)); - } - - // Here we retrieve the name of the module. If it wasn't a constant, - // then it's possible that `module foo` was passed, which is a - // syntax error. We handle that here as well. - name = parser->previous; - if (name.type != PM_TOKEN_CONSTANT) { - pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME); - } - - if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE) && !PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !PM_NODE_TYPE_P(constant_path, PM_ERROR_RECOVERY_NODE)) { - constant_path = UP(pm_error_recovery_node_create_unexpected(parser, constant_path)); - } - - pm_parser_scope_push(parser, true); - accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE); - pm_node_t *statements = NULL; - - if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) { - pm_accepts_block_stack_push(parser, true); - statements = UP(parse_statements(parser, PM_CONTEXT_MODULE, (uint16_t) (depth + 1))); - pm_accepts_block_stack_pop(parser); - } - - if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) { - assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE)); - statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &module_keyword, module_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE, 
(uint16_t) (depth + 1))); - } else { - parser_warn_indentation_mismatch(parser, opening_newline_index, &module_keyword, false, false); - } - - pm_constant_id_list_t locals; - pm_locals_order(parser, &parser->current_scope->locals, &locals, false); - - pm_parser_scope_pop(parser); - expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM, &module_keyword); - - if (context_def_p(parser)) { - pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD); - } - - pop_block_exits(parser, previous_block_exits); - - return UP(pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous)); - } + case PM_TOKEN_KEYWORD_MODULE: + return parse_module(parser, flags, depth); case PM_TOKEN_KEYWORD_NIL: parser_lex(parser); return UP(pm_nil_node_create(parser, &parser->previous)); @@ -19779,159 +20107,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, u return UP(array); } - case PM_TOKEN_PERCENT_UPPER_I: { - parser_lex(parser); - pm_token_t opening = parser->previous; - pm_array_node_t *array = pm_array_node_create(parser, &opening); - - // This is the current node that we are parsing that will be added to the - // list of elements. - pm_node_t *current = NULL; - - while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) { - switch (parser->current.type) { - case PM_TOKEN_WORDS_SEP: { - if (current == NULL) { - // If we hit a separator before we have any content, then we don't - // need to do anything. - } else { - // If we hit a separator after we've hit content, then we need to - // append that content to the list and reset the current node. - pm_array_node_elements_append(parser->arena, array, current); - current = NULL; - } - - parser_lex(parser); - break; - } - case PM_TOKEN_STRING_CONTENT: { - if (current == NULL) { - // If we hit content and the current node is NULL, then this is - // the first string content we've seen. 
In that case we're going - // to create a new string node and set that to the current. - current = UP(pm_symbol_node_create_current_string(parser, NULL, &parser->current, NULL)); - parser_lex(parser); - } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) { - // If we hit string content and the current node is an - // interpolated string, then we need to append the string content - // to the list of child nodes. - pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL)); - parser_lex(parser); - - pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, string); - } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) { - // If we hit string content and the current node is a symbol node, - // then we need to convert the current node into an interpolated - // string and add the string content to the list of child nodes. - pm_symbol_node_t *cast = (pm_symbol_node_t *) current; - pm_token_t content = { - .type = PM_TOKEN_STRING_CONTENT, - .start = parser->start + cast->value_loc.start, - .end = parser->start + cast->value_loc.start + cast->value_loc.length - }; - - pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &cast->unescaped)); - pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, NULL, &parser->previous, NULL)); - parser_lex(parser); - - pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL); - pm_interpolated_symbol_node_append(parser->arena, interpolated, first_string); - pm_interpolated_symbol_node_append(parser->arena, interpolated, second_string); - - // current is arena-allocated so no explicit free is needed. 
- current = UP(interpolated); - } else { - assert(false && "unreachable"); - } - - break; - } - case PM_TOKEN_EMBVAR: { - bool start_location_set = false; - if (current == NULL) { - // If we hit an embedded variable and the current node is NULL, - // then this is the start of a new string. We'll set the current - // node to a new interpolated string. - current = UP(pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL)); - } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) { - // If we hit an embedded variable and the current node is a string - // node, then we'll convert the current into an interpolated - // string and add the string node to the list of parts. - pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL); - - current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current)); - pm_interpolated_symbol_node_append(parser->arena, interpolated, current); - PM_NODE_START_SET_NODE(interpolated, current); - start_location_set = true; - current = UP(interpolated); - } else { - // If we hit an embedded variable and the current node is an - // interpolated string, then we'll just add the embedded variable. - } - - pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1)); - pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, part); - if (!start_location_set) { - PM_NODE_START_SET_NODE(current, part); - } - break; - } - case PM_TOKEN_EMBEXPR_BEGIN: { - bool start_location_set = false; - if (current == NULL) { - // If we hit an embedded expression and the current node is NULL, - // then this is the start of a new string. We'll set the current - // node to a new interpolated string. 
- current = UP(pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL)); - } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) { - // If we hit an embedded expression and the current node is a - // string node, then we'll convert the current into an - // interpolated string and add the string node to the list of - // parts. - pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL); - - current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current)); - pm_interpolated_symbol_node_append(parser->arena, interpolated, current); - PM_NODE_START_SET_NODE(interpolated, current); - start_location_set = true; - current = UP(interpolated); - } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) { - // If we hit an embedded expression and the current node is an - // interpolated string, then we'll just continue on. - } else { - assert(false && "unreachable"); - } - - pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1)); - pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, part); - if (!start_location_set) { - PM_NODE_START_SET_NODE(current, part); - } - break; - } - default: - expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_UPPER_ELEMENT); - parser_lex(parser); - break; - } - } - - // If we have a current node, then we need to append it to the list. 
- if (current) { - pm_array_node_elements_append(parser->arena, array, current); - } - - pm_token_t closing = parser->current; - if (match1(parser, PM_TOKEN_EOF)) { - pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM); - closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end }; - } else { - expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM); - } - pm_array_node_close_set(parser, array, &closing); - - return UP(array); - } + case PM_TOKEN_PERCENT_UPPER_I: + return parse_symbol_array(parser, depth); case PM_TOKEN_PERCENT_LOWER_W: { parser_lex(parser); pm_token_t opening = parser->previous; @@ -19982,142 +20159,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, u pm_array_node_close_set(parser, array, &closing); return UP(array); } - case PM_TOKEN_PERCENT_UPPER_W: { - parser_lex(parser); - pm_token_t opening = parser->previous; - pm_array_node_t *array = pm_array_node_create(parser, &opening); - - // This is the current node that we are parsing that will be added - // to the list of elements. - pm_node_t *current = NULL; - - while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) { - switch (parser->current.type) { - case PM_TOKEN_WORDS_SEP: { - // Reset the explicit encoding if we hit a separator - // since each element can have its own encoding. - parser->explicit_encoding = NULL; - - if (current == NULL) { - // If we hit a separator before we have any content, - // then we don't need to do anything. - } else { - // If we hit a separator after we've hit content, - // then we need to append that content to the list - // and reset the current node. 
- pm_array_node_elements_append(parser->arena, array, current); - current = NULL; - } - - parser_lex(parser); - break; - } - case PM_TOKEN_STRING_CONTENT: { - pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL)); - pm_node_flag_set(string, parse_unescaped_encoding(parser)); - parser_lex(parser); - - if (current == NULL) { - // If we hit content and the current node is NULL, - // then this is the first string content we've seen. - // In that case we're going to create a new string - // node and set that to the current. - current = string; - } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) { - // If we hit string content and the current node is - // an interpolated string, then we need to append - // the string content to the list of child nodes. - pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, string); - } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) { - // If we hit string content and the current node is - // a string node, then we need to convert the - // current node into an interpolated string and add - // the string content to the list of child nodes. - pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL); - pm_interpolated_string_node_append(parser, interpolated, current); - pm_interpolated_string_node_append(parser, interpolated, string); - current = UP(interpolated); - } else { - assert(false && "unreachable"); - } - - break; - } - case PM_TOKEN_EMBVAR: { - if (current == NULL) { - // If we hit an embedded variable and the current - // node is NULL, then this is the start of a new - // string. We'll set the current node to a new - // interpolated string. 
- current = UP(pm_interpolated_string_node_create(parser, NULL, NULL, NULL)); - } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) { - // If we hit an embedded variable and the current - // node is a string node, then we'll convert the - // current into an interpolated string and add the - // string node to the list of parts. - pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL); - pm_interpolated_string_node_append(parser, interpolated, current); - current = UP(interpolated); - } else { - // If we hit an embedded variable and the current - // node is an interpolated string, then we'll just - // add the embedded variable. - } - - pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1)); - pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, part); - break; - } - case PM_TOKEN_EMBEXPR_BEGIN: { - if (current == NULL) { - // If we hit an embedded expression and the current - // node is NULL, then this is the start of a new - // string. We'll set the current node to a new - // interpolated string. - current = UP(pm_interpolated_string_node_create(parser, NULL, NULL, NULL)); - } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) { - // If we hit an embedded expression and the current - // node is a string node, then we'll convert the - // current into an interpolated string and add the - // string node to the list of parts. - pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL); - pm_interpolated_string_node_append(parser, interpolated, current); - current = UP(interpolated); - } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) { - // If we hit an embedded expression and the current - // node is an interpolated string, then we'll just - // continue on. 
- } else { - assert(false && "unreachable"); - } - - pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1)); - pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, part); - break; - } - default: - expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_UPPER_ELEMENT); - parser_lex(parser); - break; - } - } - - // If we have a current node, then we need to append it to the list. - if (current) { - pm_array_node_elements_append(parser->arena, array, current); - } - - pm_token_t closing = parser->current; - if (match1(parser, PM_TOKEN_EOF)) { - pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM); - closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end }; - } else { - expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM); - } - - pm_array_node_close_set(parser, array, &closing); - return UP(array); - } + case PM_TOKEN_PERCENT_UPPER_W: + return parse_string_array(parser, depth); case PM_TOKEN_REGEXP_BEGIN: { pm_token_t opening = parser->current; parser_lex(parser);