diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b9b2f00..0f74e683 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ## [Unreleased] +### Added + +- Added support for `not in` and `is not` compound operators. + ## [0.7.0] - 2025-11-11 ### Added diff --git a/core/CHANGELOG.md b/core/CHANGELOG.md index 417eabaa..11502875 100644 --- a/core/CHANGELOG.md +++ b/core/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +### Added + +- Added support for `not in` and `is not` compound operators. + ## 0.7.0 - 2025-11-11 ### Added diff --git a/core/datatests/generators/optimising_line_formatter.rs b/core/datatests/generators/optimising_line_formatter.rs index d7d0f00d..b0b0510c 100644 --- a/core/datatests/generators/optimising_line_formatter.rs +++ b/core/datatests/generators/optimising_line_formatter.rs @@ -361,6 +361,7 @@ mod comments { child_lines::generate(root_dir); conditional_directives::generate(root_dir); individual_block::generate(root_dir); + compound_operators::generate(root_dir); } mod midline_line { @@ -680,6 +681,44 @@ mod comments { ); } } + + mod compound_operators { + use super::*; + + pub fn generate(root_dir: &Path) { + generate_test_cases!( + root_dir, + not_in = " + AA := AAA {} not {} in {} BBB; + AAA := + AAA {} not {} in {} BBB; + AAA := + AAAAA {} + not {} in {} BBBB; + AAA := + AAAAA {} + not {} in {} BBBBBBBBB; + AAA := + AAAAA + {} + not + {} in + {} BBBBBBBBB; + AAA := + AAAAA + { + } + not + { + } + in + { + } + BBBBBBBBB; + ", + ); + } + } } mod anonymous { @@ -4312,6 +4351,51 @@ mod expressions { and DDDDDDD; ", + compound = " + A := AAAAAAAA not in BBBBBBBB; + A := + AAAAAAAA not in BBBBBBBBB; + A := + AAAAAAAAA + not in BBBBBBBBB; + A := + AAAAAAAAA + not in BBBBBBBBBBBBBBB; + A := + AAAAAA + BBBBBB + CCCCCCCC + not in DDDDDDDDDDDDDD; + A := + AAAAAA + BBBBBB + CCCCCCCC + not in 
DDDDD + EEEEEE; + A := + AAAAAAA + + BBBBBBB + + CCCCCCC + not in DDDDDDDDDDDDDD; + A := + AAAAAA + BBBBBB + CCCCCCCC + not in DDDDDD + + EEEEEE; + A := AAAAAAAA is not BBBBBBBB; + A := + AAAAAAAA is not BBBBBBBBB; + A := + AAAAAAAAA + is not BBBBBBBBB; + A := + AAAAAAAAA + is not BBBBBBBBBBBBBBB; + A := (AAA not in [DDD + EEE]); + A := + (AAAA not in [DDD + EEE]); + A := + (AAAAA + not in [DDD + EEE]); + A := + (AAAAA + not in [ + DDDDD + EEEE]); + ", ); } } diff --git a/core/src/rules/optimising_line_formatter/contexts.rs b/core/src/rules/optimising_line_formatter/contexts.rs index 8f09bcad..09a7a918 100644 --- a/core/src/rules/optimising_line_formatter/contexts.rs +++ b/core/src/rules/optimising_line_formatter/contexts.rs @@ -418,6 +418,15 @@ impl<'a> SpecificContextStack<'a> { ) .cloned() } + fn get_next_real_token_type_from_line_index(&self, line_index: u32) -> Option<TokenType> { + self.formatting_contexts + .line + .get_tokens() + .iter() + .skip(line_index as usize + 1) + .map(|index| self.formatting_contexts.token_types[*index]) + .find(|token_type| !token_type.is_comment_or_compiler_directive()) + } /// Updates all contexts to reflect the decision provided. 
pub(super) fn update_contexts(&self, node: &mut FormattingNode, decision: RawDecision) { @@ -647,8 +656,20 @@ impl<'a> SpecificContextStack<'a> { _ => {} } } + (Some(op1), Some(op2)) if (op1, op2).get_operator_precedence().is_some() => { + // In the middle of a compound operator, do nothing + } + (_, Some(op @ (TT::Op(_) | TT::Keyword(_)))) + if self + .get_next_real_token_type_from_line_index(line_index) + .is_some_and(|token_type| { + (op, token_type).get_operator_precedence().is_some() + }) => + { + self.update_operator_precedences(node, is_break); + } (prev, Some(op @ (TT::Op(_) | TT::Keyword(_)))) - if super::get_operator_precedence(op).is_some() && is_binary(op, prev) => + if op.get_operator_precedence().is_some() && is_binary(op, prev) => { self.update_operator_precedences(node, is_break); } @@ -775,12 +796,6 @@ impl<'a> LineFormattingContexts<'a> { token_types: &'a [TokenType], context_tree: &'a ParentPointerTree, ) -> Self { - let get_token_type_from_line_index = |line_index| { - token_types - .get(*line.get_tokens().get(line_index as usize)?) - .cloned() - }; - let builder_context_tree = Self::new_tree(); let mut contexts = LineFormattingContextsBuilder::new(&builder_context_tree); @@ -822,18 +837,44 @@ impl<'a> LineFormattingContexts<'a> { } } - let mut prev_prev_token_type = None; - let mut prev_token_type = None; - let mut prev_semantic_token_type = None; - let mut current = get_token_type_from_line_index(0); - let mut next_token_type = get_token_type_from_line_index(1); + let mut prev_token_types: Vec<TokenType> = Vec::with_capacity(line.get_tokens().len()); + macro_rules! 
last_semantic_token_type { + () => { + last_semantic_token_type!(0) + }; + ($i: expr) => { + prev_token_types + .iter() + .rev() + .filter(|tt| !tt.is_comment_or_directive()) + .nth($i) + }; + } + let mut next_token_types = line + .get_tokens() + .iter() + .rev() + .map(|id| token_types[*id]) + .collect::<Vec<_>>(); + let mut current = next_token_types.pop(); + + fn next_real_token_type(token_types: &[TokenType]) -> Option<TokenType> { + token_types + .iter() + .rev() + .find(|token_type| !token_type.is_comment_or_compiler_directive()) + .cloned() + } + while let Some(current_token_type) = current { if !current_token_type.is_comment_or_compiler_directive() { let last_context_type = contexts.current_context.get().context_type; // New contexts relating to the previous token are pushed here // to avoid including any leading comments - if let (Some(prev_token_type), Some(prev_directive_token_type)) = - (prev_token_type, prev_semantic_token_type) + if let Some(prev_token_type) = prev_token_types + .iter() + .rev() + .find(|tt| !tt.is_comment_or_compiler_directive()) { match (prev_token_type, last_context_type) { (TT::Op(OK::LParen | OK::LBrack | OK::LessThan(ChK::Generic)), _) @@ -858,7 +899,7 @@ impl<'a> LineFormattingContexts<'a> { } _ => {} } - match prev_directive_token_type { + match prev_token_type { TT::Keyword(KK::Of) => { contexts.push(CT::Subject); contexts.push_expression(); @@ -908,7 +949,10 @@ impl<'a> LineFormattingContexts<'a> { contexts.push_expression(); } TT::Keyword(KK::Abstract) - if matches!(prev_prev_token_type, Some(TT::Keyword(KK::Class))) => {} + if matches!( + last_semantic_token_type!(1), + Some(TT::Keyword(KK::Class)) + ) => {} TT::Keyword(kk) if kk.is_directive() => { contexts.push_expression(); } @@ -933,8 +977,24 @@ impl<'a> LineFormattingContexts<'a> { TT::ConditionalDirective(kind) if kind.is_else() => { contexts.push_operators(); } - op if super::get_operator_precedence(op).is_some() - && is_binary(op, prev_prev_token_type) => + op if (*op, 
current_token_type) + .get_operator_precedence() + .is_some() => + { + // In the middle of a compound operator, do nothing + } + op if prev_token_types + .iter() + .rev() + .nth(1) + .cloned() + .and_then(|prev| (prev, *op).get_operator_precedence()) + .is_some() => + { + contexts.push_operators(); + } + op if op.get_operator_precedence().is_some() + && is_binary(*op, last_semantic_token_type!(1).cloned()) => { contexts.push_operators(); } @@ -952,7 +1012,7 @@ impl<'a> LineFormattingContexts<'a> { TT::Op(OK::LessThan(ChevronKind::Generic)) => BracketKind::Angle, _ => BracketKind::Round, }; - let (typ, cont_delta) = match prev_token_type { + let (typ, cont_delta) = match last_semantic_token_type!() { // routine invocations Some(TT::Identifier | TT::Op(OK::GreaterThan(ChevronKind::Generic))) => { (BracketStyle::BreakClose, 1) @@ -1094,7 +1154,10 @@ impl<'a> LineFormattingContexts<'a> { } TT::Op(OK::Dot) if CT::Precedence(0) == last_context_type => { contexts.retain_current(); - if matches!(prev_token_type, Some(TT::Op(OK::RParen | OK::RBrack))) { + if matches!( + last_semantic_token_type!(), + Some(TT::Op(OK::RParen | OK::RBrack)) + ) { /* Fluency is considered after () and [] because they allow for arbitrary computation which will @@ -1108,13 +1171,33 @@ impl<'a> LineFormattingContexts<'a> { contexts.fluent(contexts.current_context.clone()); } } - op if super::get_operator_precedence(op).is_some() - && is_binary(op, prev_token_type) => + + op if prev_token_types + .last() + .cloned() + .and_then(|prev| (prev, op).get_operator_precedence()) + .is_some() => + { + // We are in the middle of a compound operator, do nothing + } + op if next_real_token_type(&next_token_types) + .and_then(|next| (op, next).get_operator_precedence()) + .is_some() => { - let op_prec = super::get_operator_precedence(op).unwrap(); + let op_prec = next_real_token_type(&next_token_types) + .and_then(|next| (op, next).get_operator_precedence()) + .unwrap(); 
contexts.pop_until_and_retain(CT::Precedence(op_prec)); } - TT::Keyword(KK::Of) if matches!(next_token_type, Some(TT::Keyword(KK::Object))) => { + op if op.get_operator_precedence().is_some() + && is_binary(op, last_semantic_token_type!().cloned()) => + { + let op_prec = op.get_operator_precedence().unwrap(); + contexts.pop_until_and_retain(CT::Precedence(op_prec)); + } + TT::Keyword(KK::Of) + if matches!(next_token_types.last(), Some(TT::Keyword(KK::Object))) => + { contexts.pop_until_after(CT::AnonHeader); } TT::Keyword(KK::Then | KK::Do | KK::Of) => { @@ -1135,7 +1218,7 @@ impl<'a> LineFormattingContexts<'a> { contexts.pop_until_after(CT::AnonHeader); } TT::Keyword(KK::Abstract) - if matches!(prev_token_type, Some(TT::Keyword(KK::Class))) => {} + if matches!(last_semantic_token_type!(), Some(TT::Keyword(KK::Class))) => {} TT::Keyword(kk) if kk.is_directive() => { if contexts.pop_until(CT::DirectiveList) != Some(CT::DirectiveList) { if contexts @@ -1180,15 +1263,8 @@ impl<'a> LineFormattingContexts<'a> { _ => {} } - if !current_token_type.is_comment_or_directive() { - prev_prev_token_type = prev_token_type; - prev_token_type = current; - } - if !current_token_type.is_comment_or_compiler_directive() { - prev_semantic_token_type = current; - } - current = next_token_type; - next_token_type = get_token_type_from_line_index(contexts.line_index + 1); + prev_token_types.extend(current); + current = next_token_types.pop(); } contexts.finalise(); @@ -2160,6 +2236,17 @@ mod tests { 1 Precedence(3) ^----------- 1 Precedence(2) ^-----$ "}, + not_in_operator = {" + AA + BB not in CC + 1 Base ^---------------- + 1 Precedence(4) ^---------------- + 1 Precedence(3) ^-----$ + "}, + is_not_operator = {" + AA is not BB + 1 Base ^----------- + 1 Precedence(4) ^----------- + "}, routine_arguments = {" AA(BB, CC) + DD 1 Base ^-------------- diff --git a/core/src/rules/optimising_line_formatter/mod.rs b/core/src/rules/optimising_line_formatter/mod.rs index 6bd926e8..3f70c735 100644 
--- a/core/src/rules/optimising_line_formatter/mod.rs +++ b/core/src/rules/optimising_line_formatter/mod.rs @@ -1258,41 +1258,57 @@ impl<'this> InternalOptimisingLineFormatter<'this, '_> { const HIGHEST_PRECEDENCE: u8 = 0; const LOWEST_PRECEDENCE: u8 = 5; -fn get_operator_precedence(token_type: TokenType) -> Option<u8> { - match token_type { - TT::Op(OK::Dot) => Some(0), +trait OperatorPrecedence { + fn get_operator_precedence(self) -> Option<u8>; +} - TT::Op(OK::AddressOf) | TT::Keyword(KK::Not) => Some(1), +impl OperatorPrecedence for TokenType { + fn get_operator_precedence(self) -> Option<u8> { + match self { + TT::Op(OK::Dot) => Some(0), - TT::Op(OK::Star | OK::Slash) - | TT::Keyword(KK::Div | KK::Mod | KK::And | KK::Shl | KK::Shr | KK::As) => Some(2), + TT::Op(OK::AddressOf) | TT::Keyword(KK::Not) => Some(1), - TT::Op(OK::Plus | OK::Minus) | TT::Keyword(KK::Or | KK::Xor) => Some(3), + TT::Op(OK::Star | OK::Slash) + | TT::Keyword(KK::Div | KK::Mod | KK::And | KK::Shl | KK::Shr | KK::As) => Some(2), - TT::Op( - OK::Equal(EqKind::Comp) - | OK::NotEqual - | OK::LessThan(ChK::Comp) - | OK::GreaterThan(ChK::Comp) - | OK::LessEqual - | OK::GreaterEqual, - ) - | TT::Keyword(KK::In(InKind::Op) | KK::Is) => Some(4), - // The import clause `in`s is most simply represented as a precedence - // relationship - TT::Keyword(KK::In(InKind::Import)) => Some(4), - TT::Op(OK::DotDot) => Some(5), - - TT::Op(_) - | TT::Identifier - | TT::Keyword(_) - | TT::TextLiteral(_) - | TT::NumberLiteral(_) - | TT::ConditionalDirective(_) - | TT::CompilerDirective - | TT::Comment(_) - | TT::Eof - | TT::Unknown => None, + TT::Op(OK::Plus | OK::Minus) | TT::Keyword(KK::Or | KK::Xor) => Some(3), + + TT::Op( + OK::Equal(EqKind::Comp) + | OK::NotEqual + | OK::LessThan(ChK::Comp) + | OK::GreaterThan(ChK::Comp) + | OK::LessEqual + | OK::GreaterEqual, + ) + | TT::Keyword(KK::In(InKind::Op) | KK::Is) => Some(4), + // The import clause `in`s is most simply represented as a precedence + // relationship + 
TT::Keyword(KK::In(InKind::Import)) => Some(4), + TT::Op(OK::DotDot) => Some(5), + + TT::Op(_) + | TT::Identifier + | TT::Keyword(_) + | TT::TextLiteral(_) + | TT::NumberLiteral(_) + | TT::ConditionalDirective(_) + | TT::CompilerDirective + | TT::Comment(_) + | TT::Eof + | TT::Unknown => None, + } + } +} +impl OperatorPrecedence for (TokenType, TokenType) { + fn get_operator_precedence(self) -> Option<u8> { + match self { + // Compound operators `not in` and `is not` + (TT::Keyword(KK::Not), op @ TT::Keyword(KK::In(InKind::Op))) + | (op @ TT::Keyword(KK::Is), TT::Keyword(KK::Not)) => op.get_operator_precedence(), + _ => None, + } } } diff --git a/core/src/rules/optimising_line_formatter/requirements.rs b/core/src/rules/optimising_line_formatter/requirements.rs index 8d352021..97c07604 100644 --- a/core/src/rules/optimising_line_formatter/requirements.rs +++ b/core/src/rules/optimising_line_formatter/requirements.rs @@ -1,10 +1,10 @@ use super::InternalOptimisingLineFormatter; use super::SpecificContextDataStack; use super::contexts::*; -use super::get_operator_precedence; use super::is_binary; use super::types::DecisionRequirement; use crate::lang::*; +use crate::rules::optimising_line_formatter::OperatorPrecedence; use super::contexts::ContextType as CT; use super::types::DecisionRequirement as DR; @@ -213,8 +213,11 @@ impl InternalOptimisingLineFormatter<'_, '_> { .get_last_context(CT::RaiseAt) .map(|(_, data)| data.is_broken | data.is_child_broken) .if_else_or_default(DR::MustBreak, DR::Indifferent), + (Some(op1), Some(op2)) if (op1, op2).get_operator_precedence().is_some() => { + DR::MustNotBreak + } (prev, Some(op @ (TT::Op(_) | TT::Keyword(_)))) - if get_operator_precedence(op).is_some() && is_binary(op, prev) => + if op.get_operator_precedence().is_some() && is_binary(op, prev) => { contexts_data .iter() @@ -239,7 +242,7 @@ impl InternalOptimisingLineFormatter<'_, '_> { .map(|(_, data)| data.is_broken | data.is_child_broken) .if_else_or_default(DR::MustBreak, 
DR::Indifferent), (Some(op @ (TT::Op(_) | TT::Keyword(_))), _) - if get_operator_precedence(op).is_some() => + if op.get_operator_precedence().is_some() => { DR::MustNotBreak }