From 812cd5a3e1dc1c6718aaae6409fdc1f5a60cbc92 Mon Sep 17 00:00:00 2001 From: Jochen Hoenle Date: Wed, 6 May 2026 16:21:45 +0200 Subject: [PATCH 1/3] [cr checker]: fix copyright detection for year ranges --- cr_checker/tests/test_cr_checker.py | 16 ++++++++++++++++ cr_checker/tool/cr_checker.py | 4 ++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/cr_checker/tests/test_cr_checker.py b/cr_checker/tests/test_cr_checker.py index 79b028f5..48bc8422 100644 --- a/cr_checker/tests/test_cr_checker.py +++ b/cr_checker/tests/test_cr_checker.py @@ -443,3 +443,19 @@ def test_process_files_detects_duplicate_header(tmp_path): assert results["duplicate_copyright"] == 1 assert results["no_copyright"] == 0 + + +# test that has_duplicate_copyright detects two headers with different year ranges +def test_has_duplicate_copyright_detects_different_year_ranges(tmp_path): + cr_checker = load_cr_checker_module() + test_file = tmp_path / "file.py" + header_template = load_template("py") + header1 = header_template.format(year="2026", author="Author") + header2 = header_template.format(year="2024-2026", author="Author") + test_file.write_text(header1 + header2 + "some content\n", encoding="utf-8") + + result = cr_checker.has_duplicate_copyright( + test_file, header_template, False, "utf-8", 0 + ) + + assert result is True diff --git a/cr_checker/tool/cr_checker.py b/cr_checker/tool/cr_checker.py index 3da01cae..532d2c23 100755 --- a/cr_checker/tool/cr_checker.py +++ b/cr_checker/tool/cr_checker.py @@ -396,7 +396,7 @@ def has_copyright(path, template, use_mmap, encoding, offset, config=None): if BORDER_FILL_PATTERN.search(stripped_line): regex_parts.append(line_to_flexible_regex(line)) else: - formatted = line.format(year=r"\\d\{4\}", author=r"\.\*") + formatted = line.format(year=r"\\d\{4\}\(-\\d\{4\}\)\?", author=r"\.\*") regex_parts.append(convert_bre_to_regex(formatted)) template_regex = "".join(regex_parts) + "\n?" @@ -431,7 +431,7 @@ def has_duplicate_copyright(path, template, use_mmap, encoding, offset): if BORDER_FILL_PATTERN.search(stripped_line): regex_parts.append(line_to_flexible_regex(line)) else: - formatted = line.format(year=r"\\d\{4\}", author=r"\.\*") + formatted = line.format(year=r"\\d\{4\}\(-\\d\{4\}\)\?", author=r"\.\*") regex_parts.append(convert_bre_to_regex(formatted)) template_regex = "\n?".join(regex_parts) From 7975cb4a16ef575b22d6e3ff118a7c7f32066836 Mon Sep 17 00:00:00 2001 From: Jochen Hoenle Date: Wed, 6 May 2026 17:26:32 +0200 Subject: [PATCH 2/3] [plantuml parser] enhance error reporting --- plantuml/parser/puml_cli/src/main.rs | 116 +++++++++++++----- .../src/class_diagram/src/class_parser.rs | 13 +- .../component_diagram/src/component_parser.rs | 13 +- plantuml/parser/puml_parser/src/lib.rs | 4 +- .../puml_parser/src/parser_core/src/error.rs | 13 ++ .../puml_parser/src/parser_core/src/lib.rs | 2 +- .../src/sequence_diagram/src/syntax_parser.rs | 13 +- 7 files changed, 140 insertions(+), 34 deletions(-) diff --git a/plantuml/parser/puml_cli/src/main.rs b/plantuml/parser/puml_cli/src/main.rs index e790c822..9d6b3e60 100644 --- a/plantuml/parser/puml_cli/src/main.rs +++ b/plantuml/parser/puml_cli/src/main.rs @@ -23,7 +23,8 @@ use std::rc::Rc; use puml_lobster::{write_lobster_to_file, LobsterModel}; use puml_parser::{ - DiagramParser, Preprocessor, PumlClassParser, PumlComponentParser, PumlSequenceParser, + DiagramParser, ErrorLocation, Preprocessor, PumlClassParser, PumlComponentParser, + PumlSequenceParser, }; use puml_resolver::{ ClassResolver, ComponentResolver, DiagramResolver, SequenceResolver, SequenceTree, @@ -111,7 +112,14 @@ enum ParsedDiagram { Sequence(puml_parser::SeqPumlDocument), } -fn main() -> Result<(), Box> { +fn main() { + if let Err(e) = run() { + eprintln!("{e}"); + std::process::exit(1); + } +} + +fn run() -> Result<(), Box> { let args = Args::parse(); let log_level: LogLevel = args.log_level.into(); Builder::new() @@ -149,13 +157,8 @@ fn main() -> Result<(), Box> { debug!("Parsing started"); for (path, content) in &preprocessed_files { - let parsed_content = - parse_puml_file(path, content, log_level, args.diagram_type).map_err(|e| { - std::io::Error::new( - std::io::ErrorKind::Other, - format!("Parse error in {}: {}", path.display(), e), - ) - })?; + let parsed_content = parse_puml_file(path, content, log_level, args.diagram_type) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e.to_string()))?; if emit_debug_json { if let Some(ref dir) = fbs_output_dir { write_json_to_file(&parsed_content, path, dir, "raw.ast")?; @@ -304,36 +307,91 @@ fn parse_puml_file( } } -type ParserFn = - fn(&Rc, &str, LogLevel) -> Result>; +type ParseAttempt<'a> = (&'a str, Box, Option<(usize, usize)>); fn parse_in_order( path: &Rc, content: &str, log_level: LogLevel, ) -> Result> { - let parsers: &[(&str, ParserFn)] = &[ - ("Component", |p, c, l| { - parse_with_parser(&mut PumlComponentParser, p, c, l).map(ParsedDiagram::Component) - }), - ("Class", |p, c, l| { - parse_with_parser(&mut PumlClassParser, p, c, l).map(ParsedDiagram::Class) - }), - ("Sequence", |p, c, l| { - parse_with_parser(&mut PumlSequenceParser, p, c, l).map(ParsedDiagram::Sequence) - }), - ]; - - for (parser_name, parser) in parsers { - if let Ok(ast) = parser(path, content, log_level) { - debug!("Successfully detected as {} diagram", parser_name); - return Ok(ast); + // Each attempt records the parser name, the boxed error, and the source + // location extracted from the concrete type before boxing. + let mut attempts: Vec> = Vec::new(); + + match PumlComponentParser.parse_file(path, content, log_level) { + Ok(doc) => { + debug!("Successfully detected as Component diagram"); + return Ok(ParsedDiagram::Component(doc)); + } + Err(e) => { + let loc = e.error_location(); + debug!("Component parser failed at {:?}: {}", loc, e); + attempts.push(("Component", Box::new(e), loc)); + } + } + + match PumlClassParser.parse_file(path, content, log_level) { + Ok(doc) => { + debug!("Successfully detected as Class diagram"); + return Ok(ParsedDiagram::Class(doc)); + } + Err(e) => { + let loc = e.error_location(); + debug!("Class parser failed at {:?}: {}", loc, e); + attempts.push(("Class", Box::new(e), loc)); } } + match PumlSequenceParser.parse_file(path, content, log_level) { + Ok(doc) => { + debug!("Successfully detected as Sequence diagram"); + return Ok(ParsedDiagram::Sequence(doc)); + } + Err(e) => { + let loc = e.error_location(); + debug!("Sequence parser failed at {:?}: {}", loc, e); + attempts.push(("Sequence", Box::new(e), loc)); + } + } + + // The parser that reached the furthest line is the most informative one. + let best = attempts + .iter() + .max_by_key(|(_, _, loc)| loc.map_or(0, |(line, _)| line)); + + let tried_names: Vec<&str> = attempts.iter().map(|(n, _, _)| *n).collect(); + + let detail = match best { + Some((best_name, best_err, Some((line_no, _col)))) => { + let source_line = content + .lines() + .nth(line_no - 1) + .unwrap_or("") + .trim(); + format!( + "\n Parsers tried: {}\n Parser with longest match: {}\n Failed at line {}: {}\n Error: {}", + tried_names.join(", "), + best_name, + line_no, + source_line, + best_err, + ) + } + Some((best_name, best_err, None)) => { + format!( + "\n Parsers tried: {}\n Parser with longest match: {}\n Error: {}", + tried_names.join(", "), + best_name, + best_err, + ) + } + None => String::new(), + }; + Err(format!( - "Failed to parse {} with any available parser", - path.display() + "Failed to parse {} with any available parser{}", + path.display(), + detail, ) .into()) } diff --git a/plantuml/parser/puml_parser/src/class_diagram/src/class_parser.rs b/plantuml/parser/puml_parser/src/class_diagram/src/class_parser.rs index 6e8aab03..96ac26c6 100644 --- a/plantuml/parser/puml_parser/src/class_diagram/src/class_parser.rs +++ b/plantuml/parser/puml_parser/src/class_diagram/src/class_parser.rs @@ -21,7 +21,9 @@ use crate::source_map::{ }; use log::{debug, trace}; use parser_core::common_parser::{parse_arrow, PlantUmlCommonParser, Rule}; -use parser_core::{format_parse_tree, pest_to_syntax_error, BaseParseError, DiagramParser}; +use parser_core::{ + format_parse_tree, pest_to_syntax_error, BaseParseError, DiagramParser, ErrorLocation, +}; use pest::Parser; use puml_utils::LogLevel; use std::collections::HashSet; @@ -39,6 +41,15 @@ pub enum ClassError { UnexpectedClassMember(String), } +impl ErrorLocation for ClassError { + fn error_location(&self) -> Option<(usize, usize)> { + match self { + Self::Base(b) => b.error_location(), + _ => None, + } + } +} + // Object definitions are ignored by the class parser, but their names must be // tracked long enough to drop relationships that reference those ignored objects. #[derive(Debug, Default)] diff --git a/plantuml/parser/puml_parser/src/component_diagram/src/component_parser.rs b/plantuml/parser/puml_parser/src/component_diagram/src/component_parser.rs index 1874aa5a..1a3177fd 100644 --- a/plantuml/parser/puml_parser/src/component_diagram/src/component_parser.rs +++ b/plantuml/parser/puml_parser/src/component_diagram/src/component_parser.rs @@ -18,7 +18,9 @@ use thiserror::Error; use crate::{ Arrow, CompPumlDocument, Component, ComponentStyle, Port, PortType, Relation, Statement, }; -use parser_core::{format_parse_tree, pest_to_syntax_error, BaseParseError, DiagramParser}; +use parser_core::{ + format_parse_tree, pest_to_syntax_error, BaseParseError, DiagramParser, ErrorLocation, +}; use puml_utils::LogLevel; use parser_core::common_parser::parse_arrow as common_parse_arrow; @@ -32,6 +34,15 @@ pub enum ComponentError { InvalidStatement(String), } +impl ErrorLocation for ComponentError { + fn error_location(&self) -> Option<(usize, usize)> { + match self { + Self::Base(b) => b.error_location(), + _ => None, + } + } +} + pub struct PumlComponentParser; // lobster-trace: Tools.ArchitectureModelingSyntax diff --git a/plantuml/parser/puml_parser/src/lib.rs b/plantuml/parser/puml_parser/src/lib.rs index c8faf42a..11334f08 100644 --- a/plantuml/parser/puml_parser/src/lib.rs +++ b/plantuml/parser/puml_parser/src/lib.rs @@ -14,7 +14,9 @@ // Re-export commonly used items that don't have name conflicts pub use class_parser::{ClassError, ClassUmlFile, PumlClassParser}; pub use component_parser::{CompPumlDocument, ComponentError, PumlComponentParser}; -pub use parser_core::{common_ast, common_parser, Arrow, BaseParseError, DiagramParser}; +pub use parser_core::{ + common_ast, common_parser, Arrow, BaseParseError, DiagramParser, ErrorLocation, +}; pub use preprocessor::{ IncludeExpandError, IncludeParseError, PreprocessError, Preprocessor, ProcedureExpandError, ProcedureParseError, diff --git a/plantuml/parser/puml_parser/src/parser_core/src/error.rs b/plantuml/parser/puml_parser/src/parser_core/src/error.rs index b5b40140..807e4ea3 100644 --- a/plantuml/parser/puml_parser/src/parser_core/src/error.rs +++ b/plantuml/parser/puml_parser/src/parser_core/src/error.rs @@ -35,6 +35,19 @@ pub enum BaseParseError { }, } +pub trait ErrorLocation { + fn error_location(&self) -> Option<(usize, usize)>; +} + +impl ErrorLocation for BaseParseError { + fn error_location(&self) -> Option<(usize, usize)> { + match self { + Self::SyntaxError { line, column, .. } => Some((*line, *column)), + _ => None, + } + } +} + pub fn pest_to_syntax_error( err: PestError, file: PathBuf, diff --git a/plantuml/parser/puml_parser/src/parser_core/src/lib.rs b/plantuml/parser/puml_parser/src/parser_core/src/lib.rs index cdf8e683..eda837f4 100644 --- a/plantuml/parser/puml_parser/src/parser_core/src/lib.rs +++ b/plantuml/parser/puml_parser/src/parser_core/src/lib.rs @@ -16,7 +16,7 @@ pub mod error; pub use common_ast::*; pub use common_parser::*; -pub use error::{pest_to_syntax_error, BaseParseError}; +pub use error::{pest_to_syntax_error, BaseParseError, ErrorLocation}; /// Recursively format a Pest parse tree into an indented string for diagnostic output. /// diff --git a/plantuml/parser/puml_parser/src/sequence_diagram/src/syntax_parser.rs b/plantuml/parser/puml_parser/src/sequence_diagram/src/syntax_parser.rs index 47f99f19..9982e7e2 100644 --- a/plantuml/parser/puml_parser/src/sequence_diagram/src/syntax_parser.rs +++ b/plantuml/parser/puml_parser/src/sequence_diagram/src/syntax_parser.rs @@ -13,7 +13,9 @@ use log::{debug, trace}; use parser_core::common_parser::parse_arrow as common_parse_arrow; use parser_core::common_parser::{PlantUmlCommonParser, Rule}; -use parser_core::{format_parse_tree, pest_to_syntax_error, BaseParseError, DiagramParser}; +use parser_core::{ + format_parse_tree, pest_to_syntax_error, BaseParseError, DiagramParser, ErrorLocation, +}; use puml_utils::LogLevel; use std::path::PathBuf; use std::rc::Rc; @@ -29,6 +31,15 @@ pub enum SequenceError { InvalidStatement(String), } +impl ErrorLocation for SequenceError { + fn error_location(&self) -> Option<(usize, usize)> { + match self { + Self::Base(b) => b.error_location(), + _ => None, + } + } +} + pub struct PumlSequenceParser; // lobster-trace: Tools.ArchitectureModelingSyntax From 1abedd9437de8522812486a67206dca6113030fb Mon Sep 17 00:00:00 2001 From: Jochen Hoenle Date: Wed, 6 May 2026 17:32:59 +0200 Subject: [PATCH 3/3] [plantuml parser]: fix whitespace in title --- plantuml/parser/puml_parser/src/grammar/common.pest | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plantuml/parser/puml_parser/src/grammar/common.pest b/plantuml/parser/puml_parser/src/grammar/common.pest index 3954da13..93504c92 100644 --- a/plantuml/parser/puml_parser/src/grammar/common.pest +++ b/plantuml/parser/puml_parser/src/grammar/common.pest @@ -62,7 +62,7 @@ NAME = { COMPOUND_NAME | LOOSE_NAME } CNAME = @{ quoted_string | NAME } quoted_string = @{ "\"" ~ (!"\"" ~ ANY)* ~ "\"" | "«" ~ (!"»" ~ ANY)* ~ "»" } -diagram_id = @{ (ASCII_ALPHANUMERIC | "_" | "-" | "." | "@")+ } +diagram_id = @{ (ASCII_ALPHANUMERIC | "_" | "-" | "." | "@" | " ")+ } puml_name = { STRING | diagram_id } identifier = @{ (ASCII_ALPHANUMERIC | "_")+ }