Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions cr_checker/tests/test_cr_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -443,3 +443,19 @@ def test_process_files_detects_duplicate_header(tmp_path):

assert results["duplicate_copyright"] == 1
assert results["no_copyright"] == 0


# test that has_duplicate_copyright detects two headers with different year ranges
def test_has_duplicate_copyright_detects_different_year_ranges(tmp_path):
    """A single-year header followed by a year-range header is reported as a duplicate."""
    checker = load_cr_checker_module()
    template = load_template("py")

    # Same template and author; only the year field differs between the two headers.
    single_year_header = template.format(year="2026", author="Author")
    year_range_header = template.format(year="2024-2026", author="Author")

    target = tmp_path / "file.py"
    target.write_text(
        single_year_header + year_range_header + "some content\n",
        encoding="utf-8",
    )

    is_duplicate = checker.has_duplicate_copyright(target, template, False, "utf-8", 0)

    assert is_duplicate is True
4 changes: 2 additions & 2 deletions cr_checker/tool/cr_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@ def has_copyright(path, template, use_mmap, encoding, offset, config=None):
if BORDER_FILL_PATTERN.search(stripped_line):
regex_parts.append(line_to_flexible_regex(line))
else:
formatted = line.format(year=r"\\d\{4\}", author=r"\.\*")
formatted = line.format(year=r"\\d\{4\}\(-\\d\{4\}\)\?", author=r"\.\*")
regex_parts.append(convert_bre_to_regex(formatted))
template_regex = "".join(regex_parts) + "\n?"

Expand Down Expand Up @@ -431,7 +431,7 @@ def has_duplicate_copyright(path, template, use_mmap, encoding, offset):
if BORDER_FILL_PATTERN.search(stripped_line):
regex_parts.append(line_to_flexible_regex(line))
else:
formatted = line.format(year=r"\\d\{4\}", author=r"\.\*")
formatted = line.format(year=r"\\d\{4\}\(-\\d\{4\}\)\?", author=r"\.\*")
regex_parts.append(convert_bre_to_regex(formatted))
template_regex = "\n?".join(regex_parts)

Expand Down
116 changes: 87 additions & 29 deletions plantuml/parser/puml_cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ use std::rc::Rc;

use puml_lobster::{write_lobster_to_file, LobsterModel};
use puml_parser::{
DiagramParser, Preprocessor, PumlClassParser, PumlComponentParser, PumlSequenceParser,
DiagramParser, ErrorLocation, Preprocessor, PumlClassParser, PumlComponentParser,
PumlSequenceParser,
};
use puml_resolver::{
ClassResolver, ComponentResolver, DiagramResolver, SequenceResolver, SequenceTree,
Expand Down Expand Up @@ -111,7 +112,14 @@ enum ParsedDiagram {
Sequence(puml_parser::SeqPumlDocument),
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
/// Binary entry point: delegates to `run` and, on failure, prints the error to
/// stderr using its `Display` form and terminates the process with exit code 1.
fn main() {
    match run() {
        Ok(()) => {}
        Err(err) => {
            eprintln!("{err}");
            std::process::exit(1);
        }
    }
}

fn run() -> Result<(), Box<dyn std::error::Error>> {
let args = Args::parse();
let log_level: LogLevel = args.log_level.into();
Builder::new()
Expand Down Expand Up @@ -149,13 +157,8 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {

debug!("Parsing started");
for (path, content) in &preprocessed_files {
let parsed_content =
parse_puml_file(path, content, log_level, args.diagram_type).map_err(|e| {
std::io::Error::new(
std::io::ErrorKind::Other,
format!("Parse error in {}: {}", path.display(), e),
)
})?;
let parsed_content = parse_puml_file(path, content, log_level, args.diagram_type)
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e.to_string()))?;
if emit_debug_json {
if let Some(ref dir) = fbs_output_dir {
write_json_to_file(&parsed_content, path, dir, "raw.ast")?;
Expand Down Expand Up @@ -304,36 +307,91 @@ fn parse_puml_file(
}
}

type ParserFn =
fn(&Rc<PathBuf>, &str, LogLevel) -> Result<ParsedDiagram, Box<dyn std::error::Error>>;
type ParseAttempt<'a> = (&'a str, Box<dyn std::error::Error>, Option<(usize, usize)>);

fn parse_in_order(
path: &Rc<PathBuf>,
content: &str,
log_level: LogLevel,
) -> Result<ParsedDiagram, Box<dyn std::error::Error>> {
let parsers: &[(&str, ParserFn)] = &[
("Component", |p, c, l| {
parse_with_parser(&mut PumlComponentParser, p, c, l).map(ParsedDiagram::Component)
}),
("Class", |p, c, l| {
parse_with_parser(&mut PumlClassParser, p, c, l).map(ParsedDiagram::Class)
}),
("Sequence", |p, c, l| {
parse_with_parser(&mut PumlSequenceParser, p, c, l).map(ParsedDiagram::Sequence)
}),
];

for (parser_name, parser) in parsers {
if let Ok(ast) = parser(path, content, log_level) {
debug!("Successfully detected as {} diagram", parser_name);
return Ok(ast);
// Each attempt records the parser name, the boxed error, and the source
// location extracted from the concrete type before boxing.
let mut attempts: Vec<ParseAttempt<'_>> = Vec::new();

match PumlComponentParser.parse_file(path, content, log_level) {
Ok(doc) => {
debug!("Successfully detected as Component diagram");
return Ok(ParsedDiagram::Component(doc));
}
Err(e) => {
let loc = e.error_location();
debug!("Component parser failed at {:?}: {}", loc, e);
attempts.push(("Component", Box::new(e), loc));
}
}

match PumlClassParser.parse_file(path, content, log_level) {
Ok(doc) => {
debug!("Successfully detected as Class diagram");
return Ok(ParsedDiagram::Class(doc));
}
Err(e) => {
let loc = e.error_location();
debug!("Class parser failed at {:?}: {}", loc, e);
attempts.push(("Class", Box::new(e), loc));
}
}

match PumlSequenceParser.parse_file(path, content, log_level) {
Ok(doc) => {
debug!("Successfully detected as Sequence diagram");
return Ok(ParsedDiagram::Sequence(doc));
}
Err(e) => {
let loc = e.error_location();
debug!("Sequence parser failed at {:?}: {}", loc, e);
attempts.push(("Sequence", Box::new(e), loc));
}
}

// The parser that reached the furthest line is the most informative one.
let best = attempts
.iter()
.max_by_key(|(_, _, loc)| loc.map_or(0, |(line, _)| line));

let tried_names: Vec<&str> = attempts.iter().map(|(n, _, _)| *n).collect();

let detail = match best {
Some((best_name, best_err, Some((line_no, _col)))) => {
let source_line = content
.lines()
.nth(line_no - 1)
.unwrap_or("<unknown>")
.trim();
format!(
"\n Parsers tried: {}\n Parser with longest match: {}\n Failed at line {}: {}\n Error: {}",
tried_names.join(", "),
best_name,
line_no,
source_line,
best_err,
)
}
Some((best_name, best_err, None)) => {
format!(
"\n Parsers tried: {}\n Parser with longest match: {}\n Error: {}",
tried_names.join(", "),
best_name,
best_err,
)
}
None => String::new(),
};

Err(format!(
"Failed to parse {} with any available parser",
path.display()
"Failed to parse {} with any available parser{}",
path.display(),
detail,
)
.into())
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ use crate::source_map::{
};
use log::{debug, trace};
use parser_core::common_parser::{parse_arrow, PlantUmlCommonParser, Rule};
use parser_core::{format_parse_tree, pest_to_syntax_error, BaseParseError, DiagramParser};
use parser_core::{
format_parse_tree, pest_to_syntax_error, BaseParseError, DiagramParser, ErrorLocation,
};
use pest::Parser;
use puml_utils::LogLevel;
use std::collections::HashSet;
Expand All @@ -39,6 +41,15 @@ pub enum ClassError {
UnexpectedClassMember(String),
}

impl ErrorLocation for ClassError {
    /// Forwards to the wrapped error for the `Base` variant; every other
    /// variant reports no location.
    fn error_location(&self) -> Option<(usize, usize)> {
        if let Self::Base(inner) = self {
            inner.error_location()
        } else {
            None
        }
    }
}

// Object definitions are ignored by the class parser, but their names must be
// tracked long enough to drop relationships that reference those ignored objects.
#[derive(Debug, Default)]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ use thiserror::Error;
use crate::{
Arrow, CompPumlDocument, Component, ComponentStyle, Port, PortType, Relation, Statement,
};
use parser_core::{format_parse_tree, pest_to_syntax_error, BaseParseError, DiagramParser};
use parser_core::{
format_parse_tree, pest_to_syntax_error, BaseParseError, DiagramParser, ErrorLocation,
};
use puml_utils::LogLevel;

use parser_core::common_parser::parse_arrow as common_parse_arrow;
Expand All @@ -32,6 +34,15 @@ pub enum ComponentError {
InvalidStatement(String),
}

impl ErrorLocation for ComponentError {
    /// Forwards to the wrapped error for the `Base` variant; every other
    /// variant reports no location.
    fn error_location(&self) -> Option<(usize, usize)> {
        if let Self::Base(inner) = self {
            inner.error_location()
        } else {
            None
        }
    }
}

pub struct PumlComponentParser;

// lobster-trace: Tools.ArchitectureModelingSyntax
Expand Down
2 changes: 1 addition & 1 deletion plantuml/parser/puml_parser/src/grammar/common.pest
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ NAME = { COMPOUND_NAME | LOOSE_NAME }
CNAME = @{ quoted_string | NAME }
quoted_string = @{ "\"" ~ (!"\"" ~ ANY)* ~ "\"" | "«" ~ (!"»" ~ ANY)* ~ "»" }

diagram_id = @{ (ASCII_ALPHANUMERIC | "_" | "-" | "." | "@")+ }
diagram_id = @{ (ASCII_ALPHANUMERIC | "_" | "-" | "." | "@" | " ")+ }
puml_name = { STRING | diagram_id }

identifier = @{ (ASCII_ALPHANUMERIC | "_")+ }
Expand Down
4 changes: 3 additions & 1 deletion plantuml/parser/puml_parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
// Re-export commonly used items that don't have name conflicts
pub use class_parser::{ClassError, ClassUmlFile, PumlClassParser};
pub use component_parser::{CompPumlDocument, ComponentError, PumlComponentParser};
pub use parser_core::{common_ast, common_parser, Arrow, BaseParseError, DiagramParser};
pub use parser_core::{
common_ast, common_parser, Arrow, BaseParseError, DiagramParser, ErrorLocation,
};
pub use preprocessor::{
IncludeExpandError, IncludeParseError, PreprocessError, Preprocessor, ProcedureExpandError,
ProcedureParseError,
Expand Down
13 changes: 13 additions & 0 deletions plantuml/parser/puml_parser/src/parser_core/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,19 @@ pub enum BaseParseError<Rule> {
},
}

/// Implemented by parse errors that may be able to report where in the input
/// they occurred.
pub trait ErrorLocation {
    /// Returns the `(line, column)` position of the error, or `None` when the
    /// error carries no position.
    // NOTE(review): the CLI consumer indexes source lines with `line - 1`, so the
    // line appears to be 1-based — confirm against the producer of these values.
    fn error_location(&self) -> Option<(usize, usize)>;
}

impl<Rule> ErrorLocation for BaseParseError<Rule> {
    /// Reports the `line` and `column` fields of a `SyntaxError`; every other
    /// variant reports no location.
    fn error_location(&self) -> Option<(usize, usize)> {
        if let Self::SyntaxError { line, column, .. } = self {
            Some((*line, *column))
        } else {
            None
        }
    }
}

pub fn pest_to_syntax_error<Rule>(
err: PestError<Rule>,
file: PathBuf,
Expand Down
2 changes: 1 addition & 1 deletion plantuml/parser/puml_parser/src/parser_core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ pub mod error;

pub use common_ast::*;
pub use common_parser::*;
pub use error::{pest_to_syntax_error, BaseParseError};
pub use error::{pest_to_syntax_error, BaseParseError, ErrorLocation};

/// Recursively format a Pest parse tree into an indented string for diagnostic output.
///
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
use log::{debug, trace};
use parser_core::common_parser::parse_arrow as common_parse_arrow;
use parser_core::common_parser::{PlantUmlCommonParser, Rule};
use parser_core::{format_parse_tree, pest_to_syntax_error, BaseParseError, DiagramParser};
use parser_core::{
format_parse_tree, pest_to_syntax_error, BaseParseError, DiagramParser, ErrorLocation,
};
use puml_utils::LogLevel;
use std::path::PathBuf;
use std::rc::Rc;
Expand All @@ -29,6 +31,15 @@ pub enum SequenceError {
InvalidStatement(String),
}

impl ErrorLocation for SequenceError {
    /// Forwards to the wrapped error for the `Base` variant; every other
    /// variant reports no location.
    fn error_location(&self) -> Option<(usize, usize)> {
        if let Self::Base(inner) = self {
            inner.error_location()
        } else {
            None
        }
    }
}

pub struct PumlSequenceParser;

// lobster-trace: Tools.ArchitectureModelingSyntax
Expand Down
Loading