diff --git a/LICENSE b/LICENSE index 2b8db5a..b0088be 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2021-2025 Codeplag Development Team +Copyright (c) 2021-2026 Codeplag Development Team Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/docker/ubuntu2404.dockerfile b/docker/ubuntu2404.dockerfile index b6ba1d1..2c0b9ae 100644 --- a/docker/ubuntu2404.dockerfile +++ b/docker/ubuntu2404.dockerfile @@ -8,6 +8,7 @@ ARG DEB_PKG_NAME ADD LICENSE /usr/src/$UTIL_NAME/LICENSE ADD $DEBIAN_PACKAGES_PATH /usr/src/$UTIL_NAME/$DEBIAN_PACKAGES_PATH +RUN apt-get update RUN apt-get install -y /usr/src/$UTIL_NAME/$DEBIAN_PACKAGES_PATH/$DEB_PKG_NAME.deb # TODO: Fix this hook. apt-get don't install manpage into image. RUN install -D -m 0644 $DEBIAN_PACKAGES_PATH/$UTIL_NAME.1 /usr/share/man/man1/ diff --git a/locales/codeplag.pot b/locales/codeplag.pot index ce6d302..a302178 100644 --- a/locales/codeplag.pot +++ b/locales/codeplag.pot @@ -1,12 +1,12 @@ # Translations template for codeplag. -# Copyright (C) 2024-2025 Codeplag Development Team +# Copyright (C) 2024-2026 Codeplag Development Team # This file is distributed under the same license as the codeplag project. # #, fuzzy msgid "" msgstr "" -"Project-Id-Version: codeplag 0.6.1\n" -"POT-Creation-Date: 2025-12-02 18:35+0300\n" +"Project-Id-Version: codeplag 0.6.2\n" +"POT-Creation-Date: 2026-03-23 19:20+0300\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: Artyom Semidolin\n" "Language-Team: LANGUAGE \n" @@ -242,20 +242,20 @@ msgid "There is nothing to modify; please provide at least one argument." msgstr "" #: src/codeplag/codeplagcli.py:443 -msgid "The'repo-regexp' option requires the provided 'github-user' option." +msgid "The 'repo-regexp' option requires the provided 'github-user' option." msgstr "" #: src/codeplag/codeplagcli.py:450 msgid "" -"The'path-regexp' option requires the provided 'directories', 'github-" +"The 'path-regexp' option requires the provided 'directories', 'github-" "user', or 'github-urls' options." msgstr "" -#: src/codeplag/codeplagcli.py:460 src/codeplag/handlers/report.py:440 +#: src/codeplag/codeplagcli.py:460 src/codeplag/handlers/report.py:444 msgid "All paths must be provided." msgstr "" -#: src/codeplag/handlers/report.py:437 +#: src/codeplag/handlers/report.py:441 msgid "Invalid report type." msgstr "" diff --git a/locales/i18n.mk b/locales/i18n.mk index 9faa572..979eee1 100644 --- a/locales/i18n.mk +++ b/locales/i18n.mk @@ -23,7 +23,7 @@ translate-extract: --copyright-holder "Codeplag Development Team" \ --last-translator "Artyom Semidolin" \ --output-file ${LOCALES_DIR}/${UTIL_NAME}.pot . - sed -ri '2 s/[0-9]{4}/2024-2025/' ${LOCALES_DIR}/${UTIL_NAME}.pot + sed -ri '2 s/[0-9]{4}/2024-2026/' ${LOCALES_DIR}/${UTIL_NAME}.pot sed -i -e '4d;10d;$$ d' ${LOCALES_DIR}/${UTIL_NAME}.pot .PHONY: translate-update diff --git a/locales/translations/en/LC_MESSAGES/codeplag.po b/locales/translations/en/LC_MESSAGES/codeplag.po index e9c223b..1af80df 100644 --- a/locales/translations/en/LC_MESSAGES/codeplag.po +++ b/locales/translations/en/LC_MESSAGES/codeplag.po @@ -1,12 +1,12 @@ # English translations for codeplag. -# Copyright (C) 2024-2025 Codeplag Development Team +# Copyright (C) 2024-2026 Codeplag Development Team # This file is distributed under the same license as the codeplag project. # msgid "" msgstr "" -"Project-Id-Version: codeplag 0.6.1\n" +"Project-Id-Version: codeplag 0.6.2\n" "POT-Creation-Date: 2024-05-21 09:28+0300\n" -"PO-Revision-Date: 2025-03-28 12:05+0300\n" +"PO-Revision-Date: 2026-03-23 19:21+0300\n" "Last-Translator: Artyom Semidolin\n" "Language: en\n" "Language-Team: en \n" @@ -286,11 +286,11 @@ msgstr "" "The'path-regexp' option requires the provided 'directories', 'github-" "user', or 'github-urls' options." -#: src/codeplag/codeplagcli.py:460 src/codeplag/handlers/report.py:440 +#: src/codeplag/codeplagcli.py:460 src/codeplag/handlers/report.py:444 msgid "All paths must be provided." msgstr "All or none of the root paths must be specified." -#: src/codeplag/handlers/report.py:437 +#: src/codeplag/handlers/report.py:441 msgid "Invalid report type." msgstr "Invalid report type." diff --git a/locales/translations/ru/LC_MESSAGES/codeplag.po b/locales/translations/ru/LC_MESSAGES/codeplag.po index a0cc127..5fdebb8 100644 --- a/locales/translations/ru/LC_MESSAGES/codeplag.po +++ b/locales/translations/ru/LC_MESSAGES/codeplag.po @@ -1,10 +1,10 @@ # Russian translations for codeplag. -# Copyright (C) 2024-2025 Codeplag Development Team +# Copyright (C) 2024-2026 Codeplag Development Team # This file is distributed under the same license as the codeplag project. # msgid "" msgstr "" -"Project-Id-Version: codeplag 0.6.1\n" +"Project-Id-Version: codeplag 0.6.2\n" "POT-Creation-Date: 2024-05-21 09:28+0300\n" "PO-Revision-Date: 2025-03-28 12:05+0300\n" "Last-Translator: Artyom Semidolin\n" @@ -288,22 +288,22 @@ msgstr "" "модификации." #: src/codeplag/codeplagcli.py:443 -msgid "The'repo-regexp' option requires the provided 'github-user' option." +msgid "The 'repo-regexp' option requires the provided 'github-user' option." msgstr "Аргумент 'repo-regexp' требует заданного параметра 'github-user'." #: src/codeplag/codeplagcli.py:450 msgid "" -"The'path-regexp' option requires the provided 'directories', 'github-" +"The 'path-regexp' option requires the provided 'directories', 'github-" "user', or 'github-urls' options." msgstr "" "Аргумент 'path-regexp' требует заданного параметра 'directories', " "'github-user' или 'github-urls'." -#: src/codeplag/codeplagcli.py:460 src/codeplag/handlers/report.py:440 +#: src/codeplag/codeplagcli.py:460 src/codeplag/handlers/report.py:444 msgid "All paths must be provided." msgstr "Необходимо указать все корневые пути или не указывать ни одного." -#: src/codeplag/handlers/report.py:437 +#: src/codeplag/handlers/report.py:441 msgid "Invalid report type." msgstr "Некорректный тип отчёта." diff --git a/pyproject.toml b/pyproject.toml index 88e560f..55a2d75 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "codeplag" -version = "0.6.1" +version = "0.6.2" description = "Code plagiarism searching package." authors = [ { name = "Artyom Semidolin, Dmitry Nikolaev, Alexander Evsikov" } diff --git a/src/codeplag/codeplagcli.py b/src/codeplag/codeplagcli.py index 1a29ab4..3b0d7a5 100644 --- a/src/codeplag/codeplagcli.py +++ b/src/codeplag/codeplagcli.py @@ -440,14 +440,14 @@ def validate_args(self: Self, parsed_args: argparse.Namespace) -> None: elif root == "check": if parsed_args.repo_regexp and not parsed_args.github_user: self.error( - _("The'repo-regexp' option requires the provided 'github-user' option.") + _("The 'repo-regexp' option requires the provided 'github-user' option.") ) elif parsed_args.path_regexp and not ( parsed_args.directories or parsed_args.github_user or parsed_args.github_urls ): self.error( _( - "The'path-regexp' option requires the provided 'directories', " + "The 'path-regexp' option requires the provided 'directories', " "'github-user', or 'github-urls' options." ) ) diff --git a/src/codeplag/cplag/tree.py b/src/codeplag/cplag/tree.py index f987b82..cfc3b43 100644 --- a/src/codeplag/cplag/tree.py +++ b/src/codeplag/cplag/tree.py @@ -62,3 +62,4 @@ def __add_node_to_structure(features: ASTFeatures, node_name: str, curr_depth: i features.from_num[features.count_unodes] = node_name features.count_unodes += 1 features.structure.append(NodeStructurePlace(curr_depth, features.unodes[node_name])) + features.count_of_nodes += 1 diff --git a/src/codeplag/cplag/utils.py b/src/codeplag/cplag/utils.py index 63ee526..7cdd275 100644 --- a/src/codeplag/cplag/utils.py +++ b/src/codeplag/cplag/utils.py @@ -75,6 +75,9 @@ def _get_works_from_filepaths( continue features = get_features(cursor, filepath) + if features.count_of_nodes == 0: + codeplag_logger.debug("Skipping the file '%s' due it contains no code.", filepath) + continue if features_cache is not None: features_cache.save_features(features) works.append(features) @@ -118,6 +121,11 @@ def get_from_content(self: Self, work_info: WorkInfo) -> ASTFeatures | None: # hook for correct filtering info while parsing source code features = get_features(cursor, tf_path) tf_path.unlink() + if features.count_of_nodes == 0: + self.logger.debug( + "Skipping the file '%s' due it contains no code.", work_info.link + ) + return None features.filepath = work_info.link features.modify_date = work_info.commit.date if self.features_cache is not None: diff --git a/src/codeplag/handlers/report.py b/src/codeplag/handlers/report.py index af3c372..206a839 100644 --- a/src/codeplag/handlers/report.py +++ b/src/codeplag/handlers/report.py @@ -181,9 +181,10 @@ def calculate_sources_total_similarity( def _convert_similarity_matrix_to_percent_matrix(matrix: NDArray) -> NDArray: """Convert compliance matrix of size N x M x 2 to percent 2 dimensional matrix.""" - percent_matrix = np.empty((matrix.shape[0], matrix.shape[1]), dtype=np.float64) + columns = 0 if len(matrix.shape) == 1 else matrix.shape[1] + percent_matrix = np.empty((matrix.shape[0], columns), dtype=np.float64) for i in range(matrix.shape[0]): - for j in range(matrix.shape[1]): + for j in range(columns): percent_matrix[i][j] = round(matrix[i][j][0] / matrix[i][j][1] * 100, 2) return percent_matrix diff --git a/src/codeplag/pyplag/utils.py b/src/codeplag/pyplag/utils.py index 2f217d9..9cebf8d 100644 --- a/src/codeplag/pyplag/utils.py +++ b/src/codeplag/pyplag/utils.py @@ -119,6 +119,9 @@ def _get_works_from_filepaths( continue features = get_features_from_ast(tree, filename) + if features.count_of_nodes == 0: + logger.debug("Skipping the file '%s' due it contains no code.", filename) + continue if features_cache is not None: features_cache.save_features(features) works.append(features) @@ -152,6 +155,11 @@ def get_from_content(self: Self, work_info: WorkInfo) -> ASTFeatures | None: tree = get_ast_from_content(work_info.code, work_info.link) if tree is not None: features = get_features_from_ast(tree, work_info.link) + if features.count_of_nodes == 0: + self.logger.debug( + "Skipping the file '%s' due it contains no code.", work_info.link + ) + return None features.modify_date = work_info.commit.date if self.features_cache is not None: self.features_cache.save_features(features) diff --git a/src/codeplag/reporters.py b/src/codeplag/reporters.py index 8c7a27d..81fd863 100644 --- a/src/codeplag/reporters.py +++ b/src/codeplag/reporters.py @@ -217,7 +217,10 @@ def deserialize_compare_result_from_dict(result: dict) -> FullCompareInfo: def _deserialize_head_nodes(head_nodes: str) -> list[str]: - return [head[1:-1] for head in head_nodes[1:-1].split(", ")] + head_nodes_without_brackets = head_nodes[1:-1] + if not head_nodes_without_brackets: + return [] + return [head[1:-1] for head in head_nodes_without_brackets.split(", ")] def _deserialize_path(path: str) -> str | Path: diff --git a/test/auto/functional/test_check.py b/test/auto/functional/test_check.py index 98a2ced..c778dbb 100644 --- a/test/auto/functional/test_check.py +++ b/test/auto/functional/test_check.py @@ -61,7 +61,7 @@ def test_check_util_version(): ), ( ["--github-urls", *CPP_GITHUB_FILES, CPP_GITHUB_DIR], - b"Getting works features from GitHub urls", + b"Getting works features from", True, ), ( @@ -143,7 +143,7 @@ def test_check_short_output() -> None: ) def test_check_failed_when_repo_regexp_provided_without_required_args( cmd: list[str], -): +) -> None: result = run_check(cmd + ["--repo-regexp", "something"]) result.assert_argparse_error() @@ -153,7 +153,6 @@ def test_check_failed_when_repo_regexp_provided_without_required_args( "cmd", [ ["--files", *PY_FILES], - ["--github-urls", *PY_GITHUB_FILES], ], ) def test_check_failed_when_path_regexp_provided_without_required_args( diff --git a/test/unit/codeplag/cplag/test_tree.py b/test/unit/codeplag/cplag/test_tree.py index 7e21549..455a8df 100644 --- a/test/unit/codeplag/cplag/test_tree.py +++ b/test/unit/codeplag/cplag/test_tree.py @@ -89,7 +89,7 @@ def test_generic_visit(first_cursor: Cursor) -> None: generic_visit(first_cursor, features) assert features.filepath == _SAMPLE1_PATH - assert features.count_of_nodes == 0 + assert features.count_of_nodes == len(features.structure) == 23 assert features.head_nodes == ['gcd'] assert features.operators == {} assert features.keywords == {} @@ -97,7 +97,6 @@ def test_generic_visit(first_cursor: Cursor) -> None: assert len(features.unodes) == 10 assert len(features.from_num) == 10 assert features.count_unodes == 10 - assert len(features.structure) == 23 assert features.tokens == [8, 10, 10, 202, 205, 114, 100, 101, 106, 214, 100, 101, 214, 103, 100, 101, 100, 101, 114, @@ -108,7 +107,7 @@ def test_get_features(second_cursor: Cursor) -> None: features = get_features(second_cursor, _SAMPLE2_PATH) assert features.filepath == _SAMPLE2_PATH - assert features.count_of_nodes == 0 + assert features.count_of_nodes == len(features.structure) == 25 assert features.head_nodes == ['gcd'] assert features.operators == {'==': 1, '%': 1} assert features.keywords == {'int': 1, 'if': 1, 'return': 2, 'long': 2} @@ -116,7 +115,6 @@ def test_get_features(second_cursor: Cursor) -> None: assert len(features.unodes) == 10 assert len(features.from_num) == 10 assert features.count_unodes == 10 - assert len(features.structure) == 25 assert features.tokens == [8, 10, 10, 202, 205, 114, 100, 101, 106, 202, 214, 100, 100, 101, 214, 103, 100, 101, 100, @@ -128,7 +126,7 @@ def test_bad_encoding_syms(third_cursor: Cursor) -> None: features = get_features(third_cursor, _SAMPLE3_PATH) assert features.filepath == _SAMPLE3_PATH - assert features.count_of_nodes == 0 + assert features.count_of_nodes == len(features.structure) == 167 assert features.head_nodes == ['main'] # TODO: why so many '<', '>' may be from include, ignore it assert features.operators == {'==': 1, '<': 5, '>': 3, '!=': 1, '&': 3, '*': 1, '=': 4} @@ -139,6 +137,5 @@ def test_bad_encoding_syms(third_cursor: Cursor) -> None: assert len(features.unodes) == 18 assert len(features.from_num) == 18 assert features.count_unodes == 18 - assert len(features.structure) == 167 assert len(features.tokens) == 167 assert features.sha256 == "236f1b7ea02c3f68e390c7e155fec1a198d4c9ab3d8306d613df8399189291de" diff --git a/test/unit/codeplag/handlers/test_report.py b/test/unit/codeplag/handlers/test_report.py index 2b3ba62..b7a1e4c 100644 --- a/test/unit/codeplag/handlers/test_report.py +++ b/test/unit/codeplag/handlers/test_report.py @@ -3,6 +3,7 @@ import numpy as np import pandas as pd import pytest +from numpy.typing import NDArray from codeplag.handlers.report import ( CntHeadNodes, @@ -268,7 +269,23 @@ def test__get_resulting_same_percentages( assert _get_resulting_same_percentages(same_parts_of_all, cnt_head_nodes) == expected -def test__convert_similarity_matrix_to_percent_matrix(): - assert _convert_similarity_matrix_to_percent_matrix( - np.array([[[1, 2], [1, 3], [3, 4]], [[1, 8], [1, 4], [3, 5]]]) - ).tolist() == [[50.0, 33.33, 75.0], [12.5, 25.0, 60.0]] +@pytest.mark.parametrize( + ("matrix", "result"), + [ + ( + np.array([[[1, 2], [1, 3], [3, 4]], [[1, 8], [1, 4], [3, 5]]]), + [[50.0, 33.33, 75.0], [12.5, 25.0, 60.0]], + ), + ( + np.array([]), + [], + ), + ], +) +def test__convert_similarity_matrix_to_percent_matrix(matrix: NDArray, result: NDArray) -> None: + assert ( + _convert_similarity_matrix_to_percent_matrix( + matrix, + ).tolist() + == result + ) diff --git a/test/unit/codeplag/test_reporters.py b/test/unit/codeplag/test_reporters.py index dd18010..9fa54a9 100644 --- a/test/unit/codeplag/test_reporters.py +++ b/test/unit/codeplag/test_reporters.py @@ -107,6 +107,7 @@ def test_compare_info_serialize_deserialize(first_compare_result: FullCompareInf "['Expr[1]', 'Expr[14]', 'application[16]']", ["Expr[1]", "Expr[14]", "application[16]"], ), + ("[]", []), ], ) def test__deserialize_head_nodes(str_head_nodes: str, list_head_nodes: list[str]) -> None: