Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
MIT License

Copyright (c) 2021-2025 Codeplag Development Team
Copyright (c) 2021-2026 Codeplag Development Team

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
1 change: 1 addition & 0 deletions docker/ubuntu2404.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ ARG DEB_PKG_NAME
ADD LICENSE /usr/src/$UTIL_NAME/LICENSE
ADD $DEBIAN_PACKAGES_PATH /usr/src/$UTIL_NAME/$DEBIAN_PACKAGES_PATH

RUN apt-get update
RUN apt-get install -y /usr/src/$UTIL_NAME/$DEBIAN_PACKAGES_PATH/$DEB_PKG_NAME.deb
# TODO: Fix this hook. apt-get doesn't install the manpage into the image.
RUN install -D -m 0644 $DEBIAN_PACKAGES_PATH/$UTIL_NAME.1 /usr/share/man/man1/
Expand Down
14 changes: 7 additions & 7 deletions locales/codeplag.pot
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
# Translations template for codeplag.
# Copyright (C) 2024-2025 Codeplag Development Team
# Copyright (C) 2024-2026 Codeplag Development Team
# This file is distributed under the same license as the codeplag project.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: codeplag 0.6.1\n"
"POT-Creation-Date: 2025-12-02 18:35+0300\n"
"Project-Id-Version: codeplag 0.6.2\n"
"POT-Creation-Date: 2026-03-23 19:20+0300\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: Artyom Semidolin\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
Expand Down Expand Up @@ -242,20 +242,20 @@ msgid "There is nothing to modify; please provide at least one argument."
msgstr ""

#: src/codeplag/codeplagcli.py:443
msgid "The'repo-regexp' option requires the provided 'github-user' option."
msgid "The 'repo-regexp' option requires the provided 'github-user' option."
msgstr ""

#: src/codeplag/codeplagcli.py:450
msgid ""
"The'path-regexp' option requires the provided 'directories', 'github-"
"The 'path-regexp' option requires the provided 'directories', 'github-"
"user', or 'github-urls' options."
msgstr ""

#: src/codeplag/codeplagcli.py:460 src/codeplag/handlers/report.py:440
#: src/codeplag/codeplagcli.py:460 src/codeplag/handlers/report.py:444
msgid "All paths must be provided."
msgstr ""

#: src/codeplag/handlers/report.py:437
#: src/codeplag/handlers/report.py:441
msgid "Invalid report type."
msgstr ""

Expand Down
2 changes: 1 addition & 1 deletion locales/i18n.mk
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ translate-extract:
--copyright-holder "Codeplag Development Team" \
--last-translator "Artyom Semidolin" \
--output-file ${LOCALES_DIR}/${UTIL_NAME}.pot .
sed -ri '2 s/[0-9]{4}/2024-2025/' ${LOCALES_DIR}/${UTIL_NAME}.pot
sed -ri '2 s/[0-9]{4}/2024-2026/' ${LOCALES_DIR}/${UTIL_NAME}.pot
sed -i -e '4d;10d;$$ d' ${LOCALES_DIR}/${UTIL_NAME}.pot

.PHONY: translate-update
Expand Down
10 changes: 5 additions & 5 deletions locales/translations/en/LC_MESSAGES/codeplag.po
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
# English translations for codeplag.
# Copyright (C) 2024-2025 Codeplag Development Team
# Copyright (C) 2024-2026 Codeplag Development Team
# This file is distributed under the same license as the codeplag project.
#
msgid ""
msgstr ""
"Project-Id-Version: codeplag 0.6.1\n"
"Project-Id-Version: codeplag 0.6.2\n"
"POT-Creation-Date: 2024-05-21 09:28+0300\n"
"PO-Revision-Date: 2025-03-28 12:05+0300\n"
"PO-Revision-Date: 2026-03-23 19:21+0300\n"
"Last-Translator: Artyom Semidolin\n"
"Language: en\n"
"Language-Team: en <LL@li.org>\n"
Expand Down Expand Up @@ -286,11 +286,11 @@ msgstr ""
"The 'path-regexp' option requires the provided 'directories', 'github-"
"user', or 'github-urls' options."

#: src/codeplag/codeplagcli.py:460 src/codeplag/handlers/report.py:440
#: src/codeplag/codeplagcli.py:460 src/codeplag/handlers/report.py:444
msgid "All paths must be provided."
msgstr "All or none of the root paths must be specified."

#: src/codeplag/handlers/report.py:437
#: src/codeplag/handlers/report.py:441
msgid "Invalid report type."
msgstr "Invalid report type."

Expand Down
12 changes: 6 additions & 6 deletions locales/translations/ru/LC_MESSAGES/codeplag.po
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# Russian translations for codeplag.
# Copyright (C) 2024-2025 Codeplag Development Team
# Copyright (C) 2024-2026 Codeplag Development Team
# This file is distributed under the same license as the codeplag project.
#
msgid ""
msgstr ""
"Project-Id-Version: codeplag 0.6.1\n"
"Project-Id-Version: codeplag 0.6.2\n"
"POT-Creation-Date: 2024-05-21 09:28+0300\n"
"PO-Revision-Date: 2025-03-28 12:05+0300\n"
"Last-Translator: Artyom Semidolin\n"
Expand Down Expand Up @@ -288,22 +288,22 @@ msgstr ""
"модификации."

#: src/codeplag/codeplagcli.py:443
msgid "The'repo-regexp' option requires the provided 'github-user' option."
msgid "The 'repo-regexp' option requires the provided 'github-user' option."
msgstr "Аргумент 'repo-regexp' требует заданного параметра 'github-user'."

#: src/codeplag/codeplagcli.py:450
msgid ""
"The'path-regexp' option requires the provided 'directories', 'github-"
"The 'path-regexp' option requires the provided 'directories', 'github-"
"user', or 'github-urls' options."
msgstr ""
"Аргумент 'path-regexp' требует заданного параметра 'directories', "
"'github-user' или 'github-urls'."

#: src/codeplag/codeplagcli.py:460 src/codeplag/handlers/report.py:440
#: src/codeplag/codeplagcli.py:460 src/codeplag/handlers/report.py:444
msgid "All paths must be provided."
msgstr "Необходимо указать все корневые пути или не указывать ни одного."

#: src/codeplag/handlers/report.py:437
#: src/codeplag/handlers/report.py:441
msgid "Invalid report type."
msgstr "Некорректный тип отчёта."

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "codeplag"
version = "0.6.1"
version = "0.6.2"
description = "Code plagiarism searching package."
authors = [
{ name = "Artyom Semidolin, Dmitry Nikolaev, Alexander Evsikov" }
Expand Down
4 changes: 2 additions & 2 deletions src/codeplag/codeplagcli.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,14 +440,14 @@ def validate_args(self: Self, parsed_args: argparse.Namespace) -> None:
elif root == "check":
if parsed_args.repo_regexp and not parsed_args.github_user:
self.error(
_("The'repo-regexp' option requires the provided 'github-user' option.")
_("The 'repo-regexp' option requires the provided 'github-user' option.")
)
elif parsed_args.path_regexp and not (
parsed_args.directories or parsed_args.github_user or parsed_args.github_urls
):
self.error(
_(
"The'path-regexp' option requires the provided 'directories', "
"The 'path-regexp' option requires the provided 'directories', "
"'github-user', or 'github-urls' options."
)
)
Expand Down
1 change: 1 addition & 0 deletions src/codeplag/cplag/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,4 @@ def __add_node_to_structure(features: ASTFeatures, node_name: str, curr_depth: i
features.from_num[features.count_unodes] = node_name
features.count_unodes += 1
features.structure.append(NodeStructurePlace(curr_depth, features.unodes[node_name]))
features.count_of_nodes += 1
8 changes: 8 additions & 0 deletions src/codeplag/cplag/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ def _get_works_from_filepaths(
continue

features = get_features(cursor, filepath)
if features.count_of_nodes == 0:
codeplag_logger.debug("Skipping the file '%s' due it contains no code.", filepath)
continue
if features_cache is not None:
features_cache.save_features(features)
works.append(features)
Expand Down Expand Up @@ -118,6 +121,11 @@ def get_from_content(self: Self, work_info: WorkInfo) -> ASTFeatures | None:
# hook for correct filtering info while parsing source code
features = get_features(cursor, tf_path)
tf_path.unlink()
if features.count_of_nodes == 0:
self.logger.debug(
"Skipping the file '%s' due it contains no code.", work_info.link
)
return None
features.filepath = work_info.link
features.modify_date = work_info.commit.date
if self.features_cache is not None:
Expand Down
5 changes: 3 additions & 2 deletions src/codeplag/handlers/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,9 +181,10 @@ def calculate_sources_total_similarity(

def _convert_similarity_matrix_to_percent_matrix(matrix: NDArray) -> NDArray:
"""Convert compliance matrix of size N x M x 2 to percent 2 dimensional matrix."""
percent_matrix = np.empty((matrix.shape[0], matrix.shape[1]), dtype=np.float64)
columns = 0 if len(matrix.shape) == 1 else matrix.shape[1]
percent_matrix = np.empty((matrix.shape[0], columns), dtype=np.float64)
for i in range(matrix.shape[0]):
for j in range(matrix.shape[1]):
for j in range(columns):
percent_matrix[i][j] = round(matrix[i][j][0] / matrix[i][j][1] * 100, 2)
return percent_matrix

Expand Down
8 changes: 8 additions & 0 deletions src/codeplag/pyplag/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,9 @@ def _get_works_from_filepaths(
continue

features = get_features_from_ast(tree, filename)
if features.count_of_nodes == 0:
logger.debug("Skipping the file '%s' due it contains no code.", filename)
continue
if features_cache is not None:
features_cache.save_features(features)
works.append(features)
Expand Down Expand Up @@ -152,6 +155,11 @@ def get_from_content(self: Self, work_info: WorkInfo) -> ASTFeatures | None:
tree = get_ast_from_content(work_info.code, work_info.link)
if tree is not None:
features = get_features_from_ast(tree, work_info.link)
if features.count_of_nodes == 0:
self.logger.debug(
"Skipping the file '%s' due it contains no code.", work_info.link
)
return None
features.modify_date = work_info.commit.date
if self.features_cache is not None:
self.features_cache.save_features(features)
Expand Down
5 changes: 4 additions & 1 deletion src/codeplag/reporters.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,10 @@ def deserialize_compare_result_from_dict(result: dict) -> FullCompareInfo:


def _deserialize_head_nodes(head_nodes: str) -> list[str]:
return [head[1:-1] for head in head_nodes[1:-1].split(", ")]
head_nodes_without_brackets = head_nodes[1:-1]
if not head_nodes_without_brackets:
return []
return [head[1:-1] for head in head_nodes_without_brackets.split(", ")]


def _deserialize_path(path: str) -> str | Path:
Expand Down
5 changes: 2 additions & 3 deletions test/auto/functional/test_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def test_check_util_version():
),
(
["--github-urls", *CPP_GITHUB_FILES, CPP_GITHUB_DIR],
b"Getting works features from GitHub urls",
b"Getting works features from",
True,
),
(
Expand Down Expand Up @@ -143,7 +143,7 @@ def test_check_short_output() -> None:
)
def test_check_failed_when_repo_regexp_provided_without_required_args(
cmd: list[str],
):
) -> None:
result = run_check(cmd + ["--repo-regexp", "something"])

result.assert_argparse_error()
Expand All @@ -153,7 +153,6 @@ def test_check_failed_when_repo_regexp_provided_without_required_args(
"cmd",
[
["--files", *PY_FILES],
["--github-urls", *PY_GITHUB_FILES],
],
)
def test_check_failed_when_path_regexp_provided_without_required_args(
Expand Down
9 changes: 3 additions & 6 deletions test/unit/codeplag/cplag/test_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,15 +89,14 @@ def test_generic_visit(first_cursor: Cursor) -> None:
generic_visit(first_cursor, features)

assert features.filepath == _SAMPLE1_PATH
assert features.count_of_nodes == 0
assert features.count_of_nodes == len(features.structure) == 23
assert features.head_nodes == ['gcd']
assert features.operators == {}
assert features.keywords == {}
assert features.literals == {}
assert len(features.unodes) == 10
assert len(features.from_num) == 10
assert features.count_unodes == 10
assert len(features.structure) == 23
assert features.tokens == [8, 10, 10, 202, 205, 114, 100,
101, 106, 214, 100, 101, 214,
103, 100, 101, 100, 101, 114,
Expand All @@ -108,15 +107,14 @@ def test_get_features(second_cursor: Cursor) -> None:
features = get_features(second_cursor, _SAMPLE2_PATH)

assert features.filepath == _SAMPLE2_PATH
assert features.count_of_nodes == 0
assert features.count_of_nodes == len(features.structure) == 25
assert features.head_nodes == ['gcd']
assert features.operators == {'==': 1, '%': 1}
assert features.keywords == {'int': 1, 'if': 1, 'return': 2, 'long': 2}
assert features.literals == {'0L': 1}
assert len(features.unodes) == 10
assert len(features.from_num) == 10
assert features.count_unodes == 10
assert len(features.structure) == 25
assert features.tokens == [8, 10, 10, 202, 205, 114, 100,
101, 106, 202, 214, 100, 100,
101, 214, 103, 100, 101, 100,
Expand All @@ -128,7 +126,7 @@ def test_bad_encoding_syms(third_cursor: Cursor) -> None:
features = get_features(third_cursor, _SAMPLE3_PATH)

assert features.filepath == _SAMPLE3_PATH
assert features.count_of_nodes == 0
assert features.count_of_nodes == len(features.structure) == 167
assert features.head_nodes == ['main']
# TODO: find out why there are so many '<' and '>'; they may come from the includes, ignore it for now
assert features.operators == {'==': 1, '<': 5, '>': 3, '!=': 1, '&': 3, '*': 1, '=': 4}
Expand All @@ -139,6 +137,5 @@ def test_bad_encoding_syms(third_cursor: Cursor) -> None:
assert len(features.unodes) == 18
assert len(features.from_num) == 18
assert features.count_unodes == 18
assert len(features.structure) == 167
assert len(features.tokens) == 167
assert features.sha256 == "236f1b7ea02c3f68e390c7e155fec1a198d4c9ab3d8306d613df8399189291de"
25 changes: 21 additions & 4 deletions test/unit/codeplag/handlers/test_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import numpy as np
import pandas as pd
import pytest
from numpy.typing import NDArray

from codeplag.handlers.report import (
CntHeadNodes,
Expand Down Expand Up @@ -268,7 +269,23 @@ def test__get_resulting_same_percentages(
assert _get_resulting_same_percentages(same_parts_of_all, cnt_head_nodes) == expected


def test__convert_similarity_matrix_to_percent_matrix():
assert _convert_similarity_matrix_to_percent_matrix(
np.array([[[1, 2], [1, 3], [3, 4]], [[1, 8], [1, 4], [3, 5]]])
).tolist() == [[50.0, 33.33, 75.0], [12.5, 25.0, 60.0]]
@pytest.mark.parametrize(
("matrix", "result"),
[
(
np.array([[[1, 2], [1, 3], [3, 4]], [[1, 8], [1, 4], [3, 5]]]),
[[50.0, 33.33, 75.0], [12.5, 25.0, 60.0]],
),
(
np.array([]),
[],
),
],
)
def test__convert_similarity_matrix_to_percent_matrix(matrix: NDArray, result: NDArray) -> None:
assert (
_convert_similarity_matrix_to_percent_matrix(
matrix,
).tolist()
== result
)
1 change: 1 addition & 0 deletions test/unit/codeplag/test_reporters.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ def test_compare_info_serialize_deserialize(first_compare_result: FullCompareInf
"['Expr[1]', 'Expr[14]', 'application[16]']",
["Expr[1]", "Expr[14]", "application[16]"],
),
("[]", []),
],
)
def test__deserialize_head_nodes(str_head_nodes: str, list_head_nodes: list[str]) -> None:
Expand Down
Loading