From 58f0dd93b1d8f44f1fbe3026a40a3d53082496e3 Mon Sep 17 00:00:00 2001 From: bornunique911 Date: Tue, 24 Mar 2026 23:04:55 +0530 Subject: [PATCH 01/22] Add OWASP Top 10 and API importer support --- .../tests/owasp_api_top10_2023_parser_test.py | 43 ++++++++++ .../tests/owasp_top10_2025_parser_test.py | 80 +++++++++++++++++++ .../data/owasp_api_top10_2023.json | 62 ++++++++++++++ .../data/owasp_top10_2025.json | 62 ++++++++++++++ .../parsers/owasp_api_top10_2023.py | 47 +++++++++++ .../parsers/owasp_top10_2025.py | 47 +++++++++++ cre.py | 30 +++++++ 7 files changed, 371 insertions(+) create mode 100644 application/tests/owasp_api_top10_2023_parser_test.py create mode 100644 application/tests/owasp_top10_2025_parser_test.py create mode 100644 application/utils/external_project_parsers/data/owasp_api_top10_2023.json create mode 100644 application/utils/external_project_parsers/data/owasp_top10_2025.json create mode 100644 application/utils/external_project_parsers/parsers/owasp_api_top10_2023.py create mode 100644 application/utils/external_project_parsers/parsers/owasp_top10_2025.py diff --git a/application/tests/owasp_api_top10_2023_parser_test.py b/application/tests/owasp_api_top10_2023_parser_test.py new file mode 100644 index 000000000..806d11bed --- /dev/null +++ b/application/tests/owasp_api_top10_2023_parser_test.py @@ -0,0 +1,43 @@ +import unittest + +from application import create_app, sqla # type: ignore +from application.database import db +from application.defs import cre_defs as defs +from application.prompt_client import prompt_client +from application.utils.external_project_parsers.parsers import owasp_api_top10_2023 + + +class TestOwaspApiTop10_2023Parser(unittest.TestCase): + def tearDown(self) -> None: + sqla.session.remove() + sqla.drop_all() + self.app_context.pop() + + def setUp(self) -> None: + self.app = create_app(mode="test") + self.app_context = self.app.app_context() + self.app_context.push() + sqla.create_all() + self.collection = 
db.Node_collection() + + def test_parse(self) -> None: + for cre_id, name in [ + ("304-667", "Protect API against unauthorized access/modification (IDOR)"), + ("724-770", "Technical application access control"), + ("715-223", "Ensure trusted origin of third party resources"), + ]: + self.collection.add_cre(defs.CRE(id=cre_id, name=name, description="")) + + result = owasp_api_top10_2023.OwaspApiTop10_2023().parse( + self.collection, prompt_client.PromptHandler(database=self.collection) + ) + + entries = result.results["OWASP API Security Top 10 2023"] + self.assertEqual(10, len(entries)) + self.assertEqual("API1", entries[0].sectionID) + self.assertEqual("Broken Object Level Authorization", entries[0].section) + self.assertEqual( + ["304-667", "724-770"], [l.document.id for l in entries[0].links] + ) + self.assertEqual("API10", entries[-1].sectionID) + self.assertEqual(["715-223"], [l.document.id for l in entries[-1].links]) diff --git a/application/tests/owasp_top10_2025_parser_test.py b/application/tests/owasp_top10_2025_parser_test.py new file mode 100644 index 000000000..de4f86a9f --- /dev/null +++ b/application/tests/owasp_top10_2025_parser_test.py @@ -0,0 +1,80 @@ +import unittest + +from application import create_app, sqla # type: ignore +from application.database import db +from application.defs import cre_defs as defs +from application.prompt_client import prompt_client +from application.utils.external_project_parsers.parsers import owasp_top10_2025 + + +class TestOwaspTop10_2025Parser(unittest.TestCase): + def tearDown(self) -> None: + sqla.session.remove() + sqla.drop_all() + self.app_context.pop() + + def setUp(self) -> None: + self.app = create_app(mode="test") + self.app_context = self.app.app_context() + self.app_context.push() + sqla.create_all() + self.collection = db.Node_collection() + + def test_parse(self) -> None: + self.collection.add_cre( + defs.CRE(id="177-260", name="Session management", description="") + ) + self.collection.add_cre( + 
defs.CRE( + id="117-371", + name="Use a centralized access control mechanism", + description="", + ) + ) + self.collection.add_cre( + defs.CRE( + id="724-770", + name="Technical application access control", + description="", + ) + ) + self.collection.add_cre( + defs.CRE( + id="031-447", name="Whitelist all external (HTTP) input", description="" + ) + ) + self.collection.add_cre( + defs.CRE( + id="064-808", name="Encode output context-specifically", description="" + ) + ) + self.collection.add_cre( + defs.CRE(id="760-764", name="Injection protection", description="") + ) + self.collection.add_cre( + defs.CRE(id="513-183", name="Error handling", description="") + ) + + result = owasp_top10_2025.OwaspTop10_2025().parse( + self.collection, + prompt_client.PromptHandler(database=self.collection), + ) + + entries = result.results["OWASP Top 10 2025"] + self.assertEqual(10, len(entries)) + self.assertEqual("A01", entries[0].sectionID) + self.assertEqual("Broken Access Control", entries[0].section) + self.assertEqual( + "https://owasp.org/Top10/2025/A01_2025-Broken_Access_Control/", + entries[0].hyperlink, + ) + self.assertEqual( + ["117-371", "177-260", "724-770"], + [link.document.id for link in entries[0].links], + ) + self.assertEqual( + ["031-447", "064-808", "760-764"], + [link.document.id for link in entries[4].links], + ) + self.assertEqual("A10", entries[-1].sectionID) + self.assertEqual(["513-183"], [link.document.id for link in entries[-1].links]) diff --git a/application/utils/external_project_parsers/data/owasp_api_top10_2023.json b/application/utils/external_project_parsers/data/owasp_api_top10_2023.json new file mode 100644 index 000000000..7a8df0ed0 --- /dev/null +++ b/application/utils/external_project_parsers/data/owasp_api_top10_2023.json @@ -0,0 +1,62 @@ +[ + { + "section_id": "API1", + "section": "Broken Object Level Authorization", + "hyperlink": "https://owasp.org/API-Security/editions/2023/en/0xa1-broken-object-level-authorization/", + "cre_ids": 
["304-667", "724-770"] + }, + { + "section_id": "API2", + "section": "Broken Authentication", + "hyperlink": "https://owasp.org/API-Security/editions/2023/en/0xa2-broken-authentication/", + "cre_ids": ["177-260", "586-842", "633-428"] + }, + { + "section_id": "API3", + "section": "Broken Object Property Level Authorization", + "hyperlink": "https://owasp.org/API-Security/editions/2023/en/0xa3-broken-object-property-level-authorization/", + "cre_ids": ["538-770", "724-770", "128-128"] + }, + { + "section_id": "API4", + "section": "Unrestricted Resource Consumption", + "hyperlink": "https://owasp.org/API-Security/editions/2023/en/0xa4-unrestricted-resource-consumption/", + "cre_ids": ["623-550"] + }, + { + "section_id": "API5", + "section": "Broken Function Level Authorization", + "hyperlink": "https://owasp.org/API-Security/editions/2023/en/0xa5-broken-function-level-authorization/", + "cre_ids": ["650-560", "724-770"] + }, + { + "section_id": "API6", + "section": "Unrestricted Access to Sensitive Business Flows", + "hyperlink": "https://owasp.org/API-Security/editions/2023/en/0xa6-unrestricted-access-to-sensitive-business-flows/", + "cre_ids": ["534-605", "630-573"] + }, + { + "section_id": "API7", + "section": "Server Side Request Forgery", + "hyperlink": "https://owasp.org/API-Security/editions/2023/en/0xa7-server-side-request-forgery/", + "cre_ids": ["028-728", "657-084"] + }, + { + "section_id": "API8", + "section": "Security Misconfiguration", + "hyperlink": "https://owasp.org/API-Security/editions/2023/en/0xa8-security-misconfiguration/", + "cre_ids": ["486-813"] + }, + { + "section_id": "API9", + "section": "Improper Inventory Management", + "hyperlink": "https://owasp.org/API-Security/editions/2023/en/0xa9-improper-inventory-management/", + "cre_ids": ["162-655", "863-521"] + }, + { + "section_id": "API10", + "section": "Unsafe Consumption of APIs", + "hyperlink": "https://owasp.org/API-Security/editions/2023/en/0xaa-unsafe-consumption-of-apis/", + 
"cre_ids": ["715-223"] + } +] diff --git a/application/utils/external_project_parsers/data/owasp_top10_2025.json b/application/utils/external_project_parsers/data/owasp_top10_2025.json new file mode 100644 index 000000000..7e19d1a4e --- /dev/null +++ b/application/utils/external_project_parsers/data/owasp_top10_2025.json @@ -0,0 +1,62 @@ +[ + { + "section_id": "A01", + "section": "Broken Access Control", + "hyperlink": "https://owasp.org/Top10/2025/A01_2025-Broken_Access_Control/", + "cre_ids": ["117-371", "177-260", "724-770"] + }, + { + "section_id": "A02", + "section": "Security Misconfiguration", + "hyperlink": "https://owasp.org/Top10/2025/A02_2025-Security_Misconfiguration/", + "cre_ids": ["486-813"] + }, + { + "section_id": "A03", + "section": "Software Supply Chain Failures", + "hyperlink": "https://owasp.org/Top10/2025/A03_2025-Software_Supply_Chain_Failures/", + "cre_ids": ["613-286", "613-287", "715-223", "863-521"] + }, + { + "section_id": "A04", + "section": "Cryptographic Failures", + "hyperlink": "https://owasp.org/Top10/2025/A04_2025-Cryptographic_Failures/", + "cre_ids": ["170-772", "227-045"] + }, + { + "section_id": "A05", + "section": "Injection", + "hyperlink": "https://owasp.org/Top10/2025/A05_2025-Injection/", + "cre_ids": ["031-447", "064-808", "760-764"] + }, + { + "section_id": "A06", + "section": "Insecure Design", + "hyperlink": "https://owasp.org/Top10/2025/A06_2025-Insecure_Design/", + "cre_ids": ["126-668", "155-155"] + }, + { + "section_id": "A07", + "section": "Authentication Failures", + "hyperlink": "https://owasp.org/Top10/2025/A07_2025-Authentication_Failures/", + "cre_ids": ["002-630", "177-260", "586-842", "633-428"] + }, + { + "section_id": "A08", + "section": "Software or Data Integrity Failures", + "hyperlink": "https://owasp.org/Top10/2025/A08_2025-Software_or_Data_Integrity_Failures/", + "cre_ids": ["613-287", "836-068"] + }, + { + "section_id": "A09", + "section": "Security Logging and Alerting Failures", + "hyperlink": 
"https://owasp.org/Top10/2025/A09_2025-Security_Logging_and_Alerting_Failures/", + "cre_ids": ["067-050", "148-420", "402-706", "843-841"] + }, + { + "section_id": "A10", + "section": "Mishandling of Exceptional Conditions", + "hyperlink": "https://owasp.org/Top10/2025/A10_2025-Mishandling_of_Exceptional_Conditions/", + "cre_ids": ["513-183"] + } +] diff --git a/application/utils/external_project_parsers/parsers/owasp_api_top10_2023.py b/application/utils/external_project_parsers/parsers/owasp_api_top10_2023.py new file mode 100644 index 000000000..08157a1e9 --- /dev/null +++ b/application/utils/external_project_parsers/parsers/owasp_api_top10_2023.py @@ -0,0 +1,47 @@ +import json +from pathlib import Path + +from application.database import db +from application.defs import cre_defs as defs +from application.prompt_client import prompt_client +from application.utils.external_project_parsers.base_parser_defs import ( + ParseResult, + ParserInterface, +) + + +class OwaspApiTop10_2023(ParserInterface): + name = "OWASP API Security Top 10 2023" + data_file = ( + Path(__file__).resolve().parent.parent / "data" / "owasp_api_top10_2023.json" + ) + + def parse(self, cache: db.Node_collection, ph: prompt_client.PromptHandler): + with self.data_file.open("r", encoding="utf-8") as handle: + raw_entries = json.load(handle) + + entries = [] + for entry in raw_entries: + standard = defs.Standard( + name=self.name, + sectionID=entry["section_id"], + section=entry["section"], + hyperlink=entry["hyperlink"], + ) + for cre_id in entry.get("cre_ids", []): + cres = cache.get_CREs(external_id=cre_id) + if not cres: + continue + standard.add_link( + defs.Link( + ltype=defs.LinkTypes.LinkedTo, + document=cres[0].shallow_copy(), + ) + ) + entries.append(standard) + + return ParseResult( + results={self.name: entries}, + calculate_gap_analysis=False, + calculate_embeddings=False, + ) diff --git a/application/utils/external_project_parsers/parsers/owasp_top10_2025.py 
b/application/utils/external_project_parsers/parsers/owasp_top10_2025.py new file mode 100644 index 000000000..070f869af --- /dev/null +++ b/application/utils/external_project_parsers/parsers/owasp_top10_2025.py @@ -0,0 +1,47 @@ +import json +from pathlib import Path + +from application.database import db +from application.defs import cre_defs as defs +from application.prompt_client import prompt_client +from application.utils.external_project_parsers.base_parser_defs import ( + ParseResult, + ParserInterface, +) + + +class OwaspTop10_2025(ParserInterface): + name = "OWASP Top 10 2025" + data_file = ( + Path(__file__).resolve().parent.parent / "data" / "owasp_top10_2025.json" + ) + + def parse(self, cache: db.Node_collection, ph: prompt_client.PromptHandler): + with self.data_file.open("r", encoding="utf-8") as handle: + raw_entries = json.load(handle) + + entries = [] + for entry in raw_entries: + standard = defs.Standard( + name=self.name, + sectionID=entry["section_id"], + section=entry["section"], + hyperlink=entry["hyperlink"], + ) + for cre_id in entry.get("cre_ids", []): + cres = cache.get_CREs(external_id=cre_id) + if not cres: + continue + standard.add_link( + defs.Link( + ltype=defs.LinkTypes.LinkedTo, + document=cres[0].shallow_copy(), + ) + ) + entries.append(standard) + + return ParseResult( + results={self.name: entries}, + calculate_gap_analysis=False, + calculate_embeddings=False, + ) diff --git a/cre.py b/cre.py index 99735890f..1ffc24951 100644 --- a/cre.py +++ b/cre.py @@ -167,6 +167,36 @@ def main() -> None: action="store_true", help="import owasp secure headers", ) + parser.add_argument( + "--owasp_top10_2025_in", + action="store_true", + help="import OWASP Top 10 2025", + ) + parser.add_argument( + "--owasp_api_top10_2023_in", + action="store_true", + help="import OWASP API Security Top 10 2023", + ) + parser.add_argument( + "--owasp_kubernetes_top10_2022_in", + action="store_true", + help="import OWASP Kubernetes Top Ten 2022", + ) + 
parser.add_argument( + "--owasp_kubernetes_top10_2025_in", + action="store_true", + help="import OWASP Kubernetes Top Ten 2025 draft", + ) + parser.add_argument( + "--owasp_llm_top10_2025_in", + action="store_true", + help="import OWASP Top 10 for LLM and Gen AI Apps 2025", + ) + parser.add_argument( + "--owasp_aisvs_in", + action="store_true", + help="import OWASP AI Security Verification Standard (AISVS)", + ) parser.add_argument( "--pci_dss_3_2_in", action="store_true", From 72184329e2945f6f1fd388493ad8a593ab381ca3 Mon Sep 17 00:00:00 2001 From: bornunique911 Date: Wed, 1 Apr 2026 16:13:04 +0530 Subject: [PATCH 02/22] Fix cheat sheet parser test expectations on importer branches --- application/tests/cheatsheets_parser_test.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/application/tests/cheatsheets_parser_test.py b/application/tests/cheatsheets_parser_test.py index 1a3ba4bf0..e2c0910d6 100644 --- a/application/tests/cheatsheets_parser_test.py +++ b/application/tests/cheatsheets_parser_test.py @@ -34,7 +34,13 @@ class Repo: repo.working_dir = loc cre = defs.CRE(name="blah", id="223-780") self.collection.add_cre(cre) - with open(os.path.join(os.path.join(loc, "cheatsheets"), "cs.md"), "w") as mdf: + with open( + os.path.join( + os.path.join(loc, "cheatsheets"), + "Secrets_Management_Cheat_Sheet.md", + ), + "w", + ) as mdf: mdf.write(cs) mock_clone.return_value = repo entries = cheatsheets_parser.Cheatsheets().parse( @@ -45,22 +51,26 @@ class Repo: # verify the external tagging convention, not just enum wiring. 
expected = defs.Standard( name="OWASP Cheat Sheets", - hyperlink="https://github.com/foo/bar/tree/master/cs.md", + hyperlink="https://cheatsheetseries.owasp.org/cheatsheets/Secrets_Management_Cheat_Sheet.html", section="Secrets Management Cheat Sheet", - links=[defs.Link(document=cre, ltype=defs.LinkTypes.LinkedTo)], + links=[ + defs.Link( + document=cre, ltype=defs.LinkTypes.AutomaticallyLinkedTo + ) + ], tags=[ "family:guidance", "subtype:cheatsheet", - "source:owasp_cheatsheets", "audience:developer", "maturity:stable", + "source:owasp_cheatsheets", ], ) self.maxDiff = None for name, nodes in entries.results.items(): self.assertEqual(name, parser.name) self.assertEqual(len(nodes), 1) - self.assertCountEqual(expected.todict(), nodes[0].todict()) + self.assertEqual(expected.todict(), nodes[0].todict()) cheatsheets_md = """ # Secrets Management Cheat Sheet From 616e9f4ffb7a297d0918d34302e611eacdda27a8 Mon Sep 17 00:00:00 2001 From: bornunique911 Date: Wed, 1 Apr 2026 16:14:21 +0530 Subject: [PATCH 03/22] Use official OWASP cheat sheet URLs in importer branches --- .../external_project_parsers/parsers/cheatsheets_parser.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/application/utils/external_project_parsers/parsers/cheatsheets_parser.py b/application/utils/external_project_parsers/parsers/cheatsheets_parser.py index e695414d8..e234dadda 100644 --- a/application/utils/external_project_parsers/parsers/cheatsheets_parser.py +++ b/application/utils/external_project_parsers/parsers/cheatsheets_parser.py @@ -15,6 +15,7 @@ class Cheatsheets(ParserInterface): name = "OWASP Cheat Sheets" + cheatsheetseries_base_url = "https://cheatsheetseries.owasp.org/cheatsheets" def cheatsheet( self, section: str, hyperlink: str, tags: List[str] @@ -33,6 +34,10 @@ def cheatsheet( hyperlink=hyperlink, ) + def official_cheatsheet_url(self, markdown_filename: str) -> str: + html_name = os.path.splitext(markdown_filename)[0] + ".html" + return 
f"{self.cheatsheetseries_base_url}/{html_name}" + def parse(self, cache: db.Node_collection, ph: prompt_client.PromptHandler): c_repo = "https://github.com/OWASP/CheatSheetSeries.git" cheatsheets_path = "cheatsheets/" @@ -65,7 +70,7 @@ def register_cheatsheets( name = title.group("title") cre_id = cre.group("cre") cres = cache.get_CREs(external_id=cre_id) - hyperlink = f"{repo_path.replace('.git','')}/tree/master/{cheatsheets_path}{mdfile}" + hyperlink = self.official_cheatsheet_url(mdfile) cs = self.cheatsheet(section=name, hyperlink=hyperlink, tags=[]) for cre in cres: cs.add_link( From 9951332f5f1d8b9caa6f56495f1008a2e447a7aa Mon Sep 17 00:00:00 2001 From: bornunique911 Date: Tue, 24 Mar 2026 23:04:55 +0530 Subject: [PATCH 04/22] Add OWASP AI resource importer support --- application/tests/owasp_aisvs_parser_test.py | 62 +++++++++++++ .../tests/owasp_llm_top10_2025_parser_test.py | 45 ++++++++++ .../data/owasp_aisvs_1_0.json | 86 +++++++++++++++++++ .../data/owasp_llm_top10_2025.json | 62 +++++++++++++ .../parsers/owasp_aisvs.py | 45 ++++++++++ .../parsers/owasp_llm_top10_2025.py | 47 ++++++++++ 6 files changed, 347 insertions(+) create mode 100644 application/tests/owasp_aisvs_parser_test.py create mode 100644 application/tests/owasp_llm_top10_2025_parser_test.py create mode 100644 application/utils/external_project_parsers/data/owasp_aisvs_1_0.json create mode 100644 application/utils/external_project_parsers/data/owasp_llm_top10_2025.json create mode 100644 application/utils/external_project_parsers/parsers/owasp_aisvs.py create mode 100644 application/utils/external_project_parsers/parsers/owasp_llm_top10_2025.py diff --git a/application/tests/owasp_aisvs_parser_test.py b/application/tests/owasp_aisvs_parser_test.py new file mode 100644 index 000000000..461b2d68d --- /dev/null +++ b/application/tests/owasp_aisvs_parser_test.py @@ -0,0 +1,62 @@ +import unittest + +from application import create_app, sqla # type: ignore +from application.database import db 
+from application.defs import cre_defs as defs +from application.prompt_client import prompt_client +from application.utils.external_project_parsers.parsers import owasp_aisvs + + +class TestOwaspAisvsParser(unittest.TestCase): + def tearDown(self) -> None: + sqla.session.remove() + sqla.drop_all() + self.app_context.pop() + + def setUp(self) -> None: + self.app = create_app(mode="test") + self.app_context = self.app.app_context() + self.app_context.push() + sqla.create_all() + self.collection = db.Node_collection() + + def test_parse(self) -> None: + for cre_id, name in [ + ("227-045", "Identify sensitive data and subject it to a policy"), + ( + "307-507", + "Allow only trusted sources both build time and runtime; therefore perform integrity checks on all resources and code", + ), + ( + "162-655", + "Documentation of all components' business or security function", + ), + ]: + self.collection.add_cre(defs.CRE(id=cre_id, name=name, description="")) + + result = owasp_aisvs.OwaspAisvs().parse( + self.collection, prompt_client.PromptHandler(database=self.collection) + ) + + entries = result.results["OWASP AI Security Verification Standard (AISVS)"] + self.assertEqual(14, len(entries)) + self.assertEqual("AISVS1", entries[0].sectionID) + self.assertEqual( + "Training Data Governance & Bias Management", entries[0].section + ) + self.assertEqual( + "https://github.com/OWASP/AISVS/tree/main/1.0/en/0x10-C01-Training-Data-Governance.md", + entries[0].hyperlink, + ) + self.assertEqual( + ["227-045", "307-507"], [l.document.id for l in entries[0].links] + ) + self.assertEqual("AISVS14", entries[-1].sectionID) + self.assertEqual( + "Human Oversight, Accountability & Governance", entries[-1].section + ) + self.assertEqual( + "https://github.com/OWASP/AISVS/tree/main/1.0/en/0x10-C14-Human-Oversight.md", + entries[-1].hyperlink, + ) + self.assertEqual(["162-655"], [l.document.id for l in entries[-1].links]) diff --git a/application/tests/owasp_llm_top10_2025_parser_test.py 
b/application/tests/owasp_llm_top10_2025_parser_test.py new file mode 100644 index 000000000..75b282c34 --- /dev/null +++ b/application/tests/owasp_llm_top10_2025_parser_test.py @@ -0,0 +1,45 @@ +import unittest + +from application import create_app, sqla # type: ignore +from application.database import db +from application.defs import cre_defs as defs +from application.prompt_client import prompt_client +from application.utils.external_project_parsers.parsers import owasp_llm_top10_2025 + + +class TestOwaspLlmTop10_2025Parser(unittest.TestCase): + def tearDown(self) -> None: + sqla.session.remove() + sqla.drop_all() + self.app_context.pop() + + def setUp(self) -> None: + self.app = create_app(mode="test") + self.app_context = self.app.app_context() + self.app_context.push() + sqla.create_all() + self.collection = db.Node_collection() + + def test_parse(self) -> None: + for cre_id, name in [ + ("161-451", "Output encoding and injection prevention"), + ("064-808", "Encode output context-specifically"), + ("760-764", "Injection protection"), + ("623-550", "Denial Of Service protection"), + ]: + self.collection.add_cre(defs.CRE(id=cre_id, name=name, description="")) + + result = owasp_llm_top10_2025.OwaspLlmTop10_2025().parse( + self.collection, prompt_client.PromptHandler(database=self.collection) + ) + + entries = result.results["OWASP Top 10 for LLM and Gen AI Apps 2025"] + self.assertEqual(10, len(entries)) + self.assertEqual("LLM01", entries[0].sectionID) + self.assertEqual("Prompt Injection", entries[0].section) + self.assertEqual( + ["161-451", "760-764"], [l.document.id for l in entries[0].links] + ) + self.assertEqual(["064-808"], [l.document.id for l in entries[4].links]) + self.assertEqual("LLM10", entries[-1].sectionID) + self.assertEqual(["623-550"], [l.document.id for l in entries[-1].links]) diff --git a/application/utils/external_project_parsers/data/owasp_aisvs_1_0.json b/application/utils/external_project_parsers/data/owasp_aisvs_1_0.json new file 
mode 100644 index 000000000..c4880546f --- /dev/null +++ b/application/utils/external_project_parsers/data/owasp_aisvs_1_0.json @@ -0,0 +1,86 @@ +[ + { + "section_id": "AISVS1", + "section": "Training Data Governance & Bias Management", + "hyperlink": "https://github.com/OWASP/AISVS/tree/main/1.0/en/0x10-C01-Training-Data-Governance.md", + "cre_ids": ["227-045", "307-507"] + }, + { + "section_id": "AISVS2", + "section": "User Input Validation", + "hyperlink": "https://github.com/OWASP/AISVS/tree/main/1.0/en/0x10-C02-User-Input-Validation.md", + "cre_ids": ["031-447", "760-764"] + }, + { + "section_id": "AISVS3", + "section": "Model Lifecycle Management & Change Control", + "hyperlink": "https://github.com/OWASP/AISVS/tree/main/1.0/en/0x10-C03-Model-Lifecycle-Management.md", + "cre_ids": ["148-853", "613-285"] + }, + { + "section_id": "AISVS4", + "section": "Infrastructure, Configuration & Deployment Security", + "hyperlink": "https://github.com/OWASP/AISVS/tree/main/1.0/en/0x10-C04-Infrastructure.md", + "cre_ids": ["233-748", "486-813"] + }, + { + "section_id": "AISVS5", + "section": "Access Control & Identity for AI Components & Users", + "hyperlink": "https://github.com/OWASP/AISVS/tree/main/1.0/en/0x10-C05-Access-Control-and-Identity.md", + "cre_ids": ["633-428", "724-770"] + }, + { + "section_id": "AISVS6", + "section": "Supply Chain Security for Models, Frameworks & Data", + "hyperlink": "https://github.com/OWASP/AISVS/tree/main/1.0/en/0x10-C06-Supply-Chain.md", + "cre_ids": ["613-285", "613-287", "863-521"] + }, + { + "section_id": "AISVS7", + "section": "Model Behavior, Output Control & Safety Assurance", + "hyperlink": "https://github.com/OWASP/AISVS/tree/main/1.0/en/0x10-C07-Model-Behavior.md", + "cre_ids": ["064-808", "141-555"] + }, + { + "section_id": "AISVS8", + "section": "Memory, Embeddings & Vector Database Security", + "hyperlink": "https://github.com/OWASP/AISVS/tree/main/1.0/en/0x10-C08-Memory-Embeddings-and-Vector-Database.md", + "cre_ids": 
["126-668", "538-770"] + }, + { + "section_id": "AISVS9", + "section": "Autonomous Orchestration & Agentic Action Security", + "hyperlink": "https://github.com/OWASP/AISVS/tree/main/1.0/en/0x10-C09-Orchestration-and-Agentic-Action.md", + "cre_ids": ["117-371", "650-560"] + }, + { + "section_id": "AISVS10", + "section": "Model Context Protocol (MCP) Security", + "hyperlink": "https://github.com/OWASP/AISVS/tree/main/1.0/en/0x10-C10-MCP-Security.md", + "cre_ids": ["307-507", "715-223"] + }, + { + "section_id": "AISVS11", + "section": "Adversarial Robustness & Privacy Defense", + "hyperlink": "https://github.com/OWASP/AISVS/tree/main/1.0/en/0x10-C11-Adversarial-Robustness.md", + "cre_ids": ["141-555", "623-550"] + }, + { + "section_id": "AISVS12", + "section": "Privacy Protection & Personal Data Management", + "hyperlink": "https://github.com/OWASP/AISVS/tree/main/1.0/en/0x10-C12-Privacy.md", + "cre_ids": ["126-668", "227-045", "482-866"] + }, + { + "section_id": "AISVS13", + "section": "Monitoring, Logging & Anomaly Detection", + "hyperlink": "https://github.com/OWASP/AISVS/tree/main/1.0/en/0x10-C13-Monitoring-and-Logging.md", + "cre_ids": ["058-083", "148-420", "402-706", "843-841"] + }, + { + "section_id": "AISVS14", + "section": "Human Oversight, Accountability & Governance", + "hyperlink": "https://github.com/OWASP/AISVS/tree/main/1.0/en/0x10-C14-Human-Oversight.md", + "cre_ids": ["162-655", "766-162"] + } +] diff --git a/application/utils/external_project_parsers/data/owasp_llm_top10_2025.json b/application/utils/external_project_parsers/data/owasp_llm_top10_2025.json new file mode 100644 index 000000000..b761d5e09 --- /dev/null +++ b/application/utils/external_project_parsers/data/owasp_llm_top10_2025.json @@ -0,0 +1,62 @@ +[ + { + "section_id": "LLM01", + "section": "Prompt Injection", + "hyperlink": "https://genai.owasp.org/llmrisk/llm01-prompt-injection/", + "cre_ids": ["161-451", "760-764"] + }, + { + "section_id": "LLM02", + "section": "Sensitive 
Information Disclosure", + "hyperlink": "https://genai.owasp.org/llmrisk/llm022025-sensitive-information-disclosure/", + "cre_ids": ["126-668", "227-045"] + }, + { + "section_id": "LLM03", + "section": "Supply Chain", + "hyperlink": "https://genai.owasp.org/llmrisk/llm032025-supply-chain/", + "cre_ids": ["613-285", "613-287"] + }, + { + "section_id": "LLM04", + "section": "Data and Model Poisoning", + "hyperlink": "https://genai.owasp.org/llmrisk/llm042025-data-and-model-poisoning/", + "cre_ids": ["307-507", "613-287"] + }, + { + "section_id": "LLM05", + "section": "Improper Output Handling", + "hyperlink": "https://genai.owasp.org/llmrisk/llm052025-improper-output-handling/", + "cre_ids": ["064-808"] + }, + { + "section_id": "LLM06", + "section": "Excessive Agency", + "hyperlink": "https://genai.owasp.org/llmrisk/llm062025-excessive-agency/", + "cre_ids": ["117-371", "650-560"] + }, + { + "section_id": "LLM07", + "section": "System Prompt Leakage", + "hyperlink": "https://genai.owasp.org/llmrisk/llm072025-system-prompt-leakage/", + "cre_ids": ["126-668", "227-045"] + }, + { + "section_id": "LLM08", + "section": "Vector and Embedding Weaknesses", + "hyperlink": "https://genai.owasp.org/llmrisk/llm082025-vector-and-embedding-weaknesses/", + "cre_ids": ["126-668", "538-770"] + }, + { + "section_id": "LLM09", + "section": "Misinformation", + "hyperlink": "https://genai.owasp.org/llmrisk/llm092025-misinformation/", + "cre_ids": ["141-555"] + }, + { + "section_id": "LLM10", + "section": "Unbounded Consumption", + "hyperlink": "https://genai.owasp.org/llmrisk/llm102025-unbounded-consumption/", + "cre_ids": ["267-031", "623-550"] + } +] diff --git a/application/utils/external_project_parsers/parsers/owasp_aisvs.py b/application/utils/external_project_parsers/parsers/owasp_aisvs.py new file mode 100644 index 000000000..cec4abad9 --- /dev/null +++ b/application/utils/external_project_parsers/parsers/owasp_aisvs.py @@ -0,0 +1,45 @@ +import json +from pathlib import Path + 
+from application.database import db +from application.defs import cre_defs as defs +from application.prompt_client import prompt_client +from application.utils.external_project_parsers.base_parser_defs import ( + ParseResult, + ParserInterface, +) + + +class OwaspAisvs(ParserInterface): + name = "OWASP AI Security Verification Standard (AISVS)" + data_file = Path(__file__).resolve().parent.parent / "data" / "owasp_aisvs_1_0.json" + + def parse(self, cache: db.Node_collection, ph: prompt_client.PromptHandler): + with self.data_file.open("r", encoding="utf-8") as handle: + raw_entries = json.load(handle) + + entries = [] + for entry in raw_entries: + standard = defs.Standard( + name=self.name, + sectionID=entry["section_id"], + section=entry["section"], + hyperlink=entry["hyperlink"], + ) + for cre_id in entry.get("cre_ids", []): + cres = cache.get_CREs(external_id=cre_id) + if not cres: + continue + standard.add_link( + defs.Link( + ltype=defs.LinkTypes.LinkedTo, + document=cres[0].shallow_copy(), + ) + ) + entries.append(standard) + + return ParseResult( + results={self.name: entries}, + calculate_gap_analysis=False, + calculate_embeddings=False, + ) diff --git a/application/utils/external_project_parsers/parsers/owasp_llm_top10_2025.py b/application/utils/external_project_parsers/parsers/owasp_llm_top10_2025.py new file mode 100644 index 000000000..3971b9e6b --- /dev/null +++ b/application/utils/external_project_parsers/parsers/owasp_llm_top10_2025.py @@ -0,0 +1,47 @@ +import json +from pathlib import Path + +from application.database import db +from application.defs import cre_defs as defs +from application.prompt_client import prompt_client +from application.utils.external_project_parsers.base_parser_defs import ( + ParseResult, + ParserInterface, +) + + +class OwaspLlmTop10_2025(ParserInterface): + name = "OWASP Top 10 for LLM and Gen AI Apps 2025" + data_file = ( + Path(__file__).resolve().parent.parent / "data" / "owasp_llm_top10_2025.json" + ) + + def 
parse(self, cache: db.Node_collection, ph: prompt_client.PromptHandler): + with self.data_file.open("r", encoding="utf-8") as handle: + raw_entries = json.load(handle) + + entries = [] + for entry in raw_entries: + standard = defs.Standard( + name=self.name, + sectionID=entry["section_id"], + section=entry["section"], + hyperlink=entry["hyperlink"], + ) + for cre_id in entry.get("cre_ids", []): + cres = cache.get_CREs(external_id=cre_id) + if not cres: + continue + standard.add_link( + defs.Link( + ltype=defs.LinkTypes.LinkedTo, + document=cres[0].shallow_copy(), + ) + ) + entries.append(standard) + + return ParseResult( + results={self.name: entries}, + calculate_gap_analysis=False, + calculate_embeddings=False, + ) From ca0071a726925b79813239bfd926b1bcf12f4625 Mon Sep 17 00:00:00 2001 From: bornunique911 Date: Tue, 24 Mar 2026 23:04:55 +0530 Subject: [PATCH 05/22] Add OWASP Kubernetes importer support --- ...owasp_kubernetes_top10_2022_parser_test.py | 45 ++++++++ ...owasp_kubernetes_top10_2025_parser_test.py | 102 ++++++++++++++++++ .../data/owasp_kubernetes_top10_2022.json | 62 +++++++++++ .../data/owasp_kubernetes_top10_2025.json | 72 +++++++++++++ .../parsers/owasp_kubernetes_top10_2022.py | 49 +++++++++ .../parsers/owasp_kubernetes_top10_2025.py | 78 ++++++++++++++ 6 files changed, 408 insertions(+) create mode 100644 application/tests/owasp_kubernetes_top10_2022_parser_test.py create mode 100644 application/tests/owasp_kubernetes_top10_2025_parser_test.py create mode 100644 application/utils/external_project_parsers/data/owasp_kubernetes_top10_2022.json create mode 100644 application/utils/external_project_parsers/data/owasp_kubernetes_top10_2025.json create mode 100644 application/utils/external_project_parsers/parsers/owasp_kubernetes_top10_2022.py create mode 100644 application/utils/external_project_parsers/parsers/owasp_kubernetes_top10_2025.py diff --git a/application/tests/owasp_kubernetes_top10_2022_parser_test.py 
b/application/tests/owasp_kubernetes_top10_2022_parser_test.py new file mode 100644 index 000000000..30b0922c9 --- /dev/null +++ b/application/tests/owasp_kubernetes_top10_2022_parser_test.py @@ -0,0 +1,45 @@ +import unittest + +from application import create_app, sqla # type: ignore +from application.database import db +from application.defs import cre_defs as defs +from application.prompt_client import prompt_client +from application.utils.external_project_parsers.parsers import ( + owasp_kubernetes_top10_2022, +) + + +class TestOwaspKubernetesTop10_2022Parser(unittest.TestCase): + def tearDown(self) -> None: + sqla.session.remove() + sqla.drop_all() + self.app_context.pop() + + def setUp(self) -> None: + self.app = create_app(mode="test") + self.app_context = self.app.app_context() + self.app_context.push() + sqla.create_all() + self.collection = db.Node_collection() + + def test_parse(self) -> None: + for cre_id, name in [ + ("233-748", "Configuration hardening"), + ("486-813", "Configuration"), + ("053-751", "Force build pipeline to check outdated/insecure components"), + ]: + self.collection.add_cre(defs.CRE(id=cre_id, name=name, description="")) + + result = owasp_kubernetes_top10_2022.OwaspKubernetesTop10_2022().parse( + self.collection, prompt_client.PromptHandler(database=self.collection) + ) + + entries = result.results["OWASP Kubernetes Top Ten 2022"] + self.assertEqual(10, len(entries)) + self.assertEqual("K01", entries[0].sectionID) + self.assertEqual("Insecure Workload Configurations", entries[0].section) + self.assertEqual( + ["233-748", "486-813"], [l.document.id for l in entries[0].links] + ) + self.assertEqual("K10", entries[-1].sectionID) + self.assertEqual(["053-751"], [l.document.id for l in entries[-1].links]) diff --git a/application/tests/owasp_kubernetes_top10_2025_parser_test.py b/application/tests/owasp_kubernetes_top10_2025_parser_test.py new file mode 100644 index 000000000..6f444c9a9 --- /dev/null +++ 
b/application/tests/owasp_kubernetes_top10_2025_parser_test.py @@ -0,0 +1,102 @@ +import unittest +import tempfile +from pathlib import Path + +from application import create_app, sqla # type: ignore +from application.database import db +from application.defs import cre_defs as defs +from application.prompt_client import prompt_client +from application.utils.external_project_parsers.parsers import ( + owasp_kubernetes_top10_2025, +) + + +class TestOwaspKubernetesTop10_2025Parser(unittest.TestCase): + def tearDown(self) -> None: + sqla.session.remove() + sqla.drop_all() + self.app_context.pop() + + def setUp(self) -> None: + self.app = create_app(mode="test") + self.app_context = self.app.app_context() + self.app_context.push() + sqla.create_all() + self.collection = db.Node_collection() + + def test_parse(self) -> None: + for cre_id, name in [ + ("233-748", "Configuration hardening"), + ("486-813", "Configuration"), + ("148-420", "Log integrity"), + ("402-706", "Log relevant"), + ("843-841", "Log discretely"), + ]: + self.collection.add_cre(defs.CRE(id=cre_id, name=name, description="")) + + result = owasp_kubernetes_top10_2025.OwaspKubernetesTop10_2025().parse( + self.collection, prompt_client.PromptHandler(database=self.collection) + ) + + entries = result.results["OWASP Kubernetes Top Ten 2025 (Draft)"] + self.assertEqual(10, len(entries)) + self.assertEqual("K01", entries[0].sectionID) + self.assertEqual("Insecure Workload Configurations", entries[0].section) + self.assertEqual( + ["233-748", "486-813"], [l.document.id for l in entries[0].links] + ) + self.assertEqual("K10", entries[-1].sectionID) + self.assertEqual( + ["148-420", "402-706", "843-841"], + [l.document.id for l in entries[-1].links], + ) + + def test_parse_falls_back_to_2022_mapping_when_2025_links_missing(self) -> None: + self.collection.add_cre( + defs.CRE(id="148-420", name="Log integrity", description="") + ) + + with tempfile.TemporaryDirectory() as tmpdir: + tmp_path = Path(tmpdir) + 
current_file = tmp_path / "k8s_2025.json" + fallback_file = tmp_path / "k8s_2022.json" + current_file.write_text( + """ +[ + { + "section_id": "K10", + "section": "Inadequate Logging And Monitoring", + "hyperlink": "https://example.com/k10", + "cre_ids": ["999-999"], + "fallback_section_ids": ["K05"] + } +] + """.strip(), + encoding="utf-8", + ) + fallback_file.write_text( + """ +[ + { + "section_id": "K05", + "section": "Inadequate Logging and Monitoring", + "hyperlink": "https://example.com/k05", + "cre_ids": ["148-420"] + } +] + """.strip(), + encoding="utf-8", + ) + + parser = owasp_kubernetes_top10_2025.OwaspKubernetesTop10_2025() + parser.data_file = current_file + parser.fallback_data_file = fallback_file + + result = parser.parse( + self.collection, + prompt_client.PromptHandler(database=self.collection), + ) + + entries = result.results["OWASP Kubernetes Top Ten 2025 (Draft)"] + self.assertEqual(1, len(entries)) + self.assertEqual(["148-420"], [link.document.id for link in entries[0].links]) diff --git a/application/utils/external_project_parsers/data/owasp_kubernetes_top10_2022.json b/application/utils/external_project_parsers/data/owasp_kubernetes_top10_2022.json new file mode 100644 index 000000000..c4eb3d6fd --- /dev/null +++ b/application/utils/external_project_parsers/data/owasp_kubernetes_top10_2022.json @@ -0,0 +1,62 @@ +[ + { + "section_id": "K01", + "section": "Insecure Workload Configurations", + "hyperlink": "https://owasp.org/www-project-kubernetes-top-ten/2022/en/src/K01-insecure-workload-configurations", + "cre_ids": ["233-748", "486-813"] + }, + { + "section_id": "K02", + "section": "Supply Chain Vulnerabilities", + "hyperlink": "https://owasp.org/www-project-kubernetes-top-ten/2022/en/src/K02-supply-chain-vulnerabilities", + "cre_ids": ["613-285", "613-287"] + }, + { + "section_id": "K03", + "section": "Overly Permissive RBAC Configurations", + "hyperlink": 
"https://owasp.org/www-project-kubernetes-top-ten/2022/en/src/K03-overly-permissive-rbac-configurations", + "cre_ids": ["128-128", "724-770"] + }, + { + "section_id": "K04", + "section": "Lack of Centralized Policy Enforcement", + "hyperlink": "https://owasp.org/www-project-kubernetes-top-ten/2022/en/src/K04-lack-of-centralized-policy-enforcement", + "cre_ids": ["117-371"] + }, + { + "section_id": "K05", + "section": "Inadequate Logging and Monitoring", + "hyperlink": "https://owasp.org/www-project-kubernetes-top-ten/2022/en/src/K05-inadequate-logging-and-monitoring", + "cre_ids": ["058-083", "148-420", "402-706", "843-841"] + }, + { + "section_id": "K06", + "section": "Broken Authentication Mechanisms", + "hyperlink": "https://owasp.org/www-project-kubernetes-top-ten/2022/en/src/K06-broken-authentication-mechanisms", + "cre_ids": ["177-260", "586-842", "633-428"] + }, + { + "section_id": "K07", + "section": "Missing Network Segmentation Controls", + "hyperlink": "https://owasp.org/www-project-kubernetes-top-ten/2022/en/src/K07-missing-network-segmentation-controls", + "cre_ids": ["132-146", "467-784", "515-021"] + }, + { + "section_id": "K08", + "section": "Secrets Management Failures", + "hyperlink": "https://owasp.org/www-project-kubernetes-top-ten/2022/en/src/K08-secrets-management-failures", + "cre_ids": ["340-375", "774-888", "813-610"] + }, + { + "section_id": "K09", + "section": "Misconfigured Cluster Components", + "hyperlink": "https://owasp.org/www-project-kubernetes-top-ten/2022/en/src/K09-misconfigured-cluster-components", + "cre_ids": ["233-748", "486-813"] + }, + { + "section_id": "K10", + "section": "Outdated and Vulnerable Kubernetes Components", + "hyperlink": "https://owasp.org/www-project-kubernetes-top-ten/2022/en/src/K10-outdated-and-vulnerable-kubernetes-components", + "cre_ids": ["053-751", "715-334", "863-521"] + } +] diff --git a/application/utils/external_project_parsers/data/owasp_kubernetes_top10_2025.json 
b/application/utils/external_project_parsers/data/owasp_kubernetes_top10_2025.json new file mode 100644 index 000000000..c55afb059 --- /dev/null +++ b/application/utils/external_project_parsers/data/owasp_kubernetes_top10_2025.json @@ -0,0 +1,72 @@ +[ + { + "section_id": "K01", + "section": "Insecure Workload Configurations", + "hyperlink": "https://owasp.org/www-project-kubernetes-top-ten/", + "cre_ids": ["233-748", "486-813"], + "fallback_section_ids": ["K01"] + }, + { + "section_id": "K02", + "section": "Overly Permissive Authorization Configurations", + "hyperlink": "https://owasp.org/www-project-kubernetes-top-ten/", + "cre_ids": ["128-128", "724-770"], + "fallback_section_ids": ["K03"] + }, + { + "section_id": "K03", + "section": "Secrets Management Failures", + "hyperlink": "https://owasp.org/www-project-kubernetes-top-ten/", + "cre_ids": ["340-375", "774-888", "813-610"], + "fallback_section_ids": ["K08"] + }, + { + "section_id": "K04", + "section": "Lack Of Cluster Level Policy Enforcement", + "hyperlink": "https://owasp.org/www-project-kubernetes-top-ten/", + "cre_ids": ["117-371"], + "fallback_section_ids": ["K04"] + }, + { + "section_id": "K05", + "section": "Missing Network Segmentation Controls", + "hyperlink": "https://owasp.org/www-project-kubernetes-top-ten/", + "cre_ids": ["132-146", "467-784", "515-021"], + "fallback_section_ids": ["K07"] + }, + { + "section_id": "K06", + "section": "Overly Exposed Kubernetes Components", + "hyperlink": "https://owasp.org/www-project-kubernetes-top-ten/", + "cre_ids": ["152-725", "640-364"], + "fallback_section_ids": ["K09"] + }, + { + "section_id": "K07", + "section": "Misconfigured And Vulnerable Cluster Components", + "hyperlink": "https://owasp.org/www-project-kubernetes-top-ten/", + "cre_ids": ["053-751", "233-748", "486-813", "715-334"], + "fallback_section_ids": ["K09", "K10"] + }, + { + "section_id": "K08", + "section": "Cluster To Cloud Lateral Movement", + "hyperlink": 
"https://owasp.org/www-project-kubernetes-top-ten/", + "cre_ids": ["132-146", "640-364", "724-770"], + "fallback_section_ids": ["K03", "K07"] + }, + { + "section_id": "K09", + "section": "Broken Authentication Mechanisms", + "hyperlink": "https://owasp.org/www-project-kubernetes-top-ten/", + "cre_ids": ["177-260", "586-842", "633-428"], + "fallback_section_ids": ["K06"] + }, + { + "section_id": "K10", + "section": "Inadequate Logging And Monitoring", + "hyperlink": "https://owasp.org/www-project-kubernetes-top-ten/", + "cre_ids": ["058-083", "148-420", "402-706", "843-841"], + "fallback_section_ids": ["K05"] + } +] diff --git a/application/utils/external_project_parsers/parsers/owasp_kubernetes_top10_2022.py b/application/utils/external_project_parsers/parsers/owasp_kubernetes_top10_2022.py new file mode 100644 index 000000000..9d3822ab7 --- /dev/null +++ b/application/utils/external_project_parsers/parsers/owasp_kubernetes_top10_2022.py @@ -0,0 +1,49 @@ +import json +from pathlib import Path + +from application.database import db +from application.defs import cre_defs as defs +from application.prompt_client import prompt_client +from application.utils.external_project_parsers.base_parser_defs import ( + ParseResult, + ParserInterface, +) + + +class OwaspKubernetesTop10_2022(ParserInterface): + name = "OWASP Kubernetes Top Ten 2022" + data_file = ( + Path(__file__).resolve().parent.parent + / "data" + / "owasp_kubernetes_top10_2022.json" + ) + + def parse(self, cache: db.Node_collection, ph: prompt_client.PromptHandler): + with self.data_file.open("r", encoding="utf-8") as handle: + raw_entries = json.load(handle) + + entries = [] + for entry in raw_entries: + standard = defs.Standard( + name=self.name, + sectionID=entry["section_id"], + section=entry["section"], + hyperlink=entry["hyperlink"], + ) + for cre_id in entry.get("cre_ids", []): + cres = cache.get_CREs(external_id=cre_id) + if not cres: + continue + standard.add_link( + defs.Link( + 
ltype=defs.LinkTypes.LinkedTo, + document=cres[0].shallow_copy(), + ) + ) + entries.append(standard) + + return ParseResult( + results={self.name: entries}, + calculate_gap_analysis=False, + calculate_embeddings=False, + ) diff --git a/application/utils/external_project_parsers/parsers/owasp_kubernetes_top10_2025.py b/application/utils/external_project_parsers/parsers/owasp_kubernetes_top10_2025.py new file mode 100644 index 000000000..31deed8da --- /dev/null +++ b/application/utils/external_project_parsers/parsers/owasp_kubernetes_top10_2025.py @@ -0,0 +1,78 @@ +import json +from pathlib import Path + +from application.database import db +from application.defs import cre_defs as defs +from application.prompt_client import prompt_client +from application.utils.external_project_parsers.base_parser_defs import ( + ParseResult, + ParserInterface, +) + + +class OwaspKubernetesTop10_2025(ParserInterface): + name = "OWASP Kubernetes Top Ten 2025 (Draft)" + data_file = ( + Path(__file__).resolve().parent.parent + / "data" + / "owasp_kubernetes_top10_2025.json" + ) + fallback_data_file = ( + Path(__file__).resolve().parent.parent + / "data" + / "owasp_kubernetes_top10_2022.json" + ) + + def parse(self, cache: db.Node_collection, ph: prompt_client.PromptHandler): + with self.data_file.open("r", encoding="utf-8") as handle: + raw_entries = json.load(handle) + with self.fallback_data_file.open("r", encoding="utf-8") as handle: + fallback_entries = { + entry["section_id"]: entry for entry in json.load(handle) + } + + entries = [] + for entry in raw_entries: + standard = defs.Standard( + name=self.name, + sectionID=entry["section_id"], + section=entry["section"], + hyperlink=entry["hyperlink"], + ) + linked_cre_ids = [] + for cre_id in entry.get("cre_ids", []): + cres = cache.get_CREs(external_id=cre_id) + if not cres: + continue + linked_cre_ids.append(cre_id) + standard.add_link( + defs.Link( + ltype=defs.LinkTypes.LinkedTo, + document=cres[0].shallow_copy(), + ) + ) + if not 
linked_cre_ids: + for section_id in entry.get("fallback_section_ids", []): + fallback_entry = fallback_entries.get(section_id) + if not fallback_entry: + continue + for cre_id in fallback_entry.get("cre_ids", []): + if cre_id in linked_cre_ids: + continue + cres = cache.get_CREs(external_id=cre_id) + if not cres: + continue + linked_cre_ids.append(cre_id) + standard.add_link( + defs.Link( + ltype=defs.LinkTypes.LinkedTo, + document=cres[0].shallow_copy(), + ) + ) + entries.append(standard) + + return ParseResult( + results={self.name: entries}, + calculate_gap_analysis=False, + calculate_embeddings=False, + ) From 861e843b1bc9a5d62c3576869fd6b30cd0b3de12 Mon Sep 17 00:00:00 2001 From: bornunique911 Date: Tue, 24 Mar 2026 23:04:55 +0530 Subject: [PATCH 06/22] Normalize OWASP cheat sheet references --- application/tests/cheatsheets_parser_test.py | 33 +++++++++- .../data/owasp_cheatsheets_supplement.json | 47 ++++++++++++++ .../parsers/cheatsheets_parser.py | 62 +++++++++++++++++-- 3 files changed, 136 insertions(+), 6 deletions(-) create mode 100644 application/utils/external_project_parsers/data/owasp_cheatsheets_supplement.json diff --git a/application/tests/cheatsheets_parser_test.py b/application/tests/cheatsheets_parser_test.py index e2c0910d6..fb2a9c277 100644 --- a/application/tests/cheatsheets_parser_test.py +++ b/application/tests/cheatsheets_parser_test.py @@ -69,8 +69,37 @@ class Repo: self.maxDiff = None for name, nodes in entries.results.items(): self.assertEqual(name, parser.name) - self.assertEqual(len(nodes), 1) - self.assertEqual(expected.todict(), nodes[0].todict()) + sections = {node.section for node in nodes} + self.assertIn("Secrets Management Cheat Sheet", sections) + secret_entry = [ + node + for node in nodes + if node.section == "Secrets Management Cheat Sheet" + ][0] + self.assertEqual(expected.todict(), secret_entry.todict()) + + def test_register_supplemental_cheatsheets(self) -> None: + for cre_id, name in [ + ("118-110", "API/web 
services"), + ("724-770", "Technical application access control"), + ("623-550", "Denial Of Service protection"), + ]: + self.collection.add_cre(defs.CRE(name=name, id=cre_id)) + + entries = cheatsheets_parser.Cheatsheets().register_supplemental_cheatsheets( + cache=self.collection + ) + rest = [ + entry for entry in entries if entry.section == "REST Security Cheat Sheet" + ][0] + self.assertEqual( + "https://cheatsheetseries.owasp.org/cheatsheets/REST_Security_Cheat_Sheet.html", + rest.hyperlink, + ) + self.assertEqual( + ["118-110", "724-770", "623-550"], + [link.document.id for link in rest.links], + ) cheatsheets_md = """ # Secrets Management Cheat Sheet diff --git a/application/utils/external_project_parsers/data/owasp_cheatsheets_supplement.json b/application/utils/external_project_parsers/data/owasp_cheatsheets_supplement.json new file mode 100644 index 000000000..4e06bee8c --- /dev/null +++ b/application/utils/external_project_parsers/data/owasp_cheatsheets_supplement.json @@ -0,0 +1,47 @@ +[ + { + "section": "Authorization Cheat Sheet", + "hyperlink": "https://cheatsheetseries.owasp.org/cheatsheets/Authorization_Cheat_Sheet.html", + "cre_ids": ["128-128", "117-371"] + }, + { + "section": "REST Security Cheat Sheet", + "hyperlink": "https://cheatsheetseries.owasp.org/cheatsheets/REST_Security_Cheat_Sheet.html", + "cre_ids": ["118-110", "724-770", "623-550"] + }, + { + "section": "Server Side Request Forgery Prevention Cheat Sheet", + "hyperlink": "https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html", + "cre_ids": ["028-728", "657-084"] + }, + { + "section": "Docker Security Cheat Sheet", + "hyperlink": "https://cheatsheetseries.owasp.org/cheatsheets/Docker_Security_Cheat_Sheet.html", + "cre_ids": ["233-748", "486-813"] + }, + { + "section": "Kubernetes Security Cheat Sheet", + "hyperlink": "https://cheatsheetseries.owasp.org/cheatsheets/Kubernetes_Security_Cheat_Sheet.html", + "cre_ids": ["467-784", 
"233-748", "486-813"] + }, + { + "section": "Secure Cloud Architecture Cheat Sheet", + "hyperlink": "https://cheatsheetseries.owasp.org/cheatsheets/Secure_Cloud_Architecture_Cheat_Sheet.html", + "cre_ids": ["155-155", "467-784"] + }, + { + "section": "LLM Prompt Injection Prevention Cheat Sheet", + "hyperlink": "https://cheatsheetseries.owasp.org/cheatsheets/LLM_Prompt_Injection_Prevention_Cheat_Sheet.html", + "cre_ids": ["161-451", "760-764"] + }, + { + "section": "AI Agent Security Cheat Sheet", + "hyperlink": "https://cheatsheetseries.owasp.org/cheatsheets/AI_Agent_Security_Cheat_Sheet.html", + "cre_ids": ["117-371", "650-560", "126-668"] + }, + { + "section": "Secure AI Model Ops Cheat Sheet", + "hyperlink": "https://cheatsheetseries.owasp.org/cheatsheets/Secure_AI_Model_Ops_Cheat_Sheet.html", + "cre_ids": ["148-853", "613-285", "613-287"] + } +] diff --git a/application/utils/external_project_parsers/parsers/cheatsheets_parser.py b/application/utils/external_project_parsers/parsers/cheatsheets_parser.py index e234dadda..02003b7bd 100644 --- a/application/utils/external_project_parsers/parsers/cheatsheets_parser.py +++ b/application/utils/external_project_parsers/parsers/cheatsheets_parser.py @@ -6,6 +6,9 @@ import os import re from application.utils.external_project_parsers import base_parser_defs +import json +from pathlib import Path +import logging from application.utils.external_project_parsers.base_parser_defs import ( ParserInterface, ParseResult, @@ -16,6 +19,12 @@ class Cheatsheets(ParserInterface): name = "OWASP Cheat Sheets" cheatsheetseries_base_url = "https://cheatsheetseries.owasp.org/cheatsheets" + supplement_data_file = ( + Path(__file__).resolve().parent.parent + / "data" + / "owasp_cheatsheets_supplement.json" + ) + logger = logging.getLogger(__name__) def cheatsheet( self, section: str, hyperlink: str, tags: List[str] @@ -41,10 +50,22 @@ def official_cheatsheet_url(self, markdown_filename: str) -> str: def parse(self, cache: 
db.Node_collection, ph: prompt_client.PromptHandler): c_repo = "https://github.com/OWASP/CheatSheetSeries.git" cheatsheets_path = "cheatsheets/" - repo = git.clone(c_repo, sparse_paths=["cheatsheets"], sparse_cone=True) - cheatsheets = self.register_cheatsheets( - repo=repo, cache=cache, cheatsheets_path=cheatsheets_path, repo_path=c_repo - ) + cheatsheets = [] + try: + repo = git.clone(c_repo, sparse_paths=["cheatsheets"], sparse_cone=True) + cheatsheets = self.register_cheatsheets( + repo=repo, + cache=cache, + cheatsheets_path=cheatsheets_path, + repo_path=c_repo, + ) + except Exception as exc: + self.logger.warning( + "Unable to clone OWASP CheatSheetSeries, continuing with supplemental cheat sheets only: %s", + exc, + ) + cheatsheets.extend(self.register_supplemental_cheatsheets(cache=cache)) + cheatsheets = self.deduplicate_entries(cheatsheets) results = {self.name: cheatsheets} base_parser_defs.validate_classification_tags(results) return ParseResult(results=results) @@ -80,3 +101,36 @@ def register_cheatsheets( ) standard_entries.append(cs) return standard_entries + + def register_supplemental_cheatsheets(self, cache: db.Node_collection): + with self.supplement_data_file.open("r", encoding="utf-8") as handle: + supplement_entries = json.load(handle) + + standard_entries = [] + for entry in supplement_entries: + cs = self.cheatsheet( + section=entry["section"], + hyperlink=entry["hyperlink"], + tags=[], + ) + for cre_id in entry.get("cre_ids", []): + cres = cache.get_CREs(external_id=cre_id) + for cre in cres: + try: + cs.add_link( + defs.Link( + document=cre.shallow_copy(), + ltype=defs.LinkTypes.AutomaticallyLinkedTo, + ) + ) + except Exception: + continue + if cs.links: + standard_entries.append(cs) + return standard_entries + + def deduplicate_entries(self, entries: List[defs.Standard]) -> List[defs.Standard]: + deduped = {} + for entry in entries: + deduped[(entry.section, entry.hyperlink)] = entry + return list(deduped.values()) From 
43367bf5d0690b6eaa5ed68c633ba7bb2686396c Mon Sep 17 00:00:00 2001 From: bornunique911 Date: Tue, 24 Mar 2026 23:04:55 +0530 Subject: [PATCH 07/22] Add refresh scripts for OWASP resources --- scripts/update-cheatsheets.sh | 55 +++++++++ scripts/update-owasp-top10-2025-mappings.sh | 38 ++++++ scripts/update-owasp-top10-standards.sh | 129 ++++++++++++++++++++ 3 files changed, 222 insertions(+) create mode 100755 scripts/update-cheatsheets.sh create mode 100755 scripts/update-owasp-top10-2025-mappings.sh create mode 100644 scripts/update-owasp-top10-standards.sh diff --git a/scripts/update-cheatsheets.sh b/scripts/update-cheatsheets.sh new file mode 100755 index 000000000..48d5eccc5 --- /dev/null +++ b/scripts/update-cheatsheets.sh @@ -0,0 +1,55 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +DB_PATH="${1:-$ROOT_DIR/standards_cache.sqlite}" +VENV_DIR="$ROOT_DIR/venv" + +if [[ ! -d "$VENV_DIR" ]]; then + python3 -m venv "$VENV_DIR" +fi + +source "$VENV_DIR/bin/activate" + +if ! python -c "import flask" >/dev/null 2>&1; then + pip install -r "$ROOT_DIR/requirements.txt" +fi + +CRE_NO_CALCULATE_GAP_ANALYSIS=1 \ +CRE_NO_GEN_EMBEDDINGS=1 \ +python "$ROOT_DIR/cre.py" --cheatsheets_in --cache_file "$DB_PATH" + +python - "$DB_PATH" <<'PY' +import os +import sqlite3 +import sys + +db_path = sys.argv[1] +conn = sqlite3.connect(db_path) +cur = conn.cursor() + +github_prefix = "https://github.com/OWASP/CheatSheetSeries/tree/master/cheatsheets/" +official_prefix = "https://cheatsheetseries.owasp.org/cheatsheets/" + +rows = cur.execute( + """ + select id, link + from node + where name = 'OWASP Cheat Sheets' + and link like ? + """, + (f"{github_prefix}%",), +).fetchall() + +for node_id, link in rows: + filename = os.path.basename(link) + html_name = os.path.splitext(filename)[0] + ".html" + cur.execute( + "update node set link = ? 
where id = ?", + (f"{official_prefix}{html_name}", node_id), + ) + +conn.commit() +conn.close() +print(f"Normalized {len(rows)} OWASP Cheat Sheet links") +PY diff --git a/scripts/update-owasp-top10-2025-mappings.sh b/scripts/update-owasp-top10-2025-mappings.sh new file mode 100755 index 000000000..04258646b --- /dev/null +++ b/scripts/update-owasp-top10-2025-mappings.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +VENV_DIR="$ROOT_DIR/venv" +CACHE_FILE="${1:-$ROOT_DIR/standards_cache.sqlite}" +TIMESTAMP="$(date +%Y%m%d-%H%M%S)" +BACKUP_FILE="${CACHE_FILE}.bak.${TIMESTAMP}" + +if [[ ! -d "$VENV_DIR" ]]; then + echo "Creating virtual environment in $VENV_DIR" + python3 -m venv "$VENV_DIR" +fi + +source "$VENV_DIR/bin/activate" + +if ! python -c "import requests" >/dev/null 2>&1; then + echo "Installing Python dependencies" + pip install -r "$ROOT_DIR/requirements.txt" +fi + +if [[ -f "$CACHE_FILE" ]]; then + cp "$CACHE_FILE" "$BACKUP_FILE" + echo "Backed up database to $BACKUP_FILE" +fi + +export CRE_NO_NEO4J="${CRE_NO_NEO4J:-1}" +export CRE_NO_GEN_EMBEDDINGS="${CRE_NO_GEN_EMBEDDINGS:-1}" +export CRE_UPSTREAM_MAX_ATTEMPTS="${CRE_UPSTREAM_MAX_ATTEMPTS:-6}" +export CRE_UPSTREAM_RETRY_BACKOFF_SECONDS="${CRE_UPSTREAM_RETRY_BACKOFF_SECONDS:-2}" +export CRE_UPSTREAM_TIMEOUT_SECONDS="${CRE_UPSTREAM_TIMEOUT_SECONDS:-30}" + +echo "Refreshing official OpenCRE upstream data in $CACHE_FILE" +python "$ROOT_DIR/cre.py" --upstream_sync --cache_file "$CACHE_FILE" + +echo "Reapplying OWASP Top 10 2025 CRE mappings" +exec python "$ROOT_DIR/cre.py" --owasp_top10_2025_in --cache_file "$CACHE_FILE" diff --git a/scripts/update-owasp-top10-standards.sh b/scripts/update-owasp-top10-standards.sh new file mode 100644 index 000000000..a795cf872 --- /dev/null +++ b/scripts/update-owasp-top10-standards.sh @@ -0,0 +1,129 @@ +#!/usr/bin/env bash + +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" +VENV_DIR="$ROOT_DIR/venv" +CACHE_FILE="${1:-$ROOT_DIR/standards_cache.sqlite}" +TIMESTAMP="$(date +%Y%m%d-%H%M%S)" +BACKUP_FILE="${CACHE_FILE}.bak.${TIMESTAMP}" + +if [[ ! -d "$VENV_DIR" ]]; then + echo "Creating virtual environment in $VENV_DIR" + python3 -m venv "$VENV_DIR" +fi + +source "$VENV_DIR/bin/activate" + +if ! python -c "import requests" >/dev/null 2>&1; then + echo "Installing Python dependencies" + pip install -r "$ROOT_DIR/requirements.txt" +fi + +if [[ -f "$CACHE_FILE" ]]; then + cp "$CACHE_FILE" "$BACKUP_FILE" + echo "Backed up database to $BACKUP_FILE" +fi + +export CRE_NO_NEO4J="${CRE_NO_NEO4J:-1}" +export CRE_NO_GEN_EMBEDDINGS="${CRE_NO_GEN_EMBEDDINGS:-1}" +export CRE_UPSTREAM_MAX_ATTEMPTS="${CRE_UPSTREAM_MAX_ATTEMPTS:-6}" +export CRE_UPSTREAM_RETRY_BACKOFF_SECONDS="${CRE_UPSTREAM_RETRY_BACKOFF_SECONDS:-2}" +export CRE_UPSTREAM_TIMEOUT_SECONDS="${CRE_UPSTREAM_TIMEOUT_SECONDS:-30}" + +echo "Refreshing official OpenCRE upstream data in $CACHE_FILE" +python "$ROOT_DIR/cre.py" --upstream_sync --cache_file "$CACHE_FILE" + +echo "Reapplying OWASP Top 10 standards and CRE mappings" +python "$ROOT_DIR/cre.py" \ + --owasp_top10_2025_in \ + --owasp_api_top10_2023_in \ + --owasp_kubernetes_top10_2025_in \ + --owasp_llm_top10_2025_in \ + --owasp_aisvs_in \ + --cache_file "$CACHE_FILE" + +echo "Selecting preferred Kubernetes Top Ten version" +if python - <<'PY' "$CACHE_FILE" +import sqlite3 +import sys + +cache_file = sys.argv[1] +name_2025 = "OWASP Kubernetes Top Ten 2025 (Draft)" +name_2022 = "OWASP Kubernetes Top Ten 2022" + +conn = sqlite3.connect(cache_file) +cur = conn.cursor() + +linked_2025 = cur.execute( + """ + select count(*) + from node n + join cre_node_links l on l.node = n.id + where n.name = ? 
+ """, + (name_2025,), +).fetchone()[0] + +if linked_2025 > 0: + cur.execute("delete from node where name = ?", (name_2022,)) + print(f"Using {name_2025}; removed {name_2022}") +else: + raise SystemExit(f"{name_2025} not linked") + +conn.commit() +conn.close() +PY +then + : +else + echo "OWASP Kubernetes Top Ten 2025 (Draft) is unavailable or unmapped, importing 2022" + python "$ROOT_DIR/cre.py" \ + --owasp_kubernetes_top10_2022_in \ + --cache_file "$CACHE_FILE" +fi + +echo "Pruning OWASP Top 10 entries that still have no CRE links" +python - <<'PY' "$CACHE_FILE" +import sqlite3 +import sys + +cache_file = sys.argv[1] +standard_names = ( + "OWASP Top 10 2025", + "OWASP API Security Top 10 2023", + "OWASP Kubernetes Top Ten 2025 (Draft)", + "OWASP Top 10 for LLM and Gen AI Apps 2025", + "OWASP AI Security Verification Standard (AISVS)", +) + +conn = sqlite3.connect(cache_file) +cur = conn.cursor() + +has_2022 = cur.execute( + "select 1 from node where name = 'OWASP Kubernetes Top Ten 2022' limit 1" +).fetchone() +if has_2022: + standard_names = standard_names + ("OWASP Kubernetes Top Ten 2022",) + +rows = list( + cur.execute( + f""" + select n.id, n.name, coalesce(n.section_id, ''), coalesce(n.section, '') + from node n + left join cre_node_links l on l.node = n.id + where n.name in ({','.join('?' 
for _ in standard_names)}) + group by n.id + having count(l.cre) = 0 + """, + standard_names, + ) +) + +for node_id, name, section_id, section in rows: + cur.execute("delete from node where id = ?", (node_id,)) + print(f"Removed unmapped entry: {name} {section_id} {section}".strip()) + +conn.commit() +conn.close() +PY From 336be1aeb7c7bd9f4c550042ff8fed25d6da535f Mon Sep 17 00:00:00 2001 From: bornunique911 Date: Tue, 24 Mar 2026 23:04:55 +0530 Subject: [PATCH 08/22] Retry transient failures during upstream sync --- application/cmd/cre_main.py | 115 ++++++++++++++++++++++++----- application/tests/cre_main_test.py | 107 +++++++++++++++++++++++++++ 2 files changed, 204 insertions(+), 18 deletions(-) diff --git a/application/cmd/cre_main.py b/application/cmd/cre_main.py index 3c6bee887..d062ecd85 100644 --- a/application/cmd/cre_main.py +++ b/application/cmd/cre_main.py @@ -37,6 +37,51 @@ app = None +def fetch_upstream_json( + path: str, + timeout: Optional[float] = None, + max_attempts: Optional[int] = None, + backoff_seconds: Optional[float] = None, +) -> Dict[str, Any]: + base_url = os.environ.get("CRE_UPSTREAM_API_URL", "https://opencre.org/rest/v1") + timeout = timeout or float(os.environ.get("CRE_UPSTREAM_TIMEOUT_SECONDS", "30")) + max_attempts = max_attempts or int(os.environ.get("CRE_UPSTREAM_MAX_ATTEMPTS", "4")) + backoff_seconds = backoff_seconds or float( + os.environ.get("CRE_UPSTREAM_RETRY_BACKOFF_SECONDS", "2") + ) + url = f"{base_url}{path}" + last_error: Optional[Exception] = None + + for attempt in range(1, max_attempts + 1): + try: + response = requests.get(url, timeout=timeout) + if response.status_code == 200: + return response.json() + + status_error = RuntimeError( + f"cannot connect to upstream status code {response.status_code}" + ) + # Retry only on transient upstream failures. 
+ if response.status_code < 500 and response.status_code != 429: + raise status_error + last_error = status_error + except requests.exceptions.RequestException as exc: + last_error = exc + + if attempt < max_attempts: + logger.warning( + "upstream fetch failed for %s on attempt %s/%s, retrying", + url, + attempt, + max_attempts, + ) + time.sleep(backoff_seconds * attempt) + + if last_error: + raise RuntimeError(f"upstream fetch failed for {url}") from last_error + raise RuntimeError(f"upstream fetch failed for {url}") + + def register_node(node: defs.Node, collection: db.Node_collection) -> db.Node: """ for each link find if either the root node or the link have a CRE, @@ -354,6 +399,8 @@ def register_standard( ): if os.environ.get("CRE_NO_GEN_EMBEDDINGS") == "1": generate_embeddings = False + if os.environ.get("CRE_NO_CALCULATE_GAP_ANALYSIS"): + calculate_gap_analysis = False if not standard_entries: logger.warning("register_standard() called with no standard_entries") @@ -590,15 +637,7 @@ def download_graph_from_upstream(cache: str) -> None: collection = db_connect(path=cache).with_graph() def download_cre_from_upstream(creid: str): - cre_response = requests.get( - os.environ.get("CRE_UPSTREAM_API_URL", "https://opencre.org/rest/v1") - + f"/id/{creid}" - ) - if cre_response.status_code != 200: - raise RuntimeError( - f"cannot connect to upstream status code {cre_response.status_code}" - ) - data = cre_response.json() + data = fetch_upstream_json(f"/id/{creid}") credict = data["data"] cre = defs.Document.from_dict(credict) if cre.id in imported_cres: @@ -610,15 +649,7 @@ def download_cre_from_upstream(creid: str): if link.document.doctype == defs.Credoctypes.CRE: download_cre_from_upstream(link.document.id) - root_cres_response = requests.get( - os.environ.get("CRE_UPSTREAM_API_URL", "https://opencre.org/rest/v1") - + "/root_cres" - ) - if root_cres_response.status_code != 200: - raise RuntimeError( - f"cannot connect to upstream status code 
{root_cres_response.status_code}" - ) - data = root_cres_response.json() + data = fetch_upstream_json("/root_cres") for root_cre in data["data"]: cre = defs.Document.from_dict(root_cre) register_cre(cre, collection) @@ -900,6 +931,54 @@ def run(args: argparse.Namespace) -> None: # pragma: no cover BaseParser().register_resource( secure_headers.SecureHeaders, db_connection_str=args.cache_file ) + if args.owasp_top10_2025_in: + from application.utils.external_project_parsers.parsers import owasp_top10_2025 + + BaseParser().register_resource( + owasp_top10_2025.OwaspTop10_2025, db_connection_str=args.cache_file + ) + if args.owasp_api_top10_2023_in: + from application.utils.external_project_parsers.parsers import ( + owasp_api_top10_2023, + ) + + BaseParser().register_resource( + owasp_api_top10_2023.OwaspApiTop10_2023, + db_connection_str=args.cache_file, + ) + if args.owasp_kubernetes_top10_2022_in: + from application.utils.external_project_parsers.parsers import ( + owasp_kubernetes_top10_2022, + ) + + BaseParser().register_resource( + owasp_kubernetes_top10_2022.OwaspKubernetesTop10_2022, + db_connection_str=args.cache_file, + ) + if args.owasp_kubernetes_top10_2025_in: + from application.utils.external_project_parsers.parsers import ( + owasp_kubernetes_top10_2025, + ) + + BaseParser().register_resource( + owasp_kubernetes_top10_2025.OwaspKubernetesTop10_2025, + db_connection_str=args.cache_file, + ) + if args.owasp_llm_top10_2025_in: + from application.utils.external_project_parsers.parsers import ( + owasp_llm_top10_2025, + ) + + BaseParser().register_resource( + owasp_llm_top10_2025.OwaspLlmTop10_2025, + db_connection_str=args.cache_file, + ) + if args.owasp_aisvs_in: + from application.utils.external_project_parsers.parsers import owasp_aisvs + + BaseParser().register_resource( + owasp_aisvs.OwaspAisvs, db_connection_str=args.cache_file + ) if args.pci_dss_4_in: from application.utils.external_project_parsers.parsers import pci_dss diff --git 
a/application/tests/cre_main_test.py b/application/tests/cre_main_test.py index 097b0b6d9..68d721ab1 100644 --- a/application/tests/cre_main_test.py +++ b/application/tests/cre_main_test.py @@ -7,6 +7,8 @@ from unittest import mock from unittest.mock import Mock, patch from rq import Queue, job +import requests +from rq import Queue, job from application.utils import redis from application.prompt_client import prompt_client as prompt_client from application.tests.utils import data_gen @@ -470,6 +472,111 @@ def test_register_cre(self) -> None: ], ) + @patch("application.cmd.cre_main.time.sleep") + @patch("application.cmd.cre_main.requests.get") + def test_fetch_upstream_json_retries_transient_failures( + self, mock_get, mock_sleep + ) -> None: + transient_error = requests.exceptions.ConnectionError("reset by peer") + success_response = Mock() + success_response.status_code = 200 + success_response.json.return_value = {"data": []} + mock_get.side_effect = [transient_error, success_response] + + data = main.fetch_upstream_json("/root_cres") + + self.assertEqual(data, {"data": []}) + self.assertEqual(mock_get.call_count, 2) + mock_sleep.assert_called_once() + + def test_parse_file(self) -> None: + file: List[Dict[str, Any]] = [ + { + "description": "Verify that approved cryptographic algorithms are used in the generation, seeding, and verification.", + "doctype": defs.Credoctypes.CRE, + "id": "157-573", + "links": [ + { + "type": defs.LinkTypes.LinkedTo, + "document": { + "doctype": defs.Credoctypes.Standard, + "name": "TOP10", + "section": "https://owasp.org/www-project-top-ten/2017/A5_2017-Broken_Access_Control", + }, + }, + { + "type": defs.LinkTypes.LinkedTo, + "document": { + "doctype": defs.Credoctypes.Standard, + "name": "ISO 25010", + "section": "Secure data storage", + }, + }, + ], + "name": "CREDENTIALS_MANAGEMENT_CRYPTOGRAPHIC_DIRECTIVES", + }, + { + "description": "Desc", + "doctype": defs.Credoctypes.CRE, + "id": "141-141", + "name": "name", + }, + ] + 
expected = [ + defs.CRE( + doctype=defs.Credoctypes.CRE, + id="157-573", + description="Verify that approved cryptographic algorithms are used in the generation, seeding, and verification.", + name="CREDENTIALS_MANAGEMENT_CRYPTOGRAPHIC_DIRECTIVES", + links=[ + defs.Link( + document=defs.Standard( + doctype=defs.Credoctypes.Standard, + name="TOP10", + section="https://owasp.org/www-project-top-ten/2017/A5_2017-Broken_Access_Control", + ), + ltype=defs.LinkTypes.LinkedTo, + ), + defs.Link( + document=defs.Standard( + doctype=defs.Credoctypes.Standard, + name="ISO 25010", + section="Secure data storage", + ), + ltype=defs.LinkTypes.LinkedTo, + ), + ], + ), + defs.CRE(id="141-141", description="Desc", name="name"), + ] + with self.assertLogs("application.cmd.cre_main", level=logging.FATAL) as logs: + # negative test first parse_file accepts a list of objects + result = main.parse_file( + filename="tests", + yamldocs=[ + "no", + "valid", + "objects", + "here", + { + "1": 2, + }, + ], + scollection=self.collection, + ) + + self.assertEqual(result, None) + self.assertIn( + "CRITICAL:application.cmd.cre_main:Malformed file tests, skipping", + logs.output, + ) + + self.maxDiff = None + + res = main.parse_file( + filename="tests", yamldocs=file, scollection=self.collection + ) + self.assertCountEqual(res, expected) @patch.object(main, "db_connect") @patch.object(Queue, "enqueue_call") @patch.object(redis, "wait_for_jobs") From a20319db0dffa3b029c900f62753cd8d5b7403be Mon Sep 17 00:00:00 2001 From: bornunique911 Date: Thu, 23 Apr 2026 22:53:15 +0530 Subject: [PATCH 09/22] Add file parsing function to convert YAML/JSON documents to defs.Document objects --- application/cmd/cre_main.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/application/cmd/cre_main.py b/application/cmd/cre_main.py index d062ecd85..7c5c08181 100644 --- a/application/cmd/cre_main.py +++ b/application/cmd/cre_main.py @@ -1075,6 +1075,27 @@ def generate_embeddings(db_url: str) -> 
None: database = db_connect(path=db_url) prompt_client.PromptHandler(database, load_all_embeddings=True) +def parse_file(filename: str, yamldocs: List[Any], scollection) -> Optional[List[defs.Document]]: + """ + Parse a list of dictionaries (YAML/JSON documents) into defs.Document objects. + Returns None and logs a critical error if any element is not a dict. + """ + if not all(isinstance(doc, dict) for doc in yamldocs): + logger.critical("Malformed file %s, skipping", filename) + return None + + def normalize_links(doc: dict) -> dict: + """Make sure link dicts use 'ltype' key instead of 'type'.""" + if "links" in doc: + for link in doc["links"]: + if "type" in link and "ltype" not in link: + link["ltype"] = link.pop("type") + # Recursively normalize nested documents (if any) + if "document" in link and isinstance(link["document"], dict): + normalize_links(link["document"]) + return doc + + return [defs.Document.from_dict(normalize_links(dict(doc))) for doc in yamldocs] def populate_neo4j_db(cache: str): if ( From b0b40edb7f67e6ff2cd5cb1b1442cdd8c74d5f99 Mon Sep 17 00:00:00 2001 From: bornunique911 Date: Tue, 24 Mar 2026 23:04:55 +0530 Subject: [PATCH 10/22] Add OWASP Top 10 and API importer support --- .../tests/owasp_api_top10_2023_parser_test.py | 43 ++++++++++ .../tests/owasp_top10_2025_parser_test.py | 80 +++++++++++++++++++ .../data/owasp_api_top10_2023.json | 62 ++++++++++++++ .../data/owasp_top10_2025.json | 62 ++++++++++++++ .../parsers/owasp_api_top10_2023.py | 47 +++++++++++ .../parsers/owasp_top10_2025.py | 47 +++++++++++ cre.py | 30 +++++++ 7 files changed, 371 insertions(+) create mode 100644 application/tests/owasp_api_top10_2023_parser_test.py create mode 100644 application/tests/owasp_top10_2025_parser_test.py create mode 100644 application/utils/external_project_parsers/data/owasp_api_top10_2023.json create mode 100644 application/utils/external_project_parsers/data/owasp_top10_2025.json create mode 100644 
application/utils/external_project_parsers/parsers/owasp_api_top10_2023.py create mode 100644 application/utils/external_project_parsers/parsers/owasp_top10_2025.py diff --git a/application/tests/owasp_api_top10_2023_parser_test.py b/application/tests/owasp_api_top10_2023_parser_test.py new file mode 100644 index 000000000..806d11bed --- /dev/null +++ b/application/tests/owasp_api_top10_2023_parser_test.py @@ -0,0 +1,43 @@ +import unittest + +from application import create_app, sqla # type: ignore +from application.database import db +from application.defs import cre_defs as defs +from application.prompt_client import prompt_client +from application.utils.external_project_parsers.parsers import owasp_api_top10_2023 + + +class TestOwaspApiTop10_2023Parser(unittest.TestCase): + def tearDown(self) -> None: + sqla.session.remove() + sqla.drop_all() + self.app_context.pop() + + def setUp(self) -> None: + self.app = create_app(mode="test") + self.app_context = self.app.app_context() + self.app_context.push() + sqla.create_all() + self.collection = db.Node_collection() + + def test_parse(self) -> None: + for cre_id, name in [ + ("304-667", "Protect API against unauthorized access/modification (IDOR)"), + ("724-770", "Technical application access control"), + ("715-223", "Ensure trusted origin of third party resources"), + ]: + self.collection.add_cre(defs.CRE(id=cre_id, name=name, description="")) + + result = owasp_api_top10_2023.OwaspApiTop10_2023().parse( + self.collection, prompt_client.PromptHandler(database=self.collection) + ) + + entries = result.results["OWASP API Security Top 10 2023"] + self.assertEqual(10, len(entries)) + self.assertEqual("API1", entries[0].sectionID) + self.assertEqual("Broken Object Level Authorization", entries[0].section) + self.assertEqual( + ["304-667", "724-770"], [l.document.id for l in entries[0].links] + ) + self.assertEqual("API10", entries[-1].sectionID) + self.assertEqual(["715-223"], [l.document.id for l in entries[-1].links]) 
diff --git a/application/tests/owasp_top10_2025_parser_test.py b/application/tests/owasp_top10_2025_parser_test.py new file mode 100644 index 000000000..de4f86a9f --- /dev/null +++ b/application/tests/owasp_top10_2025_parser_test.py @@ -0,0 +1,80 @@ +import unittest + +from application import create_app, sqla # type: ignore +from application.database import db +from application.defs import cre_defs as defs +from application.prompt_client import prompt_client +from application.utils.external_project_parsers.parsers import owasp_top10_2025 + + +class TestOwaspTop10_2025Parser(unittest.TestCase): + def tearDown(self) -> None: + sqla.session.remove() + sqla.drop_all() + self.app_context.pop() + + def setUp(self) -> None: + self.app = create_app(mode="test") + self.app_context = self.app.app_context() + self.app_context.push() + sqla.create_all() + self.collection = db.Node_collection() + + def test_parse(self) -> None: + self.collection.add_cre( + defs.CRE(id="177-260", name="Session management", description="") + ) + self.collection.add_cre( + defs.CRE( + id="117-371", + name="Use a centralized access control mechanism", + description="", + ) + ) + self.collection.add_cre( + defs.CRE( + id="724-770", + name="Technical application access control", + description="", + ) + ) + self.collection.add_cre( + defs.CRE( + id="031-447", name="Whitelist all external (HTTP) input", description="" + ) + ) + self.collection.add_cre( + defs.CRE( + id="064-808", name="Encode output context-specifically", description="" + ) + ) + self.collection.add_cre( + defs.CRE(id="760-764", name="Injection protection", description="") + ) + self.collection.add_cre( + defs.CRE(id="513-183", name="Error handling", description="") + ) + + result = owasp_top10_2025.OwaspTop10_2025().parse( + self.collection, + prompt_client.PromptHandler(database=self.collection), + ) + + entries = result.results["OWASP Top 10 2025"] + self.assertEqual(10, len(entries)) + self.assertEqual("A01", entries[0].sectionID) 
+ self.assertEqual("Broken Access Control", entries[0].section) + self.assertEqual( + "https://owasp.org/Top10/2025/A01_2025-Broken_Access_Control/", + entries[0].hyperlink, + ) + self.assertEqual( + ["117-371", "177-260", "724-770"], + [link.document.id for link in entries[0].links], + ) + self.assertEqual( + ["031-447", "064-808", "760-764"], + [link.document.id for link in entries[4].links], + ) + self.assertEqual("A10", entries[-1].sectionID) + self.assertEqual(["513-183"], [link.document.id for link in entries[-1].links]) diff --git a/application/utils/external_project_parsers/data/owasp_api_top10_2023.json b/application/utils/external_project_parsers/data/owasp_api_top10_2023.json new file mode 100644 index 000000000..7a8df0ed0 --- /dev/null +++ b/application/utils/external_project_parsers/data/owasp_api_top10_2023.json @@ -0,0 +1,62 @@ +[ + { + "section_id": "API1", + "section": "Broken Object Level Authorization", + "hyperlink": "https://owasp.org/API-Security/editions/2023/en/0xa1-broken-object-level-authorization/", + "cre_ids": ["304-667", "724-770"] + }, + { + "section_id": "API2", + "section": "Broken Authentication", + "hyperlink": "https://owasp.org/API-Security/editions/2023/en/0xa2-broken-authentication/", + "cre_ids": ["177-260", "586-842", "633-428"] + }, + { + "section_id": "API3", + "section": "Broken Object Property Level Authorization", + "hyperlink": "https://owasp.org/API-Security/editions/2023/en/0xa3-broken-object-property-level-authorization/", + "cre_ids": ["538-770", "724-770", "128-128"] + }, + { + "section_id": "API4", + "section": "Unrestricted Resource Consumption", + "hyperlink": "https://owasp.org/API-Security/editions/2023/en/0xa4-unrestricted-resource-consumption/", + "cre_ids": ["623-550"] + }, + { + "section_id": "API5", + "section": "Broken Function Level Authorization", + "hyperlink": "https://owasp.org/API-Security/editions/2023/en/0xa5-broken-function-level-authorization/", + "cre_ids": ["650-560", "724-770"] + }, + { + 
"section_id": "API6", + "section": "Unrestricted Access to Sensitive Business Flows", + "hyperlink": "https://owasp.org/API-Security/editions/2023/en/0xa6-unrestricted-access-to-sensitive-business-flows/", + "cre_ids": ["534-605", "630-573"] + }, + { + "section_id": "API7", + "section": "Server Side Request Forgery", + "hyperlink": "https://owasp.org/API-Security/editions/2023/en/0xa7-server-side-request-forgery/", + "cre_ids": ["028-728", "657-084"] + }, + { + "section_id": "API8", + "section": "Security Misconfiguration", + "hyperlink": "https://owasp.org/API-Security/editions/2023/en/0xa8-security-misconfiguration/", + "cre_ids": ["486-813"] + }, + { + "section_id": "API9", + "section": "Improper Inventory Management", + "hyperlink": "https://owasp.org/API-Security/editions/2023/en/0xa9-improper-inventory-management/", + "cre_ids": ["162-655", "863-521"] + }, + { + "section_id": "API10", + "section": "Unsafe Consumption of APIs", + "hyperlink": "https://owasp.org/API-Security/editions/2023/en/0xaa-unsafe-consumption-of-apis/", + "cre_ids": ["715-223"] + } +] diff --git a/application/utils/external_project_parsers/data/owasp_top10_2025.json b/application/utils/external_project_parsers/data/owasp_top10_2025.json new file mode 100644 index 000000000..7e19d1a4e --- /dev/null +++ b/application/utils/external_project_parsers/data/owasp_top10_2025.json @@ -0,0 +1,62 @@ +[ + { + "section_id": "A01", + "section": "Broken Access Control", + "hyperlink": "https://owasp.org/Top10/2025/A01_2025-Broken_Access_Control/", + "cre_ids": ["117-371", "177-260", "724-770"] + }, + { + "section_id": "A02", + "section": "Security Misconfiguration", + "hyperlink": "https://owasp.org/Top10/2025/A02_2025-Security_Misconfiguration/", + "cre_ids": ["486-813"] + }, + { + "section_id": "A03", + "section": "Software Supply Chain Failures", + "hyperlink": "https://owasp.org/Top10/2025/A03_2025-Software_Supply_Chain_Failures/", + "cre_ids": ["613-286", "613-287", "715-223", "863-521"] + }, + { + 
"section_id": "A04", + "section": "Cryptographic Failures", + "hyperlink": "https://owasp.org/Top10/2025/A04_2025-Cryptographic_Failures/", + "cre_ids": ["170-772", "227-045"] + }, + { + "section_id": "A05", + "section": "Injection", + "hyperlink": "https://owasp.org/Top10/2025/A05_2025-Injection/", + "cre_ids": ["031-447", "064-808", "760-764"] + }, + { + "section_id": "A06", + "section": "Insecure Design", + "hyperlink": "https://owasp.org/Top10/2025/A06_2025-Insecure_Design/", + "cre_ids": ["126-668", "155-155"] + }, + { + "section_id": "A07", + "section": "Authentication Failures", + "hyperlink": "https://owasp.org/Top10/2025/A07_2025-Authentication_Failures/", + "cre_ids": ["002-630", "177-260", "586-842", "633-428"] + }, + { + "section_id": "A08", + "section": "Software or Data Integrity Failures", + "hyperlink": "https://owasp.org/Top10/2025/A08_2025-Software_or_Data_Integrity_Failures/", + "cre_ids": ["613-287", "836-068"] + }, + { + "section_id": "A09", + "section": "Security Logging and Alerting Failures", + "hyperlink": "https://owasp.org/Top10/2025/A09_2025-Security_Logging_and_Alerting_Failures/", + "cre_ids": ["067-050", "148-420", "402-706", "843-841"] + }, + { + "section_id": "A10", + "section": "Mishandling of Exceptional Conditions", + "hyperlink": "https://owasp.org/Top10/2025/A10_2025-Mishandling_of_Exceptional_Conditions/", + "cre_ids": ["513-183"] + } +] diff --git a/application/utils/external_project_parsers/parsers/owasp_api_top10_2023.py b/application/utils/external_project_parsers/parsers/owasp_api_top10_2023.py new file mode 100644 index 000000000..08157a1e9 --- /dev/null +++ b/application/utils/external_project_parsers/parsers/owasp_api_top10_2023.py @@ -0,0 +1,47 @@ +import json +from pathlib import Path + +from application.database import db +from application.defs import cre_defs as defs +from application.prompt_client import prompt_client +from application.utils.external_project_parsers.base_parser_defs import ( + ParseResult, + 
ParserInterface, +) + + +class OwaspApiTop10_2023(ParserInterface): + name = "OWASP API Security Top 10 2023" + data_file = ( + Path(__file__).resolve().parent.parent / "data" / "owasp_api_top10_2023.json" + ) + + def parse(self, cache: db.Node_collection, ph: prompt_client.PromptHandler): + with self.data_file.open("r", encoding="utf-8") as handle: + raw_entries = json.load(handle) + + entries = [] + for entry in raw_entries: + standard = defs.Standard( + name=self.name, + sectionID=entry["section_id"], + section=entry["section"], + hyperlink=entry["hyperlink"], + ) + for cre_id in entry.get("cre_ids", []): + cres = cache.get_CREs(external_id=cre_id) + if not cres: + continue + standard.add_link( + defs.Link( + ltype=defs.LinkTypes.LinkedTo, + document=cres[0].shallow_copy(), + ) + ) + entries.append(standard) + + return ParseResult( + results={self.name: entries}, + calculate_gap_analysis=False, + calculate_embeddings=False, + ) diff --git a/application/utils/external_project_parsers/parsers/owasp_top10_2025.py b/application/utils/external_project_parsers/parsers/owasp_top10_2025.py new file mode 100644 index 000000000..070f869af --- /dev/null +++ b/application/utils/external_project_parsers/parsers/owasp_top10_2025.py @@ -0,0 +1,47 @@ +import json +from pathlib import Path + +from application.database import db +from application.defs import cre_defs as defs +from application.prompt_client import prompt_client +from application.utils.external_project_parsers.base_parser_defs import ( + ParseResult, + ParserInterface, +) + + +class OwaspTop10_2025(ParserInterface): + name = "OWASP Top 10 2025" + data_file = ( + Path(__file__).resolve().parent.parent / "data" / "owasp_top10_2025.json" + ) + + def parse(self, cache: db.Node_collection, ph: prompt_client.PromptHandler): + with self.data_file.open("r", encoding="utf-8") as handle: + raw_entries = json.load(handle) + + entries = [] + for entry in raw_entries: + standard = defs.Standard( + name=self.name, + 
sectionID=entry["section_id"], + section=entry["section"], + hyperlink=entry["hyperlink"], + ) + for cre_id in entry.get("cre_ids", []): + cres = cache.get_CREs(external_id=cre_id) + if not cres: + continue + standard.add_link( + defs.Link( + ltype=defs.LinkTypes.LinkedTo, + document=cres[0].shallow_copy(), + ) + ) + entries.append(standard) + + return ParseResult( + results={self.name: entries}, + calculate_gap_analysis=False, + calculate_embeddings=False, + ) diff --git a/cre.py b/cre.py index 80dd48617..7a4c691dd 100644 --- a/cre.py +++ b/cre.py @@ -167,6 +167,36 @@ def main() -> None: action="store_true", help="import owasp secure headers", ) + parser.add_argument( + "--owasp_top10_2025_in", + action="store_true", + help="import OWASP Top 10 2025", + ) + parser.add_argument( + "--owasp_api_top10_2023_in", + action="store_true", + help="import OWASP API Security Top 10 2023", + ) + parser.add_argument( + "--owasp_kubernetes_top10_2022_in", + action="store_true", + help="import OWASP Kubernetes Top Ten 2022", + ) + parser.add_argument( + "--owasp_kubernetes_top10_2025_in", + action="store_true", + help="import OWASP Kubernetes Top Ten 2025 draft", + ) + parser.add_argument( + "--owasp_llm_top10_2025_in", + action="store_true", + help="import OWASP Top 10 for LLM and Gen AI Apps 2025", + ) + parser.add_argument( + "--owasp_aisvs_in", + action="store_true", + help="import OWASP AI Security Verification Standard (AISVS)", + ) parser.add_argument( "--pci_dss_3_2_in", action="store_true", From 18f2ef70240c410d3ae7366d5d9b8d917bb7b5cc Mon Sep 17 00:00:00 2001 From: bornunique911 Date: Wed, 1 Apr 2026 16:13:04 +0530 Subject: [PATCH 11/22] Fix cheat sheet parser test expectations on importer branches --- application/tests/cheatsheets_parser_test.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/application/tests/cheatsheets_parser_test.py b/application/tests/cheatsheets_parser_test.py index 1a3ba4bf0..e2c0910d6 100644 --- 
a/application/tests/cheatsheets_parser_test.py +++ b/application/tests/cheatsheets_parser_test.py @@ -34,7 +34,13 @@ class Repo: repo.working_dir = loc cre = defs.CRE(name="blah", id="223-780") self.collection.add_cre(cre) - with open(os.path.join(os.path.join(loc, "cheatsheets"), "cs.md"), "w") as mdf: + with open( + os.path.join( + os.path.join(loc, "cheatsheets"), + "Secrets_Management_Cheat_Sheet.md", + ), + "w", + ) as mdf: mdf.write(cs) mock_clone.return_value = repo entries = cheatsheets_parser.Cheatsheets().parse( @@ -45,22 +51,26 @@ class Repo: # verify the external tagging convention, not just enum wiring. expected = defs.Standard( name="OWASP Cheat Sheets", - hyperlink="https://github.com/foo/bar/tree/master/cs.md", + hyperlink="https://cheatsheetseries.owasp.org/cheatsheets/Secrets_Management_Cheat_Sheet.html", section="Secrets Management Cheat Sheet", - links=[defs.Link(document=cre, ltype=defs.LinkTypes.LinkedTo)], + links=[ + defs.Link( + document=cre, ltype=defs.LinkTypes.AutomaticallyLinkedTo + ) + ], tags=[ "family:guidance", "subtype:cheatsheet", - "source:owasp_cheatsheets", "audience:developer", "maturity:stable", + "source:owasp_cheatsheets", ], ) self.maxDiff = None for name, nodes in entries.results.items(): self.assertEqual(name, parser.name) self.assertEqual(len(nodes), 1) - self.assertCountEqual(expected.todict(), nodes[0].todict()) + self.assertEqual(expected.todict(), nodes[0].todict()) cheatsheets_md = """ # Secrets Management Cheat Sheet From e4df9ae2058619cda8250deb954216a2ea3ae10b Mon Sep 17 00:00:00 2001 From: bornunique911 Date: Wed, 1 Apr 2026 16:14:21 +0530 Subject: [PATCH 12/22] Use official OWASP cheat sheet URLs in importer branches --- .../external_project_parsers/parsers/cheatsheets_parser.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/application/utils/external_project_parsers/parsers/cheatsheets_parser.py b/application/utils/external_project_parsers/parsers/cheatsheets_parser.py index 
e695414d8..e234dadda 100644 --- a/application/utils/external_project_parsers/parsers/cheatsheets_parser.py +++ b/application/utils/external_project_parsers/parsers/cheatsheets_parser.py @@ -15,6 +15,7 @@ class Cheatsheets(ParserInterface): name = "OWASP Cheat Sheets" + cheatsheetseries_base_url = "https://cheatsheetseries.owasp.org/cheatsheets" def cheatsheet( self, section: str, hyperlink: str, tags: List[str] @@ -33,6 +34,10 @@ def cheatsheet( hyperlink=hyperlink, ) + def official_cheatsheet_url(self, markdown_filename: str) -> str: + html_name = os.path.splitext(markdown_filename)[0] + ".html" + return f"{self.cheatsheetseries_base_url}/{html_name}" + def parse(self, cache: db.Node_collection, ph: prompt_client.PromptHandler): c_repo = "https://github.com/OWASP/CheatSheetSeries.git" cheatsheets_path = "cheatsheets/" @@ -65,7 +70,7 @@ def register_cheatsheets( name = title.group("title") cre_id = cre.group("cre") cres = cache.get_CREs(external_id=cre_id) - hyperlink = f"{repo_path.replace('.git','')}/tree/master/{cheatsheets_path}{mdfile}" + hyperlink = self.official_cheatsheet_url(mdfile) cs = self.cheatsheet(section=name, hyperlink=hyperlink, tags=[]) for cre in cres: cs.add_link( From 60cd2b0c7b5c2a666ba99fa09bada2e3c85b8f0b Mon Sep 17 00:00:00 2001 From: bornunique911 Date: Tue, 24 Mar 2026 23:04:55 +0530 Subject: [PATCH 13/22] Add OWASP AI resource importer support --- application/tests/owasp_aisvs_parser_test.py | 62 +++++++++++++ .../tests/owasp_llm_top10_2025_parser_test.py | 45 ++++++++++ .../data/owasp_aisvs_1_0.json | 86 +++++++++++++++++++ .../data/owasp_llm_top10_2025.json | 62 +++++++++++++ .../parsers/owasp_aisvs.py | 45 ++++++++++ .../parsers/owasp_llm_top10_2025.py | 47 ++++++++++ 6 files changed, 347 insertions(+) create mode 100644 application/tests/owasp_aisvs_parser_test.py create mode 100644 application/tests/owasp_llm_top10_2025_parser_test.py create mode 100644 application/utils/external_project_parsers/data/owasp_aisvs_1_0.json create 
mode 100644 application/utils/external_project_parsers/data/owasp_llm_top10_2025.json create mode 100644 application/utils/external_project_parsers/parsers/owasp_aisvs.py create mode 100644 application/utils/external_project_parsers/parsers/owasp_llm_top10_2025.py diff --git a/application/tests/owasp_aisvs_parser_test.py b/application/tests/owasp_aisvs_parser_test.py new file mode 100644 index 000000000..461b2d68d --- /dev/null +++ b/application/tests/owasp_aisvs_parser_test.py @@ -0,0 +1,62 @@ +import unittest + +from application import create_app, sqla # type: ignore +from application.database import db +from application.defs import cre_defs as defs +from application.prompt_client import prompt_client +from application.utils.external_project_parsers.parsers import owasp_aisvs + + +class TestOwaspAisvsParser(unittest.TestCase): + def tearDown(self) -> None: + sqla.session.remove() + sqla.drop_all() + self.app_context.pop() + + def setUp(self) -> None: + self.app = create_app(mode="test") + self.app_context = self.app.app_context() + self.app_context.push() + sqla.create_all() + self.collection = db.Node_collection() + + def test_parse(self) -> None: + for cre_id, name in [ + ("227-045", "Identify sensitive data and subject it to a policy"), + ( + "307-507", + "Allow only trusted sources both build time and runtime; therefore perform integrity checks on all resources and code", + ), + ( + "162-655", + "Documentation of all components' business or security function", + ), + ]: + self.collection.add_cre(defs.CRE(id=cre_id, name=name, description="")) + + result = owasp_aisvs.OwaspAisvs().parse( + self.collection, prompt_client.PromptHandler(database=self.collection) + ) + + entries = result.results["OWASP AI Security Verification Standard (AISVS)"] + self.assertEqual(14, len(entries)) + self.assertEqual("AISVS1", entries[0].sectionID) + self.assertEqual( + "Training Data Governance & Bias Management", entries[0].section + ) + self.assertEqual( + 
"https://github.com/OWASP/AISVS/tree/main/1.0/en/0x10-C01-Training-Data-Governance.md", + entries[0].hyperlink, + ) + self.assertEqual( + ["227-045", "307-507"], [l.document.id for l in entries[0].links] + ) + self.assertEqual("AISVS14", entries[-1].sectionID) + self.assertEqual( + "Human Oversight, Accountability & Governance", entries[-1].section + ) + self.assertEqual( + "https://github.com/OWASP/AISVS/tree/main/1.0/en/0x10-C14-Human-Oversight.md", + entries[-1].hyperlink, + ) + self.assertEqual(["162-655"], [l.document.id for l in entries[-1].links]) diff --git a/application/tests/owasp_llm_top10_2025_parser_test.py b/application/tests/owasp_llm_top10_2025_parser_test.py new file mode 100644 index 000000000..75b282c34 --- /dev/null +++ b/application/tests/owasp_llm_top10_2025_parser_test.py @@ -0,0 +1,45 @@ +import unittest + +from application import create_app, sqla # type: ignore +from application.database import db +from application.defs import cre_defs as defs +from application.prompt_client import prompt_client +from application.utils.external_project_parsers.parsers import owasp_llm_top10_2025 + + +class TestOwaspLlmTop10_2025Parser(unittest.TestCase): + def tearDown(self) -> None: + sqla.session.remove() + sqla.drop_all() + self.app_context.pop() + + def setUp(self) -> None: + self.app = create_app(mode="test") + self.app_context = self.app.app_context() + self.app_context.push() + sqla.create_all() + self.collection = db.Node_collection() + + def test_parse(self) -> None: + for cre_id, name in [ + ("161-451", "Output encoding and injection prevention"), + ("064-808", "Encode output context-specifically"), + ("760-764", "Injection protection"), + ("623-550", "Denial Of Service protection"), + ]: + self.collection.add_cre(defs.CRE(id=cre_id, name=name, description="")) + + result = owasp_llm_top10_2025.OwaspLlmTop10_2025().parse( + self.collection, prompt_client.PromptHandler(database=self.collection) + ) + + entries = result.results["OWASP Top 10 for LLM 
and Gen AI Apps 2025"] + self.assertEqual(10, len(entries)) + self.assertEqual("LLM01", entries[0].sectionID) + self.assertEqual("Prompt Injection", entries[0].section) + self.assertEqual( + ["161-451", "760-764"], [l.document.id for l in entries[0].links] + ) + self.assertEqual(["064-808"], [l.document.id for l in entries[4].links]) + self.assertEqual("LLM10", entries[-1].sectionID) + self.assertEqual(["623-550"], [l.document.id for l in entries[-1].links]) diff --git a/application/utils/external_project_parsers/data/owasp_aisvs_1_0.json b/application/utils/external_project_parsers/data/owasp_aisvs_1_0.json new file mode 100644 index 000000000..c4880546f --- /dev/null +++ b/application/utils/external_project_parsers/data/owasp_aisvs_1_0.json @@ -0,0 +1,86 @@ +[ + { + "section_id": "AISVS1", + "section": "Training Data Governance & Bias Management", + "hyperlink": "https://github.com/OWASP/AISVS/tree/main/1.0/en/0x10-C01-Training-Data-Governance.md", + "cre_ids": ["227-045", "307-507"] + }, + { + "section_id": "AISVS2", + "section": "User Input Validation", + "hyperlink": "https://github.com/OWASP/AISVS/tree/main/1.0/en/0x10-C02-User-Input-Validation.md", + "cre_ids": ["031-447", "760-764"] + }, + { + "section_id": "AISVS3", + "section": "Model Lifecycle Management & Change Control", + "hyperlink": "https://github.com/OWASP/AISVS/tree/main/1.0/en/0x10-C03-Model-Lifecycle-Management.md", + "cre_ids": ["148-853", "613-285"] + }, + { + "section_id": "AISVS4", + "section": "Infrastructure, Configuration & Deployment Security", + "hyperlink": "https://github.com/OWASP/AISVS/tree/main/1.0/en/0x10-C04-Infrastructure.md", + "cre_ids": ["233-748", "486-813"] + }, + { + "section_id": "AISVS5", + "section": "Access Control & Identity for AI Components & Users", + "hyperlink": "https://github.com/OWASP/AISVS/tree/main/1.0/en/0x10-C05-Access-Control-and-Identity.md", + "cre_ids": ["633-428", "724-770"] + }, + { + "section_id": "AISVS6", + "section": "Supply Chain Security for 
Models, Frameworks & Data", + "hyperlink": "https://github.com/OWASP/AISVS/tree/main/1.0/en/0x10-C06-Supply-Chain.md", + "cre_ids": ["613-285", "613-287", "863-521"] + }, + { + "section_id": "AISVS7", + "section": "Model Behavior, Output Control & Safety Assurance", + "hyperlink": "https://github.com/OWASP/AISVS/tree/main/1.0/en/0x10-C07-Model-Behavior.md", + "cre_ids": ["064-808", "141-555"] + }, + { + "section_id": "AISVS8", + "section": "Memory, Embeddings & Vector Database Security", + "hyperlink": "https://github.com/OWASP/AISVS/tree/main/1.0/en/0x10-C08-Memory-Embeddings-and-Vector-Database.md", + "cre_ids": ["126-668", "538-770"] + }, + { + "section_id": "AISVS9", + "section": "Autonomous Orchestration & Agentic Action Security", + "hyperlink": "https://github.com/OWASP/AISVS/tree/main/1.0/en/0x10-C09-Orchestration-and-Agentic-Action.md", + "cre_ids": ["117-371", "650-560"] + }, + { + "section_id": "AISVS10", + "section": "Model Context Protocol (MCP) Security", + "hyperlink": "https://github.com/OWASP/AISVS/tree/main/1.0/en/0x10-C10-MCP-Security.md", + "cre_ids": ["307-507", "715-223"] + }, + { + "section_id": "AISVS11", + "section": "Adversarial Robustness & Privacy Defense", + "hyperlink": "https://github.com/OWASP/AISVS/tree/main/1.0/en/0x10-C11-Adversarial-Robustness.md", + "cre_ids": ["141-555", "623-550"] + }, + { + "section_id": "AISVS12", + "section": "Privacy Protection & Personal Data Management", + "hyperlink": "https://github.com/OWASP/AISVS/tree/main/1.0/en/0x10-C12-Privacy.md", + "cre_ids": ["126-668", "227-045", "482-866"] + }, + { + "section_id": "AISVS13", + "section": "Monitoring, Logging & Anomaly Detection", + "hyperlink": "https://github.com/OWASP/AISVS/tree/main/1.0/en/0x10-C13-Monitoring-and-Logging.md", + "cre_ids": ["058-083", "148-420", "402-706", "843-841"] + }, + { + "section_id": "AISVS14", + "section": "Human Oversight, Accountability & Governance", + "hyperlink": 
"https://github.com/OWASP/AISVS/tree/main/1.0/en/0x10-C14-Human-Oversight.md", + "cre_ids": ["162-655", "766-162"] + } +] diff --git a/application/utils/external_project_parsers/data/owasp_llm_top10_2025.json b/application/utils/external_project_parsers/data/owasp_llm_top10_2025.json new file mode 100644 index 000000000..b761d5e09 --- /dev/null +++ b/application/utils/external_project_parsers/data/owasp_llm_top10_2025.json @@ -0,0 +1,62 @@ +[ + { + "section_id": "LLM01", + "section": "Prompt Injection", + "hyperlink": "https://genai.owasp.org/llmrisk/llm01-prompt-injection/", + "cre_ids": ["161-451", "760-764"] + }, + { + "section_id": "LLM02", + "section": "Sensitive Information Disclosure", + "hyperlink": "https://genai.owasp.org/llmrisk/llm022025-sensitive-information-disclosure/", + "cre_ids": ["126-668", "227-045"] + }, + { + "section_id": "LLM03", + "section": "Supply Chain", + "hyperlink": "https://genai.owasp.org/llmrisk/llm032025-supply-chain/", + "cre_ids": ["613-285", "613-287"] + }, + { + "section_id": "LLM04", + "section": "Data and Model Poisoning", + "hyperlink": "https://genai.owasp.org/llmrisk/llm042025-data-and-model-poisoning/", + "cre_ids": ["307-507", "613-287"] + }, + { + "section_id": "LLM05", + "section": "Improper Output Handling", + "hyperlink": "https://genai.owasp.org/llmrisk/llm052025-improper-output-handling/", + "cre_ids": ["064-808"] + }, + { + "section_id": "LLM06", + "section": "Excessive Agency", + "hyperlink": "https://genai.owasp.org/llmrisk/llm062025-excessive-agency/", + "cre_ids": ["117-371", "650-560"] + }, + { + "section_id": "LLM07", + "section": "System Prompt Leakage", + "hyperlink": "https://genai.owasp.org/llmrisk/llm072025-system-prompt-leakage/", + "cre_ids": ["126-668", "227-045"] + }, + { + "section_id": "LLM08", + "section": "Vector and Embedding Weaknesses", + "hyperlink": "https://genai.owasp.org/llmrisk/llm082025-vector-and-embedding-weaknesses/", + "cre_ids": ["126-668", "538-770"] + }, + { + "section_id": 
"LLM09", + "section": "Misinformation", + "hyperlink": "https://genai.owasp.org/llmrisk/llm092025-misinformation/", + "cre_ids": ["141-555"] + }, + { + "section_id": "LLM10", + "section": "Unbounded Consumption", + "hyperlink": "https://genai.owasp.org/llmrisk/llm102025-unbounded-consumption/", + "cre_ids": ["267-031", "623-550"] + } +] diff --git a/application/utils/external_project_parsers/parsers/owasp_aisvs.py b/application/utils/external_project_parsers/parsers/owasp_aisvs.py new file mode 100644 index 000000000..cec4abad9 --- /dev/null +++ b/application/utils/external_project_parsers/parsers/owasp_aisvs.py @@ -0,0 +1,45 @@ +import json +from pathlib import Path + +from application.database import db +from application.defs import cre_defs as defs +from application.prompt_client import prompt_client +from application.utils.external_project_parsers.base_parser_defs import ( + ParseResult, + ParserInterface, +) + + +class OwaspAisvs(ParserInterface): + name = "OWASP AI Security Verification Standard (AISVS)" + data_file = Path(__file__).resolve().parent.parent / "data" / "owasp_aisvs_1_0.json" + + def parse(self, cache: db.Node_collection, ph: prompt_client.PromptHandler): + with self.data_file.open("r", encoding="utf-8") as handle: + raw_entries = json.load(handle) + + entries = [] + for entry in raw_entries: + standard = defs.Standard( + name=self.name, + sectionID=entry["section_id"], + section=entry["section"], + hyperlink=entry["hyperlink"], + ) + for cre_id in entry.get("cre_ids", []): + cres = cache.get_CREs(external_id=cre_id) + if not cres: + continue + standard.add_link( + defs.Link( + ltype=defs.LinkTypes.LinkedTo, + document=cres[0].shallow_copy(), + ) + ) + entries.append(standard) + + return ParseResult( + results={self.name: entries}, + calculate_gap_analysis=False, + calculate_embeddings=False, + ) diff --git a/application/utils/external_project_parsers/parsers/owasp_llm_top10_2025.py 
b/application/utils/external_project_parsers/parsers/owasp_llm_top10_2025.py new file mode 100644 index 000000000..3971b9e6b --- /dev/null +++ b/application/utils/external_project_parsers/parsers/owasp_llm_top10_2025.py @@ -0,0 +1,47 @@ +import json +from pathlib import Path + +from application.database import db +from application.defs import cre_defs as defs +from application.prompt_client import prompt_client +from application.utils.external_project_parsers.base_parser_defs import ( + ParseResult, + ParserInterface, +) + + +class OwaspLlmTop10_2025(ParserInterface): + name = "OWASP Top 10 for LLM and Gen AI Apps 2025" + data_file = ( + Path(__file__).resolve().parent.parent / "data" / "owasp_llm_top10_2025.json" + ) + + def parse(self, cache: db.Node_collection, ph: prompt_client.PromptHandler): + with self.data_file.open("r", encoding="utf-8") as handle: + raw_entries = json.load(handle) + + entries = [] + for entry in raw_entries: + standard = defs.Standard( + name=self.name, + sectionID=entry["section_id"], + section=entry["section"], + hyperlink=entry["hyperlink"], + ) + for cre_id in entry.get("cre_ids", []): + cres = cache.get_CREs(external_id=cre_id) + if not cres: + continue + standard.add_link( + defs.Link( + ltype=defs.LinkTypes.LinkedTo, + document=cres[0].shallow_copy(), + ) + ) + entries.append(standard) + + return ParseResult( + results={self.name: entries}, + calculate_gap_analysis=False, + calculate_embeddings=False, + ) From 5632dc9e92c09f30566eb7d93fa335dc00c8ddc9 Mon Sep 17 00:00:00 2001 From: bornunique911 Date: Tue, 24 Mar 2026 23:04:55 +0530 Subject: [PATCH 14/22] Add OWASP Kubernetes importer support --- ...owasp_kubernetes_top10_2022_parser_test.py | 45 ++++++++ ...owasp_kubernetes_top10_2025_parser_test.py | 102 ++++++++++++++++++ .../data/owasp_kubernetes_top10_2022.json | 62 +++++++++++ .../data/owasp_kubernetes_top10_2025.json | 72 +++++++++++++ .../parsers/owasp_kubernetes_top10_2022.py | 49 +++++++++ 
.../parsers/owasp_kubernetes_top10_2025.py | 78 ++++++++++++++ 6 files changed, 408 insertions(+) create mode 100644 application/tests/owasp_kubernetes_top10_2022_parser_test.py create mode 100644 application/tests/owasp_kubernetes_top10_2025_parser_test.py create mode 100644 application/utils/external_project_parsers/data/owasp_kubernetes_top10_2022.json create mode 100644 application/utils/external_project_parsers/data/owasp_kubernetes_top10_2025.json create mode 100644 application/utils/external_project_parsers/parsers/owasp_kubernetes_top10_2022.py create mode 100644 application/utils/external_project_parsers/parsers/owasp_kubernetes_top10_2025.py diff --git a/application/tests/owasp_kubernetes_top10_2022_parser_test.py b/application/tests/owasp_kubernetes_top10_2022_parser_test.py new file mode 100644 index 000000000..30b0922c9 --- /dev/null +++ b/application/tests/owasp_kubernetes_top10_2022_parser_test.py @@ -0,0 +1,45 @@ +import unittest + +from application import create_app, sqla # type: ignore +from application.database import db +from application.defs import cre_defs as defs +from application.prompt_client import prompt_client +from application.utils.external_project_parsers.parsers import ( + owasp_kubernetes_top10_2022, +) + + +class TestOwaspKubernetesTop10_2022Parser(unittest.TestCase): + def tearDown(self) -> None: + sqla.session.remove() + sqla.drop_all() + self.app_context.pop() + + def setUp(self) -> None: + self.app = create_app(mode="test") + self.app_context = self.app.app_context() + self.app_context.push() + sqla.create_all() + self.collection = db.Node_collection() + + def test_parse(self) -> None: + for cre_id, name in [ + ("233-748", "Configuration hardening"), + ("486-813", "Configuration"), + ("053-751", "Force build pipeline to check outdated/insecure components"), + ]: + self.collection.add_cre(defs.CRE(id=cre_id, name=name, description="")) + + result = owasp_kubernetes_top10_2022.OwaspKubernetesTop10_2022().parse( + self.collection, 
prompt_client.PromptHandler(database=self.collection) + ) + + entries = result.results["OWASP Kubernetes Top Ten 2022"] + self.assertEqual(10, len(entries)) + self.assertEqual("K01", entries[0].sectionID) + self.assertEqual("Insecure Workload Configurations", entries[0].section) + self.assertEqual( + ["233-748", "486-813"], [l.document.id for l in entries[0].links] + ) + self.assertEqual("K10", entries[-1].sectionID) + self.assertEqual(["053-751"], [l.document.id for l in entries[-1].links]) diff --git a/application/tests/owasp_kubernetes_top10_2025_parser_test.py b/application/tests/owasp_kubernetes_top10_2025_parser_test.py new file mode 100644 index 000000000..6f444c9a9 --- /dev/null +++ b/application/tests/owasp_kubernetes_top10_2025_parser_test.py @@ -0,0 +1,102 @@ +import unittest +import tempfile +from pathlib import Path + +from application import create_app, sqla # type: ignore +from application.database import db +from application.defs import cre_defs as defs +from application.prompt_client import prompt_client +from application.utils.external_project_parsers.parsers import ( + owasp_kubernetes_top10_2025, +) + + +class TestOwaspKubernetesTop10_2025Parser(unittest.TestCase): + def tearDown(self) -> None: + sqla.session.remove() + sqla.drop_all() + self.app_context.pop() + + def setUp(self) -> None: + self.app = create_app(mode="test") + self.app_context = self.app.app_context() + self.app_context.push() + sqla.create_all() + self.collection = db.Node_collection() + + def test_parse(self) -> None: + for cre_id, name in [ + ("233-748", "Configuration hardening"), + ("486-813", "Configuration"), + ("148-420", "Log integrity"), + ("402-706", "Log relevant"), + ("843-841", "Log discretely"), + ]: + self.collection.add_cre(defs.CRE(id=cre_id, name=name, description="")) + + result = owasp_kubernetes_top10_2025.OwaspKubernetesTop10_2025().parse( + self.collection, prompt_client.PromptHandler(database=self.collection) + ) + + entries = result.results["OWASP 
Kubernetes Top Ten 2025 (Draft)"] + self.assertEqual(10, len(entries)) + self.assertEqual("K01", entries[0].sectionID) + self.assertEqual("Insecure Workload Configurations", entries[0].section) + self.assertEqual( + ["233-748", "486-813"], [l.document.id for l in entries[0].links] + ) + self.assertEqual("K10", entries[-1].sectionID) + self.assertEqual( + ["148-420", "402-706", "843-841"], + [l.document.id for l in entries[-1].links], + ) + + def test_parse_falls_back_to_2022_mapping_when_2025_links_missing(self) -> None: + self.collection.add_cre( + defs.CRE(id="148-420", name="Log integrity", description="") + ) + + with tempfile.TemporaryDirectory() as tmpdir: + tmp_path = Path(tmpdir) + current_file = tmp_path / "k8s_2025.json" + fallback_file = tmp_path / "k8s_2022.json" + current_file.write_text( + """ +[ + { + "section_id": "K10", + "section": "Inadequate Logging And Monitoring", + "hyperlink": "https://example.com/k10", + "cre_ids": ["999-999"], + "fallback_section_ids": ["K05"] + } +] + """.strip(), + encoding="utf-8", + ) + fallback_file.write_text( + """ +[ + { + "section_id": "K05", + "section": "Inadequate Logging and Monitoring", + "hyperlink": "https://example.com/k05", + "cre_ids": ["148-420"] + } +] + """.strip(), + encoding="utf-8", + ) + + parser = owasp_kubernetes_top10_2025.OwaspKubernetesTop10_2025() + parser.data_file = current_file + parser.fallback_data_file = fallback_file + + result = parser.parse( + self.collection, + prompt_client.PromptHandler(database=self.collection), + ) + + entries = result.results["OWASP Kubernetes Top Ten 2025 (Draft)"] + self.assertEqual(1, len(entries)) + self.assertEqual(["148-420"], [link.document.id for link in entries[0].links]) diff --git a/application/utils/external_project_parsers/data/owasp_kubernetes_top10_2022.json b/application/utils/external_project_parsers/data/owasp_kubernetes_top10_2022.json new file mode 100644 index 000000000..c4eb3d6fd --- /dev/null +++ 
b/application/utils/external_project_parsers/data/owasp_kubernetes_top10_2022.json @@ -0,0 +1,62 @@ +[ + { + "section_id": "K01", + "section": "Insecure Workload Configurations", + "hyperlink": "https://owasp.org/www-project-kubernetes-top-ten/2022/en/src/K01-insecure-workload-configurations", + "cre_ids": ["233-748", "486-813"] + }, + { + "section_id": "K02", + "section": "Supply Chain Vulnerabilities", + "hyperlink": "https://owasp.org/www-project-kubernetes-top-ten/2022/en/src/K02-supply-chain-vulnerabilities", + "cre_ids": ["613-285", "613-287"] + }, + { + "section_id": "K03", + "section": "Overly Permissive RBAC Configurations", + "hyperlink": "https://owasp.org/www-project-kubernetes-top-ten/2022/en/src/K03-overly-permissive-rbac-configurations", + "cre_ids": ["128-128", "724-770"] + }, + { + "section_id": "K04", + "section": "Lack of Centralized Policy Enforcement", + "hyperlink": "https://owasp.org/www-project-kubernetes-top-ten/2022/en/src/K04-lack-of-centralized-policy-enforcement", + "cre_ids": ["117-371"] + }, + { + "section_id": "K05", + "section": "Inadequate Logging and Monitoring", + "hyperlink": "https://owasp.org/www-project-kubernetes-top-ten/2022/en/src/K05-inadequate-logging-and-monitoring", + "cre_ids": ["058-083", "148-420", "402-706", "843-841"] + }, + { + "section_id": "K06", + "section": "Broken Authentication Mechanisms", + "hyperlink": "https://owasp.org/www-project-kubernetes-top-ten/2022/en/src/K06-broken-authentication-mechanisms", + "cre_ids": ["177-260", "586-842", "633-428"] + }, + { + "section_id": "K07", + "section": "Missing Network Segmentation Controls", + "hyperlink": "https://owasp.org/www-project-kubernetes-top-ten/2022/en/src/K07-missing-network-segmentation-controls", + "cre_ids": ["132-146", "467-784", "515-021"] + }, + { + "section_id": "K08", + "section": "Secrets Management Failures", + "hyperlink": "https://owasp.org/www-project-kubernetes-top-ten/2022/en/src/K08-secrets-management-failures", + "cre_ids": ["340-375", 
"774-888", "813-610"] + }, + { + "section_id": "K09", + "section": "Misconfigured Cluster Components", + "hyperlink": "https://owasp.org/www-project-kubernetes-top-ten/2022/en/src/K09-misconfigured-cluster-components", + "cre_ids": ["233-748", "486-813"] + }, + { + "section_id": "K10", + "section": "Outdated and Vulnerable Kubernetes Components", + "hyperlink": "https://owasp.org/www-project-kubernetes-top-ten/2022/en/src/K10-outdated-and-vulnerable-kubernetes-components", + "cre_ids": ["053-751", "715-334", "863-521"] + } +] diff --git a/application/utils/external_project_parsers/data/owasp_kubernetes_top10_2025.json b/application/utils/external_project_parsers/data/owasp_kubernetes_top10_2025.json new file mode 100644 index 000000000..c55afb059 --- /dev/null +++ b/application/utils/external_project_parsers/data/owasp_kubernetes_top10_2025.json @@ -0,0 +1,72 @@ +[ + { + "section_id": "K01", + "section": "Insecure Workload Configurations", + "hyperlink": "https://owasp.org/www-project-kubernetes-top-ten/", + "cre_ids": ["233-748", "486-813"], + "fallback_section_ids": ["K01"] + }, + { + "section_id": "K02", + "section": "Overly Permissive Authorization Configurations", + "hyperlink": "https://owasp.org/www-project-kubernetes-top-ten/", + "cre_ids": ["128-128", "724-770"], + "fallback_section_ids": ["K03"] + }, + { + "section_id": "K03", + "section": "Secrets Management Failures", + "hyperlink": "https://owasp.org/www-project-kubernetes-top-ten/", + "cre_ids": ["340-375", "774-888", "813-610"], + "fallback_section_ids": ["K08"] + }, + { + "section_id": "K04", + "section": "Lack Of Cluster Level Policy Enforcement", + "hyperlink": "https://owasp.org/www-project-kubernetes-top-ten/", + "cre_ids": ["117-371"], + "fallback_section_ids": ["K04"] + }, + { + "section_id": "K05", + "section": "Missing Network Segmentation Controls", + "hyperlink": "https://owasp.org/www-project-kubernetes-top-ten/", + "cre_ids": ["132-146", "467-784", "515-021"], + "fallback_section_ids": 
["K07"] + }, + { + "section_id": "K06", + "section": "Overly Exposed Kubernetes Components", + "hyperlink": "https://owasp.org/www-project-kubernetes-top-ten/", + "cre_ids": ["152-725", "640-364"], + "fallback_section_ids": ["K09"] + }, + { + "section_id": "K07", + "section": "Misconfigured And Vulnerable Cluster Components", + "hyperlink": "https://owasp.org/www-project-kubernetes-top-ten/", + "cre_ids": ["053-751", "233-748", "486-813", "715-334"], + "fallback_section_ids": ["K09", "K10"] + }, + { + "section_id": "K08", + "section": "Cluster To Cloud Lateral Movement", + "hyperlink": "https://owasp.org/www-project-kubernetes-top-ten/", + "cre_ids": ["132-146", "640-364", "724-770"], + "fallback_section_ids": ["K03", "K07"] + }, + { + "section_id": "K09", + "section": "Broken Authentication Mechanisms", + "hyperlink": "https://owasp.org/www-project-kubernetes-top-ten/", + "cre_ids": ["177-260", "586-842", "633-428"], + "fallback_section_ids": ["K06"] + }, + { + "section_id": "K10", + "section": "Inadequate Logging And Monitoring", + "hyperlink": "https://owasp.org/www-project-kubernetes-top-ten/", + "cre_ids": ["058-083", "148-420", "402-706", "843-841"], + "fallback_section_ids": ["K05"] + } +] diff --git a/application/utils/external_project_parsers/parsers/owasp_kubernetes_top10_2022.py b/application/utils/external_project_parsers/parsers/owasp_kubernetes_top10_2022.py new file mode 100644 index 000000000..9d3822ab7 --- /dev/null +++ b/application/utils/external_project_parsers/parsers/owasp_kubernetes_top10_2022.py @@ -0,0 +1,49 @@ +import json +from pathlib import Path + +from application.database import db +from application.defs import cre_defs as defs +from application.prompt_client import prompt_client +from application.utils.external_project_parsers.base_parser_defs import ( + ParseResult, + ParserInterface, +) + + +class OwaspKubernetesTop10_2022(ParserInterface): + name = "OWASP Kubernetes Top Ten 2022" + data_file = ( + 
Path(__file__).resolve().parent.parent + / "data" + / "owasp_kubernetes_top10_2022.json" + ) + + def parse(self, cache: db.Node_collection, ph: prompt_client.PromptHandler): + with self.data_file.open("r", encoding="utf-8") as handle: + raw_entries = json.load(handle) + + entries = [] + for entry in raw_entries: + standard = defs.Standard( + name=self.name, + sectionID=entry["section_id"], + section=entry["section"], + hyperlink=entry["hyperlink"], + ) + for cre_id in entry.get("cre_ids", []): + cres = cache.get_CREs(external_id=cre_id) + if not cres: + continue + standard.add_link( + defs.Link( + ltype=defs.LinkTypes.LinkedTo, + document=cres[0].shallow_copy(), + ) + ) + entries.append(standard) + + return ParseResult( + results={self.name: entries}, + calculate_gap_analysis=False, + calculate_embeddings=False, + ) diff --git a/application/utils/external_project_parsers/parsers/owasp_kubernetes_top10_2025.py b/application/utils/external_project_parsers/parsers/owasp_kubernetes_top10_2025.py new file mode 100644 index 000000000..31deed8da --- /dev/null +++ b/application/utils/external_project_parsers/parsers/owasp_kubernetes_top10_2025.py @@ -0,0 +1,78 @@ +import json +from pathlib import Path + +from application.database import db +from application.defs import cre_defs as defs +from application.prompt_client import prompt_client +from application.utils.external_project_parsers.base_parser_defs import ( + ParseResult, + ParserInterface, +) + + +class OwaspKubernetesTop10_2025(ParserInterface): + name = "OWASP Kubernetes Top Ten 2025 (Draft)" + data_file = ( + Path(__file__).resolve().parent.parent + / "data" + / "owasp_kubernetes_top10_2025.json" + ) + fallback_data_file = ( + Path(__file__).resolve().parent.parent + / "data" + / "owasp_kubernetes_top10_2022.json" + ) + + def parse(self, cache: db.Node_collection, ph: prompt_client.PromptHandler): + with self.data_file.open("r", encoding="utf-8") as handle: + raw_entries = json.load(handle) + with 
self.fallback_data_file.open("r", encoding="utf-8") as handle: + fallback_entries = { + entry["section_id"]: entry for entry in json.load(handle) + } + + entries = [] + for entry in raw_entries: + standard = defs.Standard( + name=self.name, + sectionID=entry["section_id"], + section=entry["section"], + hyperlink=entry["hyperlink"], + ) + linked_cre_ids = [] + for cre_id in entry.get("cre_ids", []): + cres = cache.get_CREs(external_id=cre_id) + if not cres: + continue + linked_cre_ids.append(cre_id) + standard.add_link( + defs.Link( + ltype=defs.LinkTypes.LinkedTo, + document=cres[0].shallow_copy(), + ) + ) + if not linked_cre_ids: + for section_id in entry.get("fallback_section_ids", []): + fallback_entry = fallback_entries.get(section_id) + if not fallback_entry: + continue + for cre_id in fallback_entry.get("cre_ids", []): + if cre_id in linked_cre_ids: + continue + cres = cache.get_CREs(external_id=cre_id) + if not cres: + continue + linked_cre_ids.append(cre_id) + standard.add_link( + defs.Link( + ltype=defs.LinkTypes.LinkedTo, + document=cres[0].shallow_copy(), + ) + ) + entries.append(standard) + + return ParseResult( + results={self.name: entries}, + calculate_gap_analysis=False, + calculate_embeddings=False, + ) From 8f022f5fd2a1f8da37b6f711417da05fefe3b1aa Mon Sep 17 00:00:00 2001 From: bornunique911 Date: Tue, 24 Mar 2026 23:04:55 +0530 Subject: [PATCH 15/22] Normalize OWASP cheat sheet references --- application/tests/cheatsheets_parser_test.py | 33 +++++++++- .../data/owasp_cheatsheets_supplement.json | 47 ++++++++++++++ .../parsers/cheatsheets_parser.py | 62 +++++++++++++++++-- 3 files changed, 136 insertions(+), 6 deletions(-) create mode 100644 application/utils/external_project_parsers/data/owasp_cheatsheets_supplement.json diff --git a/application/tests/cheatsheets_parser_test.py b/application/tests/cheatsheets_parser_test.py index e2c0910d6..fb2a9c277 100644 --- a/application/tests/cheatsheets_parser_test.py +++ 
b/application/tests/cheatsheets_parser_test.py @@ -69,8 +69,37 @@ class Repo: self.maxDiff = None for name, nodes in entries.results.items(): self.assertEqual(name, parser.name) - self.assertEqual(len(nodes), 1) - self.assertEqual(expected.todict(), nodes[0].todict()) + sections = {node.section for node in nodes} + self.assertIn("Secrets Management Cheat Sheet", sections) + secret_entry = [ + node + for node in nodes + if node.section == "Secrets Management Cheat Sheet" + ][0] + self.assertEqual(expected.todict(), secret_entry.todict()) + + def test_register_supplemental_cheatsheets(self) -> None: + for cre_id, name in [ + ("118-110", "API/web services"), + ("724-770", "Technical application access control"), + ("623-550", "Denial Of Service protection"), + ]: + self.collection.add_cre(defs.CRE(name=name, id=cre_id)) + + entries = cheatsheets_parser.Cheatsheets().register_supplemental_cheatsheets( + cache=self.collection + ) + rest = [ + entry for entry in entries if entry.section == "REST Security Cheat Sheet" + ][0] + self.assertEqual( + "https://cheatsheetseries.owasp.org/cheatsheets/REST_Security_Cheat_Sheet.html", + rest.hyperlink, + ) + self.assertEqual( + ["118-110", "724-770", "623-550"], + [link.document.id for link in rest.links], + ) cheatsheets_md = """ # Secrets Management Cheat Sheet diff --git a/application/utils/external_project_parsers/data/owasp_cheatsheets_supplement.json b/application/utils/external_project_parsers/data/owasp_cheatsheets_supplement.json new file mode 100644 index 000000000..4e06bee8c --- /dev/null +++ b/application/utils/external_project_parsers/data/owasp_cheatsheets_supplement.json @@ -0,0 +1,47 @@ +[ + { + "section": "Authorization Cheat Sheet", + "hyperlink": "https://cheatsheetseries.owasp.org/cheatsheets/Authorization_Cheat_Sheet.html", + "cre_ids": ["128-128", "117-371"] + }, + { + "section": "REST Security Cheat Sheet", + "hyperlink": "https://cheatsheetseries.owasp.org/cheatsheets/REST_Security_Cheat_Sheet.html", + 
"cre_ids": ["118-110", "724-770", "623-550"] + }, + { + "section": "Server Side Request Forgery Prevention Cheat Sheet", + "hyperlink": "https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html", + "cre_ids": ["028-728", "657-084"] + }, + { + "section": "Docker Security Cheat Sheet", + "hyperlink": "https://cheatsheetseries.owasp.org/cheatsheets/Docker_Security_Cheat_Sheet.html", + "cre_ids": ["233-748", "486-813"] + }, + { + "section": "Kubernetes Security Cheat Sheet", + "hyperlink": "https://cheatsheetseries.owasp.org/cheatsheets/Kubernetes_Security_Cheat_Sheet.html", + "cre_ids": ["467-784", "233-748", "486-813"] + }, + { + "section": "Secure Cloud Architecture Cheat Sheet", + "hyperlink": "https://cheatsheetseries.owasp.org/cheatsheets/Secure_Cloud_Architecture_Cheat_Sheet.html", + "cre_ids": ["155-155", "467-784"] + }, + { + "section": "LLM Prompt Injection Prevention Cheat Sheet", + "hyperlink": "https://cheatsheetseries.owasp.org/cheatsheets/LLM_Prompt_Injection_Prevention_Cheat_Sheet.html", + "cre_ids": ["161-451", "760-764"] + }, + { + "section": "AI Agent Security Cheat Sheet", + "hyperlink": "https://cheatsheetseries.owasp.org/cheatsheets/AI_Agent_Security_Cheat_Sheet.html", + "cre_ids": ["117-371", "650-560", "126-668"] + }, + { + "section": "Secure AI Model Ops Cheat Sheet", + "hyperlink": "https://cheatsheetseries.owasp.org/cheatsheets/Secure_AI_Model_Ops_Cheat_Sheet.html", + "cre_ids": ["148-853", "613-285", "613-287"] + } +] diff --git a/application/utils/external_project_parsers/parsers/cheatsheets_parser.py b/application/utils/external_project_parsers/parsers/cheatsheets_parser.py index e234dadda..02003b7bd 100644 --- a/application/utils/external_project_parsers/parsers/cheatsheets_parser.py +++ b/application/utils/external_project_parsers/parsers/cheatsheets_parser.py @@ -6,6 +6,9 @@ import os import re from application.utils.external_project_parsers import base_parser_defs +import json +from pathlib 
import Path +import logging from application.utils.external_project_parsers.base_parser_defs import ( ParserInterface, ParseResult, @@ -16,6 +19,12 @@ class Cheatsheets(ParserInterface): name = "OWASP Cheat Sheets" cheatsheetseries_base_url = "https://cheatsheetseries.owasp.org/cheatsheets" + supplement_data_file = ( + Path(__file__).resolve().parent.parent + / "data" + / "owasp_cheatsheets_supplement.json" + ) + logger = logging.getLogger(__name__) def cheatsheet( self, section: str, hyperlink: str, tags: List[str] @@ -41,10 +50,22 @@ def official_cheatsheet_url(self, markdown_filename: str) -> str: def parse(self, cache: db.Node_collection, ph: prompt_client.PromptHandler): c_repo = "https://github.com/OWASP/CheatSheetSeries.git" cheatsheets_path = "cheatsheets/" - repo = git.clone(c_repo, sparse_paths=["cheatsheets"], sparse_cone=True) - cheatsheets = self.register_cheatsheets( - repo=repo, cache=cache, cheatsheets_path=cheatsheets_path, repo_path=c_repo - ) + cheatsheets = [] + try: + repo = git.clone(c_repo, sparse_paths=["cheatsheets"], sparse_cone=True) + cheatsheets = self.register_cheatsheets( + repo=repo, + cache=cache, + cheatsheets_path=cheatsheets_path, + repo_path=c_repo, + ) + except Exception as exc: + self.logger.warning( + "Unable to clone OWASP CheatSheetSeries, continuing with supplemental cheat sheets only: %s", + exc, + ) + cheatsheets.extend(self.register_supplemental_cheatsheets(cache=cache)) + cheatsheets = self.deduplicate_entries(cheatsheets) results = {self.name: cheatsheets} base_parser_defs.validate_classification_tags(results) return ParseResult(results=results) @@ -80,3 +101,36 @@ def register_cheatsheets( ) standard_entries.append(cs) return standard_entries + + def register_supplemental_cheatsheets(self, cache: db.Node_collection): + with self.supplement_data_file.open("r", encoding="utf-8") as handle: + supplement_entries = json.load(handle) + + standard_entries = [] + for entry in supplement_entries: + cs = self.cheatsheet( + 
section=entry["section"], + hyperlink=entry["hyperlink"], + tags=[], + ) + for cre_id in entry.get("cre_ids", []): + cres = cache.get_CREs(external_id=cre_id) + for cre in cres: + try: + cs.add_link( + defs.Link( + document=cre.shallow_copy(), + ltype=defs.LinkTypes.AutomaticallyLinkedTo, + ) + ) + except Exception: + continue + if cs.links: + standard_entries.append(cs) + return standard_entries + + def deduplicate_entries(self, entries: List[defs.Standard]) -> List[defs.Standard]: + deduped = {} + for entry in entries: + deduped[(entry.section, entry.hyperlink)] = entry + return list(deduped.values()) From d547e99dc82a77460bff73f3fa95d3c0d4926094 Mon Sep 17 00:00:00 2001 From: bornunique911 Date: Tue, 24 Mar 2026 23:04:55 +0530 Subject: [PATCH 16/22] Add refresh scripts for OWASP resources --- scripts/update-cheatsheets.sh | 55 +++++++++ scripts/update-owasp-top10-2025-mappings.sh | 38 ++++++ scripts/update-owasp-top10-standards.sh | 129 ++++++++++++++++++++ 3 files changed, 222 insertions(+) create mode 100755 scripts/update-cheatsheets.sh create mode 100755 scripts/update-owasp-top10-2025-mappings.sh create mode 100644 scripts/update-owasp-top10-standards.sh diff --git a/scripts/update-cheatsheets.sh b/scripts/update-cheatsheets.sh new file mode 100755 index 000000000..48d5eccc5 --- /dev/null +++ b/scripts/update-cheatsheets.sh @@ -0,0 +1,55 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +DB_PATH="${1:-$ROOT_DIR/standards_cache.sqlite}" +VENV_DIR="$ROOT_DIR/venv" + +if [[ ! -d "$VENV_DIR" ]]; then + python3 -m venv "$VENV_DIR" +fi + +source "$VENV_DIR/bin/activate" + +if ! 
python -c "import flask" >/dev/null 2>&1; then + pip install -r "$ROOT_DIR/requirements.txt" +fi + +CRE_NO_CALCULATE_GAP_ANALYSIS=1 \ +CRE_NO_GEN_EMBEDDINGS=1 \ +python "$ROOT_DIR/cre.py" --cheatsheets_in --cache_file "$DB_PATH" + +python - "$DB_PATH" <<'PY' +import os +import sqlite3 +import sys + +db_path = sys.argv[1] +conn = sqlite3.connect(db_path) +cur = conn.cursor() + +github_prefix = "https://github.com/OWASP/CheatSheetSeries/tree/master/cheatsheets/" +official_prefix = "https://cheatsheetseries.owasp.org/cheatsheets/" + +rows = cur.execute( + """ + select id, link + from node + where name = 'OWASP Cheat Sheets' + and link like ? + """, + (f"{github_prefix}%",), +).fetchall() + +for node_id, link in rows: + filename = os.path.basename(link) + html_name = os.path.splitext(filename)[0] + ".html" + cur.execute( + "update node set link = ? where id = ?", + (f"{official_prefix}{html_name}", node_id), + ) + +conn.commit() +conn.close() +print(f"Normalized {len(rows)} OWASP Cheat Sheet links") +PY diff --git a/scripts/update-owasp-top10-2025-mappings.sh b/scripts/update-owasp-top10-2025-mappings.sh new file mode 100755 index 000000000..04258646b --- /dev/null +++ b/scripts/update-owasp-top10-2025-mappings.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +VENV_DIR="$ROOT_DIR/venv" +CACHE_FILE="${1:-$ROOT_DIR/standards_cache.sqlite}" +TIMESTAMP="$(date +%Y%m%d-%H%M%S)" +BACKUP_FILE="${CACHE_FILE}.bak.${TIMESTAMP}" + +if [[ ! -d "$VENV_DIR" ]]; then + echo "Creating virtual environment in $VENV_DIR" + python3 -m venv "$VENV_DIR" +fi + +source "$VENV_DIR/bin/activate" + +if ! 
python -c "import requests" >/dev/null 2>&1; then + echo "Installing Python dependencies" + pip install -r "$ROOT_DIR/requirements.txt" +fi + +if [[ -f "$CACHE_FILE" ]]; then + cp "$CACHE_FILE" "$BACKUP_FILE" + echo "Backed up database to $BACKUP_FILE" +fi + +export CRE_NO_NEO4J="${CRE_NO_NEO4J:-1}" +export CRE_NO_GEN_EMBEDDINGS="${CRE_NO_GEN_EMBEDDINGS:-1}" +export CRE_UPSTREAM_MAX_ATTEMPTS="${CRE_UPSTREAM_MAX_ATTEMPTS:-6}" +export CRE_UPSTREAM_RETRY_BACKOFF_SECONDS="${CRE_UPSTREAM_RETRY_BACKOFF_SECONDS:-2}" +export CRE_UPSTREAM_TIMEOUT_SECONDS="${CRE_UPSTREAM_TIMEOUT_SECONDS:-30}" + +echo "Refreshing official OpenCRE upstream data in $CACHE_FILE" +python "$ROOT_DIR/cre.py" --upstream_sync --cache_file "$CACHE_FILE" + +echo "Reapplying OWASP Top 10 2025 CRE mappings" +exec python "$ROOT_DIR/cre.py" --owasp_top10_2025_in --cache_file "$CACHE_FILE" diff --git a/scripts/update-owasp-top10-standards.sh b/scripts/update-owasp-top10-standards.sh new file mode 100644 index 000000000..a795cf872 --- /dev/null +++ b/scripts/update-owasp-top10-standards.sh @@ -0,0 +1,129 @@ +#!/usr/bin/env bash + +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +VENV_DIR="$ROOT_DIR/venv" +CACHE_FILE="${1:-$ROOT_DIR/standards_cache.sqlite}" +TIMESTAMP="$(date +%Y%m%d-%H%M%S)" +BACKUP_FILE="${CACHE_FILE}.bak.${TIMESTAMP}" + +if [[ ! -d "$VENV_DIR" ]]; then + echo "Creating virtual environment in $VENV_DIR" + python3 -m venv "$VENV_DIR" +fi + +source "$VENV_DIR/bin/activate" + +if ! 
python -c "import requests" >/dev/null 2>&1; then + echo "Installing Python dependencies" + pip install -r "$ROOT_DIR/requirements.txt" +fi + +if [[ -f "$CACHE_FILE" ]]; then + cp "$CACHE_FILE" "$BACKUP_FILE" + echo "Backed up database to $BACKUP_FILE" +fi + +export CRE_NO_NEO4J="${CRE_NO_NEO4J:-1}" +export CRE_NO_GEN_EMBEDDINGS="${CRE_NO_GEN_EMBEDDINGS:-1}" +export CRE_UPSTREAM_MAX_ATTEMPTS="${CRE_UPSTREAM_MAX_ATTEMPTS:-6}" +export CRE_UPSTREAM_RETRY_BACKOFF_SECONDS="${CRE_UPSTREAM_RETRY_BACKOFF_SECONDS:-2}" +export CRE_UPSTREAM_TIMEOUT_SECONDS="${CRE_UPSTREAM_TIMEOUT_SECONDS:-30}" + +echo "Refreshing official OpenCRE upstream data in $CACHE_FILE" +python "$ROOT_DIR/cre.py" --upstream_sync --cache_file "$CACHE_FILE" + +echo "Reapplying OWASP Top 10 standards and CRE mappings" +python "$ROOT_DIR/cre.py" \ + --owasp_top10_2025_in \ + --owasp_api_top10_2023_in \ + --owasp_kubernetes_top10_2025_in \ + --owasp_llm_top10_2025_in \ + --owasp_aisvs_in \ + --cache_file "$CACHE_FILE" + +echo "Selecting preferred Kubernetes Top Ten version" +if python - <<'PY' "$CACHE_FILE" +import sqlite3 +import sys + +cache_file = sys.argv[1] +name_2025 = "OWASP Kubernetes Top Ten 2025 (Draft)" +name_2022 = "OWASP Kubernetes Top Ten 2022" + +conn = sqlite3.connect(cache_file) +cur = conn.cursor() + +linked_2025 = cur.execute( + """ + select count(*) + from node n + join cre_node_links l on l.node = n.id + where n.name = ? 
+ """, + (name_2025,), +).fetchone()[0] + +if linked_2025 > 0: + cur.execute("delete from node where name = ?", (name_2022,)) + print(f"Using {name_2025}; removed {name_2022}") +else: + raise SystemExit(f"{name_2025} not linked") + +conn.commit() +conn.close() +PY +then + : +else + echo "OWASP Kubernetes Top Ten 2025 (Draft) is unavailable or unmapped, importing 2022" + python "$ROOT_DIR/cre.py" \ + --owasp_kubernetes_top10_2022_in \ + --cache_file "$CACHE_FILE" +fi + +echo "Pruning OWASP Top 10 entries that still have no CRE links" +python - <<'PY' "$CACHE_FILE" +import sqlite3 +import sys + +cache_file = sys.argv[1] +standard_names = ( + "OWASP Top 10 2025", + "OWASP API Security Top 10 2023", + "OWASP Kubernetes Top Ten 2025 (Draft)", + "OWASP Top 10 for LLM and Gen AI Apps 2025", + "OWASP AI Security Verification Standard (AISVS)", +) + +conn = sqlite3.connect(cache_file) +cur = conn.cursor() + +has_2022 = cur.execute( + "select 1 from node where name = 'OWASP Kubernetes Top Ten 2022' limit 1" +).fetchone() +if has_2022: + standard_names = standard_names + ("OWASP Kubernetes Top Ten 2022",) + +rows = list( + cur.execute( + f""" + select n.id, n.name, coalesce(n.section_id, ''), coalesce(n.section, '') + from node n + left join cre_node_links l on l.node = n.id + where n.name in ({','.join('?' 
for _ in standard_names)}) + group by n.id + having count(l.cre) = 0 + """, + standard_names, + ) +) + +for node_id, name, section_id, section in rows: + cur.execute("delete from node where id = ?", (node_id,)) + print(f"Removed unmapped entry: {name} {section_id} {section}".strip()) + +conn.commit() +conn.close() +PY From 8a194645c8199d8eda23b61eaca78f592afe3b3b Mon Sep 17 00:00:00 2001 From: bornunique911 Date: Fri, 1 May 2026 01:03:03 +0530 Subject: [PATCH 17/22] Refactor links definition in TestCheatsheetsParser for clarity --- application/tests/cheatsheets_parser_test.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/application/tests/cheatsheets_parser_test.py b/application/tests/cheatsheets_parser_test.py index fb2a9c277..202f7b59e 100644 --- a/application/tests/cheatsheets_parser_test.py +++ b/application/tests/cheatsheets_parser_test.py @@ -53,11 +53,7 @@ class Repo: name="OWASP Cheat Sheets", hyperlink="https://cheatsheetseries.owasp.org/cheatsheets/Secrets_Management_Cheat_Sheet.html", section="Secrets Management Cheat Sheet", - links=[ - defs.Link( - document=cre, ltype=defs.LinkTypes.AutomaticallyLinkedTo - ) - ], + links=[defs.Link(document=cre, ltype=defs.LinkTypes.AutomaticallyLinkedTo)], tags=[ "family:guidance", "subtype:cheatsheet", From 9a073c6975352dd580695477f9e62dac109eb0f0 Mon Sep 17 00:00:00 2001 From: Bornunique911 <69379200+Bornunique911@users.noreply.github.com> Date: Sat, 2 May 2026 11:48:31 +0530 Subject: [PATCH 18/22] =?UTF-8?q?Improved=20boilerplate,=20github=20link?= =?UTF-8?q?=20scrolling=20to=20readme,=20and=20completing=E2=80=A6=20(#898?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Improved boilerplate, github link scrolling to readme, and completing the list of standards with the new AI ones * Update Search.tsx * Improved boilerplate, github link scrolling to readme, and completing… * Update Search.tsx Signed-off-by: Bornunique911 
<69379200+Bornunique911@users.noreply.github.com> --------- Signed-off-by: Bornunique911 <69379200+Bornunique911@users.noreply.github.com> Co-authored-by: Rob van der Veer --- application/frontend/src/pages/Search/Search.tsx | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/application/frontend/src/pages/Search/Search.tsx b/application/frontend/src/pages/Search/Search.tsx index b572c6da9..3a4806063 100644 --- a/application/frontend/src/pages/Search/Search.tsx +++ b/application/frontend/src/pages/Search/Search.tsx @@ -138,9 +138,10 @@ export const SearchPage = () => {

- OpenCRE is an interactive content linking platform for uniting security standards and - guidelines. It offers easy navigation between documents, requirements and tools, making it - easier for developers and security professionals to find the resources they need. + OpenCRE is an interactive content linking platform that unites security standards and guidelines + into one resource. It makes it easier for practitioners to find information, with mappings, + deeplinks, browse, search, and an AI chatbot. Writers of material just have to refer to one + OpenCRE requirement, to link to coverage of that topic in all the standards.

@@ -193,11 +194,12 @@ export const SearchPage = () => {

OpenCRE currently links OWASP standards (Top 10, ASVS, Proactive Controls, - Cheat sheets, Testing guide, ZAP, Juice shop, SAMM). + Cheat sheets, Testing guide, ZAP, Juice shop, SAMM, AI Exchange, LLM top 10, ML top 10).

Plus several other sources (CWE, CAPEC, NIST 800 53, NIST 800 63b, Cloud - Control Matrix, ISO27001, ISO27002, and NIST SSDF). + Control Matrix, ISO27001, ISO27002, NIST SSDF, and for AI: NIST, ENISA, BIML, MITRE ATLAS, + and ETSI).

@@ -467,7 +469,7 @@ export const SearchPage = () => { Contribute to the open-source project, report issues, and explore the codebase on our GitHub repository.

- + From 2733d10a114ae27f754a46258d38866184190d61 Mon Sep 17 00:00:00 2001 From: bornunique911 Date: Wed, 1 Apr 2026 16:13:04 +0530 Subject: [PATCH 19/22] Fix cheat sheet parser test expectations on importer branches --- application/tests/cheatsheets_parser_test.py | 40 +++++--------------- 1 file changed, 9 insertions(+), 31 deletions(-) diff --git a/application/tests/cheatsheets_parser_test.py b/application/tests/cheatsheets_parser_test.py index 202f7b59e..9dec729c4 100644 --- a/application/tests/cheatsheets_parser_test.py +++ b/application/tests/cheatsheets_parser_test.py @@ -34,6 +34,13 @@ class Repo: repo.working_dir = loc cre = defs.CRE(name="blah", id="223-780") self.collection.add_cre(cre) + with open( + os.path.join( + os.path.join(loc, "cheatsheets"), + "Secrets_Management_Cheat_Sheet.md", + ), + "w", + ) as mdf: with open( os.path.join( os.path.join(loc, "cheatsheets"), @@ -65,37 +72,8 @@ class Repo: self.maxDiff = None for name, nodes in entries.results.items(): self.assertEqual(name, parser.name) - sections = {node.section for node in nodes} - self.assertIn("Secrets Management Cheat Sheet", sections) - secret_entry = [ - node - for node in nodes - if node.section == "Secrets Management Cheat Sheet" - ][0] - self.assertEqual(expected.todict(), secret_entry.todict()) - - def test_register_supplemental_cheatsheets(self) -> None: - for cre_id, name in [ - ("118-110", "API/web services"), - ("724-770", "Technical application access control"), - ("623-550", "Denial Of Service protection"), - ]: - self.collection.add_cre(defs.CRE(name=name, id=cre_id)) - - entries = cheatsheets_parser.Cheatsheets().register_supplemental_cheatsheets( - cache=self.collection - ) - rest = [ - entry for entry in entries if entry.section == "REST Security Cheat Sheet" - ][0] - self.assertEqual( - "https://cheatsheetseries.owasp.org/cheatsheets/REST_Security_Cheat_Sheet.html", - rest.hyperlink, - ) - self.assertEqual( - ["118-110", "724-770", "623-550"], - [link.document.id for 
link in rest.links], - ) + self.assertEqual(len(nodes), 1) + self.assertEqual(expected.todict(), nodes[0].todict()) cheatsheets_md = """ # Secrets Management Cheat Sheet From 7ea289c690fe0d7491cb5d24102517a7ee931769 Mon Sep 17 00:00:00 2001 From: bornunique911 Date: Wed, 1 Apr 2026 16:14:21 +0530 Subject: [PATCH 20/22] Use official OWASP cheat sheet URLs in importer branches --- .../external_project_parsers/parsers/cheatsheets_parser.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/application/utils/external_project_parsers/parsers/cheatsheets_parser.py b/application/utils/external_project_parsers/parsers/cheatsheets_parser.py index 02003b7bd..a2808ab41 100644 --- a/application/utils/external_project_parsers/parsers/cheatsheets_parser.py +++ b/application/utils/external_project_parsers/parsers/cheatsheets_parser.py @@ -19,12 +19,6 @@ class Cheatsheets(ParserInterface): name = "OWASP Cheat Sheets" cheatsheetseries_base_url = "https://cheatsheetseries.owasp.org/cheatsheets" - supplement_data_file = ( - Path(__file__).resolve().parent.parent - / "data" - / "owasp_cheatsheets_supplement.json" - ) - logger = logging.getLogger(__name__) def cheatsheet( self, section: str, hyperlink: str, tags: List[str] From dec11c4dfaabba227721bc975aeeb0e5446cf19c Mon Sep 17 00:00:00 2001 From: bornunique911 Date: Tue, 24 Mar 2026 23:04:55 +0530 Subject: [PATCH 21/22] Normalize OWASP cheat sheet references --- application/tests/cheatsheets_parser_test.py | 33 +++++++++++++++++-- .../parsers/cheatsheets_parser.py | 6 ++++ 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/application/tests/cheatsheets_parser_test.py b/application/tests/cheatsheets_parser_test.py index 9dec729c4..c565af022 100644 --- a/application/tests/cheatsheets_parser_test.py +++ b/application/tests/cheatsheets_parser_test.py @@ -72,8 +72,37 @@ class Repo: self.maxDiff = None for name, nodes in entries.results.items(): self.assertEqual(name, parser.name) - self.assertEqual(len(nodes), 1) - 
self.assertEqual(expected.todict(), nodes[0].todict()) + sections = {node.section for node in nodes} + self.assertIn("Secrets Management Cheat Sheet", sections) + secret_entry = [ + node + for node in nodes + if node.section == "Secrets Management Cheat Sheet" + ][0] + self.assertEqual(expected.todict(), secret_entry.todict()) + + def test_register_supplemental_cheatsheets(self) -> None: + for cre_id, name in [ + ("118-110", "API/web services"), + ("724-770", "Technical application access control"), + ("623-550", "Denial Of Service protection"), + ]: + self.collection.add_cre(defs.CRE(name=name, id=cre_id)) + + entries = cheatsheets_parser.Cheatsheets().register_supplemental_cheatsheets( + cache=self.collection + ) + rest = [ + entry for entry in entries if entry.section == "REST Security Cheat Sheet" + ][0] + self.assertEqual( + "https://cheatsheetseries.owasp.org/cheatsheets/REST_Security_Cheat_Sheet.html", + rest.hyperlink, + ) + self.assertEqual( + ["118-110", "724-770", "623-550"], + [link.document.id for link in rest.links], + ) cheatsheets_md = """ # Secrets Management Cheat Sheet diff --git a/application/utils/external_project_parsers/parsers/cheatsheets_parser.py b/application/utils/external_project_parsers/parsers/cheatsheets_parser.py index a2808ab41..02003b7bd 100644 --- a/application/utils/external_project_parsers/parsers/cheatsheets_parser.py +++ b/application/utils/external_project_parsers/parsers/cheatsheets_parser.py @@ -19,6 +19,12 @@ class Cheatsheets(ParserInterface): name = "OWASP Cheat Sheets" cheatsheetseries_base_url = "https://cheatsheetseries.owasp.org/cheatsheets" + supplement_data_file = ( + Path(__file__).resolve().parent.parent + / "data" + / "owasp_cheatsheets_supplement.json" + ) + logger = logging.getLogger(__name__) def cheatsheet( self, section: str, hyperlink: str, tags: List[str] From d378a40605ced42da9cdd36b344557792ddc254e Mon Sep 17 00:00:00 2001 From: bornunique911 Date: Thu, 7 May 2026 00:18:07 +0530 Subject: [PATCH 22/22] 
Fixed black formatting issue --- application/cmd/cre_main.py | 6 +++++- application/tests/cheatsheets_parser_test.py | 16 ++++++++-------- application/tests/cre_main_test.py | 1 + 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/application/cmd/cre_main.py b/application/cmd/cre_main.py index 9a4f6382f..69d73da4d 100644 --- a/application/cmd/cre_main.py +++ b/application/cmd/cre_main.py @@ -1086,7 +1086,10 @@ def generate_embeddings(db_url: str) -> None: database = db_connect(path=db_url) prompt_client.PromptHandler(database, load_all_embeddings=True) -def parse_file(filename: str, yamldocs: List[Any], scollection) -> Optional[List[defs.Document]]: + +def parse_file( + filename: str, yamldocs: List[Any], scollection +) -> Optional[List[defs.Document]]: """ Parse a list of dictionaries (YAML/JSON documents) into defs.Document objects. Returns None and logs a critical error if any element is not a dict. @@ -1108,6 +1111,7 @@ def normalize_links(doc: dict) -> dict: return [defs.Document.from_dict(normalize_links(dict(doc))) for doc in yamldocs] + def regenerate_embeddings(db_url: str) -> None: """Wipe all embedding rows, then rebuild (CRE + every node type) like ``--generate_embeddings``.""" database = db_connect(path=db_url) diff --git a/application/tests/cheatsheets_parser_test.py b/application/tests/cheatsheets_parser_test.py index c565af022..74689c352 100644 --- a/application/tests/cheatsheets_parser_test.py +++ b/application/tests/cheatsheets_parser_test.py @@ -41,14 +41,14 @@ class Repo: ), "w", ) as mdf: - with open( - os.path.join( - os.path.join(loc, "cheatsheets"), - "Secrets_Management_Cheat_Sheet.md", - ), - "w", - ) as mdf: - mdf.write(cs) + with open( + os.path.join( + os.path.join(loc, "cheatsheets"), + "Secrets_Management_Cheat_Sheet.md", + ), + "w", + ) as mdf: + mdf.write(cs) mock_clone.return_value = repo entries = cheatsheets_parser.Cheatsheets().parse( cache=self.collection, ph=PromptHandler(database=self.collection) diff --git 
a/application/tests/cre_main_test.py b/application/tests/cre_main_test.py index 68d721ab1..38268a2e7 100644 --- a/application/tests/cre_main_test.py +++ b/application/tests/cre_main_test.py @@ -577,6 +577,7 @@ def test_parse_file(self) -> None: filename="tests", yamldocs=file, scollection=self.collection ) self.assertCountEqual(res, expected) + @patch.object(main, "db_connect") @patch.object(Queue, "enqueue_call") @patch.object(redis, "wait_for_jobs")