From f2a6d8023c8f72345e80399a1fcea56726a381a2 Mon Sep 17 00:00:00 2001 From: Jared Atkinson Date: Wed, 27 May 2026 14:46:29 -0700 Subject: [PATCH 1/6] Model org repository-creation capabilities and composed org-secret access: - replace the hardcoded default-role GH_CreateRepository edge with specific GH_CanCreateRepositories, GH_CanCreatePublicRepositories, GH_CanCreateInternalRepositories, and GH_CanCreatePrivateRepositories edges - emit those create-capability edges conditionally for the members org role from collected organization settings, and unconditionally for the owners org role - add composed GH_CanReadSecret edges from org roles to GH_OrgSecret nodes with visibility == "all", including query_composition showing the supporting path - fix GitHub lookup boolean parsing so DuckDB string values like "False" do not incorrectly evaluate as enabled capabilities - correct GH_OrgRole.properties.type so default roles export as default instead of always being labeled custom --- src/openhound_github/kinds/edges.py | 5 ++ src/openhound_github/lookup.py | 45 +++++++++++++ src/openhound_github/models/org_role.py | 79 ++++++++++++++++++++--- src/openhound_github/models/org_secret.py | 49 +++++++++++++- 4 files changed, 168 insertions(+), 10 deletions(-) diff --git a/src/openhound_github/kinds/edges.py b/src/openhound_github/kinds/edges.py index 28425e2..d76e79e 100644 --- a/src/openhound_github/kinds/edges.py +++ b/src/openhound_github/kinds/edges.py @@ -42,6 +42,7 @@ # Secret and variable edges HAS_SECRET = "GH_HasSecret" HAS_VARIABLE = "GH_HasVariable" +CAN_READ_SECRET = "GH_CanReadSecret" # App installation edges INSTALLED_AS = "GH_InstalledAs" @@ -63,6 +64,10 @@ INVITE_MEMBER = "GH_InviteMember" ADD_COLLABORATOR = "GH_AddCollaborator" CREATE_REPOSITORY = "GH_CreateRepository" +CAN_CREATE_REPOSITORIES = "GH_CanCreateRepositories" +CAN_CREATE_PUBLIC_REPOSITORIES = "GH_CanCreatePublicRepositories" +CAN_CREATE_INTERNAL_REPOSITORIES = "GH_CanCreateInternalRepositories" +CAN_CREATE_PRIVATE_REPOSITORIES = "GH_CanCreatePrivateRepositories" CREATE_TEAM = "GH_CreateTeam" TRANSFER_REPOSITORY = "GH_TransferRepository" diff --git a/src/openhound_github/lookup.py b/src/openhound_github/lookup.py index 0d5abd2..c694ee8 100644 --- a/src/openhound_github/lookup.py +++ b/src/openhound_github/lookup.py @@ -69,6 +69,51 @@ def private_repository_node_ids_for_org(self, org_login: str): [org_login], ) + def _find_single_bool(self, query: str, params: list[str]) -> bool: + result = self._find_single_object(query, params) + if result is None: + return False + return result.strip().lower() in {"1", "true", "t", "yes", "y"} + + @lru_cache + def members_can_create_repositories(self, org_login: str) -> bool: + return self._find_single_bool( + f"""SELECT members_can_create_repositories FROM {self.schema}.organizations WHERE login = ?""", + [org_login], + ) + + @lru_cache + def members_can_create_public_repositories(self, org_login: str) -> bool: + return self._find_single_bool( + f"""SELECT members_can_create_public_repositories FROM {self.schema}.organizations WHERE login = ?""", + [org_login], + ) + + @lru_cache + def members_can_create_internal_repositories(self, org_login: str) -> bool: + return self._find_single_bool( + f"""SELECT members_can_create_internal_repositories FROM {self.schema}.organizations WHERE login = ?""", + [org_login], + ) + + @lru_cache + def members_can_create_private_repositories(self, org_login: str) -> bool: + return self._find_single_bool( + f"""SELECT members_can_create_private_repositories FROM {self.schema}.organizations WHERE login = ?""", + [org_login], + ) + + @lru_cache + def members_can_create_any_repositories(self, org_login: str) -> bool: + return any( + ( + self.members_can_create_repositories(org_login), + self.members_can_create_public_repositories(org_login), + self.members_can_create_internal_repositories(org_login), + self.members_can_create_private_repositories(org_login), + ) + ) + @lru_cache def idp(self) -> list: return self._find_all_objects( diff --git a/src/openhound_github/models/org_role.py b/src/openhound_github/models/org_role.py index d401691..3256160 100644 --- a/src/openhound_github/models/org_role.py +++ b/src/openhound_github/models/org_role.py @@ -92,10 +92,31 @@ class GHOrgRoleProperties(GHNodeProperties): EdgeDef( start=nk.ORG_ROLE, end=nk.ORGANIZATION, - kind=ek.CREATE_REPOSITORY, + kind=ek.CAN_CREATE_REPOSITORIES, description="Role can create repositories in the organization", traversable=False, ), + EdgeDef( + start=nk.ORG_ROLE, + end=nk.ORGANIZATION, + kind=ek.CAN_CREATE_PUBLIC_REPOSITORIES, + description="Role can create public repositories in the organization", + traversable=False, + ), + EdgeDef( + start=nk.ORG_ROLE, + end=nk.ORGANIZATION, + kind=ek.CAN_CREATE_INTERNAL_REPOSITORIES, + description="Role can create internal repositories in the organization", + traversable=False, + ), + EdgeDef( + start=nk.ORG_ROLE, + end=nk.ORGANIZATION, + kind=ek.CAN_CREATE_PRIVATE_REPOSITORIES, + description="Role can create private repositories in the organization", + traversable=False, + ), EdgeDef( start=nk.ORG_ROLE, end=nk.ORGANIZATION, @@ -174,7 +195,7 @@ def as_node(self) -> GHNode: displayname=f"{self.org_login}/{self.name}", node_id=self.node_id, short_name=self.name, - type="custom", + type=self.type, environment_name=self.org_login, environmentid=self.org_node_id, query_explicit_members=f"MATCH p=(:GH_User)-[:GH_HasRole]->(:GH_OrgRole {{node_id:'{self.node_id}'}}) RETURN p", @@ -195,7 +216,25 @@ def _owners_edge(self): ) yield Edge( - kind=ek.CREATE_REPOSITORY, + kind=ek.CAN_CREATE_REPOSITORIES, + start=EdgePath(value=self.node_id, match_by="id"), + end=EdgePath(value=self.org_node_id, match_by="id"), + properties=EdgeProperties(traversable=False), + ) + yield Edge( + kind=ek.CAN_CREATE_PUBLIC_REPOSITORIES, + start=EdgePath(value=self.node_id, match_by="id"), + end=EdgePath(value=self.org_node_id, match_by="id"), + properties=EdgeProperties(traversable=False), + ) + yield Edge( + kind=ek.CAN_CREATE_INTERNAL_REPOSITORIES, + start=EdgePath(value=self.node_id, match_by="id"), + end=EdgePath(value=self.org_node_id, match_by="id"), + properties=EdgeProperties(traversable=False), + ) + yield Edge( + kind=ek.CAN_CREATE_PRIVATE_REPOSITORIES, start=EdgePath(value=self.node_id, match_by="id"), end=EdgePath(value=self.org_node_id, match_by="id"), properties=EdgeProperties(traversable=False), @@ -235,12 +274,34 @@ def _owners_edge(self): @property def _members_edge(self): if self.type == "default" and self.name == "members": - yield Edge( - kind=ek.CREATE_REPOSITORY, - start=EdgePath(value=self.node_id, match_by="id"), - end=EdgePath(value=self.org_node_id, match_by="id"), - properties=EdgeProperties(traversable=False), - ) + if self._lookup.members_can_create_repositories(self.org_login): + yield Edge( + kind=ek.CAN_CREATE_REPOSITORIES, + start=EdgePath(value=self.node_id, match_by="id"), + end=EdgePath(value=self.org_node_id, match_by="id"), + properties=EdgeProperties(traversable=False), + ) + if self._lookup.members_can_create_public_repositories(self.org_login): + yield Edge( + kind=ek.CAN_CREATE_PUBLIC_REPOSITORIES, + start=EdgePath(value=self.node_id, match_by="id"), + end=EdgePath(value=self.org_node_id, match_by="id"), + properties=EdgeProperties(traversable=False), + ) + if self._lookup.members_can_create_internal_repositories(self.org_login): + yield Edge( + kind=ek.CAN_CREATE_INTERNAL_REPOSITORIES, + start=EdgePath(value=self.node_id, match_by="id"), + end=EdgePath(value=self.org_node_id, match_by="id"), + properties=EdgeProperties(traversable=False), + ) + if self._lookup.members_can_create_private_repositories(self.org_login): + yield Edge( + kind=ek.CAN_CREATE_PRIVATE_REPOSITORIES, + start=EdgePath(value=self.node_id, match_by="id"), + end=EdgePath(value=self.org_node_id, match_by="id"), + properties=EdgeProperties(traversable=False), + ) yield Edge( kind=ek.CREATE_TEAM, start=EdgePath(value=self.node_id, match_by="id"), diff --git a/src/openhound_github/models/org_secret.py b/src/openhound_github/models/org_secret.py index 1ec9879..f525ba9 100644 --- a/src/openhound_github/models/org_secret.py +++ b/src/openhound_github/models/org_secret.py @@ -6,7 +6,7 @@ from openhound.core.asset import BaseAsset, EdgeDef, NodeDef from openhound.core.models.entries_dataclass import Edge, EdgePath, EdgeProperties -from openhound_github.graph import GHNode, GHNodeProperties +from openhound_github.graph import GHEdgeProperties, GHNode, GHNodeProperties from openhound_github.kinds import edges as ek from openhound_github.kinds import nodes as nk from openhound_github.main import app @@ -55,6 +55,13 @@ class GHOrgSecretProperties(GHNodeProperties): description="Repository can access org secret", traversable=True, ), + EdgeDef( + start=nk.ORG_ROLE, + end=nk.ORG_SECRET, + kind=ek.CAN_READ_SECRET, + description="Org role can read org secret by creating a repository in scope", + traversable=True, + ), ], ) class OrgSecret(BaseAsset): @@ -120,6 +127,45 @@ def _private_repo_edges(self): properties=EdgeProperties(traversable=True), ) + def _read_secret_query(self, role_node_id: str) -> str: + return ( + f"MATCH p=(:GH_OrgRole {{node_id:'{role_node_id}'}})" + f"-[:GH_CanCreateRepositories|GH_CanCreatePublicRepositories|" + f"GH_CanCreateInternalRepositories|GH_CanCreatePrivateRepositories]->" + f"(:GH_Organization)-[:GH_Contains]->" + f"(:GH_OrgSecret {{node_id:'{self.node_id}'}}) RETURN p" + ) + + @property + def _composed_read_edges(self): + if self.visibility != "all": + return + + owners_role_id = f"{self.org_node_id}_owners" + yield Edge( + kind=ek.CAN_READ_SECRET, + start=EdgePath(value=owners_role_id, match_by="id"), + end=EdgePath(value=self.node_id, match_by="id"), + properties=GHEdgeProperties( + traversable=True, + composed=True, + query_composition=self._read_secret_query(owners_role_id), + ), + ) + + if self._lookup.members_can_create_any_repositories(self.org_login): + members_role_id = f"{self.org_node_id}_members" + yield Edge( + kind=ek.CAN_READ_SECRET, + start=EdgePath(value=members_role_id, match_by="id"), + end=EdgePath(value=self.node_id, match_by="id"), + properties=GHEdgeProperties( + traversable=True, + composed=True, + query_composition=self._read_secret_query(members_role_id), + ), + ) + @property def edges(self): yield Edge( @@ -130,6 +176,7 @@ def edges(self): ) yield from self._all_repo_edges yield from self._private_repo_edges + yield from self._composed_read_edges @app.asset( From 7957c15afe19b0f125b2a43fefc94cffd0845d9d Mon Sep 17 00:00:00 2001 From: Jared Atkinson Date: Wed, 27 May 2026 15:01:37 -0700 Subject: [PATCH 2/6] Model secret-scanning alert scope and composed viewer paths: - eliminate `GH_HasSecretScanningAlert` in favor of `GH_Contains` for repository-to-alert scope - keep organization-to-alert scope modeled with `GH_Contains` - add composed `GH_CanReadSecretScanningAlert` edges from repo roles and org roles to `GH_SecretScanningAlert` nodes - attach `query_composition` to each composed alert-read edge so the supporting path through `GH_ViewSecretScanningAlerts` and `GH_Contains` is preserved - update secret-scanning alert node queries to use `GH_Contains` and the new composed alert-reader path - add DuckDB lookup helpers for repo/org roles that can view secret scanning alerts and load `org_roles` into preproc so org-scoped viewer composition resolves during convert --- src/openhound_github/kinds/edges.py | 2 +- src/openhound_github/lookup.py | 53 ++++++++++++ src/openhound_github/main.py | 1 + src/openhound_github/models/repository.py | 2 +- .../models/secret_scanning_alert.py | 83 +++++++++++++++++-- 5 files changed, 133 insertions(+), 8 deletions(-) diff --git a/src/openhound_github/kinds/edges.py b/src/openhound_github/kinds/edges.py index d76e79e..7e9f154 100644 --- a/src/openhound_github/kinds/edges.py +++ b/src/openhound_github/kinds/edges.py @@ -48,7 +48,6 @@ INSTALLED_AS = "GH_InstalledAs" # Other -HAS_SECRET_SCANNING_ALERT = "GH_HasSecretScanningAlert" VALID_TOKEN = "GH_ValidToken" HAS_WORKFLOW = "GH_HasWorkflow" HAS_JOB = "GH_HasJob" @@ -128,6 +127,7 @@ PUSH_PROTECTED_BRANCH = "GH_PushProtectedBranch" DELETE_ALERTS_CODE_SCANNING = "GH_DeleteAlertsCodeScanning" VIEW_SECRET_SCANNING_ALERTS = "GH_ViewSecretScanningAlerts" +CAN_READ_SECRET_SCANNING_ALERT = "GH_CanReadSecretScanningAlert" RUN_ORG_MIGRATION = "GH_RunOrgMigration" MANAGE_SECURITY_PRODUCTS = "GH_ManageSecurityProducts" MANAGE_REPO_SECURITY_PRODUCTS = "GH_ManageRepoSecurityProducts" diff --git a/src/openhound_github/lookup.py b/src/openhound_github/lookup.py index c694ee8..d68b430 100644 --- a/src/openhound_github/lookup.py +++ b/src/openhound_github/lookup.py @@ -1,5 +1,6 @@ from functools import lru_cache +import duckdb from duckdb import DuckDBPyConnection from openhound.core.lookup import LookupManager @@ -75,6 +76,22 @@ def _find_single_bool(self, query: str, params: list[str]) -> bool: return False return result.strip().lower() in {"1", "true", "t", "yes", "y"} + @lru_cache + def _table_exists(self, table_name: str) -> bool: + try: + self.client.execute( + """ + SELECT 1 + FROM information_schema.tables + WHERE table_schema = ? AND table_name = ? + LIMIT 1 + """, + [self.schema, table_name], + ) + return self.client.fetchone() is not None + except duckdb.Error: + return False + @lru_cache def members_can_create_repositories(self, org_login: str) -> bool: return self._find_single_bool( @@ -114,6 +131,38 @@ def members_can_create_any_repositories(self, org_login: str) -> bool: ) ) + @lru_cache + def repo_role_node_ids_with_view_secret_scanning_alerts( + self, repository_node_id: str + ): + return self._find_all_objects( + f""" + SELECT repository_node_id || '_' || name + FROM {self.schema}.repo_roles + WHERE repository_node_id = ? + AND ( + (type = 'default' AND name = 'admin') + OR json_contains(permissions, '"view_secret_scanning_alerts"') + ) + """, + [repository_node_id], + ) + + @lru_cache + def org_role_node_ids_with_view_secret_scanning_alerts(self, org_login: str): + return self._find_all_objects( + f""" + SELECT org_node_id || '_' || name + FROM {self.schema}.org_roles + WHERE org_login = ? + AND ( + (type = 'default' AND name = 'owners') + OR json_contains(permissions, '"view_secret_scanning_alerts"') + ) + """, + [org_login], + ) + @lru_cache def idp(self) -> list: return self._find_all_objects( @@ -311,6 +360,8 @@ def environment_secret(self, secret_name: str, repository_id: str): @lru_cache def org_variable(self, var_name: str, org_login: str): + if not self._table_exists("organization_variables"): + return None return self._find_single_object( f""" SELECT name FROM {self.schema}.organization_variables @@ -321,6 +372,8 @@ def org_variable(self, var_name: str, org_login: str): @lru_cache def repo_variable(self, var_name: str, repository_id: str): + if not self._table_exists("repository_variables"): + return None return self._find_single_object( f""" SELECT name FROM {self.schema}.repository_variables diff --git a/src/openhound_github/main.py b/src/openhound_github/main.py index 7f1bbb7..2a060d7 100644 --- a/src/openhound_github/main.py +++ b/src/openhound_github/main.py @@ -51,6 +51,7 @@ def preproc(ctx: PreProcContext): """ return { "organizations": "organizations", + "org_roles": "org_roles", "repositories": "repositories", "branch_protection_rules": "branch_protection_rules", "branch_push_allowances": "branch_push_allowances", diff --git a/src/openhound_github/models/repository.py b/src/openhound_github/models/repository.py index 38a0c72..2f34ad2 100644 --- a/src/openhound_github/models/repository.py +++ b/src/openhound_github/models/repository.py @@ -256,7 +256,7 @@ def as_node(self) -> GHNode: query_environments=f"MATCH p=(:GH_Repository {{node_id: '{rid}'}})-[:GH_HasEnvironment]->(:GH_Environment) RETURN p", query_secrets=f"MATCH p=(:GH_Repository {{node_id:'{rid}'}})-[:GH_HasSecret]->(:GH_Secret) RETURN p", query_variables=f"MATCH p=(:GH_Repository {{node_id:'{rid}'}})-[:GH_HasVariable]->(:GH_Variable) RETURN p", - query_secret_scanning_alerts=f"MATCH p=(:GH_Repository {{node_id:'{rid}'}})-[:GH_HasSecretScanningAlert]->(:GH_SecretScanningAlert) RETURN p", + query_secret_scanning_alerts=f"MATCH p=(:GH_Repository {{node_id:'{rid}'}})-[:GH_Contains]->(:GH_SecretScanningAlert) RETURN p", query_explicit_readers=f"MATCH p=(role:GH_Role)-[:GH_HasBaseRole|GH_ReadRepoContents*1..]->(r:GH_Repository {{node_id:'{rid}'}}) MATCH p1=(role)<-[:GH_HasRole]-(:GH_User) RETURN p,p1", query_unrolled_readers=f"MATCH p=(role:GH_Role)-[:GH_HasRole|GH_HasBaseRole|GH_MemberOf|GH_ReadRepoContents*1..]->(r:GH_Repository {{node_id:'{rid}'}}) MATCH p1=(role)<-[:GH_HasRole]-(:GH_User) RETURN p,p1", query_explicit_writers=f"MATCH p=(role:GH_Role)-[:GH_HasBaseRole|GH_WriteRepoContents|GH_WriteRepoPullRequests*1..]->(r:GH_Repository {{node_id:'{rid}'}}) MATCH p1=(role)<-[:GH_HasRole]-(:GH_User) RETURN p,p1", diff --git a/src/openhound_github/models/secret_scanning_alert.py b/src/openhound_github/models/secret_scanning_alert.py index 9fce19e..e607a72 100644 --- a/src/openhound_github/models/secret_scanning_alert.py +++ b/src/openhound_github/models/secret_scanning_alert.py @@ -5,7 +5,7 @@ from openhound.core.models.entries_dataclass import Edge, EdgePath, EdgeProperties from pydantic import BaseModel -from openhound_github.graph import GHNode, GHNodeProperties +from openhound_github.graph import GHEdgeProperties, GHNode, GHNodeProperties from openhound_github.kinds import edges as ek from openhound_github.kinds import nodes as nk from openhound_github.main import app @@ -80,10 +80,24 @@ class GHSecretScanningAlertProperties(GHNodeProperties): EdgeDef( start=nk.REPOSITORY, end=nk.SECRET_SCANNING_ALERT, - kind=ek.HAS_SECRET_SCANNING_ALERT, - description="Repository has secret scanning alert", + kind=ek.CONTAINS, + description="Repository contains secret scanning alert", traversable=False, ), + EdgeDef( + start=nk.REPO_ROLE, + end=nk.SECRET_SCANNING_ALERT, + kind=ek.CAN_READ_SECRET_SCANNING_ALERT, + description="Repository role can read secret scanning alert", + traversable=True, + ), + EdgeDef( + start=nk.ORG_ROLE, + end=nk.SECRET_SCANNING_ALERT, + kind=ek.CAN_READ_SECRET_SCANNING_ALERT, + description="Organization role can read secret scanning alert", + traversable=True, + ), EdgeDef( start=nk.SECRET_SCANNING_ALERT, end=nk.USER, @@ -148,15 +162,69 @@ def as_node(self) -> GHNode: validity=self.validity, state=self.state, url=self.url, - query_repository=f"MATCH p=(r:GH_SecretScanningAlert {{node_id:'{aid}'}})<-[:GH_HasSecretScanningAlert]-(repo:GH_Repository) RETURN p", + query_repository=f"MATCH p=(r:GH_SecretScanningAlert {{node_id:'{aid}'}})<-[:GH_Contains]-(repo:GH_Repository) RETURN p", query_alert_viewers=( - f"MATCH p=(role:GH_Role)-[:GH_HasRole|GH_HasBaseRole|GH_MemberOf|GH_ViewSecretScanningAlerts*1..]->" - f"(:GH_Repository)-[:GH_HasSecretScanningAlert]->(:GH_SecretScanningAlert {{node_id:'{aid}'}}) " + f"MATCH p=(role:GH_Role)-[:GH_HasRole|GH_HasBaseRole|GH_MemberOf|GH_CanReadSecretScanningAlert*1..]->" + f"(:GH_SecretScanningAlert {{node_id:'{aid}'}}) " f"MATCH p1=(role)<-[:GH_HasRole]-(:GH_User) RETURN p,p1" ), ), ) + def _repo_alert_view_query(self, role_node_id: str) -> str: + return ( + f"MATCH p=(:GH_RepoRole {{node_id:'{role_node_id}'}})" + f"-[:GH_ViewSecretScanningAlerts]->(:GH_Repository)" + f"-[:GH_Contains]->(:GH_SecretScanningAlert {{node_id:'{self.node_id}'}}) " + f"RETURN p" + ) + + def _org_alert_view_query(self, role_node_id: str) -> str: + return ( + f"MATCH p=(:GH_OrgRole {{node_id:'{role_node_id}'}})" + f"-[:GH_ViewSecretScanningAlerts]->(:GH_Organization)" + f"-[:GH_Contains]->(:GH_SecretScanningAlert {{node_id:'{self.node_id}'}}) " + f"RETURN p" + ) + + @property + def _repo_viewer_edges(self): + if not self.repository: + return + + for (role_node_id,) in self._lookup.repo_role_node_ids_with_view_secret_scanning_alerts( + self.repository.node_id + ): + yield Edge( + kind=ek.CAN_READ_SECRET_SCANNING_ALERT, + start=EdgePath(value=role_node_id, match_by="id"), + end=EdgePath(value=self.node_id, match_by="id"), + properties=GHEdgeProperties( + traversable=True, + composed=True, + query_composition=self._repo_alert_view_query(role_node_id), + ), + ) + + @property + def _org_viewer_edges(self): + if not self.org_node_id: + return + + for (role_node_id,) in self._lookup.org_role_node_ids_with_view_secret_scanning_alerts( + self.org_login + ): + yield Edge( + kind=ek.CAN_READ_SECRET_SCANNING_ALERT, + start=EdgePath(value=role_node_id, match_by="id"), + end=EdgePath(value=self.node_id, match_by="id"), + properties=GHEdgeProperties( + traversable=True, + composed=True, + query_composition=self._org_alert_view_query(role_node_id), + ), + ) + @property def edges(self): if self.repository: @@ -181,3 +249,6 @@ def edges(self): end=EdgePath(value=self.valid_token_user_node_id, match_by="id"), properties=EdgeProperties(traversable=True), ) + + yield from self._repo_viewer_edges + yield from self._org_viewer_edges From 91850391e4b660625684b800f912888aa31eef7d Mon Sep 17 00:00:00 2001 From: Jared Atkinson Date: Wed, 27 May 2026 15:11:58 -0700 Subject: [PATCH 3/6] Updated several relative queries: - Fixed ordering issues that were from legacy DAWGS parsing - Updated query_workflows to unroll the entire workflow tree --- src/openhound_github/models/repository.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/openhound_github/models/repository.py b/src/openhound_github/models/repository.py index 2f34ad2..b75a782 100644 --- a/src/openhound_github/models/repository.py +++ b/src/openhound_github/models/repository.py @@ -251,16 +251,16 @@ def as_node(self) -> GHNode: query_branch_protection_rules=f"MATCH p=(:GH_Repository {{node_id: '{rid}'}})-[:GH_Contains]->(:GH_BranchProtectionRule) RETURN p", query_roles=f"MATCH p=(:GH_RepoRole)-[*1..]->(:GH_Repository {{node_id: '{rid}'}}) RETURN p", query_teams=f"MATCH p=(:GH_Team)-[:GH_MemberOf|GH_HasRole*1..]->(:GH_RepoRole)-[]->(:GH_Repository {{node_id: '{rid}'}}) RETURN p", - query_workflows=f"MATCH p=(:GH_Repository {{node_id:'{rid}'}})-[:GH_HasWorkflow]->(w:GH_Workflow) RETURN p", + query_workflows=f"MATCH p=(:GH_Repository {{node_id:'{rid}'}})-[:GH_HasWorkflow]->(:GH_Workflow)-[:GH_HasJob]->(:GH_WorkflowJob)-[:GH_HasStep]->(step:GH_WorkflowStep) OPTIONAL MATCH p1=(step)-[:GH_UsesSecret]->(:GH_Secret) OPTIONAL MATCH p2=(step)-[:GH_UsesVariable]->(:GH_Variable) RETURN p,p1,p2", query_runners=f"MATCH p=(:GH_Repository {{node_id:'{rid}'}})-[:GH_CanUseRunner]->(:GH_Runner) RETURN p", query_environments=f"MATCH p=(:GH_Repository {{node_id: '{rid}'}})-[:GH_HasEnvironment]->(:GH_Environment) RETURN p", query_secrets=f"MATCH p=(:GH_Repository {{node_id:'{rid}'}})-[:GH_HasSecret]->(:GH_Secret) RETURN p", query_variables=f"MATCH p=(:GH_Repository {{node_id:'{rid}'}})-[:GH_HasVariable]->(:GH_Variable) RETURN p", query_secret_scanning_alerts=f"MATCH p=(:GH_Repository {{node_id:'{rid}'}})-[:GH_Contains]->(:GH_SecretScanningAlert) RETURN p", - query_explicit_readers=f"MATCH p=(role:GH_Role)-[:GH_HasBaseRole|GH_ReadRepoContents*1..]->(r:GH_Repository {{node_id:'{rid}'}}) MATCH p1=(role)<-[:GH_HasRole]-(:GH_User) RETURN p,p1", - query_unrolled_readers=f"MATCH p=(role:GH_Role)-[:GH_HasRole|GH_HasBaseRole|GH_MemberOf|GH_ReadRepoContents*1..]->(r:GH_Repository {{node_id:'{rid}'}}) MATCH p1=(role)<-[:GH_HasRole]-(:GH_User) RETURN p,p1", - query_explicit_writers=f"MATCH p=(role:GH_Role)-[:GH_HasBaseRole|GH_WriteRepoContents|GH_WriteRepoPullRequests*1..]->(r:GH_Repository {{node_id:'{rid}'}}) MATCH p1=(role)<-[:GH_HasRole]-(:GH_User) RETURN p,p1", - query_unrolled_writers=f"MATCH p=(role:GH_Role)-[:GH_HasRole|GH_HasBaseRole|GH_MemberOf|GH_WriteRepoContents|GH_WriteRepoPullRequests*1..]->(r:GH_Repository {{node_id:'{rid}'}}) MATCH p1=(role)<-[:GH_HasRole]-(:GH_User) RETURN p,p1", + query_explicit_readers=f"MATCH p=(role:GH_Role)-[:GH_HasBaseRole|GH_ReadRepoContents*1..]->(r:GH_Repository {{node_id:'{rid}'}}) MATCH p1=(:GH_User)-[:GH_HasRole]->(role) RETURN p,p1", + query_unrolled_readers=f"MATCH p=(role:GH_Role)-[:GH_HasRole|GH_HasBaseRole|GH_MemberOf|GH_ReadRepoContents*1..]->(r:GH_Repository {{node_id:'{rid}'}}) MATCH p1=(:GH_User)-[:GH_HasRole]->(role) RETURN p,p1", + query_explicit_writers=f"MATCH p=(role:GH_Role)-[:GH_HasBaseRole|GH_WriteRepoContents|GH_WriteRepoPullRequests*1..]->(r:GH_Repository {{node_id:'{rid}'}}) MATCH p1=(:GH_User)-[:GH_HasRole]->(role) RETURN p,p1", + query_unrolled_writers=f"MATCH p=(role:GH_Role)-[:GH_HasRole|GH_HasBaseRole|GH_MemberOf|GH_WriteRepoContents|GH_WriteRepoPullRequests*1..]->(r:GH_Repository {{node_id:'{rid}'}}) MATCH p1=(:GH_User)-[:GH_HasRole]->(role) RETURN p,p1", ), ) From 2e5ccd87888db74d4f9eb32f4f6f0ef55c6278c5 Mon Sep 17 00:00:00 2001 From: Jared Atkinson Date: Thu, 28 May 2026 07:59:04 -0700 Subject: [PATCH 4/6] Expose app installation and PAT permission bundles on graph nodes: - serialize `GH_AppInstallation.permissions` onto the node as a JSON string instead of dropping it during node construction - serialize `GH_AppInstallation.events` onto the node as a JSON string for consistency with other structured app-installation metadata - add `organization_permissions` and `repository_permissions` properties to `GH_PersonalAccessToken` nodes - populate PAT permission properties from the collected fine-grained PAT permission object without expanding them into redundant per-edge data - keep `GH_CanAccess` edges as scope-only relationships, with the permission bundle modeled once on the principal node --- src/openhound_github/models/app_installation.py | 7 +++---- .../models/personal_access_token.py | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/openhound_github/models/app_installation.py b/src/openhound_github/models/app_installation.py index 7b92afb..ca83340 100644 --- a/src/openhound_github/models/app_installation.py +++ b/src/openhound_github/models/app_installation.py @@ -1,5 +1,6 @@ from dataclasses import dataclass from datetime import datetime +import json from typing import ClassVar from dlt.common.libs.pydantic import DltConfig @@ -140,10 +141,8 @@ def as_node(self) -> GHNode: repositories_url=self.repositories_url, repository_selection=self.repository_selection, target_type=self.target_type, - permissions=self.permissions - if isinstance(self.permissions, str) - else None, - events=self.events if isinstance(self.events, str) else None, + permissions=json.dumps(self.permissions) if self.permissions else None, + events=json.dumps(self.events) if self.events else None, created_at=self.created_at, updated_at=self.updated_at, suspended_at=self.suspended_at, diff --git a/src/openhound_github/models/personal_access_token.py b/src/openhound_github/models/personal_access_token.py index 58b1a69..edd3d0c 100644 --- a/src/openhound_github/models/personal_access_token.py +++ b/src/openhound_github/models/personal_access_token.py @@ -1,5 +1,6 @@ from dataclasses import dataclass from datetime import datetime +import json from typing import ClassVar from dlt.common.libs.pydantic import DltConfig @@ -40,6 +41,8 @@ class GHPersonalAccessTokenProperties(GHNodeProperties): owner_login: The login handle of the user who owns the token. repository_selection: Whether the token has access to `all`, `subset`, or `none` of the organization's repositories. token_expired: Whether the token has expired. + organization_permissions: JSON string of the PAT's organization-scoped permissions. + repository_permissions: JSON string of the PAT's repository-scoped permissions. query_organization_permissions: Query for organization permissions. query_user: Query for user. query_repositories: Query for repositories. @@ -57,6 +60,8 @@ class GHPersonalAccessTokenProperties(GHNodeProperties): owner_login: str | None = None repository_selection: str | None = None token_expired: bool | None = None + organization_permissions: str | None = None + repository_permissions: str | None = None query_organization_permissions: str | None = None query_user: str | None = None query_repositories: str | None = None @@ -141,6 +146,16 @@ def as_node(self) -> GHNode: token_expires_at=self.token_expires_at, owner_id=self.owner.id if self.owner else None, token_last_used_at=self.token_last_used_at, + organization_permissions=( + json.dumps(self.permissions.organization) + if self.permissions and self.permissions.organization + else None + ), + repository_permissions=( + json.dumps(self.permissions.repository) + if self.permissions and self.permissions.repository + else None + ), query_organization_permissions=f"MATCH p=(:GH_PersonalAccessToken {{node_id:'{pid}'}})-[:GH_CanAccess]->(:GH_Organization) RETURN p", query_user=f"MATCH p=(:GH_User)-[:GH_HasPersonalAccessToken]->(:GH_PersonalAccessToken {{node_id:'{pid}'}}) RETURN p", query_repositories=f"MATCH p=(:GH_PersonalAccessToken {{node_id:'{pid}'}})-[:GH_CanAccess]->(:GH_Repository) RETURN p LIMIT 1000", From 5528a9b23652265cd83a851bccf2845e76b260a6 Mon Sep 17 00:00:00 2001 From: Jared Atkinson Date: Fri, 29 May 2026 10:59:46 -0700 Subject: [PATCH 5/6] Resolve workflow secret and variable references to environment-scoped targets - add precedence-aware workflow lookup helpers that resolve the effective secret or variable target for a given repository, environment, and name - apply GitHub-style scope precedence during lookup: environment > repository > organization - make org-scope resolution repository-aware so `all`, `private`, and `selected` organization secrets and variables only resolve when they are actually in scope for the repository - add workflow-job and workflow-step support for environment-scoped `GH_UsesSecret` and `GH_UsesVariable` edges - derive step environment context from the containing workflow job via the preprocessed `workflow_jobs` table instead of duplicating environment data on step nodes - change workflow job/step secret and variable edges to target the resolved object directly by `node_id` instead of using compound property matchers - add lookup support for resolving environment node IDs and workflow-job environment names from the DuckDB preproc database - expose `deployment_environmentid` on `GH_EnvironmentVariable` nodes - restore `GH_Contains` edges from `GH_Environment` to `GH_EnvironmentVariable` so environment-scoped variable targets can be traversed consistently with environment secrets --- src/openhound_github/lookup.py | 171 +++++++++++++++++++ src/openhound_github/models/env_variable.py | 17 +- src/openhound_github/models/workflow.py | 41 ++++- src/openhound_github/models/workflow_job.py | 81 ++++----- src/openhound_github/models/workflow_step.py | 84 ++++----- 5 files changed, 282 insertions(+), 112 deletions(-) diff --git a/src/openhound_github/lookup.py b/src/openhound_github/lookup.py index d68b430..3067bb0 100644 --- a/src/openhound_github/lookup.py +++ b/src/openhound_github/lookup.py @@ -358,6 +358,51 @@ def environment_secret(self, secret_name: str, repository_id: str): [secret_name, repository_id], ) + @lru_cache + def environment_secret_for_environment( + self, secret_name: str, repository_id: str, environment_name: str + ): + return self._find_single_object( + f""" + SELECT name FROM {self.schema}.environment_secrets + WHERE name = ? AND repository_node_id = ? AND environment_name = ? + """, + [secret_name, repository_id, environment_name], + ) + + @lru_cache + def repo_visible_org_secret( + self, secret_name: str, repository_id: str, org_login: str + ): + return self._find_single_object( + f""" + SELECT os.name + FROM {self.schema}.organization_secrets os + JOIN {self.schema}.repositories r + ON r.node_id = ? + WHERE os.name = ? + AND os.org_login = ? + AND ( + os.visibility = 'all' + OR ( + os.visibility = 'private' + AND r.visibility IN ('private', 'internal') + ) + OR ( + os.visibility = 'selected' + AND EXISTS ( + SELECT 1 + FROM {self.schema}.selected_organization_secrets sos + WHERE sos.name = os.name + AND sos.org_login = os.org_login + AND sos.repository_node_id = ? + ) + ) + ) + """, + [repository_id, secret_name, org_login, repository_id], + ) + @lru_cache def org_variable(self, var_name: str, org_login: str): if not self._table_exists("organization_variables"): @@ -392,6 +437,55 @@ def environment_variable(self, var_name: str, repository_id: str): [var_name, repository_id], ) + @lru_cache + def environment_variable_for_environment( + self, var_name: str, repository_id: str, environment_name: str + ): + if not self._table_exists("environment_variables"): + return None + return self._find_single_object( + f""" + SELECT name FROM {self.schema}.environment_variables + WHERE name = ? AND repository_node_id = ? AND environment_name = ? + """, + [var_name, repository_id, environment_name], + ) + + @lru_cache + def repo_visible_org_variable( + self, var_name: str, repository_id: str, org_login: str + ): + if not self._table_exists("organization_variables"): + return None + return self._find_single_object( + f""" + SELECT ov.name + FROM {self.schema}.organization_variables ov + JOIN {self.schema}.repositories r + ON r.node_id = ? + WHERE ov.name = ? + AND ov.org_login = ? + AND ( + ov.visibility = 'all' + OR ( + ov.visibility = 'private' + AND r.visibility IN ('private', 'internal') + ) + OR ( + ov.visibility = 'selected' + AND EXISTS ( + SELECT 1 + FROM {self.schema}.selected_organization_variables sov + WHERE sov.name = ov.name + AND sov.org_login = ov.org_login + AND sov.repository_node_id = ? + ) + ) + ) + """, + [repository_id, var_name, org_login, repository_id], + ) + @lru_cache def environment(self, env_name: str, repository_id: str): return self._find_single_object( @@ -402,6 +496,83 @@ def environment(self, env_name: str, repository_id: str): [env_name, repository_id], ) + @lru_cache + def environment_node_id(self, env_name: str, repository_id: str) -> str | None: + return self._find_single_object( + f""" + SELECT node_id FROM {self.schema}.environments + WHERE name = ? AND repository_node_id = ? + """, + [env_name, repository_id], + ) + + @lru_cache + def workflow_job_environment(self, workflow_job_node_id: str) -> str | None: + return self._find_single_object( + f""" + SELECT environment FROM {self.schema}.workflow_jobs + WHERE node_id = ? + """, + [workflow_job_node_id], + ) + + @lru_cache + def secret_target( + self, + secret_name: str, + repository_id: str, + org_login: str, + environment_name: str | None = None, + ) -> tuple[str, str] | None: + if environment_name and self.environment_secret_for_environment( + secret_name, repository_id, environment_name + ): + environment_node_id = self.environment_node_id( + environment_name, repository_id + ) + if environment_node_id: + return ( + "environment", + f"GH_EnvironmentSecret_{environment_node_id}_{secret_name}", + ) + if self.repo_secret(secret_name, repository_id): + return ("repository", f"GH_Secret_{repository_id}_{secret_name}") + if self.repo_visible_org_secret(secret_name, repository_id, org_login): + org_node_id = self.org_id_for_login(org_login) + if org_node_id: + return ("organization", f"GH_OrgSecret_{org_node_id}_{secret_name}") + return None + + @lru_cache + def variable_target( + self, + var_name: str, + repository_id: str, + org_login: str, + environment_name: str | None = None, + ) -> tuple[str, str] | None: + if environment_name and self.environment_variable_for_environment( + var_name, repository_id, environment_name + ): + environment_node_id = self.environment_node_id( + environment_name, repository_id + ) + if environment_node_id: + return ( + "environment", + f"GH_EnvironmentVariable_{environment_node_id}_{var_name}", + ) + if self.repo_variable(var_name, repository_id): + return ("repository", f"GH_Variable_{repository_id}_{var_name}") + if self.repo_visible_org_variable(var_name, repository_id, org_login): + org_node_id = self.org_id_for_login(org_login) + if org_node_id: + return ( + "organization", + f"GH_OrgVariable_{org_node_id}_{var_name}", + ) + return None + @lru_cache def workflow(self, repository_node_id: str, path: str): return self._find_single_object( diff --git a/src/openhound_github/models/env_variable.py b/src/openhound_github/models/env_variable.py index d9cfd80..b97a433 100644 --- a/src/openhound_github/models/env_variable.py +++ b/src/openhound_github/models/env_variable.py @@ -2,6 +2,7 @@ from datetime import datetime from openhound.core.asset import BaseAsset, EdgeDef, NodeDef +from openhound.core.models.entries_dataclass import Edge, EdgePath, EdgeProperties from openhound_github.graph import ( GHNode, @@ -31,6 +32,7 @@ class GHEnvVariableProperties(GHNodeProperties): created_at: datetime | None updated_at: datetime | None repository_name: str + deployment_environmentid: str | None = None @app.asset( @@ -83,6 +85,7 @@ def as_node(self) -> GHNode: displayname=self.name, node_id=vid, deployment_environment_name=self.environment_name, + deployment_environmentid=self.environment_node_id, environment_name=self.org_login, repository_name=self.repository_name, environmentid=self.org_node_id, @@ -94,11 +97,9 @@ def as_node(self) -> GHNode: @property def edges(self): - # TODO: Check if this should indeed not return CONTAINS edge - return [] - # yield Edge( - # kind=ek.CONTAINS, - # start=EdgePath(value=self.environment_node_id, match_by="id"), - # end=EdgePath(value=self.node_id, match_by="id"), - # properties=EdgeProperties(traversable=False), - # ) + yield Edge( + kind=ek.CONTAINS, + start=EdgePath(value=self.environment_node_id, match_by="id"), + end=EdgePath(value=self.node_id, match_by="id"), + properties=EdgeProperties(traversable=False), + ) diff --git a/src/openhound_github/models/workflow.py b/src/openhound_github/models/workflow.py index 6ac64c4..795428b 100644 --- a/src/openhound_github/models/workflow.py +++ b/src/openhound_github/models/workflow.py @@ -16,7 +16,7 @@ EdgePath, EdgeProperties, ) -from pydantic import BaseModel, ConfigDict, Field, ValidationError, field_validator +from pydantic import BaseModel, ConfigDict, Field, field_validator from openhound_github.graph import GHNode, GHNodeProperties from openhound_github.kinds import edges as ek @@ -239,6 +239,8 @@ class GHWorkflowProperties(GHNodeProperties): html_url: str | None = None branch: str | None = None contents: str | None = None + triggers: list[str] | None = None + trigger_dispatch_inputs: list[str] | None = None query_repository: str | None = None query_editors: str | None = None environment_name: str | None = None @@ -301,6 +303,41 @@ def document(self) -> WorkflowDocument | None: except Exception: return None + @property + def trigger_events(self) -> list[str] | None: + document = self.document + if not document: + return None + + on_value = document.model_extra.get("on") + if isinstance(on_value, str): + return [on_value] + if isinstance(on_value, list): + return [str(item) for item in on_value] + if isinstance(on_value, dict): + return [str(key) for key in on_value.keys()] + return None + + @property + def workflow_dispatch_inputs(self) -> list[str] | None: + document = self.document + if not document: + return None + + on_value = document.model_extra.get("on") + if not isinstance(on_value, dict): + return None + + workflow_dispatch = on_value.get("workflow_dispatch") + if not isinstance(workflow_dispatch, dict): + return None + + inputs = workflow_dispatch.get("inputs") + if not isinstance(inputs, dict): + return None + + return [str(key) for key in inputs.keys()] + def workflow_job_rows(self) -> list[dict[str, Any]]: document = self.document if not document: @@ -428,6 +465,8 @@ def as_node(self) -> GHNode: html_url=self.html_url, branch=self.branch, contents=self._decoded_contents, + triggers=self.trigger_events, + trigger_dispatch_inputs=self.workflow_dispatch_inputs, repository_name=self.repository_name, repository_id=self.repository_node_id, environment_name=self.org_login, diff --git a/src/openhound_github/models/workflow_job.py b/src/openhound_github/models/workflow_job.py index f59188d..14f7f65 100644 --- a/src/openhound_github/models/workflow_job.py +++ b/src/openhound_github/models/workflow_job.py @@ -111,6 +111,13 @@ class GHWorkflowJobProperties(GHNodeProperties): description="Workflow job references organization secret", traversable=False, ), + EdgeDef( + start=nk.WORKFLOW_JOB, + end=nk.ENVIRONMENT_SECRET, + kind=ek.USES_SECRET, + description="Workflow job references environment secret", + traversable=False, + ), EdgeDef( start=nk.WORKFLOW_JOB, end=nk.REPO_VARIABLE, @@ -125,6 +132,13 @@ class GHWorkflowJobProperties(GHNodeProperties): description="Workflow job references organization variable", traversable=False, ), + EdgeDef( + start=nk.WORKFLOW_JOB, + end=nk.ENVIRONMENT_VARIABLE, + kind=ek.USES_VARIABLE, + description="Workflow job references environment variable", + traversable=False, + ), ], ) class WorkflowJob(BaseAsset): @@ -192,69 +206,34 @@ def as_node(self) -> GHNode: @property def _uses_secret_edges(self): for ref in self.secret_references: - if self._lookup.repo_secret(ref.name, self.repository_node_id): + target = self._lookup.secret_target( + ref.name, + self.repository_node_id, + self.org_login, + self.environment, + ) + if target: yield Edge( kind=ek.USES_SECRET, start=EdgePath(value=self.node_id, match_by="id"), - end=ConditionalEdgePath( - kind=nk.REPO_SECRET, - property_matchers=[ - PropertyMatch(key="name", value=ref.name), - PropertyMatch( - key="repository_id", value=self.repository_node_id - ), - ], - ), - properties=EdgeProperties(traversable=False), - ) - - if self._lookup.org_secret(ref.name, self.org_login): - yield Edge( - kind=ek.USES_SECRET, - start=EdgePath(value=self.node_id, match_by="id"), - end=ConditionalEdgePath( - kind=nk.ORG_SECRET, - property_matchers=[ - PropertyMatch(key="name", value=ref.name), - PropertyMatch( - key="environmentid", value=self.org_node_id.upper() - ), - ], - ), + end=EdgePath(value=target[1], match_by="id"), properties=EdgeProperties(traversable=False), ) @property def _uses_variable_edges(self): for ref in self.variable_references: - if self._lookup.repo_variable(ref.name, self.repository_node_id): - yield Edge( - kind=ek.USES_VARIABLE, - start=EdgePath(value=self.node_id, match_by="id"), - end=ConditionalEdgePath( - kind=nk.REPO_VARIABLE, - property_matchers=[ - PropertyMatch(key="name", value=ref.name), - PropertyMatch( - key="repository_id", value=self.repository_node_id - ), - ], - ), - properties=EdgeProperties(traversable=False), - ) - if self._lookup.org_variable(ref.name, self.org_login): + target = self._lookup.variable_target( + ref.name, + self.repository_node_id, + self.org_login, + self.environment, + ) + if target: yield Edge( kind=ek.USES_VARIABLE, start=EdgePath(value=self.node_id, match_by="id"), - end=ConditionalEdgePath( - kind=nk.ORG_VARIABLE, - property_matchers=[ - PropertyMatch(key="name", value=ref.name), - PropertyMatch( - key="environmentid", value=self.org_node_id.upper() - ), - ], - ), + end=EdgePath(value=target[1], match_by="id"), properties=EdgeProperties(traversable=False), ) diff --git a/src/openhound_github/models/workflow_step.py b/src/openhound_github/models/workflow_step.py index 6757302..ad1fec6 100644 --- a/src/openhound_github/models/workflow_step.py +++ b/src/openhound_github/models/workflow_step.py @@ -9,11 +9,9 @@ NodeDef, ) from openhound.core.models.entries_dataclass import ( # type: ignore[import-untyped] - ConditionalEdgePath, Edge, EdgePath, EdgeProperties, - PropertyMatch, ) from pydantic import BaseModel, Field @@ -97,6 +95,13 @@ class GHWorkflowStepProperties(GHNodeProperties): description="Workflow step references organization secret", traversable=False, ), + EdgeDef( + start=nk.WORKFLOW_STEP, + end=nk.ENVIRONMENT_SECRET, + kind=ek.USES_SECRET, + description="Workflow step references environment secret", + traversable=False, + ), EdgeDef( start=nk.WORKFLOW_STEP, end=nk.REPO_VARIABLE, @@ -111,6 +116,13 @@ class GHWorkflowStepProperties(GHNodeProperties): description="Workflow step references organization variable", traversable=False, ), + EdgeDef( + start=nk.WORKFLOW_STEP, + end=nk.ENVIRONMENT_VARIABLE, + kind=ek.USES_VARIABLE, + description="Workflow step references environment variable", + traversable=False, + ), ], ) class WorkflowStep(BaseAsset): @@ -173,69 +185,37 @@ def as_node(self) -> GHNode: @property def _uses_secret_edges(self): + environment_name = self._lookup.workflow_job_environment(self.job_node_id) for ref in self.secret_references: - if self._lookup.repo_secret(ref.name, self.repository_node_id): + target = self._lookup.secret_target( + ref.name, + self.repository_node_id, + self.org_login, + environment_name, + ) + if target: yield Edge( kind=ek.USES_SECRET, start=EdgePath(value=self.node_id, match_by="id"), - end=ConditionalEdgePath( - kind=nk.REPO_SECRET, - property_matchers=[ - PropertyMatch(key="name", value=ref.name), - PropertyMatch( - key="repository_id", value=self.repository_node_id - ), - ], - ), - properties=EdgeProperties(traversable=False), - ) - if self._lookup.org_secret(ref.name, self.org_login): - yield Edge( - kind=ek.USES_SECRET, - start=EdgePath(value=self.node_id, match_by="id"), - end=ConditionalEdgePath( - kind=nk.ORG_SECRET, - property_matchers=[ - PropertyMatch(key="name", value=ref.name), - PropertyMatch( - key="environmentid", value=self.org_node_id.upper() - ), - ], - ), + end=EdgePath(value=target[1], match_by="id"), properties=EdgeProperties(traversable=False), ) @property def _uses_variable_edges(self): + environment_name = self._lookup.workflow_job_environment(self.job_node_id) for ref in self.variable_references: - if self._lookup.repo_variable(ref.name, self.repository_node_id): - yield Edge( - kind=ek.USES_VARIABLE, - start=EdgePath(value=self.node_id, match_by="id"), - end=ConditionalEdgePath( - kind=nk.REPO_VARIABLE, - property_matchers=[ - PropertyMatch(key="name", value=ref.name), - PropertyMatch( - key="repository_id", value=self.repository_node_id - ), - ], - ), - properties=EdgeProperties(traversable=False), - ) - if self._lookup.org_variable(ref.name, self.org_login): + target = self._lookup.variable_target( + ref.name, + self.repository_node_id, + self.org_login, + environment_name, + ) + if target: yield Edge( kind=ek.USES_VARIABLE, start=EdgePath(value=self.node_id, match_by="id"), - end=ConditionalEdgePath( - kind=nk.ORG_VARIABLE, - property_matchers=[ - PropertyMatch(key="name", value=ref.name), - PropertyMatch( - key="environmentid", value=self.org_node_id.upper() - ), - ], - ), + end=EdgePath(value=target[1], match_by="id"), properties=EdgeProperties(traversable=False), ) From 6b28bbd39b8a0e0e7d2089e27ef336963bb5dcf5 Mon Sep 17 00:00:00 2001 From: Jared Atkinson Date: Fri, 29 May 2026 13:42:31 -0700 Subject: [PATCH 6/6] Add workflow trigger analysis, environment-scoped reference resolution, and pwn-request modeling - add `triggers` and `trigger_dispatch_inputs` workflow properties derived from the workflow `on:` block - add `is_pwn_requestable` workflow analysis for workflows triggered by `pull_request_target` that check out attacker-controlled PR-head content - emit `GH_CanPwnRequest` from `read`, `write`, and `admin` repo roles to pwn-requestable repositories and matching branches --- src/openhound_github/kinds/edges.py | 1 + src/openhound_github/lookup.py | 41 ++++++ src/openhound_github/models/workflow.py | 134 +++++++++++++++++- src/openhound_github/models/workflow_job.py | 4 +- .../resources/organization.py | 2 - 5 files changed, 177 insertions(+), 5 deletions(-) diff --git a/src/openhound_github/kinds/edges.py b/src/openhound_github/kinds/edges.py index 7e9f154..e36740c 100644 --- a/src/openhound_github/kinds/edges.py +++ b/src/openhound_github/kinds/edges.py @@ -55,6 +55,7 @@ DEPENDS_ON = "GH_DependsOn" DEPLOYS_TO = "GH_DeploysTo" CALLS_WORKFLOW = "GH_CallsWorkflow" +CAN_PWN_REQUEST = "GH_CanPwnRequest" USES_SECRET = "GH_UsesSecret" USES_VARIABLE = "GH_UsesVariable" HAS_ENVIRONMENT = "GH_HasEnvironment" diff --git a/src/openhound_github/lookup.py b/src/openhound_github/lookup.py index 3067bb0..7dea737 100644 --- a/src/openhound_github/lookup.py +++ b/src/openhound_github/lookup.py @@ -57,6 +57,20 @@ def repository_node_ids_for_org(self, org_login: str): [org_login], ) + @lru_cache + def repository_visibility(self, repository_node_id: str) -> str | None: + return self._find_single_object( + f"""SELECT visibility FROM {self.schema}.repositories WHERE node_id = ?""", + [repository_node_id], + ) + + @lru_cache + def repository_allow_forking(self, repository_node_id: str) -> bool: + return self._find_single_bool( + f"""SELECT allow_forking FROM {self.schema}.repositories WHERE node_id = ?""", + [repository_node_id], + ) + @lru_cache def private_repository_node_ids(self): return self._find_all_objects( @@ -131,6 +145,33 @@ def members_can_create_any_repositories(self, org_login: str) -> bool: ) ) + @lru_cache + def members_can_fork_private_repositories(self, org_login: str) -> bool: + return self._find_single_bool( + f"""SELECT members_can_fork_private_repositories FROM {self.schema}.organizations WHERE login = ?""", + [org_login], + ) + + @lru_cache + def repo_role_node_ids_with_read_repo_contents(self, repository_node_id: str): + return self._find_all_objects( + f""" + SELECT repository_node_id || '_' || name + FROM {self.schema}.repo_roles + WHERE repository_node_id = ? + AND type = 'default' + AND name IN ('read', 'write', 'admin') + """, + [repository_node_id], + ) + + @lru_cache + def branches_for_repository(self, repository_node_id: str): + return self._find_all_objects( + f"""SELECT id, name FROM {self.schema}.branches WHERE repository_node_id = ?""", + [repository_node_id], + ) + @lru_cache def repo_role_node_ids_with_view_secret_scanning_alerts( self, repository_node_id: str diff --git a/src/openhound_github/models/workflow.py b/src/openhound_github/models/workflow.py index 795428b..100c8a0 100644 --- a/src/openhound_github/models/workflow.py +++ b/src/openhound_github/models/workflow.py @@ -1,4 +1,5 @@ import base64 +import fnmatch import re from dataclasses import dataclass from datetime import datetime @@ -129,6 +130,14 @@ def is_self_hosted(self) -> bool: return "self-hosted" in [str(item) for item in self.runs_on] return False + @property + def container_value(self) -> str | None: + if self.container is None: + return None + if isinstance(self.container, str): + return self.container + return str(self.container) + class WorkflowDocument(BaseModel): model_config = ConfigDict(extra="allow") @@ -241,6 +250,7 @@ class GHWorkflowProperties(GHNodeProperties): contents: str | None = None triggers: list[str] | None = None trigger_dispatch_inputs: list[str] | None = None + is_pwn_requestable: bool = False query_repository: str | None = None query_editors: str | None = None environment_name: str | None = None @@ -261,6 +271,20 @@ class GHWorkflowProperties(GHNodeProperties): description="Repository contains workflow", traversable=False, ), + EdgeDef( + start=nk.REPO_ROLE, + end=nk.REPOSITORY, + kind=ek.CAN_PWN_REQUEST, + description="Repo role can exploit a pwn-requestable workflow on the repository", + traversable=True, + ), + EdgeDef( + start=nk.REPO_ROLE, + end=nk.BRANCH, + kind=ek.CAN_PWN_REQUEST, + description="Repo role can exploit a pwn-requestable workflow on a targeted branch", + traversable=True, + ), ], ) class Workflow(BaseAsset): @@ -338,6 +362,112 @@ def workflow_dispatch_inputs(self) -> list[str] | None: return [str(key) for key in inputs.keys()] + @property + def pull_request_target_branches(self) -> list[str] | None: + document = self.document + if not document: + return None + + on_value = document.model_extra.get("on") + if not isinstance(on_value, dict): + return None + + pull_request_target = on_value.get("pull_request_target") + if pull_request_target is None: + return None + if isinstance(pull_request_target, dict): + branches = pull_request_target.get("branches") + if isinstance(branches, str): + return [branches] + if isinstance(branches, list): + return [str(branch) for branch in branches] + return None + + @property + def is_pwn_requestable(self) -> bool: + document = self.document + if not document: + return False + + on_value = document.model_extra.get("on") + has_pull_request_target = False + if isinstance(on_value, str): + has_pull_request_target = on_value == "pull_request_target" + elif isinstance(on_value, list): + has_pull_request_target = "pull_request_target" in [ + str(item) for item in on_value + ] + elif isinstance(on_value, dict): + has_pull_request_target = "pull_request_target" in on_value + + if not has_pull_request_target: + return False + + for job in document.jobs.values(): + for step in job.steps: + if not step.uses or not step.with_: + continue + action = action_parts(step.uses) + if action["action_slug"] != "actions/checkout": + continue + ref = step.with_.get("ref") + if str(ref).strip() in { + "${{ github.event.pull_request.head.sha }}", + "${{ github.event.pull_request.head.ref }}", + "${{ github.head_ref }}", + }: + return True + return False + + @property + def _repo_is_forkable_for_pwn_request(self) -> bool: + visibility = self._lookup.repository_visibility(self.repository_node_id) + if visibility == "public": + return True + if visibility in {"private", "internal"}: + return self._lookup.repository_allow_forking( + self.repository_node_id + ) and self._lookup.members_can_fork_private_repositories(self.org_login) + return False + + @property + def _pwn_request_branch_ids(self) -> list[str]: + if not self.is_pwn_requestable: + return [] + + patterns = self.pull_request_target_branches + branches = self._lookup.branches_for_repository(self.repository_node_id) + if not patterns: + return [branch_id for branch_id, _ in branches] + + matched = [] + for branch_id, branch_name in branches: + if any(fnmatch.fnmatchcase(branch_name, pattern) for pattern in patterns): + matched.append(branch_id) + return matched + + @property + def _can_pwn_request_edges(self): + if not self.is_pwn_requestable or not self._repo_is_forkable_for_pwn_request: + return + + for (role_node_id,) in self._lookup.repo_role_node_ids_with_read_repo_contents( + self.repository_node_id + ): + yield Edge( + kind=ek.CAN_PWN_REQUEST, + start=EdgePath(value=role_node_id, match_by="id"), + end=EdgePath(value=self.repository_node_id, match_by="id"), + properties=EdgeProperties(traversable=True), + ) + for branch_id in self._pwn_request_branch_ids: + yield Edge( + kind=ek.CAN_PWN_REQUEST, + start=EdgePath(value=role_node_id, match_by="id"), + end=EdgePath(value=branch_id, match_by="id"), + properties=EdgeProperties(traversable=True), + ) + def workflow_job_rows(self) -> list[dict[str, Any]]: document = self.document if not document: @@ -367,7 +497,7 @@ def workflow_job_rows(self) -> list[dict[str, Any]]: "job_key": job_key, "runs_on": job.runs_on, "is_self_hosted": job.is_self_hosted, - "container": job.container, + "container": job.container_value, "environment": job.environment_name, "permissions": job.permissions if job.permissions is not None @@ -444,6 +574,7 @@ def edges(self): end=EdgePath(value=self.node_id, match_by="id"), properties=EdgeProperties(traversable=False), ) + yield from self._can_pwn_request_edges @property def _decoded_contents(self): @@ -467,6 +598,7 @@ def as_node(self) -> GHNode: contents=self._decoded_contents, triggers=self.trigger_events, trigger_dispatch_inputs=self.workflow_dispatch_inputs, + is_pwn_requestable=self.is_pwn_requestable, repository_name=self.repository_name, repository_id=self.repository_node_id, environment_name=self.org_login, diff --git a/src/openhound_github/models/workflow_job.py b/src/openhound_github/models/workflow_job.py index 14f7f65..32b4869 100644 --- a/src/openhound_github/models/workflow_job.py +++ b/src/openhound_github/models/workflow_job.py @@ -51,7 +51,7 @@ class GHWorkflowJobProperties(GHNodeProperties): job_key: str | None = None runs_on: Any = None is_self_hosted: bool = False - container: Any = None + container: str | None = None environment: str | None = None permissions: list[str] | None = None uses_reusable: str | None = None @@ -155,7 +155,7 @@ class WorkflowJob(BaseAsset): org_login: str runs_on: Any = None is_self_hosted: bool = False - container: Any = None + container: str | None = None environment: str | None = None permissions: list[str] | None = None uses_reusable: str | None = None diff --git a/src/openhound_github/resources/organization.py b/src/openhound_github/resources/organization.py index 66fb672..7abe998 100644 --- a/src/openhound_github/resources/organization.py +++ b/src/openhound_github/resources/organization.py @@ -1,5 +1,3 @@ -import base64 -import binascii from collections.abc import Iterable from dataclasses import dataclass, field from datetime import datetime