Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions .github/workflows/_identity_mapper_container.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Reusable workflow that builds the identity-mapper container image and, on
# push events, publishes it to the GitHub Container Registry.
name: Identity Mapper Container

on:
  workflow_call:

jobs:
  build:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
    steps:
      - name: Checkout Code
        uses: actions/checkout@v6

      # Image names must be lowercase; underscores in the repository name are
      # replaced with hyphens. The context value is passed through `env`
      # instead of being interpolated into the script text, so it is never
      # parsed as shell code.
      - name: Generate Image Name
        env:
          REPOSITORY: ${{ github.repository }}
        run: |
          echo "IMAGE_REPOSITORY=ghcr.io/$(echo "$REPOSITORY" | tr '[:upper:]' '[:lower:]' | tr '_' '-')-identity-mapper" >> "$GITHUB_ENV"

      - name: Log in to GitHub Docker Registry
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v4.1.0
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # Splits the ref on '@' and 'v' and exposes the third field as
      # `steps.tags.outputs.version`. Ref names are user-controlled, so the
      # value is handed to the script via `env` rather than inline expansion.
      - name: Extract Version from Tag
        id: tags
        env:
          REF: ${{ github.ref }}
        run: |
          echo "version=$(echo "$REF" | awk -F '[@v]' '{print $3}')" >> "$GITHUB_OUTPUT"

      - name: Docker Metadata
        id: meta
        uses: docker/metadata-action@v5.10.0
        with:
          images: ${{ env.IMAGE_REPOSITORY }}
          tags: |
            type=ref,event=branch
            type=raw,value=latest,enable={{is_default_branch}}

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v4.0.0
        with:
          driver-opts: network=host

      # Always build (so pull requests are validated), but only push the
      # result for push events.
      - name: Build Image
        uses: docker/build-push-action@v6.18.0
        with:
          context: backend/identity-mapper
          push: ${{ github.event_name == 'push' }}
          load: false
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
3 changes: 3 additions & 0 deletions .github/workflows/_kyverno_policy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ jobs:
helm dep build charts/workflows
helm dep build charts/sessionspaces
helm dep build charts/sessionspaces/charts/database
helm dep build charts/identity-mapper
- name: Install Kyverno
run: |
Expand All @@ -39,6 +40,7 @@ jobs:
- name: Install Workflows CRDs
run: |
helm template workflows charts/workflows --namespace workflows --create-namespace | yq e 'select(.kind == "CustomResourceDefinition")' | tee -a /tmp/crds.yaml | kubectl apply -f -
kubectl apply -f charts/identity-mapper/crds/useridentity.yaml
- name: Wait for CRDs
run: |
Expand All @@ -60,6 +62,7 @@ jobs:
# To make testing work with this policy in place, it will be required to emulate the existence
# of a POSIX uid label as part of request.userInfo.extra
helm template workflows charts/workflows | yq e '. | select(.kind == "Policy" or .kind == "ClusterPolicy" or .kind == "GeneratingPolicy" or .kind == "ClusterRole" or .kind == "ClusterRoleBinding") | select(.metadata.name != "workflows-posix-uid-label")' | tee -a /tmp/policies.yaml | kubectl apply -f -
helm template identity-mapper charts/identity-mapper | yq e '. | select(.kind == "Policy" or .kind == "ClusterPolicy" or .kind == "GeneratingPolicy" or .kind == "ClusterRole" or .kind == "ClusterRoleBinding")' | tee -a /tmp/policies.yaml | kubectl apply -f -
cat /tmp/policies.yaml | yq e '. | select(.kind == "Policy" or .kind == "ClusterPolicy")' | kubectl wait --for=condition=Ready --timeout=120s -f -
- name: Wait for Kyverno
Expand Down
24 changes: 24 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,22 @@ on:
pull_request:

jobs:

changes:
Comment thread
davehadley marked this conversation as resolved.
runs-on: ubuntu-latest
outputs:
identity_mapper: ${{ steps.filter.outputs.identity_mapper }}
steps:
- uses: actions/checkout@v6

- name: Detect changes
id: filter
uses: dorny/paths-filter@v3
with:
filters: |
identity_mapper:
- 'backend/identity-mapper/**'
helm_lint:
# Deduplicate jobs from pull requests and branch pushes within the same repo.
if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name != github.repository
Expand Down Expand Up @@ -77,6 +93,14 @@ jobs:
contents: read
packages: write

identity_mapper_container:
needs: changes
if: needs.changes.outputs.identity_mapper == 'true' && (github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name != github.repository)
uses: ./.github/workflows/_identity_mapper_container.yaml
permissions:
contents: read
packages: write

auth_core_code:
# Deduplicate jobs from pull requests and branch pushes within the same repo.
if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name != github.repository
Expand Down
1 change: 1 addition & 0 deletions backend/identity-mapper/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__pycache__/
1 change: 1 addition & 0 deletions backend/identity-mapper/.python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.13
25 changes: 25 additions & 0 deletions backend/identity-mapper/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
FROM python:3.13-slim-trixie

# The installer requires curl (and certificates) to download the release archive.
# The apt package lists are removed in the same layer so they do not bloat the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends curl ca-certificates \
    && rm -rf /var/lib/apt/lists/*

# Download the installer for the pinned uv release
ADD https://astral.sh/uv/0.11.16/install.sh /uv-installer.sh

# Run the installer then remove it
RUN sh /uv-installer.sh && rm /uv-installer.sh

# Ensure the installed binary is on the `PATH`
ENV PATH="/root/.local/bin/:$PATH"

# Copy the project into the image
COPY . /app

# Disable development dependencies
ENV UV_NO_DEV=1

# Sync the project into a new environment, asserting the lockfile is up to date
WORKDIR /app
RUN uv sync --locked

# `--no-sync` reuses the environment created above instead of re-resolving at start-up
CMD ["uv", "run", "--no-sync", "identity-mapper"]
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no new line

5 changes: 5 additions & 0 deletions backend/identity-mapper/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# identity-mapper

This is an MVP to collect user group information to patch Argo Workflows and Pod securityContext based on LDAP information.

TODO: replace with a Rust implementation.
35 changes: 35 additions & 0 deletions backend/identity-mapper/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
[project]
name = "identity-mapper"
version = "0.1.0"
description = "Determines pod security context for Diamond Workflows"
readme = "README.md"
authors = [
{ name = "David Hadley", email = "davehadley@users.noreply.github.com" }
]
requires-python = ">=3.13"
dependencies = [
"kubernetes==35.0.0",
"ldap3>=2.9.1",
"structlog>=25.5.0",
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do not see structlog being used but logging is used throughout.

"jsonpatch>=1.33",
]

[project.scripts]
# Object references are "importable.module:object". `__main__` is a submodule of
# the package, not an attribute of it, so it belongs on the module side of the colon.
identity-mapper = "identity_mapper.__main__:_main"

[build-system]
requires = ["uv_build>=0.11.15,<0.12.0"]
build-backend = "uv_build"

[dependency-groups]
dev = [
"pytest>=9.0.3",
"ruff>=0.15.13",
]

[tool.ruff.lint]
select = ["ALL"]
ignore = ["N806", "D203", "D213", "D106", "S104", "D101", "D103", "D102", "COM812", "RET504", "C901"]

[tool.ruff.lint.per-file-ignores]
"tests/**" = ["S101", "D103", "D100", "INP001", "PLR2004", ]
1 change: 1 addition & 0 deletions backend/identity-mapper/src/identity_mapper/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Tool to synchronize Analysis Platform user information with LDAP."""
40 changes: 40 additions & 0 deletions backend/identity-mapper/src/identity_mapper/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
"""Synchronize LDAP and Kubernetes user IDs and groups."""

import logging

import kubernetes
import ldap3

from ._lookup_identities_in_kubernetes import lookup_identities_in_kubernetes
from ._lookup_identities_in_ldap import lookup_identities_in_ldap
from ._sync_ldap_to_kubernetes import sync_ldap_to_kubernetes

_logger = logging.getLogger(__name__)


def _get_kubernetes_client() -> kubernetes.client.CustomObjectsApi:
    """Build a custom-objects API client from whichever configuration is available.

    The in-cluster configuration is tried first; if loading it raises
    ``ConfigException`` the local kubeconfig is loaded instead.
    """
    config = kubernetes.config
    try:
        config.load_incluster_config()
    except config.ConfigException:
        # Not running inside a cluster: fall back to the kubeconfig file.
        config.load_kube_config()
    return kubernetes.client.CustomObjectsApi()


def _main() -> None:
    """Run a single LDAP-to-Kubernetes identity synchronisation pass.

    Connects to the LDAP server, reads the identities known to LDAP and the
    identities currently stored in Kubernetes, then hands both to
    ``sync_ldap_to_kubernetes``. The LDAP connection is always unbound before
    returning, including when a lookup or the sync raises.
    """
    # Without any handler configured, INFO records are discarded (the fallback
    # handler only emits WARNING and above), so the progress messages below
    # would never be seen. basicConfig does nothing if logging has already
    # been configured by the caller.
    logging.basicConfig(level=logging.INFO)

    _logger.info("Connecting to LDAP")
    ldap_server: str = "ldap://ldapmaster.diamond.ac.uk"
    server = ldap3.Server(ldap_server)
    ldap = ldap3.Connection(server, auto_bind=True)
    try:
        _logger.info("Initializing kubernetes client")
        kubectl = _get_kubernetes_client()
        _logger.info("Looking up identities in LDAP")
        ldap_identities = lookup_identities_in_ldap(ldap)
        _logger.info("Looking up identities in Kubernetes")
        kubernetes_identities = lookup_identities_in_kubernetes(kubectl)
        _logger.info("Syncronizing identities")
        sync_ldap_to_kubernetes(kubectl, ldap_identities, kubernetes_identities)
    finally:
        # Release the LDAP connection even if the run fails part-way through.
        ldap.unbind()
    _logger.info("Complete.")


# Allow `python -m identity_mapper` in addition to the console script.
if __name__ == "__main__":
    _main()
13 changes: 13 additions & 0 deletions backend/identity-mapper/src/identity_mapper/_identity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from typing import TypedDict


class Identity(TypedDict):
    """Numeric user ID, primary group ID and supplemental group IDs of one user."""

    # Numeric user ID; also used as the dictionary key by the lookup functions.
    uid: int
    # Numeric ID of the user's primary group.
    gid: int
    # Numeric IDs of the user's additional groups.
    supplementalGroups: list[int]


class IdentityCrd:
    """Coordinates of the custom resource that stores user identities."""

    # API group of the custom resource.
    GROUP = "workflows.internal.diamond.ac.uk"
    # API version of the custom resource.
    VERSION = "v1"
    # Plural resource name used when calling the custom-objects API.
    PLURAL = "useridentities"
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import kubernetes

from ._identity import Identity, IdentityCrd


def lookup_identities_in_kubernetes(
    kubectl: kubernetes.client.CustomObjectsApi,
) -> dict[int, Identity]:
    """Read the identities currently stored as cluster-scoped custom objects.

    Args:
        kubectl: Custom-objects API client used to list the resources.

    Returns:
        A mapping from integer ``uid`` to the identity read from each item's
        ``spec``. ``uid``, ``gid`` and every supplemental group are converted
        with ``int``; a missing ``supplementalGroups`` becomes an empty list.

    """
    listing = kubectl.list_cluster_custom_object(
        group=IdentityCrd.GROUP, version=IdentityCrd.VERSION, plural=IdentityCrd.PLURAL
    )
    identities = {}
    for resource in listing.get("items", []):
        spec = resource["spec"]
        uid = int(spec.get("uid"))
        gid = int(spec.get("gid"))
        groups = [int(group) for group in spec.get("supplementalGroups", [])]
        identities[uid] = {
            "uid": uid,
            "gid": gid,
            "supplementalGroups": groups,
        }
    return identities
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import ldap3

from ._identity import Identity

_BASE_DN = "dc=diamond,dc=ac,dc=uk"


def lookup_identities_in_ldap(
    ldap: ldap3.Connection,
) -> dict[int, Identity]:
    """Collect the POSIX identity of every user found in LDAP.

    Two searches are run under ``_BASE_DN``: one for ``posixAccount`` entries
    (users) and one for ``posixGroup`` entries (groups). Group membership is
    resolved by matching each group's ``memberUid`` values against the users'
    ``uid`` attribute.

    Args:
        ldap: A bound LDAP connection on which the searches are issued.

    Returns:
        A mapping from integer ``uidNumber`` to the user's identity. The
        ``supplementalGroups`` list is sorted, contains each group at most
        once, and never contains the user's primary group. An empty mapping
        is returned when no usable user entries are found.

    NOTE(review): the keys are integers, the same as those produced by
    ``lookup_identities_in_kubernetes``. A reviewer asked whether the sync step
    compares them against string keys; that code is not visible here, so
    confirm both sides use ``int``.

    """
    people_base_dn = _BASE_DN
    group_base_dn = _BASE_DN

    ldap.search(
        people_base_dn,
        "(objectClass=posixAccount)",
        attributes=["uid", "uidNumber", "gidNumber"],
    )

    users: list[tuple[int, str, int]] = []
    for entry in ldap.entries:
        # Entries missing any of the three attributes are ignored. Because this
        # is a truthiness test, a value that evaluates to 0 is skipped as well.
        if not (entry.uid.value and entry.uidNumber.value and entry.gidNumber.value):
            continue
        users.append(
            (
                int(entry.uidNumber.value),
                str(entry.uid.value),
                int(entry.gidNumber.value),
            )
        )

    if not users:
        return {}

    ldap.search(
        group_base_dn,
        "(objectClass=posixGroup)",
        attributes=["cn", "gidNumber", "memberUid"],
    )

    # A set per user, so a user listed more than once for the same gid does not
    # end up with duplicate supplemental groups.
    user_to_gids: dict[str, set[int]] = {username: set() for _, username, _ in users}

    for group in ldap.entries:
        if not (group.cn.value and group.gidNumber.value):
            continue
        gid = int(group.gidNumber.value)

        # memberUid may be absent or empty; treat both as "no members".
        member_uid = getattr(group, "memberUid", None)
        members = getattr(member_uid, "values", None) or []

        for member in members:
            gids = user_to_gids.get(str(member))
            # Members that are not known posixAccount users are ignored.
            if gids is not None:
                gids.add(gid)

    identities: dict[int, Identity] = {}
    for uid_num, username, primary_gid in users:
        identities[uid_num] = {
            "uid": uid_num,
            "gid": primary_gid,
            # The primary group is reported separately, so exclude it here.
            "supplementalGroups": sorted(user_to_gids[username] - {primary_gid}),
        }

    return identities
Loading
Loading