From 8b9272e8fc1e1aa0798c99b28497d9d2a94c7321 Mon Sep 17 00:00:00 2001
From: Philipp Matthes
Date: Thu, 9 Apr 2026 11:27:28 +0200
Subject: [PATCH 01/17] Scaffold cortex placement API shim
---
.github/workflows/push-charts.yaml | 19 -
.github/workflows/push-images.yaml | 45 ++
.github/workflows/update-appversion.yml | 21 +
.gitignore | 1 +
AGENTS.md | 3 +-
Dockerfile | 8 +-
Tiltfile | 24 +-
cmd/{ => manager}/main.go | 0
cmd/shim/main.go | 9 +
helm/bundles/cortex-placement-shim/Chart.yaml | 20 +
.../alerts/placement-shim.alerts.yaml | 734 ++++++++++++++++++
.../templates/alerts.yaml | 17 +
.../templates/clusterrole.yaml | 23 +
.../templates/clusterrolebinding.yaml | 14 +
.../bundles/cortex-placement-shim/values.yaml | 23 +
helm/library/cortex-shim/Chart.lock | 6 +
helm/library/cortex-shim/Chart.yaml | 8 +
.../cortex-shim/templates/_helpers.tpl | 50 ++
.../cortex-shim/templates/clusterrole.yaml | 100 +++
.../templates/clusterrolebinding.yaml | 34 +
.../cortex-shim/templates/deployment.yaml | 112 +++
.../cortex-shim/templates/service.yaml | 33 +
.../cortex-shim/templates/serviceaccount.yaml | 15 +
.../cortex-shim/templates/servicemonitor.yaml | 16 +
helm/library/cortex-shim/values.yaml | 68 ++
.../cortex/templates/manager/manager.yaml | 2 +-
internal/shim/placement/.gitkeep | 0
27 files changed, 1379 insertions(+), 26 deletions(-)
rename cmd/{ => manager}/main.go (100%)
create mode 100644 cmd/shim/main.go
create mode 100644 helm/bundles/cortex-placement-shim/Chart.yaml
create mode 100644 helm/bundles/cortex-placement-shim/alerts/placement-shim.alerts.yaml
create mode 100644 helm/bundles/cortex-placement-shim/templates/alerts.yaml
create mode 100644 helm/bundles/cortex-placement-shim/templates/clusterrole.yaml
create mode 100644 helm/bundles/cortex-placement-shim/templates/clusterrolebinding.yaml
create mode 100644 helm/bundles/cortex-placement-shim/values.yaml
create mode 100644 helm/library/cortex-shim/Chart.lock
create mode 100644 helm/library/cortex-shim/Chart.yaml
create mode 100644 helm/library/cortex-shim/templates/_helpers.tpl
create mode 100644 helm/library/cortex-shim/templates/clusterrole.yaml
create mode 100644 helm/library/cortex-shim/templates/clusterrolebinding.yaml
create mode 100644 helm/library/cortex-shim/templates/deployment.yaml
create mode 100644 helm/library/cortex-shim/templates/service.yaml
create mode 100644 helm/library/cortex-shim/templates/serviceaccount.yaml
create mode 100644 helm/library/cortex-shim/templates/servicemonitor.yaml
create mode 100644 helm/library/cortex-shim/values.yaml
create mode 100644 internal/shim/placement/.gitkeep
diff --git a/.github/workflows/push-charts.yaml b/.github/workflows/push-charts.yaml
index 2e3577275..a4559d15a 100644
--- a/.github/workflows/push-charts.yaml
+++ b/.github/workflows/push-charts.yaml
@@ -27,25 +27,6 @@ jobs:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- - name: Get all changed helm/library/cortex Chart.yaml files
- id: changed-chart-yaml-files-core
- uses: tj-actions/changed-files@v47
- with:
- files: |
- helm/library/cortex/Chart.yaml
- - name: Push cortex core charts to registry
- if: steps.changed-chart-yaml-files-core.outputs.all_changed_files != ''
- shell: bash
- env:
- ALL_CHANGED_FILES: ${{ steps.changed-chart-yaml-files-core.outputs.all_changed_files }}
- run: |
- for CHART_FILE in ${ALL_CHANGED_FILES}; do
- CHART_DIR=$(dirname $CHART_FILE)
- helm package $CHART_DIR --dependency-update --destination $CHART_DIR
- CHART_PACKAGE=$(ls $CHART_DIR/*.tgz)
- helm push $CHART_PACKAGE oci://${{ env.REGISTRY }}/${{ github.repository }}/charts/
- done
-
- name: Get all changed library Chart.yaml files
id: changed-chart-yaml-files-library
uses: tj-actions/changed-files@v47
diff --git a/.github/workflows/push-images.yaml b/.github/workflows/push-images.yaml
index 997595976..3085b503b 100644
--- a/.github/workflows/push-images.yaml
+++ b/.github/workflows/push-images.yaml
@@ -72,6 +72,50 @@ jobs:
subject-digest: ${{ steps.push_cortex_postgres.outputs.digest }}
push-to-registry: true
+ # Only build and push the cortex-shim image if there are changes related
+ # to the cortex shims (e.g., in cmd/shim or internal/shim).
+ - name: Get all changed shim/ files
+ id: changed_shim_files
+ uses: tj-actions/changed-files@v47
+ with:
+ files: |
+ cmd/shim/**
+ internal/shim/**
+ - name: Docker Meta (Cortex Shim)
+ if: steps.changed_shim_files.outputs.all_changed_files != ''
+ id: meta_cortex_shim
+ uses: docker/metadata-action@v6
+ with:
+ images: ${{ env.REGISTRY }}/${{ github.repository }}-shim
+ tags: |
+ type=semver,pattern={{version}}
+ type=semver,pattern={{major}}.{{minor}}
+ type=sha
+ type=raw,value=latest
+ env:
+ DOCKER_METADATA_SHORT_SHA_LENGTH: 8
+ - name: Build and Push Cortex Shim
+ if: steps.changed_shim_files.outputs.all_changed_files != ''
+ id: push_cortex_shim
+ uses: docker/build-push-action@v7
+ with:
+ context: .
+ platforms: linux/amd64,linux/arm64
+ push: true
+ tags: ${{ steps.meta_cortex_shim.outputs.tags }}
+ labels: ${{ steps.meta_cortex_shim.outputs.labels }}
+ build-args: |
+ GIT_TAG=${{ github.ref_name }}
+ GIT_COMMIT=${{ github.sha }}
+ GOMAIN=cmd/shim/main.go
+ - name: Generate Artifact Attestation for Cortex Shim
+ if: steps.changed_shim_files.outputs.all_changed_files != ''
+ uses: actions/attest-build-provenance@v4
+ with:
+ subject-name: ${{ env.REGISTRY }}/${{ github.repository }}-shim
+ subject-digest: ${{ steps.push_cortex_shim.outputs.digest }}
+ push-to-registry: true
+
# Build & push new cortex image
- name: Docker Meta (Cortex)
id: meta_cortex
@@ -98,6 +142,7 @@ jobs:
build-args: |
GIT_TAG=${{ github.ref_name }}
GIT_COMMIT=${{ github.sha }}
+ GOMAIN=cmd/manager/main.go
- name: Generate Artifact Attestation for Cortex
uses: actions/attest-build-provenance@v4
with:
diff --git a/.github/workflows/update-appversion.yml b/.github/workflows/update-appversion.yml
index cc5ccdc9f..20087fa80 100644
--- a/.github/workflows/update-appversion.yml
+++ b/.github/workflows/update-appversion.yml
@@ -44,6 +44,27 @@ jobs:
git commit -m "Bump cortex-postgres chart appVersions to ${{ steps.vars.outputs.sha }} [skip ci]" || echo "No changes to commit"
git push origin HEAD:main
+ # Only bump the appVersion if there are changes in shim-related directories
+ - name: Get all changed shim files
+ id: changed_shim_files
+ uses: tj-actions/changed-files@v47
+ with:
+ files: |
+ internal/shim/**
+ cmd/shim/**
+ - name: Update appVersion in cortex-shim Chart.yaml
+ if: steps.changed_shim_files.outputs.all_changed_files != ''
+ run: |
+ sed -i 's/^\([ ]*appVersion:[ ]*\).*/\1"${{ steps.vars.outputs.sha }}"/' helm/library/cortex-shim/Chart.yaml
+ - name: Commit and push changes for cortex-shim
+ if: steps.changed_shim_files.outputs.all_changed_files != ''
+ run: |
+ git config user.name "github-actions[bot]"
+ git config user.email "github-actions[bot]@users.noreply.github.com"
+ git add helm/library/cortex-shim/Chart.yaml
+ git commit -m "Bump cortex-shim chart appVersions to ${{ steps.vars.outputs.sha }} [skip ci]" || echo "No changes to commit"
+ git push origin HEAD:main
+
- name: Update appVersion in helm/library/cortex/Chart.yaml
run: |
sed -i 's/^\([ ]*appVersion:[ ]*\).*/\1"${{ steps.vars.outputs.sha }}"/' helm/library/cortex/Chart.yaml
diff --git a/.gitignore b/.gitignore
index 04bac2d09..7e21248bc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -34,6 +34,7 @@ cortex.secrets.yaml
!.editorconfig
!.gitignore
!.github
+!.gitkeep
!.golangci.yaml
!.license-scan-overrides.jsonl
!.license-scan-rules.json
diff --git a/AGENTS.md b/AGENTS.md
index 6f2e12a17..59747bd8c 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -50,7 +50,8 @@ Helm charts:
## Repository Structure
Code:
-- `cmd/main.go` is the entry point for the manager, which starts the controllers and webhooks
+- `cmd/manager/main.go` is the entry point for the manager, which starts the controllers and webhooks
+- `cmd/shim/main.go` is the entry point for cortex shims exposing cortex capabilities over REST endpoints
- `api/v1alpha1` is where the CRD specs of cortex lives
- `api/external` contains messages sent to cortex via http from external openstack services
- `internal/scheduling` contains the logic for scheduling in different cloud domains
diff --git a/Dockerfile b/Dockerfile
index 6f7e79bea..2580e9637 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -6,6 +6,8 @@ ARG TARGETARCH
ARG GO_MOD_PATH=.
ARG GOCACHE=/root/.cache/go-build
ENV GOCACHE=${GOCACHE}
+ARG GOMAIN=cmd/manager/main.go
+ENV GOMAIN=${GOMAIN}
# Note: avoid using COPY to /lib which will lead to docker build errors.
WORKDIR /workspace/${GO_MOD_PATH}
@@ -29,13 +31,13 @@ ENV GOOS=${TARGETOS:-linux}
ENV GOARCH=${TARGETARCH}
RUN --mount=type=cache,target=/go/pkg/mod/ \
--mount=type=cache,target=${GOCACHE} \
- go build -a -o /manager cmd/main.go
+ go build -a -o /main ${GOMAIN}
# Use distroless as minimal base image to package the manager binary
# Refer to https://github.com/GoogleContainerTools/distroless for more details
FROM gcr.io/distroless/static:nonroot
WORKDIR /
-COPY --from=builder /manager .
+COPY --from=builder /main .
USER 65532:65532
-ENTRYPOINT ["/manager"]
+ENTRYPOINT ["/main"]
diff --git a/Tiltfile b/Tiltfile
index 6871d18b3..bc87f4d30 100644
--- a/Tiltfile
+++ b/Tiltfile
@@ -7,7 +7,10 @@
analytics_settings(False)
# Use the ACTIVE_DEPLOYMENTS env var to select which Cortex bundles to deploy.
-ACTIVE_DEPLOYMENTS_ENV = os.getenv('ACTIVE_DEPLOYMENTS', 'nova,manila,cinder,ironcore,pods')
+ACTIVE_DEPLOYMENTS_ENV = os.getenv(
+ 'ACTIVE_DEPLOYMENTS',
+ 'nova,manila,cinder,ironcore,pods,placement',
+)
if ACTIVE_DEPLOYMENTS_ENV == "":
ACTIVE_DEPLOYMENTS = [] # Catch "".split(",") = [""]
else:
@@ -78,13 +81,22 @@ local('kubectl wait --namespace cert-manager --for=condition=available deploymen
url = 'https://raw.githubusercontent.com/cobaltcore-dev/openstack-hypervisor-operator/refs/heads/main/charts/openstack-hypervisor-operator/crds/kvm.cloud.sap_hypervisors.yaml'
local('curl -L ' + url + ' | kubectl apply -f -')
-########### Cortex Operator & CRDs
+########### Cortex Manager & CRDs
docker_build('ghcr.io/cobaltcore-dev/cortex', '.',
dockerfile='Dockerfile',
+ build_args={'GOMAIN': 'cmd/manager/main.go'},
only=['internal/', 'cmd/', 'api/', 'pkg', 'go.mod', 'go.sum', 'Dockerfile'],
)
local('sh helm/sync.sh helm/library/cortex')
+########### Cortex Shim
+docker_build('ghcr.io/cobaltcore-dev/cortex-shim', '.',
+ dockerfile='Dockerfile',
+ build_args={'GOMAIN': 'cmd/shim/main.go'},
+ only=['internal/', 'cmd/', 'api/', 'pkg', 'go.mod', 'go.sum', 'Dockerfile'],
+)
+local('sh helm/sync.sh helm/library/cortex-shim')
+
########### Cortex Bundles
docker_build('ghcr.io/cobaltcore-dev/cortex-postgres', 'postgres')
@@ -98,6 +110,7 @@ bundle_charts = [
('helm/bundles/cortex-cinder', 'cortex-cinder'),
('helm/bundles/cortex-ironcore', 'cortex-ironcore'),
('helm/bundles/cortex-pods', 'cortex-pods'),
+ ('helm/bundles/cortex-placement-shim', 'cortex-placement-shim'),
]
dep_charts = {
'cortex-crds': [
@@ -123,6 +136,9 @@ dep_charts = {
('helm/library/cortex-postgres', 'cortex-postgres'),
('helm/library/cortex', 'cortex'),
],
+ 'cortex-placement-shim': [
+ ('helm/library/cortex-shim', 'cortex-shim'),
+ ],
}
for (bundle_chart_path, bundle_chart_name) in bundle_charts:
@@ -255,6 +271,10 @@ if 'pods' in ACTIVE_DEPLOYMENTS:
k8s_yaml('samples/pods/pod.yaml')
k8s_resource('test-pod', labels=['Cortex-Pods'])
+if 'placement' in ACTIVE_DEPLOYMENTS:
+ print("Activating Cortex Placement Shim bundle")
+ k8s_yaml(helm('./helm/bundles/cortex-placement-shim', name='cortex-placement-shim', values=tilt_values, set=env_set_overrides))
+
########### Dev Dependencies
local('sh helm/sync.sh helm/dev/cortex-prometheus-operator')
k8s_yaml(helm('./helm/dev/cortex-prometheus-operator', name='cortex-prometheus-operator')) # Operator
diff --git a/cmd/main.go b/cmd/manager/main.go
similarity index 100%
rename from cmd/main.go
rename to cmd/manager/main.go
diff --git a/cmd/shim/main.go b/cmd/shim/main.go
new file mode 100644
index 000000000..6b0634229
--- /dev/null
+++ b/cmd/shim/main.go
@@ -0,0 +1,9 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package main
+
+func main() {
+ // TODO: this needs scaffolding, for now it just does nothing.
+ select {}
+}
diff --git a/helm/bundles/cortex-placement-shim/Chart.yaml b/helm/bundles/cortex-placement-shim/Chart.yaml
new file mode 100644
index 000000000..7f53ed347
--- /dev/null
+++ b/helm/bundles/cortex-placement-shim/Chart.yaml
@@ -0,0 +1,20 @@
+# Copyright SAP SE
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v2
+name: cortex-placement-shim
+description: A Helm chart deploying the Cortex placement shim.
+type: application
+version: 0.0.1
+appVersion: 0.1.0
+dependencies:
+ # from: file://../../library/cortex-shim
+ - name: cortex-shim
+ repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
+ version: 0.0.1
+ # Owner info adds a configmap to the kubernetes cluster with information on
+ # the service owner. This makes it easier to find out who to contact in case
+ # of issues. See: https://github.com/sapcc/helm-charts/pkgs/container/helm-charts%2Fowner-info
+ - name: owner-info
+ repository: oci://ghcr.io/sapcc/helm-charts
+ version: 1.0.0
diff --git a/helm/bundles/cortex-placement-shim/alerts/placement-shim.alerts.yaml b/helm/bundles/cortex-placement-shim/alerts/placement-shim.alerts.yaml
new file mode 100644
index 000000000..41bf29794
--- /dev/null
+++ b/helm/bundles/cortex-placement-shim/alerts/placement-shim.alerts.yaml
@@ -0,0 +1,734 @@
+groups:
+- name: cortex-nova-alerts
+ rules:
+ - alert: CortexNovaSchedulingDown
+ expr: |
+ up{pod=~"cortex-nova-scheduling-.*"} != 1 or
+ absent(up{pod=~"cortex-nova-scheduling-.*"})
+ for: 5m
+ labels:
+ context: liveness
+ dashboard: cortex/cortex
+ service: cortex
+ severity: critical
+ support_group: workload-management
+ playbook: docs/support/playbook/cortex/down
+ annotations:
+ summary: "Cortex Scheduling for Nova is down"
+ description: >
+ The Cortex scheduling service is down. Scheduling requests from Nova will
+ not be served. This is non-critical for vmware virtual machines, but
+ blocks kvm virtual machines from being scheduled. Thus, it is
+ recommended to immediately investigate and resolve the issue.
+
+ - alert: CortexNovaKnowledgeDown
+ expr: |
+ up{pod=~"cortex-nova-knowledge-.*"} != 1 or
+ absent(up{pod=~"cortex-nova-knowledge-.*"})
+ for: 5m
+ labels:
+ context: liveness
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ playbook: docs/support/playbook/cortex/down
+ annotations:
+ summary: "Cortex Knowledge for Nova is down"
+ description: >
+ The Cortex Knowledge service is down. This is no immediate problem,
+ since cortex is still able to process requests,
+ but the quality of the responses may be affected.
+
+ - alert: CortexNovaDeschedulerPipelineErroring
+ expr: delta(cortex_detector_pipeline_run_duration_seconds_count{component="nova-scheduling", error="true"}[2m]) > 0
+ for: 5m
+ labels:
+ context: descheduler
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+ summary: "Descheduler pipeline is erroring."
+ description: >
+ The Cortex descheduler pipeline is encountering errors during its execution.
+ This may indicate issues with the descheduling logic or the underlying infrastructure.
+ It is recommended to investigate the descheduler logs and the state of the VMs being processed.
+
+ - alert: CortexNovaHttpRequest400sTooHigh
+ expr: rate(cortex_scheduler_api_request_duration_seconds_count{service="cortex-nova-metrics", status=~"4.+"}[5m]) > 0.1
+ for: 5m
+ labels:
+ context: api
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+ summary: "Nova Scheduler HTTP request 400 errors too high"
+ description: >
+ Nova Scheduler is responding to placement requests with HTTP 4xx
+ errors. This is expected when the scheduling request cannot be served
+ by Cortex. However, it could also indicate that the request format has
+ changed and Cortex is unable to parse it.
+
+ - alert: CortexNovaSchedulingHttpRequest500sTooHigh
+ expr: rate(cortex_scheduler_api_request_duration_seconds_count{service="cortex-nova-metrics", status=~"5.+" }[5m]) > 0.1
+ for: 5m
+ labels:
+ context: api
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+ summary: "Nova Scheduler HTTP request 500 errors too high"
+ description: >
+ Nova Scheduler is responding to placement requests with HTTP 5xx errors.
+ This is not expected and indicates that Cortex is having some internal problem.
+ Nova will continue to place new VMs, but the placement will be less desirable.
+ Thus, no immediate action is needed.
+
+ - alert: CortexNovaHighMemoryUsage
+ expr: process_resident_memory_bytes{service="cortex-nova-metrics"} > 6000 * 1024 * 1024
+ for: 5m
+ labels:
+ context: memory
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+ summary: "`{{$labels.component}}` uses too much memory"
+ description: >
+ `{{$labels.component}}` should not be using more than 6000 MiB of memory. Usually it
+ should use much less, so there may be a memory leak or other changes
+ that are causing the memory usage to increase significantly.
+
+ - alert: CortexNovaHighCPUUsage
+ expr: rate(process_cpu_seconds_total{service="cortex-nova-metrics"}[1m]) > 0.5
+ for: 5m
+ labels:
+ context: cpu
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+ summary: "`{{$labels.component}}` uses too much CPU"
+ description: >
+ `{{$labels.component}}` should not be using more than 50% of a single CPU core. Usually
+ it should use much less, so there may be a CPU leak or other changes
+ that are causing the CPU usage to increase significantly.
+
+ - alert: CortexNovaTooManyDBConnectionAttempts
+ expr: rate(cortex_db_connection_attempts_total{service="cortex-nova-metrics"}[5m]) > 0.1
+ for: 5m
+ labels:
+ context: db
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+ summary: "`{{$labels.component}}` is trying to connect to the database too often"
+ description: >
+ `{{$labels.component}}` is trying to connect to the database too often. This may happen
+ when the database is down or the connection parameters are misconfigured.
+
+ - alert: CortexNovaSyncNotSuccessful
+ expr: cortex_sync_request_processed_total{service="cortex-nova-metrics"} - cortex_sync_request_duration_seconds_count{service="cortex-nova-metrics"} > 0
+ for: 5m
+ labels:
+ context: syncstatus
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+ summary: "`{{$labels.component}}` Sync not successful"
+ description: >
+ `{{$labels.component}}` experienced an issue syncing data from the datasource `{{$labels.datasource}}`. This may
+ happen when the datasource (OpenStack, Prometheus, etc.) is down or
+ the sync module is misconfigured. No immediate action is needed, since
+ the sync module will retry the sync operation and the currently synced
+ data will be kept. However, when this problem persists for a longer
+ time the service will have a less recent view of the datacenter.
+
+ - alert: CortexNovaSyncObjectsDroppedToZero
+ expr: cortex_sync_objects{service="cortex-nova-metrics", datasource!="openstack_migrations"} == 0
+ for: 60m
+ labels:
+ context: syncobjects
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+ summary: "`{{$labels.component}}` is not syncing any new data from `{{$labels.datasource}}`"
+ description: >
+ `{{$labels.component}}` is not syncing any objects from the datasource `{{$labels.datasource}}`. This may happen
+ when the datasource (OpenStack, Prometheus, etc.) is down or the sync
+ module is misconfigured. No immediate action is needed, since the sync
+ module will retry the sync operation and the currently synced data will
+ be kept. However, when this problem persists for a longer time the
+ service will have a less recent view of the datacenter.
+
+ - alert: CortexNovaDatasourceUnready
+ expr: cortex_datasource_state{domain="nova",state!="ready"} != 0
+ for: 60m
+ labels:
+ context: datasources
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+ summary: "Datasource `{{$labels.datasource}}` is in `{{$labels.state}}` state"
+ description: >
+ This may indicate issues with the datasource
+ connectivity or configuration. It is recommended to investigate the
+ datasource status and logs for more details.
+
+ - alert: CortexNovaKnowledgeUnready
+ expr: cortex_knowledge_state{domain="nova",state!="ready"} != 0
+ for: 60m
+ labels:
+ context: knowledge
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+ summary: "Knowledge `{{$labels.knowledge}}` is in `{{$labels.state}}` state"
+ description: >
+ This may indicate issues with the knowledge
+ configuration. It is recommended to investigate the
+ knowledge status and logs for more details.
+
+ - alert: CortexNovaDecisionsWithErrors
+ expr: cortex_decision_state{domain="nova",state="error"} > 0
+ for: 5m
+ labels:
+ context: decisions
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+ summary: "Some decisions are in error state for operator `{{$labels.operator}}`"
+ description: >
+ The cortex scheduling pipeline generated decisions that are in error state.
+ This may indicate issues with the decision logic or the underlying infrastructure.
+ It is recommended to investigate the decision logs and the state of the
+ VMs being processed.
+
+ - alert: CortexNovaTooManyDecisionsWaiting
+ expr: cortex_decision_state{domain="nova",state="waiting"} > 10
+ for: 5m
+ labels:
+ context: decisions
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+ summary: "Too many decisions are in waiting state for operator `{{$labels.operator}}`"
+ description: >
+ The cortex scheduling pipeline has a high number of decisions for which
+ no target host has been assigned yet.
+
+ This may indicate a backlog in processing or issues with the decision logic.
+ It is recommended to investigate the decision logs and the state of the
+ VMs being processed.
+
+ - alert: CortexNovaKPIUnready
+ expr: |
+ cortex_kpi_state{domain="nova",state!="ready"} != 0
+ for: 60m
+ labels:
+ context: kpis
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+ summary: "KPI `{{$labels.kpi}}` is in `{{$labels.state}}` state"
+ description: >
+ This may indicate issues with the KPI
+ configuration. It is recommended to investigate the
+ KPI status and logs for more details.
+
+ - alert: CortexNovaPipelineUnready
+ expr: cortex_pipeline_state{domain="nova",state!="ready"} != 0
+ for: 5m
+ labels:
+ context: pipelines
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+ summary: "Pipeline `{{$labels.pipeline}}` is in `{{$labels.state}}` state"
+ description: >
+ This may indicate issues with the pipeline
+ configuration. It is recommended to investigate the
+ pipeline status and logs for more details.
+
+ # Committed Resource Info API Alerts
+ - alert: CortexNovaCommittedResourceInfoHttpRequest500sTooHigh
+ expr: rate(cortex_committed_resource_info_api_requests_total{service="cortex-nova-metrics", status_code=~"5.."}[5m]) > 0.1
+ for: 5m
+ labels:
+ context: committed-resource-api
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+ summary: "Committed Resource info API HTTP 500 errors too high"
+ description: >
+ The committed resource info API (Limes LIQUID integration) is responding
+ with HTTP 5xx errors. This indicates internal problems building service info,
+ such as invalid flavor group data. Limes will not be able to discover available
+ resources until the issue is resolved.
+
+ # Committed Resource Change API Alerts
+ - alert: CortexNovaCommittedResourceHttpRequest400sTooHigh
+ expr: rate(cortex_committed_resource_change_api_requests_total{service="cortex-nova-metrics", status_code=~"4.."}[5m]) > 0.1
+ for: 5m
+ labels:
+ context: committed-resource-api
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+ summary: "Committed Resource change API HTTP 400 errors too high"
+ description: >
+ The committed resource change API (Limes LIQUID integration) is responding
+ with HTTP 4xx errors. This may happen when Limes sends a request with
+ an outdated info version (409), the API is temporarily unavailable,
+ or the request format is invalid. Limes will typically retry these
+ requests, so no immediate action is needed unless the errors persist.
+
+ - alert: CortexNovaCommittedResourceHttpRequest500sTooHigh
+ expr: rate(cortex_committed_resource_change_api_requests_total{service="cortex-nova-metrics", status_code=~"5.."}[5m]) > 0.1
+ for: 5m
+ labels:
+ context: committed-resource-api
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+ summary: "Committed Resource change API HTTP 500 errors too high"
+ description: >
+ The committed resource change API (Limes LIQUID integration) is responding
+ with HTTP 5xx errors. This is not expected and indicates that Cortex
+ is having an internal problem processing commitment changes. Limes will
+ continue to retry, but new commitments may not be fulfilled until the
+ issue is resolved.
+
+ - alert: CortexNovaCommittedResourceLatencyTooHigh
+ expr: histogram_quantile(0.95, sum(rate(cortex_committed_resource_change_api_request_duration_seconds_bucket{service="cortex-nova-metrics"}[5m])) by (le)) > 30
+ for: 5m
+ labels:
+ context: committed-resource-api
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+ summary: "Committed Resource change API latency too high"
+ description: >
+ The committed resource change API (Limes LIQUID integration) is experiencing
+ high latency (p95 > 30s). This may indicate that the scheduling pipeline
+ is under heavy load or that reservation scheduling is taking longer than
+ expected. Limes requests may time out, causing commitment changes to fail.
+
+ - alert: CortexNovaCommittedResourceRejectionRateTooHigh
+ expr: |
+ sum(rate(cortex_committed_resource_change_api_commitment_changes_total{service="cortex-nova-metrics", result="rejected"}[5m]))
+ / sum(rate(cortex_committed_resource_change_api_commitment_changes_total{service="cortex-nova-metrics"}[5m])) > 0.5
+ for: 5m
+ labels:
+ context: committed-resource-api
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+ summary: "Committed Resource rejection rate too high"
+ description: >
+ More than 50% of commitment change requests are being rejected.
+ This may indicate insufficient capacity in the datacenter to fulfill
+ new commitments, or issues with the commitment scheduling logic.
+ Rejected commitments are rolled back, so Limes will see them as failed
+ and may retry or report the failure to users.
+
+ - alert: CortexNovaCommittedResourceTimeoutsTooHigh
+ expr: increase(cortex_committed_resource_change_api_timeouts_total{service="cortex-nova-metrics"}[5m]) > 0
+ for: 5m
+ labels:
+ context: committed-resource-api
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+ summary: "Committed Resource change API timeouts too high"
+ description: >
+ The committed resource change API (Limes LIQUID integration) timed out
+ while waiting for reservations to become ready. This indicates that the
+ scheduling pipeline is overloaded or reservations are taking too long
+ to be scheduled. Affected commitment changes are rolled back and Limes
+ will see them as failed. Consider investigating the scheduler performance
+ or increasing the timeout configuration.
+
+ # Committed Resource Usage API Alerts
+ - alert: CortexNovaCommittedResourceUsageHttpRequest400sTooHigh
+ expr: rate(cortex_committed_resource_usage_api_requests_total{service="cortex-nova-metrics", status_code=~"4.."}[5m]) > 0.1
+ for: 5m
+ labels:
+ context: committed-resource-api
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+ summary: "Committed Resource usage API HTTP 400 errors too high"
+ description: >
+ The committed resource usage API (Limes LIQUID integration) is responding
+ with HTTP 4xx errors. This may indicate invalid project IDs or malformed
+ requests from Limes. Limes will typically retry these requests.
+
+ - alert: CortexNovaCommittedResourceUsageHttpRequest500sTooHigh
+ expr: rate(cortex_committed_resource_usage_api_requests_total{service="cortex-nova-metrics", status_code=~"5.."}[5m]) > 0.1
+ for: 5m
+ labels:
+ context: committed-resource-api
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+ summary: "Committed Resource usage API HTTP 500 errors too high"
+ description: >
+ The committed resource usage API (Limes LIQUID integration) is responding
+ with HTTP 5xx errors. This indicates internal problems fetching reservations
+ or Nova server data. Limes may receive stale or incomplete usage data.
+
+ - alert: CortexNovaCommittedResourceUsageLatencyTooHigh
+ expr: histogram_quantile(0.95, sum(rate(cortex_committed_resource_usage_api_request_duration_seconds_bucket{service="cortex-nova-metrics"}[5m])) by (le)) > 5
+ for: 5m
+ labels:
+ context: committed-resource-api
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+ summary: "Committed Resource usage API latency too high"
+ description: >
+ The committed resource usage API (Limes LIQUID integration) is experiencing
+ high latency (p95 > 5s). This may indicate slow Nova API responses or
+ database queries. Limes scrapes may time out, affecting quota reporting.
+
+ # Committed Resource Capacity API Alerts
+ - alert: CortexNovaCommittedResourceCapacityHttpRequest400sTooHigh
+ expr: rate(cortex_committed_resource_capacity_api_requests_total{service="cortex-nova-metrics", status_code=~"4.."}[5m]) > 0.1
+ for: 5m
+ labels:
+ context: committed-resource-api
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+ summary: "Committed Resource capacity API HTTP 400 errors too high"
+ description: >
+ The committed resource capacity API (Limes LIQUID integration) is responding
+ with HTTP 4xx errors. This may indicate malformed requests from Limes.
+
+ - alert: CortexNovaCommittedResourceCapacityHttpRequest500sTooHigh
+ expr: rate(cortex_committed_resource_capacity_api_requests_total{service="cortex-nova-metrics", status_code=~"5.."}[5m]) > 0.1
+ for: 5m
+ labels:
+ context: committed-resource-api
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+ summary: "Committed Resource capacity API HTTP 500 errors too high"
+ description: >
+ The committed resource capacity API (Limes LIQUID integration) is responding
+ with HTTP 5xx errors. This indicates internal problems calculating cluster
+ capacity. Limes may receive stale or incomplete capacity data.
+
+ - alert: CortexNovaCommittedResourceCapacityLatencyTooHigh
+ expr: histogram_quantile(0.95, sum(rate(cortex_committed_resource_capacity_api_request_duration_seconds_bucket{service="cortex-nova-metrics"}[5m])) by (le)) > 5
+ for: 5m
+ labels:
+ context: committed-resource-api
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+ summary: "Committed Resource capacity API latency too high"
+ description: >
+ The committed resource capacity API (Limes LIQUID integration) is experiencing
+ high latency (p95 > 5s). This may indicate slow database queries or knowledge
+ CRD retrieval. Limes scrapes may time out, affecting capacity reporting.
+
+ # Committed Resource Syncer Alerts
+ - alert: CortexNovaCommittedResourceSyncerErrorsHigh
+ expr: increase(cortex_committed_resource_syncer_errors_total{service="cortex-nova-metrics"}[1h]) > 3
+ for: 5m
+ labels:
+ context: committed-resource-syncer
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+ summary: "Committed Resource syncer experiencing errors"
+ description: >
+ The committed resource syncer has encountered multiple errors in the last hour.
+ This may indicate connectivity issues with Limes. Check the syncer logs for error details.
+
+ - alert: CortexNovaCommittedResourceSyncerUnitMismatchRateHigh
+ expr: |
+ (
+ sum(rate(cortex_committed_resource_syncer_commitments_skipped_total{service="cortex-nova-metrics", reason="unit_mismatch"}[1h]))
+ / sum(rate(cortex_committed_resource_syncer_commitments_total{service="cortex-nova-metrics"}[1h]))
+ ) > 0.05
+ and on() sum(rate(cortex_committed_resource_syncer_commitments_total{service="cortex-nova-metrics"}[1h])) > 0
+ for: 15m
+ labels:
+ context: committed-resource-syncer
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+ summary: "Committed Resource syncer unit mismatch rate >5%"
+ description: >
+ More than 5% of commitments are being skipped due to unit mismatches between
+ Limes and Cortex flavor groups. This happens when Limes has not yet been
+ updated to use the new unit format after a flavor group change. The affected
+ commitments will keep their existing reservations until Limes notices the update.
+        Check the logs if this error persists for a longer time.
+
+ - alert: CortexNovaCommittedResourceSyncerUnknownFlavorGroupRateHigh
+ expr: |
+ (
+ sum(rate(cortex_committed_resource_syncer_commitments_skipped_total{service="cortex-nova-metrics", reason="unknown_flavor_group"}[1h]))
+ / sum(rate(cortex_committed_resource_syncer_commitments_total{service="cortex-nova-metrics"}[1h]))
+ ) > 0
+ and on() sum(rate(cortex_committed_resource_syncer_commitments_total{service="cortex-nova-metrics"}[1h])) > 0
+ for: 15m
+ labels:
+ context: committed-resource-syncer
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+ summary: "Committed Resource syncer unknown flavor group rate >0%"
+ description: >
+ Some commitments reference flavor groups that don't exist in
+ Cortex Knowledge (anymore). This may indicate that flavor group configuration is
+ out of sync between Limes and Cortex, or that Knowledge extraction is failing.
+ Check the flavor group Knowledge CRD and history to see what was changed.
+
+ - alert: CortexNovaCommittedResourceSyncerLocalChangeRateHigh
+ expr: |
+ (
+ (
+ rate(cortex_committed_resource_syncer_reservations_created_total{service="cortex-nova-metrics"}[1h]) +
+ rate(cortex_committed_resource_syncer_reservations_deleted_total{service="cortex-nova-metrics"}[1h]) +
+ rate(cortex_committed_resource_syncer_reservations_repaired_total{service="cortex-nova-metrics"}[1h])
+ ) / rate(cortex_committed_resource_syncer_commitments_processed_total{service="cortex-nova-metrics"}[1h])
+ ) > 0.01
+ and on() rate(cortex_committed_resource_syncer_commitments_processed_total{service="cortex-nova-metrics"}[1h]) > 0
+ for: 15m
+ labels:
+ context: committed-resource-syncer
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+ summary: "Committed Resource syncer local change rate >1%"
+ description: >
+ More than 1% of synced commitments are requiring reservation changes
+ (creates, deletes, or repairs). This is higher than expected for steady-state
+ operation and may indicate data inconsistencies, external modifications to
+ reservations, or issues with the CRDs. Check Cortex logs for details.
+
+ - alert: CortexNovaCommittedResourceSyncerRepairRateHigh
+ expr: |
+ (
+ rate(cortex_committed_resource_syncer_reservations_repaired_total{service="cortex-nova-metrics"}[1h])
+ / rate(cortex_committed_resource_syncer_commitments_processed_total{service="cortex-nova-metrics"}[1h])
+ ) > 0
+ and on() rate(cortex_committed_resource_syncer_commitments_processed_total{service="cortex-nova-metrics"}[1h]) > 0
+ for: 15m
+ labels:
+ context: committed-resource-syncer
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+ summary: "Committed Resource syncer repair rate >0%"
+ description: >
+ Some commitments have reservations that needed repair
+ (wrong metadata like project ID or flavor group). This may indicate data
+ corruption, bugs in reservation creation, or external modifications.
+ Reservations are automatically repaired, but the root cause should be
+ investigated if this alert persists.
+
+ - alert: CortexNovaDoesntFindValidKVMHosts
+ expr: sum by (az, hvtype) (cortex_vm_faults{hvtype=~"CH|QEMU",faultmsg=~".*No valid host was found.*"}) > 0
+ for: 5m
+ labels:
+ context: scheduling
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+ summary: "Nova scheduling cannot find valid KVM hosts"
+ description: >
+        Cortex is seeing faulty VMs in `{{$labels.az}}` where Nova scheduling
+ failed to find a valid `{{$labels.hvtype}}` host. This may indicate
+ capacity issues, misconfigured filters, or resource constraints in the
+ datacenter. Investigate the affected VMs and hypervisor availability.
+
+ - alert: CortexNovaNewDatasourcesNotReconciling
+ expr: count by(datasource) (cortex_datasource_seconds_until_reconcile{queued="false",domain="nova"}) > 0
+ for: 60m
+ labels:
+ context: datasources
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+ summary: "New datasource `{{$labels.datasource}}` has not reconciled"
+ description: >
+ A new datasource `{{$labels.datasource}}` has been added but has not
+ completed its first reconciliation yet. This may indicate issues with
+ the datasource controller's workqueue overprioritizing other datasources.
+
+ - alert: CortexNovaExistingDatasourcesLackingBehind
+ expr: |
+ sum by(datasource) (cortex_datasource_seconds_until_reconcile{queued="true",domain="nova"}) < -600
+ and on(datasource) cortex_datasource_state{state="ready",domain="nova"} == 1
+ for: 10m
+ labels:
+ context: datasources
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+        summary: "Existing datasource `{{$labels.datasource}}` is lagging behind"
+ description: >
+ An existing datasource `{{$labels.datasource}}` has been queued for
+ reconciliation for more than 10 minutes. This may indicate issues with
+ the datasource controller's workqueue or that this or another datasource
+ is taking an unusually long time to reconcile.
+
+ - alert: CortexNovaReconcileErrorsHigh
+ expr: |
+ (sum by (controller) (rate(controller_runtime_reconcile_errors_total{service="cortex-nova-metrics"}[5m])))
+ / (sum by (controller) (rate(controller_runtime_reconcile_total{service="cortex-nova-metrics"}[5m]))) > 0.1
+ for: 15m
+ labels:
+ context: controller-errors
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+ summary: "Controller reconcile error rate >10%"
+ description: >
+ More than 10% of controller reconciles are resulting in errors. This may
+ indicate issues with the controller logic, connectivity problems, or
+ external factors causing failures. Check the controller logs for error
+ details and investigate the affected resources.
+
+ - alert: CortexNovaReconcileDurationHigher10Min
+ expr: |
+ (sum by (controller) (rate(controller_runtime_reconcile_time_seconds_sum{service="cortex-nova-metrics"}[5m])))
+ / (sum by (controller) (rate(controller_runtime_reconcile_time_seconds_count{service="cortex-nova-metrics"}[5m]))) > 600
+ for: 15m
+ labels:
+ context: controller-duration
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+ summary: "Controller reconciliation takes longer than ({{ $value | humanizeDuration }})"
+ description: "Reconcile duration higher than 10m while reconciling {{ $labels.controller }}"
+
+ - alert: CortexNovaWorkqueueNotDrained
+ expr: |
+ sum by (name) (workqueue_depth{service="cortex-nova-metrics"}) > 0
+ for: 60m
+ labels:
+ context: controller-workqueue
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+ summary: "Controller {{ $labels.name }}'s backlog is not being drained."
+ description: >
+ The workqueue for controller {{ $labels.name }} has a backlog that is
+ not being drained. This may indicate that the controller is overwhelmed
+ with work or is stuck on certain resources. Check the controller logs
+ and the state of the resources it manages for more details.
+
+ - alert: CortexNovaWebhookLatencyHigh
+ expr: |
+ histogram_quantile(0.9, avg(rate(controller_runtime_webhook_latency_seconds_bucket{service="cortex-nova-metrics"}[5m])) by (webhook, le)) > 0.2
+ for: 15m
+ labels:
+ context: controller-webhook
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+ summary: "Controller webhook {{ $labels.webhook }} latency is high"
+ description: >
+ The latency for webhook {{ $labels.webhook }} is higher than expected (p90 > 200ms).
+ This may indicate performance issues with the webhook server or the logic it executes.
+ Check the webhook server logs and monitor its resource usage for more insights.
+
+ - alert: CortexNovaWebhookErrorsHigh
+ expr: |
+ (sum by (webhook) (rate(controller_runtime_webhook_requests_total{code!="200", service="cortex-nova-metrics"}[5m])))
+ / (sum by (webhook) (rate(controller_runtime_webhook_requests_total{service="cortex-nova-metrics"}[5m]))) > 0.1
+ for: 15m
+ labels:
+ context: controller-webhook
+ dashboard: cortex/cortex
+ service: cortex
+ severity: warning
+ support_group: workload-management
+ annotations:
+ summary: "Controller webhook {{ $labels.webhook }} is experiencing errors"
+ description: >
+ The webhook {{ $labels.webhook }} has experienced errors in the last 5 minutes.
+ This may indicate issues with the webhook logic, connectivity problems, or
+ external factors causing failures. Check the webhook server logs for error
+ details and investigate the affected resources.
\ No newline at end of file
diff --git a/helm/bundles/cortex-placement-shim/templates/alerts.yaml b/helm/bundles/cortex-placement-shim/templates/alerts.yaml
new file mode 100644
index 000000000..7db3b96e6
--- /dev/null
+++ b/helm/bundles/cortex-placement-shim/templates/alerts.yaml
@@ -0,0 +1,17 @@
+# Copyright SAP SE
+# SPDX-License-Identifier: Apache-2.0
+
+{{- if .Values.alerts.enabled }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ name: cortex-placement-shim-alerts
+ labels:
+ type: alerting-rules
+ prometheus: {{ required ".Values.alerts.prometheus missing" .Values.alerts.prometheus | quote }}
+spec:
+ {{- $files := .Files.Glob "alerts/*.alerts.yaml" }}
+ {{- range $path, $file := $files }}
+ {{ $file | toString | nindent 2 }}
+ {{- end }}
+{{- end }}
diff --git a/helm/bundles/cortex-placement-shim/templates/clusterrole.yaml b/helm/bundles/cortex-placement-shim/templates/clusterrole.yaml
new file mode 100644
index 000000000..489878c89
--- /dev/null
+++ b/helm/bundles/cortex-placement-shim/templates/clusterrole.yaml
@@ -0,0 +1,23 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+ labels:
+ {{- include "chart.labels" . | nindent 4 }}
+ name: cortex-placement-shim-role-hypervisor
+rules:
+- apiGroups:
+ - kvm.cloud.sap
+ resources:
+ - hypervisors
+ verbs:
+ - get
+ - list
+ - patch
+ - update
+ - watch
+- apiGroups:
+ - kvm.cloud.sap
+ resources:
+ - hypervisors/status
+ verbs:
+ - get
\ No newline at end of file
diff --git a/helm/bundles/cortex-placement-shim/templates/clusterrolebinding.yaml b/helm/bundles/cortex-placement-shim/templates/clusterrolebinding.yaml
new file mode 100644
index 000000000..0388373f9
--- /dev/null
+++ b/helm/bundles/cortex-placement-shim/templates/clusterrolebinding.yaml
@@ -0,0 +1,14 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+ labels:
+ {{- include "chart.labels" . | nindent 4 }}
+ name: cortex-placement-shim-rolebinding-hypervisor
+roleRef:
+ apiGroup: rbac.authorization.k8s.io
+ kind: ClusterRole
+ name: cortex-placement-shim-role-hypervisor
+subjects:
+- kind: ServiceAccount
+ name: cortex-placement-shim
+ namespace: {{ .Release.Namespace }}
\ No newline at end of file
diff --git a/helm/bundles/cortex-placement-shim/values.yaml b/helm/bundles/cortex-placement-shim/values.yaml
new file mode 100644
index 000000000..40aa9cb11
--- /dev/null
+++ b/helm/bundles/cortex-placement-shim/values.yaml
@@ -0,0 +1,23 @@
+# Copyright SAP SE
+# SPDX-License-Identifier: Apache-2.0
+
+owner-info:
+ enabled: true
+ helm-chart-url: "https://github.com/cobaltcore-dev/cortex/helm/bundles/cortex-placement-shim"
+ maintainers:
+ - "arno.uhlig@sap.com"
+ - "julius.clausnitzer@sap.com"
+ - "malte.viering@sap.com"
+ - "marcel.gute@sap.com"
+ - "markus.wieland@sap.com"
+ - "p.matthes@sap.com"
+ support-group: "workload-management"
+ service: "cortex-placement-shim"
+
+alerts:
+ enabled: true
+ prometheus: openstack
+
+cortex-shim:
+ namePrefix: cortex-placement
+ conf: {} # TODO
diff --git a/helm/library/cortex-shim/Chart.lock b/helm/library/cortex-shim/Chart.lock
new file mode 100644
index 000000000..db4c5823b
--- /dev/null
+++ b/helm/library/cortex-shim/Chart.lock
@@ -0,0 +1,6 @@
+dependencies:
+- name: owner-info
+ repository: oci://ghcr.io/sapcc/helm-charts
+ version: 1.0.0
+digest: sha256:7643f231cc4ebda347fd12ec62fe4445c280e2b71d27eec555f3025290f5038f
+generated: "2025-08-26T10:55:05.888651+02:00"
diff --git a/helm/library/cortex-shim/Chart.yaml b/helm/library/cortex-shim/Chart.yaml
new file mode 100644
index 000000000..5282dc655
--- /dev/null
+++ b/helm/library/cortex-shim/Chart.yaml
@@ -0,0 +1,8 @@
+apiVersion: v2
+name: cortex-shim
+description: A Helm chart to distribute cortex shims.
+type: application
+version: 0.0.1
+appVersion: "sha-3e56acea"
+icon: "https://example.com/icon.png"
+dependencies: []
diff --git a/helm/library/cortex-shim/templates/_helpers.tpl b/helm/library/cortex-shim/templates/_helpers.tpl
new file mode 100644
index 000000000..782e14eef
--- /dev/null
+++ b/helm/library/cortex-shim/templates/_helpers.tpl
@@ -0,0 +1,50 @@
+{{- define "chart.name" -}}
+{{- if .Chart }}
+  {{- /* nameOverride must win over the chart name, else it is dead config */}}
+  {{- if .Values.nameOverride }}
+    {{- .Values.nameOverride | trunc 63 | trimSuffix "-" }}
+  {{- else if .Chart.Name }}
+    {{- .Chart.Name | trunc 63 | trimSuffix "-" }}
+  {{- end }}
+{{- else }}
+  {{- /* Fallback when rendered without chart metadata (e.g. library use) */}}
+  scheduling
+{{- end }}
+{{- end }}
+
+
+{{- define "chart.labels" -}}
+{{- if .Chart.AppVersion -}}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+{{- if .Chart.Version }}
+helm.sh/chart: {{ printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | quote }}
+{{- end }}
+app.kubernetes.io/name: {{ include "chart.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+
+{{- define "chart.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "chart.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+
+{{- define "chart.hasMutatingWebhooks" -}}
+{{- /* Returns "true" iff any entry in the passed list has type "mutating" */ -}}
+{{- $hasMutating := false -}}
+{{- range . -}}
+  {{- if eq .type "mutating" -}}{{- $hasMutating = true -}}{{- end -}}
+{{- end -}}
+{{- $hasMutating -}}{{- end }}
+
+
+{{- define "chart.hasValidatingWebhooks" -}}
+{{- /* Returns "true" iff any entry in the passed list has type "validating" */ -}}
+{{- $hasValidating := false -}}
+{{- range . -}}
+  {{- if eq .type "validating" -}}{{- $hasValidating = true -}}{{- end -}}
+{{- end -}}
+{{- $hasValidating -}}{{- end }}
diff --git a/helm/library/cortex-shim/templates/clusterrole.yaml b/helm/library/cortex-shim/templates/clusterrole.yaml
new file mode 100644
index 000000000..74f8e7ad4
--- /dev/null
+++ b/helm/library/cortex-shim/templates/clusterrole.yaml
@@ -0,0 +1,100 @@
+# Roles that grant the shims access to Cortex CRDs.
+{{- if .Values.rbac.enable }}
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+ labels:
+ {{- include "chart.labels" . | nindent 4 }}
+ name: {{ .Values.namePrefix }}-shim-role
+rules:
+- apiGroups:
+ - cortex.cloud
+ resources:
+ - knowledges
+ - datasources
+ - reservations
+ - decisions
+ - deschedulings
+ - pipelines
+ - kpis
+ - histories
+ verbs:
+ - create
+ - delete
+ - get
+ - list
+ - patch
+ - update
+ - watch
+- apiGroups:
+ - cortex.cloud
+ resources:
+ - knowledges/finalizers
+ - datasources/finalizers
+ - reservations/finalizers
+ - decisions/finalizers
+ - deschedulings/finalizers
+ - pipelines/finalizers
+ - kpis/finalizers
+ - histories/finalizers
+ verbs:
+ - update
+- apiGroups:
+ - cortex.cloud
+ resources:
+ - knowledges/status
+ - datasources/status
+ - reservations/status
+ - decisions/status
+ - deschedulings/status
+ - pipelines/status
+ - kpis/status
+ - histories/status
+ verbs:
+ - get
+ - patch
+ - update
+- apiGroups:
+ - events.k8s.io
+ resources:
+ - events
+ verbs:
+ - create
+ - patch
+{{- end -}}
+{{- if and .Values.rbac.enable .Values.metrics.enable }}
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+ labels:
+ {{- include "chart.labels" . | nindent 4 }}
+ name: {{ .Values.namePrefix }}-metrics-reader
+rules:
+- nonResourceURLs:
+ - "/metrics"
+ verbs:
+ - get
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+ labels:
+ {{- include "chart.labels" . | nindent 4 }}
+ name: {{ .Values.namePrefix }}-metrics-auth-role
+rules:
+- apiGroups:
+ - authentication.k8s.io
+ resources:
+ - tokenreviews
+ verbs:
+ - create
+- apiGroups:
+ - authorization.k8s.io
+ resources:
+ - subjectaccessreviews
+ verbs:
+ - create
+{{- end -}}
+
diff --git a/helm/library/cortex-shim/templates/clusterrolebinding.yaml b/helm/library/cortex-shim/templates/clusterrolebinding.yaml
new file mode 100644
index 000000000..ca82a0119
--- /dev/null
+++ b/helm/library/cortex-shim/templates/clusterrolebinding.yaml
@@ -0,0 +1,34 @@
+{{- if .Values.rbac.enable }}
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+ labels:
+ {{- include "chart.labels" . | nindent 4 }}
+ name: {{ .Values.namePrefix }}-shim-rolebinding
+roleRef:
+ apiGroup: rbac.authorization.k8s.io
+ kind: ClusterRole
+ name: {{ .Values.namePrefix }}-shim-role
+subjects:
+- kind: ServiceAccount
+ name: {{ .Values.namePrefix }}-{{ .Values.deployment.serviceAccountName }}
+ namespace: {{ .Release.Namespace }}
+{{- end -}}
+{{- if and .Values.rbac.enable .Values.metrics.enable }}
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+ labels:
+ {{- include "chart.labels" . | nindent 4 }}
+ name: {{ .Values.namePrefix }}-metrics-auth-rolebinding
+roleRef:
+ apiGroup: rbac.authorization.k8s.io
+ kind: ClusterRole
+ name: {{ .Values.namePrefix }}-metrics-auth-role
+subjects:
+- kind: ServiceAccount
+ name: {{ .Values.namePrefix }}-{{ .Values.deployment.serviceAccountName }}
+ namespace: {{ .Release.Namespace }}
+{{- end -}}
+
diff --git a/helm/library/cortex-shim/templates/deployment.yaml b/helm/library/cortex-shim/templates/deployment.yaml
new file mode 100644
index 000000000..b38eb3c02
--- /dev/null
+++ b/helm/library/cortex-shim/templates/deployment.yaml
@@ -0,0 +1,112 @@
+# This file is safe from kubebuilder edit --plugins=helm/v1-alpha
+# If you want to re-generate, add the --force flag.
+
+{{- if .Values.deployment.enable }}
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: {{ .Values.namePrefix }}-shim
+ namespace: {{ .Release.Namespace }}
+ labels:
+ {{- include "chart.labels" . | nindent 4 }}
+spec:
+ replicas: {{ .Values.deployment.replicas }}
+ selector:
+ matchLabels:
+ {{- include "chart.selectorLabels" . | nindent 6 }}
+ template:
+ metadata:
+ annotations:
+ kubectl.kubernetes.io/default-container: shim
+ labels:
+ {{- include "chart.labels" . | nindent 8 }}
+ {{- if and .Values.deployment.pod .Values.deployment.pod.labels }}
+ {{- range $key, $value := .Values.deployment.pod.labels }}
+ {{ $key }}: {{ $value }}
+ {{- end }}
+ {{- end }}
+ spec:
+ containers:
+ - name: shim
+ args:
+ {{- range .Values.deployment.container.args }}
+ - {{ . }}
+ {{- end }}
+ ports:
+ - name: api
+ containerPort: 8080
+ protocol: TCP
+ - name: metrics
+ containerPort: 2112
+ protocol: TCP
+ command:
+ - /main
+ image: {{ .Values.deployment.container.image.repository }}:{{ .Values.deployment.container.image.tag | default .Chart.AppVersion }}
+ {{- if .Values.deployment.container.image.pullPolicy }}
+ imagePullPolicy: {{ .Values.deployment.container.image.pullPolicy }}
+ {{- end }}
+ {{- if .Values.deployment.container.env }}
+ env:
+ {{- range $key, $value := .Values.deployment.container.env }}
+ - name: {{ $key }}
+ value: {{ $value }}
+ {{- end }}
+ {{- end }}
+ livenessProbe:
+ {{- toYaml .Values.deployment.container.livenessProbe | nindent 12 }}
+ readinessProbe:
+ {{- toYaml .Values.deployment.container.readinessProbe | nindent 12 }}
+ resources:
+ {{- toYaml .Values.deployment.container.resources | nindent 12 }}
+ securityContext:
+ {{- toYaml .Values.deployment.container.securityContext | nindent 12 }}
+ volumeMounts:
+ - name: shim-config-volume
+ mountPath: /etc/config
+ - name: shim-secrets-volume
+ mountPath: /etc/secrets
+ readOnly: true
+ securityContext:
+ {{- toYaml .Values.deployment.securityContext | nindent 8 }}
+ serviceAccountName: {{ .Values.namePrefix }}-{{ .Values.deployment.serviceAccountName }}
+ terminationGracePeriodSeconds: {{ .Values.deployment.terminationGracePeriodSeconds }}
+ volumes:
+ # Custom values to configure the shim.
+ - name: shim-config-volume
+ configMap:
+ name: {{ .Values.namePrefix }}-shim-config
+ - name: shim-secrets-volume
+ secret:
+ secretName: {{ .Values.namePrefix }}-shim-secrets
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: {{ .Values.namePrefix }}-shim-config
+data:
+ conf.json: |-
+ {{- $mergedConf := dict }}
+ {{- if .Values.global.conf }}
+ {{- $mergedConf = .Values.global.conf }}
+ {{- end }}
+ {{- if .Values.conf }}
+ {{- $mergedConf = mergeOverwrite .Values.conf $mergedConf }}
+ {{- end }}
+ {{ toJson $mergedConf }}
+---
+apiVersion: v1
+kind: Secret
+metadata:
+ name: {{ .Values.namePrefix }}-shim-secrets
+type: Opaque
+data:
+ secrets.json: |-
+ {{- $mergedSecrets := dict }}
+ {{- if .Values.global.secrets }}
+ {{- $mergedSecrets = .Values.global.secrets }}
+ {{- end }}
+ {{- if .Values.secrets }}
+ {{- $mergedSecrets = mergeOverwrite .Values.secrets $mergedSecrets }}
+ {{- end }}
+ {{ toJson $mergedSecrets | b64enc }}
+{{- end }}
\ No newline at end of file
diff --git a/helm/library/cortex-shim/templates/service.yaml b/helm/library/cortex-shim/templates/service.yaml
new file mode 100644
index 000000000..549ceed95
--- /dev/null
+++ b/helm/library/cortex-shim/templates/service.yaml
@@ -0,0 +1,33 @@
+apiVersion: v1
+kind: Service
+metadata:
+ name: {{ .Values.namePrefix }}-shim-service
+ namespace: {{ .Release.Namespace }}
+ labels:
+ {{- include "chart.labels" . | nindent 4 }}
+spec:
+ ports:
+ - port: 8080
+ targetPort: api
+ protocol: TCP
+ name: api
+ selector:
+ app.kubernetes.io/name: {{ include "chart.name" . }}
+{{- if .Values.metrics.enable }}
+---
+apiVersion: v1
+kind: Service
+metadata:
+ name: {{ .Values.namePrefix }}-shim-metrics-service
+ namespace: {{ .Release.Namespace }}
+ labels:
+ {{- include "chart.labels" . | nindent 4 }}
+spec:
+ ports:
+ - port: 2112
+ targetPort: metrics
+ protocol: TCP
+ name: metrics
+ selector:
+ app.kubernetes.io/name: {{ include "chart.name" . }}
+{{- end }}
diff --git a/helm/library/cortex-shim/templates/serviceaccount.yaml b/helm/library/cortex-shim/templates/serviceaccount.yaml
new file mode 100644
index 000000000..ea0789dd0
--- /dev/null
+++ b/helm/library/cortex-shim/templates/serviceaccount.yaml
@@ -0,0 +1,15 @@
+{{- if .Values.rbac.enable }}
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+ labels:
+ {{- include "chart.labels" . | nindent 4 }}
+ {{- if and .Values.deployment.serviceAccount .Values.deployment.serviceAccount.annotations }}
+ annotations:
+ {{- range $key, $value := .Values.deployment.serviceAccount.annotations }}
+ {{ $key }}: {{ $value }}
+ {{- end }}
+ {{- end }}
+ name: {{ .Values.namePrefix }}-{{ .Values.deployment.serviceAccountName }}
+ namespace: {{ .Release.Namespace }}
+{{- end -}}
diff --git a/helm/library/cortex-shim/templates/servicemonitor.yaml b/helm/library/cortex-shim/templates/servicemonitor.yaml
new file mode 100644
index 000000000..803e66dd5
--- /dev/null
+++ b/helm/library/cortex-shim/templates/servicemonitor.yaml
@@ -0,0 +1,16 @@
+# To integrate with Prometheus.
+{{- if .Values.prometheus.enable }}
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+ labels:
+ {{- include "chart.labels" . | nindent 4 }}
+ name: {{ .Values.namePrefix }}-shim-metrics-monitor
+ namespace: {{ .Release.Namespace }}
+spec:
+ endpoints:
+ - port: metrics
+ selector:
+ matchLabels:
+ app.kubernetes.io/name: {{ include "chart.name" . }}
+{{- end }}
diff --git a/helm/library/cortex-shim/values.yaml b/helm/library/cortex-shim/values.yaml
new file mode 100644
index 000000000..6434e823a
--- /dev/null
+++ b/helm/library/cortex-shim/values.yaml
@@ -0,0 +1,68 @@
+deployment:
+ enable: true
+ replicas: 1
+ container:
+ image:
+ repository: ghcr.io/cobaltcore-dev/cortex-shim
+ args:
+ - "--metrics-bind-address=:2112"
+ - "--health-probe-bind-address=:8081"
+ - "--metrics-secure=false"
+ resources:
+ limits:
+ cpu: 500m
+ memory: 2048Mi
+ requests:
+ cpu: 10m
+ memory: 64Mi
+ livenessProbe:
+ initialDelaySeconds: 15
+ periodSeconds: 20
+ httpGet:
+ path: /healthz
+ port: 8081
+ readinessProbe:
+ initialDelaySeconds: 5
+ periodSeconds: 10
+ httpGet:
+ path: /readyz
+ port: 8081
+ securityContext:
+ allowPrivilegeEscalation: false
+ capabilities:
+ drop:
+ - "ALL"
+ securityContext:
+ runAsNonRoot: true
+ seccompProfile:
+ type: RuntimeDefault
+ terminationGracePeriodSeconds: 10
+ serviceAccountName: shim
+
+# [METRICS]: Set to true to generate manifests for exporting metrics.
+# To disable metrics export set false, and ensure that the
+# ControllerManager argument "--metrics-bind-address=:8443" is removed.
+metrics:
+ enable: true
+
+# [RBAC]: To enable RBAC (Permissions) configurations
+rbac:
+ enable: true
+
+# [PROMETHEUS]: To enable a ServiceMonitor to export metrics to Prometheus set true
+prometheus:
+ enable: true
+
+global:
+ conf: {}
+
+# Use this to disambiguate multiple Cortex deployments in the same cluster.
+namePrefix: cortex
+conf:
+ # The scheduling domain this operator is responsible for.
+ schedulingDomain: cortex
+ # Used to differentiate different cortex deployments in the same cluster (e.g. leader election ID)
+ leaderElectionID: cortex-unknown
+ enabledControllers:
+ # The explanation controller is available for all decision resources.
+ - explanation-controller
diff --git a/helm/library/cortex/templates/manager/manager.yaml b/helm/library/cortex/templates/manager/manager.yaml
index 73672164f..0c9f362aa 100644
--- a/helm/library/cortex/templates/manager/manager.yaml
+++ b/helm/library/cortex/templates/manager/manager.yaml
@@ -51,7 +51,7 @@ spec:
protocol: TCP
{{- end }}
command:
- - /manager
+ - /main
image: {{ .Values.controllerManager.container.image.repository }}:{{ .Values.controllerManager.container.image.tag | default .Chart.AppVersion }}
{{- if .Values.controllerManager.container.image.pullPolicy }}
imagePullPolicy: {{ .Values.controllerManager.container.image.pullPolicy }}
diff --git a/internal/shim/placement/.gitkeep b/internal/shim/placement/.gitkeep
new file mode 100644
index 000000000..e69de29bb
From 1f7b644c1cd15776b62cf2f4b686dae30ec46d2a Mon Sep 17 00:00:00 2001
From: Philipp Matthes
Date: Thu, 9 Apr 2026 11:56:31 +0200
Subject: [PATCH 02/17] Add monitoring labels and scaffold manager (w/o leader
election)
---
cmd/shim/main.go | 248 +++++++++++++++++-
.../bundles/cortex-placement-shim/values.yaml | 6 +-
helm/library/cortex-shim/values.yaml | 9 +-
3 files changed, 252 insertions(+), 11 deletions(-)
diff --git a/cmd/shim/main.go b/cmd/shim/main.go
index 6b0634229..d59490c3c 100644
--- a/cmd/shim/main.go
+++ b/cmd/shim/main.go
@@ -3,7 +3,251 @@
package main
+import (
+ "context"
+ "crypto/tls"
+ "errors"
+ "flag"
+ "net/http"
+ "os"
+ "path/filepath"
+
+ "github.com/cobaltcore-dev/cortex/api/v1alpha1"
+ "github.com/cobaltcore-dev/cortex/pkg/conf"
+ "github.com/cobaltcore-dev/cortex/pkg/monitoring"
+ hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
+ "github.com/sapcc/go-bits/httpext"
+ "k8s.io/apimachinery/pkg/runtime"
+ utilruntime "k8s.io/apimachinery/pkg/util/runtime"
+ clientgoscheme "k8s.io/client-go/kubernetes/scheme"
+ ctrl "sigs.k8s.io/controller-runtime"
+ "sigs.k8s.io/controller-runtime/pkg/certwatcher"
+ "sigs.k8s.io/controller-runtime/pkg/healthz"
+ "sigs.k8s.io/controller-runtime/pkg/log/zap"
+ "sigs.k8s.io/controller-runtime/pkg/metrics"
+ "sigs.k8s.io/controller-runtime/pkg/metrics/filters"
+ metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
+ "sigs.k8s.io/controller-runtime/pkg/webhook"
+)
+
+var (
+ // Scheme defines the scheme for the API types used by the shim.
+ scheme = runtime.NewScheme()
+ // setupLog is the logger used for setup operations in the shim.
+ setupLog = ctrl.Log.WithName("setup")
+)
+
+func init() {
+ // Bind the Kubernetes client-go scheme and the custom API types to the
+ // scheme used by the shim.
+ utilruntime.Must(clientgoscheme.AddToScheme(scheme))
+ utilruntime.Must(v1alpha1.AddToScheme(scheme)) // Cortex crds
+ utilruntime.Must(hv1.AddToScheme(scheme)) // Hypervisor crd
+}
+
func main() {
- // TODO: this needs scaffolding, for now it just does nothing.
- select {}
+ ctx := context.Background()
+ restConfig := ctrl.GetConfigOrDie()
+
+ var metricsAddr string
+ var metricsCertPath, metricsCertName, metricsCertKey string
+ var webhookCertPath, webhookCertName, webhookCertKey string
+ // The shim does not require leader election, but this flag is provided to
+ // stay consistent with the kubebuilder scaffold.
+ var enableLeaderElection bool
+ var probeAddr string
+ var secureMetrics bool
+ var enableHTTP2 bool
+ var tlsOpts []func(*tls.Config)
+ flag.StringVar(&metricsAddr, "metrics-bind-address", "0", "The address the metrics endpoint binds to. "+
+ "Use :8443 for HTTPS or :8080 for HTTP, or leave as 0 to disable the metrics service.")
+ flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
+ flag.BoolVar(&enableLeaderElection, "leader-elect", false,
+ "Enable leader election for controller manager. "+
+ "Enabling this will ensure there is only one active controller manager.")
+ flag.BoolVar(&secureMetrics, "metrics-secure", true,
+ "If set, the metrics endpoint is served securely via HTTPS. Use --metrics-secure=false to use HTTP instead.")
+ flag.StringVar(&webhookCertPath, "webhook-cert-path", "", "The directory that contains the webhook certificate.")
+ flag.StringVar(&webhookCertName, "webhook-cert-name", "tls.crt", "The name of the webhook certificate file.")
+ flag.StringVar(&webhookCertKey, "webhook-cert-key", "tls.key", "The name of the webhook key file.")
+ flag.StringVar(&metricsCertPath, "metrics-cert-path", "",
+ "The directory that contains the metrics server certificate.")
+ flag.StringVar(&metricsCertName, "metrics-cert-name", "tls.crt", "The name of the metrics server certificate file.")
+ flag.StringVar(&metricsCertKey, "metrics-cert-key", "tls.key", "The name of the metrics server key file.")
+ flag.BoolVar(&enableHTTP2, "enable-http2", false,
+ "If set, HTTP/2 will be enabled for the metrics and webhook servers")
+ opts := zap.Options{
+ Development: true,
+ }
+ opts.BindFlags(flag.CommandLine)
+ flag.Parse()
+
+ // Check that we're really running this shim without leader election enabled.
+ if enableLeaderElection {
+ err := errors.New("leader election should not be enabled for the shim")
+ setupLog.Error(err, "invalid configuration")
+ os.Exit(1)
+ }
+
+ ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts)))
+
+ // if the enable-http2 flag is false (the default), http/2 should be disabled
+ // due to its vulnerabilities. More specifically, disabling http/2 will
+ // prevent from being vulnerable to the HTTP/2 Stream Cancellation and
+ // Rapid Reset CVEs. For more information see:
+ // - https://github.com/advisories/GHSA-qppj-fm5r-hxr3
+ // - https://github.com/advisories/GHSA-4374-p667-p6c8
+ disableHTTP2 := func(c *tls.Config) {
+ setupLog.Info("disabling http/2")
+ c.NextProtos = []string{"http/1.1"}
+ }
+
+ if !enableHTTP2 {
+ tlsOpts = append(tlsOpts, disableHTTP2)
+ }
+
+ // Create watchers for metrics and webhooks certificates
+ var metricsCertWatcher, webhookCertWatcher *certwatcher.CertWatcher
+
+ // Initial webhook TLS options
+ webhookTLSOpts := tlsOpts
+
+ if webhookCertPath != "" {
+ setupLog.Info("Initializing webhook certificate watcher using provided certificates",
+ "webhook-cert-path", webhookCertPath, "webhook-cert-name", webhookCertName, "webhook-cert-key", webhookCertKey)
+
+ var err error
+ webhookCertWatcher, err = certwatcher.New(
+ filepath.Join(webhookCertPath, webhookCertName),
+ filepath.Join(webhookCertPath, webhookCertKey),
+ )
+ if err != nil {
+ setupLog.Error(err, "Failed to initialize webhook certificate watcher")
+ os.Exit(1)
+ }
+
+ webhookTLSOpts = append(webhookTLSOpts, func(config *tls.Config) {
+ config.GetCertificate = webhookCertWatcher.GetCertificate
+ })
+ }
+
+ webhookServer := webhook.NewServer(webhook.Options{
+ TLSOpts: webhookTLSOpts,
+ })
+
+ // Metrics endpoint is enabled in 'config/default/kustomization.yaml'. The Metrics options configure the server.
+ // More info:
+ // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.21.0/pkg/metrics/server
+ // - https://book.kubebuilder.io/reference/metrics.html
+ metricsServerOptions := metricsserver.Options{
+ BindAddress: metricsAddr,
+ SecureServing: secureMetrics,
+ TLSOpts: tlsOpts,
+ }
+
+ if secureMetrics {
+ // FilterProvider is used to protect the metrics endpoint with authn/authz.
+ // These configurations ensure that only authorized users and service accounts
+ // can access the metrics endpoint. The RBAC are configured in 'config/rbac/kustomization.yaml'. More info:
+ // https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.21.0/pkg/metrics/filters#WithAuthenticationAndAuthorization
+ metricsServerOptions.FilterProvider = filters.WithAuthenticationAndAuthorization
+ }
+
+ // If the certificate is not specified, controller-runtime will automatically
+ // generate self-signed certificates for the metrics server. While convenient for development and testing,
+ // this setup is not recommended for production.
+ //
+ // If you enable certManager, uncomment the following lines:
+ // - [METRICS-WITH-CERTS] at config/default/kustomization.yaml to generate and use certificates
+ // managed by cert-manager for the metrics server.
+ // - [PROMETHEUS-WITH-CERTS] at config/prometheus/kustomization.yaml for TLS certification.
+ if metricsCertPath != "" {
+ setupLog.Info("Initializing metrics certificate watcher using provided certificates",
+ "metrics-cert-path", metricsCertPath, "metrics-cert-name", metricsCertName, "metrics-cert-key", metricsCertKey)
+
+ var err error
+ metricsCertWatcher, err = certwatcher.New(
+ filepath.Join(metricsCertPath, metricsCertName),
+ filepath.Join(metricsCertPath, metricsCertKey),
+ )
+ if err != nil {
+			setupLog.Error(err, "Failed to initialize metrics certificate watcher")
+ os.Exit(1)
+ }
+
+ metricsServerOptions.TLSOpts = append(metricsServerOptions.TLSOpts, func(config *tls.Config) {
+ config.GetCertificate = metricsCertWatcher.GetCertificate
+ })
+ }
+
+ mgr, err := ctrl.NewManager(restConfig, ctrl.Options{
+ Scheme: scheme,
+ Metrics: metricsServerOptions,
+ WebhookServer: webhookServer,
+ HealthProbeBindAddress: probeAddr,
+ // Kept for consistency with kubebuilder scaffold, but the shim should
+ // always run with leader election disabled.
+ LeaderElection: enableLeaderElection,
+ })
+ if err != nil {
+ setupLog.Error(err, "unable to start manager")
+ os.Exit(1)
+ }
+
+ // TODO: Initialize multicluster client here.
+
+ // Our custom monitoring registry can add prometheus labels to all metrics.
+ // This is useful to distinguish metrics from different deployments.
+ metricsConfig := conf.GetConfigOrDie[monitoring.Config]()
+ metrics.Registry = monitoring.WrapRegistry(metrics.Registry, metricsConfig)
+
+ // API endpoint.
+ mux := http.NewServeMux()
+
+ // +kubebuilder:scaffold:builder
+
+ if metricsCertWatcher != nil {
+ setupLog.Info("Adding metrics certificate watcher to manager")
+ if err := mgr.Add(metricsCertWatcher); err != nil {
+ setupLog.Error(err, "unable to add metrics certificate watcher to manager")
+ os.Exit(1)
+ }
+ }
+
+ if webhookCertWatcher != nil {
+ setupLog.Info("Adding webhook certificate watcher to manager")
+ if err := mgr.Add(webhookCertWatcher); err != nil {
+ setupLog.Error(err, "unable to add webhook certificate watcher to manager")
+ os.Exit(1)
+ }
+ }
+
+ if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
+ setupLog.Error(err, "unable to set up health check")
+ os.Exit(1)
+ }
+ if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
+ setupLog.Error(err, "unable to set up ready check")
+ os.Exit(1)
+ }
+
+ errchan := make(chan error)
+ go func() {
+ errchan <- func() error {
+ setupLog.Info("starting api server", "address", ":8080")
+ return httpext.ListenAndServeContext(ctx, ":8080", mux)
+ }()
+ }()
+ go func() {
+ if err := <-errchan; err != nil {
+ setupLog.Error(err, "problem running api server")
+ os.Exit(1)
+ }
+ }()
+
+ setupLog.Info("starting manager")
+ if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
+ setupLog.Error(err, "problem running manager")
+ os.Exit(1)
+ }
}
diff --git a/helm/bundles/cortex-placement-shim/values.yaml b/helm/bundles/cortex-placement-shim/values.yaml
index 40aa9cb11..6dd793653 100644
--- a/helm/bundles/cortex-placement-shim/values.yaml
+++ b/helm/bundles/cortex-placement-shim/values.yaml
@@ -20,4 +20,8 @@ alerts:
cortex-shim:
namePrefix: cortex-placement
- conf: {} # TODO
+ conf:
+ monitoring:
+ labels:
+ github_org: cobaltcore-dev
+ github_repo: cortex
diff --git a/helm/library/cortex-shim/values.yaml b/helm/library/cortex-shim/values.yaml
index 6434e823a..1c45c2542 100644
--- a/helm/library/cortex-shim/values.yaml
+++ b/helm/library/cortex-shim/values.yaml
@@ -58,11 +58,4 @@ global:
# Use this to unambiguate multiple cortex deployments in the same cluster.
namePrefix: cortex
-conf:
- # The scheduling domain this operator is responsible for.
- schedulingDomain: cortex
- # Used to differentiate different cortex deployments in the same cluster (e.g. leader election ID)
- leaderElectionID: cortex-unknown
- enabledControllers:
- # The explanation controller is available for all decision resources.
- - explanation-controller
+conf: {} # No config for now that's needed by all the shims.
From 03fc00764340f722b9992c8062e573182ec0eeca Mon Sep 17 00:00:00 2001
From: Philipp Matthes
Date: Thu, 9 Apr 2026 12:01:19 +0200
Subject: [PATCH 03/17] Remove alerts
---
.../alerts/placement-shim.alerts.yaml | 735 +-----------------
1 file changed, 2 insertions(+), 733 deletions(-)
diff --git a/helm/bundles/cortex-placement-shim/alerts/placement-shim.alerts.yaml b/helm/bundles/cortex-placement-shim/alerts/placement-shim.alerts.yaml
index 41bf29794..03aea7763 100644
--- a/helm/bundles/cortex-placement-shim/alerts/placement-shim.alerts.yaml
+++ b/helm/bundles/cortex-placement-shim/alerts/placement-shim.alerts.yaml
@@ -1,734 +1,3 @@
groups:
-- name: cortex-nova-alerts
- rules:
- - alert: CortexNovaSchedulingDown
- expr: |
- up{pod=~"cortex-nova-scheduling-.*"} != 1 or
- absent(up{pod=~"cortex-nova-scheduling-.*"})
- for: 5m
- labels:
- context: liveness
- dashboard: cortex/cortex
- service: cortex
- severity: critical
- support_group: workload-management
- playbook: docs/support/playbook/cortex/down
- annotations:
- summary: "Cortex Scheduling for Nova is down"
- description: >
- The Cortex scheduling service is down. Scheduling requests from Nova will
- not be served. This is non-critical for vmware virtual machines, but
- blocks kvm virtual machines from being scheduled. Thus, it is
- recommended to immediately investigate and resolve the issue.
-
- - alert: CortexNovaKnowledgeDown
- expr: |
- up{pod=~"cortex-nova-knowledge-.*"} != 1 or
- absent(up{pod=~"cortex-nova-knowledge-.*"})
- for: 5m
- labels:
- context: liveness
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- playbook: docs/support/playbook/cortex/down
- annotations:
- summary: "Cortex Knowledge for Nova is down"
- description: >
- The Cortex Knowledge service is down. This is no immediate problem,
- since cortex is still able to process requests,
- but the quality of the responses may be affected.
-
- - alert: CortexNovaDeschedulerPipelineErroring
- expr: delta(cortex_detector_pipeline_run_duration_seconds_count{component="nova-scheduling", error="true"}[2m]) > 0
- for: 5m
- labels:
- context: descheduler
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "Descheduler pipeline is erroring."
- description: >
- The Cortex descheduler pipeline is encountering errors during its execution.
- This may indicate issues with the descheduling logic or the underlying infrastructure.
- It is recommended to investigate the descheduler logs and the state of the VMs being processed.
-
- - alert: CortexNovaHttpRequest400sTooHigh
- expr: rate(cortex_scheduler_api_request_duration_seconds_count{service="cortex-nova-metrics", status=~"4.+"}[5m]) > 0.1
- for: 5m
- labels:
- context: api
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "Nova Scheduler HTTP request 400 errors too high"
- description: >
- Nova Scheduler is responding to placement requests with HTTP 4xx
- errors. This is expected when the scheduling request cannot be served
- by Cortex. However, it could also indicate that the request format has
- changed and Cortex is unable to parse it.
-
- - alert: CortexNovaSchedulingHttpRequest500sTooHigh
- expr: rate(cortex_scheduler_api_request_duration_seconds_count{service="cortex-nova-metrics", status=~"5.+" }[5m]) > 0.1
- for: 5m
- labels:
- context: api
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "Nova Scheduler HTTP request 500 errors too high"
- description: >
- Nova Scheduler is responding to placement requests with HTTP 5xx errors.
- This is not expected and indicates that Cortex is having some internal problem.
- Nova will continue to place new VMs, but the placement will be less desirable.
- Thus, no immediate action is needed.
-
- - alert: CortexNovaHighMemoryUsage
- expr: process_resident_memory_bytes{service="cortex-nova-metrics"} > 6000 * 1024 * 1024
- for: 5m
- labels:
- context: memory
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "`{{$labels.component}}` uses too much memory"
- description: >
- `{{$labels.component}}` should not be using more than 6000 MiB of memory. Usually it
- should use much less, so there may be a memory leak or other changes
- that are causing the memory usage to increase significantly.
-
- - alert: CortexNovaHighCPUUsage
- expr: rate(process_cpu_seconds_total{service="cortex-nova-metrics"}[1m]) > 0.5
- for: 5m
- labels:
- context: cpu
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "`{{$labels.component}}` uses too much CPU"
- description: >
- `{{$labels.component}}` should not be using more than 50% of a single CPU core. Usually
- it should use much less, so there may be a CPU leak or other changes
- that are causing the CPU usage to increase significantly.
-
- - alert: CortexNovaTooManyDBConnectionAttempts
- expr: rate(cortex_db_connection_attempts_total{service="cortex-nova-metrics"}[5m]) > 0.1
- for: 5m
- labels:
- context: db
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "`{{$labels.component}}` is trying to connect to the database too often"
- description: >
- `{{$labels.component}}` is trying to connect to the database too often. This may happen
- when the database is down or the connection parameters are misconfigured.
-
- - alert: CortexNovaSyncNotSuccessful
- expr: cortex_sync_request_processed_total{service="cortex-nova-metrics"} - cortex_sync_request_duration_seconds_count{service="cortex-nova-metrics"} > 0
- for: 5m
- labels:
- context: syncstatus
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "`{{$labels.component}}` Sync not successful"
- description: >
- `{{$labels.component}}` experienced an issue syncing data from the datasource `{{$labels.datasource}}`. This may
- happen when the datasource (OpenStack, Prometheus, etc.) is down or
- the sync module is misconfigured. No immediate action is needed, since
- the sync module will retry the sync operation and the currently synced
- data will be kept. However, when this problem persists for a longer
- time the service will have a less recent view of the datacenter.
-
- - alert: CortexNovaSyncObjectsDroppedToZero
- expr: cortex_sync_objects{service="cortex-nova-metrics", datasource!="openstack_migrations"} == 0
- for: 60m
- labels:
- context: syncobjects
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "`{{$labels.component}}` is not syncing any new data from `{{$labels.datasource}}`"
- description: >
- `{{$labels.component}}` is not syncing any objects from the datasource `{{$labels.datasource}}`. This may happen
- when the datasource (OpenStack, Prometheus, etc.) is down or the sync
- module is misconfigured. No immediate action is needed, since the sync
- module will retry the sync operation and the currently synced data will
- be kept. However, when this problem persists for a longer time the
- service will have a less recent view of the datacenter.
-
- - alert: CortexNovaDatasourceUnready
- expr: cortex_datasource_state{domain="nova",state!="ready"} != 0
- for: 60m
- labels:
- context: datasources
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "Datasource `{{$labels.datasource}}` is in `{{$labels.state}}` state"
- description: >
- This may indicate issues with the datasource
- connectivity or configuration. It is recommended to investigate the
- datasource status and logs for more details.
-
- - alert: CortexNovaKnowledgeUnready
- expr: cortex_knowledge_state{domain="nova",state!="ready"} != 0
- for: 60m
- labels:
- context: knowledge
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "Knowledge `{{$labels.knowledge}}` is in `{{$labels.state}}` state"
- description: >
- This may indicate issues with the knowledge
- configuration. It is recommended to investigate the
- knowledge status and logs for more details.
-
- - alert: CortexNovaDecisionsWithErrors
- expr: cortex_decision_state{domain="nova",state="error"} > 0
- for: 5m
- labels:
- context: decisions
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "Some decisions are in error state for operator `{{$labels.operator}}`"
- description: >
- The cortex scheduling pipeline generated decisions that are in error state.
- This may indicate issues with the decision logic or the underlying infrastructure.
- It is recommended to investigate the decision logs and the state of the
- VMs being processed.
-
- - alert: CortexNovaTooManyDecisionsWaiting
- expr: cortex_decision_state{domain="nova",state="waiting"} > 10
- for: 5m
- labels:
- context: decisions
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "Too many decisions are in waiting state for operator `{{$labels.operator}}`"
- description: >
- The cortex scheduling pipeline has a high number of decisions for which
- no target host has been assigned yet.
-
- This may indicate a backlog in processing or issues with the decision logic.
- It is recommended to investigate the decision logs and the state of the
- VMs being processed.
-
- - alert: CortexNovaKPIUnready
- expr: |
- cortex_kpi_state{domain="nova",state!="ready"} != 0
- for: 60m
- labels:
- context: kpis
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "KPI `{{$labels.kpi}}` is in `{{$labels.state}}` state"
- description: >
- This may indicate issues with the KPI
- configuration. It is recommended to investigate the
- KPI status and logs for more details.
-
- - alert: CortexNovaPipelineUnready
- expr: cortex_pipeline_state{domain="nova",state!="ready"} != 0
- for: 5m
- labels:
- context: pipelines
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "Pipeline `{{$labels.pipeline}}` is in `{{$labels.state}}` state"
- description: >
- This may indicate issues with the pipeline
- configuration. It is recommended to investigate the
- pipeline status and logs for more details.
-
- # Committed Resource Info API Alerts
- - alert: CortexNovaCommittedResourceInfoHttpRequest500sTooHigh
- expr: rate(cortex_committed_resource_info_api_requests_total{service="cortex-nova-metrics", status_code=~"5.."}[5m]) > 0.1
- for: 5m
- labels:
- context: committed-resource-api
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "Committed Resource info API HTTP 500 errors too high"
- description: >
- The committed resource info API (Limes LIQUID integration) is responding
- with HTTP 5xx errors. This indicates internal problems building service info,
- such as invalid flavor group data. Limes will not be able to discover available
- resources until the issue is resolved.
-
- # Committed Resource Change API Alerts
- - alert: CortexNovaCommittedResourceHttpRequest400sTooHigh
- expr: rate(cortex_committed_resource_change_api_requests_total{service="cortex-nova-metrics", status_code=~"4.."}[5m]) > 0.1
- for: 5m
- labels:
- context: committed-resource-api
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "Committed Resource change API HTTP 400 errors too high"
- description: >
- The committed resource change API (Limes LIQUID integration) is responding
- with HTTP 4xx errors. This may happen when Limes sends a request with
- an outdated info version (409), the API is temporarily unavailable,
- or the request format is invalid. Limes will typically retry these
- requests, so no immediate action is needed unless the errors persist.
-
- - alert: CortexNovaCommittedResourceHttpRequest500sTooHigh
- expr: rate(cortex_committed_resource_change_api_requests_total{service="cortex-nova-metrics", status_code=~"5.."}[5m]) > 0.1
- for: 5m
- labels:
- context: committed-resource-api
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "Committed Resource change API HTTP 500 errors too high"
- description: >
- The committed resource change API (Limes LIQUID integration) is responding
- with HTTP 5xx errors. This is not expected and indicates that Cortex
- is having an internal problem processing commitment changes. Limes will
- continue to retry, but new commitments may not be fulfilled until the
- issue is resolved.
-
- - alert: CortexNovaCommittedResourceLatencyTooHigh
- expr: histogram_quantile(0.95, sum(rate(cortex_committed_resource_change_api_request_duration_seconds_bucket{service="cortex-nova-metrics"}[5m])) by (le)) > 30
- for: 5m
- labels:
- context: committed-resource-api
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "Committed Resource change API latency too high"
- description: >
- The committed resource change API (Limes LIQUID integration) is experiencing
- high latency (p95 > 30s). This may indicate that the scheduling pipeline
- is under heavy load or that reservation scheduling is taking longer than
- expected. Limes requests may time out, causing commitment changes to fail.
-
- - alert: CortexNovaCommittedResourceRejectionRateTooHigh
- expr: |
- sum(rate(cortex_committed_resource_change_api_commitment_changes_total{service="cortex-nova-metrics", result="rejected"}[5m]))
- / sum(rate(cortex_committed_resource_change_api_commitment_changes_total{service="cortex-nova-metrics"}[5m])) > 0.5
- for: 5m
- labels:
- context: committed-resource-api
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "Committed Resource rejection rate too high"
- description: >
- More than 50% of commitment change requests are being rejected.
- This may indicate insufficient capacity in the datacenter to fulfill
- new commitments, or issues with the commitment scheduling logic.
- Rejected commitments are rolled back, so Limes will see them as failed
- and may retry or report the failure to users.
-
- - alert: CortexNovaCommittedResourceTimeoutsTooHigh
- expr: increase(cortex_committed_resource_change_api_timeouts_total{service="cortex-nova-metrics"}[5m]) > 0
- for: 5m
- labels:
- context: committed-resource-api
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "Committed Resource change API timeouts too high"
- description: >
- The committed resource change API (Limes LIQUID integration) timed out
- while waiting for reservations to become ready. This indicates that the
- scheduling pipeline is overloaded or reservations are taking too long
- to be scheduled. Affected commitment changes are rolled back and Limes
- will see them as failed. Consider investigating the scheduler performance
- or increasing the timeout configuration.
-
- # Committed Resource Usage API Alerts
- - alert: CortexNovaCommittedResourceUsageHttpRequest400sTooHigh
- expr: rate(cortex_committed_resource_usage_api_requests_total{service="cortex-nova-metrics", status_code=~"4.."}[5m]) > 0.1
- for: 5m
- labels:
- context: committed-resource-api
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "Committed Resource usage API HTTP 400 errors too high"
- description: >
- The committed resource usage API (Limes LIQUID integration) is responding
- with HTTP 4xx errors. This may indicate invalid project IDs or malformed
- requests from Limes. Limes will typically retry these requests.
-
- - alert: CortexNovaCommittedResourceUsageHttpRequest500sTooHigh
- expr: rate(cortex_committed_resource_usage_api_requests_total{service="cortex-nova-metrics", status_code=~"5.."}[5m]) > 0.1
- for: 5m
- labels:
- context: committed-resource-api
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "Committed Resource usage API HTTP 500 errors too high"
- description: >
- The committed resource usage API (Limes LIQUID integration) is responding
- with HTTP 5xx errors. This indicates internal problems fetching reservations
- or Nova server data. Limes may receive stale or incomplete usage data.
-
- - alert: CortexNovaCommittedResourceUsageLatencyTooHigh
- expr: histogram_quantile(0.95, sum(rate(cortex_committed_resource_usage_api_request_duration_seconds_bucket{service="cortex-nova-metrics"}[5m])) by (le)) > 5
- for: 5m
- labels:
- context: committed-resource-api
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "Committed Resource usage API latency too high"
- description: >
- The committed resource usage API (Limes LIQUID integration) is experiencing
- high latency (p95 > 5s). This may indicate slow Nova API responses or
- database queries. Limes scrapes may time out, affecting quota reporting.
-
- # Committed Resource Capacity API Alerts
- - alert: CortexNovaCommittedResourceCapacityHttpRequest400sTooHigh
- expr: rate(cortex_committed_resource_capacity_api_requests_total{service="cortex-nova-metrics", status_code=~"4.."}[5m]) > 0.1
- for: 5m
- labels:
- context: committed-resource-api
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "Committed Resource capacity API HTTP 400 errors too high"
- description: >
- The committed resource capacity API (Limes LIQUID integration) is responding
- with HTTP 4xx errors. This may indicate malformed requests from Limes.
-
- - alert: CortexNovaCommittedResourceCapacityHttpRequest500sTooHigh
- expr: rate(cortex_committed_resource_capacity_api_requests_total{service="cortex-nova-metrics", status_code=~"5.."}[5m]) > 0.1
- for: 5m
- labels:
- context: committed-resource-api
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "Committed Resource capacity API HTTP 500 errors too high"
- description: >
- The committed resource capacity API (Limes LIQUID integration) is responding
- with HTTP 5xx errors. This indicates internal problems calculating cluster
- capacity. Limes may receive stale or incomplete capacity data.
-
- - alert: CortexNovaCommittedResourceCapacityLatencyTooHigh
- expr: histogram_quantile(0.95, sum(rate(cortex_committed_resource_capacity_api_request_duration_seconds_bucket{service="cortex-nova-metrics"}[5m])) by (le)) > 5
- for: 5m
- labels:
- context: committed-resource-api
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "Committed Resource capacity API latency too high"
- description: >
- The committed resource capacity API (Limes LIQUID integration) is experiencing
- high latency (p95 > 5s). This may indicate slow database queries or knowledge
- CRD retrieval. Limes scrapes may time out, affecting capacity reporting.
-
- # Committed Resource Syncer Alerts
- - alert: CortexNovaCommittedResourceSyncerErrorsHigh
- expr: increase(cortex_committed_resource_syncer_errors_total{service="cortex-nova-metrics"}[1h]) > 3
- for: 5m
- labels:
- context: committed-resource-syncer
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "Committed Resource syncer experiencing errors"
- description: >
- The committed resource syncer has encountered multiple errors in the last hour.
- This may indicate connectivity issues with Limes. Check the syncer logs for error details.
-
- - alert: CortexNovaCommittedResourceSyncerUnitMismatchRateHigh
- expr: |
- (
- sum(rate(cortex_committed_resource_syncer_commitments_skipped_total{service="cortex-nova-metrics", reason="unit_mismatch"}[1h]))
- / sum(rate(cortex_committed_resource_syncer_commitments_total{service="cortex-nova-metrics"}[1h]))
- ) > 0.05
- and on() sum(rate(cortex_committed_resource_syncer_commitments_total{service="cortex-nova-metrics"}[1h])) > 0
- for: 15m
- labels:
- context: committed-resource-syncer
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "Committed Resource syncer unit mismatch rate >5%"
- description: >
- More than 5% of commitments are being skipped due to unit mismatches between
- Limes and Cortex flavor groups. This happens when Limes has not yet been
- updated to use the new unit format after a flavor group change. The affected
- commitments will keep their existing reservations until Limes notices the update.
- Check the logs if this error persists for longer time.
-
- - alert: CortexNovaCommittedResourceSyncerUnknownFlavorGroupRateHigh
- expr: |
- (
- sum(rate(cortex_committed_resource_syncer_commitments_skipped_total{service="cortex-nova-metrics", reason="unknown_flavor_group"}[1h]))
- / sum(rate(cortex_committed_resource_syncer_commitments_total{service="cortex-nova-metrics"}[1h]))
- ) > 0
- and on() sum(rate(cortex_committed_resource_syncer_commitments_total{service="cortex-nova-metrics"}[1h])) > 0
- for: 15m
- labels:
- context: committed-resource-syncer
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "Committed Resource syncer unknown flavor group rate >0%"
- description: >
- Some commitments reference flavor groups that don't exist in
- Cortex Knowledge (anymore). This may indicate that flavor group configuration is
- out of sync between Limes and Cortex, or that Knowledge extraction is failing.
- Check the flavor group Knowledge CRD and history to see what was changed.
-
- - alert: CortexNovaCommittedResourceSyncerLocalChangeRateHigh
- expr: |
- (
- (
- rate(cortex_committed_resource_syncer_reservations_created_total{service="cortex-nova-metrics"}[1h]) +
- rate(cortex_committed_resource_syncer_reservations_deleted_total{service="cortex-nova-metrics"}[1h]) +
- rate(cortex_committed_resource_syncer_reservations_repaired_total{service="cortex-nova-metrics"}[1h])
- ) / rate(cortex_committed_resource_syncer_commitments_processed_total{service="cortex-nova-metrics"}[1h])
- ) > 0.01
- and on() rate(cortex_committed_resource_syncer_commitments_processed_total{service="cortex-nova-metrics"}[1h]) > 0
- for: 15m
- labels:
- context: committed-resource-syncer
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "Committed Resource syncer local change rate >1%"
- description: >
- More than 1% of synced commitments are requiring reservation changes
- (creates, deletes, or repairs). This is higher than expected for steady-state
- operation and may indicate data inconsistencies, external modifications to
- reservations, or issues with the CRDs. Check Cortex logs for details.
-
- - alert: CortexNovaCommittedResourceSyncerRepairRateHigh
- expr: |
- (
- rate(cortex_committed_resource_syncer_reservations_repaired_total{service="cortex-nova-metrics"}[1h])
- / rate(cortex_committed_resource_syncer_commitments_processed_total{service="cortex-nova-metrics"}[1h])
- ) > 0
- and on() rate(cortex_committed_resource_syncer_commitments_processed_total{service="cortex-nova-metrics"}[1h]) > 0
- for: 15m
- labels:
- context: committed-resource-syncer
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "Committed Resource syncer repair rate >0%"
- description: >
- Some commitments have reservations that needed repair
- (wrong metadata like project ID or flavor group). This may indicate data
- corruption, bugs in reservation creation, or external modifications.
- Reservations are automatically repaired, but the root cause should be
- investigated if this alert persists.
-
- - alert: CortexNovaDoesntFindValidKVMHosts
- expr: sum by (az, hvtype) (cortex_vm_faults{hvtype=~"CH|QEMU",faultmsg=~".*No valid host was found.*"}) > 0
- for: 5m
- labels:
- context: scheduling
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "Nova scheduling cannot find valid KVM hosts"
- description: >
- Cortex is seeing faulty vms in `{{$labels.az}}` where Nova scheduling
- failed to find a valid `{{$labels.hvtype}}` host. This may indicate
- capacity issues, misconfigured filters, or resource constraints in the
- datacenter. Investigate the affected VMs and hypervisor availability.
-
- - alert: CortexNovaNewDatasourcesNotReconciling
- expr: count by(datasource) (cortex_datasource_seconds_until_reconcile{queued="false",domain="nova"}) > 0
- for: 60m
- labels:
- context: datasources
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "New datasource `{{$labels.datasource}}` has not reconciled"
- description: >
- A new datasource `{{$labels.datasource}}` has been added but has not
- completed its first reconciliation yet. This may indicate issues with
- the datasource controller's workqueue overprioritizing other datasources.
-
- - alert: CortexNovaExistingDatasourcesLackingBehind
- expr: |
- sum by(datasource) (cortex_datasource_seconds_until_reconcile{queued="true",domain="nova"}) < -600
- and on(datasource) cortex_datasource_state{state="ready",domain="nova"} == 1
- for: 10m
- labels:
- context: datasources
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "Existing datasource `{{$labels.datasource}}` is lacking behind"
- description: >
- An existing datasource `{{$labels.datasource}}` has been queued for
- reconciliation for more than 10 minutes. This may indicate issues with
- the datasource controller's workqueue or that this or another datasource
- is taking an unusually long time to reconcile.
-
- - alert: CortexNovaReconcileErrorsHigh
- expr: |
- (sum by (controller) (rate(controller_runtime_reconcile_errors_total{service="cortex-nova-metrics"}[5m])))
- / (sum by (controller) (rate(controller_runtime_reconcile_total{service="cortex-nova-metrics"}[5m]))) > 0.1
- for: 15m
- labels:
- context: controller-errors
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "Controller reconcile error rate >10%"
- description: >
- More than 10% of controller reconciles are resulting in errors. This may
- indicate issues with the controller logic, connectivity problems, or
- external factors causing failures. Check the controller logs for error
- details and investigate the affected resources.
-
- - alert: CortexNovaReconcileDurationHigher10Min
- expr: |
- (sum by (controller) (rate(controller_runtime_reconcile_time_seconds_sum{service="cortex-nova-metrics"}[5m])))
- / (sum by (controller) (rate(controller_runtime_reconcile_time_seconds_count{service="cortex-nova-metrics"}[5m]))) > 600
- for: 15m
- labels:
- context: controller-duration
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "Controller reconciliation takes longer than ({{ $value | humanizeDuration }})"
- description: "Reconcile duration higher than 10m while reconciling {{ $labels.controller }}"
-
- - alert: CortexNovaWorkqueueNotDrained
- expr: |
- sum by (name) (workqueue_depth{service="cortex-nova-metrics"}) > 0
- for: 60m
- labels:
- context: controller-workqueue
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "Controller {{ $labels.name }}'s backlog is not being drained."
- description: >
- The workqueue for controller {{ $labels.name }} has a backlog that is
- not being drained. This may indicate that the controller is overwhelmed
- with work or is stuck on certain resources. Check the controller logs
- and the state of the resources it manages for more details.
-
- - alert: CortexNovaWebhookLatencyHigh
- expr: |
- histogram_quantile(0.9, avg(rate(controller_runtime_webhook_latency_seconds_bucket{service="cortex-nova-metrics"}[5m])) by (webhook, le)) > 0.2
- for: 15m
- labels:
- context: controller-webhook
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "Controller webhook {{ $labels.webhook }} latency is high"
- description: >
- The latency for webhook {{ $labels.webhook }} is higher than expected (p90 > 200ms).
- This may indicate performance issues with the webhook server or the logic it executes.
- Check the webhook server logs and monitor its resource usage for more insights.
-
- - alert: CortexNovaWebhookErrorsHigh
- expr: |
- (sum by (webhook) (rate(controller_runtime_webhook_requests_total{code!="200", service="cortex-nova-metrics"}[5m])))
- / (sum by (webhook) (rate(controller_runtime_webhook_requests_total{service="cortex-nova-metrics"}[5m]))) > 0.1
- for: 15m
- labels:
- context: controller-webhook
- dashboard: cortex/cortex
- service: cortex
- severity: warning
- support_group: workload-management
- annotations:
- summary: "Controller webhook {{ $labels.webhook }} is experiencing errors"
- description: >
- The webhook {{ $labels.webhook }} has experienced errors in the last 5 minutes.
- This may indicate issues with the webhook logic, connectivity problems, or
- external factors causing failures. Check the webhook server logs for error
- details and investigate the affected resources.
\ No newline at end of file
+- name: cortex-placement-shim-alerts
+ rules: []
\ No newline at end of file
From 154338f9560e497ba9ff9191257f2dd79311a6c7 Mon Sep 17 00:00:00 2001
From: Philipp Matthes
Date: Thu, 9 Apr 2026 13:20:52 +0200
Subject: [PATCH 04/17] PR feedback
---
.github/workflows/push-images.yaml | 7 ++++++-
cmd/shim/main.go | 9 ++++-----
helm/library/cortex-shim/templates/_helpers.tpl | 12 ++++++++----
helm/library/cortex-shim/templates/service.yaml | 4 ++--
helm/library/cortex-shim/values.yaml | 4 ++--
5 files changed, 22 insertions(+), 14 deletions(-)
diff --git a/.github/workflows/push-images.yaml b/.github/workflows/push-images.yaml
index 3085b503b..f3be685ce 100644
--- a/.github/workflows/push-images.yaml
+++ b/.github/workflows/push-images.yaml
@@ -81,6 +81,11 @@ jobs:
files: |
cmd/shim/**
internal/shim/**
+ api/**
+ pkg/**
+ go.mod
+ go.sum
+ Dockerfile
- name: Docker Meta (Cortex Shim)
if: steps.changed_shim_files.outputs.all_changed_files != ''
id: meta_cortex_shim
@@ -99,7 +104,7 @@ jobs:
id: push_cortex_shim
uses: docker/build-push-action@v7
with:
- context: cmd/shim
+ context: .
platforms: linux/amd64,linux/arm64
push: true
tags: ${{ steps.meta_cortex_shim.outputs.tags }}
diff --git a/cmd/shim/main.go b/cmd/shim/main.go
index d59490c3c..970c8c934 100644
--- a/cmd/shim/main.go
+++ b/cmd/shim/main.go
@@ -4,7 +4,6 @@
package main
import (
- "context"
"crypto/tls"
"errors"
"flag"
@@ -46,7 +45,7 @@ func init() {
}
func main() {
- ctx := context.Background()
+ ctx := ctrl.SetupSignalHandler()
restConfig := ctrl.GetConfigOrDie()
var metricsAddr string
@@ -110,7 +109,7 @@ func main() {
var metricsCertWatcher, webhookCertWatcher *certwatcher.CertWatcher
// Initial webhook TLS options
- webhookTLSOpts := tlsOpts
+ webhookTLSOpts := append([]func(*tls.Config){}, tlsOpts...)
if webhookCertPath != "" {
setupLog.Info("Initializing webhook certificate watcher using provided certificates",
@@ -142,7 +141,7 @@ func main() {
metricsServerOptions := metricsserver.Options{
BindAddress: metricsAddr,
SecureServing: secureMetrics,
- TLSOpts: tlsOpts,
+ TLSOpts: append([]func(*tls.Config){}, tlsOpts...),
}
if secureMetrics {
@@ -246,7 +245,7 @@ func main() {
}()
setupLog.Info("starting manager")
- if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
+ if err := mgr.Start(ctx); err != nil {
setupLog.Error(err, "problem running manager")
os.Exit(1)
}
diff --git a/helm/library/cortex-shim/templates/_helpers.tpl b/helm/library/cortex-shim/templates/_helpers.tpl
index 782e14eef..cca33d701 100644
--- a/helm/library/cortex-shim/templates/_helpers.tpl
+++ b/helm/library/cortex-shim/templates/_helpers.tpl
@@ -36,15 +36,19 @@ app.kubernetes.io/instance: {{ .Release.Name }}
{{- $hasMutating := false }}
{{- range . }}
{{- if eq .type "mutating" }}
- $hasMutating = true }}{{- end }}
+ {{- $hasMutating = true -}}
+ {{- end }}
+{{- end }}
+{{ $hasMutating }}
{{- end }}
-{{ $hasMutating }}}}{{- end }}
{{- define "chart.hasValidatingWebhooks" -}}
{{- $hasValidating := false }}
{{- range . }}
{{- if eq .type "validating" }}
- $hasValidating = true }}{{- end }}
+ {{- $hasValidating = true -}}
+ {{- end }}
+{{- end }}
+{{ $hasValidating }}
{{- end }}
-{{ $hasValidating }}}}{{- end }}
diff --git a/helm/library/cortex-shim/templates/service.yaml b/helm/library/cortex-shim/templates/service.yaml
index 549ceed95..faf3082a3 100644
--- a/helm/library/cortex-shim/templates/service.yaml
+++ b/helm/library/cortex-shim/templates/service.yaml
@@ -12,7 +12,7 @@ spec:
protocol: TCP
name: api
selector:
- app.kubernetes.io/name: {{ include "chart.name" . }}
+ {{- include "chart.selectorLabels" . | nindent 4 }}
{{- if .Values.metrics.enable }}
---
apiVersion: v1
@@ -29,5 +29,5 @@ spec:
protocol: TCP
name: metrics
selector:
- app.kubernetes.io/name: {{ include "chart.name" . }}
+ {{- include "chart.selectorLabels" . | nindent 4 }}
{{- end }}
diff --git a/helm/library/cortex-shim/values.yaml b/helm/library/cortex-shim/values.yaml
index 1c45c2542..1d1bc844c 100644
--- a/helm/library/cortex-shim/values.yaml
+++ b/helm/library/cortex-shim/values.yaml
@@ -40,8 +40,8 @@ deployment:
serviceAccountName: shim
# [METRICS]: Set to true to generate manifests for exporting metrics.
-# To disable metrics export set false, and ensure that the
-# ControllerManager argument "--metrics-bind-address=:8443" is removed.
+# To disable metrics export, set this to false and remove the container
+# args "--metrics-bind-address=:2112" and "--metrics-secure=false".
metrics:
enable: true
From 2305af318d99847d11ecae5598ae305f0a2d4092 Mon Sep 17 00:00:00 2001
From: Philipp Matthes
Date: Thu, 9 Apr 2026 13:36:10 +0200
Subject: [PATCH 05/17] PR feedback
---
cmd/shim/main.go | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/cmd/shim/main.go b/cmd/shim/main.go
index 970c8c934..9feea8d5f 100644
--- a/cmd/shim/main.go
+++ b/cmd/shim/main.go
@@ -170,7 +170,7 @@ func main() {
filepath.Join(metricsCertPath, metricsCertKey),
)
if err != nil {
- setupLog.Error(err, "to initialize metrics certificate watcher", "error", err)
+ setupLog.Error(err, "Failed to initialize metrics certificate watcher")
os.Exit(1)
}
From 6252f5c6d4fa944e71b9f3cbf7d25c2d8f93dcba Mon Sep 17 00:00:00 2001
From: Philipp Matthes
Date: Thu, 9 Apr 2026 13:42:55 +0200
Subject: [PATCH 06/17] 3 replicas by default
---
helm/library/cortex-shim/values.yaml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/helm/library/cortex-shim/values.yaml b/helm/library/cortex-shim/values.yaml
index 1d1bc844c..63574fbe4 100644
--- a/helm/library/cortex-shim/values.yaml
+++ b/helm/library/cortex-shim/values.yaml
@@ -1,6 +1,6 @@
deployment:
enable: true
- replicas: 1
+ replicas: 3
container:
image:
repository: ghcr.io/cobaltcore-dev/cortex-shim
From 33d181f84176b10128175381862621482568190d Mon Sep 17 00:00:00 2001
From: Philipp Matthes
Date: Thu, 9 Apr 2026 14:20:37 +0200
Subject: [PATCH 07/17] Scaffold empty placement api handlers
---
cmd/shim/main.go | 7 ++
.../handlers/allocation_candidates.go | 40 +++++++
.../shim/placement/handlers/allocations.go | 78 +++++++++++++
internal/shim/placement/handlers/reshaper.go | 30 +++++
.../placement/handlers/resource_classes.go | 72 ++++++++++++
.../handlers/resource_provider_aggregates.go | 45 ++++++++
.../handlers/resource_provider_allocations.go | 24 ++++
.../handlers/resource_provider_inventories.go | 103 ++++++++++++++++++
.../handlers/resource_provider_traits.go | 54 +++++++++
.../handlers/resource_provider_usages.go | 24 ++++
.../placement/handlers/resource_providers.go | 84 ++++++++++++++
internal/shim/placement/handlers/root.go | 23 ++++
internal/shim/placement/handlers/traits.go | 60 ++++++++++
internal/shim/placement/handlers/usages.go | 27 +++++
internal/shim/placement/handlers/zz_index.go | 73 +++++++++++++
15 files changed, 744 insertions(+)
create mode 100644 internal/shim/placement/handlers/allocation_candidates.go
create mode 100644 internal/shim/placement/handlers/allocations.go
create mode 100644 internal/shim/placement/handlers/reshaper.go
create mode 100644 internal/shim/placement/handlers/resource_classes.go
create mode 100644 internal/shim/placement/handlers/resource_provider_aggregates.go
create mode 100644 internal/shim/placement/handlers/resource_provider_allocations.go
create mode 100644 internal/shim/placement/handlers/resource_provider_inventories.go
create mode 100644 internal/shim/placement/handlers/resource_provider_traits.go
create mode 100644 internal/shim/placement/handlers/resource_provider_usages.go
create mode 100644 internal/shim/placement/handlers/resource_providers.go
create mode 100644 internal/shim/placement/handlers/root.go
create mode 100644 internal/shim/placement/handlers/traits.go
create mode 100644 internal/shim/placement/handlers/usages.go
create mode 100644 internal/shim/placement/handlers/zz_index.go
diff --git a/cmd/shim/main.go b/cmd/shim/main.go
index 9feea8d5f..41be2c084 100644
--- a/cmd/shim/main.go
+++ b/cmd/shim/main.go
@@ -12,6 +12,7 @@ import (
"path/filepath"
"github.com/cobaltcore-dev/cortex/api/v1alpha1"
+ placementhandlers "github.com/cobaltcore-dev/cortex/internal/shim/placement/handlers"
"github.com/cobaltcore-dev/cortex/pkg/conf"
"github.com/cobaltcore-dev/cortex/pkg/monitoring"
hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
@@ -57,6 +58,7 @@ func main() {
var probeAddr string
var secureMetrics bool
var enableHTTP2 bool
+ var enablePlacementShim bool
var tlsOpts []func(*tls.Config)
flag.StringVar(&metricsAddr, "metrics-bind-address", "0", "The address the metrics endpoint binds to. "+
"Use :8443 for HTTPS or :8080 for HTTP, or leave as 0 to disable the metrics service.")
@@ -75,6 +77,8 @@ func main() {
flag.StringVar(&metricsCertKey, "metrics-cert-key", "tls.key", "The name of the metrics server key file.")
flag.BoolVar(&enableHTTP2, "enable-http2", false,
"If set, HTTP/2 will be enabled for the metrics and webhook servers")
+ flag.BoolVar(&enablePlacementShim, "placement-shim", false,
+ "If set, the placement API shim handlers are registered on the API server.")
opts := zap.Options{
Development: true,
}
@@ -202,6 +206,9 @@ func main() {
// API endpoint.
mux := http.NewServeMux()
+ if enablePlacementShim {
+ placementhandlers.RegisterRoutes(mux)
+ }
// +kubebuilder:scaffold:builder
diff --git a/internal/shim/placement/handlers/allocation_candidates.go b/internal/shim/placement/handlers/allocation_candidates.go
new file mode 100644
index 000000000..8d864a9a4
--- /dev/null
+++ b/internal/shim/placement/handlers/allocation_candidates.go
@@ -0,0 +1,40 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package handlers
+
+import (
+ "net/http"
+
+ logf "sigs.k8s.io/controller-runtime/pkg/log"
+)
+
+// HandleListAllocationCandidates handles GET /allocation_candidates requests.
+//
+// Returns a collection of allocation requests and resource provider summaries
+// that can satisfy a given set of resource and trait requirements. This is the
+// primary endpoint used by Nova's scheduler to find suitable hosts for
+// instance placement.
+//
+// The resources query parameter specifies required capacity as a comma-
+// separated list (e.g. VCPU:4,MEMORY_MB:2048,DISK_GB:64). The required
+// parameter filters by traits, supporting forbidden traits via ! prefix
+// (since 1.22) and the in: syntax for any-of semantics (since 1.39).
+// The member_of parameter filters by aggregate membership with support for
+// forbidden aggregates via ! prefix (since 1.32).
+//
+// Since microversion 1.25, granular request groups are supported via numbered
+// suffixes (resourcesN, requiredN, member_ofN) to express requirements that
+// may be satisfied by different providers. The group_policy parameter (1.26+)
+// controls whether groups must each be satisfied by a single provider or may
+// span multiple. The in_tree parameter (1.31+) constrains results to a
+// specific provider tree.
+//
+// Each returned allocation request is directly usable as the body for
+// PUT /allocations/{consumer_uuid}. The provider_summaries section includes
+// inventory capacity and usage for informed decision-making. Available since
+// microversion 1.10.
+func HandleListAllocationCandidates(w http.ResponseWriter, r *http.Request) {
+ log := logf.FromContext(r.Context())
+ log.Info("placement request", "method", r.Method, "path", r.URL.Path)
+}
diff --git a/internal/shim/placement/handlers/allocations.go b/internal/shim/placement/handlers/allocations.go
new file mode 100644
index 000000000..aa4ba99f2
--- /dev/null
+++ b/internal/shim/placement/handlers/allocations.go
@@ -0,0 +1,78 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package handlers
+
+import (
+ "net/http"
+
+ logf "sigs.k8s.io/controller-runtime/pkg/log"
+)
+
+// HandleManageAllocations handles POST /allocations requests.
+//
+// Atomically creates, updates, or deletes allocations for multiple consumers
+// in a single request. This is the primary mechanism for operations that must
+// modify allocations across several consumers atomically, such as live
+// migrations and move operations where resources are transferred from one
+// consumer to another. Available since microversion 1.13.
+//
+// The request body is keyed by consumer UUID, each containing an allocations
+// dictionary (keyed by resource provider UUID), along with project_id and
+// user_id. Since microversion 1.28, consumer_generation enables consumer-
+// level concurrency control. Since microversion 1.38, a consumer_type field
+// (e.g. INSTANCE, MIGRATION) is supported. Returns 204 No Content on
+// success, or 409 Conflict if inventory is insufficient or a concurrent
+// update is detected (error code: placement.concurrent_update).
+func HandleManageAllocations(w http.ResponseWriter, r *http.Request) {
+ log := logf.FromContext(r.Context())
+ log.Info("placement request", "method", r.Method, "path", r.URL.Path)
+}
+
+// HandleListAllocations handles GET /allocations/{consumer_uuid} requests.
+//
+// Returns all allocation records for the consumer identified by
+// {consumer_uuid}, across all resource providers. The response contains an
+// allocations dictionary keyed by resource provider UUID. If the consumer has
+// no allocations, an empty dictionary is returned.
+//
+// The response has grown across microversions: project_id and user_id were
+// added at 1.12, consumer_generation at 1.28, and consumer_type at 1.38.
+// The consumer_generation and consumer_type fields are absent when the
+// consumer has no allocations.
+func HandleListAllocations(w http.ResponseWriter, r *http.Request) {
+ consumerUUID := r.PathValue("consumer_uuid")
+ log := logf.FromContext(r.Context())
+ log.Info("placement request", "method", r.Method, "path", r.URL.Path,
+ "consumer_uuid", consumerUUID)
+}
+
+// HandleUpdateAllocations handles PUT /allocations/{consumer_uuid} requests.
+//
+// Creates or replaces all allocation records for a single consumer. If
+// allocations already exist for this consumer, they are entirely replaced
+// by the new set. The request format changed at microversion 1.12 from an
+// array-based layout to an object keyed by resource provider UUID.
+// Microversion 1.28 added consumer_generation for concurrency control,
+// and 1.38 introduced consumer_type.
+//
+// Returns 204 No Content on success. Returns 409 Conflict if there is
+// insufficient inventory or if a concurrent update was detected.
+func HandleUpdateAllocations(w http.ResponseWriter, r *http.Request) {
+ consumerUUID := r.PathValue("consumer_uuid")
+ log := logf.FromContext(r.Context())
+ log.Info("placement request", "method", r.Method, "path", r.URL.Path,
+ "consumer_uuid", consumerUUID)
+}
+
+// HandleDeleteAllocations handles DELETE /allocations/{consumer_uuid} requests.
+//
+// Removes all allocation records for the consumer across all resource
+// providers. Returns 204 No Content on success, or 404 Not Found if the
+// consumer has no existing allocations.
+func HandleDeleteAllocations(w http.ResponseWriter, r *http.Request) {
+ consumerUUID := r.PathValue("consumer_uuid")
+ log := logf.FromContext(r.Context())
+ log.Info("placement request", "method", r.Method, "path", r.URL.Path,
+ "consumer_uuid", consumerUUID)
+}
diff --git a/internal/shim/placement/handlers/reshaper.go b/internal/shim/placement/handlers/reshaper.go
new file mode 100644
index 000000000..59556c94a
--- /dev/null
+++ b/internal/shim/placement/handlers/reshaper.go
@@ -0,0 +1,30 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package handlers
+
+import (
+ "net/http"
+
+ logf "sigs.k8s.io/controller-runtime/pkg/log"
+)
+
+// HandlePostReshaper handles POST /reshaper requests.
+//
+// Atomically migrates resource provider inventories and associated allocations
+// in a single transaction. This endpoint is used when a provider tree needs to
+// be restructured — for example, moving inventory from a root provider into
+// newly created child providers — without leaving allocations in an
+// inconsistent state during the transition.
+//
+// The request body contains the complete set of inventories (keyed by
+// resource provider UUID) and allocations (keyed by consumer UUID) that
+// should exist after the operation. The Placement service validates all
+// inputs atomically and applies them in a single database transaction.
+// Returns 204 No Content on success. Returns 409 Conflict if any referenced
+// resource provider does not exist or if inventory/allocation constraints
+// would be violated. Available since microversion 1.30.
+func HandlePostReshaper(w http.ResponseWriter, r *http.Request) {
+ log := logf.FromContext(r.Context())
+ log.Info("placement request", "method", r.Method, "path", r.URL.Path)
+}
diff --git a/internal/shim/placement/handlers/resource_classes.go b/internal/shim/placement/handlers/resource_classes.go
new file mode 100644
index 000000000..81548b3b5
--- /dev/null
+++ b/internal/shim/placement/handlers/resource_classes.go
@@ -0,0 +1,72 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package handlers
+
+import (
+ "net/http"
+
+ logf "sigs.k8s.io/controller-runtime/pkg/log"
+)
+
+// HandleListResourceClasses handles GET /resource_classes requests.
+//
+// Returns the complete list of all resource classes, including both standard
+// classes (e.g. VCPU, MEMORY_MB, DISK_GB, PCI_DEVICE, SRIOV_NET_VF) and
+// deployer-defined custom classes prefixed with CUSTOM_. Resource classes
+// categorize the types of resources that resource providers can offer as
+// inventory. Available since microversion 1.2.
+func HandleListResourceClasses(w http.ResponseWriter, r *http.Request) {
+ log := logf.FromContext(r.Context())
+ log.Info("placement request", "method", r.Method, "path", r.URL.Path)
+}
+
+// HandleCreateResourceClass handles POST /resource_classes requests.
+//
+// Creates a new custom resource class. The name must be prefixed with CUSTOM_
+// to distinguish it from standard resource classes. Returns 201 Created with
+// a Location header on success. Returns 400 Bad Request if the CUSTOM_ prefix
+// is missing, and 409 Conflict if a class with the same name already exists.
+// Available since microversion 1.2.
+func HandleCreateResourceClass(w http.ResponseWriter, r *http.Request) {
+ log := logf.FromContext(r.Context())
+ log.Info("placement request", "method", r.Method, "path", r.URL.Path)
+}
+
+// HandleShowResourceClass handles GET /resource_classes/{name} requests.
+//
+// Returns a representation of a single resource class identified by name.
+// This can be used to verify the existence of a resource class. Returns 404
+// if the class does not exist. Available since microversion 1.2.
+func HandleShowResourceClass(w http.ResponseWriter, r *http.Request) {
+ name := r.PathValue("name")
+ log := logf.FromContext(r.Context())
+ log.Info("placement request", "method", r.Method, "path", r.URL.Path, "name", name)
+}
+
+// HandleUpdateResourceClass handles PUT /resource_classes/{name} requests.
+//
+// Behavior differs by microversion. Since microversion 1.7, this endpoint
+// creates or validates the existence of a single resource class: it returns
+// 201 Created for a new class or 204 No Content if the class already exists.
+// The name must carry the CUSTOM_ prefix. In earlier versions (1.2-1.6), the
+// endpoint allowed renaming a class via a request body, but this usage is
+// discouraged. Returns 400 Bad Request if the CUSTOM_ prefix is missing.
+func HandleUpdateResourceClass(w http.ResponseWriter, r *http.Request) {
+ name := r.PathValue("name")
+ log := logf.FromContext(r.Context())
+ log.Info("placement request", "method", r.Method, "path", r.URL.Path, "name", name)
+}
+
+// HandleDeleteResourceClass handles DELETE /resource_classes/{name} requests.
+//
+// Deletes a custom resource class. Only custom classes (prefixed with CUSTOM_)
+// may be deleted; attempting to delete a standard class returns 400 Bad
+// Request. Returns 409 Conflict if any resource provider has inventory of this
+// class, and 404 if the class does not exist. Returns 204 No Content on
+// success. Available since microversion 1.2.
+func HandleDeleteResourceClass(w http.ResponseWriter, r *http.Request) {
+ name := r.PathValue("name")
+ log := logf.FromContext(r.Context())
+ log.Info("placement request", "method", r.Method, "path", r.URL.Path, "name", name)
+}
diff --git a/internal/shim/placement/handlers/resource_provider_aggregates.go b/internal/shim/placement/handlers/resource_provider_aggregates.go
new file mode 100644
index 000000000..172c40b2f
--- /dev/null
+++ b/internal/shim/placement/handlers/resource_provider_aggregates.go
@@ -0,0 +1,45 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package handlers
+
+import (
+ "net/http"
+
+ logf "sigs.k8s.io/controller-runtime/pkg/log"
+)
+
+// HandleListResourceProviderAggregates handles
+// GET /resource_providers/{uuid}/aggregates requests.
+//
+// Returns the list of aggregate UUIDs associated with the resource provider.
+// Aggregates model relationships among providers such as shared storage,
+// affinity/anti-affinity groups, and availability zones. Returns an empty
+// list if the provider has no aggregate associations. Available since
+// microversion 1.1.
+//
+// The response format changed at microversion 1.19: earlier versions return
+// only a flat array of UUIDs, while 1.19+ returns an object that also
+// includes the resource_provider_generation for concurrency tracking. Returns
+// 404 if the provider does not exist.
+func HandleListResourceProviderAggregates(w http.ResponseWriter, r *http.Request) {
+ uuid := r.PathValue("uuid")
+ log := logf.FromContext(r.Context())
+ log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
+}
+
+// HandleUpdateResourceProviderAggregates handles
+// PUT /resource_providers/{uuid}/aggregates requests.
+//
+// Replaces the complete set of aggregate associations for a resource provider.
+// Any aggregate UUIDs that do not yet exist are created automatically. The
+// request format changed at microversion 1.19: earlier versions accept a
+// plain array of UUIDs, while 1.19+ expects an object containing an
+// aggregates array and a resource_provider_generation for optimistic
+// concurrency control. Returns 409 Conflict if the generation does not match
+// (1.19+). Returns 200 with the updated aggregate list on success.
+func HandleUpdateResourceProviderAggregates(w http.ResponseWriter, r *http.Request) {
+ uuid := r.PathValue("uuid")
+ log := logf.FromContext(r.Context())
+ log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
+}
diff --git a/internal/shim/placement/handlers/resource_provider_allocations.go b/internal/shim/placement/handlers/resource_provider_allocations.go
new file mode 100644
index 000000000..0e27ca7e6
--- /dev/null
+++ b/internal/shim/placement/handlers/resource_provider_allocations.go
@@ -0,0 +1,24 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package handlers
+
+import (
+ "net/http"
+
+ logf "sigs.k8s.io/controller-runtime/pkg/log"
+)
+
+// HandleListResourceProviderAllocations handles
+// GET /resource_providers/{uuid}/allocations requests.
+//
+// Returns all allocations made against the resource provider identified by
+// {uuid}, keyed by consumer UUID. This provides a provider-centric view of
+// consumption, complementing the consumer-centric GET /allocations/{consumer}
+// endpoint. The response includes the resource_provider_generation. Returns
+// 404 if the provider does not exist.
+func HandleListResourceProviderAllocations(w http.ResponseWriter, r *http.Request) {
+ uuid := r.PathValue("uuid")
+ log := logf.FromContext(r.Context())
+ log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
+}
diff --git a/internal/shim/placement/handlers/resource_provider_inventories.go b/internal/shim/placement/handlers/resource_provider_inventories.go
new file mode 100644
index 000000000..303370e69
--- /dev/null
+++ b/internal/shim/placement/handlers/resource_provider_inventories.go
@@ -0,0 +1,103 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package handlers
+
+import (
+ "net/http"
+
+ logf "sigs.k8s.io/controller-runtime/pkg/log"
+)
+
+// HandleListResourceProviderInventories handles
+// GET /resource_providers/{uuid}/inventories requests.
+//
+// Returns all inventory records for the resource provider identified by
+// {uuid}. The response contains an inventories dictionary keyed by resource
+// class, with each entry describing capacity constraints: total, reserved,
+// min_unit, max_unit, step_size, and allocation_ratio. Also returns the
+// resource_provider_generation, which is needed for subsequent update or
+// delete operations. Returns 404 if the provider does not exist.
+func HandleListResourceProviderInventories(w http.ResponseWriter, r *http.Request) {
+ uuid := r.PathValue("uuid")
+ log := logf.FromContext(r.Context())
+ log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
+}
+
+// HandleUpdateResourceProviderInventories handles
+// PUT /resource_providers/{uuid}/inventories requests.
+//
+// Atomically replaces the entire set of inventory records for a provider.
+// The request must include the resource_provider_generation for optimistic
+// concurrency control — if the generation does not match, the request fails
+// with 409 Conflict. The inventories field is a dictionary keyed by resource
+// class, each specifying at minimum a total value. Omitted inventory classes
+// are deleted. Returns 409 Conflict if allocations exceed the new capacity
+// or if a concurrent update has occurred.
+func HandleUpdateResourceProviderInventories(w http.ResponseWriter, r *http.Request) {
+ uuid := r.PathValue("uuid")
+ log := logf.FromContext(r.Context())
+ log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
+}
+
+// HandleDeleteResourceProviderInventories handles
+// DELETE /resource_providers/{uuid}/inventories requests.
+//
+// Deletes all inventory records for a resource provider. This operation is
+// not safe for concurrent use; the recommended alternative for concurrent
+// environments is PUT with an empty inventories dictionary. Returns 409
+// Conflict if allocations exist against any of the provider's inventories.
+// Returns 404 if the provider does not exist. Available since microversion
+// 1.5.
+func HandleDeleteResourceProviderInventories(w http.ResponseWriter, r *http.Request) {
+ uuid := r.PathValue("uuid")
+ log := logf.FromContext(r.Context())
+ log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
+}
+
+// HandleShowResourceProviderInventory handles
+// GET /resource_providers/{uuid}/inventories/{resource_class} requests.
+//
+// Returns a single inventory record for one resource class on the specified
+// provider. The response includes total, reserved, min_unit, max_unit,
+// step_size, allocation_ratio, and the resource_provider_generation. Returns
+// 404 if the provider or inventory for that class does not exist.
+func HandleShowResourceProviderInventory(w http.ResponseWriter, r *http.Request) {
+ uuid := r.PathValue("uuid")
+ resourceClass := r.PathValue("resource_class")
+ log := logf.FromContext(r.Context())
+ log.Info("placement request", "method", r.Method, "path", r.URL.Path,
+ "uuid", uuid, "resource_class", resourceClass)
+}
+
+// HandleUpdateResourceProviderInventory handles
+// PUT /resource_providers/{uuid}/inventories/{resource_class} requests.
+//
+// Creates or replaces the inventory record for a single resource class on
+// the provider. The request must include resource_provider_generation for
+// concurrency control and a total value. Optional fields control allocation
+// constraints (allocation_ratio, min_unit, max_unit, step_size, reserved).
+// Since microversion 1.26, the reserved value must not exceed total. Returns
+// 409 Conflict on generation mismatch or if existing allocations would
+func HandleUpdateResourceProviderInventory(w http.ResponseWriter, r *http.Request) {
+ uuid := r.PathValue("uuid")
+ resourceClass := r.PathValue("resource_class")
+ log := logf.FromContext(r.Context())
+ log.Info("placement request", "method", r.Method, "path", r.URL.Path,
+ "uuid", uuid, "resource_class", resourceClass)
+}
+
+// HandleDeleteResourceProviderInventory handles
+// DELETE /resource_providers/{uuid}/inventories/{resource_class} requests.
+//
+// Deletes the inventory record for a specific resource class on the provider.
+// Returns 409 Conflict if allocations exist against this provider and resource
+// class combination, or if a concurrent update has occurred. Returns 404 if
+// the provider or inventory does not exist. Returns 204 No Content on success.
+func HandleDeleteResourceProviderInventory(w http.ResponseWriter, r *http.Request) {
+ uuid := r.PathValue("uuid")
+ resourceClass := r.PathValue("resource_class")
+ log := logf.FromContext(r.Context())
+ log.Info("placement request", "method", r.Method, "path", r.URL.Path,
+ "uuid", uuid, "resource_class", resourceClass)
+}
diff --git a/internal/shim/placement/handlers/resource_provider_traits.go b/internal/shim/placement/handlers/resource_provider_traits.go
new file mode 100644
index 000000000..915ccda05
--- /dev/null
+++ b/internal/shim/placement/handlers/resource_provider_traits.go
@@ -0,0 +1,54 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package handlers
+
+import (
+ "net/http"
+
+ logf "sigs.k8s.io/controller-runtime/pkg/log"
+)
+
+// HandleListResourceProviderTraits handles
+// GET /resource_providers/{uuid}/traits requests.
+//
+// Returns the list of traits associated with the resource provider identified
+// by {uuid}. The response includes an array of trait name strings and the
+// resource_provider_generation for concurrency tracking. Returns 404 if the
+// provider does not exist.
+func HandleListResourceProviderTraits(w http.ResponseWriter, r *http.Request) {
+ uuid := r.PathValue("uuid")
+ log := logf.FromContext(r.Context())
+ log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
+}
+
+// HandleUpdateResourceProviderTraits handles
+// PUT /resource_providers/{uuid}/traits requests.
+//
+// Replaces the complete set of trait associations for a resource provider.
+// The request body must include a traits array and the
+// resource_provider_generation for optimistic concurrency control. All
+// previously associated traits are removed and replaced by the specified set.
+// Returns 400 Bad Request if any of the specified traits are invalid (i.e.
+// not returned by GET /traits). Returns 409 Conflict if the generation does
+// not match.
+func HandleUpdateResourceProviderTraits(w http.ResponseWriter, r *http.Request) {
+ uuid := r.PathValue("uuid")
+ log := logf.FromContext(r.Context())
+ log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
+}
+
+// HandleDeleteResourceProviderTraits handles
+// DELETE /resource_providers/{uuid}/traits requests.
+//
+// Removes all trait associations from a resource provider. Because this
+// endpoint does not accept a resource_provider_generation, it is not safe
+// for concurrent use. In environments where multiple clients manage traits
+// for the same provider, prefer PUT with an empty traits list instead.
+// Returns 404 if the provider does not exist. Returns 409 Conflict on
+// concurrent modification. Returns 204 No Content on success.
+func HandleDeleteResourceProviderTraits(w http.ResponseWriter, r *http.Request) {
+ uuid := r.PathValue("uuid")
+ log := logf.FromContext(r.Context())
+ log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
+}
diff --git a/internal/shim/placement/handlers/resource_provider_usages.go b/internal/shim/placement/handlers/resource_provider_usages.go
new file mode 100644
index 000000000..319da4a70
--- /dev/null
+++ b/internal/shim/placement/handlers/resource_provider_usages.go
@@ -0,0 +1,24 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package handlers
+
+import (
+ "net/http"
+
+ logf "sigs.k8s.io/controller-runtime/pkg/log"
+)
+
+// HandleListResourceProviderUsages handles
+// GET /resource_providers/{uuid}/usages requests.
+//
+// Returns aggregated resource consumption for the resource provider identified
+// by {uuid}. The response contains a usages dictionary keyed by resource class
+// with integer usage amounts, along with the resource_provider_generation.
+// Unlike the provider allocations endpoint, this does not break down usage by
+// individual consumer. Returns 404 if the provider does not exist.
+func HandleListResourceProviderUsages(w http.ResponseWriter, r *http.Request) {
+ uuid := r.PathValue("uuid")
+ log := logf.FromContext(r.Context())
+ log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
+}
diff --git a/internal/shim/placement/handlers/resource_providers.go b/internal/shim/placement/handlers/resource_providers.go
new file mode 100644
index 000000000..6f4f975fe
--- /dev/null
+++ b/internal/shim/placement/handlers/resource_providers.go
@@ -0,0 +1,84 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package handlers
+
+import (
+ "net/http"
+
+ logf "sigs.k8s.io/controller-runtime/pkg/log"
+)
+
+// HandleListResourceProviders handles GET /resource_providers requests.
+//
+// Returns a filtered list of resource providers. Resource providers are
+// entities that provide consumable inventory of one or more classes of
+// resources (e.g. a compute node providing VCPU, MEMORY_MB, DISK_GB).
+//
+// Supports numerous filter parameters including name, uuid, member_of
+// (aggregate membership), resources (capacity filtering), in_tree (provider
+// tree membership), and required (trait filtering). Multiple filters are
+// combined with boolean AND logic. Many of these filters were added in later
+// microversions: resources filtering at 1.3, tree queries at 1.14, trait
+// requirements at 1.18, forbidden traits at 1.22, forbidden aggregates at
+// 1.32, and the in: syntax for required at 1.39.
+func HandleListResourceProviders(w http.ResponseWriter, r *http.Request) {
+ log := logf.FromContext(r.Context())
+ log.Info("placement request", "method", r.Method, "path", r.URL.Path)
+}
+
+// HandleCreateResourceProvider handles POST /resource_providers requests.
+//
+// Creates a new resource provider. The request must include a name and may
+// optionally specify a UUID and a parent_provider_uuid (since 1.14) to place
+// the provider in a hierarchical tree. If no UUID is supplied, one is
+// generated. Before microversion 1.37, the parent of a resource provider
+// could not be changed after creation.
+//
+// The response changed at microversion 1.20: earlier versions return only
+// an HTTP 201 with a Location header, while 1.20+ returns the full resource
+// provider object in the body. Returns 409 Conflict if a provider with the
+// same name or UUID already exists.
+func HandleCreateResourceProvider(w http.ResponseWriter, r *http.Request) {
+ log := logf.FromContext(r.Context())
+ log.Info("placement request", "method", r.Method, "path", r.URL.Path)
+}
+
+// HandleShowResourceProvider handles GET /resource_providers/{uuid} requests.
+//
+// Returns a single resource provider identified by its UUID. The response
+// includes the provider's name, generation (used for concurrency control in
+// subsequent updates), and links. Starting at microversion 1.14, the response
+// also includes parent_provider_uuid and root_provider_uuid to describe the
+// provider's position in a hierarchical tree. Returns 404 if the provider
+// does not exist.
+func HandleShowResourceProvider(w http.ResponseWriter, r *http.Request) {
+ uuid := r.PathValue("uuid")
+ log := logf.FromContext(r.Context())
+ log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
+}
+
+// HandleUpdateResourceProvider handles PUT /resource_providers/{uuid} requests.
+//
+// Updates a resource provider's name and, starting at microversion 1.14, its
+// parent_provider_uuid. Since microversion 1.37, the parent may be changed to
+// any existing provider UUID that would not create a loop in the tree, or set
+// to null to make the provider a root. Returns 409 Conflict if another
+// provider already has the requested name.
+func HandleUpdateResourceProvider(w http.ResponseWriter, r *http.Request) {
+ uuid := r.PathValue("uuid")
+ log := logf.FromContext(r.Context())
+ log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
+}
+
+// HandleDeleteResourceProvider handles DELETE /resource_providers/{uuid} requests.
+//
+// Deletes a resource provider and disassociates all its aggregates and
+// inventories. The operation fails with 409 Conflict if there are any
+// allocations against the provider's inventories or if the provider has
+// child providers in a tree hierarchy. Returns 204 No Content on success.
+func HandleDeleteResourceProvider(w http.ResponseWriter, r *http.Request) {
+ uuid := r.PathValue("uuid")
+ log := logf.FromContext(r.Context())
+ log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
+}
diff --git a/internal/shim/placement/handlers/root.go b/internal/shim/placement/handlers/root.go
new file mode 100644
index 000000000..bfef909b4
--- /dev/null
+++ b/internal/shim/placement/handlers/root.go
@@ -0,0 +1,23 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package handlers
+
+import (
+ "net/http"
+
+ logf "sigs.k8s.io/controller-runtime/pkg/log"
+)
+
+// HandleGetRoot handles GET / requests.
+//
+// Returns information about all known major versions of the Placement API,
+// including the minimum and maximum supported microversions for each version.
+// Currently only one major version (v1.0) exists. Each version entry includes
+// its status (e.g. CURRENT), links for discovery, and the microversion range
+// supported by the running service. Clients use this endpoint to discover API
+// capabilities and negotiate microversions before making further requests.
+func HandleGetRoot(w http.ResponseWriter, r *http.Request) {
+ log := logf.FromContext(r.Context())
+ log.Info("placement request", "method", r.Method, "path", r.URL.Path)
+}
diff --git a/internal/shim/placement/handlers/traits.go b/internal/shim/placement/handlers/traits.go
new file mode 100644
index 000000000..381b109b2
--- /dev/null
+++ b/internal/shim/placement/handlers/traits.go
@@ -0,0 +1,60 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package handlers
+
+import (
+ "net/http"
+
+ logf "sigs.k8s.io/controller-runtime/pkg/log"
+)
+
+// HandleListTraits handles GET /traits requests.
+//
+// Returns a list of valid trait strings. Traits describe qualitative aspects
+// of a resource provider (e.g. HW_CPU_X86_AVX2, STORAGE_DISK_SSD). The list
+// includes both standard traits from the os-traits library and custom traits
+// prefixed with CUSTOM_.
+//
+// Supports optional query parameters: name allows filtering by prefix
+// (startswith:CUSTOM) or by an explicit list (in:TRAIT1,TRAIT2), and
+// the associated parameter restricts results to traits that are (or are
+// not) associated with at least one resource provider.
+func HandleListTraits(w http.ResponseWriter, r *http.Request) {
+ log := logf.FromContext(r.Context())
+ log.Info("placement request", "method", r.Method, "path", r.URL.Path)
+}
+
+// HandleShowTrait handles GET /traits/{name} requests.
+//
+// Checks whether a trait with the given name exists. Returns 204 No Content
+// (with no response body) if the trait is found, or 404 Not Found otherwise.
+func HandleShowTrait(w http.ResponseWriter, r *http.Request) {
+ name := r.PathValue("name")
+ log := logf.FromContext(r.Context())
+ log.Info("placement request", "method", r.Method, "path", r.URL.Path, "name", name)
+}
+
+// HandleUpdateTrait handles PUT /traits/{name} requests.
+//
+// Creates a new custom trait. Only traits prefixed with CUSTOM_ may be
+// created; standard traits are read-only. Returns 201 Created if the trait
+// is newly inserted, or 204 No Content if it already exists. Returns 400
+// Bad Request if the name does not have the CUSTOM_ prefix.
+func HandleUpdateTrait(w http.ResponseWriter, r *http.Request) {
+ name := r.PathValue("name")
+ log := logf.FromContext(r.Context())
+ log.Info("placement request", "method", r.Method, "path", r.URL.Path, "name", name)
+}
+
+// HandleDeleteTrait handles DELETE /traits/{name} requests.
+//
+// Deletes a custom trait. Standard traits (those without the CUSTOM_ prefix)
+// cannot be deleted and will return 400 Bad Request. Returns 409 Conflict if
+// the trait is still associated with any resource provider. Returns 404 if
+// the trait does not exist. Returns 204 No Content on success.
+func HandleDeleteTrait(w http.ResponseWriter, r *http.Request) {
+ name := r.PathValue("name")
+ log := logf.FromContext(r.Context())
+ log.Info("placement request", "method", r.Method, "path", r.URL.Path, "name", name)
+}
diff --git a/internal/shim/placement/handlers/usages.go b/internal/shim/placement/handlers/usages.go
new file mode 100644
index 000000000..2fd5e66a6
--- /dev/null
+++ b/internal/shim/placement/handlers/usages.go
@@ -0,0 +1,27 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package handlers
+
+import (
+ "net/http"
+
+ logf "sigs.k8s.io/controller-runtime/pkg/log"
+)
+
+// HandleListUsages handles GET /usages requests.
+//
+// Returns a report of aggregated resource usage for a given project, and
+// optionally a specific user within that project. The project_id query
+// parameter is required; user_id is optional.
+//
+// The response format changed at microversion 1.38: earlier versions return
+// a flat dictionary of resource class to usage totals, while 1.38+ groups
+// usages by consumer_type (e.g. INSTANCE, MIGRATION, all, unknown), with
+// each group containing resource totals and a consumer_count. Since
+// microversion 1.38, an optional consumer_type query parameter allows
+// filtering the results. Available since microversion 1.9.
+func HandleListUsages(w http.ResponseWriter, r *http.Request) {
+ log := logf.FromContext(r.Context())
+ log.Info("placement request", "method", r.Method, "path", r.URL.Path)
+}
diff --git a/internal/shim/placement/handlers/zz_index.go b/internal/shim/placement/handlers/zz_index.go
new file mode 100644
index 000000000..fba5d76b1
--- /dev/null
+++ b/internal/shim/placement/handlers/zz_index.go
@@ -0,0 +1,73 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package handlers
+
+import "net/http"
+
+// RegisterRoutes binds all Placement API handlers to the given mux. The
+// route patterns use the Go 1.22+ ServeMux syntax with explicit HTTP methods
+// and path wildcards. The routes mirror the OpenStack Placement API surface
+// as documented at https://docs.openstack.org/api-ref/placement/.
+func RegisterRoutes(mux *http.ServeMux) {
+ // Root
+ mux.HandleFunc("GET /{$}", HandleGetRoot)
+
+ // Resource providers
+ mux.HandleFunc("GET /resource_providers", HandleListResourceProviders)
+ mux.HandleFunc("POST /resource_providers", HandleCreateResourceProvider)
+ mux.HandleFunc("GET /resource_providers/{uuid}", HandleShowResourceProvider)
+ mux.HandleFunc("PUT /resource_providers/{uuid}", HandleUpdateResourceProvider)
+ mux.HandleFunc("DELETE /resource_providers/{uuid}", HandleDeleteResourceProvider)
+
+ // Resource classes
+ mux.HandleFunc("GET /resource_classes", HandleListResourceClasses)
+ mux.HandleFunc("POST /resource_classes", HandleCreateResourceClass)
+ mux.HandleFunc("GET /resource_classes/{name}", HandleShowResourceClass)
+ mux.HandleFunc("PUT /resource_classes/{name}", HandleUpdateResourceClass)
+ mux.HandleFunc("DELETE /resource_classes/{name}", HandleDeleteResourceClass)
+
+ // Resource provider inventories
+ mux.HandleFunc("GET /resource_providers/{uuid}/inventories", HandleListResourceProviderInventories)
+ mux.HandleFunc("PUT /resource_providers/{uuid}/inventories", HandleUpdateResourceProviderInventories)
+ mux.HandleFunc("DELETE /resource_providers/{uuid}/inventories", HandleDeleteResourceProviderInventories)
+ mux.HandleFunc("GET /resource_providers/{uuid}/inventories/{resource_class}", HandleShowResourceProviderInventory)
+ mux.HandleFunc("PUT /resource_providers/{uuid}/inventories/{resource_class}", HandleUpdateResourceProviderInventory)
+ mux.HandleFunc("DELETE /resource_providers/{uuid}/inventories/{resource_class}", HandleDeleteResourceProviderInventory)
+
+ // Resource provider aggregates
+ mux.HandleFunc("GET /resource_providers/{uuid}/aggregates", HandleListResourceProviderAggregates)
+ mux.HandleFunc("PUT /resource_providers/{uuid}/aggregates", HandleUpdateResourceProviderAggregates)
+
+ // Traits
+ mux.HandleFunc("GET /traits", HandleListTraits)
+ mux.HandleFunc("GET /traits/{name}", HandleShowTrait)
+ mux.HandleFunc("PUT /traits/{name}", HandleUpdateTrait)
+ mux.HandleFunc("DELETE /traits/{name}", HandleDeleteTrait)
+
+ // Resource provider traits
+ mux.HandleFunc("GET /resource_providers/{uuid}/traits", HandleListResourceProviderTraits)
+ mux.HandleFunc("PUT /resource_providers/{uuid}/traits", HandleUpdateResourceProviderTraits)
+ mux.HandleFunc("DELETE /resource_providers/{uuid}/traits", HandleDeleteResourceProviderTraits)
+
+ // Allocations
+ mux.HandleFunc("POST /allocations", HandleManageAllocations)
+ mux.HandleFunc("GET /allocations/{consumer_uuid}", HandleListAllocations)
+ mux.HandleFunc("PUT /allocations/{consumer_uuid}", HandleUpdateAllocations)
+ mux.HandleFunc("DELETE /allocations/{consumer_uuid}", HandleDeleteAllocations)
+
+ // Resource provider allocations
+ mux.HandleFunc("GET /resource_providers/{uuid}/allocations", HandleListResourceProviderAllocations)
+
+ // Usages
+ mux.HandleFunc("GET /usages", HandleListUsages)
+
+ // Resource provider usages
+ mux.HandleFunc("GET /resource_providers/{uuid}/usages", HandleListResourceProviderUsages)
+
+ // Allocation candidates
+ mux.HandleFunc("GET /allocation_candidates", HandleListAllocationCandidates)
+
+ // Reshaper
+ mux.HandleFunc("POST /reshaper", HandlePostReshaper)
+}
From 7ba4f545089e7c6f5087756fb7e568477cf85590 Mon Sep 17 00:00:00 2001
From: Philipp Matthes
Date: Thu, 9 Apr 2026 14:30:17 +0200
Subject: [PATCH 08/17] Add path parameter validation
---
.../shim/placement/handlers/allocations.go | 15 +++++--
.../placement/handlers/resource_classes.go | 15 +++++--
.../handlers/resource_provider_aggregates.go | 10 ++++-
.../handlers/resource_provider_allocations.go | 5 ++-
.../handlers/resource_provider_inventories.go | 45 +++++++++++++++----
.../handlers/resource_provider_traits.go | 15 +++++--
.../handlers/resource_provider_usages.go | 5 ++-
.../placement/handlers/resource_providers.go | 15 +++++--
internal/shim/placement/handlers/traits.go | 15 +++++--
.../shim/placement/handlers/validation.go | 38 ++++++++++++++++
10 files changed, 150 insertions(+), 28 deletions(-)
create mode 100644 internal/shim/placement/handlers/validation.go
diff --git a/internal/shim/placement/handlers/allocations.go b/internal/shim/placement/handlers/allocations.go
index aa4ba99f2..0e125e126 100644
--- a/internal/shim/placement/handlers/allocations.go
+++ b/internal/shim/placement/handlers/allocations.go
@@ -41,7 +41,10 @@ func HandleManageAllocations(w http.ResponseWriter, r *http.Request) {
// The consumer_generation and consumer_type fields are absent when the
// consumer has no allocations.
func HandleListAllocations(w http.ResponseWriter, r *http.Request) {
- consumerUUID := r.PathValue("consumer_uuid")
+ consumerUUID, ok := requiredUUIDPathParam(w, r, "consumer_uuid")
+ if !ok {
+ return
+ }
log := logf.FromContext(r.Context())
log.Info("placement request", "method", r.Method, "path", r.URL.Path,
"consumer_uuid", consumerUUID)
@@ -59,7 +62,10 @@ func HandleListAllocations(w http.ResponseWriter, r *http.Request) {
// Returns 204 No Content on success. Returns 409 Conflict if there is
// insufficient inventory or if a concurrent update was detected.
func HandleUpdateAllocations(w http.ResponseWriter, r *http.Request) {
- consumerUUID := r.PathValue("consumer_uuid")
+ consumerUUID, ok := requiredUUIDPathParam(w, r, "consumer_uuid")
+ if !ok {
+ return
+ }
log := logf.FromContext(r.Context())
log.Info("placement request", "method", r.Method, "path", r.URL.Path,
"consumer_uuid", consumerUUID)
@@ -71,7 +77,10 @@ func HandleUpdateAllocations(w http.ResponseWriter, r *http.Request) {
// providers. Returns 204 No Content on success, or 404 Not Found if the
// consumer has no existing allocations.
func HandleDeleteAllocations(w http.ResponseWriter, r *http.Request) {
- consumerUUID := r.PathValue("consumer_uuid")
+ consumerUUID, ok := requiredUUIDPathParam(w, r, "consumer_uuid")
+ if !ok {
+ return
+ }
log := logf.FromContext(r.Context())
log.Info("placement request", "method", r.Method, "path", r.URL.Path,
"consumer_uuid", consumerUUID)
diff --git a/internal/shim/placement/handlers/resource_classes.go b/internal/shim/placement/handlers/resource_classes.go
index 81548b3b5..f5f8453c0 100644
--- a/internal/shim/placement/handlers/resource_classes.go
+++ b/internal/shim/placement/handlers/resource_classes.go
@@ -39,7 +39,10 @@ func HandleCreateResourceClass(w http.ResponseWriter, r *http.Request) {
// This can be used to verify the existence of a resource class. Returns 404
// if the class does not exist. Available since microversion 1.2.
func HandleShowResourceClass(w http.ResponseWriter, r *http.Request) {
- name := r.PathValue("name")
+ name, ok := requiredPathParam(w, r, "name")
+ if !ok {
+ return
+ }
log := logf.FromContext(r.Context())
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "name", name)
}
@@ -53,7 +56,10 @@ func HandleShowResourceClass(w http.ResponseWriter, r *http.Request) {
// endpoint allowed renaming a class via a request body, but this usage is
// discouraged. Returns 400 Bad Request if the CUSTOM_ prefix is missing.
func HandleUpdateResourceClass(w http.ResponseWriter, r *http.Request) {
- name := r.PathValue("name")
+ name, ok := requiredPathParam(w, r, "name")
+ if !ok {
+ return
+ }
log := logf.FromContext(r.Context())
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "name", name)
}
@@ -66,7 +72,10 @@ func HandleUpdateResourceClass(w http.ResponseWriter, r *http.Request) {
// class, and 404 if the class does not exist. Returns 204 No Content on
// success. Available since microversion 1.2.
func HandleDeleteResourceClass(w http.ResponseWriter, r *http.Request) {
- name := r.PathValue("name")
+ name, ok := requiredPathParam(w, r, "name")
+ if !ok {
+ return
+ }
log := logf.FromContext(r.Context())
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "name", name)
}
diff --git a/internal/shim/placement/handlers/resource_provider_aggregates.go b/internal/shim/placement/handlers/resource_provider_aggregates.go
index 172c40b2f..131969fb1 100644
--- a/internal/shim/placement/handlers/resource_provider_aggregates.go
+++ b/internal/shim/placement/handlers/resource_provider_aggregates.go
@@ -23,7 +23,10 @@ import (
// includes the resource_provider_generation for concurrency tracking. Returns
// 404 if the provider does not exist.
func HandleListResourceProviderAggregates(w http.ResponseWriter, r *http.Request) {
- uuid := r.PathValue("uuid")
+ uuid, ok := requiredUUIDPathParam(w, r, "uuid")
+ if !ok {
+ return
+ }
log := logf.FromContext(r.Context())
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
}
@@ -39,7 +42,10 @@ func HandleListResourceProviderAggregates(w http.ResponseWriter, r *http.Request
// concurrency control. Returns 409 Conflict if the generation does not match
// (1.19+). Returns 200 with the updated aggregate list on success.
func HandleUpdateResourceProviderAggregates(w http.ResponseWriter, r *http.Request) {
- uuid := r.PathValue("uuid")
+ uuid, ok := requiredUUIDPathParam(w, r, "uuid")
+ if !ok {
+ return
+ }
log := logf.FromContext(r.Context())
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
}
diff --git a/internal/shim/placement/handlers/resource_provider_allocations.go b/internal/shim/placement/handlers/resource_provider_allocations.go
index 0e27ca7e6..c7b0dfe70 100644
--- a/internal/shim/placement/handlers/resource_provider_allocations.go
+++ b/internal/shim/placement/handlers/resource_provider_allocations.go
@@ -18,7 +18,10 @@ import (
// endpoint. The response includes the resource_provider_generation. Returns
// 404 if the provider does not exist.
func HandleListResourceProviderAllocations(w http.ResponseWriter, r *http.Request) {
- uuid := r.PathValue("uuid")
+ uuid, ok := requiredUUIDPathParam(w, r, "uuid")
+ if !ok {
+ return
+ }
log := logf.FromContext(r.Context())
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
}
diff --git a/internal/shim/placement/handlers/resource_provider_inventories.go b/internal/shim/placement/handlers/resource_provider_inventories.go
index 303370e69..9c23e56d3 100644
--- a/internal/shim/placement/handlers/resource_provider_inventories.go
+++ b/internal/shim/placement/handlers/resource_provider_inventories.go
@@ -19,7 +19,10 @@ import (
// resource_provider_generation, which is needed for subsequent update or
// delete operations. Returns 404 if the provider does not exist.
func HandleListResourceProviderInventories(w http.ResponseWriter, r *http.Request) {
- uuid := r.PathValue("uuid")
+ uuid, ok := requiredUUIDPathParam(w, r, "uuid")
+ if !ok {
+ return
+ }
log := logf.FromContext(r.Context())
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
}
@@ -35,7 +38,10 @@ func HandleListResourceProviderInventories(w http.ResponseWriter, r *http.Reques
// are deleted. Returns 409 Conflict if allocations exceed the new capacity
// or if a concurrent update has occurred.
func HandleUpdateResourceProviderInventories(w http.ResponseWriter, r *http.Request) {
- uuid := r.PathValue("uuid")
+ uuid, ok := requiredUUIDPathParam(w, r, "uuid")
+ if !ok {
+ return
+ }
log := logf.FromContext(r.Context())
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
}
@@ -50,7 +56,10 @@ func HandleUpdateResourceProviderInventories(w http.ResponseWriter, r *http.Requ
// Returns 404 if the provider does not exist. Available since microversion
// 1.5.
func HandleDeleteResourceProviderInventories(w http.ResponseWriter, r *http.Request) {
- uuid := r.PathValue("uuid")
+ uuid, ok := requiredUUIDPathParam(w, r, "uuid")
+ if !ok {
+ return
+ }
log := logf.FromContext(r.Context())
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
}
@@ -63,8 +72,14 @@ func HandleDeleteResourceProviderInventories(w http.ResponseWriter, r *http.Requ
// step_size, allocation_ratio, and the resource_provider_generation. Returns
// 404 if the provider or inventory for that class does not exist.
func HandleShowResourceProviderInventory(w http.ResponseWriter, r *http.Request) {
- uuid := r.PathValue("uuid")
- resourceClass := r.PathValue("resource_class")
+ uuid, ok := requiredUUIDPathParam(w, r, "uuid")
+ if !ok {
+ return
+ }
+ resourceClass, ok := requiredPathParam(w, r, "resource_class")
+ if !ok {
+ return
+ }
log := logf.FromContext(r.Context())
log.Info("placement request", "method", r.Method, "path", r.URL.Path,
"uuid", uuid, "resource_class", resourceClass)
@@ -80,8 +95,14 @@ func HandleShowResourceProviderInventory(w http.ResponseWriter, r *http.Request)
// Since microversion 1.26, the reserved value must not exceed total. Returns
// 409 Conflict on generation mismatch or if allocations would be violated.
func HandleUpdateResourceProviderInventory(w http.ResponseWriter, r *http.Request) {
- uuid := r.PathValue("uuid")
- resourceClass := r.PathValue("resource_class")
+ uuid, ok := requiredUUIDPathParam(w, r, "uuid")
+ if !ok {
+ return
+ }
+ resourceClass, ok := requiredPathParam(w, r, "resource_class")
+ if !ok {
+ return
+ }
log := logf.FromContext(r.Context())
log.Info("placement request", "method", r.Method, "path", r.URL.Path,
"uuid", uuid, "resource_class", resourceClass)
@@ -95,8 +116,14 @@ func HandleUpdateResourceProviderInventory(w http.ResponseWriter, r *http.Reques
// class combination, or if a concurrent update has occurred. Returns 404 if
// the provider or inventory does not exist. Returns 204 No Content on success.
func HandleDeleteResourceProviderInventory(w http.ResponseWriter, r *http.Request) {
- uuid := r.PathValue("uuid")
- resourceClass := r.PathValue("resource_class")
+ uuid, ok := requiredUUIDPathParam(w, r, "uuid")
+ if !ok {
+ return
+ }
+ resourceClass, ok := requiredPathParam(w, r, "resource_class")
+ if !ok {
+ return
+ }
log := logf.FromContext(r.Context())
log.Info("placement request", "method", r.Method, "path", r.URL.Path,
"uuid", uuid, "resource_class", resourceClass)
diff --git a/internal/shim/placement/handlers/resource_provider_traits.go b/internal/shim/placement/handlers/resource_provider_traits.go
index 915ccda05..a21b35ddc 100644
--- a/internal/shim/placement/handlers/resource_provider_traits.go
+++ b/internal/shim/placement/handlers/resource_provider_traits.go
@@ -17,7 +17,10 @@ import (
// resource_provider_generation for concurrency tracking. Returns 404 if the
// provider does not exist.
func HandleListResourceProviderTraits(w http.ResponseWriter, r *http.Request) {
- uuid := r.PathValue("uuid")
+ uuid, ok := requiredUUIDPathParam(w, r, "uuid")
+ if !ok {
+ return
+ }
log := logf.FromContext(r.Context())
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
}
@@ -33,7 +36,10 @@ func HandleListResourceProviderTraits(w http.ResponseWriter, r *http.Request) {
// not returned by GET /traits). Returns 409 Conflict if the generation does
// not match.
func HandleUpdateResourceProviderTraits(w http.ResponseWriter, r *http.Request) {
- uuid := r.PathValue("uuid")
+ uuid, ok := requiredUUIDPathParam(w, r, "uuid")
+ if !ok {
+ return
+ }
log := logf.FromContext(r.Context())
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
}
@@ -48,7 +54,10 @@ func HandleUpdateResourceProviderTraits(w http.ResponseWriter, r *http.Request)
// Returns 404 if the provider does not exist. Returns 409 Conflict on
// concurrent modification. Returns 204 No Content on success.
func HandleDeleteResourceProviderTraits(w http.ResponseWriter, r *http.Request) {
- uuid := r.PathValue("uuid")
+ uuid, ok := requiredUUIDPathParam(w, r, "uuid")
+ if !ok {
+ return
+ }
log := logf.FromContext(r.Context())
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
}
diff --git a/internal/shim/placement/handlers/resource_provider_usages.go b/internal/shim/placement/handlers/resource_provider_usages.go
index 319da4a70..418862b4a 100644
--- a/internal/shim/placement/handlers/resource_provider_usages.go
+++ b/internal/shim/placement/handlers/resource_provider_usages.go
@@ -18,7 +18,10 @@ import (
// Unlike the provider allocations endpoint, this does not break down usage by
// individual consumer. Returns 404 if the provider does not exist.
func HandleListResourceProviderUsages(w http.ResponseWriter, r *http.Request) {
- uuid := r.PathValue("uuid")
+ uuid, ok := requiredUUIDPathParam(w, r, "uuid")
+ if !ok {
+ return
+ }
log := logf.FromContext(r.Context())
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
}
diff --git a/internal/shim/placement/handlers/resource_providers.go b/internal/shim/placement/handlers/resource_providers.go
index 6f4f975fe..3ecc6428e 100644
--- a/internal/shim/placement/handlers/resource_providers.go
+++ b/internal/shim/placement/handlers/resource_providers.go
@@ -53,7 +53,10 @@ func HandleCreateResourceProvider(w http.ResponseWriter, r *http.Request) {
// provider's position in a hierarchical tree. Returns 404 if the provider
// does not exist.
func HandleShowResourceProvider(w http.ResponseWriter, r *http.Request) {
- uuid := r.PathValue("uuid")
+ uuid, ok := requiredUUIDPathParam(w, r, "uuid")
+ if !ok {
+ return
+ }
log := logf.FromContext(r.Context())
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
}
@@ -66,7 +69,10 @@ func HandleShowResourceProvider(w http.ResponseWriter, r *http.Request) {
// to null to make the provider a root. Returns 409 Conflict if another
// provider already has the requested name.
func HandleUpdateResourceProvider(w http.ResponseWriter, r *http.Request) {
- uuid := r.PathValue("uuid")
+ uuid, ok := requiredUUIDPathParam(w, r, "uuid")
+ if !ok {
+ return
+ }
log := logf.FromContext(r.Context())
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
}
@@ -78,7 +84,10 @@ func HandleUpdateResourceProvider(w http.ResponseWriter, r *http.Request) {
// allocations against the provider's inventories or if the provider has
// child providers in a tree hierarchy. Returns 204 No Content on success.
func HandleDeleteResourceProvider(w http.ResponseWriter, r *http.Request) {
- uuid := r.PathValue("uuid")
+ uuid, ok := requiredUUIDPathParam(w, r, "uuid")
+ if !ok {
+ return
+ }
log := logf.FromContext(r.Context())
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
}
diff --git a/internal/shim/placement/handlers/traits.go b/internal/shim/placement/handlers/traits.go
index 381b109b2..92ea4b017 100644
--- a/internal/shim/placement/handlers/traits.go
+++ b/internal/shim/placement/handlers/traits.go
@@ -30,7 +30,10 @@ func HandleListTraits(w http.ResponseWriter, r *http.Request) {
// Checks whether a trait with the given name exists. Returns 204 No Content
// (with no response body) if the trait is found, or 404 Not Found otherwise.
func HandleShowTrait(w http.ResponseWriter, r *http.Request) {
- name := r.PathValue("name")
+ name, ok := requiredPathParam(w, r, "name")
+ if !ok {
+ return
+ }
log := logf.FromContext(r.Context())
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "name", name)
}
@@ -42,7 +45,10 @@ func HandleShowTrait(w http.ResponseWriter, r *http.Request) {
// is newly inserted, or 204 No Content if it already exists. Returns 400
// Bad Request if the name does not carry the CUSTOM_ prefix.
func HandleUpdateTrait(w http.ResponseWriter, r *http.Request) {
- name := r.PathValue("name")
+ name, ok := requiredPathParam(w, r, "name")
+ if !ok {
+ return
+ }
log := logf.FromContext(r.Context())
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "name", name)
}
@@ -54,7 +60,10 @@ func HandleUpdateTrait(w http.ResponseWriter, r *http.Request) {
// the trait is still associated with any resource provider. Returns 404 if
// the trait does not exist. Returns 204 No Content on success.
func HandleDeleteTrait(w http.ResponseWriter, r *http.Request) {
- name := r.PathValue("name")
+ name, ok := requiredPathParam(w, r, "name")
+ if !ok {
+ return
+ }
log := logf.FromContext(r.Context())
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "name", name)
}
diff --git a/internal/shim/placement/handlers/validation.go b/internal/shim/placement/handlers/validation.go
new file mode 100644
index 000000000..5f8bcf2b9
--- /dev/null
+++ b/internal/shim/placement/handlers/validation.go
@@ -0,0 +1,38 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package handlers
+
+import (
+ "fmt"
+ "net/http"
+
+ "github.com/google/uuid"
+)
+
+// requiredPathParam extracts a path parameter by name and verifies that it is
+// non-empty. If the value is missing, it writes a 400 response and returns
+// an empty string.
+func requiredPathParam(w http.ResponseWriter, r *http.Request, name string) (string, bool) {
+ v := r.PathValue(name)
+ if v == "" {
+ http.Error(w, fmt.Sprintf("missing path parameter: %s", name), http.StatusBadRequest)
+ return "", false
+ }
+ return v, true
+}
+
+// requiredUUIDPathParam extracts a path parameter by name and verifies that it
+// is a valid UUID. If the value is missing or not a valid UUID, it writes a
+// 400 response and returns an empty string.
+func requiredUUIDPathParam(w http.ResponseWriter, r *http.Request, name string) (string, bool) {
+ v, ok := requiredPathParam(w, r, name)
+ if !ok {
+ return "", false
+ }
+ if err := uuid.Validate(v); err != nil {
+ http.Error(w, fmt.Sprintf("invalid UUID in path parameter %s: %s", name, v), http.StatusBadRequest)
+ return "", false
+ }
+ return v, true
+}
From 41c28b775074aef29f71c781e05d88709edb5ad8 Mon Sep 17 00:00:00 2001
From: Philipp Matthes
Date: Thu, 9 Apr 2026 17:01:42 +0200
Subject: [PATCH 09/17] Add hypervisor informer cache
---
cmd/shim/main.go | 9 +-
internal/shim/placement/.gitkeep | 0
...tes.go => handle_allocation_candidates.go} | 7 +-
.../allocations.go => handle_allocations.go} | 22 ++--
.../reshaper.go => handle_reshaper.go} | 7 +-
..._classes.go => handle_resource_classes.go} | 27 ++--
...=> handle_resource_provider_aggregates.go} | 12 +-
...> handle_resource_provider_allocations.go} | 7 +-
...> handle_resource_provider_inventories.go} | 32 +++--
....go => handle_resource_provider_traits.go} | 17 +--
....go => handle_resource_provider_usages.go} | 7 +-
...viders.go => handle_resource_providers.go} | 27 ++--
.../{handlers/root.go => handle_root.go} | 7 +-
.../{handlers/traits.go => handle_traits.go} | 22 ++--
.../{handlers/usages.go => handle_usages.go} | 7 +-
internal/shim/placement/handlers/zz_index.go | 73 -----------
internal/shim/placement/shim.go | 118 ++++++++++++++++++
.../placement/{handlers => }/validation.go | 2 +-
18 files changed, 244 insertions(+), 159 deletions(-)
delete mode 100644 internal/shim/placement/.gitkeep
rename internal/shim/placement/{handlers/allocation_candidates.go => handle_allocation_candidates.go} (91%)
rename internal/shim/placement/{handlers/allocations.go => handle_allocations.go} (85%)
rename internal/shim/placement/{handlers/reshaper.go => handle_reshaper.go} (88%)
rename internal/shim/placement/{handlers/resource_classes.go => handle_resource_classes.go} (80%)
rename internal/shim/placement/{handlers/resource_provider_aggregates.go => handle_resource_provider_aggregates.go} (85%)
rename internal/shim/placement/{handlers/resource_provider_allocations.go => handle_resource_provider_allocations.go} (82%)
rename internal/shim/placement/{handlers/resource_provider_inventories.go => handle_resource_provider_inventories.go} (83%)
rename internal/shim/placement/{handlers/resource_provider_traits.go => handle_resource_provider_traits.go} (82%)
rename internal/shim/placement/{handlers/resource_provider_usages.go => handle_resource_provider_usages.go} (83%)
rename internal/shim/placement/{handlers/resource_providers.go => handle_resource_providers.go} (83%)
rename internal/shim/placement/{handlers/root.go => handle_root.go} (83%)
rename internal/shim/placement/{handlers/traits.go => handle_traits.go} (80%)
rename internal/shim/placement/{handlers/usages.go => handle_usages.go} (86%)
delete mode 100644 internal/shim/placement/handlers/zz_index.go
create mode 100644 internal/shim/placement/shim.go
rename internal/shim/placement/{handlers => }/validation.go (98%)
diff --git a/cmd/shim/main.go b/cmd/shim/main.go
index 41be2c084..ce81d6b44 100644
--- a/cmd/shim/main.go
+++ b/cmd/shim/main.go
@@ -12,7 +12,7 @@ import (
"path/filepath"
"github.com/cobaltcore-dev/cortex/api/v1alpha1"
- placementhandlers "github.com/cobaltcore-dev/cortex/internal/shim/placement/handlers"
+ "github.com/cobaltcore-dev/cortex/internal/shim/placement"
"github.com/cobaltcore-dev/cortex/pkg/conf"
"github.com/cobaltcore-dev/cortex/pkg/monitoring"
hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
@@ -207,7 +207,12 @@ func main() {
// API endpoint.
mux := http.NewServeMux()
if enablePlacementShim {
- placementhandlers.RegisterRoutes(mux)
+ placementShim := &placement.Shim{Client: mgr.GetClient()}
+ if err := placementShim.SetupWithManager(mgr); err != nil {
+ setupLog.Error(err, "unable to set up placement shim")
+ os.Exit(1)
+ }
+ placementShim.RegisterRoutes(mux)
}
// +kubebuilder:scaffold:builder
diff --git a/internal/shim/placement/.gitkeep b/internal/shim/placement/.gitkeep
deleted file mode 100644
index e69de29bb..000000000
diff --git a/internal/shim/placement/handlers/allocation_candidates.go b/internal/shim/placement/handle_allocation_candidates.go
similarity index 91%
rename from internal/shim/placement/handlers/allocation_candidates.go
rename to internal/shim/placement/handle_allocation_candidates.go
index 8d864a9a4..b252d0026 100644
--- a/internal/shim/placement/handlers/allocation_candidates.go
+++ b/internal/shim/placement/handle_allocation_candidates.go
@@ -1,7 +1,7 @@
// Copyright SAP SE
// SPDX-License-Identifier: Apache-2.0
-package handlers
+package placement
import (
"net/http"
@@ -34,7 +34,8 @@ import (
// PUT /allocations/{consumer_uuid}. The provider_summaries section includes
// inventory capacity and usage for informed decision-making. Available since
// microversion 1.10.
-func HandleListAllocationCandidates(w http.ResponseWriter, r *http.Request) {
- log := logf.FromContext(r.Context())
+func (s *Shim) HandleListAllocationCandidates(w http.ResponseWriter, r *http.Request) {
+ ctx := r.Context()
+ log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path)
}
diff --git a/internal/shim/placement/handlers/allocations.go b/internal/shim/placement/handle_allocations.go
similarity index 85%
rename from internal/shim/placement/handlers/allocations.go
rename to internal/shim/placement/handle_allocations.go
index 0e125e126..f406ea5da 100644
--- a/internal/shim/placement/handlers/allocations.go
+++ b/internal/shim/placement/handle_allocations.go
@@ -1,7 +1,7 @@
// Copyright SAP SE
// SPDX-License-Identifier: Apache-2.0
-package handlers
+package placement
import (
"net/http"
@@ -24,8 +24,9 @@ import (
// (e.g. INSTANCE, MIGRATION) is supported. Returns 204 No Content on
// success, or 409 Conflict if inventory is insufficient or a concurrent
// update is detected (error code: placement.concurrent_update).
-func HandleManageAllocations(w http.ResponseWriter, r *http.Request) {
- log := logf.FromContext(r.Context())
+func (s *Shim) HandleManageAllocations(w http.ResponseWriter, r *http.Request) {
+ ctx := r.Context()
+ log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path)
}
@@ -40,12 +41,13 @@ func HandleManageAllocations(w http.ResponseWriter, r *http.Request) {
// added at 1.12, consumer_generation at 1.28, and consumer_type at 1.38.
// The consumer_generation and consumer_type fields are absent when the
// consumer has no allocations.
-func HandleListAllocations(w http.ResponseWriter, r *http.Request) {
+func (s *Shim) HandleListAllocations(w http.ResponseWriter, r *http.Request) {
consumerUUID, ok := requiredUUIDPathParam(w, r, "consumer_uuid")
if !ok {
return
}
- log := logf.FromContext(r.Context())
+ ctx := r.Context()
+ log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path,
"consumer_uuid", consumerUUID)
}
@@ -61,12 +63,13 @@ func HandleListAllocations(w http.ResponseWriter, r *http.Request) {
//
// Returns 204 No Content on success. Returns 409 Conflict if there is
// insufficient inventory or if a concurrent update was detected.
-func HandleUpdateAllocations(w http.ResponseWriter, r *http.Request) {
+func (s *Shim) HandleUpdateAllocations(w http.ResponseWriter, r *http.Request) {
consumerUUID, ok := requiredUUIDPathParam(w, r, "consumer_uuid")
if !ok {
return
}
- log := logf.FromContext(r.Context())
+ ctx := r.Context()
+ log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path,
"consumer_uuid", consumerUUID)
}
@@ -76,12 +79,13 @@ func HandleUpdateAllocations(w http.ResponseWriter, r *http.Request) {
// Removes all allocation records for the consumer across all resource
// providers. Returns 204 No Content on success, or 404 Not Found if the
// consumer has no existing allocations.
-func HandleDeleteAllocations(w http.ResponseWriter, r *http.Request) {
+func (s *Shim) HandleDeleteAllocations(w http.ResponseWriter, r *http.Request) {
consumerUUID, ok := requiredUUIDPathParam(w, r, "consumer_uuid")
if !ok {
return
}
- log := logf.FromContext(r.Context())
+ ctx := r.Context()
+ log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path,
"consumer_uuid", consumerUUID)
}
diff --git a/internal/shim/placement/handlers/reshaper.go b/internal/shim/placement/handle_reshaper.go
similarity index 88%
rename from internal/shim/placement/handlers/reshaper.go
rename to internal/shim/placement/handle_reshaper.go
index 59556c94a..fe0d85069 100644
--- a/internal/shim/placement/handlers/reshaper.go
+++ b/internal/shim/placement/handle_reshaper.go
@@ -1,7 +1,7 @@
// Copyright SAP SE
// SPDX-License-Identifier: Apache-2.0
-package handlers
+package placement
import (
"net/http"
@@ -24,7 +24,8 @@ import (
// Returns 204 No Content on success. Returns 409 Conflict if any referenced
// resource provider does not exist or if inventory/allocation constraints
// would be violated. Available since microversion 1.30.
-func HandlePostReshaper(w http.ResponseWriter, r *http.Request) {
- log := logf.FromContext(r.Context())
+func (s *Shim) HandlePostReshaper(w http.ResponseWriter, r *http.Request) {
+ ctx := r.Context()
+ log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path)
}
diff --git a/internal/shim/placement/handlers/resource_classes.go b/internal/shim/placement/handle_resource_classes.go
similarity index 80%
rename from internal/shim/placement/handlers/resource_classes.go
rename to internal/shim/placement/handle_resource_classes.go
index f5f8453c0..554c43034 100644
--- a/internal/shim/placement/handlers/resource_classes.go
+++ b/internal/shim/placement/handle_resource_classes.go
@@ -1,7 +1,7 @@
// Copyright SAP SE
// SPDX-License-Identifier: Apache-2.0
-package handlers
+package placement
import (
"net/http"
@@ -16,8 +16,9 @@ import (
// deployer-defined custom classes prefixed with CUSTOM_. Resource classes
// categorize the types of resources that resource providers can offer as
// inventory. Available since microversion 1.2.
-func HandleListResourceClasses(w http.ResponseWriter, r *http.Request) {
- log := logf.FromContext(r.Context())
+func (s *Shim) HandleListResourceClasses(w http.ResponseWriter, r *http.Request) {
+ ctx := r.Context()
+ log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path)
}
@@ -28,8 +29,9 @@ func HandleListResourceClasses(w http.ResponseWriter, r *http.Request) {
// a Location header on success. Returns 400 Bad Request if the CUSTOM_ prefix
// is missing, and 409 Conflict if a class with the same name already exists.
// Available since microversion 1.2.
-func HandleCreateResourceClass(w http.ResponseWriter, r *http.Request) {
- log := logf.FromContext(r.Context())
+func (s *Shim) HandleCreateResourceClass(w http.ResponseWriter, r *http.Request) {
+ ctx := r.Context()
+ log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path)
}
@@ -38,12 +40,13 @@ func HandleCreateResourceClass(w http.ResponseWriter, r *http.Request) {
// Returns a representation of a single resource class identified by name.
// This can be used to verify the existence of a resource class. Returns 404
// if the class does not exist. Available since microversion 1.2.
-func HandleShowResourceClass(w http.ResponseWriter, r *http.Request) {
+func (s *Shim) HandleShowResourceClass(w http.ResponseWriter, r *http.Request) {
name, ok := requiredPathParam(w, r, "name")
if !ok {
return
}
- log := logf.FromContext(r.Context())
+ ctx := r.Context()
+ log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "name", name)
}
@@ -55,12 +58,13 @@ func HandleShowResourceClass(w http.ResponseWriter, r *http.Request) {
// The name must carry the CUSTOM_ prefix. In earlier versions (1.2-1.6), the
// endpoint allowed renaming a class via a request body, but this usage is
// discouraged. Returns 400 Bad Request if the CUSTOM_ prefix is missing.
-func HandleUpdateResourceClass(w http.ResponseWriter, r *http.Request) {
+func (s *Shim) HandleUpdateResourceClass(w http.ResponseWriter, r *http.Request) {
name, ok := requiredPathParam(w, r, "name")
if !ok {
return
}
- log := logf.FromContext(r.Context())
+ ctx := r.Context()
+ log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "name", name)
}
@@ -71,11 +75,12 @@ func HandleUpdateResourceClass(w http.ResponseWriter, r *http.Request) {
// Request. Returns 409 Conflict if any resource provider has inventory of this
// class, and 404 if the class does not exist. Returns 204 No Content on
// success. Available since microversion 1.2.
-func HandleDeleteResourceClass(w http.ResponseWriter, r *http.Request) {
+func (s *Shim) HandleDeleteResourceClass(w http.ResponseWriter, r *http.Request) {
name, ok := requiredPathParam(w, r, "name")
if !ok {
return
}
- log := logf.FromContext(r.Context())
+ ctx := r.Context()
+ log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "name", name)
}
diff --git a/internal/shim/placement/handlers/resource_provider_aggregates.go b/internal/shim/placement/handle_resource_provider_aggregates.go
similarity index 85%
rename from internal/shim/placement/handlers/resource_provider_aggregates.go
rename to internal/shim/placement/handle_resource_provider_aggregates.go
index 131969fb1..ce8febe50 100644
--- a/internal/shim/placement/handlers/resource_provider_aggregates.go
+++ b/internal/shim/placement/handle_resource_provider_aggregates.go
@@ -1,7 +1,7 @@
// Copyright SAP SE
// SPDX-License-Identifier: Apache-2.0
-package handlers
+package placement
import (
"net/http"
@@ -22,12 +22,13 @@ import (
// only a flat array of UUIDs, while 1.19+ returns an object that also
// includes the resource_provider_generation for concurrency tracking. Returns
// 404 if the provider does not exist.
-func HandleListResourceProviderAggregates(w http.ResponseWriter, r *http.Request) {
+func (s *Shim) HandleListResourceProviderAggregates(w http.ResponseWriter, r *http.Request) {
uuid, ok := requiredUUIDPathParam(w, r, "uuid")
if !ok {
return
}
- log := logf.FromContext(r.Context())
+ ctx := r.Context()
+ log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
}
@@ -41,11 +42,12 @@ func HandleListResourceProviderAggregates(w http.ResponseWriter, r *http.Request
// aggregates array and a resource_provider_generation for optimistic
// concurrency control. Returns 409 Conflict if the generation does not match
// (1.19+). Returns 200 with the updated aggregate list on success.
-func HandleUpdateResourceProviderAggregates(w http.ResponseWriter, r *http.Request) {
+func (s *Shim) HandleUpdateResourceProviderAggregates(w http.ResponseWriter, r *http.Request) {
uuid, ok := requiredUUIDPathParam(w, r, "uuid")
if !ok {
return
}
- log := logf.FromContext(r.Context())
+ ctx := r.Context()
+ log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
}
diff --git a/internal/shim/placement/handlers/resource_provider_allocations.go b/internal/shim/placement/handle_resource_provider_allocations.go
similarity index 82%
rename from internal/shim/placement/handlers/resource_provider_allocations.go
rename to internal/shim/placement/handle_resource_provider_allocations.go
index c7b0dfe70..b3f6dcd68 100644
--- a/internal/shim/placement/handlers/resource_provider_allocations.go
+++ b/internal/shim/placement/handle_resource_provider_allocations.go
@@ -1,7 +1,7 @@
// Copyright SAP SE
// SPDX-License-Identifier: Apache-2.0
-package handlers
+package placement
import (
"net/http"
@@ -17,11 +17,12 @@ import (
// consumption, complementing the consumer-centric GET /allocations/{consumer}
// endpoint. The response includes the resource_provider_generation. Returns
// 404 if the provider does not exist.
-func HandleListResourceProviderAllocations(w http.ResponseWriter, r *http.Request) {
+func (s *Shim) HandleListResourceProviderAllocations(w http.ResponseWriter, r *http.Request) {
uuid, ok := requiredUUIDPathParam(w, r, "uuid")
if !ok {
return
}
- log := logf.FromContext(r.Context())
+ ctx := r.Context()
+ log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
}
diff --git a/internal/shim/placement/handlers/resource_provider_inventories.go b/internal/shim/placement/handle_resource_provider_inventories.go
similarity index 83%
rename from internal/shim/placement/handlers/resource_provider_inventories.go
rename to internal/shim/placement/handle_resource_provider_inventories.go
index 9c23e56d3..c79f924b6 100644
--- a/internal/shim/placement/handlers/resource_provider_inventories.go
+++ b/internal/shim/placement/handle_resource_provider_inventories.go
@@ -1,7 +1,7 @@
// Copyright SAP SE
// SPDX-License-Identifier: Apache-2.0
-package handlers
+package placement
import (
"net/http"
@@ -18,12 +18,13 @@ import (
// min_unit, max_unit, step_size, and allocation_ratio. Also returns the
// resource_provider_generation, which is needed for subsequent update or
// delete operations. Returns 404 if the provider does not exist.
-func HandleListResourceProviderInventories(w http.ResponseWriter, r *http.Request) {
+func (s *Shim) HandleListResourceProviderInventories(w http.ResponseWriter, r *http.Request) {
uuid, ok := requiredUUIDPathParam(w, r, "uuid")
if !ok {
return
}
- log := logf.FromContext(r.Context())
+ ctx := r.Context()
+ log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
}
@@ -37,12 +38,13 @@ func HandleListResourceProviderInventories(w http.ResponseWriter, r *http.Reques
// class, each specifying at minimum a total value. Omitted inventory classes
// are deleted. Returns 409 Conflict if allocations exceed the new capacity
// or if a concurrent update has occurred.
-func HandleUpdateResourceProviderInventories(w http.ResponseWriter, r *http.Request) {
+func (s *Shim) HandleUpdateResourceProviderInventories(w http.ResponseWriter, r *http.Request) {
uuid, ok := requiredUUIDPathParam(w, r, "uuid")
if !ok {
return
}
- log := logf.FromContext(r.Context())
+ ctx := r.Context()
+ log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
}
@@ -55,12 +57,13 @@ func HandleUpdateResourceProviderInventories(w http.ResponseWriter, r *http.Requ
// Conflict if allocations exist against any of the provider's inventories.
// Returns 404 if the provider does not exist. Available since microversion
// 1.5.
-func HandleDeleteResourceProviderInventories(w http.ResponseWriter, r *http.Request) {
+func (s *Shim) HandleDeleteResourceProviderInventories(w http.ResponseWriter, r *http.Request) {
uuid, ok := requiredUUIDPathParam(w, r, "uuid")
if !ok {
return
}
- log := logf.FromContext(r.Context())
+ ctx := r.Context()
+ log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
}
@@ -71,7 +74,7 @@ func HandleDeleteResourceProviderInventories(w http.ResponseWriter, r *http.Requ
// provider. The response includes total, reserved, min_unit, max_unit,
// step_size, allocation_ratio, and the resource_provider_generation. Returns
// 404 if the provider or inventory for that class does not exist.
-func HandleShowResourceProviderInventory(w http.ResponseWriter, r *http.Request) {
+func (s *Shim) HandleShowResourceProviderInventory(w http.ResponseWriter, r *http.Request) {
uuid, ok := requiredUUIDPathParam(w, r, "uuid")
if !ok {
return
@@ -80,7 +83,8 @@ func HandleShowResourceProviderInventory(w http.ResponseWriter, r *http.Request)
if !ok {
return
}
- log := logf.FromContext(r.Context())
+ ctx := r.Context()
+ log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path,
"uuid", uuid, "resource_class", resourceClass)
}
@@ -94,7 +98,7 @@ func HandleShowResourceProviderInventory(w http.ResponseWriter, r *http.Request)
// constraints (allocation_ratio, min_unit, max_unit, step_size, reserved).
// Since microversion 1.26, the reserved value must not exceed total. Returns
// 409 Conflict on generation mismatch or if allocations would be violated.
-func HandleUpdateResourceProviderInventory(w http.ResponseWriter, r *http.Request) {
+func (s *Shim) HandleUpdateResourceProviderInventory(w http.ResponseWriter, r *http.Request) {
uuid, ok := requiredUUIDPathParam(w, r, "uuid")
if !ok {
return
@@ -103,7 +107,8 @@ func HandleUpdateResourceProviderInventory(w http.ResponseWriter, r *http.Reques
if !ok {
return
}
- log := logf.FromContext(r.Context())
+ ctx := r.Context()
+ log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path,
"uuid", uuid, "resource_class", resourceClass)
}
@@ -115,7 +120,7 @@ func HandleUpdateResourceProviderInventory(w http.ResponseWriter, r *http.Reques
// Returns 409 Conflict if allocations exist against this provider and resource
// class combination, or if a concurrent update has occurred. Returns 404 if
// the provider or inventory does not exist. Returns 204 No Content on success.
-func HandleDeleteResourceProviderInventory(w http.ResponseWriter, r *http.Request) {
+func (s *Shim) HandleDeleteResourceProviderInventory(w http.ResponseWriter, r *http.Request) {
uuid, ok := requiredUUIDPathParam(w, r, "uuid")
if !ok {
return
@@ -124,7 +129,8 @@ func HandleDeleteResourceProviderInventory(w http.ResponseWriter, r *http.Reques
if !ok {
return
}
- log := logf.FromContext(r.Context())
+ ctx := r.Context()
+ log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path,
"uuid", uuid, "resource_class", resourceClass)
}
diff --git a/internal/shim/placement/handlers/resource_provider_traits.go b/internal/shim/placement/handle_resource_provider_traits.go
similarity index 82%
rename from internal/shim/placement/handlers/resource_provider_traits.go
rename to internal/shim/placement/handle_resource_provider_traits.go
index a21b35ddc..5c18bf85b 100644
--- a/internal/shim/placement/handlers/resource_provider_traits.go
+++ b/internal/shim/placement/handle_resource_provider_traits.go
@@ -1,7 +1,7 @@
// Copyright SAP SE
// SPDX-License-Identifier: Apache-2.0
-package handlers
+package placement
import (
"net/http"
@@ -16,12 +16,13 @@ import (
// by {uuid}. The response includes an array of trait name strings and the
// resource_provider_generation for concurrency tracking. Returns 404 if the
// provider does not exist.
-func HandleListResourceProviderTraits(w http.ResponseWriter, r *http.Request) {
+func (s *Shim) HandleListResourceProviderTraits(w http.ResponseWriter, r *http.Request) {
uuid, ok := requiredUUIDPathParam(w, r, "uuid")
if !ok {
return
}
- log := logf.FromContext(r.Context())
+ ctx := r.Context()
+ log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
}
@@ -35,12 +36,13 @@ func HandleListResourceProviderTraits(w http.ResponseWriter, r *http.Request) {
// Returns 400 Bad Request if any of the specified traits are invalid (i.e.
// not returned by GET /traits). Returns 409 Conflict if the generation does
// not match.
-func HandleUpdateResourceProviderTraits(w http.ResponseWriter, r *http.Request) {
+func (s *Shim) HandleUpdateResourceProviderTraits(w http.ResponseWriter, r *http.Request) {
uuid, ok := requiredUUIDPathParam(w, r, "uuid")
if !ok {
return
}
- log := logf.FromContext(r.Context())
+ ctx := r.Context()
+ log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
}
@@ -53,11 +55,12 @@ func HandleUpdateResourceProviderTraits(w http.ResponseWriter, r *http.Request)
// for the same provider, prefer PUT with an empty traits list instead.
// Returns 404 if the provider does not exist. Returns 409 Conflict on
// concurrent modification. Returns 204 No Content on success.
-func HandleDeleteResourceProviderTraits(w http.ResponseWriter, r *http.Request) {
+func (s *Shim) HandleDeleteResourceProviderTraits(w http.ResponseWriter, r *http.Request) {
uuid, ok := requiredUUIDPathParam(w, r, "uuid")
if !ok {
return
}
- log := logf.FromContext(r.Context())
+ ctx := r.Context()
+ log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
}
diff --git a/internal/shim/placement/handlers/resource_provider_usages.go b/internal/shim/placement/handle_resource_provider_usages.go
similarity index 83%
rename from internal/shim/placement/handlers/resource_provider_usages.go
rename to internal/shim/placement/handle_resource_provider_usages.go
index 418862b4a..78c7cc450 100644
--- a/internal/shim/placement/handlers/resource_provider_usages.go
+++ b/internal/shim/placement/handle_resource_provider_usages.go
@@ -1,7 +1,7 @@
// Copyright SAP SE
// SPDX-License-Identifier: Apache-2.0
-package handlers
+package placement
import (
"net/http"
@@ -17,11 +17,12 @@ import (
// with integer usage amounts, along with the resource_provider_generation.
// Unlike the provider allocations endpoint, this does not break down usage by
// individual consumer. Returns 404 if the provider does not exist.
-func HandleListResourceProviderUsages(w http.ResponseWriter, r *http.Request) {
+func (s *Shim) HandleListResourceProviderUsages(w http.ResponseWriter, r *http.Request) {
uuid, ok := requiredUUIDPathParam(w, r, "uuid")
if !ok {
return
}
- log := logf.FromContext(r.Context())
+ ctx := r.Context()
+ log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
}
diff --git a/internal/shim/placement/handlers/resource_providers.go b/internal/shim/placement/handle_resource_providers.go
similarity index 83%
rename from internal/shim/placement/handlers/resource_providers.go
rename to internal/shim/placement/handle_resource_providers.go
index 3ecc6428e..de6213e43 100644
--- a/internal/shim/placement/handlers/resource_providers.go
+++ b/internal/shim/placement/handle_resource_providers.go
@@ -1,7 +1,7 @@
// Copyright SAP SE
// SPDX-License-Identifier: Apache-2.0
-package handlers
+package placement
import (
"net/http"
@@ -22,8 +22,9 @@ import (
// microversions: resources filtering at 1.3, tree queries at 1.14, trait
// requirements at 1.18, forbidden traits at 1.22, forbidden aggregates at
// 1.32, and the in: syntax for required at 1.39.
-func HandleListResourceProviders(w http.ResponseWriter, r *http.Request) {
- log := logf.FromContext(r.Context())
+func (s *Shim) HandleListResourceProviders(w http.ResponseWriter, r *http.Request) {
+ ctx := r.Context()
+ log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path)
}
@@ -39,8 +40,9 @@ func HandleListResourceProviders(w http.ResponseWriter, r *http.Request) {
// an HTTP 201 with a Location header, while 1.20+ returns the full resource
// provider object in the body. Returns 409 Conflict if a provider with the
// same name or UUID already exists.
-func HandleCreateResourceProvider(w http.ResponseWriter, r *http.Request) {
- log := logf.FromContext(r.Context())
+func (s *Shim) HandleCreateResourceProvider(w http.ResponseWriter, r *http.Request) {
+ ctx := r.Context()
+ log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path)
}
@@ -52,12 +54,13 @@ func HandleCreateResourceProvider(w http.ResponseWriter, r *http.Request) {
// also includes parent_provider_uuid and root_provider_uuid to describe the
// provider's position in a hierarchical tree. Returns 404 if the provider
// does not exist.
-func HandleShowResourceProvider(w http.ResponseWriter, r *http.Request) {
+func (s *Shim) HandleShowResourceProvider(w http.ResponseWriter, r *http.Request) {
uuid, ok := requiredUUIDPathParam(w, r, "uuid")
if !ok {
return
}
- log := logf.FromContext(r.Context())
+ ctx := r.Context()
+ log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
}
@@ -68,12 +71,13 @@ func HandleShowResourceProvider(w http.ResponseWriter, r *http.Request) {
// any existing provider UUID that would not create a loop in the tree, or set
// to null to make the provider a root. Returns 409 Conflict if another
// provider already has the requested name.
-func HandleUpdateResourceProvider(w http.ResponseWriter, r *http.Request) {
+func (s *Shim) HandleUpdateResourceProvider(w http.ResponseWriter, r *http.Request) {
uuid, ok := requiredUUIDPathParam(w, r, "uuid")
if !ok {
return
}
- log := logf.FromContext(r.Context())
+ ctx := r.Context()
+ log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
}
@@ -83,11 +87,12 @@ func HandleUpdateResourceProvider(w http.ResponseWriter, r *http.Request) {
// inventories. The operation fails with 409 Conflict if there are any
// allocations against the provider's inventories or if the provider has
// child providers in a tree hierarchy. Returns 204 No Content on success.
-func HandleDeleteResourceProvider(w http.ResponseWriter, r *http.Request) {
+func (s *Shim) HandleDeleteResourceProvider(w http.ResponseWriter, r *http.Request) {
uuid, ok := requiredUUIDPathParam(w, r, "uuid")
if !ok {
return
}
- log := logf.FromContext(r.Context())
+ ctx := r.Context()
+ log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
}
diff --git a/internal/shim/placement/handlers/root.go b/internal/shim/placement/handle_root.go
similarity index 83%
rename from internal/shim/placement/handlers/root.go
rename to internal/shim/placement/handle_root.go
index bfef909b4..d734d4528 100644
--- a/internal/shim/placement/handlers/root.go
+++ b/internal/shim/placement/handle_root.go
@@ -1,7 +1,7 @@
// Copyright SAP SE
// SPDX-License-Identifier: Apache-2.0
-package handlers
+package placement
import (
"net/http"
@@ -17,7 +17,8 @@ import (
// its status (e.g. CURRENT), links for discovery, and the microversion range
// supported by the running service. Clients use this endpoint to discover API
// capabilities and negotiate microversions before making further requests.
-func HandleGetRoot(w http.ResponseWriter, r *http.Request) {
- log := logf.FromContext(r.Context())
+func (s *Shim) HandleGetRoot(w http.ResponseWriter, r *http.Request) {
+ ctx := r.Context()
+ log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path)
}
diff --git a/internal/shim/placement/handlers/traits.go b/internal/shim/placement/handle_traits.go
similarity index 80%
rename from internal/shim/placement/handlers/traits.go
rename to internal/shim/placement/handle_traits.go
index 92ea4b017..0d6ba8e32 100644
--- a/internal/shim/placement/handlers/traits.go
+++ b/internal/shim/placement/handle_traits.go
@@ -1,7 +1,7 @@
// Copyright SAP SE
// SPDX-License-Identifier: Apache-2.0
-package handlers
+package placement
import (
"net/http"
@@ -20,8 +20,9 @@ import (
// (startswith:CUSTOM) or by an explicit list (in:TRAIT1,TRAIT2), and
// associated filters to only traits that are or are not associated with at
// least one resource provider.
-func HandleListTraits(w http.ResponseWriter, r *http.Request) {
- log := logf.FromContext(r.Context())
+func (s *Shim) HandleListTraits(w http.ResponseWriter, r *http.Request) {
+ ctx := r.Context()
+ log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path)
}
@@ -29,12 +30,13 @@ func HandleListTraits(w http.ResponseWriter, r *http.Request) {
//
// Checks whether a trait with the given name exists. Returns 204 No Content
// (with no response body) if the trait is found, or 404 Not Found otherwise.
-func HandleShowTrait(w http.ResponseWriter, r *http.Request) {
+func (s *Shim) HandleShowTrait(w http.ResponseWriter, r *http.Request) {
name, ok := requiredPathParam(w, r, "name")
if !ok {
return
}
- log := logf.FromContext(r.Context())
+ ctx := r.Context()
+ log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "name", name)
}
@@ -44,12 +46,13 @@ func HandleShowTrait(w http.ResponseWriter, r *http.Request) {
// created; standard traits are read-only. Returns 201 Created if the trait
// is newly inserted, or 204 No Content if it already exists. Returns 400
// Bad Request if the name does not carry the CUSTOM_ prefix.
-func HandleUpdateTrait(w http.ResponseWriter, r *http.Request) {
+func (s *Shim) HandleUpdateTrait(w http.ResponseWriter, r *http.Request) {
name, ok := requiredPathParam(w, r, "name")
if !ok {
return
}
- log := logf.FromContext(r.Context())
+ ctx := r.Context()
+ log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "name", name)
}
@@ -59,11 +62,12 @@ func HandleUpdateTrait(w http.ResponseWriter, r *http.Request) {
// cannot be deleted and will return 400 Bad Request. Returns 409 Conflict if
// the trait is still associated with any resource provider. Returns 404 if
// the trait does not exist. Returns 204 No Content on success.
-func HandleDeleteTrait(w http.ResponseWriter, r *http.Request) {
+func (s *Shim) HandleDeleteTrait(w http.ResponseWriter, r *http.Request) {
name, ok := requiredPathParam(w, r, "name")
if !ok {
return
}
- log := logf.FromContext(r.Context())
+ ctx := r.Context()
+ log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "name", name)
}
diff --git a/internal/shim/placement/handlers/usages.go b/internal/shim/placement/handle_usages.go
similarity index 86%
rename from internal/shim/placement/handlers/usages.go
rename to internal/shim/placement/handle_usages.go
index 2fd5e66a6..32e276c8d 100644
--- a/internal/shim/placement/handlers/usages.go
+++ b/internal/shim/placement/handle_usages.go
@@ -1,7 +1,7 @@
// Copyright SAP SE
// SPDX-License-Identifier: Apache-2.0
-package handlers
+package placement
import (
"net/http"
@@ -21,7 +21,8 @@ import (
// each group containing resource totals and a consumer_count. Since
// microversion 1.38, an optional consumer_type query parameter allows
// filtering the results. Available since microversion 1.9.
-func HandleListUsages(w http.ResponseWriter, r *http.Request) {
- log := logf.FromContext(r.Context())
+func (s *Shim) HandleListUsages(w http.ResponseWriter, r *http.Request) {
+ ctx := r.Context()
+ log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path)
}
diff --git a/internal/shim/placement/handlers/zz_index.go b/internal/shim/placement/handlers/zz_index.go
deleted file mode 100644
index fba5d76b1..000000000
--- a/internal/shim/placement/handlers/zz_index.go
+++ /dev/null
@@ -1,73 +0,0 @@
-// Copyright SAP SE
-// SPDX-License-Identifier: Apache-2.0
-
-package handlers
-
-import "net/http"
-
-// RegisterRoutes binds all Placement API handlers to the given mux. The
-// route patterns use the Go 1.22+ ServeMux syntax with explicit HTTP methods
-// and path wildcards. The routes mirror the OpenStack Placement API surface
-// as documented at https://docs.openstack.org/api-ref/placement/.
-func RegisterRoutes(mux *http.ServeMux) {
- // Root
- mux.HandleFunc("GET /{$}", HandleGetRoot)
-
- // Resource providers
- mux.HandleFunc("GET /resource_providers", HandleListResourceProviders)
- mux.HandleFunc("POST /resource_providers", HandleCreateResourceProvider)
- mux.HandleFunc("GET /resource_providers/{uuid}", HandleShowResourceProvider)
- mux.HandleFunc("PUT /resource_providers/{uuid}", HandleUpdateResourceProvider)
- mux.HandleFunc("DELETE /resource_providers/{uuid}", HandleDeleteResourceProvider)
-
- // Resource classes
- mux.HandleFunc("GET /resource_classes", HandleListResourceClasses)
- mux.HandleFunc("POST /resource_classes", HandleCreateResourceClass)
- mux.HandleFunc("GET /resource_classes/{name}", HandleShowResourceClass)
- mux.HandleFunc("PUT /resource_classes/{name}", HandleUpdateResourceClass)
- mux.HandleFunc("DELETE /resource_classes/{name}", HandleDeleteResourceClass)
-
- // Resource provider inventories
- mux.HandleFunc("GET /resource_providers/{uuid}/inventories", HandleListResourceProviderInventories)
- mux.HandleFunc("PUT /resource_providers/{uuid}/inventories", HandleUpdateResourceProviderInventories)
- mux.HandleFunc("DELETE /resource_providers/{uuid}/inventories", HandleDeleteResourceProviderInventories)
- mux.HandleFunc("GET /resource_providers/{uuid}/inventories/{resource_class}", HandleShowResourceProviderInventory)
- mux.HandleFunc("PUT /resource_providers/{uuid}/inventories/{resource_class}", HandleUpdateResourceProviderInventory)
- mux.HandleFunc("DELETE /resource_providers/{uuid}/inventories/{resource_class}", HandleDeleteResourceProviderInventory)
-
- // Resource provider aggregates
- mux.HandleFunc("GET /resource_providers/{uuid}/aggregates", HandleListResourceProviderAggregates)
- mux.HandleFunc("PUT /resource_providers/{uuid}/aggregates", HandleUpdateResourceProviderAggregates)
-
- // Traits
- mux.HandleFunc("GET /traits", HandleListTraits)
- mux.HandleFunc("GET /traits/{name}", HandleShowTrait)
- mux.HandleFunc("PUT /traits/{name}", HandleUpdateTrait)
- mux.HandleFunc("DELETE /traits/{name}", HandleDeleteTrait)
-
- // Resource provider traits
- mux.HandleFunc("GET /resource_providers/{uuid}/traits", HandleListResourceProviderTraits)
- mux.HandleFunc("PUT /resource_providers/{uuid}/traits", HandleUpdateResourceProviderTraits)
- mux.HandleFunc("DELETE /resource_providers/{uuid}/traits", HandleDeleteResourceProviderTraits)
-
- // Allocations
- mux.HandleFunc("POST /allocations", HandleManageAllocations)
- mux.HandleFunc("GET /allocations/{consumer_uuid}", HandleListAllocations)
- mux.HandleFunc("PUT /allocations/{consumer_uuid}", HandleUpdateAllocations)
- mux.HandleFunc("DELETE /allocations/{consumer_uuid}", HandleDeleteAllocations)
-
- // Resource provider allocations
- mux.HandleFunc("GET /resource_providers/{uuid}/allocations", HandleListResourceProviderAllocations)
-
- // Usages
- mux.HandleFunc("GET /usages", HandleListUsages)
-
- // Resource provider usages
- mux.HandleFunc("GET /resource_providers/{uuid}/usages", HandleListResourceProviderUsages)
-
- // Allocation candidates
- mux.HandleFunc("GET /allocation_candidates", HandleListAllocationCandidates)
-
- // Reshaper
- mux.HandleFunc("POST /reshaper", HandlePostReshaper)
-}
diff --git a/internal/shim/placement/shim.go b/internal/shim/placement/shim.go
new file mode 100644
index 000000000..e4acf1642
--- /dev/null
+++ b/internal/shim/placement/shim.go
@@ -0,0 +1,118 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package placement
+
+import (
+ "context"
+ "net/http"
+
+ hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
+ ctrl "sigs.k8s.io/controller-runtime"
+ "sigs.k8s.io/controller-runtime/pkg/client"
+)
+
+// IndexHypervisorByID is the field index key for looking up Hypervisor
+// objects by their OpenStack hypervisor ID (status.hypervisorId).
+const IndexHypervisorByID = ".status.hypervisorId"
+
+// Shim is the placement API shim. It holds a controller-runtime client for
+// making Kubernetes API calls and exposes HTTP handlers that mirror the
+// OpenStack Placement API surface.
+type Shim struct {
+ client.Client
+}
+
+// SetupWithManager registers field indexes on the manager's cache so that
+// subsequent list calls are served from the informer cache rather than
+// hitting the API server. This must be called before the manager is started.
+//
+// Calling IndexField internally invokes GetInformer, which creates and
+// registers a shared informer for the indexed type (hv1.Hypervisor) with the
+// cache. The informer is started later when mgr.Start() is called. This
+// means no separate controller or empty Reconcile loop is needed — the
+// index registration alone is sufficient to warm the cache.
+func (s *Shim) SetupWithManager(mgr ctrl.Manager) error {
+ return mgr.GetFieldIndexer().IndexField(
+ context.Background(),
+ &hv1.Hypervisor{},
+ IndexHypervisorByID,
+ func(obj client.Object) []string {
+ h, ok := obj.(*hv1.Hypervisor)
+ if !ok {
+ return nil
+ }
+ if h.Status.HypervisorID == "" {
+ return nil
+ }
+ return []string{h.Status.HypervisorID}
+ },
+ )
+}
+
+// RegisterRoutes binds all Placement API handlers to the given mux. The
+// route patterns use the Go 1.22+ ServeMux syntax with explicit HTTP methods
+// and path wildcards. The routes mirror the OpenStack Placement API surface
+// as documented at https://docs.openstack.org/api-ref/placement/.
+func (s *Shim) RegisterRoutes(mux *http.ServeMux) {
+ // Root
+ mux.HandleFunc("GET /{$}", s.HandleGetRoot)
+
+ // Resource providers
+ mux.HandleFunc("GET /resource_providers", s.HandleListResourceProviders)
+ mux.HandleFunc("POST /resource_providers", s.HandleCreateResourceProvider)
+ mux.HandleFunc("GET /resource_providers/{uuid}", s.HandleShowResourceProvider)
+ mux.HandleFunc("PUT /resource_providers/{uuid}", s.HandleUpdateResourceProvider)
+ mux.HandleFunc("DELETE /resource_providers/{uuid}", s.HandleDeleteResourceProvider)
+
+ // Resource classes
+ mux.HandleFunc("GET /resource_classes", s.HandleListResourceClasses)
+ mux.HandleFunc("POST /resource_classes", s.HandleCreateResourceClass)
+ mux.HandleFunc("GET /resource_classes/{name}", s.HandleShowResourceClass)
+ mux.HandleFunc("PUT /resource_classes/{name}", s.HandleUpdateResourceClass)
+ mux.HandleFunc("DELETE /resource_classes/{name}", s.HandleDeleteResourceClass)
+
+ // Resource provider inventories
+ mux.HandleFunc("GET /resource_providers/{uuid}/inventories", s.HandleListResourceProviderInventories)
+ mux.HandleFunc("PUT /resource_providers/{uuid}/inventories", s.HandleUpdateResourceProviderInventories)
+ mux.HandleFunc("DELETE /resource_providers/{uuid}/inventories", s.HandleDeleteResourceProviderInventories)
+ mux.HandleFunc("GET /resource_providers/{uuid}/inventories/{resource_class}", s.HandleShowResourceProviderInventory)
+ mux.HandleFunc("PUT /resource_providers/{uuid}/inventories/{resource_class}", s.HandleUpdateResourceProviderInventory)
+ mux.HandleFunc("DELETE /resource_providers/{uuid}/inventories/{resource_class}", s.HandleDeleteResourceProviderInventory)
+
+ // Resource provider aggregates
+ mux.HandleFunc("GET /resource_providers/{uuid}/aggregates", s.HandleListResourceProviderAggregates)
+ mux.HandleFunc("PUT /resource_providers/{uuid}/aggregates", s.HandleUpdateResourceProviderAggregates)
+
+ // Traits
+ mux.HandleFunc("GET /traits", s.HandleListTraits)
+ mux.HandleFunc("GET /traits/{name}", s.HandleShowTrait)
+ mux.HandleFunc("PUT /traits/{name}", s.HandleUpdateTrait)
+ mux.HandleFunc("DELETE /traits/{name}", s.HandleDeleteTrait)
+
+ // Resource provider traits
+ mux.HandleFunc("GET /resource_providers/{uuid}/traits", s.HandleListResourceProviderTraits)
+ mux.HandleFunc("PUT /resource_providers/{uuid}/traits", s.HandleUpdateResourceProviderTraits)
+ mux.HandleFunc("DELETE /resource_providers/{uuid}/traits", s.HandleDeleteResourceProviderTraits)
+
+ // Allocations
+ mux.HandleFunc("POST /allocations", s.HandleManageAllocations)
+ mux.HandleFunc("GET /allocations/{consumer_uuid}", s.HandleListAllocations)
+ mux.HandleFunc("PUT /allocations/{consumer_uuid}", s.HandleUpdateAllocations)
+ mux.HandleFunc("DELETE /allocations/{consumer_uuid}", s.HandleDeleteAllocations)
+
+ // Resource provider allocations
+ mux.HandleFunc("GET /resource_providers/{uuid}/allocations", s.HandleListResourceProviderAllocations)
+
+ // Usages
+ mux.HandleFunc("GET /usages", s.HandleListUsages)
+
+ // Resource provider usages
+ mux.HandleFunc("GET /resource_providers/{uuid}/usages", s.HandleListResourceProviderUsages)
+
+ // Allocation candidates
+ mux.HandleFunc("GET /allocation_candidates", s.HandleListAllocationCandidates)
+
+ // Reshaper
+ mux.HandleFunc("POST /reshaper", s.HandlePostReshaper)
+}
diff --git a/internal/shim/placement/handlers/validation.go b/internal/shim/placement/validation.go
similarity index 98%
rename from internal/shim/placement/handlers/validation.go
rename to internal/shim/placement/validation.go
index 5f8bcf2b9..55a46c406 100644
--- a/internal/shim/placement/handlers/validation.go
+++ b/internal/shim/placement/validation.go
@@ -1,7 +1,7 @@
// Copyright SAP SE
// SPDX-License-Identifier: Apache-2.0
-package handlers
+package placement
import (
"fmt"
From 45778bacd7b214d8b4deca912062b26b48bbf1a7 Mon Sep 17 00:00:00 2001
From: Philipp Matthes
Date: Thu, 9 Apr 2026 19:10:53 +0200
Subject: [PATCH 10/17] Connect shim to placement on startup
---
cmd/shim/main.go | 3 +-
.../bundles/cortex-placement-shim/values.yaml | 16 ++
.../cortex-shim/templates/deployment.yaml | 6 +-
helm/library/cortex-shim/values.yaml | 4 +-
internal/shim/placement/shim.go | 200 ++++++++++++------
5 files changed, 157 insertions(+), 72 deletions(-)
diff --git a/cmd/shim/main.go b/cmd/shim/main.go
index ce81d6b44..0a68c7298 100644
--- a/cmd/shim/main.go
+++ b/cmd/shim/main.go
@@ -208,7 +208,8 @@ func main() {
mux := http.NewServeMux()
if enablePlacementShim {
placementShim := &placement.Shim{Client: mgr.GetClient()}
- if err := placementShim.SetupWithManager(mgr); err != nil {
+ setupLog.Info("Adding placement shim to manager")
+ if err := placementShim.SetupWithManager(ctx, mgr); err != nil {
setupLog.Error(err, "unable to set up placement shim")
os.Exit(1)
}
diff --git a/helm/bundles/cortex-placement-shim/values.yaml b/helm/bundles/cortex-placement-shim/values.yaml
index 6dd793653..2fa998d6b 100644
--- a/helm/bundles/cortex-placement-shim/values.yaml
+++ b/helm/bundles/cortex-placement-shim/values.yaml
@@ -20,8 +20,24 @@ alerts:
cortex-shim:
namePrefix: cortex-placement
+ deployment:
+ container:
+ extraArgs: ["--placement-shim=true"]
conf:
monitoring:
labels:
github_org: cobaltcore-dev
github_repo: cortex
+ # Uncomment and set the following values to enable SSO for the placement
+ # shim. The shim will use the provided SSO credentials to talk to openstack
+ # over ingress.
+ # sso:
+ # cert: |
+ # -----BEGIN CERTIFICATE-----
+ # Your certificate here
+ # -----END CERTIFICATE-----
+ # certKey: |
+ # -----BEGIN PRIVATE KEY-----
+ # Your private key here
+ # -----END PRIVATE KEY-----
+ # selfSigned: "false"
diff --git a/helm/library/cortex-shim/templates/deployment.yaml b/helm/library/cortex-shim/templates/deployment.yaml
index b38eb3c02..7d658e87c 100644
--- a/helm/library/cortex-shim/templates/deployment.yaml
+++ b/helm/library/cortex-shim/templates/deployment.yaml
@@ -1,6 +1,3 @@
-# This file is safe from kubebuilder edit --plugins=helm/v1-alpha
-# If you want to re-generate, add the --force flag.
-
{{- if .Values.deployment.enable }}
apiVersion: apps/v1
kind: Deployment
@@ -32,6 +29,9 @@ spec:
{{- range .Values.deployment.container.args }}
- {{ . }}
{{- end }}
+ {{- range .Values.deployment.container.extraArgs }}
+ - {{ . }}
+ {{- end }}
ports:
- name: api
containerPort: 8080
diff --git a/helm/library/cortex-shim/values.yaml b/helm/library/cortex-shim/values.yaml
index 63574fbe4..3acead93b 100644
--- a/helm/library/cortex-shim/values.yaml
+++ b/helm/library/cortex-shim/values.yaml
@@ -8,6 +8,7 @@ deployment:
- "--metrics-bind-address=:2112"
- "--health-probe-bind-address=:8081"
- "--metrics-secure=false"
+ extraArgs: []
resources:
limits:
cpu: 500m
@@ -53,9 +54,6 @@ rbac:
prometheus:
enable: true
-global:
- conf: {}
-
# Use this to unambiguate multiple cortex deployments in the same cluster.
namePrefix: cortex
conf: {} # No config for now that's needed by all the shims.
diff --git a/internal/shim/placement/shim.go b/internal/shim/placement/shim.go
index e4acf1642..c15808291 100644
--- a/internal/shim/placement/shim.go
+++ b/internal/shim/placement/shim.go
@@ -5,8 +5,11 @@ package placement
import (
"context"
+ "errors"
"net/http"
+ "github.com/cobaltcore-dev/cortex/pkg/conf"
+ "github.com/cobaltcore-dev/cortex/pkg/sso"
hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -16,11 +19,77 @@ import (
// objects by their OpenStack hypervisor ID (status.hypervisorId).
const IndexHypervisorByID = ".status.hypervisorId"
+var (
+ // setupLog is a controller-runtime logger used for setup and route
+ // registration. Individual handlers should use their own loggers derived
+ // from the request context.
+ setupLog = ctrl.Log.WithName("placement-shim")
+)
+
+// config holds configuration for the placement shim.
+type config struct {
+ // SSO is an optional reference to a Kubernetes secret containing
+	// credentials to talk to OpenStack over ingress via single sign-on.
+ SSO *sso.SSOConfig `json:"sso,omitempty"`
+ // PlacementURL is the URL of the OpenStack Placement API the shim
+ // should forward requests to.
+ PlacementURL string `json:"placementURL,omitempty"`
+}
+
// Shim is the placement API shim. It holds a controller-runtime client for
// making Kubernetes API calls and exposes HTTP handlers that mirror the
// OpenStack Placement API surface.
type Shim struct {
client.Client
+ config config
+	// HTTP client that can talk to OpenStack Placement, if needed, over
+	// ingress with single sign-on.
+ httpClient *http.Client
+}
+
+// Start is called after the manager has started and the cache is running.
+// It can be used to perform any initialization that requires the cache to be
+// running.
+func (s *Shim) Start(ctx context.Context) (err error) {
+ setupLog.Info("Starting placement shim")
+ s.httpClient = http.DefaultClient
+ if s.config.SSO != nil {
+ setupLog.Info("SSO config provided, creating HTTP client for placement API")
+ s.httpClient, err = sso.NewHTTPClient(*s.config.SSO)
+ if err != nil {
+ setupLog.Error(err, "Failed to create HTTP client from SSO config")
+ return err
+ }
+ setupLog.Info("Successfully created HTTP client from SSO config")
+ } else {
+ setupLog.Info("No SSO config provided, using default HTTP client for placement API")
+ }
+	// Try to establish a connection to the placement API to fail fast if the
+ // configuration is invalid. Directly call the root endpoint for that.
+ setupLog.Info("Testing connection to placement API", "url", s.config.PlacementURL)
+ if s.config.PlacementURL == "" {
+ err := errors.New("placement URL is not configured")
+ setupLog.Error(err, "Invalid configuration for placement shim")
+ return err
+ }
+ req, err := http.NewRequestWithContext(ctx, "GET", s.config.PlacementURL, nil)
+ if err != nil {
+ setupLog.Error(err, "Failed to create HTTP request to placement API")
+ return err
+ }
+ resp, err := s.httpClient.Do(req)
+ if err != nil {
+ setupLog.Error(err, "Failed to connect to placement API")
+ return err
+ }
+ defer resp.Body.Close()
+ if resp.StatusCode != http.StatusOK {
+ err := errors.New("unexpected response from placement API")
+ setupLog.Error(err, "Failed to call placement API", "status", resp.Status)
+ return err
+ }
+ setupLog.Info("Successfully connected to placement API")
+ return nil
}
// SetupWithManager registers field indexes on the manager's cache so that
@@ -32,11 +101,19 @@ type Shim struct {
// cache. The informer is started later when mgr.Start() is called. This
// means no separate controller or empty Reconcile loop is needed — the
// index registration alone is sufficient to warm the cache.
-func (s *Shim) SetupWithManager(mgr ctrl.Manager) error {
- return mgr.GetFieldIndexer().IndexField(
- context.Background(),
- &hv1.Hypervisor{},
- IndexHypervisorByID,
+func (s *Shim) SetupWithManager(ctx context.Context, mgr ctrl.Manager) (err error) {
+ setupLog.Info("Setting up placement shim with manager")
+ if err := mgr.Add(s); err != nil { // Bind Start(ctx)
+ setupLog.Error(err, "Failed to bind start routine")
+ return err
+ }
+ s.config, err = conf.GetConfig[config]()
+ if err != nil {
+ setupLog.Error(err, "Failed to load placement shim config")
+ return err
+ }
+ setupLog.Info("Indexing Hypervisors by hypervisor ID")
+ err = mgr.GetFieldIndexer().IndexField(ctx, &hv1.Hypervisor{}, IndexHypervisorByID,
func(obj client.Object) []string {
h, ok := obj.(*hv1.Hypervisor)
if !ok {
@@ -48,6 +125,12 @@ func (s *Shim) SetupWithManager(mgr ctrl.Manager) error {
return []string{h.Status.HypervisorID}
},
)
+ if err != nil {
+ setupLog.Error(err, "Failed to index Hypervisors by hypervisor ID")
+ return err
+ }
+ setupLog.Info("Successfully indexed Hypervisors by hypervisor ID")
+ return nil
}
// RegisterRoutes binds all Placement API handlers to the given mux. The
@@ -55,64 +138,51 @@ func (s *Shim) SetupWithManager(mgr ctrl.Manager) error {
// and path wildcards. The routes mirror the OpenStack Placement API surface
// as documented at https://docs.openstack.org/api-ref/placement/.
func (s *Shim) RegisterRoutes(mux *http.ServeMux) {
- // Root
- mux.HandleFunc("GET /{$}", s.HandleGetRoot)
-
- // Resource providers
- mux.HandleFunc("GET /resource_providers", s.HandleListResourceProviders)
- mux.HandleFunc("POST /resource_providers", s.HandleCreateResourceProvider)
- mux.HandleFunc("GET /resource_providers/{uuid}", s.HandleShowResourceProvider)
- mux.HandleFunc("PUT /resource_providers/{uuid}", s.HandleUpdateResourceProvider)
- mux.HandleFunc("DELETE /resource_providers/{uuid}", s.HandleDeleteResourceProvider)
-
- // Resource classes
- mux.HandleFunc("GET /resource_classes", s.HandleListResourceClasses)
- mux.HandleFunc("POST /resource_classes", s.HandleCreateResourceClass)
- mux.HandleFunc("GET /resource_classes/{name}", s.HandleShowResourceClass)
- mux.HandleFunc("PUT /resource_classes/{name}", s.HandleUpdateResourceClass)
- mux.HandleFunc("DELETE /resource_classes/{name}", s.HandleDeleteResourceClass)
-
- // Resource provider inventories
- mux.HandleFunc("GET /resource_providers/{uuid}/inventories", s.HandleListResourceProviderInventories)
- mux.HandleFunc("PUT /resource_providers/{uuid}/inventories", s.HandleUpdateResourceProviderInventories)
- mux.HandleFunc("DELETE /resource_providers/{uuid}/inventories", s.HandleDeleteResourceProviderInventories)
- mux.HandleFunc("GET /resource_providers/{uuid}/inventories/{resource_class}", s.HandleShowResourceProviderInventory)
- mux.HandleFunc("PUT /resource_providers/{uuid}/inventories/{resource_class}", s.HandleUpdateResourceProviderInventory)
- mux.HandleFunc("DELETE /resource_providers/{uuid}/inventories/{resource_class}", s.HandleDeleteResourceProviderInventory)
-
- // Resource provider aggregates
- mux.HandleFunc("GET /resource_providers/{uuid}/aggregates", s.HandleListResourceProviderAggregates)
- mux.HandleFunc("PUT /resource_providers/{uuid}/aggregates", s.HandleUpdateResourceProviderAggregates)
-
- // Traits
- mux.HandleFunc("GET /traits", s.HandleListTraits)
- mux.HandleFunc("GET /traits/{name}", s.HandleShowTrait)
- mux.HandleFunc("PUT /traits/{name}", s.HandleUpdateTrait)
- mux.HandleFunc("DELETE /traits/{name}", s.HandleDeleteTrait)
-
- // Resource provider traits
- mux.HandleFunc("GET /resource_providers/{uuid}/traits", s.HandleListResourceProviderTraits)
- mux.HandleFunc("PUT /resource_providers/{uuid}/traits", s.HandleUpdateResourceProviderTraits)
- mux.HandleFunc("DELETE /resource_providers/{uuid}/traits", s.HandleDeleteResourceProviderTraits)
-
- // Allocations
- mux.HandleFunc("POST /allocations", s.HandleManageAllocations)
- mux.HandleFunc("GET /allocations/{consumer_uuid}", s.HandleListAllocations)
- mux.HandleFunc("PUT /allocations/{consumer_uuid}", s.HandleUpdateAllocations)
- mux.HandleFunc("DELETE /allocations/{consumer_uuid}", s.HandleDeleteAllocations)
-
- // Resource provider allocations
- mux.HandleFunc("GET /resource_providers/{uuid}/allocations", s.HandleListResourceProviderAllocations)
-
- // Usages
- mux.HandleFunc("GET /usages", s.HandleListUsages)
-
- // Resource provider usages
- mux.HandleFunc("GET /resource_providers/{uuid}/usages", s.HandleListResourceProviderUsages)
-
- // Allocation candidates
- mux.HandleFunc("GET /allocation_candidates", s.HandleListAllocationCandidates)
-
- // Reshaper
- mux.HandleFunc("POST /reshaper", s.HandlePostReshaper)
+ setupLog.Info("Registering placement API routes")
+ handlers := []struct {
+ method string
+ pattern string
+ handler http.HandlerFunc
+ }{
+ {"GET", "/{$}", s.HandleGetRoot},
+ {"GET", "/resource_providers", s.HandleListResourceProviders},
+ {"POST", "/resource_providers", s.HandleCreateResourceProvider},
+ {"GET", "/resource_providers/{uuid}", s.HandleShowResourceProvider},
+ {"PUT", "/resource_providers/{uuid}", s.HandleUpdateResourceProvider},
+ {"DELETE", "/resource_providers/{uuid}", s.HandleDeleteResourceProvider},
+ {"GET", "/resource_classes", s.HandleListResourceClasses},
+ {"POST", "/resource_classes", s.HandleCreateResourceClass},
+ {"GET", "/resource_classes/{name}", s.HandleShowResourceClass},
+ {"PUT", "/resource_classes/{name}", s.HandleUpdateResourceClass},
+ {"DELETE", "/resource_classes/{name}", s.HandleDeleteResourceClass},
+ {"GET", "/resource_providers/{uuid}/inventories", s.HandleListResourceProviderInventories},
+ {"PUT", "/resource_providers/{uuid}/inventories", s.HandleUpdateResourceProviderInventories},
+ {"DELETE", "/resource_providers/{uuid}/inventories", s.HandleDeleteResourceProviderInventories},
+ {"GET", "/resource_providers/{uuid}/inventories/{resource_class}", s.HandleShowResourceProviderInventory},
+ {"PUT", "/resource_providers/{uuid}/inventories/{resource_class}", s.HandleUpdateResourceProviderInventory},
+ {"DELETE", "/resource_providers/{uuid}/inventories/{resource_class}", s.HandleDeleteResourceProviderInventory},
+ {"GET", "/resource_providers/{uuid}/aggregates", s.HandleListResourceProviderAggregates},
+ {"PUT", "/resource_providers/{uuid}/aggregates", s.HandleUpdateResourceProviderAggregates},
+ {"GET", "/traits", s.HandleListTraits},
+ {"GET", "/traits/{name}", s.HandleShowTrait},
+ {"PUT", "/traits/{name}", s.HandleUpdateTrait},
+ {"DELETE", "/traits/{name}", s.HandleDeleteTrait},
+ {"GET", "/resource_providers/{uuid}/traits", s.HandleListResourceProviderTraits},
+ {"PUT", "/resource_providers/{uuid}/traits", s.HandleUpdateResourceProviderTraits},
+ {"DELETE", "/resource_providers/{uuid}/traits", s.HandleDeleteResourceProviderTraits},
+ {"POST", "/allocations", s.HandleManageAllocations},
+ {"GET", "/allocations/{consumer_uuid}", s.HandleListAllocations},
+ {"PUT", "/allocations/{consumer_uuid}", s.HandleUpdateAllocations},
+ {"DELETE", "/allocations/{consumer_uuid}", s.HandleDeleteAllocations},
+ {"GET", "/resource_providers/{uuid}/allocations", s.HandleListResourceProviderAllocations},
+ {"GET", "/usages", s.HandleListUsages},
+ {"GET", "/resource_providers/{uuid}/usages", s.HandleListResourceProviderUsages},
+ {"GET", "/allocation_candidates", s.HandleListAllocationCandidates},
+ {"POST", "/reshaper", s.HandlePostReshaper},
+ }
+ for _, h := range handlers {
+ setupLog.Info("Registering route", "method", h.method, "pattern", h.pattern)
+ mux.HandleFunc(h.method+" "+h.pattern, h.handler)
+ }
+ setupLog.Info("Successfully registered placement API routes")
}
From e5cdc1d93a26fb78fec3da81e7491b3f0c0c53ee Mon Sep 17 00:00:00 2001
From: Philipp Matthes
Date: Fri, 10 Apr 2026 08:12:45 +0200
Subject: [PATCH 11/17] Implement passthrough for all HTTP handlers
---
.../placement/handle_allocation_candidates.go | 1 +
internal/shim/placement/handle_allocations.go | 4 ++
internal/shim/placement/handle_reshaper.go | 1 +
.../shim/placement/handle_resource_classes.go | 5 ++
.../handle_resource_provider_aggregates.go | 2 +
.../handle_resource_provider_allocations.go | 1 +
.../handle_resource_provider_inventories.go | 6 ++
.../handle_resource_provider_traits.go | 3 +
.../handle_resource_provider_usages.go | 1 +
.../placement/handle_resource_providers.go | 5 ++
internal/shim/placement/handle_root.go | 1 +
internal/shim/placement/handle_traits.go | 4 ++
internal/shim/placement/handle_usages.go | 1 +
internal/shim/placement/shim.go | 68 +++++++++++++++++--
pkg/sso/sso.go | 29 +++++---
15 files changed, 117 insertions(+), 15 deletions(-)
diff --git a/internal/shim/placement/handle_allocation_candidates.go b/internal/shim/placement/handle_allocation_candidates.go
index b252d0026..f80b9aa0f 100644
--- a/internal/shim/placement/handle_allocation_candidates.go
+++ b/internal/shim/placement/handle_allocation_candidates.go
@@ -38,4 +38,5 @@ func (s *Shim) HandleListAllocationCandidates(w http.ResponseWriter, r *http.Req
ctx := r.Context()
log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path)
+ s.forward(w, r)
}
diff --git a/internal/shim/placement/handle_allocations.go b/internal/shim/placement/handle_allocations.go
index f406ea5da..ee365d109 100644
--- a/internal/shim/placement/handle_allocations.go
+++ b/internal/shim/placement/handle_allocations.go
@@ -28,6 +28,7 @@ func (s *Shim) HandleManageAllocations(w http.ResponseWriter, r *http.Request) {
ctx := r.Context()
log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path)
+ s.forward(w, r)
}
// HandleListAllocations handles GET /allocations/{consumer_uuid} requests.
@@ -50,6 +51,7 @@ func (s *Shim) HandleListAllocations(w http.ResponseWriter, r *http.Request) {
log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path,
"consumer_uuid", consumerUUID)
+ s.forward(w, r)
}
// HandleUpdateAllocations handles PUT /allocations/{consumer_uuid} requests.
@@ -72,6 +74,7 @@ func (s *Shim) HandleUpdateAllocations(w http.ResponseWriter, r *http.Request) {
log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path,
"consumer_uuid", consumerUUID)
+ s.forward(w, r)
}
// HandleDeleteAllocations handles DELETE /allocations/{consumer_uuid} requests.
@@ -88,4 +91,5 @@ func (s *Shim) HandleDeleteAllocations(w http.ResponseWriter, r *http.Request) {
log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path,
"consumer_uuid", consumerUUID)
+ s.forward(w, r)
}
diff --git a/internal/shim/placement/handle_reshaper.go b/internal/shim/placement/handle_reshaper.go
index fe0d85069..f08af7f9a 100644
--- a/internal/shim/placement/handle_reshaper.go
+++ b/internal/shim/placement/handle_reshaper.go
@@ -28,4 +28,5 @@ func (s *Shim) HandlePostReshaper(w http.ResponseWriter, r *http.Request) {
ctx := r.Context()
log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path)
+ s.forward(w, r)
}
diff --git a/internal/shim/placement/handle_resource_classes.go b/internal/shim/placement/handle_resource_classes.go
index 554c43034..407071e26 100644
--- a/internal/shim/placement/handle_resource_classes.go
+++ b/internal/shim/placement/handle_resource_classes.go
@@ -20,6 +20,7 @@ func (s *Shim) HandleListResourceClasses(w http.ResponseWriter, r *http.Request)
ctx := r.Context()
log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path)
+ s.forward(w, r)
}
// HandleCreateResourceClass handles POST /resource_classes requests.
@@ -33,6 +34,7 @@ func (s *Shim) HandleCreateResourceClass(w http.ResponseWriter, r *http.Request)
ctx := r.Context()
log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path)
+ s.forward(w, r)
}
// HandleShowResourceClass handles GET /resource_classes/{name} requests.
@@ -48,6 +50,7 @@ func (s *Shim) HandleShowResourceClass(w http.ResponseWriter, r *http.Request) {
ctx := r.Context()
log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "name", name)
+ s.forward(w, r)
}
// HandleUpdateResourceClass handles PUT /resource_classes/{name} requests.
@@ -66,6 +69,7 @@ func (s *Shim) HandleUpdateResourceClass(w http.ResponseWriter, r *http.Request)
ctx := r.Context()
log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "name", name)
+ s.forward(w, r)
}
// HandleDeleteResourceClass handles DELETE /resource_classes/{name} requests.
@@ -83,4 +87,5 @@ func (s *Shim) HandleDeleteResourceClass(w http.ResponseWriter, r *http.Request)
ctx := r.Context()
log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "name", name)
+ s.forward(w, r)
}
diff --git a/internal/shim/placement/handle_resource_provider_aggregates.go b/internal/shim/placement/handle_resource_provider_aggregates.go
index ce8febe50..c270f6730 100644
--- a/internal/shim/placement/handle_resource_provider_aggregates.go
+++ b/internal/shim/placement/handle_resource_provider_aggregates.go
@@ -30,6 +30,7 @@ func (s *Shim) HandleListResourceProviderAggregates(w http.ResponseWriter, r *ht
ctx := r.Context()
log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
+ s.forward(w, r)
}
// HandleUpdateResourceProviderAggregates handles
@@ -50,4 +51,5 @@ func (s *Shim) HandleUpdateResourceProviderAggregates(w http.ResponseWriter, r *
ctx := r.Context()
log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
+ s.forward(w, r)
}
diff --git a/internal/shim/placement/handle_resource_provider_allocations.go b/internal/shim/placement/handle_resource_provider_allocations.go
index b3f6dcd68..e36bbebd9 100644
--- a/internal/shim/placement/handle_resource_provider_allocations.go
+++ b/internal/shim/placement/handle_resource_provider_allocations.go
@@ -25,4 +25,5 @@ func (s *Shim) HandleListResourceProviderAllocations(w http.ResponseWriter, r *h
ctx := r.Context()
log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
+ s.forward(w, r)
}
diff --git a/internal/shim/placement/handle_resource_provider_inventories.go b/internal/shim/placement/handle_resource_provider_inventories.go
index c79f924b6..20d1c52dc 100644
--- a/internal/shim/placement/handle_resource_provider_inventories.go
+++ b/internal/shim/placement/handle_resource_provider_inventories.go
@@ -26,6 +26,7 @@ func (s *Shim) HandleListResourceProviderInventories(w http.ResponseWriter, r *h
ctx := r.Context()
log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
+ s.forward(w, r)
}
// HandleUpdateResourceProviderInventories handles
@@ -46,6 +47,7 @@ func (s *Shim) HandleUpdateResourceProviderInventories(w http.ResponseWriter, r
ctx := r.Context()
log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
+ s.forward(w, r)
}
// HandleDeleteResourceProviderInventories handles
@@ -65,6 +67,7 @@ func (s *Shim) HandleDeleteResourceProviderInventories(w http.ResponseWriter, r
ctx := r.Context()
log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
+ s.forward(w, r)
}
// HandleShowResourceProviderInventory handles
@@ -87,6 +90,7 @@ func (s *Shim) HandleShowResourceProviderInventory(w http.ResponseWriter, r *htt
log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path,
"uuid", uuid, "resource_class", resourceClass)
+ s.forward(w, r)
}
// HandleUpdateResourceProviderInventory handles
@@ -111,6 +115,7 @@ func (s *Shim) HandleUpdateResourceProviderInventory(w http.ResponseWriter, r *h
log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path,
"uuid", uuid, "resource_class", resourceClass)
+ s.forward(w, r)
}
// HandleDeleteResourceProviderInventory handles
@@ -133,4 +138,5 @@ func (s *Shim) HandleDeleteResourceProviderInventory(w http.ResponseWriter, r *h
log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path,
"uuid", uuid, "resource_class", resourceClass)
+ s.forward(w, r)
}
diff --git a/internal/shim/placement/handle_resource_provider_traits.go b/internal/shim/placement/handle_resource_provider_traits.go
index 5c18bf85b..75250a76e 100644
--- a/internal/shim/placement/handle_resource_provider_traits.go
+++ b/internal/shim/placement/handle_resource_provider_traits.go
@@ -24,6 +24,7 @@ func (s *Shim) HandleListResourceProviderTraits(w http.ResponseWriter, r *http.R
ctx := r.Context()
log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
+ s.forward(w, r)
}
// HandleUpdateResourceProviderTraits handles
@@ -44,6 +45,7 @@ func (s *Shim) HandleUpdateResourceProviderTraits(w http.ResponseWriter, r *http
ctx := r.Context()
log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
+ s.forward(w, r)
}
// HandleDeleteResourceProviderTraits handles
@@ -63,4 +65,5 @@ func (s *Shim) HandleDeleteResourceProviderTraits(w http.ResponseWriter, r *http
ctx := r.Context()
log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
+ s.forward(w, r)
}
diff --git a/internal/shim/placement/handle_resource_provider_usages.go b/internal/shim/placement/handle_resource_provider_usages.go
index 78c7cc450..c13d0ae65 100644
--- a/internal/shim/placement/handle_resource_provider_usages.go
+++ b/internal/shim/placement/handle_resource_provider_usages.go
@@ -25,4 +25,5 @@ func (s *Shim) HandleListResourceProviderUsages(w http.ResponseWriter, r *http.R
ctx := r.Context()
log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
+ s.forward(w, r)
}
diff --git a/internal/shim/placement/handle_resource_providers.go b/internal/shim/placement/handle_resource_providers.go
index de6213e43..b7a21018f 100644
--- a/internal/shim/placement/handle_resource_providers.go
+++ b/internal/shim/placement/handle_resource_providers.go
@@ -26,6 +26,7 @@ func (s *Shim) HandleListResourceProviders(w http.ResponseWriter, r *http.Reques
ctx := r.Context()
log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path)
+ s.forward(w, r)
}
// HandleCreateResourceProvider handles POST /resource_providers requests.
@@ -44,6 +45,7 @@ func (s *Shim) HandleCreateResourceProvider(w http.ResponseWriter, r *http.Reque
ctx := r.Context()
log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path)
+ s.forward(w, r)
}
// HandleShowResourceProvider handles GET /resource_providers/{uuid} requests.
@@ -62,6 +64,7 @@ func (s *Shim) HandleShowResourceProvider(w http.ResponseWriter, r *http.Request
ctx := r.Context()
log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
+ s.forward(w, r)
}
// HandleUpdateResourceProvider handles PUT /resource_providers/{uuid} requests.
@@ -79,6 +82,7 @@ func (s *Shim) HandleUpdateResourceProvider(w http.ResponseWriter, r *http.Reque
ctx := r.Context()
log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
+ s.forward(w, r)
}
// HandleDeleteResourceProvider handles DELETE /resource_providers/{uuid} requests.
@@ -95,4 +99,5 @@ func (s *Shim) HandleDeleteResourceProvider(w http.ResponseWriter, r *http.Reque
ctx := r.Context()
log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "uuid", uuid)
+ s.forward(w, r)
}
diff --git a/internal/shim/placement/handle_root.go b/internal/shim/placement/handle_root.go
index d734d4528..10821bf42 100644
--- a/internal/shim/placement/handle_root.go
+++ b/internal/shim/placement/handle_root.go
@@ -21,4 +21,5 @@ func (s *Shim) HandleGetRoot(w http.ResponseWriter, r *http.Request) {
ctx := r.Context()
log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path)
+ s.forward(w, r)
}
diff --git a/internal/shim/placement/handle_traits.go b/internal/shim/placement/handle_traits.go
index 0d6ba8e32..7cb645552 100644
--- a/internal/shim/placement/handle_traits.go
+++ b/internal/shim/placement/handle_traits.go
@@ -24,6 +24,7 @@ func (s *Shim) HandleListTraits(w http.ResponseWriter, r *http.Request) {
ctx := r.Context()
log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path)
+ s.forward(w, r)
}
// HandleShowTrait handles GET /traits/{name} requests.
@@ -38,6 +39,7 @@ func (s *Shim) HandleShowTrait(w http.ResponseWriter, r *http.Request) {
ctx := r.Context()
log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "name", name)
+ s.forward(w, r)
}
// HandleUpdateTrait handles PUT /traits/{name} requests.
@@ -54,6 +56,7 @@ func (s *Shim) HandleUpdateTrait(w http.ResponseWriter, r *http.Request) {
ctx := r.Context()
log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "name", name)
+ s.forward(w, r)
}
// HandleDeleteTrait handles DELETE /traits/{name} requests.
@@ -70,4 +73,5 @@ func (s *Shim) HandleDeleteTrait(w http.ResponseWriter, r *http.Request) {
ctx := r.Context()
log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path, "name", name)
+ s.forward(w, r)
}
diff --git a/internal/shim/placement/handle_usages.go b/internal/shim/placement/handle_usages.go
index 32e276c8d..2e3308c1e 100644
--- a/internal/shim/placement/handle_usages.go
+++ b/internal/shim/placement/handle_usages.go
@@ -25,4 +25,5 @@ func (s *Shim) HandleListUsages(w http.ResponseWriter, r *http.Request) {
ctx := r.Context()
log := logf.FromContext(ctx)
log.Info("placement request", "method", r.Method, "path", r.URL.Path)
+ s.forward(w, r)
}
diff --git a/internal/shim/placement/shim.go b/internal/shim/placement/shim.go
index c15808291..4ef93c66a 100644
--- a/internal/shim/placement/shim.go
+++ b/internal/shim/placement/shim.go
@@ -6,13 +6,16 @@ package placement
import (
"context"
"errors"
+ "io"
"net/http"
+ "time"
"github.com/cobaltcore-dev/cortex/pkg/conf"
"github.com/cobaltcore-dev/cortex/pkg/sso"
hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
+ logf "sigs.k8s.io/controller-runtime/pkg/log"
)
// IndexHypervisorByID is the field index key for looking up Hypervisor
@@ -52,18 +55,29 @@ type Shim struct {
// running.
func (s *Shim) Start(ctx context.Context) (err error) {
setupLog.Info("Starting placement shim")
- s.httpClient = http.DefaultClient
+ // Build the transport with optional SSO TLS credentials.
+ var transport *http.Transport
if s.config.SSO != nil {
- setupLog.Info("SSO config provided, creating HTTP client for placement API")
- s.httpClient, err = sso.NewHTTPClient(*s.config.SSO)
+ setupLog.Info("SSO config provided, creating transport for placement API")
+ transport, err = sso.NewTransport(*s.config.SSO)
if err != nil {
- setupLog.Error(err, "Failed to create HTTP client from SSO config")
+ setupLog.Error(err, "Failed to create transport from SSO config")
return err
}
- setupLog.Info("Successfully created HTTP client from SSO config")
} else {
- setupLog.Info("No SSO config provided, using default HTTP client for placement API")
+ setupLog.Info("No SSO config provided, using plain transport for placement API")
+ transport = &http.Transport{}
}
+ // All proxy traffic goes to one placement API host, so raise the
+ // per-host idle connection limit from the default of 2.
+ transport.MaxIdleConns = 100
+ transport.MaxIdleConnsPerHost = 100
+ // Guard against a hung upstream or slow TLS negotiation.
+ transport.TLSHandshakeTimeout = 10 * time.Second
+ transport.ResponseHeaderTimeout = 60 * time.Second
+ transport.ExpectContinueTimeout = 1 * time.Second
+ transport.IdleConnTimeout = 90 * time.Second
+ s.httpClient = &http.Client{Transport: transport}
// Try establish a connection to the placement API to fail fast if the
// configuration is invalid. Directly call the root endpoint for that.
setupLog.Info("Testing connection to placement API", "url", s.config.PlacementURL)
@@ -92,6 +106,48 @@ func (s *Shim) Start(ctx context.Context) (err error) {
return nil
}
+// forward proxies the incoming HTTP request to the upstream placement API
+// and copies the response (status, headers, body) back to the client.
+func (s *Shim) forward(w http.ResponseWriter, r *http.Request) {
+ log := logf.FromContext(r.Context())
+
+ // Build upstream URL: config.PlacementURL + original path + query string.
+ upstreamURL := s.config.PlacementURL + r.URL.Path
+ if r.URL.RawQuery != "" {
+ upstreamURL += "?" + r.URL.RawQuery
+ }
+
+ // Create upstream request preserving method, body, and context.
+ upstreamReq, err := http.NewRequestWithContext(r.Context(), r.Method, upstreamURL, r.Body)
+ if err != nil {
+ log.Error(err, "failed to create upstream request", "url", upstreamURL)
+ http.Error(w, "failed to create upstream request", http.StatusBadGateway)
+ return
+ }
+
+ // Copy all incoming headers.
+ upstreamReq.Header = r.Header.Clone()
+
+ resp, err := s.httpClient.Do(upstreamReq)
+ if err != nil {
+ log.Error(err, "failed to reach placement API", "url", upstreamURL)
+ http.Error(w, "failed to reach placement API", http.StatusBadGateway)
+ return
+ }
+ defer resp.Body.Close()
+
+ // Copy response headers, status code, and body back to the caller.
+ for k, vs := range resp.Header {
+ for _, v := range vs {
+ w.Header().Add(k, v)
+ }
+ }
+ w.WriteHeader(resp.StatusCode)
+ if _, err := io.Copy(w, resp.Body); err != nil {
+ log.Error(err, "failed to copy upstream response body")
+ }
+}
+
// SetupWithManager registers field indexes on the manager's cache so that
// subsequent list calls are served from the informer cache rather than
// hitting the API server. This must be called before the manager is started.
diff --git a/pkg/sso/sso.go b/pkg/sso/sso.go
index 069533518..c5535d915 100644
--- a/pkg/sso/sso.go
+++ b/pkg/sso/sso.go
@@ -70,15 +70,13 @@ func (c Connector) FromSecretRef(ctx context.Context, ref corev1.SecretReference
return NewHTTPClient(conf)
}
-// Create a new HTTP client with the given SSO configuration
-// and logging for each request.
-func NewHTTPClient(conf SSOConfig) (*http.Client, error) {
+// NewTransport returns an *http.Transport configured with TLS client
+// certificates from the given SSO config. If no certificate is provided,
+// a plain *http.Transport is returned.
+func NewTransport(conf SSOConfig) (*http.Transport, error) {
if conf.Cert == "" {
- // Disable SSO if no certificate is provided.
- slog.Debug("making http requests without SSO")
- return &http.Client{Transport: &requestLogger{T: &http.Transport{}}}, nil
+ return &http.Transport{}, nil
}
- // If we have a public key, we also need a private key.
if conf.CertKey == "" {
return nil, errors.New("missing cert key for SSO")
}
@@ -91,7 +89,7 @@ func NewHTTPClient(conf SSOConfig) (*http.Client, error) {
}
caCertPool := x509.NewCertPool()
caCertPool.AddCert(cert.Leaf)
- return &http.Client{Transport: &requestLogger{T: &http.Transport{
+ return &http.Transport{
TLSClientConfig: &tls.Config{
Certificates: []tls.Certificate{cert},
RootCAs: caCertPool,
@@ -99,5 +97,18 @@ func NewHTTPClient(conf SSOConfig) (*http.Client, error) {
//nolint:gosec
InsecureSkipVerify: conf.SelfSigned,
},
- }}}, nil
+ }, nil
+}
+
+// NewHTTPClient creates an HTTP client with the given SSO configuration,
+// wrapping its transport so that each request is logged.
+func NewHTTPClient(conf SSOConfig) (*http.Client, error) {
+ transport, err := NewTransport(conf)
+ if err != nil {
+ return nil, err
+ }
+ if conf.Cert == "" {
+ slog.Debug("making http requests without SSO")
+ }
+ return &http.Client{Transport: &requestLogger{T: transport}}, nil
}
From 5822dafea7b0ec8668f0e02a22ff123e110233d8 Mon Sep 17 00:00:00 2001
From: Philipp Matthes
Date: Fri, 10 Apr 2026 08:34:21 +0200
Subject: [PATCH 12/17] Unit tests and linting
---
.../handle_allocation_candidates_test.go | 21 ++
.../shim/placement/handle_allocations_test.go | 78 +++++++
.../shim/placement/handle_reshaper_test.go | 21 ++
.../placement/handle_resource_classes_test.go | 57 +++++
...andle_resource_provider_aggregates_test.go | 51 +++++
...ndle_resource_provider_allocations_test.go | 30 +++
...ndle_resource_provider_inventories_test.go | 139 ++++++++++++
.../handle_resource_provider_traits_test.go | 72 ++++++
.../handle_resource_provider_usages_test.go | 30 +++
.../handle_resource_providers_test.go | 86 +++++++
internal/shim/placement/handle_root_test.go | 21 ++
internal/shim/placement/handle_traits_test.go | 49 ++++
internal/shim/placement/handle_usages_test.go | 21 ++
internal/shim/placement/shim.go | 2 +-
internal/shim/placement/shim_test.go | 209 ++++++++++++++++++
internal/shim/placement/validation.go | 2 +-
internal/shim/placement/validation_test.go | 89 ++++++++
17 files changed, 976 insertions(+), 2 deletions(-)
create mode 100644 internal/shim/placement/handle_allocation_candidates_test.go
create mode 100644 internal/shim/placement/handle_allocations_test.go
create mode 100644 internal/shim/placement/handle_reshaper_test.go
create mode 100644 internal/shim/placement/handle_resource_classes_test.go
create mode 100644 internal/shim/placement/handle_resource_provider_aggregates_test.go
create mode 100644 internal/shim/placement/handle_resource_provider_allocations_test.go
create mode 100644 internal/shim/placement/handle_resource_provider_inventories_test.go
create mode 100644 internal/shim/placement/handle_resource_provider_traits_test.go
create mode 100644 internal/shim/placement/handle_resource_provider_usages_test.go
create mode 100644 internal/shim/placement/handle_resource_providers_test.go
create mode 100644 internal/shim/placement/handle_root_test.go
create mode 100644 internal/shim/placement/handle_traits_test.go
create mode 100644 internal/shim/placement/handle_usages_test.go
create mode 100644 internal/shim/placement/shim_test.go
create mode 100644 internal/shim/placement/validation_test.go
diff --git a/internal/shim/placement/handle_allocation_candidates_test.go b/internal/shim/placement/handle_allocation_candidates_test.go
new file mode 100644
index 000000000..de75a96af
--- /dev/null
+++ b/internal/shim/placement/handle_allocation_candidates_test.go
@@ -0,0 +1,21 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package placement
+
+import (
+ "net/http"
+ "testing"
+)
+
+func TestHandleListAllocationCandidates(t *testing.T) {
+ var gotPath string
+ s := newTestShim(t, http.StatusOK, `{"allocation_requests":[]}`, &gotPath)
+ w := serveHandler(t, "GET", "/allocation_candidates", s.HandleListAllocationCandidates, "/allocation_candidates")
+ if w.Code != http.StatusOK {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusOK)
+ }
+ if gotPath != "/allocation_candidates" {
+ t.Fatalf("upstream path = %q, want /allocation_candidates", gotPath)
+ }
+}
diff --git a/internal/shim/placement/handle_allocations_test.go b/internal/shim/placement/handle_allocations_test.go
new file mode 100644
index 000000000..c42cf86e0
--- /dev/null
+++ b/internal/shim/placement/handle_allocations_test.go
@@ -0,0 +1,78 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package placement
+
+import (
+ "net/http"
+ "testing"
+)
+
+func TestHandleManageAllocations(t *testing.T) {
+ var gotPath string
+ s := newTestShim(t, http.StatusNoContent, "", &gotPath)
+ w := serveHandler(t, "POST", "/allocations", s.HandleManageAllocations, "/allocations")
+ if w.Code != http.StatusNoContent {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusNoContent)
+ }
+ if gotPath != "/allocations" {
+ t.Fatalf("upstream path = %q, want /allocations", gotPath)
+ }
+}
+
+func TestHandleListAllocations(t *testing.T) {
+ t.Run("valid uuid", func(t *testing.T) {
+ s := newTestShim(t, http.StatusOK, "{}", nil)
+ w := serveHandler(t, "GET", "/allocations/{consumer_uuid}",
+ s.HandleListAllocations, "/allocations/"+validUUID)
+ if w.Code != http.StatusOK {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusOK)
+ }
+ })
+ t.Run("invalid uuid", func(t *testing.T) {
+ s := newTestShim(t, http.StatusOK, "{}", nil)
+ w := serveHandler(t, "GET", "/allocations/{consumer_uuid}",
+ s.HandleListAllocations, "/allocations/bad")
+ if w.Code != http.StatusBadRequest {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusBadRequest)
+ }
+ })
+}
+
+func TestHandleUpdateAllocations(t *testing.T) {
+ t.Run("valid uuid", func(t *testing.T) {
+ s := newTestShim(t, http.StatusNoContent, "", nil)
+ w := serveHandler(t, "PUT", "/allocations/{consumer_uuid}",
+ s.HandleUpdateAllocations, "/allocations/"+validUUID)
+ if w.Code != http.StatusNoContent {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusNoContent)
+ }
+ })
+ t.Run("invalid uuid", func(t *testing.T) {
+ s := newTestShim(t, http.StatusOK, "{}", nil)
+ w := serveHandler(t, "PUT", "/allocations/{consumer_uuid}",
+ s.HandleUpdateAllocations, "/allocations/bad")
+ if w.Code != http.StatusBadRequest {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusBadRequest)
+ }
+ })
+}
+
+func TestHandleDeleteAllocations(t *testing.T) {
+ t.Run("valid uuid", func(t *testing.T) {
+ s := newTestShim(t, http.StatusNoContent, "", nil)
+ w := serveHandler(t, "DELETE", "/allocations/{consumer_uuid}",
+ s.HandleDeleteAllocations, "/allocations/"+validUUID)
+ if w.Code != http.StatusNoContent {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusNoContent)
+ }
+ })
+ t.Run("invalid uuid", func(t *testing.T) {
+ s := newTestShim(t, http.StatusOK, "{}", nil)
+ w := serveHandler(t, "DELETE", "/allocations/{consumer_uuid}",
+ s.HandleDeleteAllocations, "/allocations/bad")
+ if w.Code != http.StatusBadRequest {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusBadRequest)
+ }
+ })
+}
diff --git a/internal/shim/placement/handle_reshaper_test.go b/internal/shim/placement/handle_reshaper_test.go
new file mode 100644
index 000000000..e00eff2e2
--- /dev/null
+++ b/internal/shim/placement/handle_reshaper_test.go
@@ -0,0 +1,21 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package placement
+
+import (
+ "net/http"
+ "testing"
+)
+
+func TestHandlePostReshaper(t *testing.T) {
+ var gotPath string
+ s := newTestShim(t, http.StatusNoContent, "", &gotPath)
+ w := serveHandler(t, "POST", "/reshaper", s.HandlePostReshaper, "/reshaper")
+ if w.Code != http.StatusNoContent {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusNoContent)
+ }
+ if gotPath != "/reshaper" {
+ t.Fatalf("upstream path = %q, want /reshaper", gotPath)
+ }
+}
diff --git a/internal/shim/placement/handle_resource_classes_test.go b/internal/shim/placement/handle_resource_classes_test.go
new file mode 100644
index 000000000..80ffdf40e
--- /dev/null
+++ b/internal/shim/placement/handle_resource_classes_test.go
@@ -0,0 +1,57 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package placement
+
+import (
+ "net/http"
+ "testing"
+)
+
+func TestHandleListResourceClasses(t *testing.T) {
+ var gotPath string
+ s := newTestShim(t, http.StatusOK, `{"resource_classes":[]}`, &gotPath)
+ w := serveHandler(t, "GET", "/resource_classes", s.HandleListResourceClasses, "/resource_classes")
+ if w.Code != http.StatusOK {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusOK)
+ }
+ if gotPath != "/resource_classes" {
+ t.Fatalf("upstream path = %q, want /resource_classes", gotPath)
+ }
+}
+
+func TestHandleCreateResourceClass(t *testing.T) {
+ s := newTestShim(t, http.StatusCreated, "{}", nil)
+ w := serveHandler(t, "POST", "/resource_classes", s.HandleCreateResourceClass, "/resource_classes")
+ if w.Code != http.StatusCreated {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusCreated)
+ }
+}
+
+func TestHandleShowResourceClass(t *testing.T) {
+ var gotPath string
+ s := newTestShim(t, http.StatusOK, "{}", &gotPath)
+ w := serveHandler(t, "GET", "/resource_classes/{name}", s.HandleShowResourceClass, "/resource_classes/VCPU")
+ if w.Code != http.StatusOK {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusOK)
+ }
+ if gotPath != "/resource_classes/VCPU" {
+ t.Fatalf("upstream path = %q, want /resource_classes/VCPU", gotPath)
+ }
+}
+
+func TestHandleUpdateResourceClass(t *testing.T) {
+ s := newTestShim(t, http.StatusNoContent, "", nil)
+ w := serveHandler(t, "PUT", "/resource_classes/{name}", s.HandleUpdateResourceClass, "/resource_classes/CUSTOM_FOO")
+ if w.Code != http.StatusNoContent {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusNoContent)
+ }
+}
+
+func TestHandleDeleteResourceClass(t *testing.T) {
+ s := newTestShim(t, http.StatusNoContent, "", nil)
+ w := serveHandler(t, "DELETE", "/resource_classes/{name}", s.HandleDeleteResourceClass, "/resource_classes/CUSTOM_BAR")
+ if w.Code != http.StatusNoContent {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusNoContent)
+ }
+}
diff --git a/internal/shim/placement/handle_resource_provider_aggregates_test.go b/internal/shim/placement/handle_resource_provider_aggregates_test.go
new file mode 100644
index 000000000..f55b09fed
--- /dev/null
+++ b/internal/shim/placement/handle_resource_provider_aggregates_test.go
@@ -0,0 +1,51 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package placement
+
+import (
+ "net/http"
+ "testing"
+)
+
+func TestHandleListResourceProviderAggregates(t *testing.T) {
+ t.Run("valid uuid", func(t *testing.T) {
+ s := newTestShim(t, http.StatusOK, "{}", nil)
+ w := serveHandler(t, "GET", "/resource_providers/{uuid}/aggregates",
+ s.HandleListResourceProviderAggregates,
+ "/resource_providers/"+validUUID+"/aggregates")
+ if w.Code != http.StatusOK {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusOK)
+ }
+ })
+ t.Run("invalid uuid", func(t *testing.T) {
+ s := newTestShim(t, http.StatusOK, "{}", nil)
+ w := serveHandler(t, "GET", "/resource_providers/{uuid}/aggregates",
+ s.HandleListResourceProviderAggregates,
+ "/resource_providers/not-a-uuid/aggregates")
+ if w.Code != http.StatusBadRequest {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusBadRequest)
+ }
+ })
+}
+
+func TestHandleUpdateResourceProviderAggregates(t *testing.T) {
+ t.Run("valid uuid", func(t *testing.T) {
+ s := newTestShim(t, http.StatusOK, "{}", nil)
+ w := serveHandler(t, "PUT", "/resource_providers/{uuid}/aggregates",
+ s.HandleUpdateResourceProviderAggregates,
+ "/resource_providers/"+validUUID+"/aggregates")
+ if w.Code != http.StatusOK {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusOK)
+ }
+ })
+ t.Run("invalid uuid", func(t *testing.T) {
+ s := newTestShim(t, http.StatusOK, "{}", nil)
+ w := serveHandler(t, "PUT", "/resource_providers/{uuid}/aggregates",
+ s.HandleUpdateResourceProviderAggregates,
+ "/resource_providers/not-a-uuid/aggregates")
+ if w.Code != http.StatusBadRequest {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusBadRequest)
+ }
+ })
+}
diff --git a/internal/shim/placement/handle_resource_provider_allocations_test.go b/internal/shim/placement/handle_resource_provider_allocations_test.go
new file mode 100644
index 000000000..98834afab
--- /dev/null
+++ b/internal/shim/placement/handle_resource_provider_allocations_test.go
@@ -0,0 +1,30 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package placement
+
+import (
+ "net/http"
+ "testing"
+)
+
+func TestHandleListResourceProviderAllocations(t *testing.T) {
+ t.Run("valid uuid", func(t *testing.T) {
+ s := newTestShim(t, http.StatusOK, "{}", nil)
+ w := serveHandler(t, "GET", "/resource_providers/{uuid}/allocations",
+ s.HandleListResourceProviderAllocations,
+ "/resource_providers/"+validUUID+"/allocations")
+ if w.Code != http.StatusOK {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusOK)
+ }
+ })
+ t.Run("invalid uuid", func(t *testing.T) {
+ s := newTestShim(t, http.StatusOK, "{}", nil)
+ w := serveHandler(t, "GET", "/resource_providers/{uuid}/allocations",
+ s.HandleListResourceProviderAllocations,
+ "/resource_providers/not-a-uuid/allocations")
+ if w.Code != http.StatusBadRequest {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusBadRequest)
+ }
+ })
+}
diff --git a/internal/shim/placement/handle_resource_provider_inventories_test.go b/internal/shim/placement/handle_resource_provider_inventories_test.go
new file mode 100644
index 000000000..054e48e32
--- /dev/null
+++ b/internal/shim/placement/handle_resource_provider_inventories_test.go
@@ -0,0 +1,139 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package placement
+
+import (
+ "net/http"
+ "testing"
+)
+
+func TestHandleListResourceProviderInventories(t *testing.T) {
+ t.Run("valid uuid", func(t *testing.T) {
+ s := newTestShim(t, http.StatusOK, "{}", nil)
+ w := serveHandler(t, "GET", "/resource_providers/{uuid}/inventories",
+ s.HandleListResourceProviderInventories,
+ "/resource_providers/"+validUUID+"/inventories")
+ if w.Code != http.StatusOK {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusOK)
+ }
+ })
+ t.Run("invalid uuid", func(t *testing.T) {
+ s := newTestShim(t, http.StatusOK, "{}", nil)
+ w := serveHandler(t, "GET", "/resource_providers/{uuid}/inventories",
+ s.HandleListResourceProviderInventories,
+ "/resource_providers/not-a-uuid/inventories")
+ if w.Code != http.StatusBadRequest {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusBadRequest)
+ }
+ })
+}
+
+func TestHandleUpdateResourceProviderInventories(t *testing.T) {
+ t.Run("valid uuid", func(t *testing.T) {
+ s := newTestShim(t, http.StatusOK, "{}", nil)
+ w := serveHandler(t, "PUT", "/resource_providers/{uuid}/inventories",
+ s.HandleUpdateResourceProviderInventories,
+ "/resource_providers/"+validUUID+"/inventories")
+ if w.Code != http.StatusOK {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusOK)
+ }
+ })
+ t.Run("invalid uuid", func(t *testing.T) {
+ s := newTestShim(t, http.StatusOK, "{}", nil)
+ w := serveHandler(t, "PUT", "/resource_providers/{uuid}/inventories",
+ s.HandleUpdateResourceProviderInventories,
+ "/resource_providers/not-a-uuid/inventories")
+ if w.Code != http.StatusBadRequest {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusBadRequest)
+ }
+ })
+}
+
+func TestHandleDeleteResourceProviderInventories(t *testing.T) {
+ t.Run("valid uuid", func(t *testing.T) {
+ s := newTestShim(t, http.StatusNoContent, "", nil)
+ w := serveHandler(t, "DELETE", "/resource_providers/{uuid}/inventories",
+ s.HandleDeleteResourceProviderInventories,
+ "/resource_providers/"+validUUID+"/inventories")
+ if w.Code != http.StatusNoContent {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusNoContent)
+ }
+ })
+ t.Run("invalid uuid", func(t *testing.T) {
+ s := newTestShim(t, http.StatusOK, "{}", nil)
+ w := serveHandler(t, "DELETE", "/resource_providers/{uuid}/inventories",
+ s.HandleDeleteResourceProviderInventories,
+ "/resource_providers/not-a-uuid/inventories")
+ if w.Code != http.StatusBadRequest {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusBadRequest)
+ }
+ })
+}
+
+func TestHandleShowResourceProviderInventory(t *testing.T) {
+ t.Run("valid", func(t *testing.T) {
+ var gotPath string
+ s := newTestShim(t, http.StatusOK, "{}", &gotPath)
+ path := "/resource_providers/" + validUUID + "/inventories/VCPU"
+ w := serveHandler(t, "GET", "/resource_providers/{uuid}/inventories/{resource_class}",
+ s.HandleShowResourceProviderInventory, path)
+ if w.Code != http.StatusOK {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusOK)
+ }
+ if gotPath != path {
+ t.Fatalf("upstream path = %q, want %q", gotPath, path)
+ }
+ })
+ t.Run("invalid uuid", func(t *testing.T) {
+ s := newTestShim(t, http.StatusOK, "{}", nil)
+ w := serveHandler(t, "GET", "/resource_providers/{uuid}/inventories/{resource_class}",
+ s.HandleShowResourceProviderInventory,
+ "/resource_providers/not-a-uuid/inventories/VCPU")
+ if w.Code != http.StatusBadRequest {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusBadRequest)
+ }
+ })
+}
+
+func TestHandleUpdateResourceProviderInventory(t *testing.T) {
+ t.Run("valid", func(t *testing.T) {
+ s := newTestShim(t, http.StatusOK, "{}", nil)
+ w := serveHandler(t, "PUT", "/resource_providers/{uuid}/inventories/{resource_class}",
+ s.HandleUpdateResourceProviderInventory,
+ "/resource_providers/"+validUUID+"/inventories/VCPU")
+ if w.Code != http.StatusOK {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusOK)
+ }
+ })
+ t.Run("invalid uuid", func(t *testing.T) {
+ s := newTestShim(t, http.StatusOK, "{}", nil)
+ w := serveHandler(t, "PUT", "/resource_providers/{uuid}/inventories/{resource_class}",
+ s.HandleUpdateResourceProviderInventory,
+ "/resource_providers/not-a-uuid/inventories/VCPU")
+ if w.Code != http.StatusBadRequest {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusBadRequest)
+ }
+ })
+}
+
+func TestHandleDeleteResourceProviderInventory(t *testing.T) {
+ t.Run("valid", func(t *testing.T) {
+ s := newTestShim(t, http.StatusNoContent, "", nil)
+ w := serveHandler(t, "DELETE", "/resource_providers/{uuid}/inventories/{resource_class}",
+ s.HandleDeleteResourceProviderInventory,
+ "/resource_providers/"+validUUID+"/inventories/VCPU")
+ if w.Code != http.StatusNoContent {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusNoContent)
+ }
+ })
+ t.Run("invalid uuid", func(t *testing.T) {
+ s := newTestShim(t, http.StatusOK, "{}", nil)
+ w := serveHandler(t, "DELETE", "/resource_providers/{uuid}/inventories/{resource_class}",
+ s.HandleDeleteResourceProviderInventory,
+ "/resource_providers/not-a-uuid/inventories/VCPU")
+ if w.Code != http.StatusBadRequest {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusBadRequest)
+ }
+ })
+}
diff --git a/internal/shim/placement/handle_resource_provider_traits_test.go b/internal/shim/placement/handle_resource_provider_traits_test.go
new file mode 100644
index 000000000..809f0503f
--- /dev/null
+++ b/internal/shim/placement/handle_resource_provider_traits_test.go
@@ -0,0 +1,72 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package placement
+
+import (
+ "net/http"
+ "testing"
+)
+
+func TestHandleListResourceProviderTraits(t *testing.T) {
+ t.Run("valid uuid", func(t *testing.T) {
+ s := newTestShim(t, http.StatusOK, "{}", nil)
+ w := serveHandler(t, "GET", "/resource_providers/{uuid}/traits",
+ s.HandleListResourceProviderTraits,
+ "/resource_providers/"+validUUID+"/traits")
+ if w.Code != http.StatusOK {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusOK)
+ }
+ })
+ t.Run("invalid uuid", func(t *testing.T) {
+ s := newTestShim(t, http.StatusOK, "{}", nil)
+ w := serveHandler(t, "GET", "/resource_providers/{uuid}/traits",
+ s.HandleListResourceProviderTraits,
+ "/resource_providers/not-a-uuid/traits")
+ if w.Code != http.StatusBadRequest {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusBadRequest)
+ }
+ })
+}
+
+func TestHandleUpdateResourceProviderTraits(t *testing.T) {
+ t.Run("valid uuid", func(t *testing.T) {
+ s := newTestShim(t, http.StatusOK, "{}", nil)
+ w := serveHandler(t, "PUT", "/resource_providers/{uuid}/traits",
+ s.HandleUpdateResourceProviderTraits,
+ "/resource_providers/"+validUUID+"/traits")
+ if w.Code != http.StatusOK {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusOK)
+ }
+ })
+ t.Run("invalid uuid", func(t *testing.T) {
+ s := newTestShim(t, http.StatusOK, "{}", nil)
+ w := serveHandler(t, "PUT", "/resource_providers/{uuid}/traits",
+ s.HandleUpdateResourceProviderTraits,
+ "/resource_providers/not-a-uuid/traits")
+ if w.Code != http.StatusBadRequest {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusBadRequest)
+ }
+ })
+}
+
+func TestHandleDeleteResourceProviderTraits(t *testing.T) {
+ t.Run("valid uuid", func(t *testing.T) {
+ s := newTestShim(t, http.StatusNoContent, "", nil)
+ w := serveHandler(t, "DELETE", "/resource_providers/{uuid}/traits",
+ s.HandleDeleteResourceProviderTraits,
+ "/resource_providers/"+validUUID+"/traits")
+ if w.Code != http.StatusNoContent {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusNoContent)
+ }
+ })
+ t.Run("invalid uuid", func(t *testing.T) {
+ s := newTestShim(t, http.StatusOK, "{}", nil)
+ w := serveHandler(t, "DELETE", "/resource_providers/{uuid}/traits",
+ s.HandleDeleteResourceProviderTraits,
+ "/resource_providers/not-a-uuid/traits")
+ if w.Code != http.StatusBadRequest {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusBadRequest)
+ }
+ })
+}
diff --git a/internal/shim/placement/handle_resource_provider_usages_test.go b/internal/shim/placement/handle_resource_provider_usages_test.go
new file mode 100644
index 000000000..76541a993
--- /dev/null
+++ b/internal/shim/placement/handle_resource_provider_usages_test.go
@@ -0,0 +1,30 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package placement
+
+import (
+ "net/http"
+ "testing"
+)
+
+func TestHandleListResourceProviderUsages(t *testing.T) {
+ t.Run("valid uuid", func(t *testing.T) {
+ s := newTestShim(t, http.StatusOK, "{}", nil)
+ w := serveHandler(t, "GET", "/resource_providers/{uuid}/usages",
+ s.HandleListResourceProviderUsages,
+ "/resource_providers/"+validUUID+"/usages")
+ if w.Code != http.StatusOK {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusOK)
+ }
+ })
+ t.Run("invalid uuid", func(t *testing.T) {
+ s := newTestShim(t, http.StatusOK, "{}", nil)
+ w := serveHandler(t, "GET", "/resource_providers/{uuid}/usages",
+ s.HandleListResourceProviderUsages,
+ "/resource_providers/not-a-uuid/usages")
+ if w.Code != http.StatusBadRequest {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusBadRequest)
+ }
+ })
+}
diff --git a/internal/shim/placement/handle_resource_providers_test.go b/internal/shim/placement/handle_resource_providers_test.go
new file mode 100644
index 000000000..520a32c0b
--- /dev/null
+++ b/internal/shim/placement/handle_resource_providers_test.go
@@ -0,0 +1,86 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package placement
+
+import (
+ "net/http"
+ "testing"
+)
+
+func TestHandleListResourceProviders(t *testing.T) {
+ var gotPath string
+ s := newTestShim(t, http.StatusOK, `{"resource_providers":[]}`, &gotPath)
+ w := serveHandler(t, "GET", "/resource_providers", s.HandleListResourceProviders, "/resource_providers")
+ if w.Code != http.StatusOK {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusOK)
+ }
+ if gotPath != "/resource_providers" {
+ t.Fatalf("upstream path = %q, want /resource_providers", gotPath)
+ }
+}
+
+func TestHandleCreateResourceProvider(t *testing.T) {
+ s := newTestShim(t, http.StatusCreated, "{}", nil)
+ w := serveHandler(t, "POST", "/resource_providers", s.HandleCreateResourceProvider, "/resource_providers")
+ if w.Code != http.StatusCreated {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusCreated)
+ }
+}
+
+func TestHandleShowResourceProvider(t *testing.T) {
+ t.Run("valid uuid", func(t *testing.T) {
+ s := newTestShim(t, http.StatusOK, "{}", nil)
+ w := serveHandler(t, "GET", "/resource_providers/{uuid}", s.HandleShowResourceProvider,
+ "/resource_providers/"+validUUID)
+ if w.Code != http.StatusOK {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusOK)
+ }
+ })
+ t.Run("invalid uuid", func(t *testing.T) {
+ s := newTestShim(t, http.StatusOK, "{}", nil)
+ w := serveHandler(t, "GET", "/resource_providers/{uuid}", s.HandleShowResourceProvider,
+ "/resource_providers/not-a-uuid")
+ if w.Code != http.StatusBadRequest {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusBadRequest)
+ }
+ })
+}
+
+func TestHandleUpdateResourceProvider(t *testing.T) {
+ t.Run("valid uuid", func(t *testing.T) {
+ s := newTestShim(t, http.StatusOK, "{}", nil)
+ w := serveHandler(t, "PUT", "/resource_providers/{uuid}", s.HandleUpdateResourceProvider,
+ "/resource_providers/"+validUUID)
+ if w.Code != http.StatusOK {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusOK)
+ }
+ })
+ t.Run("invalid uuid", func(t *testing.T) {
+ s := newTestShim(t, http.StatusOK, "{}", nil)
+ w := serveHandler(t, "PUT", "/resource_providers/{uuid}", s.HandleUpdateResourceProvider,
+ "/resource_providers/not-a-uuid")
+ if w.Code != http.StatusBadRequest {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusBadRequest)
+ }
+ })
+}
+
+func TestHandleDeleteResourceProvider(t *testing.T) {
+ t.Run("valid uuid", func(t *testing.T) {
+ s := newTestShim(t, http.StatusNoContent, "", nil)
+ w := serveHandler(t, "DELETE", "/resource_providers/{uuid}", s.HandleDeleteResourceProvider,
+ "/resource_providers/"+validUUID)
+ if w.Code != http.StatusNoContent {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusNoContent)
+ }
+ })
+ t.Run("invalid uuid", func(t *testing.T) {
+ s := newTestShim(t, http.StatusOK, "{}", nil)
+ w := serveHandler(t, "DELETE", "/resource_providers/{uuid}", s.HandleDeleteResourceProvider,
+ "/resource_providers/not-a-uuid")
+ if w.Code != http.StatusBadRequest {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusBadRequest)
+ }
+ })
+}
diff --git a/internal/shim/placement/handle_root_test.go b/internal/shim/placement/handle_root_test.go
new file mode 100644
index 000000000..e342f6a68
--- /dev/null
+++ b/internal/shim/placement/handle_root_test.go
@@ -0,0 +1,21 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package placement
+
+import (
+ "net/http"
+ "testing"
+)
+
+func TestHandleGetRoot(t *testing.T) {
+ var gotPath string
+ s := newTestShim(t, http.StatusOK, `{"versions":[]}`, &gotPath)
+ w := serveHandler(t, "GET", "/{$}", s.HandleGetRoot, "/")
+ if w.Code != http.StatusOK {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusOK)
+ }
+ if gotPath != "/" {
+ t.Fatalf("upstream path = %q, want %q", gotPath, "/")
+ }
+}
diff --git a/internal/shim/placement/handle_traits_test.go b/internal/shim/placement/handle_traits_test.go
new file mode 100644
index 000000000..09d5a8586
--- /dev/null
+++ b/internal/shim/placement/handle_traits_test.go
@@ -0,0 +1,49 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package placement
+
+import (
+ "net/http"
+ "testing"
+)
+
+func TestHandleListTraits(t *testing.T) {
+ var gotPath string
+ s := newTestShim(t, http.StatusOK, `{"traits":[]}`, &gotPath)
+ w := serveHandler(t, "GET", "/traits", s.HandleListTraits, "/traits")
+ if w.Code != http.StatusOK {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusOK)
+ }
+ if gotPath != "/traits" {
+ t.Fatalf("upstream path = %q, want /traits", gotPath)
+ }
+}
+
+func TestHandleShowTrait(t *testing.T) {
+ var gotPath string
+ s := newTestShim(t, http.StatusNoContent, "", &gotPath)
+ w := serveHandler(t, "GET", "/traits/{name}", s.HandleShowTrait, "/traits/HW_CPU_X86_AVX2")
+ if w.Code != http.StatusNoContent {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusNoContent)
+ }
+ if gotPath != "/traits/HW_CPU_X86_AVX2" {
+ t.Fatalf("upstream path = %q, want /traits/HW_CPU_X86_AVX2", gotPath)
+ }
+}
+
+func TestHandleUpdateTrait(t *testing.T) {
+ s := newTestShim(t, http.StatusCreated, "", nil)
+ w := serveHandler(t, "PUT", "/traits/{name}", s.HandleUpdateTrait, "/traits/CUSTOM_TRAIT")
+ if w.Code != http.StatusCreated {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusCreated)
+ }
+}
+
+func TestHandleDeleteTrait(t *testing.T) {
+ s := newTestShim(t, http.StatusNoContent, "", nil)
+ w := serveHandler(t, "DELETE", "/traits/{name}", s.HandleDeleteTrait, "/traits/CUSTOM_TRAIT")
+ if w.Code != http.StatusNoContent {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusNoContent)
+ }
+}
diff --git a/internal/shim/placement/handle_usages_test.go b/internal/shim/placement/handle_usages_test.go
new file mode 100644
index 000000000..46d91681b
--- /dev/null
+++ b/internal/shim/placement/handle_usages_test.go
@@ -0,0 +1,21 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package placement
+
+import (
+ "net/http"
+ "testing"
+)
+
+func TestHandleListUsages(t *testing.T) {
+ var gotPath string
+ s := newTestShim(t, http.StatusOK, `{"usages":{}}`, &gotPath)
+ w := serveHandler(t, "GET", "/usages", s.HandleListUsages, "/usages")
+ if w.Code != http.StatusOK {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusOK)
+ }
+ if gotPath != "/usages" {
+ t.Fatalf("upstream path = %q, want /usages", gotPath)
+ }
+}
diff --git a/internal/shim/placement/shim.go b/internal/shim/placement/shim.go
index 4ef93c66a..2623a3baa 100644
--- a/internal/shim/placement/shim.go
+++ b/internal/shim/placement/shim.go
@@ -86,7 +86,7 @@ func (s *Shim) Start(ctx context.Context) (err error) {
setupLog.Error(err, "Invalid configuration for placement shim")
return err
}
- req, err := http.NewRequestWithContext(ctx, "GET", s.config.PlacementURL, nil)
+ req, err := http.NewRequestWithContext(ctx, http.MethodGet, s.config.PlacementURL, http.NoBody)
if err != nil {
setupLog.Error(err, "Failed to create HTTP request to placement API")
return err
diff --git a/internal/shim/placement/shim_test.go b/internal/shim/placement/shim_test.go
new file mode 100644
index 000000000..fc81d9b1f
--- /dev/null
+++ b/internal/shim/placement/shim_test.go
@@ -0,0 +1,209 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package placement
+
+import (
+ "io"
+ "net/http"
+ "net/http/httptest"
+ "strings"
+ "testing"
+)
+
+const validUUID = "d9b3a520-2a3c-4f6b-8b9a-1c2d3e4f5a6b"
+
+// newTestShim creates a Shim backed by an upstream test server that returns
+// the given status and body for every request. It records the last request
+// path in *gotPath when non-nil.
+func newTestShim(t *testing.T, status int, body string, gotPath *string) *Shim {
+ t.Helper()
+ upstream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ if gotPath != nil {
+ *gotPath = r.URL.Path
+ }
+ w.WriteHeader(status)
+ if _, err := w.Write([]byte(body)); err != nil {
+ t.Errorf("failed to write response body: %v", err)
+ }
+ }))
+ t.Cleanup(upstream.Close)
+ return &Shim{
+ config: config{PlacementURL: upstream.URL},
+ httpClient: upstream.Client(),
+ }
+}
+
+// serveHandler registers a single handler on a fresh mux and serves the
+// request through it, returning the recorded response.
+func serveHandler(t *testing.T, method, pattern string, handler http.HandlerFunc, reqPath string) *httptest.ResponseRecorder {
+ t.Helper()
+ mux := http.NewServeMux()
+ mux.HandleFunc(method+" "+pattern, handler)
+ req := httptest.NewRequest(method, reqPath, http.NoBody)
+ w := httptest.NewRecorder()
+ mux.ServeHTTP(w, req)
+ return w
+}
+
+func TestForward(t *testing.T) {
+ tests := []struct {
+ name string
+ path string
+ query string
+ method string
+ body string
+ reqHeaders map[string]string
+ upstreamStatus int
+ upstreamBody string
+ upstreamHeader map[string]string
+ }{
+ {
+ name: "GET with query string",
+ path: "/resource_providers",
+ query: "name=test",
+ method: "GET",
+ upstreamStatus: http.StatusOK,
+ upstreamBody: `{"resource_providers":[]}`,
+ upstreamHeader: map[string]string{"Content-Type": "application/json"},
+ },
+ {
+ name: "PUT with body and headers",
+ path: "/resource_providers/abc",
+ method: "PUT",
+ body: `{"name":"new"}`,
+ reqHeaders: map[string]string{"X-Custom": "val"},
+ upstreamStatus: http.StatusOK,
+ upstreamBody: `{"uuid":"abc"}`,
+ },
+ {
+ name: "upstream error",
+ path: "/fail",
+ method: "GET",
+ upstreamStatus: http.StatusNotFound,
+ upstreamBody: "not found",
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ upstream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ // Verify the path and query were forwarded.
+ if r.URL.Path != tt.path {
+ t.Errorf("upstream path = %q, want %q", r.URL.Path, tt.path)
+ }
+ if r.URL.RawQuery != tt.query {
+ t.Errorf("upstream query = %q, want %q", r.URL.RawQuery, tt.query)
+ }
+ if r.Method != tt.method {
+ t.Errorf("upstream method = %q, want %q", r.Method, tt.method)
+ }
+ // Verify headers were copied.
+ for k, v := range tt.reqHeaders {
+ if got := r.Header.Get(k); got != v {
+ t.Errorf("upstream header %q = %q, want %q", k, got, v)
+ }
+ }
+ // Verify body was copied.
+ if tt.body != "" {
+ b, err := io.ReadAll(r.Body)
+ if err != nil {
+ t.Fatalf("failed to read upstream body: %v", err)
+ }
+ if string(b) != tt.body {
+ t.Errorf("upstream body = %q, want %q", string(b), tt.body)
+ }
+ }
+ for k, v := range tt.upstreamHeader {
+ w.Header().Set(k, v)
+ }
+ w.WriteHeader(tt.upstreamStatus)
+ if _, err := w.Write([]byte(tt.upstreamBody)); err != nil {
+ t.Fatalf("failed to write upstream body: %v", err)
+ }
+ }))
+ defer upstream.Close()
+
+ s := &Shim{
+ config: config{PlacementURL: upstream.URL},
+ httpClient: upstream.Client(),
+ }
+ target := tt.path
+ if tt.query != "" {
+ target += "?" + tt.query
+ }
+ var bodyReader io.Reader
+ if tt.body != "" {
+ bodyReader = strings.NewReader(tt.body)
+ }
+ req := httptest.NewRequest(tt.method, target, bodyReader)
+ for k, v := range tt.reqHeaders {
+ req.Header.Set(k, v)
+ }
+ w := httptest.NewRecorder()
+ s.forward(w, req)
+
+ if w.Code != tt.upstreamStatus {
+ t.Fatalf("status = %d, want %d", w.Code, tt.upstreamStatus)
+ }
+ if got := w.Body.String(); got != tt.upstreamBody {
+ t.Fatalf("body = %q, want %q", got, tt.upstreamBody)
+ }
+ for k, v := range tt.upstreamHeader {
+ if got := w.Header().Get(k); got != v {
+ t.Errorf("response header %q = %q, want %q", k, got, v)
+ }
+ }
+ })
+ }
+}
+
+func TestForwardUpstreamUnreachable(t *testing.T) {
+ s := &Shim{
+ config: config{PlacementURL: "http://127.0.0.1:1"},
+ httpClient: &http.Client{},
+ }
+ req := httptest.NewRequest(http.MethodGet, "/", http.NoBody)
+ w := httptest.NewRecorder()
+ s.forward(w, req)
+ if w.Code != http.StatusBadGateway {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusBadGateway)
+ }
+}
+
+func TestRegisterRoutes(t *testing.T) {
+ upstream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+ w.WriteHeader(http.StatusOK)
+ }))
+ defer upstream.Close()
+ s := &Shim{
+ config: config{PlacementURL: upstream.URL},
+ httpClient: upstream.Client(),
+ }
+ mux := http.NewServeMux()
+ s.RegisterRoutes(mux)
+ // Verify a sample of routes are registered. Unregistered patterns
+ // return 404 from the default mux; registered ones reach the upstream.
+ routes := []struct {
+ method string
+ path string
+ }{
+ {"GET", "/"},
+ {"GET", "/resource_providers"},
+ {"POST", "/resource_providers"},
+ {"GET", "/traits"},
+ {"GET", "/allocation_candidates"},
+ {"POST", "/reshaper"},
+ {"POST", "/allocations"},
+ {"GET", "/usages"},
+ }
+ for _, rt := range routes {
+ t.Run(rt.method+" "+rt.path, func(t *testing.T) {
+ req := httptest.NewRequest(rt.method, rt.path, http.NoBody)
+ w := httptest.NewRecorder()
+ mux.ServeHTTP(w, req)
+ if w.Code == http.StatusNotFound {
+ t.Fatalf("route %s %s returned 404, expected it to be registered", rt.method, rt.path)
+ }
+ })
+ }
+}
diff --git a/internal/shim/placement/validation.go b/internal/shim/placement/validation.go
index 55a46c406..b025cbfd7 100644
--- a/internal/shim/placement/validation.go
+++ b/internal/shim/placement/validation.go
@@ -16,7 +16,7 @@ import (
func requiredPathParam(w http.ResponseWriter, r *http.Request, name string) (string, bool) {
v := r.PathValue(name)
if v == "" {
- http.Error(w, fmt.Sprintf("missing path parameter: %s", name), http.StatusBadRequest)
+ http.Error(w, "missing path parameter: "+name, http.StatusBadRequest)
return "", false
}
return v, true
diff --git a/internal/shim/placement/validation_test.go b/internal/shim/placement/validation_test.go
new file mode 100644
index 000000000..b0b39c27e
--- /dev/null
+++ b/internal/shim/placement/validation_test.go
@@ -0,0 +1,89 @@
+// Copyright SAP SE
+// SPDX-License-Identifier: Apache-2.0
+
+package placement
+
+import (
+ "net/http"
+ "net/http/httptest"
+ "testing"
+)
+
+func TestRequiredPathParam(t *testing.T) {
+ t.Run("valid param", func(t *testing.T) {
+ mux := http.NewServeMux()
+ var gotValue string
+ var gotOK bool
+ mux.HandleFunc("GET /test/{name}", func(w http.ResponseWriter, r *http.Request) {
+ gotValue, gotOK = requiredPathParam(w, r, "name")
+ })
+ req := httptest.NewRequest(http.MethodGet, "/test/VCPU", http.NoBody)
+ w := httptest.NewRecorder()
+ mux.ServeHTTP(w, req)
+ if !gotOK {
+ t.Fatal("expected ok = true")
+ }
+ if gotValue != "VCPU" {
+ t.Fatalf("value = %q, want %q", gotValue, "VCPU")
+ }
+ })
+ t.Run("wrong param name returns empty", func(t *testing.T) {
+ mux := http.NewServeMux()
+ var gotOK bool
+ mux.HandleFunc("GET /test/{name}", func(w http.ResponseWriter, r *http.Request) {
+ _, gotOK = requiredPathParam(w, r, "nonexistent")
+ })
+ req := httptest.NewRequest(http.MethodGet, "/test/VCPU", http.NoBody)
+ w := httptest.NewRecorder()
+ mux.ServeHTTP(w, req)
+ if gotOK {
+ t.Fatal("expected ok = false for wrong param name")
+ }
+ if w.Code != http.StatusBadRequest {
+ t.Fatalf("status = %d, want %d", w.Code, http.StatusBadRequest)
+ }
+ })
+}
+
+func TestRequiredUUIDPathParam(t *testing.T) {
+ tests := []struct {
+ name string
+ paramValue string
+ wantOK bool
+ wantCode int
+ }{
+ {
+ name: "valid uuid",
+ paramValue: "d9b3a520-2a3c-4f6b-8b9a-1c2d3e4f5a6b",
+ wantOK: true,
+ },
+ {
+ name: "invalid uuid",
+ paramValue: "not-a-uuid",
+ wantOK: false,
+ wantCode: http.StatusBadRequest,
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ mux := http.NewServeMux()
+ var gotValue string
+ var gotOK bool
+ mux.HandleFunc("GET /test/{uuid}", func(w http.ResponseWriter, r *http.Request) {
+ gotValue, gotOK = requiredUUIDPathParam(w, r, "uuid")
+ })
+ req := httptest.NewRequest(http.MethodGet, "/test/"+tt.paramValue, http.NoBody)
+ w := httptest.NewRecorder()
+ mux.ServeHTTP(w, req)
+ if gotOK != tt.wantOK {
+ t.Fatalf("ok = %v, want %v", gotOK, tt.wantOK)
+ }
+ if tt.wantOK && gotValue != tt.paramValue {
+ t.Fatalf("value = %q, want %q", gotValue, tt.paramValue)
+ }
+ if !tt.wantOK && w.Code != tt.wantCode {
+ t.Fatalf("status = %d, want %d", w.Code, tt.wantCode)
+ }
+ })
+ }
+}
From af9841cfd5074a1dc775563dc11db3b2036d445e Mon Sep 17 00:00:00 2001
From: Philipp Matthes
Date: Fri, 10 Apr 2026 08:51:49 +0200
Subject: [PATCH 13/17] Fix SSRF
---
internal/shim/placement/shim.go | 27 +++++++++++++++++++--------
1 file changed, 19 insertions(+), 8 deletions(-)
diff --git a/internal/shim/placement/shim.go b/internal/shim/placement/shim.go
index 2623a3baa..cd709db9c 100644
--- a/internal/shim/placement/shim.go
+++ b/internal/shim/placement/shim.go
@@ -8,6 +8,7 @@ import (
"errors"
"io"
"net/http"
+ "net/url"
"time"
"github.com/cobaltcore-dev/cortex/pkg/conf"
@@ -111,16 +112,26 @@ func (s *Shim) Start(ctx context.Context) (err error) {
func (s *Shim) forward(w http.ResponseWriter, r *http.Request) {
log := logf.FromContext(r.Context())
- // Build upstream URL: config.PlacementURL + original path + query string.
- upstreamURL := s.config.PlacementURL + r.URL.Path
- if r.URL.RawQuery != "" {
- upstreamURL += "?" + r.URL.RawQuery
+ // Parse the trusted base URL and resolve the request path against it
+ // so the upstream target is always anchored to the configured host.
+ upstream, err := url.Parse(s.config.PlacementURL)
+ if err != nil {
+ log.Error(err, "failed to parse placement URL", "url", s.config.PlacementURL)
+ http.Error(w, "failed to parse placement URL", http.StatusBadGateway)
+ return
+ }
+ upstream.Path, err = url.JoinPath(upstream.Path, r.URL.Path)
+ if err != nil {
+ log.Error(err, "failed to join upstream path", "path", r.URL.Path)
+ http.Error(w, "failed to join upstream path", http.StatusBadGateway)
+ return
}
+ upstream.RawQuery = r.URL.RawQuery
// Create upstream request preserving method, body, and context.
- upstreamReq, err := http.NewRequestWithContext(r.Context(), r.Method, upstreamURL, r.Body)
+ upstreamReq, err := http.NewRequestWithContext(r.Context(), r.Method, upstream.String(), r.Body)
if err != nil {
- log.Error(err, "failed to create upstream request", "url", upstreamURL)
+ log.Error(err, "failed to create upstream request", "url", upstream.String())
http.Error(w, "failed to create upstream request", http.StatusBadGateway)
return
}
@@ -128,9 +139,9 @@ func (s *Shim) forward(w http.ResponseWriter, r *http.Request) {
// Copy all incoming headers.
upstreamReq.Header = r.Header.Clone()
- resp, err := s.httpClient.Do(upstreamReq)
+ resp, err := s.httpClient.Do(upstreamReq) //nolint:gosec // G704: intentional reverse proxy; host is fixed by operator config, only path varies
if err != nil {
- log.Error(err, "failed to reach placement API", "url", upstreamURL)
+ log.Error(err, "failed to reach placement API", "url", upstream.String())
http.Error(w, "failed to reach placement API", http.StatusBadGateway)
return
}
From 271a91045581e9cb986fd2e677956ca150a5572a Mon Sep 17 00:00:00 2001
From: Philipp Matthes
Date: Fri, 10 Apr 2026 10:10:34 +0200
Subject: [PATCH 14/17] Add multicluster client setup
---
cmd/shim/main.go | 25 +++-
.../bundles/cortex-placement-shim/values.yaml | 5 +
internal/shim/placement/shim.go | 119 +++++++++++-------
pkg/multicluster/routers.go | 10 ++
4 files changed, 111 insertions(+), 48 deletions(-)
diff --git a/cmd/shim/main.go b/cmd/shim/main.go
index 0a68c7298..5ebf902a7 100644
--- a/cmd/shim/main.go
+++ b/cmd/shim/main.go
@@ -15,6 +15,7 @@ import (
"github.com/cobaltcore-dev/cortex/internal/shim/placement"
"github.com/cobaltcore-dev/cortex/pkg/conf"
"github.com/cobaltcore-dev/cortex/pkg/monitoring"
+ "github.com/cobaltcore-dev/cortex/pkg/multicluster"
hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
"github.com/sapcc/go-bits/httpext"
"k8s.io/apimachinery/pkg/runtime"
@@ -22,6 +23,7 @@ import (
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/certwatcher"
+ "sigs.k8s.io/controller-runtime/pkg/cluster"
"sigs.k8s.io/controller-runtime/pkg/healthz"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
"sigs.k8s.io/controller-runtime/pkg/metrics"
@@ -197,7 +199,26 @@ func main() {
os.Exit(1)
}
- // TODO: Initialize multicluster client here.
+ homeCluster, err := cluster.New(restConfig, func(o *cluster.Options) { o.Scheme = scheme })
+ if err != nil {
+ setupLog.Error(err, "unable to create home cluster")
+ os.Exit(1)
+ }
+ if err := mgr.Add(homeCluster); err != nil {
+ setupLog.Error(err, "unable to add home cluster")
+ os.Exit(1)
+ }
+ multiclusterClient := &multicluster.Client{
+ HomeCluster: homeCluster,
+ HomeRestConfig: restConfig,
+ HomeScheme: scheme,
+ ResourceRouters: multicluster.DefaultResourceRouters,
+ }
+ multiclusterClientConfig := conf.GetConfigOrDie[multicluster.ClientConfig]()
+ if err := multiclusterClient.InitFromConf(ctx, mgr, multiclusterClientConfig); err != nil {
+ setupLog.Error(err, "unable to initialize multicluster client")
+ os.Exit(1)
+ }
// Our custom monitoring registry can add prometheus labels to all metrics.
// This is useful to distinguish metrics from different deployments.
@@ -207,7 +228,7 @@ func main() {
// API endpoint.
mux := http.NewServeMux()
if enablePlacementShim {
- placementShim := &placement.Shim{Client: mgr.GetClient()}
+ placementShim := &placement.Shim{Client: multiclusterClient}
setupLog.Info("Adding placement shim to manager")
if err := placementShim.SetupWithManager(ctx, mgr); err != nil {
setupLog.Error(err, "unable to set up placement shim")
diff --git a/helm/bundles/cortex-placement-shim/values.yaml b/helm/bundles/cortex-placement-shim/values.yaml
index 2fa998d6b..2facf6848 100644
--- a/helm/bundles/cortex-placement-shim/values.yaml
+++ b/helm/bundles/cortex-placement-shim/values.yaml
@@ -24,6 +24,11 @@ cortex-shim:
container:
extraArgs: ["--placement-shim=true"]
conf:
+ apiservers:
+ home:
+ gvks:
+ - kvm.cloud.sap/v1/Hypervisor
+ - kvm.cloud.sap/v1/HypervisorList
monitoring:
labels:
github_org: cobaltcore-dev
diff --git a/internal/shim/placement/shim.go b/internal/shim/placement/shim.go
index cd709db9c..c7518aacb 100644
--- a/internal/shim/placement/shim.go
+++ b/internal/shim/placement/shim.go
@@ -12,11 +12,14 @@ import (
"time"
"github.com/cobaltcore-dev/cortex/pkg/conf"
+ "github.com/cobaltcore-dev/cortex/pkg/multicluster"
"github.com/cobaltcore-dev/cortex/pkg/sso"
hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
+ "sigs.k8s.io/controller-runtime/pkg/handler"
logf "sigs.k8s.io/controller-runtime/pkg/log"
+ "sigs.k8s.io/controller-runtime/pkg/predicate"
)
// IndexHypervisorByID is the field index key for looking up Hypervisor
@@ -40,6 +43,15 @@ type config struct {
PlacementURL string `json:"placementURL,omitempty"`
}
+// validate checks the config for required fields and returns an error if the
+// config is invalid.
+func (c *config) validate() error {
+ if c.PlacementURL == "" {
+ return errors.New("placement URL is required")
+ }
+ return nil
+}
+
// Shim is the placement API shim. It holds a controller-runtime client for
// making Kubernetes API calls and exposes HTTP handlers that mirror the
// OpenStack Placement API surface.
@@ -82,11 +94,6 @@ func (s *Shim) Start(ctx context.Context) (err error) {
// Try establish a connection to the placement API to fail fast if the
// configuration is invalid. Directly call the root endpoint for that.
setupLog.Info("Testing connection to placement API", "url", s.config.PlacementURL)
- if s.config.PlacementURL == "" {
- err := errors.New("placement URL is not configured")
- setupLog.Error(err, "Invalid configuration for placement shim")
- return err
- }
req, err := http.NewRequestWithContext(ctx, http.MethodGet, s.config.PlacementURL, http.NoBody)
if err != nil {
setupLog.Error(err, "Failed to create HTTP request to placement API")
@@ -107,6 +114,67 @@ func (s *Shim) Start(ctx context.Context) (err error) {
return nil
}
+// Reconcile is not used by the shim, but must be implemented to satisfy the
+// controller-runtime Reconciler interface.
+func (s *Shim) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
+ return ctrl.Result{}, nil
+}
+
+// handleRemoteHypervisor returns the event handler used by watches in remote
+// clusters. For now it is a no-op: hypervisor events are not acted upon.
+func (s *Shim) handleRemoteHypervisor() handler.EventHandler {
+ handler := handler.Funcs{}
+ // For now, the shim doesn't need to do anything on hypervisor events.
+ return handler
+}
+
+// predicateRemoteHypervisor filters events from remote clusters so that only
+// events for hypervisors relevant to the shim are processed.
+func (s *Shim) predicateRemoteHypervisor() predicate.Predicate {
+ // For now, the shim doesn't need to process any hypervisor events.
+ return predicate.NewPredicateFuncs(func(object client.Object) bool {
+ return false
+ })
+}
+
+// SetupWithManager loads and validates the shim configuration and wires the
+// shim into the given manager. This must be called before the manager is
+// started.
+//
+// The provided client must be a multicluster client: SetupWithManager builds
+// a multicluster controller that watches hv1.Hypervisor resources across all
+// configured remote clusters. The watches (and their informers) are started
+// later when mgr.Start() is called, so no additional manual informer
+// registration is needed here.
+func (s *Shim) SetupWithManager(ctx context.Context, mgr ctrl.Manager) (err error) {
+ setupLog.Info("Setting up placement shim with manager")
+ s.config, err = conf.GetConfig[config]()
+ if err != nil {
+ setupLog.Error(err, "Failed to load placement shim config")
+ return err
+ }
+ // Validate we don't have any weird values in the config.
+ if err := s.config.validate(); err != nil {
+ return err
+ }
+ // Check that the provided client is a multicluster client, since we need
+ // that to watch for hypervisors across clusters.
+ mcl, ok := s.Client.(*multicluster.Client)
+ if !ok {
+ return errors.New("provided client must be a multicluster client")
+ }
+ bldr := multicluster.BuildController(mcl, mgr)
+ // The hypervisor CRD may be distributed across multiple remote clusters.
+ bldr, err = bldr.WatchesMulticluster(&hv1.Hypervisor{},
+ s.handleRemoteHypervisor(),
+ s.predicateRemoteHypervisor(),
+ )
+ if err != nil {
+ return err
+ }
+ return bldr.Named("placement-shim").Complete(s)
+}
+
// forward proxies the incoming HTTP request to the upstream placement API
// and copies the response (status, headers, body) back to the client.
func (s *Shim) forward(w http.ResponseWriter, r *http.Request) {
@@ -159,47 +227,6 @@ func (s *Shim) forward(w http.ResponseWriter, r *http.Request) {
}
}
-// SetupWithManager registers field indexes on the manager's cache so that
-// subsequent list calls are served from the informer cache rather than
-// hitting the API server. This must be called before the manager is started.
-//
-// Calling IndexField internally invokes GetInformer, which creates and
-// registers a shared informer for the indexed type (hv1.Hypervisor) with the
-// cache. The informer is started later when mgr.Start() is called. This
-// means no separate controller or empty Reconcile loop is needed — the
-// index registration alone is sufficient to warm the cache.
-func (s *Shim) SetupWithManager(ctx context.Context, mgr ctrl.Manager) (err error) {
- setupLog.Info("Setting up placement shim with manager")
- if err := mgr.Add(s); err != nil { // Bind Start(ctx)
- setupLog.Error(err, "Failed to bind start routine")
- return err
- }
- s.config, err = conf.GetConfig[config]()
- if err != nil {
- setupLog.Error(err, "Failed to load placement shim config")
- return err
- }
- setupLog.Info("Indexing Hypervisors by hypervisor ID")
- err = mgr.GetFieldIndexer().IndexField(ctx, &hv1.Hypervisor{}, IndexHypervisorByID,
- func(obj client.Object) []string {
- h, ok := obj.(*hv1.Hypervisor)
- if !ok {
- return nil
- }
- if h.Status.HypervisorID == "" {
- return nil
- }
- return []string{h.Status.HypervisorID}
- },
- )
- if err != nil {
- setupLog.Error(err, "Failed to index Hypervisors by hypervisor ID")
- return err
- }
- setupLog.Info("Successfully indexed Hypervisors by hypervisor ID")
- return nil
-}
-
// RegisterRoutes binds all Placement API handlers to the given mux. The
// route patterns use the Go 1.22+ ServeMux syntax with explicit HTTP methods
// and path wildcards. The routes mirror the OpenStack Placement API surface
diff --git a/pkg/multicluster/routers.go b/pkg/multicluster/routers.go
index 5eb693f2d..8c41e822a 100644
--- a/pkg/multicluster/routers.go
+++ b/pkg/multicluster/routers.go
@@ -9,8 +9,18 @@ import (
"github.com/cobaltcore-dev/cortex/api/v1alpha1"
hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
corev1 "k8s.io/api/core/v1"
+ "k8s.io/apimachinery/pkg/runtime/schema"
)
+// DefaultResourceRouters defines all mappings of GroupVersionKinds to resource
+// routers for the multicluster client that cortex supports by default. It is
+// used to route resources to the correct cluster in a multicluster setup.
+var DefaultResourceRouters = map[schema.GroupVersionKind]ResourceRouter{
+ {Group: "kvm.cloud.sap", Version: "v1", Kind: "Hypervisor"}: HypervisorResourceRouter{},
+ {Group: "cortex.cloud", Version: "v1alpha1", Kind: "Reservation"}: ReservationsResourceRouter{},
+ {Group: "cortex.cloud", Version: "v1alpha1", Kind: "History"}: HistoryResourceRouter{},
+}
+
// ResourceRouter determines which remote cluster a resource should be written to
// by matching the resource content against the cluster's labels.
type ResourceRouter interface {
From 2b91b6fe0bd2bdc013c7bbe31938ca8b492a5193 Mon Sep 17 00:00:00 2001
From: Philipp Matthes
Date: Fri, 10 Apr 2026 10:27:03 +0200
Subject: [PATCH 15/17] Remove unused IndexHypervisorByID [skip ci]
---
internal/shim/placement/shim.go | 4 ----
1 file changed, 4 deletions(-)
diff --git a/internal/shim/placement/shim.go b/internal/shim/placement/shim.go
index c7518aacb..623f5ed1d 100644
--- a/internal/shim/placement/shim.go
+++ b/internal/shim/placement/shim.go
@@ -22,10 +22,6 @@ import (
"sigs.k8s.io/controller-runtime/pkg/predicate"
)
-// IndexHypervisorByID is the field index key for looking up Hypervisor
-// objects by their OpenStack hypervisor ID (status.hypervisorId).
-const IndexHypervisorByID = ".status.hypervisorId"
-
var (
// setupLog is a controller-runtime logger used for setup and route
// registration. Individual handlers should use their own loggers derived
From 84c98ed1e62d57e06fcd3b0f936d2ff8322c9850 Mon Sep 17 00:00:00 2001
From: Philipp Matthes
Date: Fri, 10 Apr 2026 12:51:44 +0200
Subject: [PATCH 16/17] PR Feedback
---
cmd/shim/main.go | 9 +++++++--
internal/shim/placement/shim.go | 32 +++++++++++++++++++++++++++++++-
2 files changed, 38 insertions(+), 3 deletions(-)
diff --git a/cmd/shim/main.go b/cmd/shim/main.go
index 5ebf902a7..afaafb86c 100644
--- a/cmd/shim/main.go
+++ b/cmd/shim/main.go
@@ -227,8 +227,9 @@ func main() {
// API endpoint.
mux := http.NewServeMux()
+ var placementShim *placement.Shim
if enablePlacementShim {
- placementShim := &placement.Shim{Client: multiclusterClient}
+ placementShim = &placement.Shim{Client: multiclusterClient}
setupLog.Info("Adding placement shim to manager")
if err := placementShim.SetupWithManager(ctx, mgr); err != nil {
setupLog.Error(err, "unable to set up placement shim")
@@ -259,7 +260,11 @@ func main() {
setupLog.Error(err, "unable to set up health check")
os.Exit(1)
}
- if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
+ readyzCheck := healthz.Ping
+ if placementShim != nil {
+ readyzCheck = placementShim.ReadyzCheck()
+ }
+ if err := mgr.AddReadyzCheck("readyz", readyzCheck); err != nil {
setupLog.Error(err, "unable to set up ready check")
os.Exit(1)
}
diff --git a/internal/shim/placement/shim.go b/internal/shim/placement/shim.go
index 623f5ed1d..87123ad22 100644
--- a/internal/shim/placement/shim.go
+++ b/internal/shim/placement/shim.go
@@ -7,8 +7,10 @@ import (
"context"
"errors"
"io"
+ "net"
"net/http"
"net/url"
+ "sync/atomic"
"time"
"github.com/cobaltcore-dev/cortex/pkg/conf"
@@ -57,6 +59,10 @@ type Shim struct {
// HTTP client that can talk to openstack placement, if needed, over
// ingress with single-sign-on.
httpClient *http.Client
+ // ready is set to true once Start() has completed successfully. It is
+ // used by the readiness check to prevent traffic from reaching the shim
+ // before the HTTP client and upstream connection are established.
+ ready atomic.Bool
}
// Start is called after the manager has started and the cache is running.
@@ -82,11 +88,15 @@ func (s *Shim) Start(ctx context.Context) (err error) {
transport.MaxIdleConns = 100
transport.MaxIdleConnsPerHost = 100
// Guard against a hung upstream or slow TLS negotiation.
+ transport.DialContext = (&net.Dialer{
+ Timeout: 10 * time.Second,
+ KeepAlive: 30 * time.Second,
+ }).DialContext
transport.TLSHandshakeTimeout = 10 * time.Second
transport.ResponseHeaderTimeout = 60 * time.Second
transport.ExpectContinueTimeout = 1 * time.Second
transport.IdleConnTimeout = 90 * time.Second
- s.httpClient = &http.Client{Transport: transport}
+ s.httpClient = &http.Client{Transport: transport, Timeout: 60 * time.Second}
// Try establish a connection to the placement API to fail fast if the
// configuration is invalid. Directly call the root endpoint for that.
setupLog.Info("Testing connection to placement API", "url", s.config.PlacementURL)
@@ -107,9 +117,23 @@ func (s *Shim) Start(ctx context.Context) (err error) {
return err
}
setupLog.Info("Successfully connected to placement API")
+ s.ready.Store(true)
return nil
}
+// ReadyzCheck returns a healthz.Checker that reports healthy only after
+// Start() has completed successfully. Wire this into the manager's readiness
+// endpoint so that Kubernetes does not route traffic to the pod before the
+// shim's HTTP client and upstream connection are established.
+func (s *Shim) ReadyzCheck() func(*http.Request) error {
+ return func(_ *http.Request) error {
+ if !s.ready.Load() {
+ return errors.New("placement shim not yet initialized")
+ }
+ return nil
+ }
+}
+
// Reconcile is not used by the shim, but must be implemented to satisfy the
// controller-runtime Reconciler interface.
func (s *Shim) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
@@ -176,6 +200,12 @@ func (s *Shim) SetupWithManager(ctx context.Context, mgr ctrl.Manager) (err erro
func (s *Shim) forward(w http.ResponseWriter, r *http.Request) {
log := logf.FromContext(r.Context())
+ if s.httpClient == nil {
+ log.Info("placement shim not yet initialized, rejecting request")
+ http.Error(w, "service not ready", http.StatusServiceUnavailable)
+ return
+ }
+
// Parse the trusted base URL and resolve the request path against it
// so the upstream target is always anchored to the configured host.
upstream, err := url.Parse(s.config.PlacementURL)
From 5bedfb5ba06785adaa6bd312239645100732004c Mon Sep 17 00:00:00 2001
From: Philipp Matthes
Date: Fri, 10 Apr 2026 13:23:11 +0200
Subject: [PATCH 17/17] Add configurable api-bind-address flag to shim
---
cmd/shim/main.go | 13 +++++++++++--
helm/library/cortex-shim/values.yaml | 1 +
2 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/cmd/shim/main.go b/cmd/shim/main.go
index afaafb86c..6fb951757 100644
--- a/cmd/shim/main.go
+++ b/cmd/shim/main.go
@@ -52,6 +52,7 @@ func main() {
restConfig := ctrl.GetConfigOrDie()
var metricsAddr string
+ var apiBindAddr string
var metricsCertPath, metricsCertName, metricsCertKey string
var webhookCertPath, webhookCertName, webhookCertKey string
// The shim does not require leader election, but this flag is provided to
@@ -64,6 +65,7 @@ func main() {
var tlsOpts []func(*tls.Config)
flag.StringVar(&metricsAddr, "metrics-bind-address", "0", "The address the metrics endpoint binds to. "+
"Use :8443 for HTTPS or :8080 for HTTP, or leave as 0 to disable the metrics service.")
+ flag.StringVar(&apiBindAddr, "api-bind-address", ":8080", "The address the shim API server binds to.")
flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
flag.BoolVar(&enableLeaderElection, "leader-elect", false,
"Enable leader election for controller manager. "+
@@ -94,6 +96,13 @@ func main() {
os.Exit(1)
}
+ // Check that the metrics and API bind addresses don't overlap.
+ if metricsAddr != "0" && metricsAddr == apiBindAddr {
+ err := errors.New("metrics-bind-address and api-bind-address must not be the same")
+ setupLog.Error(err, "invalid configuration", "metrics-bind-address", metricsAddr, "api-bind-address", apiBindAddr)
+ os.Exit(1)
+ }
+
ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts)))
// if the enable-http2 flag is false (the default), http/2 should be disabled
@@ -272,8 +281,8 @@ func main() {
errchan := make(chan error)
go func() {
errchan <- func() error {
- setupLog.Info("starting api server", "address", ":8080")
- return httpext.ListenAndServeContext(ctx, ":8080", mux)
+ setupLog.Info("starting api server", "address", apiBindAddr)
+ return httpext.ListenAndServeContext(ctx, apiBindAddr, mux)
}()
}()
go func() {
diff --git a/helm/library/cortex-shim/values.yaml b/helm/library/cortex-shim/values.yaml
index 3acead93b..91eaba11f 100644
--- a/helm/library/cortex-shim/values.yaml
+++ b/helm/library/cortex-shim/values.yaml
@@ -5,6 +5,7 @@ deployment:
image:
repository: ghcr.io/cobaltcore-dev/cortex-shim
args:
+ - "--api-bind-address=:8080"
- "--metrics-bind-address=:2112"
- "--health-probe-bind-address=:8081"
- "--metrics-secure=false"