diff --git a/.github/workflows/push-charts.yaml b/.github/workflows/push-charts.yaml index 2e3577275..a4559d15a 100644 --- a/.github/workflows/push-charts.yaml +++ b/.github/workflows/push-charts.yaml @@ -27,25 +27,6 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - - name: Get all changed helm/library/cortex Chart.yaml files - id: changed-chart-yaml-files-core - uses: tj-actions/changed-files@v47 - with: - files: | - helm/library/cortex/Chart.yaml - - name: Push cortex core charts to registry - if: steps.changed-chart-yaml-files-core.outputs.all_changed_files != '' - shell: bash - env: - ALL_CHANGED_FILES: ${{ steps.changed-chart-yaml-files-core.outputs.all_changed_files }} - run: | - for CHART_FILE in ${ALL_CHANGED_FILES}; do - CHART_DIR=$(dirname $CHART_FILE) - helm package $CHART_DIR --dependency-update --destination $CHART_DIR - CHART_PACKAGE=$(ls $CHART_DIR/*.tgz) - helm push $CHART_PACKAGE oci://${{ env.REGISTRY }}/${{ github.repository }}/charts/ - done - - name: Get all changed library Chart.yaml files id: changed-chart-yaml-files-library uses: tj-actions/changed-files@v47 diff --git a/.github/workflows/push-images.yaml b/.github/workflows/push-images.yaml index 997595976..f3be685ce 100644 --- a/.github/workflows/push-images.yaml +++ b/.github/workflows/push-images.yaml @@ -72,6 +72,55 @@ jobs: subject-digest: ${{ steps.push_cortex_postgres.outputs.digest }} push-to-registry: true + # Only build and push the cortex-shim image if there are changes related + # to the cortex shims (e.g., in cmd/shim or internal/shim). 
+ - name: Get all changed shim/ files + id: changed_shim_files + uses: tj-actions/changed-files@v47 + with: + files: | + cmd/shim/** + internal/shim/** + api/** + pkg/** + go.mod + go.sum + Dockerfile + - name: Docker Meta (Cortex Shim) + if: steps.changed_shim_files.outputs.all_changed_files != '' + id: meta_cortex_shim + uses: docker/metadata-action@v6 + with: + images: ${{ env.REGISTRY }}/${{ github.repository }}-shim + tags: | + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + type=sha + latest + env: + DOCKER_METADATA_SHORT_SHA_LENGTH: 8 + - name: Build and Push Cortex Shim + if: steps.changed_shim_files.outputs.all_changed_files != '' + id: push_cortex_shim + uses: docker/build-push-action@v7 + with: + context: . + platforms: linux/amd64,linux/arm64 + push: true + tags: ${{ steps.meta_cortex_shim.outputs.tags }} + labels: ${{ steps.meta_cortex_shim.outputs.labels }} + build-args: | + GIT_TAG=${{ github.ref_name }} + GIT_COMMIT=${{ github.sha }} + GOMAIN=cmd/shim/main.go + - name: Generate Artifact Attestation for Cortex Shim + if: steps.changed_shim_files.outputs.all_changed_files != '' + uses: actions/attest-build-provenance@v4 + with: + subject-name: ${{ env.REGISTRY }}/${{ github.repository }}-shim + subject-digest: ${{ steps.push_cortex_shim.outputs.digest }} + push-to-registry: true + # Build & push new cortex image - name: Docker Meta (Cortex) id: meta_cortex @@ -98,6 +147,7 @@ jobs: build-args: | GIT_TAG=${{ github.ref_name }} GIT_COMMIT=${{ github.sha }} + GOMAIN=cmd/manager/main.go - name: Generate Artifact Attestation for Cortex uses: actions/attest-build-provenance@v4 with: diff --git a/.github/workflows/update-appversion.yml b/.github/workflows/update-appversion.yml index cc5ccdc9f..20087fa80 100644 --- a/.github/workflows/update-appversion.yml +++ b/.github/workflows/update-appversion.yml @@ -44,6 +44,27 @@ jobs: git commit -m "Bump cortex-postgres chart appVersions to ${{ steps.vars.outputs.sha }} [skip ci]" || echo 
"No changes to commit" git push origin HEAD:main + # Only bumped if there are changes in shim-related directories + - name: Get all changed shim files + id: changed_shim_files + uses: tj-actions/changed-files@v47 + with: + files: | + internal/shim/** + cmd/shim/** + - name: Update appVersion in cortex-shim Chart.yaml + if: steps.changed_shim_files.outputs.all_changed_files != '' + run: | + sed -i 's/^\([ ]*appVersion:[ ]*\).*/\1"${{ steps.vars.outputs.sha }}"/' helm/library/cortex-shim/Chart.yaml + - name: Commit and push changes for cortex-shim + if: steps.changed_shim_files.outputs.all_changed_files != '' + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git add helm/library/cortex-shim/Chart.yaml + git commit -m "Bump cortex-shim chart appVersions to ${{ steps.vars.outputs.sha }} [skip ci]" || echo "No changes to commit" + git push origin HEAD:main + - name: Update appVersion in helm/library/cortex/Chart.yaml run: | sed -i 's/^\([ ]*appVersion:[ ]*\).*/\1"${{ steps.vars.outputs.sha }}"/' helm/library/cortex/Chart.yaml diff --git a/.gitignore b/.gitignore index 04bac2d09..7e21248bc 100644 --- a/.gitignore +++ b/.gitignore @@ -34,6 +34,7 @@ cortex.secrets.yaml !.editorconfig !.gitignore !.github +!.gitkeep !.golangci.yaml !.license-scan-overrides.jsonl !.license-scan-rules.json diff --git a/AGENTS.md b/AGENTS.md index 6f2e12a17..59747bd8c 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -50,7 +50,8 @@ Helm charts: ## Repository Structure Code: -- `cmd/main.go` is the entry point for the manager, which starts the controllers and webhooks +- `cmd/manager/main.go` is the entry point for the manager, which starts the controllers and webhooks +- `cmd/shim/main.go` is the entry point for cortex shims exposing cortex capabilities over REST endpoints - `api/v1alpha1` is where the CRD specs of cortex lives - `api/external` contains messages sent to cortex via http from external openstack services - 
`internal/scheduling` contains the logic for scheduling in different cloud domains diff --git a/Dockerfile b/Dockerfile index 6f7e79bea..2580e9637 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,6 +6,8 @@ ARG TARGETARCH ARG GO_MOD_PATH=. ARG GOCACHE=/root/.cache/go-build ENV GOCACHE=${GOCACHE} +ARG GOMAIN=cmd/manager/main.go +ENV GOMAIN=${GOMAIN} # Note: avoid using COPY to /lib which will lead to docker build errors. WORKDIR /workspace/${GO_MOD_PATH} @@ -29,13 +31,13 @@ ENV GOOS=${TARGETOS:-linux} ENV GOARCH=${TARGETARCH} RUN --mount=type=cache,target=/go/pkg/mod/ \ --mount=type=cache,target=${GOCACHE} \ - go build -a -o /manager cmd/main.go + go build -a -o /main ${GOMAIN} # Use distroless as minimal base image to package the manager binary # Refer to https://github.com/GoogleContainerTools/distroless for more details FROM gcr.io/distroless/static:nonroot WORKDIR / -COPY --from=builder /manager . +COPY --from=builder /main . USER 65532:65532 -ENTRYPOINT ["/manager"] +ENTRYPOINT ["/main"] diff --git a/Tiltfile b/Tiltfile index 6871d18b3..bc87f4d30 100644 --- a/Tiltfile +++ b/Tiltfile @@ -7,7 +7,10 @@ analytics_settings(False) # Use the ACTIVE_DEPLOYMENTS env var to select which Cortex bundles to deploy. 
-ACTIVE_DEPLOYMENTS_ENV = os.getenv('ACTIVE_DEPLOYMENTS', 'nova,manila,cinder,ironcore,pods') +ACTIVE_DEPLOYMENTS_ENV = os.getenv( + 'ACTIVE_DEPLOYMENTS', + 'nova,manila,cinder,ironcore,pods,placement', +) if ACTIVE_DEPLOYMENTS_ENV == "": ACTIVE_DEPLOYMENTS = [] # Catch "".split(",") = [""] else: @@ -78,13 +81,22 @@ local('kubectl wait --namespace cert-manager --for=condition=available deploymen url = 'https://raw.githubusercontent.com/cobaltcore-dev/openstack-hypervisor-operator/refs/heads/main/charts/openstack-hypervisor-operator/crds/kvm.cloud.sap_hypervisors.yaml' local('curl -L ' + url + ' | kubectl apply -f -') -########### Cortex Operator & CRDs +########### Cortex Manager & CRDs docker_build('ghcr.io/cobaltcore-dev/cortex', '.', dockerfile='Dockerfile', + build_args={'GOMAIN': 'cmd/manager/main.go'}, only=['internal/', 'cmd/', 'api/', 'pkg', 'go.mod', 'go.sum', 'Dockerfile'], ) local('sh helm/sync.sh helm/library/cortex') +########### Cortex Shim +docker_build('ghcr.io/cobaltcore-dev/cortex-shim', '.', + dockerfile='Dockerfile', + build_args={'GOMAIN': 'cmd/shim/main.go'}, + only=['internal/', 'cmd/', 'api/', 'pkg', 'go.mod', 'go.sum', 'Dockerfile'], +) +local('sh helm/sync.sh helm/library/cortex-shim') + ########### Cortex Bundles docker_build('ghcr.io/cobaltcore-dev/cortex-postgres', 'postgres') @@ -98,6 +110,7 @@ bundle_charts = [ ('helm/bundles/cortex-cinder', 'cortex-cinder'), ('helm/bundles/cortex-ironcore', 'cortex-ironcore'), ('helm/bundles/cortex-pods', 'cortex-pods'), + ('helm/bundles/cortex-placement-shim', 'cortex-placement-shim'), ] dep_charts = { 'cortex-crds': [ @@ -123,6 +136,9 @@ dep_charts = { ('helm/library/cortex-postgres', 'cortex-postgres'), ('helm/library/cortex', 'cortex'), ], + 'cortex-placement-shim': [ + ('helm/library/cortex-shim', 'cortex-shim'), + ], } for (bundle_chart_path, bundle_chart_name) in bundle_charts: @@ -255,6 +271,10 @@ if 'pods' in ACTIVE_DEPLOYMENTS: k8s_yaml('samples/pods/pod.yaml') k8s_resource('test-pod', 
labels=['Cortex-Pods']) +if 'placement' in ACTIVE_DEPLOYMENTS: + print("Activating Cortex Placement Shim bundle") + k8s_yaml(helm('./helm/bundles/cortex-placement-shim', name='cortex-placement-shim', values=tilt_values, set=env_set_overrides)) + ########### Dev Dependencies local('sh helm/sync.sh helm/dev/cortex-prometheus-operator') k8s_yaml(helm('./helm/dev/cortex-prometheus-operator', name='cortex-prometheus-operator')) # Operator diff --git a/cmd/main.go b/cmd/manager/main.go similarity index 100% rename from cmd/main.go rename to cmd/manager/main.go diff --git a/cmd/shim/main.go b/cmd/shim/main.go new file mode 100644 index 000000000..9feea8d5f --- /dev/null +++ b/cmd/shim/main.go @@ -0,0 +1,252 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package main + +import ( + "crypto/tls" + "errors" + "flag" + "net/http" + "os" + "path/filepath" + + "github.com/cobaltcore-dev/cortex/api/v1alpha1" + "github.com/cobaltcore-dev/cortex/pkg/conf" + "github.com/cobaltcore-dev/cortex/pkg/monitoring" + hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" + "github.com/sapcc/go-bits/httpext" + "k8s.io/apimachinery/pkg/runtime" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/certwatcher" + "sigs.k8s.io/controller-runtime/pkg/healthz" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + "sigs.k8s.io/controller-runtime/pkg/metrics" + "sigs.k8s.io/controller-runtime/pkg/metrics/filters" + metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" + "sigs.k8s.io/controller-runtime/pkg/webhook" +) + +var ( + // Scheme defines the scheme for the API types used by the shim. + scheme = runtime.NewScheme() + // setupLog is the logger used for setup operations in the shim. 
+ setupLog = ctrl.Log.WithName("setup") +) + +func init() { + // Bind the Kubernetes client-go scheme and the custom API types to the + // scheme used by the shim. + utilruntime.Must(clientgoscheme.AddToScheme(scheme)) + utilruntime.Must(v1alpha1.AddToScheme(scheme)) // Cortex crds + utilruntime.Must(hv1.AddToScheme(scheme)) // Hypervisor crd +} + +func main() { + ctx := ctrl.SetupSignalHandler() + restConfig := ctrl.GetConfigOrDie() + + var metricsAddr string + var metricsCertPath, metricsCertName, metricsCertKey string + var webhookCertPath, webhookCertName, webhookCertKey string + // The shim does not require leader election, but this flag is provided to + // stay consistent with the kubebuilder scaffold. + var enableLeaderElection bool + var probeAddr string + var secureMetrics bool + var enableHTTP2 bool + var tlsOpts []func(*tls.Config) + flag.StringVar(&metricsAddr, "metrics-bind-address", "0", "The address the metrics endpoint binds to. "+ + "Use :8443 for HTTPS or :8080 for HTTP, or leave as 0 to disable the metrics service.") + flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.") + flag.BoolVar(&enableLeaderElection, "leader-elect", false, + "Enable leader election for controller manager. "+ + "Enabling this will ensure there is only one active controller manager.") + flag.BoolVar(&secureMetrics, "metrics-secure", true, + "If set, the metrics endpoint is served securely via HTTPS. 
Use --metrics-secure=false to use HTTP instead.") + flag.StringVar(&webhookCertPath, "webhook-cert-path", "", "The directory that contains the webhook certificate.") + flag.StringVar(&webhookCertName, "webhook-cert-name", "tls.crt", "The name of the webhook certificate file.") + flag.StringVar(&webhookCertKey, "webhook-cert-key", "tls.key", "The name of the webhook key file.") + flag.StringVar(&metricsCertPath, "metrics-cert-path", "", + "The directory that contains the metrics server certificate.") + flag.StringVar(&metricsCertName, "metrics-cert-name", "tls.crt", "The name of the metrics server certificate file.") + flag.StringVar(&metricsCertKey, "metrics-cert-key", "tls.key", "The name of the metrics server key file.") + flag.BoolVar(&enableHTTP2, "enable-http2", false, + "If set, HTTP/2 will be enabled for the metrics and webhook servers") + opts := zap.Options{ + Development: true, + } + opts.BindFlags(flag.CommandLine) + flag.Parse() + // Install the logger before validating flags: anything logged before SetLogger is discarded. + ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts))) + + // Check that we're really running this shim without leader election enabled. + if enableLeaderElection { + err := errors.New("leader election should not be enabled for the shim") + setupLog.Error(err, "invalid configuration") + os.Exit(1) + } + + // if the enable-http2 flag is false (the default), http/2 should be disabled + // due to its vulnerabilities. More specifically, disabling http/2 will + // prevent from being vulnerable to the HTTP/2 Stream Cancellation and + // Rapid Reset CVEs. 
For more information see: + // - https://github.com/advisories/GHSA-qppj-fm5r-hxr3 + // - https://github.com/advisories/GHSA-4374-p667-p6c8 + disableHTTP2 := func(c *tls.Config) { + setupLog.Info("disabling http/2") + c.NextProtos = []string{"http/1.1"} + } + + if !enableHTTP2 { + tlsOpts = append(tlsOpts, disableHTTP2) + } + + // Create watchers for metrics and webhooks certificates + var metricsCertWatcher, webhookCertWatcher *certwatcher.CertWatcher + + // Initial webhook TLS options + webhookTLSOpts := append([]func(*tls.Config){}, tlsOpts...) + + if webhookCertPath != "" { + setupLog.Info("Initializing webhook certificate watcher using provided certificates", + "webhook-cert-path", webhookCertPath, "webhook-cert-name", webhookCertName, "webhook-cert-key", webhookCertKey) + + var err error + webhookCertWatcher, err = certwatcher.New( + filepath.Join(webhookCertPath, webhookCertName), + filepath.Join(webhookCertPath, webhookCertKey), + ) + if err != nil { + setupLog.Error(err, "Failed to initialize webhook certificate watcher") + os.Exit(1) + } + + webhookTLSOpts = append(webhookTLSOpts, func(config *tls.Config) { + config.GetCertificate = webhookCertWatcher.GetCertificate + }) + } + + webhookServer := webhook.NewServer(webhook.Options{ + TLSOpts: webhookTLSOpts, + }) + + // Metrics endpoint is enabled in 'config/default/kustomization.yaml'. The Metrics options configure the server. + // More info: + // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.21.0/pkg/metrics/server + // - https://book.kubebuilder.io/reference/metrics.html + metricsServerOptions := metricsserver.Options{ + BindAddress: metricsAddr, + SecureServing: secureMetrics, + TLSOpts: append([]func(*tls.Config){}, tlsOpts...), + } + + if secureMetrics { + // FilterProvider is used to protect the metrics endpoint with authn/authz. + // These configurations ensure that only authorized users and service accounts + // can access the metrics endpoint. 
The RBAC are configured in 'config/rbac/kustomization.yaml'. More info: + // https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.21.0/pkg/metrics/filters#WithAuthenticationAndAuthorization + metricsServerOptions.FilterProvider = filters.WithAuthenticationAndAuthorization + } + + // If the certificate is not specified, controller-runtime will automatically + // generate self-signed certificates for the metrics server. While convenient for development and testing, + // this setup is not recommended for production. + // + // If you enable certManager, uncomment the following lines: + // - [METRICS-WITH-CERTS] at config/default/kustomization.yaml to generate and use certificates + // managed by cert-manager for the metrics server. + // - [PROMETHEUS-WITH-CERTS] at config/prometheus/kustomization.yaml for TLS certification. + if metricsCertPath != "" { + setupLog.Info("Initializing metrics certificate watcher using provided certificates", + "metrics-cert-path", metricsCertPath, "metrics-cert-name", metricsCertName, "metrics-cert-key", metricsCertKey) + + var err error + metricsCertWatcher, err = certwatcher.New( + filepath.Join(metricsCertPath, metricsCertName), + filepath.Join(metricsCertPath, metricsCertKey), + ) + if err != nil { + setupLog.Error(err, "Failed to initialize metrics certificate watcher") + os.Exit(1) + } + + metricsServerOptions.TLSOpts = append(metricsServerOptions.TLSOpts, func(config *tls.Config) { + config.GetCertificate = metricsCertWatcher.GetCertificate + }) + } + + mgr, err := ctrl.NewManager(restConfig, ctrl.Options{ + Scheme: scheme, + Metrics: metricsServerOptions, + WebhookServer: webhookServer, + HealthProbeBindAddress: probeAddr, + // Kept for consistency with kubebuilder scaffold, but the shim should + // always run with leader election disabled. + LeaderElection: enableLeaderElection, + }) + if err != nil { + setupLog.Error(err, "unable to start manager") + os.Exit(1) + } + + // TODO: Initialize multicluster client here. 
+ + // Our custom monitoring registry can add prometheus labels to all metrics. + // This is useful to distinguish metrics from different deployments. + metricsConfig := conf.GetConfigOrDie[monitoring.Config]() + metrics.Registry = monitoring.WrapRegistry(metrics.Registry, metricsConfig) + + // API endpoint. + mux := http.NewServeMux() + + // +kubebuilder:scaffold:builder + + if metricsCertWatcher != nil { + setupLog.Info("Adding metrics certificate watcher to manager") + if err := mgr.Add(metricsCertWatcher); err != nil { + setupLog.Error(err, "unable to add metrics certificate watcher to manager") + os.Exit(1) + } + } + + if webhookCertWatcher != nil { + setupLog.Info("Adding webhook certificate watcher to manager") + if err := mgr.Add(webhookCertWatcher); err != nil { + setupLog.Error(err, "unable to add webhook certificate watcher to manager") + os.Exit(1) + } + } + + if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { + setupLog.Error(err, "unable to set up health check") + os.Exit(1) + } + if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil { + setupLog.Error(err, "unable to set up ready check") + os.Exit(1) + } + + errchan := make(chan error) + go func() { + errchan <- func() error { + setupLog.Info("starting api server", "address", ":8080") + return httpext.ListenAndServeContext(ctx, ":8080", mux) + }() + }() + go func() { + if err := <-errchan; err != nil { + setupLog.Error(err, "problem running api server") + os.Exit(1) + } + }() + + setupLog.Info("starting manager") + if err := mgr.Start(ctx); err != nil { + setupLog.Error(err, "problem running manager") + os.Exit(1) + } +} diff --git a/helm/bundles/cortex-placement-shim/Chart.yaml b/helm/bundles/cortex-placement-shim/Chart.yaml new file mode 100644 index 000000000..7f53ed347 --- /dev/null +++ b/helm/bundles/cortex-placement-shim/Chart.yaml @@ -0,0 +1,20 @@ +# Copyright SAP SE +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +name: cortex-placement-shim +description: 
A Helm chart deploying the Cortex placement shim. +type: application +version: 0.0.1 +appVersion: 0.1.0 +dependencies: + # from: file://../../library/cortex-shim + - name: cortex-shim + repository: oci://ghcr.io/cobaltcore-dev/cortex/charts + version: 0.0.1 + # Owner info adds a configmap to the kubernetes cluster with information on + # the service owner. This makes it easier to find out who to contact in case + # of issues. See: https://github.com/sapcc/helm-charts/pkgs/container/helm-charts%2Fowner-info + - name: owner-info + repository: oci://ghcr.io/sapcc/helm-charts + version: 1.0.0 diff --git a/helm/bundles/cortex-placement-shim/alerts/placement-shim.alerts.yaml b/helm/bundles/cortex-placement-shim/alerts/placement-shim.alerts.yaml new file mode 100644 index 000000000..03aea7763 --- /dev/null +++ b/helm/bundles/cortex-placement-shim/alerts/placement-shim.alerts.yaml @@ -0,0 +1,3 @@ +groups: +- name: cortex-placement-shim-alerts + rules: [] \ No newline at end of file diff --git a/helm/bundles/cortex-placement-shim/templates/alerts.yaml b/helm/bundles/cortex-placement-shim/templates/alerts.yaml new file mode 100644 index 000000000..7db3b96e6 --- /dev/null +++ b/helm/bundles/cortex-placement-shim/templates/alerts.yaml @@ -0,0 +1,17 @@ +# Copyright SAP SE +# SPDX-License-Identifier: Apache-2.0 + +{{- if .Values.alerts.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: cortex-placement-shim-alerts + labels: + type: alerting-rules + prometheus: {{ required ".Values.alerts.prometheus missing" .Values.alerts.prometheus | quote }} +spec: + {{- $files := .Files.Glob "alerts/*.alerts.yaml" }} + {{- range $path, $file := $files }} + {{ $file | toString | nindent 2 }} + {{- end }} +{{- end }} diff --git a/helm/bundles/cortex-placement-shim/templates/clusterrole.yaml b/helm/bundles/cortex-placement-shim/templates/clusterrole.yaml new file mode 100644 index 000000000..489878c89 --- /dev/null +++ 
b/helm/bundles/cortex-placement-shim/templates/clusterrole.yaml @@ -0,0 +1,23 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + {{- include "chart.labels" . | nindent 4 }} + name: cortex-placement-shim-role-hypervisor +rules: +- apiGroups: + - kvm.cloud.sap + resources: + - hypervisors + verbs: + - get + - list + - patch + - update + - watch +- apiGroups: + - kvm.cloud.sap + resources: + - hypervisors/status + verbs: + - get \ No newline at end of file diff --git a/helm/bundles/cortex-placement-shim/templates/clusterrolebinding.yaml b/helm/bundles/cortex-placement-shim/templates/clusterrolebinding.yaml new file mode 100644 index 000000000..0388373f9 --- /dev/null +++ b/helm/bundles/cortex-placement-shim/templates/clusterrolebinding.yaml @@ -0,0 +1,14 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + {{- include "chart.labels" . | nindent 4 }} + name: cortex-placement-shim-rolebinding-hypervisor +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: cortex-placement-shim-role-hypervisor +subjects: +- kind: ServiceAccount + name: cortex-placement-shim + namespace: {{ .Release.Namespace }} \ No newline at end of file diff --git a/helm/bundles/cortex-placement-shim/values.yaml b/helm/bundles/cortex-placement-shim/values.yaml new file mode 100644 index 000000000..6dd793653 --- /dev/null +++ b/helm/bundles/cortex-placement-shim/values.yaml @@ -0,0 +1,27 @@ +# Copyright SAP SE +# SPDX-License-Identifier: Apache-2.0 + +owner-info: + enabled: true + helm-chart-url: "https://github.com/cobaltcore-dev/cortex/helm/bundles/cortex-placement-shim" + maintainers: + - "arno.uhlig@sap.com" + - "julius.clausnitzer@sap.com" + - "malte.viering@sap.com" + - "marcel.gute@sap.com" + - "markus.wieland@sap.com" + - "p.matthes@sap.com" + support-group: "workload-management" + service: "cortex-placement-shim" + +alerts: + enabled: true + prometheus: openstack + +cortex-shim: + namePrefix: 
cortex-placement + conf: + monitoring: + labels: + github_org: cobaltcore-dev + github_repo: cortex diff --git a/helm/library/cortex-shim/Chart.lock b/helm/library/cortex-shim/Chart.lock new file mode 100644 index 000000000..db4c5823b --- /dev/null +++ b/helm/library/cortex-shim/Chart.lock @@ -0,0 +1,6 @@ +dependencies: +- name: owner-info + repository: oci://ghcr.io/sapcc/helm-charts + version: 1.0.0 +digest: sha256:7643f231cc4ebda347fd12ec62fe4445c280e2b71d27eec555f3025290f5038f +generated: "2025-08-26T10:55:05.888651+02:00" diff --git a/helm/library/cortex-shim/Chart.yaml b/helm/library/cortex-shim/Chart.yaml new file mode 100644 index 000000000..5282dc655 --- /dev/null +++ b/helm/library/cortex-shim/Chart.yaml @@ -0,0 +1,8 @@ +apiVersion: v2 +name: cortex-shim +description: A Helm chart to distribute cortex shims. +type: application +version: 0.0.1 +appVersion: "sha-3e56acea" +icon: "https://example.com/icon.png" +dependencies: [] diff --git a/helm/library/cortex-shim/templates/_helpers.tpl b/helm/library/cortex-shim/templates/_helpers.tpl new file mode 100644 index 000000000..cca33d701 --- /dev/null +++ b/helm/library/cortex-shim/templates/_helpers.tpl @@ -0,0 +1,54 @@ +{{- define "chart.name" -}} +{{- if .Chart }} + {{- if .Chart.Name }} + {{- .Chart.Name | trunc 63 | trimSuffix "-" }} + {{- else if .Values.nameOverride }} + {{ .Values.nameOverride | trunc 63 | trimSuffix "-" }} + {{- else }} + scheduling + {{- end }} +{{- else }} + scheduling +{{- end }} +{{- end }} + + +{{- define "chart.labels" -}} +{{- if .Chart.AppVersion -}} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +{{- if .Chart.Version }} +helm.sh/chart: {{ .Chart.Version | quote }} +{{- end }} +app.kubernetes.io/name: {{ include "chart.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + + +{{- define "chart.selectorLabels" -}} +app.kubernetes.io/name: {{ include "chart.name" . 
}} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + + +{{- define "chart.hasMutatingWebhooks" -}} +{{- $hasMutating := false }} +{{- range . }} + {{- if eq .type "mutating" }} + {{- $hasMutating = true -}} + {{- end }} +{{- end }} +{{ $hasMutating }} +{{- end }} + + +{{- define "chart.hasValidatingWebhooks" -}} +{{- $hasValidating := false }} +{{- range . }} + {{- if eq .type "validating" }} + {{- $hasValidating = true -}} + {{- end }} +{{- end }} +{{ $hasValidating }} +{{- end }} diff --git a/helm/library/cortex-shim/templates/clusterrole.yaml b/helm/library/cortex-shim/templates/clusterrole.yaml new file mode 100644 index 000000000..74f8e7ad4 --- /dev/null +++ b/helm/library/cortex-shim/templates/clusterrole.yaml @@ -0,0 +1,100 @@ +# Roles that grant the shims access to cortex crds. +{{- if .Values.rbac.enable }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + {{- include "chart.labels" . | nindent 4 }} + name: {{ .Values.namePrefix }}-shim-role +rules: +- apiGroups: + - cortex.cloud + resources: + - knowledges + - datasources + - reservations + - decisions + - deschedulings + - pipelines + - kpis + - histories + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - cortex.cloud + resources: + - knowledges/finalizers + - datasources/finalizers + - reservations/finalizers + - decisions/finalizers + - deschedulings/finalizers + - pipelines/finalizers + - kpis/finalizers + - histories/finalizers + verbs: + - update +- apiGroups: + - cortex.cloud + resources: + - knowledges/status + - datasources/status + - reservations/status + - decisions/status + - deschedulings/status + - pipelines/status + - kpis/status + - histories/status + verbs: + - get + - patch + - update +- apiGroups: + - events.k8s.io + resources: + - events + verbs: + - create + - patch +{{- end -}} +{{- if and .Values.rbac.enable .Values.metrics.enable }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: 
ClusterRole +metadata: + labels: + {{- include "chart.labels" . | nindent 4 }} + name: {{ .Values.namePrefix }}-metrics-reader +rules: +- nonResourceURLs: + - "/metrics" + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + {{- include "chart.labels" . | nindent 4 }} + name: {{ .Values.namePrefix }}-metrics-auth-role +rules: +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create +{{- end -}} + diff --git a/helm/library/cortex-shim/templates/clusterrolebinding.yaml b/helm/library/cortex-shim/templates/clusterrolebinding.yaml new file mode 100644 index 000000000..ca82a0119 --- /dev/null +++ b/helm/library/cortex-shim/templates/clusterrolebinding.yaml @@ -0,0 +1,34 @@ +{{- if .Values.rbac.enable }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + {{- include "chart.labels" . | nindent 4 }} + name: {{ .Values.namePrefix }}-shim-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ .Values.namePrefix }}-shim-role +subjects: +- kind: ServiceAccount + name: {{ .Values.namePrefix }}-{{ .Values.deployment.serviceAccountName }} + namespace: {{ .Release.Namespace }} +{{- end -}} +{{- if and .Values.rbac.enable .Values.metrics.enable }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + {{- include "chart.labels" . 
| nindent 4 }} + name: {{ .Values.namePrefix }}-metrics-auth-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ .Values.namePrefix }}-metrics-auth-role +subjects: +- kind: ServiceAccount + name: {{ .Values.namePrefix }}-{{ .Values.deployment.serviceAccountName }} + namespace: {{ .Release.Namespace }} +{{- end -}} + diff --git a/helm/library/cortex-shim/templates/deployment.yaml b/helm/library/cortex-shim/templates/deployment.yaml new file mode 100644 index 000000000..b38eb3c02 --- /dev/null +++ b/helm/library/cortex-shim/templates/deployment.yaml @@ -0,0 +1,112 @@ +# This file is safe from kubebuilder edit --plugins=helm/v1-alpha +# If you want to re-generate, add the --force flag. + +{{- if .Values.deployment.enable }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ .Values.namePrefix }}-shim + namespace: {{ .Release.Namespace }} + labels: + {{- include "chart.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.deployment.replicas }} + selector: + matchLabels: + {{- include "chart.selectorLabels" . | nindent 6 }} + template: + metadata: + annotations: + kubectl.kubernetes.io/default-container: shim + labels: + {{- include "chart.labels" . | nindent 8 }} + {{- if and .Values.deployment.pod .Values.deployment.pod.labels }} + {{- range $key, $value := .Values.deployment.pod.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- end }} + spec: + containers: + - name: shim + args: + {{- range .Values.deployment.container.args }} + - {{ . 
}} + {{- end }} + ports: + - name: api + containerPort: 8080 + protocol: TCP + - name: metrics + containerPort: 2112 + protocol: TCP + command: + - /main + image: {{ .Values.deployment.container.image.repository }}:{{ .Values.deployment.container.image.tag | default .Chart.AppVersion }} + {{- if .Values.deployment.container.image.pullPolicy }} + imagePullPolicy: {{ .Values.deployment.container.image.pullPolicy }} + {{- end }} + {{- if .Values.deployment.container.env }} + env: + {{- range $key, $value := .Values.deployment.container.env }} + - name: {{ $key }} + value: {{ $value | quote }} + {{- end }} + {{- end }} + livenessProbe: + {{- toYaml .Values.deployment.container.livenessProbe | nindent 12 }} + readinessProbe: + {{- toYaml .Values.deployment.container.readinessProbe | nindent 12 }} + resources: + {{- toYaml .Values.deployment.container.resources | nindent 12 }} + securityContext: + {{- toYaml .Values.deployment.container.securityContext | nindent 12 }} + volumeMounts: + - name: shim-config-volume + mountPath: /etc/config + - name: shim-secrets-volume + mountPath: /etc/secrets + readOnly: true + securityContext: + {{- toYaml .Values.deployment.securityContext | nindent 8 }} + serviceAccountName: {{ .Values.namePrefix }}-{{ .Values.deployment.serviceAccountName }} + terminationGracePeriodSeconds: {{ .Values.deployment.terminationGracePeriodSeconds }} + volumes: + # Custom values to configure the shim. 
+        - name: shim-config-volume
+          configMap:
+            name: {{ .Values.namePrefix }}-shim-config
+        - name: shim-secrets-volume
+          secret:
+            secretName: {{ .Values.namePrefix }}-shim-secrets
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ .Values.namePrefix }}-shim-config
+data:
+  conf.json: |-
+    {{- $mergedConf := dict }}
+    {{- if .Values.global.conf }}
+    {{- $mergedConf = .Values.global.conf }}
+    {{- end }}
+    {{- if .Values.conf }}
+    {{- $mergedConf = mergeOverwrite .Values.conf $mergedConf }}
+    {{- end }}
+    {{ toJson $mergedConf }}
+---
+apiVersion: v1
+kind: Secret
+metadata:
+  name: {{ .Values.namePrefix }}-shim-secrets
+type: Opaque
+data:
+  secrets.json: |-
+    {{- $mergedSecrets := dict }}
+    {{- if .Values.global.secrets }}
+    {{- $mergedSecrets = .Values.global.secrets }}
+    {{- end }}
+    {{- if .Values.secrets }}
+    {{- $mergedSecrets = mergeOverwrite .Values.secrets $mergedSecrets }}
+    {{- end }}
+    {{ toJson $mergedSecrets | b64enc }}
+{{- end }}
\ No newline at end of file
diff --git a/helm/library/cortex-shim/templates/service.yaml b/helm/library/cortex-shim/templates/service.yaml
new file mode 100644
index 000000000..faf3082a3
--- /dev/null
+++ b/helm/library/cortex-shim/templates/service.yaml
@@ -0,0 +1,35 @@
+# Service exposing the shim's HTTP API (port 8080 -> container port "api").
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ .Values.namePrefix }}-shim-service
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "chart.labels" . | nindent 4 }}
+spec:
+  ports:
+    - port: 8080
+      targetPort: api
+      protocol: TCP
+      name: api
+  selector:
+    {{- include "chart.selectorLabels" . | nindent 4 }}
+{{- if .Values.metrics.enable }}
+---
+# Service exposing the metrics endpoint (port 2112 -> container port "metrics").
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ .Values.namePrefix }}-shim-metrics-service
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "chart.labels" . | nindent 4 }}
+spec:
+  ports:
+    - port: 2112
+      targetPort: metrics
+      protocol: TCP
+      name: metrics
+  selector:
+    {{- include "chart.selectorLabels" . | nindent 4 }}
+{{- end }}
diff --git a/helm/library/cortex-shim/templates/serviceaccount.yaml b/helm/library/cortex-shim/templates/serviceaccount.yaml
new file mode 100644
index 000000000..ea0789dd0
--- /dev/null
+++ b/helm/library/cortex-shim/templates/serviceaccount.yaml
@@ -0,0 +1,17 @@
+{{- /* ServiceAccount referenced by the shim Deployment; rendered only when rbac.enable is true. */ -}}
+{{- if .Values.rbac.enable }}
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  labels:
+    {{- include "chart.labels" . | nindent 4 }}
+  {{- /* deployment.serviceAccount is optional: fall back to an empty dict before reading .annotations. */}}
+  {{- with (.Values.deployment.serviceAccount | default dict).annotations }}
+  annotations:
+    {{- range $key, $value := . }}
+    {{ $key }}: {{ $value | quote }}
+    {{- end }}
+  {{- end }}
+  name: {{ .Values.namePrefix }}-{{ .Values.deployment.serviceAccountName }}
+  namespace: {{ .Release.Namespace }}
+{{- end -}}
diff --git a/helm/library/cortex-shim/templates/servicemonitor.yaml b/helm/library/cortex-shim/templates/servicemonitor.yaml
new file mode 100644
index 000000000..803e66dd5
--- /dev/null
+++ b/helm/library/cortex-shim/templates/servicemonitor.yaml
@@ -0,0 +1,17 @@
+# To integrate with Prometheus: scrapes the port named "metrics" on Services
+# labelled with this chart's name. Rendered when prometheus.enable is true.
+{{- if .Values.prometheus.enable }}
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  labels:
+    {{- include "chart.labels" . | nindent 4 }}
+  name: {{ .Values.namePrefix }}-shim-metrics-monitor
+  namespace: {{ .Release.Namespace }}
+spec:
+  endpoints:
+    - port: metrics
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: {{ include "chart.name" . }}
+{{- end }}
diff --git a/helm/library/cortex-shim/values.yaml b/helm/library/cortex-shim/values.yaml
new file mode 100644
index 000000000..63574fbe4
--- /dev/null
+++ b/helm/library/cortex-shim/values.yaml
@@ -0,0 +1,64 @@
+deployment:
+  enable: true
+  replicas: 3
+  container:
+    image:
+      repository: ghcr.io/cobaltcore-dev/cortex-shim
+    # Keep these bind addresses in sync with the probe port (8081) and the metrics Service port (2112).
+    args:
+      - "--metrics-bind-address=:2112"
+      - "--health-probe-bind-address=:8081"
+      - "--metrics-secure=false"
+    resources:
+      limits:
+        cpu: 500m
+        memory: 2048Mi
+      requests:
+        cpu: 10m
+        memory: 64Mi
+    livenessProbe:
+      initialDelaySeconds: 15
+      periodSeconds: 20
+      httpGet:
+        path: /healthz
+        port: 8081
+    readinessProbe:
+      initialDelaySeconds: 5
+      periodSeconds: 10
+      httpGet:
+        path: /readyz
+        port: 8081
+    securityContext:
+      allowPrivilegeEscalation: false
+      capabilities:
+        drop:
+          - "ALL"
+  securityContext:
+    runAsNonRoot: true
+    seccompProfile:
+      type: RuntimeDefault
+  terminationGracePeriodSeconds: 10
+  serviceAccountName: shim
+
+# [METRICS]: Set to true to generate manifests for exporting metrics.
+# To disable metrics export set false, and remove the container args
+# "--metrics-bind-address=:2112" and "--metrics-secure=false".
+metrics:
+  enable: true
+
+# [RBAC]: To enable RBAC (Permissions) configurations
+rbac:
+  enable: true
+
+# [PROMETHEUS]: To enable a ServiceMonitor to export metrics to Prometheus set true
+prometheus:
+  enable: true
+
+# Values under global.conf take precedence over the chart-local conf below
+# when both are merged into the ConfigMap's conf.json.
+global:
+  conf: {}
+
+# Use this to disambiguate multiple cortex deployments in the same cluster.
+namePrefix: cortex
+conf: {}  # No config for now that's needed by all the shims.
diff --git a/helm/library/cortex/templates/manager/manager.yaml b/helm/library/cortex/templates/manager/manager.yaml index 73672164f..0c9f362aa 100644 --- a/helm/library/cortex/templates/manager/manager.yaml +++ b/helm/library/cortex/templates/manager/manager.yaml @@ -51,7 +51,7 @@ spec: protocol: TCP {{- end }} command: - - /manager + - /main image: {{ .Values.controllerManager.container.image.repository }}:{{ .Values.controllerManager.container.image.tag | default .Chart.AppVersion }} {{- if .Values.controllerManager.container.image.pullPolicy }} imagePullPolicy: {{ .Values.controllerManager.container.image.pullPolicy }} diff --git a/internal/shim/placement/.gitkeep b/internal/shim/placement/.gitkeep new file mode 100644 index 000000000..e69de29bb