From b86f36e0a4b7b8b83abf394dd2068d85ac30f116 Mon Sep 17 00:00:00 2001 From: Julius Clausnitzer Date: Mon, 23 Mar 2026 14:35:52 +0100 Subject: [PATCH 01/16] activating endpoint --- internal/scheduling/reservations/commitments/api.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/scheduling/reservations/commitments/api.go b/internal/scheduling/reservations/commitments/api.go index 9d8fd5944..e72e5b3f8 100644 --- a/internal/scheduling/reservations/commitments/api.go +++ b/internal/scheduling/reservations/commitments/api.go @@ -32,7 +32,7 @@ func NewAPIWithConfig(client client.Client, config Config) *HTTPAPI { func (api *HTTPAPI) Init(mux *http.ServeMux) { mux.HandleFunc("/v1/commitments/change-commitments", api.HandleChangeCommitments) - // mux.HandleFunc("/v1/report-capacity", api.HandleReportCapacity) + mux.HandleFunc("/v1/report-capacity", api.HandleReportCapacity) mux.HandleFunc("/v1/commitments/info", api.HandleInfo) } From 7bb9ad47b15e5acce3b9ef7c83f660143ffe93a9 Mon Sep 17 00:00:00 2001 From: Julius Clausnitzer Date: Mon, 23 Mar 2026 14:36:42 +0100 Subject: [PATCH 02/16] capacity logic --- .../reservations/commitments/capacity.go | 117 ++++++++++++++---- 1 file changed, 93 insertions(+), 24 deletions(-) diff --git a/internal/scheduling/reservations/commitments/capacity.go b/internal/scheduling/reservations/commitments/capacity.go index 04ad177e1..33f18400c 100644 --- a/internal/scheduling/reservations/commitments/capacity.go +++ b/internal/scheduling/reservations/commitments/capacity.go @@ -7,7 +7,9 @@ import ( "context" "fmt" "sort" + "time" + . "github.com/majewsky/gg/option" "github.com/sapcc/go-api-declarations/liquid" "sigs.k8s.io/controller-runtime/pkg/client" @@ -18,11 +20,16 @@ import ( // CapacityCalculator computes capacity reports for Limes LIQUID API. type CapacityCalculator struct { - client client.Client + client client.Client + schedulerClient *reservations.SchedulerClient } func NewCapacityCalculator(client client.Client) *CapacityCalculator { - return &CapacityCalculator{client: client} + schedulerClient := reservations.NewSchedulerClient("http://localhost:8080/scheduler/nova/external") + return &CapacityCalculator{ + client: client, + schedulerClient: schedulerClient, + } } // CalculateCapacity computes per-AZ capacity for all flavor groups. @@ -59,61 +66,69 @@ func (c *CapacityCalculator) CalculateCapacity(ctx context.Context) (liquid.Serv func (c *CapacityCalculator) calculateAZCapacity( ctx context.Context, - _ string, // groupName - reserved for future use - _ compute.FlavorGroupFeature, // groupData - reserved for future use + groupName string, + groupData compute.FlavorGroupFeature, ) (map[liquid.AvailabilityZone]*liquid.AZResourceCapacityReport, error) { - // Get list of availability zones from HostDetails Knowledge azs, err := c.getAvailabilityZones(ctx) if err != nil { return nil, fmt.Errorf("failed to get availability zones: %w", err) } - // Create report entry for each AZ with empty capacity/usage - // Capacity and Usage are left unset (zero value of option.Option[uint64]) - // This signals to Limes: "These AZs exist, but capacity/usage not yet calculated" result := make(map[liquid.AvailabilityZone]*liquid.AZResourceCapacityReport) for _, az := range azs { + capacity, usage, err := c.calculateInstanceCapacity(ctx, groupName, groupData, az) + if err != nil { + // Log error but continue with empty values for this AZ + result[liquid.AvailabilityZone(az)] = &liquid.AZResourceCapacityReport{} + continue + } + result[liquid.AvailabilityZone(az)] = &liquid.AZResourceCapacityReport{ - // Both Capacity and Usage left unset (empty optional values) - // TODO: Calculate actual capacity from Reservation CRDs or host resources - // TODO: Calculate actual usage from VM allocations + Capacity: capacity, + Usage: Some(usage), } } return result, nil } -func (c *CapacityCalculator) getAvailabilityZones(ctx context.Context) ([]string, error) { - // List all Knowledge CRDs to find host-details knowledge +// getHostAZMap returns a map from compute host name to availability zone. +func (c *CapacityCalculator) getHostAZMap(ctx context.Context) (map[string]string, error) { var knowledgeList v1alpha1.KnowledgeList if err := c.client.List(ctx, &knowledgeList); err != nil { return nil, fmt.Errorf("failed to list Knowledge CRDs: %w", err) } - // Find host-details knowledge and extract AZs - azSet := make(map[string]struct{}) + hostAZMap := make(map[string]string) for _, knowledge := range knowledgeList.Items { - // Look for host-details extractor if knowledge.Spec.Extractor.Name != "host_details" { continue } - - // Parse features from Raw data features, err := v1alpha1.UnboxFeatureList[compute.HostDetails](knowledge.Status.Raw) if err != nil { - // Skip if we can't parse this knowledge continue } - - // Collect unique AZ names for _, feature := range features { - if feature.AvailabilityZone != "" { - azSet[feature.AvailabilityZone] = struct{}{} + if feature.ComputeHost != "" && feature.AvailabilityZone != "" { + hostAZMap[feature.ComputeHost] = feature.AvailabilityZone } } } - // Convert set to sorted slice + return hostAZMap, nil +} + +func (c *CapacityCalculator) getAvailabilityZones(ctx context.Context) ([]string, error) { + hostAZMap, err := c.getHostAZMap(ctx) + if err != nil { + return nil, err + } + + azSet := make(map[string]struct{}) + for _, az := range hostAZMap { + azSet[az] = struct{}{} + } + azs := make([]string, 0, len(azSet)) for az := range azSet { azs = append(azs, az) @@ -122,3 +137,57 @@ func (c *CapacityCalculator) getAvailabilityZones(ctx context.Context) ([]string return azs, nil } + +// calculateInstanceCapacity returns the total capacity and current usage for a flavor group in an AZ. +// Capacity is expressed in multiples of the smallest flavor's memory. +// Total capacity is derived directly from Hypervisor CRDs (as if everything were empty). +// Currently available is derived from the scheduler (respecting current VM and reservation state). +// Usage = totalCapacity - currentlyAvailable. +func (c *CapacityCalculator) calculateInstanceCapacity( + ctx context.Context, + groupName string, + groupData compute.FlavorGroupFeature, + az string, +) (capacity uint64, usage uint64, err error) { + smallestFlavor := groupData.SmallestFlavor + + // Request 1: currently available — how many instances can be placed right now. + currentResp, err := c.schedulerClient.ScheduleReservation(ctx, reservations.ScheduleReservationRequest{ + InstanceUUID: fmt.Sprintf("capacity-current-%s-%s-%d", groupName, az, time.Now().UnixNano()), + ProjectID: "cortex-capacity-check", + FlavorName: smallestFlavor.Name, + MemoryMB: uint64(smallestFlavor.MemoryMB), + VCPUs: uint64(smallestFlavor.VCPUs), + FlavorExtraSpecs: map[string]string{"hw_version": groupName}, + AvailabilityZone: az, + Pipeline: "kvm-general-purpose-load-balancing-all-filters-enabled", + }) + if err != nil { + return 0, 0, fmt.Errorf("failed to get current available capacity: %w", err) + } + currentlyAvailable := uint64(len(currentResp.Hosts)) + + // Request 2: total capacity — hosts eligible if everything were empty. + // Uses a dedicated pipeline that ignores VM allocations and all reservations. + totalResp, err := c.schedulerClient.ScheduleReservation(ctx, reservations.ScheduleReservationRequest{ + InstanceUUID: fmt.Sprintf("capacity-total-%s-%s-%d", groupName, az, time.Now().UnixNano()), + ProjectID: "cortex-capacity-check", + FlavorName: smallestFlavor.Name, + MemoryMB: uint64(smallestFlavor.MemoryMB), + VCPUs: uint64(smallestFlavor.VCPUs), + FlavorExtraSpecs: map[string]string{"hw_version": groupName}, + AvailabilityZone: az, + Pipeline: "kvm-report-capacity", + }) + if err != nil { + return 0, 0, fmt.Errorf("failed to get total capacity: %w", err) + } + totalCapacity := uint64(len(totalResp.Hosts)) + + var usageValue uint64 + if totalCapacity >= currentlyAvailable { + usageValue = totalCapacity - currentlyAvailable + } + + return totalCapacity, usageValue, nil +} From 9d23d03d0d54c86dfba3ccae82a531e17da5b1b8 Mon Sep 17 00:00:00 2001 From: Julius Clausnitzer Date: Mon, 23 Mar 2026 14:40:00 +0100 Subject: [PATCH 03/16] new pipeline --- .../cortex-nova/templates/pipelines_kvm.yaml | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/helm/bundles/cortex-nova/templates/pipelines_kvm.yaml b/helm/bundles/cortex-nova/templates/pipelines_kvm.yaml index 29c9fed5b..e7cbbacb4 100644 --- a/helm/bundles/cortex-nova/templates/pipelines_kvm.yaml +++ b/helm/bundles/cortex-nova/templates/pipelines_kvm.yaml @@ -748,4 +748,40 @@ spec: requested by the nova flavor extra specs, like `{"arch": "x86_64", "maxphysaddr:bits": 46, ...}`. weighers: [] +--- +apiVersion: cortex.cloud/v1alpha1 +kind: Pipeline +metadata: + name: kvm-report-capacity +spec: + schedulingDomain: nova + description: | + This pipeline is used by the Liquid capacity reporter to determine the + theoretical maximum capacity of each flavor group per availability zone, + as if all hosts were completely empty. It ignores current VM allocations + and all reservation blockings so that only raw hardware capacity is + considered. + type: filter-weigher + createDecisions: false + # Fetch all placement candidates, ignoring nova's preselection. + ignorePreselection: true + filters: + - name: filter_correct_az + description: | + Restricts host candidates to the requested availability zone. + - name: filter_has_enough_capacity + description: | + Filters hosts that cannot fit the flavor based on raw hardware capacity. + VM allocations and all reservation types are ignored to represent an + empty datacenter scenario. + params: + - {key: ignoreAllocations, boolValue: true} + - {key: ignoredReservationTypes, stringListValue: ["CommittedResourceReservation", "FailoverReservation"]} + - name: filter_has_requested_traits + description: | + Ensures hosts have the hardware traits required by the flavor. + - name: filter_status_conditions + description: | + Excludes hosts that are not ready or are disabled. + weighers: [] {{- end }} From 0920075419b85ddf0be1b43583255627f7f18c05 Mon Sep 17 00:00:00 2001 From: Julius Clausnitzer Date: Mon, 23 Mar 2026 14:52:06 +0100 Subject: [PATCH 04/16] add option to ignore vm allocation for capacity filter --- .../filters/filter_has_enough_capacity.go | 28 +++++++++++-------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go b/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go index ea37e8c11..0a70f2063 100644 --- a/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go +++ b/internal/scheduling/nova/plugins/filters/filter_has_enough_capacity.go @@ -26,6 +26,10 @@ type FilterHasEnoughCapacityOpts struct { // When a reservation type is in this list, its capacity is not blocked. // Default: empty (all reservation types are considered) IgnoredReservationTypes []v1alpha1.ReservationType `json:"ignoredReservationTypes,omitempty"` + + // IgnoreAllocations skips subtracting current VM allocations from host capacity. + // When true, only raw hardware capacity is considered (empty datacenter scenario). + IgnoreAllocations bool `json:"ignoreAllocations,omitempty"` } func (FilterHasEnoughCapacityOpts) Validate() error { return nil } @@ -71,18 +75,20 @@ func (s *FilterHasEnoughCapacity) Run(traceLog *slog.Logger, request api.Externa freeResourcesByHost[hv.Name] = hv.Status.EffectiveCapacity } - // Subtract allocated resources. - for resourceName, allocated := range hv.Status.Allocation { - free, ok := freeResourcesByHost[hv.Name][resourceName] - if !ok { - traceLog.Error( - "hypervisor with allocation for unknown resource", - "host", hv.Name, "resource", resourceName, - ) - continue + // Subtract allocated resources (skip when ignoring allocations for empty-datacenter capacity queries). + if !s.Options.IgnoreAllocations { + for resourceName, allocated := range hv.Status.Allocation { + free, ok := freeResourcesByHost[hv.Name][resourceName] + if !ok { + traceLog.Error( + "hypervisor with allocation for unknown resource", + "host", hv.Name, "resource", resourceName, + ) + continue + } + free.Sub(allocated) + freeResourcesByHost[hv.Name][resourceName] = free } - free.Sub(allocated) - freeResourcesByHost[hv.Name][resourceName] = free } } From d383308b943f76c85902bd7b29b0c45a5b449919 Mon Sep 17 00:00:00 2001 From: Julius Clausnitzer Date: Mon, 23 Mar 2026 15:02:47 +0100 Subject: [PATCH 05/16] new tests --- .../commitments/api_report_capacity_test.go | 303 ++++++++++++++++++ 1 file changed, 303 insertions(+) diff --git a/internal/scheduling/reservations/commitments/api_report_capacity_test.go b/internal/scheduling/reservations/commitments/api_report_capacity_test.go index 76140e218..de7a35cd3 100644 --- a/internal/scheduling/reservations/commitments/api_report_capacity_test.go +++ b/internal/scheduling/reservations/commitments/api_report_capacity_test.go @@ -17,7 +17,9 @@ import ( "k8s.io/apimachinery/pkg/runtime" "sigs.k8s.io/controller-runtime/pkg/client/fake" + novaapi "github.com/cobaltcore-dev/cortex/api/external/nova" "github.com/cobaltcore-dev/cortex/api/v1alpha1" + "github.com/cobaltcore-dev/cortex/internal/scheduling/reservations" ) func TestHandleReportCapacity(t *testing.T) { @@ -283,3 +285,304 @@ func createTestFlavorGroupKnowledge(t *testing.T, groupName string) *v1alpha1.Kn }, } } + +func TestCapacityCalculatorWithScheduler(t *testing.T) { + scheme := runtime.NewScheme() + if err := v1alpha1.AddToScheme(scheme); err != nil { + t.Fatal(err) + } + + const ( + flavorGroup = "test-group" + az = "az-a" + flavorMemMB = uint64(32768) + flavorVCPUs = uint64(8) + ) + + flavorGroupKnowledge := createTestFlavorGroupKnowledgeWithSmallest(t, flavorGroup, flavorMemMB, flavorVCPUs) + hostDetailsKnowledge := createTestHostDetailsKnowledge(t, map[string]string{ + "host-1": az, + "host-2": az, + }) + + t.Run("computes capacity and usage via two scheduler calls", func(t *testing.T) { + // kvm-report-capacity returns 5 hosts (total capacity). + // kvm-general-purpose-load-balancing-all-filters-enabled returns 3 hosts (currently available). + // usage = 5 - 3 = 2. + server := newPipelineMockSchedulerServer(t, map[string][]string{ + "kvm-report-capacity": {"h1", "h2", "h3", "h4", "h5"}, + "kvm-general-purpose-load-balancing-all-filters-enabled": {"h1", "h2", "h3"}, + }) + defer server.Close() + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(flavorGroupKnowledge, hostDetailsKnowledge). + Build() + + calculator := &CapacityCalculator{ + client: fakeClient, + schedulerClient: reservations.NewSchedulerClient(server.URL), + } + + report, err := calculator.CalculateCapacity(context.Background()) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + res, ok := report.Resources[liquid.ResourceName("ram_"+flavorGroup)] + if !ok { + t.Fatal("expected ram_test-group resource") + } + azReport, ok := res.PerAZ[liquid.AvailabilityZone(az)] + if !ok { + t.Fatalf("expected %s in perAZ", az) + } + + if azReport.Capacity != 5 { + t.Errorf("expected capacity = 5, got %d", azReport.Capacity) + } + usageVal, ok := azReport.Usage.Unpack() + if !ok { + t.Fatal("expected usage to be set") + } + if usageVal != 2 { + t.Errorf("expected usage = 2, got %d", usageVal) + } + }) + + t.Run("usage is zero when total equals currently available", func(t *testing.T) { + server := newPipelineMockSchedulerServer(t, map[string][]string{ + "kvm-report-capacity": {"h1", "h2"}, + "kvm-general-purpose-load-balancing-all-filters-enabled": {"h1", "h2"}, + }) + defer server.Close() + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(flavorGroupKnowledge, hostDetailsKnowledge). + Build() + + calculator := &CapacityCalculator{ + client: fakeClient, + schedulerClient: reservations.NewSchedulerClient(server.URL), + } + + report, err := calculator.CalculateCapacity(context.Background()) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + azReport := report.Resources[liquid.ResourceName("ram_"+flavorGroup)].PerAZ[liquid.AvailabilityZone(az)] + usageVal, ok := azReport.Usage.Unpack() + if !ok { + t.Fatal("expected usage to be set") + } + if usageVal != 0 { + t.Errorf("expected usage = 0, got %d", usageVal) + } + }) + + t.Run("usage is clamped to zero when currently available exceeds total", func(t *testing.T) { + // Pathological: currently-available call returns more hosts than total capacity call. + server := newPipelineMockSchedulerServer(t, map[string][]string{ + "kvm-report-capacity": {"h1"}, + "kvm-general-purpose-load-balancing-all-filters-enabled": {"h1", "h2", "h3"}, + }) + defer server.Close() + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(flavorGroupKnowledge, hostDetailsKnowledge). + Build() + + calculator := &CapacityCalculator{ + client: fakeClient, + schedulerClient: reservations.NewSchedulerClient(server.URL), + } + + report, err := calculator.CalculateCapacity(context.Background()) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + azReport := report.Resources[liquid.ResourceName("ram_"+flavorGroup)].PerAZ[liquid.AvailabilityZone(az)] + usageVal, ok := azReport.Usage.Unpack() + if !ok { + t.Fatal("expected usage to be set") + } + if usageVal != 0 { + t.Errorf("expected usage = 0 (clamped), got %d", usageVal) + } + }) + + t.Run("scheduler failure yields empty AZ report without aborting", func(t *testing.T) { + failServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + http.Error(w, "internal error", http.StatusInternalServerError) + })) + defer failServer.Close() + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(flavorGroupKnowledge, hostDetailsKnowledge). + Build() + + calculator := &CapacityCalculator{ + client: fakeClient, + schedulerClient: reservations.NewSchedulerClient(failServer.URL), + } + + report, err := calculator.CalculateCapacity(context.Background()) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + res, ok := report.Resources[liquid.ResourceName("ram_"+flavorGroup)] + if !ok { + t.Fatal("expected resource to exist") + } + azReport := res.PerAZ[liquid.AvailabilityZone(az)] + if azReport == nil { + t.Fatal("expected non-nil AZ report on scheduler failure") + } + if azReport.Capacity != 0 { + t.Errorf("expected capacity = 0 on failure, got %d", azReport.Capacity) + } + }) + + t.Run("multiple AZs are reported independently", func(t *testing.T) { + twoAZHostDetails := createTestHostDetailsKnowledge(t, map[string]string{ + "host-1": "az-a", + "host-2": "az-b", + }) + // Both calls always return 3 hosts regardless of AZ (pipeline-routing mock). + server := newPipelineMockSchedulerServer(t, map[string][]string{ + "kvm-report-capacity": {"h1", "h2", "h3"}, + "kvm-general-purpose-load-balancing-all-filters-enabled": {"h1"}, + }) + defer server.Close() + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(flavorGroupKnowledge, twoAZHostDetails). + Build() + + calculator := &CapacityCalculator{ + client: fakeClient, + schedulerClient: reservations.NewSchedulerClient(server.URL), + } + + report, err := calculator.CalculateCapacity(context.Background()) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + res := report.Resources[liquid.ResourceName("ram_"+flavorGroup)] + if len(res.PerAZ) != 2 { + t.Errorf("expected 2 AZs, got %d", len(res.PerAZ)) + } + if _, ok := res.PerAZ[liquid.AvailabilityZone("az-a")]; !ok { + t.Error("expected az-a in report") + } + if _, ok := res.PerAZ[liquid.AvailabilityZone("az-b")]; !ok { + t.Error("expected az-b in report") + } + }) +} + +// newPipelineMockSchedulerServer starts a test HTTP server that returns different +// host lists depending on the pipeline name in the request body. +func newPipelineMockSchedulerServer(t *testing.T, hostsByPipeline map[string][]string) *httptest.Server { + t.Helper() + return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + var req novaapi.ExternalSchedulerRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + http.Error(w, "bad request", http.StatusBadRequest) + return + } + hosts := hostsByPipeline[req.Pipeline] + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(novaapi.ExternalSchedulerResponse{Hosts: hosts}); err != nil { + t.Errorf("mock scheduler: encode error: %v", err) + } + })) +} + +// createTestFlavorGroupKnowledgeWithSmallest creates a Knowledge CRD where smallestFlavor +// is explicitly set so the capacity calculator uses the correct memory unit. +func createTestFlavorGroupKnowledgeWithSmallest(t *testing.T, groupName string, memMB, vcpus uint64) *v1alpha1.Knowledge { + t.Helper() + + features := []map[string]interface{}{ + { + "name": groupName, + "flavors": []map[string]interface{}{ + { + "name": "test_flavor", + "vcpus": vcpus, + "memoryMB": memMB, + "diskGB": 50, + }, + }, + "smallestFlavor": map[string]interface{}{ + "name": "test_flavor", + "vcpus": vcpus, + "memoryMB": memMB, + "diskGB": 50, + }, + "largestFlavor": map[string]interface{}{ + "name": "test_flavor", + "vcpus": vcpus, + "memoryMB": memMB, + "diskGB": 50, + }, + }, + } + + raw, err := v1alpha1.BoxFeatureList(features) + if err != nil { + t.Fatal(err) + } + + return &v1alpha1.Knowledge{ + ObjectMeta: v1.ObjectMeta{Name: "flavor-groups"}, + Spec: v1alpha1.KnowledgeSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + Extractor: v1alpha1.KnowledgeExtractorSpec{Name: "flavor_groups"}, + }, + Status: v1alpha1.KnowledgeStatus{ + Conditions: []v1.Condition{{Type: v1alpha1.KnowledgeConditionReady, Status: "True"}}, + Raw: raw, + }, + } +} + +// createTestHostDetailsKnowledge creates a Knowledge CRD with host→AZ mappings. +func createTestHostDetailsKnowledge(t *testing.T, hostToAZ map[string]string) *v1alpha1.Knowledge { + t.Helper() + + features := make([]map[string]interface{}, 0, len(hostToAZ)) + for host, az := range hostToAZ { + features = append(features, map[string]interface{}{ + "computeHost": host, + "availabilityZone": az, + }) + } + + raw, err := v1alpha1.BoxFeatureList(features) + if err != nil { + t.Fatal(err) + } + + return &v1alpha1.Knowledge{ + ObjectMeta: v1.ObjectMeta{Name: "host-details"}, + Spec: v1alpha1.KnowledgeSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + Extractor: v1alpha1.KnowledgeExtractorSpec{Name: "host_details"}, + }, + Status: v1alpha1.KnowledgeStatus{ + Conditions: []v1.Condition{{Type: v1alpha1.KnowledgeConditionReady, Status: "True"}}, + Raw: raw, + }, + } +} From 579ba71815b5ef08bd50c27e01bb128051bb3dab Mon Sep 17 00:00:00 2001 From: Julius Clausnitzer Date: Mon, 23 Mar 2026 15:27:13 +0100 Subject: [PATCH 06/16] Fix lint: whitespace after multi-line signatures, drop unnecessary uint64 casts, combine return types --- .../scheduling/reservations/commitments/capacity.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/internal/scheduling/reservations/commitments/capacity.go b/internal/scheduling/reservations/commitments/capacity.go index 33f18400c..0d7b50a36 100644 --- a/internal/scheduling/reservations/commitments/capacity.go +++ b/internal/scheduling/reservations/commitments/capacity.go @@ -69,6 +69,7 @@ func (c *CapacityCalculator) calculateAZCapacity( groupName string, groupData compute.FlavorGroupFeature, ) (map[liquid.AvailabilityZone]*liquid.AZResourceCapacityReport, error) { + azs, err := c.getAvailabilityZones(ctx) if err != nil { return nil, fmt.Errorf("failed to get availability zones: %w", err) @@ -148,7 +149,8 @@ func (c *CapacityCalculator) calculateInstanceCapacity( groupName string, groupData compute.FlavorGroupFeature, az string, -) (capacity uint64, usage uint64, err error) { +) (capacity, usage uint64, err error) { + smallestFlavor := groupData.SmallestFlavor // Request 1: currently available — how many instances can be placed right now. @@ -156,8 +158,8 @@ func (c *CapacityCalculator) calculateInstanceCapacity( InstanceUUID: fmt.Sprintf("capacity-current-%s-%s-%d", groupName, az, time.Now().UnixNano()), ProjectID: "cortex-capacity-check", FlavorName: smallestFlavor.Name, - MemoryMB: uint64(smallestFlavor.MemoryMB), - VCPUs: uint64(smallestFlavor.VCPUs), + MemoryMB: smallestFlavor.MemoryMB, + VCPUs: smallestFlavor.VCPUs, FlavorExtraSpecs: map[string]string{"hw_version": groupName}, AvailabilityZone: az, Pipeline: "kvm-general-purpose-load-balancing-all-filters-enabled", @@ -173,8 +175,8 @@ func (c *CapacityCalculator) calculateInstanceCapacity( InstanceUUID: fmt.Sprintf("capacity-total-%s-%s-%d", groupName, az, time.Now().UnixNano()), ProjectID: "cortex-capacity-check", FlavorName: smallestFlavor.Name, - MemoryMB: uint64(smallestFlavor.MemoryMB), - VCPUs: uint64(smallestFlavor.VCPUs), + MemoryMB: smallestFlavor.MemoryMB, + VCPUs: smallestFlavor.VCPUs, FlavorExtraSpecs: map[string]string{"hw_version": groupName}, AvailabilityZone: az, Pipeline: "kvm-report-capacity", From c66c80b8f3929e38e7ed3a89eff1583288f6dc8a Mon Sep 17 00:00:00 2001 From: Julius Clausnitzer Date: Thu, 9 Apr 2026 13:44:48 +0200 Subject: [PATCH 07/16] fix --- .../reservations/commitments/api.go | 5 - .../commitments/api_report_capacity_test.go | 135 ++++++------------ .../reservations/commitments/capacity.go | 3 - 3 files changed, 40 insertions(+), 103 deletions(-) diff --git a/internal/scheduling/reservations/commitments/api.go b/internal/scheduling/reservations/commitments/api.go index c4d452ade..0d81db2aa 100644 --- a/internal/scheduling/reservations/commitments/api.go +++ b/internal/scheduling/reservations/commitments/api.go @@ -79,10 +79,5 @@ func (api *HTTPAPI) handleProjectEndpoint(w http.ResponseWriter, r *http.Request http.Error(w, "Not found", http.StatusNotFound) } } -func (api *HTTPAPI) Init(mux *http.ServeMux) { - mux.HandleFunc("/v1/commitments/change-commitments", api.HandleChangeCommitments) - mux.HandleFunc("/v1/report-capacity", api.HandleReportCapacity) - mux.HandleFunc("/v1/commitments/info", api.HandleInfo) -} var commitmentApiLog = ctrl.Log.WithName("commitment_api") diff --git a/internal/scheduling/reservations/commitments/api_report_capacity_test.go b/internal/scheduling/reservations/commitments/api_report_capacity_test.go index 865bf6b6d..89cef2aa8 100644 --- a/internal/scheduling/reservations/commitments/api_report_capacity_test.go +++ b/internal/scheduling/reservations/commitments/api_report_capacity_test.go @@ -395,10 +395,6 @@ func TestCapacityCalculatorWithScheduler(t *testing.T) { ) flavorGroupKnowledge := createTestFlavorGroupKnowledgeWithSmallest(t, flavorGroup, flavorMemMB, flavorVCPUs) - hostDetailsKnowledge := createTestHostDetailsKnowledge(t, map[string]string{ - "host-1": az, - "host-2": az, - }) t.Run("computes capacity and usage via two scheduler calls", func(t *testing.T) { // kvm-report-capacity returns 5 hosts (total capacity). @@ -412,7 +408,7 @@ func TestCapacityCalculatorWithScheduler(t *testing.T) { fakeClient := fake.NewClientBuilder(). WithScheme(scheme). - WithObjects(flavorGroupKnowledge, hostDetailsKnowledge). + WithObjects(flavorGroupKnowledge). Build() calculator := &CapacityCalculator{ @@ -420,29 +416,26 @@ func TestCapacityCalculatorWithScheduler(t *testing.T) { schedulerClient: reservations.NewSchedulerClient(server.URL), } - report, err := calculator.CalculateCapacity(context.Background()) + knowledge := &reservations.FlavorGroupKnowledgeClient{Client: fakeClient} + groups, err := knowledge.GetAllFlavorGroups(context.Background(), nil) if err != nil { - t.Fatalf("unexpected error: %v", err) + t.Fatalf("failed to get flavor groups: %v", err) } - - res, ok := report.Resources[liquid.ResourceName("ram_"+flavorGroup)] + groupData, ok := groups[flavorGroup] if !ok { - t.Fatal("expected ram_test-group resource") - } - azReport, ok := res.PerAZ[liquid.AvailabilityZone(az)] - if !ok { - t.Fatalf("expected %s in perAZ", az) + t.Fatalf("flavor group %q not found", flavorGroup) } - if azReport.Capacity != 5 { - t.Errorf("expected capacity = 5, got %d", azReport.Capacity) + capacity, usage, err := calculator.calculateInstanceCapacity(context.Background(), flavorGroup, groupData, az) + if err != nil { + t.Fatalf("unexpected error: %v", err) } - usageVal, ok := azReport.Usage.Unpack() - if !ok { - t.Fatal("expected usage to be set") + + if capacity != 5 { + t.Errorf("expected capacity = 5, got %d", capacity) } - if usageVal != 2 { - t.Errorf("expected usage = 2, got %d", usageVal) + if usage != 2 { + t.Errorf("expected usage = 2, got %d", usage) } }) @@ -455,7 +448,7 @@ func TestCapacityCalculatorWithScheduler(t *testing.T) { fakeClient := fake.NewClientBuilder(). WithScheme(scheme). - WithObjects(flavorGroupKnowledge, hostDetailsKnowledge). + WithObjects(flavorGroupKnowledge). Build() calculator := &CapacityCalculator{ @@ -463,18 +456,17 @@ func TestCapacityCalculatorWithScheduler(t *testing.T) { schedulerClient: reservations.NewSchedulerClient(server.URL), } - report, err := calculator.CalculateCapacity(context.Background()) + knowledge := &reservations.FlavorGroupKnowledgeClient{Client: fakeClient} + groups, err := knowledge.GetAllFlavorGroups(context.Background(), nil) if err != nil { - t.Fatalf("unexpected error: %v", err) + t.Fatalf("failed to get flavor groups: %v", err) } - - azReport := report.Resources[liquid.ResourceName("ram_"+flavorGroup)].PerAZ[liquid.AvailabilityZone(az)] - usageVal, ok := azReport.Usage.Unpack() - if !ok { - t.Fatal("expected usage to be set") + _, usage, err := calculator.calculateInstanceCapacity(context.Background(), flavorGroup, groups[flavorGroup], az) + if err != nil { + t.Fatalf("unexpected error: %v", err) } - if usageVal != 0 { - t.Errorf("expected usage = 0, got %d", usageVal) + if usage != 0 { + t.Errorf("expected usage = 0, got %d", usage) } }) @@ -488,7 +480,7 @@ func TestCapacityCalculatorWithScheduler(t *testing.T) { fakeClient := fake.NewClientBuilder(). WithScheme(scheme). - WithObjects(flavorGroupKnowledge, hostDetailsKnowledge). + WithObjects(flavorGroupKnowledge). Build() calculator := &CapacityCalculator{ @@ -496,22 +488,21 @@ func TestCapacityCalculatorWithScheduler(t *testing.T) { schedulerClient: reservations.NewSchedulerClient(server.URL), } - report, err := calculator.CalculateCapacity(context.Background()) + knowledge := &reservations.FlavorGroupKnowledgeClient{Client: fakeClient} + groups, err := knowledge.GetAllFlavorGroups(context.Background(), nil) if err != nil { - t.Fatalf("unexpected error: %v", err) + t.Fatalf("failed to get flavor groups: %v", err) } - - azReport := report.Resources[liquid.ResourceName("ram_"+flavorGroup)].PerAZ[liquid.AvailabilityZone(az)] - usageVal, ok := azReport.Usage.Unpack() - if !ok { - t.Fatal("expected usage to be set") + _, usage, err := calculator.calculateInstanceCapacity(context.Background(), flavorGroup, groups[flavorGroup], az) + if err != nil { + t.Fatalf("unexpected error: %v", err) } - if usageVal != 0 { - t.Errorf("expected usage = 0 (clamped), got %d", usageVal) + if usage != 0 { + t.Errorf("expected usage = 0 (clamped), got %d", usage) } }) - t.Run("scheduler failure yields empty AZ report without aborting", func(t *testing.T) { + t.Run("scheduler failure returns error", func(t *testing.T) { failServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { http.Error(w, "internal error", http.StatusInternalServerError) })) @@ -519,7 +510,7 @@ func TestCapacityCalculatorWithScheduler(t *testing.T) { fakeClient := fake.NewClientBuilder(). WithScheme(scheme). - WithObjects(flavorGroupKnowledge, hostDetailsKnowledge). + WithObjects(flavorGroupKnowledge). Build() calculator := &CapacityCalculator{ @@ -527,60 +518,14 @@ func TestCapacityCalculatorWithScheduler(t *testing.T) { schedulerClient: reservations.NewSchedulerClient(failServer.URL), } - report, err := calculator.CalculateCapacity(context.Background()) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - res, ok := report.Resources[liquid.ResourceName("ram_"+flavorGroup)] - if !ok { - t.Fatal("expected resource to exist") - } - azReport := res.PerAZ[liquid.AvailabilityZone(az)] - if azReport == nil { - t.Fatal("expected non-nil AZ report on scheduler failure") - } - if azReport.Capacity != 0 { - t.Errorf("expected capacity = 0 on failure, got %d", azReport.Capacity) - } - }) - - t.Run("multiple AZs are reported independently", func(t *testing.T) { - twoAZHostDetails := createTestHostDetailsKnowledge(t, map[string]string{ - "host-1": "az-a", - "host-2": "az-b", - }) - // Both calls always return 3 hosts regardless of AZ (pipeline-routing mock). - server := newPipelineMockSchedulerServer(t, map[string][]string{ - "kvm-report-capacity": {"h1", "h2", "h3"}, - "kvm-general-purpose-load-balancing-all-filters-enabled": {"h1"}, - }) - defer server.Close() - - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(flavorGroupKnowledge, twoAZHostDetails). - Build() - - calculator := &CapacityCalculator{ - client: fakeClient, - schedulerClient: reservations.NewSchedulerClient(server.URL), - } - - report, err := calculator.CalculateCapacity(context.Background()) + knowledge := &reservations.FlavorGroupKnowledgeClient{Client: fakeClient} + groups, err := knowledge.GetAllFlavorGroups(context.Background(), nil) if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - res := report.Resources[liquid.ResourceName("ram_"+flavorGroup)] - if len(res.PerAZ) != 2 { - t.Errorf("expected 2 AZs, got %d", len(res.PerAZ)) - } - if _, ok := res.PerAZ[liquid.AvailabilityZone("az-a")]; !ok { - t.Error("expected az-a in report") + t.Fatalf("failed to get flavor groups: %v", err) } - if _, ok := res.PerAZ[liquid.AvailabilityZone("az-b")]; !ok { - t.Error("expected az-b in report") + _, _, err = calculator.calculateInstanceCapacity(context.Background(), flavorGroup, groups[flavorGroup], az) + if err == nil { + t.Fatal("expected error on scheduler failure, got nil") } }) } diff --git a/internal/scheduling/reservations/commitments/capacity.go b/internal/scheduling/reservations/commitments/capacity.go index ebec0ea48..445b3f90b 100644 --- a/internal/scheduling/reservations/commitments/capacity.go +++ b/internal/scheduling/reservations/commitments/capacity.go @@ -14,9 +14,6 @@ import ( "github.com/cobaltcore-dev/cortex/internal/knowledge/extractor/plugins/compute" "github.com/cobaltcore-dev/cortex/internal/scheduling/reservations" - . "github.com/majewsky/gg/option" - "github.com/sapcc/go-api-declarations/liquid" - "sigs.k8s.io/controller-runtime/pkg/client" ) // CapacityCalculator computes capacity reports for Limes LIQUID API. From 7a0fd4fe5ddcc0b7624092df5d40ccce1dca15fb Mon Sep 17 00:00:00 2001 From: Julius Clausnitzer Date: Thu, 9 Apr 2026 14:09:36 +0200 Subject: [PATCH 08/16] resolve merge conflicts --- helm/bundles/cortex-nova/values.yaml | 4 + .../commitments/api_report_capacity.go | 12 +- .../commitments/api_report_capacity_test.go | 174 ++++++++++-------- .../reservations/commitments/capacity.go | 127 +++++++++---- .../reservations/commitments/config.go | 10 + 5 files changed, 211 insertions(+), 116 deletions(-) diff --git a/helm/bundles/cortex-nova/values.yaml b/helm/bundles/cortex-nova/values.yaml index d32316e47..ce9839d32 100644 --- a/helm/bundles/cortex-nova/values.yaml +++ b/helm/bundles/cortex-nova/values.yaml @@ -146,6 +146,10 @@ cortex-scheduling-controllers: "*": "kvm-general-purpose-load-balancing" # Catch-all fallback # Default pipeline for CR reservations when no CommittedResourceFlavorGroupPipelines entry matches committedResourcePipelineDefault: "kvm-general-purpose-load-balancing" + # Pipeline used for currently-available capacity check (respects VM allocations and reservations) + reportCapacityCurrentPipeline: "kvm-general-purpose-load-balancing-all-filters-enabled" + # Pipeline used for total theoretical capacity check (ignores VM allocations and reservations) + reportCapacityTotalPipeline: "kvm-report-capacity" # How often to re-verify active reservations # 5m = 300000000000 nanoseconds committedResourceRequeueIntervalActive: 300000000000 diff --git a/internal/scheduling/reservations/commitments/api_report_capacity.go b/internal/scheduling/reservations/commitments/api_report_capacity.go index 09fc55168..9d084b211 100644 --- a/internal/scheduling/reservations/commitments/api_report_capacity.go +++ b/internal/scheduling/reservations/commitments/api_report_capacity.go @@ -11,7 +11,6 @@ import ( "github.com/cobaltcore-dev/cortex/internal/scheduling/reservations" "github.com/google/uuid" - "github.com/sapcc/go-api-declarations/liquid" ) // handles POST /commitments/v1/report-capacity requests from Limes: @@ -50,16 +49,9 @@ func (api *HTTPAPI) HandleReportCapacity(w http.ResponseWriter, r *http.Request) logger.V(1).Info("processing report capacity request") - // Parse request body (may be empty or contain ServiceCapacityRequest) - var req liquid.ServiceCapacityRequest - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { - // Empty body is acceptable for capacity reports - req = liquid.ServiceCapacityRequest{} - } - // Calculate capacity - calculator := NewCapacityCalculator(api.client) - report, err := calculator.CalculateCapacity(ctx, req) + calculator := NewCapacityCalculator(api.client, api.config) + report, err := calculator.CalculateCapacity(ctx) if err != nil { logger.Error(err, "failed to calculate capacity") statusCode = http.StatusInternalServerError diff --git a/internal/scheduling/reservations/commitments/api_report_capacity_test.go b/internal/scheduling/reservations/commitments/api_report_capacity_test.go index 89cef2aa8..c9957f16e 100644 --- a/internal/scheduling/reservations/commitments/api_report_capacity_test.go +++ b/internal/scheduling/reservations/commitments/api_report_capacity_test.go @@ -9,7 +9,6 @@ import ( "encoding/json" "net/http" "net/http/httptest" - "slices" "strings" "testing" @@ -137,11 +136,8 @@ func TestCapacityCalculator(t *testing.T) { WithScheme(scheme). Build() - calculator := NewCapacityCalculator(fakeClient) - req := liquid.ServiceCapacityRequest{ - AllAZs: []liquid.AvailabilityZone{"az-one", "az-two"}, - } - _, err := calculator.CalculateCapacity(context.Background(), req) + calculator := NewCapacityCalculator(fakeClient, DefaultConfig()) + _, err := calculator.CalculateCapacity(context.Background()) if err == nil { t.Fatal("Expected error when flavor groups knowledge doesn't exist, got nil") } @@ -159,11 +155,8 @@ func TestCapacityCalculator(t *testing.T) { WithObjects(emptyKnowledge). Build() - calculator := NewCapacityCalculator(fakeClient) - req := liquid.ServiceCapacityRequest{ - AllAZs: []liquid.AvailabilityZone{"az-one", "az-two"}, - } - report, err := calculator.CalculateCapacity(context.Background(), req) + calculator := NewCapacityCalculator(fakeClient, DefaultConfig()) + report, err := calculator.CalculateCapacity(context.Background()) if err != nil { t.Fatalf("Expected no error, got: %v", err) } @@ -177,18 +170,19 @@ func TestCapacityCalculator(t *testing.T) { } }) - t.Run("CalculateCapacity returns perAZ entries for all AZs from request", func(t *testing.T) { + t.Run("CalculateCapacity returns perAZ entries for all AZs from host details", func(t *testing.T) { flavorGroupKnowledge := createTestFlavorGroupKnowledge(t, "test-group") + hostDetails := createTestHostDetailsKnowledge(t, map[string]string{ + "host-1": "qa-de-1a", + "host-2": "qa-de-1b", + }) fakeClient := fake.NewClientBuilder(). WithScheme(scheme). - WithObjects(flavorGroupKnowledge). + WithObjects(flavorGroupKnowledge, hostDetails). Build() - calculator := NewCapacityCalculator(fakeClient) - req := liquid.ServiceCapacityRequest{ - AllAZs: []liquid.AvailabilityZone{"qa-de-1a", "qa-de-1b", "qa-de-1d"}, - } - report, err := calculator.CalculateCapacity(context.Background(), req) + calculator := NewCapacityCalculator(fakeClient, DefaultConfig()) + report, err := calculator.CalculateCapacity(context.Background()) if err != nil { t.Fatalf("Expected no error, got: %v", err) } @@ -197,22 +191,32 @@ func TestCapacityCalculator(t *testing.T) { t.Fatalf("Expected 3 resources (_ram, _cores, _instances), got %d", len(report.Resources)) } - // Verify all resources have exactly the requested AZs - verifyPerAZMatchesRequest(t, report.Resources["hw_version_test-group_ram"], req.AllAZs) - verifyPerAZMatchesRequest(t, report.Resources["hw_version_test-group_cores"], req.AllAZs) - verifyPerAZMatchesRequest(t, report.Resources["hw_version_test-group_instances"], req.AllAZs) + // Verify all resources have entries for the AZs from host details + expectedAZs := []liquid.AvailabilityZone{"qa-de-1a", "qa-de-1b"} + for _, resName := range []string{"hw_version_test-group_ram", "hw_version_test-group_cores", "hw_version_test-group_instances"} { + res := report.Resources[liquid.ResourceName(resName)] + if res == nil { + t.Errorf("resource %s not found", resName) + continue + } + for _, az := range expectedAZs { + if _, ok := res.PerAZ[az]; !ok { + t.Errorf("%s: missing entry for AZ %s", resName, az) + } + } + } }) - t.Run("CalculateCapacity with empty AllAZs returns empty perAZ maps", func(t *testing.T) { + t.Run("CalculateCapacity with no host details returns empty perAZ maps", func(t *testing.T) { flavorGroupKnowledge := createTestFlavorGroupKnowledge(t, "test-group") + // No host details knowledge - no AZs can be derived. fakeClient := fake.NewClientBuilder(). WithScheme(scheme). WithObjects(flavorGroupKnowledge). Build() - calculator := NewCapacityCalculator(fakeClient) - req := liquid.ServiceCapacityRequest{AllAZs: []liquid.AvailabilityZone{}} - report, err := calculator.CalculateCapacity(context.Background(), req) + calculator := NewCapacityCalculator(fakeClient, DefaultConfig()) + report, err := calculator.CalculateCapacity(context.Background()) if err != nil { t.Fatalf("Expected no error, got: %v", err) } @@ -228,64 +232,37 @@ func TestCapacityCalculator(t *testing.T) { } }) - t.Run("CalculateCapacity responds to different AZ sets correctly", func(t *testing.T) { + t.Run("CalculateCapacity produces perAZ entries matching host details AZs", func(t *testing.T) { flavorGroupKnowledge := createTestFlavorGroupKnowledge(t, "test-group") + hostDetails := createTestHostDetailsKnowledge(t, map[string]string{ + "host-a": "eu-de-1a", + "host-b": "eu-de-1b", + }) fakeClient := fake.NewClientBuilder(). WithScheme(scheme). - WithObjects(flavorGroupKnowledge). + WithObjects(flavorGroupKnowledge, hostDetails). Build() - calculator := NewCapacityCalculator(fakeClient) - - req1 := liquid.ServiceCapacityRequest{ - AllAZs: []liquid.AvailabilityZone{"eu-de-1a", "eu-de-1b"}, - } - report1, err := calculator.CalculateCapacity(context.Background(), req1) + calculator := NewCapacityCalculator(fakeClient, DefaultConfig()) + report, err := calculator.CalculateCapacity(context.Background()) if err != nil { t.Fatalf("Expected no error, got: %v", err) } - req2 := liquid.ServiceCapacityRequest{ - AllAZs: []liquid.AvailabilityZone{"us-west-1a", "us-west-1b", "us-west-1c", "us-west-1d"}, - } - report2, err := calculator.CalculateCapacity(context.Background(), req2) - if err != nil { - t.Fatalf("Expected no error, got: %v", err) - } - - // Verify reports have exactly the requested AZs - for _, res := range report1.Resources { - verifyPerAZMatchesRequest(t, res, req1.AllAZs) - } - for _, res := range report2.Resources { - verifyPerAZMatchesRequest(t, res, req2.AllAZs) + // Verify resources contain exactly the AZs from host details + for resName, res := range report.Resources { + if len(res.PerAZ) != 2 { + t.Errorf("%s: expected 2 AZs, got %d", resName, len(res.PerAZ)) + } + for _, az := range []liquid.AvailabilityZone{"eu-de-1a", "eu-de-1b"} { + if _, ok := res.PerAZ[az]; !ok { + t.Errorf("%s: missing entry for AZ %s", resName, az) + } + } } }) } -// verifyPerAZMatchesRequest checks that perAZ entries match exactly the requested AZs. -// This follows the same semantics as nova liquid: the response must contain -// entries for all AZs in AllAZs, no more and no less. -func verifyPerAZMatchesRequest(t *testing.T, res *liquid.ResourceCapacityReport, requestedAZs []liquid.AvailabilityZone) { - t.Helper() - if res == nil { - t.Error("resource is nil") - return - } - if len(res.PerAZ) != len(requestedAZs) { - t.Errorf("expected %d AZs, got %d", len(requestedAZs), len(res.PerAZ)) - } - for _, az := range requestedAZs { - if _, ok := res.PerAZ[az]; !ok { - t.Errorf("missing entry for requested AZ %s", az) - } - } - for az := range res.PerAZ { - if !slices.Contains(requestedAZs, az) { - t.Errorf("unexpected AZ %s in response (not in request)", az) - } - } -} // createEmptyFlavorGroupKnowledge creates an empty flavor groups Knowledge CRD func createEmptyFlavorGroupKnowledge() *v1alpha1.Knowledge { @@ -414,6 +391,8 @@ func TestCapacityCalculatorWithScheduler(t *testing.T) { calculator := &CapacityCalculator{ client: fakeClient, schedulerClient: reservations.NewSchedulerClient(server.URL), + currentPipeline: "kvm-general-purpose-load-balancing-all-filters-enabled", + totalPipeline: "kvm-report-capacity", } knowledge := &reservations.FlavorGroupKnowledgeClient{Client: fakeClient} @@ -454,6 +433,8 @@ func TestCapacityCalculatorWithScheduler(t *testing.T) { calculator := &CapacityCalculator{ client: fakeClient, schedulerClient: reservations.NewSchedulerClient(server.URL), + currentPipeline: "kvm-general-purpose-load-balancing-all-filters-enabled", + totalPipeline: "kvm-report-capacity", } knowledge := &reservations.FlavorGroupKnowledgeClient{Client: fakeClient} @@ -486,6 +467,8 @@ func TestCapacityCalculatorWithScheduler(t *testing.T) { calculator := &CapacityCalculator{ client: fakeClient, schedulerClient: reservations.NewSchedulerClient(server.URL), + currentPipeline: "kvm-general-purpose-load-balancing-all-filters-enabled", + totalPipeline: "kvm-report-capacity", } knowledge := &reservations.FlavorGroupKnowledgeClient{Client: fakeClient} @@ -516,6 +499,8 @@ func TestCapacityCalculatorWithScheduler(t *testing.T) { calculator := &CapacityCalculator{ client: fakeClient, schedulerClient: reservations.NewSchedulerClient(failServer.URL), + currentPipeline: "kvm-general-purpose-load-balancing-all-filters-enabled", + totalPipeline: "kvm-report-capacity", } knowledge := &reservations.FlavorGroupKnowledgeClient{Client: fakeClient} @@ -528,6 +513,47 @@ func TestCapacityCalculatorWithScheduler(t *testing.T) { t.Fatal("expected error on scheduler failure, got nil") } }) + + t.Run("multiple AZs are reported independently", func(t *testing.T) { + twoAZHostDetails := createTestHostDetailsKnowledge(t, map[string]string{ + "host-1": "az-a", + "host-2": "az-b", + }) + // Both calls always return 3 hosts regardless of AZ (pipeline-routing mock). + server := newPipelineMockSchedulerServer(t, map[string][]string{ + "kvm-report-capacity": {"h1", "h2", "h3"}, + "kvm-general-purpose-load-balancing-all-filters-enabled": {"h1"}, + }) + defer server.Close() + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(flavorGroupKnowledge, twoAZHostDetails). + Build() + + calculator := &CapacityCalculator{ + client: fakeClient, + schedulerClient: reservations.NewSchedulerClient(server.URL), + currentPipeline: "kvm-general-purpose-load-balancing-all-filters-enabled", + totalPipeline: "kvm-report-capacity", + } + + report, err := calculator.CalculateCapacity(context.Background()) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + res := report.Resources[liquid.ResourceName(ResourceNameRAM(flavorGroup))] + if len(res.PerAZ) != 2 { + t.Errorf("expected 2 AZs, got %d", len(res.PerAZ)) + } + if _, ok := res.PerAZ[liquid.AvailabilityZone("az-a")]; !ok { + t.Error("expected az-a in report") + } + if _, ok := res.PerAZ[liquid.AvailabilityZone("az-b")]; !ok { + t.Error("expected az-b in report") + } + }) } // newPipelineMockSchedulerServer starts a test HTTP server that returns different @@ -604,8 +630,8 @@ func createTestHostDetailsKnowledge(t *testing.T, hostToAZ map[string]string) *v features := make([]map[string]interface{}, 0, len(hostToAZ)) for host, az := range hostToAZ { features = append(features, map[string]interface{}{ - "computeHost": host, - "availabilityZone": az, + "ComputeHost": host, + "AvailabilityZone": az, }) } @@ -618,7 +644,7 @@ func createTestHostDetailsKnowledge(t *testing.T, hostToAZ map[string]string) *v ObjectMeta: v1.ObjectMeta{Name: "host-details"}, Spec: v1alpha1.KnowledgeSpec{ SchedulingDomain: v1alpha1.SchedulingDomainNova, - Extractor: v1alpha1.KnowledgeExtractorSpec{Name: "host_details"}, + Extractor: v1alpha1.KnowledgeExtractorSpec{Name: "sap_host_details_extractor"}, }, Status: v1alpha1.KnowledgeStatus{ Conditions: []v1.Condition{{Type: v1alpha1.KnowledgeConditionReady, Status: "True"}}, diff --git a/internal/scheduling/reservations/commitments/capacity.go b/internal/scheduling/reservations/commitments/capacity.go index 445b3f90b..067e066a7 100644 --- a/internal/scheduling/reservations/commitments/capacity.go +++ b/internal/scheduling/reservations/commitments/capacity.go @@ -6,12 +6,14 @@ package commitments import ( "context" "fmt" + "sort" "time" . "github.com/majewsky/gg/option" "github.com/sapcc/go-api-declarations/liquid" "sigs.k8s.io/controller-runtime/pkg/client" + "github.com/cobaltcore-dev/cortex/api/v1alpha1" "github.com/cobaltcore-dev/cortex/internal/knowledge/extractor/plugins/compute" "github.com/cobaltcore-dev/cortex/internal/scheduling/reservations" ) @@ -20,21 +22,24 @@ import ( type CapacityCalculator struct { client client.Client schedulerClient *reservations.SchedulerClient + currentPipeline string + totalPipeline string } -func NewCapacityCalculator(client client.Client) *CapacityCalculator { - schedulerClient := reservations.NewSchedulerClient("http://localhost:8080/scheduler/nova/external") +func NewCapacityCalculator(client client.Client, config Config) *CapacityCalculator { return &CapacityCalculator{ client: client, - schedulerClient: schedulerClient, + schedulerClient: reservations.NewSchedulerClient(config.SchedulerURL), + currentPipeline: config.ReportCapacityCurrentPipeline, + totalPipeline: config.ReportCapacityTotalPipeline, } } // CalculateCapacity computes per-AZ capacity for all flavor groups. // For each flavor group, three resources are reported: _ram, _cores, _instances. // All flavor groups are included, not just those with fixed RAM/core ratio. -// The request provides the list of all AZs from Limes that must be included in the report. -func (c *CapacityCalculator) CalculateCapacity(ctx context.Context, req liquid.ServiceCapacityRequest) (liquid.ServiceCapacityReport, error) { +// AZs are derived from HostDetails Knowledge CRDs. +func (c *CapacityCalculator) CalculateCapacity(ctx context.Context) (liquid.ServiceCapacityReport, error) { // Get all flavor groups from Knowledge CRDs knowledge := &reservations.FlavorGroupKnowledgeClient{Client: c.client} flavorGroups, err := knowledge.GetAllFlavorGroups(ctx, nil) @@ -48,6 +53,12 @@ func (c *CapacityCalculator) CalculateCapacity(ctx context.Context, req liquid.S infoVersion = knowledgeCRD.Status.LastContentChange.Unix() } + // Get availability zones from host details + azs, err := c.getAvailabilityZones(ctx) + if err != nil { + return liquid.ServiceCapacityReport{}, fmt.Errorf("failed to get availability zones: %w", err) + } + // Build capacity report for all flavor groups report := liquid.ServiceCapacityReport{ InfoVersion: infoVersion, @@ -55,10 +66,11 @@ func (c *CapacityCalculator) CalculateCapacity(ctx context.Context, req liquid.S } for groupName, groupData := range flavorGroups { - // All flavor groups are included in capacity reporting (not just those with fixed ratio). - - // Calculate per-AZ capacity (placeholder: capacity=0 for all resources) - azCapacity := c.calculateAZCapacity(groupName, groupData, req.AllAZs) + // Calculate per-AZ capacity using scheduler + azCapacity, err := c.calculateAZCapacity(ctx, groupName, groupData, azs) + if err != nil { + return liquid.ServiceCapacityReport{}, fmt.Errorf("failed to calculate capacity for %s: %w", groupName, err) + } // === 1. RAM Resource === ramResourceName := liquid.ResourceName(ResourceNameRAM(groupName)) @@ -103,32 +115,32 @@ func (c *CapacityCalculator) copyAZCapacity( return result } +// calculateAZCapacity computes capacity per AZ for a flavor group via scheduler calls. +// On scheduler failure for an AZ, that AZ still gets an entry with capacity=0. func (c *CapacityCalculator) calculateAZCapacity( - _ string, // groupName - reserved for future use - _ compute.FlavorGroupFeature, // groupData - reserved for future use - allAZs []liquid.AvailabilityZone, // list of all AZs from Limes request -) map[liquid.AvailabilityZone]*liquid.AZResourceCapacityReport { + ctx context.Context, + groupName string, + groupData compute.FlavorGroupFeature, + azs []string, +) (map[liquid.AvailabilityZone]*liquid.AZResourceCapacityReport, error) { - // Create report entry for each AZ with placeholder capacity=0. - // - // NOTE: When implementing real capacity calculation here, you MUST also update - // the copying logic in CalculateCapacity() for _cores and _instances resources. - // Those resources use different units (vCPUs and VM count) than _ram (memory multiples), - // so the capacity values cannot be simply copied - they require unit conversion: - // - _cores capacity = RAM capacity / ramCoreRatio - // - _instances capacity = needs its own derivation logic - // - // TODO: Calculate actual capacity from Reservation CRDs or host resources - // TODO: Calculate actual usage from VM allocations result := make(map[liquid.AvailabilityZone]*liquid.AZResourceCapacityReport) - for _, az := range allAZs { - result[az] = &liquid.AZResourceCapacityReport{ - Capacity: 0, // Placeholder: capacity=0 until actual calculation is implemented - Usage: Some[uint64](0), // Placeholder: usage=0 until actual calculation is implemented + for _, az := range azs { + capacity, usage, err := c.calculateInstanceCapacity(ctx, groupName, groupData, az) + if err != nil { + // On failure, report az with capacity=0 rather than aborting entirely. + result[liquid.AvailabilityZone(az)] = &liquid.AZResourceCapacityReport{ + Capacity: 0, + Usage: Some[uint64](0), + } + continue + } + result[liquid.AvailabilityZone(az)] = &liquid.AZResourceCapacityReport{ + Capacity: capacity, + Usage: Some[uint64](usage), } } - - return result + return result, nil } // calculateInstanceCapacity returns the total capacity and current usage for a flavor group in an AZ. @@ -154,7 +166,7 @@ func (c *CapacityCalculator) calculateInstanceCapacity( VCPUs: smallestFlavor.VCPUs, FlavorExtraSpecs: map[string]string{"hw_version": groupName}, AvailabilityZone: az, - Pipeline: "kvm-general-purpose-load-balancing-all-filters-enabled", + Pipeline: c.currentPipeline, }) if err != nil { return 0, 0, fmt.Errorf("failed to get current available capacity: %w", err) @@ -171,7 +183,7 @@ func (c *CapacityCalculator) calculateInstanceCapacity( VCPUs: smallestFlavor.VCPUs, FlavorExtraSpecs: map[string]string{"hw_version": groupName}, AvailabilityZone: az, - Pipeline: "kvm-report-capacity", + Pipeline: c.totalPipeline, }) if err != nil { return 0, 0, fmt.Errorf("failed to get total capacity: %w", err) @@ -185,3 +197,54 @@ func (c *CapacityCalculator) calculateInstanceCapacity( return totalCapacity, usageValue, nil } + +// getHostAZMap returns a map from compute host name to availability zone. +func (c *CapacityCalculator) getHostAZMap(ctx context.Context) (map[string]string, error) { + var knowledgeList v1alpha1.KnowledgeList + if err := c.client.List(ctx, &knowledgeList); err != nil { + return nil, fmt.Errorf("failed to list Knowledge CRDs: %w", err) + } + + type hostAZEntry struct { + ComputeHost string `json:"ComputeHost"` + AvailabilityZone string `json:"AvailabilityZone"` + } + + hostAZMap := make(map[string]string) + for _, knowledge := range knowledgeList.Items { + if knowledge.Spec.Extractor.Name != "sap_host_details_extractor" { + continue + } + features, err := v1alpha1.UnboxFeatureList[hostAZEntry](knowledge.Status.Raw) + if err != nil { + continue + } + for _, feature := range features { + if feature.ComputeHost != "" && feature.AvailabilityZone != "" { + hostAZMap[feature.ComputeHost] = feature.AvailabilityZone + } + } + } + + return hostAZMap, nil +} + +func (c *CapacityCalculator) getAvailabilityZones(ctx context.Context) ([]string, error) { + hostAZMap, err := c.getHostAZMap(ctx) + if err != nil { + return nil, err + } + + azSet := make(map[string]struct{}) + for _, az := range hostAZMap { + azSet[az] = struct{}{} + } + + azs := make([]string, 0, len(azSet)) + for az := range azSet { + azs = append(azs, az) + } + sort.Strings(azs) + + return azs, nil +} diff --git a/internal/scheduling/reservations/commitments/config.go b/internal/scheduling/reservations/commitments/config.go index 7a6c9005f..4579e41d3 100644 --- a/internal/scheduling/reservations/commitments/config.go +++ b/internal/scheduling/reservations/commitments/config.go @@ -30,6 +30,14 @@ type Config struct { // Secret ref to the database credentials for querying VM state. DatabaseSecretRef *corev1.SecretReference `json:"databaseSecretRef,omitempty"` + // ReportCapacityCurrentPipeline is the pipeline used to determine currently available capacity + // (respects VM allocations and reservations). + ReportCapacityCurrentPipeline string `json:"reportCapacityCurrentPipeline"` + + // ReportCapacityTotalPipeline is the pipeline used to determine total theoretical capacity + // (ignores VM allocations and reservations). + ReportCapacityTotalPipeline string `json:"reportCapacityTotalPipeline"` + // FlavorGroupPipelines maps flavor group names to pipeline names. // Example: {"2152": "kvm-hana-bin-packing", "2101": "kvm-general-purpose-load-balancing", "*": "kvm-general-purpose-load-balancing"} // Used to select different scheduling pipelines based on flavor group characteristics. @@ -90,6 +98,8 @@ func DefaultConfig() Config { RequeueIntervalRetry: 1 * time.Minute, PipelineDefault: "kvm-general-purpose-load-balancing", SchedulerURL: "http://localhost:8080/scheduler/nova/external", + ReportCapacityCurrentPipeline: "kvm-general-purpose-load-balancing-all-filters-enabled", + ReportCapacityTotalPipeline: "kvm-report-capacity", ChangeAPIWatchReservationsTimeout: 10 * time.Second, ChangeAPIWatchReservationsPollInterval: 500 * time.Millisecond, EnableChangeCommitmentsAPI: true, From 9a72243812f906f85a553373e693565b4038d411 Mon Sep 17 00:00:00 2001 From: Julius Clausnitzer Date: Tue, 14 Apr 2026 14:13:31 +0200 Subject: [PATCH 09/16] refined capacity endpoint w/o usage --- helm/bundles/cortex-nova/values.yaml | 5 +- .../reservations/commitments/api.go | 3 - .../commitments/api_report_capacity_test.go | 419 +++++++++++------- .../reservations/commitments/capacity.go | 114 +++-- .../reservations/commitments/config.go | 10 +- 5 files changed, 320 insertions(+), 231 deletions(-) diff --git a/helm/bundles/cortex-nova/values.yaml b/helm/bundles/cortex-nova/values.yaml index ce9839d32..d087603c7 100644 --- a/helm/bundles/cortex-nova/values.yaml +++ b/helm/bundles/cortex-nova/values.yaml @@ -146,9 +146,8 @@ cortex-scheduling-controllers: "*": "kvm-general-purpose-load-balancing" # Catch-all fallback # Default pipeline for CR reservations when no CommittedResourceFlavorGroupPipelines entry matches committedResourcePipelineDefault: "kvm-general-purpose-load-balancing" - # Pipeline used for currently-available capacity check (respects VM allocations and reservations) - reportCapacityCurrentPipeline: "kvm-general-purpose-load-balancing-all-filters-enabled" - # Pipeline used for total theoretical capacity check (ignores VM allocations and reservations) + # Pipeline used for capacity reporting: determines eligible hosts per AZ (ignores allocations/reservations). + # Host resource data is then read from Hypervisor CRDs to compute actual multiples. reportCapacityTotalPipeline: "kvm-report-capacity" # How often to re-verify active reservations # 5m = 300000000000 nanoseconds diff --git a/internal/scheduling/reservations/commitments/api.go b/internal/scheduling/reservations/commitments/api.go index 0d81db2aa..06fb97be1 100644 --- a/internal/scheduling/reservations/commitments/api.go +++ b/internal/scheduling/reservations/commitments/api.go @@ -12,7 +12,6 @@ import ( "github.com/cobaltcore-dev/cortex/internal/scheduling/nova" "github.com/go-logr/logr" "github.com/prometheus/client_golang/prometheus" - ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -79,5 +78,3 @@ func (api *HTTPAPI) handleProjectEndpoint(w http.ResponseWriter, r *http.Request http.Error(w, "Not found", http.StatusNotFound) } } - -var commitmentApiLog = ctrl.Log.WithName("commitment_api") diff --git a/internal/scheduling/reservations/commitments/api_report_capacity_test.go b/internal/scheduling/reservations/commitments/api_report_capacity_test.go index c9957f16e..cae317c00 100644 --- a/internal/scheduling/reservations/commitments/api_report_capacity_test.go +++ b/internal/scheduling/reservations/commitments/api_report_capacity_test.go @@ -12,7 +12,9 @@ import ( "strings" "testing" + hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" "github.com/sapcc/go-api-declarations/liquid" + "k8s.io/apimachinery/pkg/api/resource" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "sigs.k8s.io/controller-runtime/pkg/client/fake" @@ -22,12 +24,20 @@ import ( "github.com/cobaltcore-dev/cortex/internal/scheduling/reservations" ) -func TestHandleReportCapacity(t *testing.T) { - // Setup fake client +func testScheme(t *testing.T) *runtime.Scheme { + t.Helper() scheme := runtime.NewScheme() if err := v1alpha1.AddToScheme(scheme); err != nil { t.Fatal(err) } + if err := hv1.AddToScheme(scheme); err != nil { + t.Fatal(err) + } + return scheme +} + +func TestHandleReportCapacity(t *testing.T) { + scheme := testScheme(t) // Create empty flavor groups knowledge so capacity calculation doesn't fail emptyKnowledge := createEmptyFlavorGroupKnowledge() @@ -125,11 +135,7 @@ func TestHandleReportCapacity(t *testing.T) { } func TestCapacityCalculator(t *testing.T) { - // Setup fake client with Knowledge CRD - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatal(err) - } + scheme := testScheme(t) t.Run("CalculateCapacity returns error when no flavor groups knowledge exists", func(t *testing.T) { fakeClient := fake.NewClientBuilder(). @@ -147,7 +153,6 @@ func TestCapacityCalculator(t *testing.T) { }) t.Run("CalculateCapacity returns empty report when flavor groups knowledge exists but is empty", func(t *testing.T) { - // Create empty flavor groups knowledge emptyKnowledge := createEmptyFlavorGroupKnowledge() fakeClient := fake.NewClientBuilder(). @@ -209,7 +214,6 @@ func TestCapacityCalculator(t *testing.T) { t.Run("CalculateCapacity with no host details returns empty perAZ maps", func(t *testing.T) { flavorGroupKnowledge := createTestFlavorGroupKnowledge(t, "test-group") - // No host details knowledge - no AZs can be derived. fakeClient := fake.NewClientBuilder(). WithScheme(scheme). WithObjects(flavorGroupKnowledge). @@ -249,7 +253,6 @@ func TestCapacityCalculator(t *testing.T) { t.Fatalf("Expected no error, got: %v", err) } - // Verify resources contain exactly the AZs from host details for resName, res := range report.Resources { if len(res.PerAZ) != 2 { t.Errorf("%s: expected 2 AZs, got %d", resName, len(res.PerAZ)) @@ -263,135 +266,35 @@ func TestCapacityCalculator(t *testing.T) { }) } - -// createEmptyFlavorGroupKnowledge creates an empty flavor groups Knowledge CRD -func createEmptyFlavorGroupKnowledge() *v1alpha1.Knowledge { - // Box empty array properly - emptyFeatures := []map[string]interface{}{} - raw, err := v1alpha1.BoxFeatureList(emptyFeatures) - if err != nil { - panic(err) // Should never happen for empty slice - } - - return &v1alpha1.Knowledge{ - ObjectMeta: v1.ObjectMeta{ - Name: "flavor-groups", - // No namespace - Knowledge is cluster-scoped - }, - Spec: v1alpha1.KnowledgeSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Extractor: v1alpha1.KnowledgeExtractorSpec{ - Name: "flavor_groups", - }, - }, - Status: v1alpha1.KnowledgeStatus{ - Conditions: []v1.Condition{ - { - Type: v1alpha1.KnowledgeConditionReady, - Status: "True", - }, - }, - Raw: raw, - }, - } -} - -// createTestFlavorGroupKnowledge creates a test Knowledge CRD with flavor group data -// that accepts commitments (has fixed RAM/core ratio) -func createTestFlavorGroupKnowledge(t *testing.T, groupName string) *v1alpha1.Knowledge { - t.Helper() - - features := []map[string]interface{}{ - { - "name": groupName, - "flavors": []map[string]interface{}{ - { - "name": "test_c8_m32", - "vcpus": 8, - "memoryMB": 32768, - "diskGB": 50, - }, - }, - "largestFlavor": map[string]interface{}{ - "name": "test_c8_m32", - "vcpus": 8, - "memoryMB": 32768, - "diskGB": 50, - }, - "smallestFlavor": map[string]interface{}{ - "name": "test_c8_m32", - "vcpus": 8, - "memoryMB": 32768, - "diskGB": 50, - }, - // Fixed RAM/core ratio (4096 MiB per vCPU) - required for group to accept commitments - "ramCoreRatio": 4096, - }, - } - - // Use BoxFeatureList to properly format the features - raw, err := v1alpha1.BoxFeatureList(features) - if err != nil { - t.Fatal(err) - } - - return &v1alpha1.Knowledge{ - ObjectMeta: v1.ObjectMeta{ - Name: "flavor-groups", - // No namespace - Knowledge is cluster-scoped - }, - Spec: v1alpha1.KnowledgeSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Extractor: v1alpha1.KnowledgeExtractorSpec{ - Name: "flavor_groups", - }, - }, - Status: v1alpha1.KnowledgeStatus{ - Conditions: []v1.Condition{ - { - Type: v1alpha1.KnowledgeConditionReady, - Status: "True", - }, - }, - Raw: raw, - }, - } -} - -func TestCapacityCalculatorWithScheduler(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatal(err) - } +func TestCapacityCalculatorWithHypervisors(t *testing.T) { + scheme := testScheme(t) const ( flavorGroup = "test-group" az = "az-a" - flavorMemMB = uint64(32768) + flavorMemMB = uint64(32768) // 32 GiB flavorVCPUs = uint64(8) ) flavorGroupKnowledge := createTestFlavorGroupKnowledgeWithSmallest(t, flavorGroup, flavorMemMB, flavorVCPUs) - t.Run("computes capacity and usage via two scheduler calls", func(t *testing.T) { - // kvm-report-capacity returns 5 hosts (total capacity). - // kvm-general-purpose-load-balancing-all-filters-enabled returns 3 hosts (currently available). - // usage = 5 - 3 = 2. - server := newPipelineMockSchedulerServer(t, map[string][]string{ - "kvm-report-capacity": {"h1", "h2", "h3", "h4", "h5"}, - "kvm-general-purpose-load-balancing-all-filters-enabled": {"h1", "h2", "h3"}, - }) + t.Run("computes capacity as multiples of smallest flavor", func(t *testing.T) { + // Host has 256 GiB effective capacity. Smallest flavor = 32 GiB. + // Total capacity = floor(256 / 32) = 8. + server := newMockSchedulerServer(t, []string{"host-1"}) defer server.Close() + hvObj := createTestHypervisor("host-1", "256Gi", "64Gi") + hostDetails := createTestHostDetailsKnowledge(t, map[string]string{"host-1": az}) + fakeClient := fake.NewClientBuilder(). WithScheme(scheme). - WithObjects(flavorGroupKnowledge). + WithObjects(flavorGroupKnowledge, hostDetails, hvObj). Build() calculator := &CapacityCalculator{ client: fakeClient, schedulerClient: reservations.NewSchedulerClient(server.URL), - currentPipeline: "kvm-general-purpose-load-balancing-all-filters-enabled", totalPipeline: "kvm-report-capacity", } @@ -400,40 +303,75 @@ func TestCapacityCalculatorWithScheduler(t *testing.T) { if err != nil { t.Fatalf("failed to get flavor groups: %v", err) } - groupData, ok := groups[flavorGroup] - if !ok { - t.Fatalf("flavor group %q not found", flavorGroup) - } - capacity, usage, err := calculator.calculateInstanceCapacity(context.Background(), flavorGroup, groupData, az) + hvByName := map[string]hv1.Hypervisor{"host-1": *hvObj} + capacity, err := calculator.calculateInstanceCapacity(context.Background(), flavorGroup, groups[flavorGroup], az, hvByName) if err != nil { t.Fatalf("unexpected error: %v", err) } - if capacity != 5 { - t.Errorf("expected capacity = 5, got %d", capacity) - } - if usage != 2 { - t.Errorf("expected usage = 2, got %d", usage) + if capacity != 8 { + t.Errorf("expected capacity = 8, got %d", capacity) } }) - t.Run("usage is zero when total equals currently available", func(t *testing.T) { - server := newPipelineMockSchedulerServer(t, map[string][]string{ - "kvm-report-capacity": {"h1", "h2"}, - "kvm-general-purpose-load-balancing-all-filters-enabled": {"h1", "h2"}, + t.Run("sums multiples across multiple hosts", func(t *testing.T) { + // Host-1: 256 GiB → total=8 + // Host-2: 128 GiB → total=4 + // Combined: total=12 + server := newMockSchedulerServer(t, []string{"host-1", "host-2"}) + defer server.Close() + + hv1Obj := createTestHypervisor("host-1", "256Gi", "128Gi") + hv2Obj := createTestHypervisor("host-2", "128Gi", "0") + hostDetails := createTestHostDetailsKnowledge(t, map[string]string{ + "host-1": az, + "host-2": az, }) + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(flavorGroupKnowledge, hostDetails, hv1Obj, hv2Obj). + Build() + + calculator := &CapacityCalculator{ + client: fakeClient, + schedulerClient: reservations.NewSchedulerClient(server.URL), + totalPipeline: "kvm-report-capacity", + } + + knowledge := &reservations.FlavorGroupKnowledgeClient{Client: fakeClient} + groups, err := knowledge.GetAllFlavorGroups(context.Background(), nil) + if err != nil { + t.Fatalf("failed to get flavor groups: %v", err) + } + + hvByName := map[string]hv1.Hypervisor{"host-1": *hv1Obj, "host-2": *hv2Obj} + capacity, err := calculator.calculateInstanceCapacity(context.Background(), flavorGroup, groups[flavorGroup], az, hvByName) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if capacity != 12 { + t.Errorf("expected capacity = 12, got %d", capacity) + } + }) + + t.Run("capacity is correct when nothing is allocated", func(t *testing.T) { + server := newMockSchedulerServer(t, []string{"host-1"}) defer server.Close() + hvObj := createTestHypervisor("host-1", "128Gi", "0") + hostDetails := createTestHostDetailsKnowledge(t, map[string]string{"host-1": az}) + fakeClient := fake.NewClientBuilder(). WithScheme(scheme). - WithObjects(flavorGroupKnowledge). + WithObjects(flavorGroupKnowledge, hostDetails, hvObj). Build() calculator := &CapacityCalculator{ client: fakeClient, schedulerClient: reservations.NewSchedulerClient(server.URL), - currentPipeline: "kvm-general-purpose-load-balancing-all-filters-enabled", totalPipeline: "kvm-report-capacity", } @@ -442,32 +380,33 @@ func TestCapacityCalculatorWithScheduler(t *testing.T) { if err != nil { t.Fatalf("failed to get flavor groups: %v", err) } - _, usage, err := calculator.calculateInstanceCapacity(context.Background(), flavorGroup, groups[flavorGroup], az) + + hvByName := map[string]hv1.Hypervisor{"host-1": *hvObj} + capacity, err := calculator.calculateInstanceCapacity(context.Background(), flavorGroup, groups[flavorGroup], az, hvByName) if err != nil { t.Fatalf("unexpected error: %v", err) } - if usage != 0 { - t.Errorf("expected usage = 0, got %d", usage) + + if capacity != 4 { + t.Errorf("expected capacity = 4, got %d", capacity) } }) - t.Run("usage is clamped to zero when currently available exceeds total", func(t *testing.T) { - // Pathological: currently-available call returns more hosts than total capacity call. - server := newPipelineMockSchedulerServer(t, map[string][]string{ - "kvm-report-capacity": {"h1"}, - "kvm-general-purpose-load-balancing-all-filters-enabled": {"h1", "h2", "h3"}, - }) + t.Run("host not found in HV CRDs is skipped", func(t *testing.T) { + // Scheduler returns a host with no matching HV CRD — should contribute 0 capacity. + server := newMockSchedulerServer(t, []string{"host-unknown"}) defer server.Close() + hostDetails := createTestHostDetailsKnowledge(t, map[string]string{"host-unknown": az}) + fakeClient := fake.NewClientBuilder(). WithScheme(scheme). - WithObjects(flavorGroupKnowledge). + WithObjects(flavorGroupKnowledge, hostDetails). Build() calculator := &CapacityCalculator{ client: fakeClient, schedulerClient: reservations.NewSchedulerClient(server.URL), - currentPipeline: "kvm-general-purpose-load-balancing-all-filters-enabled", totalPipeline: "kvm-report-capacity", } @@ -476,12 +415,15 @@ func TestCapacityCalculatorWithScheduler(t *testing.T) { if err != nil { t.Fatalf("failed to get flavor groups: %v", err) } - _, usage, err := calculator.calculateInstanceCapacity(context.Background(), flavorGroup, groups[flavorGroup], az) + + hvByName := map[string]hv1.Hypervisor{} // empty + capacity, err := calculator.calculateInstanceCapacity(context.Background(), flavorGroup, groups[flavorGroup], az, hvByName) if err != nil { t.Fatalf("unexpected error: %v", err) } - if usage != 0 { - t.Errorf("expected usage = 0 (clamped), got %d", usage) + + if capacity != 0 { + t.Errorf("expected capacity = 0, got %d", capacity) } }) @@ -499,7 +441,6 @@ func TestCapacityCalculatorWithScheduler(t *testing.T) { calculator := &CapacityCalculator{ client: fakeClient, schedulerClient: reservations.NewSchedulerClient(failServer.URL), - currentPipeline: "kvm-general-purpose-load-balancing-all-filters-enabled", totalPipeline: "kvm-report-capacity", } @@ -508,33 +449,34 @@ func TestCapacityCalculatorWithScheduler(t *testing.T) { if err != nil { t.Fatalf("failed to get flavor groups: %v", err) } - _, _, err = calculator.calculateInstanceCapacity(context.Background(), flavorGroup, groups[flavorGroup], az) + + hvByName := map[string]hv1.Hypervisor{} + _, err = calculator.calculateInstanceCapacity(context.Background(), flavorGroup, groups[flavorGroup], az, hvByName) if err == nil { t.Fatal("expected error on scheduler failure, got nil") } }) t.Run("multiple AZs are reported independently", func(t *testing.T) { - twoAZHostDetails := createTestHostDetailsKnowledge(t, map[string]string{ + hostDetails := createTestHostDetailsKnowledge(t, map[string]string{ "host-1": "az-a", "host-2": "az-b", }) - // Both calls always return 3 hosts regardless of AZ (pipeline-routing mock). - server := newPipelineMockSchedulerServer(t, map[string][]string{ - "kvm-report-capacity": {"h1", "h2", "h3"}, - "kvm-general-purpose-load-balancing-all-filters-enabled": {"h1"}, - }) + // Scheduler always returns both hosts (mock doesn't filter by AZ). + server := newMockSchedulerServer(t, []string{"host-1", "host-2"}) defer server.Close() + hv1Obj := createTestHypervisor("host-1", "128Gi", "32Gi") + hv2Obj := createTestHypervisor("host-2", "64Gi", "0") + fakeClient := fake.NewClientBuilder(). WithScheme(scheme). - WithObjects(flavorGroupKnowledge, twoAZHostDetails). + WithObjects(flavorGroupKnowledge, hostDetails, hv1Obj, hv2Obj). Build() calculator := &CapacityCalculator{ client: fakeClient, schedulerClient: reservations.NewSchedulerClient(server.URL), - currentPipeline: "kvm-general-purpose-load-balancing-all-filters-enabled", totalPipeline: "kvm-report-capacity", } @@ -554,19 +496,49 @@ func TestCapacityCalculatorWithScheduler(t *testing.T) { t.Error("expected az-b in report") } }) + + t.Run("partial memory is floored to full multiples", func(t *testing.T) { + // Host has 100 GiB capacity. Smallest flavor = 32 GiB. + // Total = floor(100 / 32) = 3 (not 3.125). + server := newMockSchedulerServer(t, []string{"host-1"}) + defer server.Close() + + hvObj := createTestHypervisor("host-1", "100Gi", "0") + hostDetails := createTestHostDetailsKnowledge(t, map[string]string{"host-1": az}) + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(flavorGroupKnowledge, hostDetails, hvObj). + Build() + + calculator := &CapacityCalculator{ + client: fakeClient, + schedulerClient: reservations.NewSchedulerClient(server.URL), + totalPipeline: "kvm-report-capacity", + } + + knowledge := &reservations.FlavorGroupKnowledgeClient{Client: fakeClient} + groups, err := knowledge.GetAllFlavorGroups(context.Background(), nil) + if err != nil { + t.Fatalf("failed to get flavor groups: %v", err) + } + + hvByName := map[string]hv1.Hypervisor{"host-1": *hvObj} + capacity, err := calculator.calculateInstanceCapacity(context.Background(), flavorGroup, groups[flavorGroup], az, hvByName) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if capacity != 3 { + t.Errorf("expected capacity = 3 (floored), got %d", capacity) + } + }) } -// newPipelineMockSchedulerServer starts a test HTTP server that returns different -// host lists depending on the pipeline name in the request body. -func newPipelineMockSchedulerServer(t *testing.T, hostsByPipeline map[string][]string) *httptest.Server { +// newMockSchedulerServer returns a test HTTP server that always returns the given host list. +func newMockSchedulerServer(t *testing.T, hosts []string) *httptest.Server { t.Helper() return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - var req novaapi.ExternalSchedulerRequest - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { - http.Error(w, "bad request", http.StatusBadRequest) - return - } - hosts := hostsByPipeline[req.Pipeline] w.Header().Set("Content-Type", "application/json") if err := json.NewEncoder(w).Encode(novaapi.ExternalSchedulerResponse{Hosts: hosts}); err != nil { t.Errorf("mock scheduler: encode error: %v", err) @@ -574,6 +546,113 @@ func newPipelineMockSchedulerServer(t *testing.T, hostsByPipeline map[string][]s })) } +// createTestHypervisor creates an HV CRD with the given effective capacity and allocation. +func createTestHypervisor(name, effectiveCapacity, allocation string) *hv1.Hypervisor { + hv := &hv1.Hypervisor{ + ObjectMeta: v1.ObjectMeta{Name: name}, + Status: hv1.HypervisorStatus{ + EffectiveCapacity: map[hv1.ResourceName]resource.Quantity{ + hv1.ResourceMemory: resource.MustParse(effectiveCapacity), + }, + }, + } + if allocation != "0" && allocation != "" { + hv.Status.Allocation = map[hv1.ResourceName]resource.Quantity{ + hv1.ResourceMemory: resource.MustParse(allocation), + } + } + return hv +} + +// createEmptyFlavorGroupKnowledge creates an empty flavor groups Knowledge CRD +func createEmptyFlavorGroupKnowledge() *v1alpha1.Knowledge { + // Box empty array properly + emptyFeatures := []map[string]interface{}{} + raw, err := v1alpha1.BoxFeatureList(emptyFeatures) + if err != nil { + panic(err) // Should never happen for empty slice + } + + return &v1alpha1.Knowledge{ + ObjectMeta: v1.ObjectMeta{ + Name: "flavor-groups", + }, + Spec: v1alpha1.KnowledgeSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + Extractor: v1alpha1.KnowledgeExtractorSpec{ + Name: "flavor_groups", + }, + }, + Status: v1alpha1.KnowledgeStatus{ + Conditions: []v1.Condition{ + { + Type: v1alpha1.KnowledgeConditionReady, + Status: "True", + }, + }, + Raw: raw, + }, + } +} + +// createTestFlavorGroupKnowledge creates a test Knowledge CRD with flavor group data +func createTestFlavorGroupKnowledge(t *testing.T, groupName string) *v1alpha1.Knowledge { + t.Helper() + + features := []map[string]interface{}{ + { + "name": groupName, + "flavors": []map[string]interface{}{ + { + "name": "test_c8_m32", + "vcpus": 8, + "memoryMB": 32768, + "diskGB": 50, + }, + }, + "largestFlavor": map[string]interface{}{ + "name": "test_c8_m32", + "vcpus": 8, + "memoryMB": 32768, + "diskGB": 50, + }, + "smallestFlavor": map[string]interface{}{ + "name": "test_c8_m32", + "vcpus": 8, + "memoryMB": 32768, + "diskGB": 50, + }, + "ramCoreRatio": 4096, + }, + } + + raw, err := v1alpha1.BoxFeatureList(features) + if err != nil { + t.Fatal(err) + } + + return &v1alpha1.Knowledge{ + ObjectMeta: v1.ObjectMeta{ + Name: "flavor-groups", + }, + Spec: v1alpha1.KnowledgeSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + Extractor: v1alpha1.KnowledgeExtractorSpec{ + Name: "flavor_groups", + }, + }, + Status: v1alpha1.KnowledgeStatus{ + Conditions: []v1.Condition{ + { + Type: v1alpha1.KnowledgeConditionReady, + Status: "True", + }, + }, + Raw: raw, + }, + } +} + // createTestFlavorGroupKnowledgeWithSmallest creates a Knowledge CRD where smallestFlavor // is explicitly set so the capacity calculator uses the correct memory unit. func createTestFlavorGroupKnowledgeWithSmallest(t *testing.T, groupName string, memMB, vcpus uint64) *v1alpha1.Knowledge { diff --git a/internal/scheduling/reservations/commitments/capacity.go b/internal/scheduling/reservations/commitments/capacity.go index 067e066a7..3be3ce9b6 100644 --- a/internal/scheduling/reservations/commitments/capacity.go +++ b/internal/scheduling/reservations/commitments/capacity.go @@ -9,6 +9,7 @@ import ( "sort" "time" + hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" . "github.com/majewsky/gg/option" "github.com/sapcc/go-api-declarations/liquid" "sigs.k8s.io/controller-runtime/pkg/client" @@ -22,7 +23,6 @@ import ( type CapacityCalculator struct { client client.Client schedulerClient *reservations.SchedulerClient - currentPipeline string totalPipeline string } @@ -30,7 +30,6 @@ func NewCapacityCalculator(client client.Client, config Config) *CapacityCalcula return &CapacityCalculator{ client: client, schedulerClient: reservations.NewSchedulerClient(config.SchedulerURL), - currentPipeline: config.ReportCapacityCurrentPipeline, totalPipeline: config.ReportCapacityTotalPipeline, } } @@ -59,6 +58,16 @@ func (c *CapacityCalculator) CalculateCapacity(ctx context.Context) (liquid.Serv return liquid.ServiceCapacityReport{}, fmt.Errorf("failed to get availability zones: %w", err) } + // Pre-fetch all Hypervisor CRDs once (shared across all flavor groups and AZs) + var hvList hv1.HypervisorList + if err := c.client.List(ctx, &hvList); err != nil { + return liquid.ServiceCapacityReport{}, fmt.Errorf("failed to list hypervisors: %w", err) + } + hvByName := make(map[string]hv1.Hypervisor, len(hvList.Items)) + for _, hv := range hvList.Items { + hvByName[hv.Name] = hv + } + // Build capacity report for all flavor groups report := liquid.ServiceCapacityReport{ InfoVersion: infoVersion, @@ -66,11 +75,8 @@ func (c *CapacityCalculator) CalculateCapacity(ctx context.Context) (liquid.Serv } for groupName, groupData := range flavorGroups { - // Calculate per-AZ capacity using scheduler - azCapacity, err := c.calculateAZCapacity(ctx, groupName, groupData, azs) - if err != nil { - return liquid.ServiceCapacityReport{}, fmt.Errorf("failed to calculate capacity for %s: %w", groupName, err) - } + // Calculate per-AZ capacity using scheduler + HV CRDs + azCapacity := c.calculateAZCapacity(ctx, groupName, groupData, azs, hvByName) // === 1. RAM Resource === ramResourceName := liquid.ResourceName(ResourceNameRAM(groupName)) @@ -79,17 +85,14 @@ func (c *CapacityCalculator) CalculateCapacity(ctx context.Context) (liquid.Serv } // === 2. Cores Resource === - // NOTE: Copying RAM capacity is only valid while capacity=0 (placeholder). - // When real capacity is implemented, derive cores capacity with unit conversion - // (e.g., cores = RAM / ramCoreRatio). See calculateAZCapacity for details. + // All three resources express capacity in units of "multiples of the smallest flavor", + // so the same number applies to ram, cores, and instances. coresResourceName := liquid.ResourceName(ResourceNameCores(groupName)) report.Resources[coresResourceName] = &liquid.ResourceCapacityReport{ PerAZ: c.copyAZCapacity(azCapacity), } // === 3. Instances Resource === - // NOTE: Same as cores - copying is only valid while capacity=0 (placeholder). - // When real capacity is implemented, derive instances capacity appropriately. instancesResourceName := liquid.ResourceName(ResourceNameInstances(groupName)) report.Resources[instancesResourceName] = &liquid.ResourceCapacityReport{ PerAZ: c.copyAZCapacity(azCapacity), @@ -115,68 +118,60 @@ func (c *CapacityCalculator) copyAZCapacity( return result } -// calculateAZCapacity computes capacity per AZ for a flavor group via scheduler calls. +// calculateAZCapacity computes capacity per AZ for a flavor group. +// Uses one scheduler call per AZ to get eligible hosts, then reads HV CRDs for resource data. // On scheduler failure for an AZ, that AZ still gets an entry with capacity=0. func (c *CapacityCalculator) calculateAZCapacity( ctx context.Context, groupName string, groupData compute.FlavorGroupFeature, azs []string, -) (map[liquid.AvailabilityZone]*liquid.AZResourceCapacityReport, error) { + hvByName map[string]hv1.Hypervisor, +) map[liquid.AvailabilityZone]*liquid.AZResourceCapacityReport { result := make(map[liquid.AvailabilityZone]*liquid.AZResourceCapacityReport) for _, az := range azs { - capacity, usage, err := c.calculateInstanceCapacity(ctx, groupName, groupData, az) + capacity, err := c.calculateInstanceCapacity(ctx, groupName, groupData, az, hvByName) if err != nil { // On failure, report az with capacity=0 rather than aborting entirely. result[liquid.AvailabilityZone(az)] = &liquid.AZResourceCapacityReport{ Capacity: 0, - Usage: Some[uint64](0), + Usage: None[uint64](), } continue } result[liquid.AvailabilityZone(az)] = &liquid.AZResourceCapacityReport{ Capacity: capacity, - Usage: Some[uint64](usage), + Usage: None[uint64](), } } - return result, nil + return result } -// calculateInstanceCapacity returns the total capacity and current usage for a flavor group in an AZ. +// calculateInstanceCapacity returns the total capacity for a flavor group in an AZ. // Capacity is expressed in multiples of the smallest flavor's memory. -// Total capacity is derived directly from Hypervisor CRDs (as if everything were empty). -// Currently available is derived from the scheduler (respecting current VM and reservation state). -// Usage = totalCapacity - currentlyAvailable. +// Usage tracking (VM allocations + reservations) is not yet implemented — see PR 2. +// +// 1. One scheduler call (kvm-report-capacity pipeline, ignoring allocations) → list of eligible hosts +// 2. For each eligible host, read EffectiveCapacity from HV CRDs +// 3. Total capacity = sum(floor(EffectiveCapacity.Memory / smallestFlavorMemory)) func (c *CapacityCalculator) calculateInstanceCapacity( ctx context.Context, groupName string, groupData compute.FlavorGroupFeature, az string, -) (capacity, usage uint64, err error) { + hvByName map[string]hv1.Hypervisor, +) (capacity uint64, err error) { smallestFlavor := groupData.SmallestFlavor - - // Request 1: currently available — how many instances can be placed right now. - currentResp, err := c.schedulerClient.ScheduleReservation(ctx, reservations.ScheduleReservationRequest{ - InstanceUUID: fmt.Sprintf("capacity-current-%s-%s-%d", groupName, az, time.Now().UnixNano()), - ProjectID: "cortex-capacity-check", - FlavorName: smallestFlavor.Name, - MemoryMB: smallestFlavor.MemoryMB, - VCPUs: smallestFlavor.VCPUs, - FlavorExtraSpecs: map[string]string{"hw_version": groupName}, - AvailabilityZone: az, - Pipeline: c.currentPipeline, - }) - if err != nil { - return 0, 0, fmt.Errorf("failed to get current available capacity: %w", err) + smallestFlavorBytes := int64(smallestFlavor.MemoryMB) * 1024 * 1024 //nolint:gosec // flavor memory from Nova, realistically bounded + if smallestFlavorBytes <= 0 { + return 0, fmt.Errorf("smallest flavor %q has invalid memory %d MB", smallestFlavor.Name, smallestFlavor.MemoryMB) } - currentlyAvailable := uint64(len(currentResp.Hosts)) - // Request 2: total capacity — hosts eligible if everything were empty. - // Uses a dedicated pipeline that ignores VM allocations and all reservations. - totalResp, err := c.schedulerClient.ScheduleReservation(ctx, reservations.ScheduleReservationRequest{ - InstanceUUID: fmt.Sprintf("capacity-total-%s-%s-%d", groupName, az, time.Now().UnixNano()), + // Scheduler call: get eligible hosts (ignoring allocations and reservations). + resp, err := c.schedulerClient.ScheduleReservation(ctx, reservations.ScheduleReservationRequest{ + InstanceUUID: fmt.Sprintf("capacity-%s-%s-%d", groupName, az, time.Now().UnixNano()), ProjectID: "cortex-capacity-check", FlavorName: smallestFlavor.Name, MemoryMB: smallestFlavor.MemoryMB, @@ -186,16 +181,39 @@ func (c *CapacityCalculator) calculateInstanceCapacity( Pipeline: c.totalPipeline, }) if err != nil { - return 0, 0, fmt.Errorf("failed to get total capacity: %w", err) + return 0, fmt.Errorf("scheduler call failed: %w", err) } - totalCapacity := uint64(len(totalResp.Hosts)) - var usageValue uint64 - if totalCapacity >= currentlyAvailable { - usageValue = totalCapacity - currentlyAvailable + // For each eligible host, look up HV CRD and compute multiples. + var totalCapacity uint64 + for _, hostName := range resp.Hosts { + hv, ok := hvByName[hostName] + if !ok { + continue + } + + // Use EffectiveCapacity if available, fall back to Capacity. + effectiveCap := hv.Status.EffectiveCapacity + if effectiveCap == nil { + effectiveCap = hv.Status.Capacity + } + if effectiveCap == nil { + continue + } + + memCapacity, ok := effectiveCap[hv1.ResourceMemory] + if !ok { + continue + } + + // Total: floor(effectiveCapacity / smallestFlavorMemory) + capBytes := memCapacity.Value() + if capBytes > 0 { + totalCapacity += uint64(capBytes / smallestFlavorBytes) //nolint:gosec // both values are positive, result fits uint64 + } } - return totalCapacity, usageValue, nil + return totalCapacity, nil } // getHostAZMap returns a map from compute host name to availability zone. diff --git a/internal/scheduling/reservations/commitments/config.go b/internal/scheduling/reservations/commitments/config.go index 4579e41d3..d9f6718c9 100644 --- a/internal/scheduling/reservations/commitments/config.go +++ b/internal/scheduling/reservations/commitments/config.go @@ -30,12 +30,9 @@ type Config struct { // Secret ref to the database credentials for querying VM state. DatabaseSecretRef *corev1.SecretReference `json:"databaseSecretRef,omitempty"` - // ReportCapacityCurrentPipeline is the pipeline used to determine currently available capacity - // (respects VM allocations and reservations). - ReportCapacityCurrentPipeline string `json:"reportCapacityCurrentPipeline"` - - // ReportCapacityTotalPipeline is the pipeline used to determine total theoretical capacity - // (ignores VM allocations and reservations). + // ReportCapacityTotalPipeline is the pipeline used to determine eligible hosts for capacity calculation. + // This pipeline ignores VM allocations and reservations (empty datacenter scenario). + // Host resource data is then read from Hypervisor CRDs to compute actual multiples. ReportCapacityTotalPipeline string `json:"reportCapacityTotalPipeline"` // FlavorGroupPipelines maps flavor group names to pipeline names. @@ -98,7 +95,6 @@ func DefaultConfig() Config { RequeueIntervalRetry: 1 * time.Minute, PipelineDefault: "kvm-general-purpose-load-balancing", SchedulerURL: "http://localhost:8080/scheduler/nova/external", - ReportCapacityCurrentPipeline: "kvm-general-purpose-load-balancing-all-filters-enabled", ReportCapacityTotalPipeline: "kvm-report-capacity", ChangeAPIWatchReservationsTimeout: 10 * time.Second, ChangeAPIWatchReservationsPollInterval: 500 * time.Millisecond, From 3d178286458a6acd19eb1525902ac85203dfb64e Mon Sep 17 00:00:00 2001 From: Julius Clausnitzer Date: Tue, 14 Apr 2026 14:36:30 +0200 Subject: [PATCH 10/16] fix --- internal/scheduling/reservations/commitments/capacity.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/scheduling/reservations/commitments/capacity.go b/internal/scheduling/reservations/commitments/capacity.go index 3be3ce9b6..5b2febbdf 100644 --- a/internal/scheduling/reservations/commitments/capacity.go +++ b/internal/scheduling/reservations/commitments/capacity.go @@ -136,13 +136,13 @@ func (c *CapacityCalculator) calculateAZCapacity( // On failure, report az with capacity=0 rather than aborting entirely. result[liquid.AvailabilityZone(az)] = &liquid.AZResourceCapacityReport{ Capacity: 0, - Usage: None[uint64](), + Usage: Some[uint64](0), // Placeholder: usage=0 until actual calculation is implemented } continue } result[liquid.AvailabilityZone(az)] = &liquid.AZResourceCapacityReport{ Capacity: capacity, - Usage: None[uint64](), + Usage: Some[uint64](0), // Placeholder: usage=0 until actual calculation is implemented } } return result From a849edeaf65df5383d159cf76ac8652b8fe0dd98 Mon Sep 17 00:00:00 2001 From: Julius Clausnitzer Date: Wed, 15 Apr 2026 10:48:12 +0200 Subject: [PATCH 11/16] coderabbit fixes --- .../cortex-nova/templates/pipelines_kvm.yaml | 4 ++++ .../commitments/api_report_capacity_test.go | 14 +++++++------- .../reservations/commitments/capacity.go | 9 +++++++-- .../scheduling/reservations/commitments/config.go | 3 +++ 4 files changed, 21 insertions(+), 9 deletions(-) diff --git a/helm/bundles/cortex-nova/templates/pipelines_kvm.yaml b/helm/bundles/cortex-nova/templates/pipelines_kvm.yaml index ccfd7930a..b7a95599f 100644 --- a/helm/bundles/cortex-nova/templates/pipelines_kvm.yaml +++ b/helm/bundles/cortex-nova/templates/pipelines_kvm.yaml @@ -589,6 +589,10 @@ spec: - name: filter_has_requested_traits description: | Ensures hosts have the hardware traits required by the flavor. + - name: filter_capabilities + description: | + Ensures hosts meet the compute capabilities required by the flavor + extra specs (e.g., architecture, maxphysaddr bits). - name: filter_status_conditions description: | Excludes hosts that are not ready or are disabled. diff --git a/internal/scheduling/reservations/commitments/api_report_capacity_test.go b/internal/scheduling/reservations/commitments/api_report_capacity_test.go index cae317c00..644e83cb1 100644 --- a/internal/scheduling/reservations/commitments/api_report_capacity_test.go +++ b/internal/scheduling/reservations/commitments/api_report_capacity_test.go @@ -322,8 +322,8 @@ func TestCapacityCalculatorWithHypervisors(t *testing.T) { server := newMockSchedulerServer(t, []string{"host-1", "host-2"}) defer server.Close() - hv1Obj := createTestHypervisor("host-1", "256Gi", "128Gi") - hv2Obj := createTestHypervisor("host-2", "128Gi", "0") + host1HV := createTestHypervisor("host-1", "256Gi", "128Gi") + host2HV := createTestHypervisor("host-2", "128Gi", "0") hostDetails := createTestHostDetailsKnowledge(t, map[string]string{ "host-1": az, "host-2": az, @@ -331,7 +331,7 @@ func TestCapacityCalculatorWithHypervisors(t *testing.T) { fakeClient := fake.NewClientBuilder(). WithScheme(scheme). - WithObjects(flavorGroupKnowledge, hostDetails, hv1Obj, hv2Obj). + WithObjects(flavorGroupKnowledge, hostDetails, host1HV, host2HV). Build() calculator := &CapacityCalculator{ @@ -346,7 +346,7 @@ func TestCapacityCalculatorWithHypervisors(t *testing.T) { t.Fatalf("failed to get flavor groups: %v", err) } - hvByName := map[string]hv1.Hypervisor{"host-1": *hv1Obj, "host-2": *hv2Obj} + hvByName := map[string]hv1.Hypervisor{"host-1": *host1HV, "host-2": *host2HV} capacity, err := calculator.calculateInstanceCapacity(context.Background(), flavorGroup, groups[flavorGroup], az, hvByName) if err != nil { t.Fatalf("unexpected error: %v", err) @@ -466,12 +466,12 @@ func TestCapacityCalculatorWithHypervisors(t *testing.T) { server := newMockSchedulerServer(t, []string{"host-1", "host-2"}) defer server.Close() - hv1Obj := createTestHypervisor("host-1", "128Gi", "32Gi") - hv2Obj := createTestHypervisor("host-2", "64Gi", "0") + host1HV := createTestHypervisor("host-1", "128Gi", "32Gi") + host2HV := createTestHypervisor("host-2", "64Gi", "0") fakeClient := fake.NewClientBuilder(). WithScheme(scheme). - WithObjects(flavorGroupKnowledge, hostDetails, hv1Obj, hv2Obj). + WithObjects(flavorGroupKnowledge, hostDetails, host1HV, host2HV). Build() calculator := &CapacityCalculator{ diff --git a/internal/scheduling/reservations/commitments/capacity.go b/internal/scheduling/reservations/commitments/capacity.go index 5b2febbdf..918fc40dd 100644 --- a/internal/scheduling/reservations/commitments/capacity.go +++ b/internal/scheduling/reservations/commitments/capacity.go @@ -7,8 +7,8 @@ import ( "context" "fmt" "sort" - "time" + "github.com/google/uuid" hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" . "github.com/majewsky/gg/option" "github.com/sapcc/go-api-declarations/liquid" @@ -133,6 +133,8 @@ func (c *CapacityCalculator) calculateAZCapacity( for _, az := range azs { capacity, err := c.calculateInstanceCapacity(ctx, groupName, groupData, az, hvByName) if err != nil { + LoggerFromContext(ctx).Error(err, "failed to calculate capacity for AZ, reporting 0", + "flavorGroup", groupName, "az", az) // On failure, report az with capacity=0 rather than aborting entirely. result[liquid.AvailabilityZone(az)] = &liquid.AZResourceCapacityReport{ Capacity: 0, @@ -171,7 +173,7 @@ func (c *CapacityCalculator) calculateInstanceCapacity( // Scheduler call: get eligible hosts (ignoring allocations and reservations). resp, err := c.schedulerClient.ScheduleReservation(ctx, reservations.ScheduleReservationRequest{ - InstanceUUID: fmt.Sprintf("capacity-%s-%s-%d", groupName, az, time.Now().UnixNano()), + InstanceUUID: uuid.New().String(), ProjectID: "cortex-capacity-check", FlavorName: smallestFlavor.Name, MemoryMB: smallestFlavor.MemoryMB, @@ -230,6 +232,9 @@ func (c *CapacityCalculator) getHostAZMap(ctx context.Context) (map[string]strin hostAZMap := make(map[string]string) for _, knowledge := range knowledgeList.Items { + if knowledge.Spec.SchedulingDomain != v1alpha1.SchedulingDomainNova { + continue + } if knowledge.Spec.Extractor.Name != "sap_host_details_extractor" { continue } diff --git a/internal/scheduling/reservations/commitments/config.go b/internal/scheduling/reservations/commitments/config.go index a7770b20f..e41153cb6 100644 --- a/internal/scheduling/reservations/commitments/config.go +++ b/internal/scheduling/reservations/commitments/config.go @@ -86,6 +86,9 @@ func (c *Config) ApplyDefaults() { if c.SchedulerURL == "" { c.SchedulerURL = defaults.SchedulerURL } + if c.ReportCapacityTotalPipeline == "" { + c.ReportCapacityTotalPipeline = defaults.ReportCapacityTotalPipeline + } if c.ChangeAPIWatchReservationsTimeout == 0 { c.ChangeAPIWatchReservationsTimeout = defaults.ChangeAPIWatchReservationsTimeout } From 8a1abfc678751e5c25260b08e6f19bc731c5ea2c Mon Sep 17 00:00:00 2001 From: Julius Clausnitzer Date: Wed, 15 Apr 2026 14:27:48 +0200 Subject: [PATCH 12/16] lint --- internal/scheduling/reservations/commitments/capacity.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/scheduling/reservations/commitments/capacity.go b/internal/scheduling/reservations/commitments/capacity.go index 918fc40dd..b625ce80e 100644 --- a/internal/scheduling/reservations/commitments/capacity.go +++ b/internal/scheduling/reservations/commitments/capacity.go @@ -8,8 +8,8 @@ import ( "fmt" "sort" - "github.com/google/uuid" hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" + "github.com/google/uuid" . "github.com/majewsky/gg/option" "github.com/sapcc/go-api-declarations/liquid" "sigs.k8s.io/controller-runtime/pkg/client" From 6697346901d6cdf62c6e22f9625c0b746dd667b3 Mon Sep 17 00:00:00 2001 From: Julius Clausnitzer Date: Wed, 15 Apr 2026 16:07:26 +0200 Subject: [PATCH 13/16] fix --- internal/scheduling/reservations/commitments/capacity.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/scheduling/reservations/commitments/capacity.go b/internal/scheduling/reservations/commitments/capacity.go index b625ce80e..bb506c755 100644 --- a/internal/scheduling/reservations/commitments/capacity.go +++ b/internal/scheduling/reservations/commitments/capacity.go @@ -178,7 +178,7 @@ func (c *CapacityCalculator) calculateInstanceCapacity( FlavorName: smallestFlavor.Name, MemoryMB: smallestFlavor.MemoryMB, VCPUs: smallestFlavor.VCPUs, - FlavorExtraSpecs: map[string]string{"hw_version": groupName}, + FlavorExtraSpecs: smallestFlavor.ExtraSpecs, AvailabilityZone: az, Pipeline: c.totalPipeline, }) From 755f4373df604da739c9c51ee6647ad148484442 Mon Sep 17 00:00:00 2001 From: "coderabbitai[bot]" <136622811+coderabbitai[bot]@users.noreply.github.com> Date: Wed, 15 Apr 2026 14:30:48 +0000 Subject: [PATCH 14/16] fix: apply CodeRabbit auto-fixes Fixed 1 file(s) based on 3 unresolved review comments. Co-authored-by: CodeRabbit --- .../reservations/commitments/capacity.go | 24 +++++++++++++++---- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/internal/scheduling/reservations/commitments/capacity.go b/internal/scheduling/reservations/commitments/capacity.go index bb506c755..ea0c11bdd 100644 --- a/internal/scheduling/reservations/commitments/capacity.go +++ b/internal/scheduling/reservations/commitments/capacity.go @@ -76,7 +76,10 @@ func (c *CapacityCalculator) CalculateCapacity(ctx context.Context) (liquid.Serv for groupName, groupData := range flavorGroups { // Calculate per-AZ capacity using scheduler + HV CRDs - azCapacity := c.calculateAZCapacity(ctx, groupName, groupData, azs, hvByName) + azCapacity, err := c.calculateAZCapacity(ctx, groupName, groupData, azs, hvByName) + if err != nil { + return liquid.ServiceCapacityReport{}, fmt.Errorf("failed to calculate capacity for flavor group %s: %w", groupName, err) + } // === 1. RAM Resource === ramResourceName := liquid.ResourceName(ResourceNameRAM(groupName)) @@ -121,18 +124,21 @@ func (c *CapacityCalculator) copyAZCapacity( // calculateAZCapacity computes capacity per AZ for a flavor group. // Uses one scheduler call per AZ to get eligible hosts, then reads HV CRDs for resource data. // On scheduler failure for an AZ, that AZ still gets an entry with capacity=0. +// If all AZs fail, returns an error instead of a zero-capacity report. func (c *CapacityCalculator) calculateAZCapacity( ctx context.Context, groupName string, groupData compute.FlavorGroupFeature, azs []string, hvByName map[string]hv1.Hypervisor, -) map[liquid.AvailabilityZone]*liquid.AZResourceCapacityReport { +) (map[liquid.AvailabilityZone]*liquid.AZResourceCapacityReport, error) { result := make(map[liquid.AvailabilityZone]*liquid.AZResourceCapacityReport) + failures := 0 for _, az := range azs { capacity, err := c.calculateInstanceCapacity(ctx, groupName, groupData, az, hvByName) if err != nil { + failures++ LoggerFromContext(ctx).Error(err, "failed to calculate capacity for AZ, reporting 0", "flavorGroup", groupName, "az", az) // On failure, report az with capacity=0 rather than aborting entirely. @@ -147,7 +153,13 @@ func (c *CapacityCalculator) calculateAZCapacity( Usage: Some[uint64](0), // Placeholder: usage=0 until actual calculation is implemented } } - return result + + // If all AZs failed, return an error instead of a zero-capacity report + if failures == len(azs) && len(azs) > 0 { + return nil, fmt.Errorf("failed to calculate capacity for all AZs in flavor group %s", groupName) + } + + return result, nil } // calculateInstanceCapacity returns the total capacity for a flavor group in an AZ. @@ -191,6 +203,8 @@ func (c *CapacityCalculator) calculateInstanceCapacity( for _, hostName := range resp.Hosts { hv, ok := hvByName[hostName] if !ok { + LoggerFromContext(ctx).Info("scheduler host not found in hypervisor CRDs, skipping", + "host", hostName, "az", az, "flavorGroup", groupName) continue } @@ -222,7 +236,7 @@ func (c *CapacityCalculator) calculateInstanceCapacity( func (c *CapacityCalculator) getHostAZMap(ctx context.Context) (map[string]string, error) { var knowledgeList v1alpha1.KnowledgeList if err := c.client.List(ctx, &knowledgeList); err != nil { - return nil, fmt.Errorf("failed to list Knowledge CRDs: %w", err) + return nil, fmt.Errorf("failed to list knowledge CRDs: %w", err) } type hostAZEntry struct { @@ -270,4 +284,4 @@ func (c *CapacityCalculator) getAvailabilityZones(ctx context.Context) ([]string sort.Strings(azs) return azs, nil -} +} \ No newline at end of file From a5b2cf212d93a718d411863226ffa706d5d19ae8 Mon Sep 17 00:00:00 2001 From: Julius Clausnitzer Date: Wed, 15 Apr 2026 16:30:53 +0200 Subject: [PATCH 15/16] lint --- .../commitments/api_report_capacity_test.go | 12 ++++++------ .../scheduling/reservations/commitments/capacity.go | 3 +-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/internal/scheduling/reservations/commitments/api_report_capacity_test.go b/internal/scheduling/reservations/commitments/api_report_capacity_test.go index 644e83cb1..c659d0de2 100644 --- a/internal/scheduling/reservations/commitments/api_report_capacity_test.go +++ b/internal/scheduling/reservations/commitments/api_report_capacity_test.go @@ -305,7 +305,7 @@ func TestCapacityCalculatorWithHypervisors(t *testing.T) { } hvByName := map[string]hv1.Hypervisor{"host-1": *hvObj} - capacity, err := calculator.calculateInstanceCapacity(context.Background(), flavorGroup, groups[flavorGroup], az, hvByName) + capacity, err := calculator.calculateInstanceCapacity(context.Background(), groups[flavorGroup], az, hvByName) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -347,7 +347,7 @@ func TestCapacityCalculatorWithHypervisors(t *testing.T) { } hvByName := map[string]hv1.Hypervisor{"host-1": *host1HV, "host-2": *host2HV} - capacity, err := calculator.calculateInstanceCapacity(context.Background(), flavorGroup, groups[flavorGroup], az, hvByName) + capacity, err := calculator.calculateInstanceCapacity(context.Background(), groups[flavorGroup], az, hvByName) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -382,7 +382,7 @@ func TestCapacityCalculatorWithHypervisors(t *testing.T) { } hvByName := map[string]hv1.Hypervisor{"host-1": *hvObj} - capacity, err := calculator.calculateInstanceCapacity(context.Background(), flavorGroup, groups[flavorGroup], az, hvByName) + capacity, err := calculator.calculateInstanceCapacity(context.Background(), groups[flavorGroup], az, hvByName) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -417,7 +417,7 @@ func TestCapacityCalculatorWithHypervisors(t *testing.T) { } hvByName := map[string]hv1.Hypervisor{} // empty - capacity, err := calculator.calculateInstanceCapacity(context.Background(), flavorGroup, groups[flavorGroup], az, hvByName) + capacity, err := calculator.calculateInstanceCapacity(context.Background(), groups[flavorGroup], az, hvByName) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -451,7 +451,7 @@ func TestCapacityCalculatorWithHypervisors(t *testing.T) { } hvByName := map[string]hv1.Hypervisor{} - _, err = calculator.calculateInstanceCapacity(context.Background(), flavorGroup, groups[flavorGroup], az, hvByName) + _, err = calculator.calculateInstanceCapacity(context.Background(), groups[flavorGroup], az, hvByName) if err == nil { t.Fatal("expected error on scheduler failure, got nil") } @@ -524,7 +524,7 @@ func TestCapacityCalculatorWithHypervisors(t *testing.T) { } hvByName := map[string]hv1.Hypervisor{"host-1": *hvObj} - capacity, err := calculator.calculateInstanceCapacity(context.Background(), flavorGroup, groups[flavorGroup], az, hvByName) + capacity, err := calculator.calculateInstanceCapacity(context.Background(), groups[flavorGroup], az, hvByName) if err != nil { t.Fatalf("unexpected error: %v", err) } diff --git a/internal/scheduling/reservations/commitments/capacity.go b/internal/scheduling/reservations/commitments/capacity.go index ea0c11bdd..34dac1587 100644 --- a/internal/scheduling/reservations/commitments/capacity.go +++ b/internal/scheduling/reservations/commitments/capacity.go @@ -136,7 +136,7 @@ func (c *CapacityCalculator) calculateAZCapacity( result := make(map[liquid.AvailabilityZone]*liquid.AZResourceCapacityReport) failures := 0 for _, az := range azs { - capacity, err := c.calculateInstanceCapacity(ctx, groupName, groupData, az, hvByName) + capacity, err := c.calculateInstanceCapacity(ctx, groupData, az, hvByName) if err != nil { failures++ LoggerFromContext(ctx).Error(err, "failed to calculate capacity for AZ, reporting 0", @@ -171,7 +171,6 @@ func (c *CapacityCalculator) calculateAZCapacity( // 3. Total capacity = sum(floor(EffectiveCapacity.Memory / smallestFlavorMemory)) func (c *CapacityCalculator) calculateInstanceCapacity( ctx context.Context, - groupName string, groupData compute.FlavorGroupFeature, az string, hvByName map[string]hv1.Hypervisor, From 0fe5078431b532636557ef95baa7233ff69f2e53 Mon Sep 17 00:00:00 2001 From: Julius Clausnitzer Date: Wed, 15 Apr 2026 16:54:16 +0200 Subject: [PATCH 16/16] fix hypervisor az mapping --- .../commitments/api_report_capacity_test.go | 110 ++++++++---------- .../reservations/commitments/capacity.go | 67 +++-------- 2 files changed, 63 insertions(+), 114 deletions(-) diff --git a/internal/scheduling/reservations/commitments/api_report_capacity_test.go b/internal/scheduling/reservations/commitments/api_report_capacity_test.go index c659d0de2..8573ea021 100644 --- a/internal/scheduling/reservations/commitments/api_report_capacity_test.go +++ b/internal/scheduling/reservations/commitments/api_report_capacity_test.go @@ -175,18 +175,22 @@ func TestCapacityCalculator(t *testing.T) { } }) - t.Run("CalculateCapacity returns perAZ entries for all AZs from host details", func(t *testing.T) { + t.Run("CalculateCapacity returns perAZ entries for all AZs from hypervisors", func(t *testing.T) { flavorGroupKnowledge := createTestFlavorGroupKnowledge(t, "test-group") - hostDetails := createTestHostDetailsKnowledge(t, map[string]string{ + hvs := createTestHypervisorsWithAZ(map[string]string{ "host-1": "qa-de-1a", "host-2": "qa-de-1b", }) + server := newMockSchedulerServer(t, []string{}) + defer server.Close() + cfg := DefaultConfig() + cfg.SchedulerURL = server.URL fakeClient := fake.NewClientBuilder(). WithScheme(scheme). - WithObjects(flavorGroupKnowledge, hostDetails). + WithObjects(flavorGroupKnowledge, hvs[0], hvs[1]). Build() - calculator := NewCapacityCalculator(fakeClient, DefaultConfig()) + calculator := NewCapacityCalculator(fakeClient, cfg) report, err := calculator.CalculateCapacity(context.Background()) if err != nil { t.Fatalf("Expected no error, got: %v", err) @@ -196,7 +200,7 @@ func TestCapacityCalculator(t *testing.T) { t.Fatalf("Expected 3 resources (_ram, _cores, _instances), got %d", len(report.Resources)) } - // Verify all resources have entries for the AZs from host details + // Verify all resources have entries for the AZs from hypervisors expectedAZs := []liquid.AvailabilityZone{"qa-de-1a", "qa-de-1b"} for _, resName := range []string{"hw_version_test-group_ram", "hw_version_test-group_cores", "hw_version_test-group_instances"} { res := report.Resources[liquid.ResourceName(resName)] @@ -236,18 +240,22 @@ func TestCapacityCalculator(t *testing.T) { } }) - t.Run("CalculateCapacity produces perAZ entries matching host details AZs", func(t *testing.T) { + t.Run("CalculateCapacity produces perAZ entries matching hypervisor AZs", func(t *testing.T) { flavorGroupKnowledge := createTestFlavorGroupKnowledge(t, "test-group") - hostDetails := createTestHostDetailsKnowledge(t, map[string]string{ + hvs := createTestHypervisorsWithAZ(map[string]string{ "host-a": "eu-de-1a", "host-b": "eu-de-1b", }) + server := newMockSchedulerServer(t, []string{}) + defer server.Close() + cfg := DefaultConfig() + cfg.SchedulerURL = server.URL fakeClient := fake.NewClientBuilder(). WithScheme(scheme). - WithObjects(flavorGroupKnowledge, hostDetails). + WithObjects(flavorGroupKnowledge, hvs[0], hvs[1]). Build() - calculator := NewCapacityCalculator(fakeClient, DefaultConfig()) + calculator := NewCapacityCalculator(fakeClient, cfg) report, err := calculator.CalculateCapacity(context.Background()) if err != nil { t.Fatalf("Expected no error, got: %v", err) @@ -284,12 +292,11 @@ func TestCapacityCalculatorWithHypervisors(t *testing.T) { server := newMockSchedulerServer(t, []string{"host-1"}) defer server.Close() - hvObj := createTestHypervisor("host-1", "256Gi", "64Gi") - hostDetails := createTestHostDetailsKnowledge(t, map[string]string{"host-1": az}) + hvObj := createTestHypervisorWithAZ("host-1", az, "256Gi", "64Gi") fakeClient := fake.NewClientBuilder(). WithScheme(scheme). - WithObjects(flavorGroupKnowledge, hostDetails, hvObj). + WithObjects(flavorGroupKnowledge, hvObj). Build() calculator := &CapacityCalculator{ @@ -322,16 +329,12 @@ func TestCapacityCalculatorWithHypervisors(t *testing.T) { server := newMockSchedulerServer(t, []string{"host-1", "host-2"}) defer server.Close() - host1HV := createTestHypervisor("host-1", "256Gi", "128Gi") - host2HV := createTestHypervisor("host-2", "128Gi", "0") - hostDetails := createTestHostDetailsKnowledge(t, map[string]string{ - "host-1": az, - "host-2": az, - }) + host1HV := createTestHypervisorWithAZ("host-1", az, "256Gi", "128Gi") + host2HV := createTestHypervisorWithAZ("host-2", az, "128Gi", "0") fakeClient := fake.NewClientBuilder(). WithScheme(scheme). - WithObjects(flavorGroupKnowledge, hostDetails, host1HV, host2HV). + WithObjects(flavorGroupKnowledge, host1HV, host2HV). Build() calculator := &CapacityCalculator{ @@ -361,12 +364,11 @@ func TestCapacityCalculatorWithHypervisors(t *testing.T) { server := newMockSchedulerServer(t, []string{"host-1"}) defer server.Close() - hvObj := createTestHypervisor("host-1", "128Gi", "0") - hostDetails := createTestHostDetailsKnowledge(t, map[string]string{"host-1": az}) + hvObj := createTestHypervisorWithAZ("host-1", az, "128Gi", "0") fakeClient := fake.NewClientBuilder(). WithScheme(scheme). - WithObjects(flavorGroupKnowledge, hostDetails, hvObj). + WithObjects(flavorGroupKnowledge, hvObj). Build() calculator := &CapacityCalculator{ @@ -397,11 +399,11 @@ func TestCapacityCalculatorWithHypervisors(t *testing.T) { server := newMockSchedulerServer(t, []string{"host-unknown"}) defer server.Close() - hostDetails := createTestHostDetailsKnowledge(t, map[string]string{"host-unknown": az}) + hostDetails := createTestHypervisorsWithAZ(map[string]string{"host-unknown": az}) fakeClient := fake.NewClientBuilder(). WithScheme(scheme). - WithObjects(flavorGroupKnowledge, hostDetails). + WithObjects(flavorGroupKnowledge, hostDetails[0]). Build() calculator := &CapacityCalculator{ @@ -458,20 +460,16 @@ func TestCapacityCalculatorWithHypervisors(t *testing.T) { }) t.Run("multiple AZs are reported independently", func(t *testing.T) { - hostDetails := createTestHostDetailsKnowledge(t, map[string]string{ - "host-1": "az-a", - "host-2": "az-b", - }) // Scheduler always returns both hosts (mock doesn't filter by AZ). server := newMockSchedulerServer(t, []string{"host-1", "host-2"}) defer server.Close() - host1HV := createTestHypervisor("host-1", "128Gi", "32Gi") - host2HV := createTestHypervisor("host-2", "64Gi", "0") + host1HV := createTestHypervisorWithAZ("host-1", "az-a", "128Gi", "32Gi") + host2HV := createTestHypervisorWithAZ("host-2", "az-b", "64Gi", "0") fakeClient := fake.NewClientBuilder(). WithScheme(scheme). - WithObjects(flavorGroupKnowledge, hostDetails, host1HV, host2HV). + WithObjects(flavorGroupKnowledge, host1HV, host2HV). Build() calculator := &CapacityCalculator{ @@ -503,12 +501,11 @@ func TestCapacityCalculatorWithHypervisors(t *testing.T) { server := newMockSchedulerServer(t, []string{"host-1"}) defer server.Close() - hvObj := createTestHypervisor("host-1", "100Gi", "0") - hostDetails := createTestHostDetailsKnowledge(t, map[string]string{"host-1": az}) + hvObj := createTestHypervisorWithAZ("host-1", az, "100Gi", "0") fakeClient := fake.NewClientBuilder(). WithScheme(scheme). - WithObjects(flavorGroupKnowledge, hostDetails, hvObj). + WithObjects(flavorGroupKnowledge, hvObj). Build() calculator := &CapacityCalculator{ @@ -564,6 +561,13 @@ func createTestHypervisor(name, effectiveCapacity, allocation string) *hv1.Hyper return hv } +// createTestHypervisorWithAZ creates an HV CRD with a topology.kubernetes.io/zone label. +func createTestHypervisorWithAZ(name, az, effectiveCapacity, allocation string) *hv1.Hypervisor { + hv := createTestHypervisor(name, effectiveCapacity, allocation) + hv.Labels = map[string]string{"topology.kubernetes.io/zone": az} + return hv +} + // createEmptyFlavorGroupKnowledge creates an empty flavor groups Knowledge CRD func createEmptyFlavorGroupKnowledge() *v1alpha1.Knowledge { // Box empty array properly @@ -702,32 +706,18 @@ func createTestFlavorGroupKnowledgeWithSmallest(t *testing.T, groupName string, } } -// createTestHostDetailsKnowledge creates a Knowledge CRD with host→AZ mappings. -func createTestHostDetailsKnowledge(t *testing.T, hostToAZ map[string]string) *v1alpha1.Knowledge { - t.Helper() - - features := make([]map[string]interface{}, 0, len(hostToAZ)) +// createTestHypervisorsWithAZ creates HV CRDs with topology.kubernetes.io/zone labels +// from a host→AZ map. Hypervisors have no capacity data (used only for AZ discovery). +func createTestHypervisorsWithAZ(hostToAZ map[string]string) []*hv1.Hypervisor { + hvs := make([]*hv1.Hypervisor, 0, len(hostToAZ)) for host, az := range hostToAZ { - features = append(features, map[string]interface{}{ - "ComputeHost": host, - "AvailabilityZone": az, - }) - } - - raw, err := v1alpha1.BoxFeatureList(features) - if err != nil { - t.Fatal(err) - } - - return &v1alpha1.Knowledge{ - ObjectMeta: v1.ObjectMeta{Name: "host-details"}, - Spec: v1alpha1.KnowledgeSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Extractor: v1alpha1.KnowledgeExtractorSpec{Name: "sap_host_details_extractor"}, - }, - Status: v1alpha1.KnowledgeStatus{ - Conditions: []v1.Condition{{Type: v1alpha1.KnowledgeConditionReady, Status: "True"}}, - Raw: raw, - }, + hv := &hv1.Hypervisor{ + ObjectMeta: v1.ObjectMeta{ + Name: host, + Labels: map[string]string{"topology.kubernetes.io/zone": az}, + }, + } + hvs = append(hvs, hv) } + return hvs } diff --git a/internal/scheduling/reservations/commitments/capacity.go b/internal/scheduling/reservations/commitments/capacity.go index 34dac1587..e65edac48 100644 --- a/internal/scheduling/reservations/commitments/capacity.go +++ b/internal/scheduling/reservations/commitments/capacity.go @@ -14,7 +14,6 @@ import ( "github.com/sapcc/go-api-declarations/liquid" "sigs.k8s.io/controller-runtime/pkg/client" - "github.com/cobaltcore-dev/cortex/api/v1alpha1" "github.com/cobaltcore-dev/cortex/internal/knowledge/extractor/plugins/compute" "github.com/cobaltcore-dev/cortex/internal/scheduling/reservations" ) @@ -37,7 +36,7 @@ func NewCapacityCalculator(client client.Client, config Config) *CapacityCalcula // CalculateCapacity computes per-AZ capacity for all flavor groups. // For each flavor group, three resources are reported: _ram, _cores, _instances. // All flavor groups are included, not just those with fixed RAM/core ratio. -// AZs are derived from HostDetails Knowledge CRDs. +// AZs are derived from Hypervisor CRDs via the topology.kubernetes.io/zone label. func (c *CapacityCalculator) CalculateCapacity(ctx context.Context) (liquid.ServiceCapacityReport, error) { // Get all flavor groups from Knowledge CRDs knowledge := &reservations.FlavorGroupKnowledgeClient{Client: c.client} @@ -52,12 +51,6 @@ func (c *CapacityCalculator) CalculateCapacity(ctx context.Context) (liquid.Serv infoVersion = knowledgeCRD.Status.LastContentChange.Unix() } - // Get availability zones from host details - azs, err := c.getAvailabilityZones(ctx) - if err != nil { - return liquid.ServiceCapacityReport{}, fmt.Errorf("failed to get availability zones: %w", err) - } - // Pre-fetch all Hypervisor CRDs once (shared across all flavor groups and AZs) var hvList hv1.HypervisorList if err := c.client.List(ctx, &hvList); err != nil { @@ -68,6 +61,9 @@ func (c *CapacityCalculator) CalculateCapacity(ctx context.Context) (liquid.Serv hvByName[hv.Name] = hv } + // Derive AZs from Hypervisor CRDs via topology.kubernetes.io/zone label + azs := getAvailabilityZones(hvList.Items) + // Build capacity report for all flavor groups report := liquid.ServiceCapacityReport{ InfoVersion: infoVersion, @@ -203,7 +199,7 @@ func (c *CapacityCalculator) calculateInstanceCapacity( hv, ok := hvByName[hostName] if !ok { LoggerFromContext(ctx).Info("scheduler host not found in hypervisor CRDs, skipping", - "host", hostName, "az", az, "flavorGroup", groupName) + "host", hostName, "az", az) continue } @@ -231,56 +227,19 @@ func (c *CapacityCalculator) calculateInstanceCapacity( return totalCapacity, nil } -// getHostAZMap returns a map from compute host name to availability zone. -func (c *CapacityCalculator) getHostAZMap(ctx context.Context) (map[string]string, error) { - var knowledgeList v1alpha1.KnowledgeList - if err := c.client.List(ctx, &knowledgeList); err != nil { - return nil, fmt.Errorf("failed to list knowledge CRDs: %w", err) - } - - type hostAZEntry struct { - ComputeHost string `json:"ComputeHost"` - AvailabilityZone string `json:"AvailabilityZone"` - } - - hostAZMap := make(map[string]string) - for _, knowledge := range knowledgeList.Items { - if knowledge.Spec.SchedulingDomain != v1alpha1.SchedulingDomainNova { - continue - } - if knowledge.Spec.Extractor.Name != "sap_host_details_extractor" { - continue - } - features, err := v1alpha1.UnboxFeatureList[hostAZEntry](knowledge.Status.Raw) - if err != nil { - continue - } - for _, feature := range features { - if feature.ComputeHost != "" && feature.AvailabilityZone != "" { - hostAZMap[feature.ComputeHost] = feature.AvailabilityZone - } - } - } - - return hostAZMap, nil -} - -func (c *CapacityCalculator) getAvailabilityZones(ctx context.Context) ([]string, error) { - hostAZMap, err := c.getHostAZMap(ctx) - if err != nil { - return nil, err - } - +// getAvailabilityZones returns a sorted, deduplicated list of AZs from Hypervisor CRDs. +// AZ is read from the topology.kubernetes.io/zone label on each Hypervisor. +func getAvailabilityZones(hvs []hv1.Hypervisor) []string { azSet := make(map[string]struct{}) - for _, az := range hostAZMap { - azSet[az] = struct{}{} + for _, hv := range hvs { + if az, ok := hv.Labels["topology.kubernetes.io/zone"]; ok && az != "" { + azSet[az] = struct{}{} + } } - azs := make([]string, 0, len(azSet)) for az := range azSet { azs = append(azs, az) } sort.Strings(azs) - - return azs, nil + return azs } \ No newline at end of file