From c3c52c0a54ec66145380b946f6816fe2e59ca494 Mon Sep 17 00:00:00 2001 From: Daniil Antoshin Date: Wed, 20 May 2026 21:26:36 +0200 Subject: [PATCH] fix: delay repeated workload migrations Delay workload-update VMOP execution after a recently completed migration to let target cleanup settle. Retry VM placement updates in the e2e test to avoid resourceVersion conflicts. Signed-off-by: Daniil Antoshin --- .../migration/internal/handler/lifecycle.go | 40 +++++++++++++++++ .../internal/handler/lifecycle_test.go | 45 +++++++++++++++++++ test/e2e/vm/affinity_toleration.go | 41 ++++++++++++----- 3 files changed, 114 insertions(+), 12 deletions(-) diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go index 9bb21ce859..704f88e30a 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle.go @@ -30,6 +30,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/reconcile" + "github.com/deckhouse/virtualization-controller/pkg/common/annotations" "github.com/deckhouse/virtualization-controller/pkg/common/object" commonvmop "github.com/deckhouse/virtualization-controller/pkg/common/vmop" "github.com/deckhouse/virtualization-controller/pkg/controller/conditions" @@ -65,6 +66,8 @@ const ( messageSourceVMSuspended = "Source VM suspended" ) +const workloadUpdateMigrationCooldown = 30 * time.Second + const ( reasonFailedAttachVolume = "FailedAttachVolume" reasonFailedMount = "FailedMount" @@ -235,6 +238,21 @@ func (h LifecycleHandler) Handle(ctx context.Context, vmop *v1alpha2.VirtualMach return reconcile.Result{}, nil } + if remaining, err := h.workloadUpdateMigrationCooldownRemaining(ctx, vmop); err != nil { + return reconcile.Result{}, err + } else if remaining > 0 { + vmop.Status.Phase = v1alpha2.VMOPPhasePending + vmop.Status.Progress = migrationprogress.FormatPercent(1) + conditions.SetCondition( + conditions.NewConditionBuilder(vmopcondition.TypeCompleted). + Generation(vmop.GetGeneration()). + Reason(vmopcondition.ReasonMigrationPending). + Status(metav1.ConditionFalse). + Message("Waiting for the previous migration cleanup to complete."), + &vmop.Status.Conditions) + return reconcile.Result{RequeueAfter: remaining}, nil + } + // 7. Check if the vm is migratable. if !h.canExecute(vmop, vm) { return reconcile.Result{}, nil @@ -398,6 +416,28 @@ func (h LifecycleHandler) otherMigrationsAreInProgress(ctx context.Context, vmop return false, nil } +func (h LifecycleHandler) workloadUpdateMigrationCooldownRemaining(ctx context.Context, vmop *v1alpha2.VirtualMachineOperation) (time.Duration, error) { + if _, ok := vmop.GetAnnotations()[annotations.AnnVMOPWorkloadUpdate]; !ok { + return 0, nil + } + + kvvmi, err := object.FetchObject(ctx, types.NamespacedName{Name: vmop.Spec.VirtualMachine, Namespace: vmop.Namespace}, h.client, &virtv1.VirtualMachineInstance{}) + if err != nil || kvvmi == nil { + return 0, err + } + + state := kvvmi.Status.MigrationState + if state == nil || !state.Completed || state.EndTimestamp == nil { + return 0, nil + } + + elapsed := time.Since(state.EndTimestamp.Time) + if elapsed >= workloadUpdateMigrationCooldown { + return 0, nil + } + return workloadUpdateMigrationCooldown - elapsed, nil +} + func (h LifecycleHandler) canExecute(vmop *v1alpha2.VirtualMachineOperation, vm *v1alpha2.VirtualMachine) bool { migrating, _ := conditions.GetCondition(vmcondition.TypeMigrating, vm.Status.Conditions) if migrating.Reason == vmcondition.ReasonReadyToMigrate.String() { diff --git a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go index e821efab49..caeab23a42 100644 --- a/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go +++ b/images/virtualization-artifact/pkg/controller/vmop/migration/internal/handler/lifecycle_test.go @@ -33,6 +33,7 @@ import ( vmbuilder "github.com/deckhouse/virtualization-controller/pkg/builder/vm" vmopbuilder "github.com/deckhouse/virtualization-controller/pkg/builder/vmop" + "github.com/deckhouse/virtualization-controller/pkg/common/annotations" "github.com/deckhouse/virtualization-controller/pkg/common/testutil" "github.com/deckhouse/virtualization-controller/pkg/controller/conditions" "github.com/deckhouse/virtualization-controller/pkg/controller/reconciler" @@ -128,6 +129,50 @@ var _ = Describe("LifecycleHandler", func() { return vmop } + newKVVMIWithCompletedMigration := func(endTimestamp metav1.Time) *virtv1.VirtualMachineInstance { + return &virtv1.VirtualMachineInstance{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + Status: virtv1.VirtualMachineInstanceStatus{ + MigrationState: &virtv1.VirtualMachineInstanceMigrationState{ + Completed: true, + EndTimestamp: &endTimestamp, + }, + }, + } + } + + It("should postpone workload update migration during cooldown after completed migration", func() { + vm := newVM(v1alpha2.PreferSafeMigrationPolicy) + vm.Status.Conditions = []metav1.Condition{{ + Type: string(vmcondition.TypeMigrating), + Reason: string(vmcondition.ReasonReadyToMigrate), + }} + vmop := newVMOPEvictPending(vmopbuilder.WithAnnotation(annotations.AnnVMOPWorkloadUpdate, "true")) + kvvmi := newKVVMIWithCompletedMigration(metav1.Now()) + + fakeClient, srv = setupEnvironment(vmop, vm, kvvmi) + migrationService := service.NewMigrationService(fakeClient, featuregates.Default()) + base := genericservice.NewBaseVMOPService(fakeClient, recorderMock) + + h := NewLifecycleHandler(fakeClient, migrationService, base, recorderMock) + result, err := h.Handle(ctx, srv.Changed()) + Expect(err).NotTo(HaveOccurred()) + + Expect(result.RequeueAfter).To(BeNumerically(">", 0)) + Expect(srv.Changed().Status.Phase).To(Equal(v1alpha2.VMOPPhasePending)) + completed, found := conditions.GetCondition(vmopcondition.TypeCompleted, srv.Changed().Status.Conditions) + Expect(found).To(BeTrue()) + Expect(completed.Reason).To(Equal(vmopcondition.ReasonMigrationPending.String())) + + migrations := virtv1.VirtualMachineInstanceMigrationList{} + err = fakeClient.List(ctx, &migrations) + Expect(err).NotTo(HaveOccurred()) + Expect(migrations.Items).To(BeEmpty()) + }) + DescribeTable("Evict operation for migration policy", func(vmop *v1alpha2.VirtualMachineOperation, vmPolicy v1alpha2.LiveMigrationPolicy, expectedPhase v1alpha2.VMOPPhase) { vm := newVM(vmPolicy) diff --git a/test/e2e/vm/affinity_toleration.go b/test/e2e/vm/affinity_toleration.go index fa78e64972..417dd27bbb 100644 --- a/test/e2e/vm/affinity_toleration.go +++ b/test/e2e/vm/affinity_toleration.go @@ -26,6 +26,7 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/util/retry" "k8s.io/utils/ptr" crclient "sigs.k8s.io/controller-runtime/pkg/client" @@ -171,8 +172,9 @@ var _ = Describe("VirtualMachineAffinityAndToleration", Ordered, Label(precheck. sourceNode := vmC.Status.Node startedAt := time.Now().UTC() - vmC.Spec.Affinity = antiAffinityToVM("vm-a") - err := f.GenericClient().Update(ctx, vmC) + err := updateVirtualMachineSpec(ctx, f, vmC.Name, func(vm *v1alpha2.VirtualMachine) { + vm.Spec.Affinity = antiAffinityToVM("vm-a") + }) Expect(err).NotTo(HaveOccurred()) waitForStabilizedVMMigration( @@ -200,8 +202,9 @@ var _ = Describe("VirtualMachineAffinityAndToleration", Ordered, Label(precheck. vmC = getVirtualMachine(ctx, f, vmC.Name) startedAt := time.Now().UTC() - vmC.Spec.Affinity = affinityToVM("vm-a") - err := f.GenericClient().Update(ctx, vmC) + err := updateVirtualMachineSpec(ctx, f, vmC.Name, func(vm *v1alpha2.VirtualMachine) { + vm.Spec.Affinity = affinityToVM("vm-a") + }) Expect(err).NotTo(HaveOccurred()) waitForStabilizedVMMigration( @@ -259,8 +262,9 @@ var _ = Describe("VirtualMachineAffinityAndToleration", Ordered, Label(precheck. sourceNode = vmNodeSelector.Status.Node Expect(sourceNode).NotTo(BeEmpty()) - vmNodeSelector.Spec.NodeSelector = map[string]string{affinityHostnameLabelKey: sourceNode} - err := f.GenericClient().Update(ctx, vmNodeSelector) + err := updateVirtualMachineSpec(ctx, f, vmNodeSelector.Name, func(vm *v1alpha2.VirtualMachine) { + vm.Spec.NodeSelector = map[string]string{affinityHostnameLabelKey: sourceNode} + }) Expect(err).NotTo(HaveOccurred()) assertNoVMMigration(ctx, f, crclient.ObjectKeyFromObject(vmNodeSelector), sourceNode, placementNoMigrationWait) @@ -274,8 +278,9 @@ var _ = Describe("VirtualMachineAffinityAndToleration", Ordered, Label(precheck. vmNodeSelector = getVirtualMachine(ctx, f, vmNodeSelector.Name) startedAt := time.Now().UTC() - vmNodeSelector.Spec.NodeSelector = map[string]string{affinityHostnameLabelKey: targetNode} - err = f.GenericClient().Update(ctx, vmNodeSelector) + err = updateVirtualMachineSpec(ctx, f, vmNodeSelector.Name, func(vm *v1alpha2.VirtualMachine) { + vm.Spec.NodeSelector = map[string]string{affinityHostnameLabelKey: targetNode} + }) Expect(err).NotTo(HaveOccurred()) waitForStabilizedVMMigration( @@ -336,8 +341,9 @@ var _ = Describe("VirtualMachineAffinityAndToleration", Ordered, Label(precheck. sourceNode = vmNodeAffinity.Status.Node Expect(sourceNode).NotTo(BeEmpty()) - vmNodeAffinity.Spec.Affinity = nodeAffinityForNode(sourceNode) - err := f.GenericClient().Update(ctx, vmNodeAffinity) + err := updateVirtualMachineSpec(ctx, f, vmNodeAffinity.Name, func(vm *v1alpha2.VirtualMachine) { + vm.Spec.Affinity = nodeAffinityForNode(sourceNode) + }) Expect(err).NotTo(HaveOccurred()) assertNoVMMigration(ctx, f, crclient.ObjectKeyFromObject(vmNodeAffinity), sourceNode, placementNoMigrationWait) @@ -351,8 +357,9 @@ var _ = Describe("VirtualMachineAffinityAndToleration", Ordered, Label(precheck. vmNodeAffinity = getVirtualMachine(ctx, f, vmNodeAffinity.Name) startedAt := time.Now().UTC() - vmNodeAffinity.Spec.Affinity = nodeAffinityForNode(targetNode) - err = f.GenericClient().Update(ctx, vmNodeAffinity) + err = updateVirtualMachineSpec(ctx, f, vmNodeAffinity.Name, func(vm *v1alpha2.VirtualMachine) { + vm.Spec.Affinity = nodeAffinityForNode(targetNode) + }) Expect(err).NotTo(HaveOccurred()) waitForStabilizedVMMigration( @@ -545,6 +552,16 @@ func assertNoVMMigration( }).WithTimeout(duration).WithPolling(placementNoMigrationPolling).Should(Succeed()) } +func updateVirtualMachineSpec(ctx context.Context, f *framework.Framework, name string, mutate func(*v1alpha2.VirtualMachine)) error { + GinkgoHelper() + + return retry.RetryOnConflict(retry.DefaultBackoff, func() error { + vm := getVirtualMachine(ctx, f, name) + mutate(vm) + return f.GenericClient().Update(ctx, vm) + }) +} + func getVirtualMachine(ctx context.Context, f *framework.Framework, name string) *v1alpha2.VirtualMachine { GinkgoHelper()