diff --git a/internal/controller/device/vpci/doc.go b/internal/controller/device/vpci/doc.go new file mode 100644 index 0000000000..c00562aae9 --- /dev/null +++ b/internal/controller/device/vpci/doc.go @@ -0,0 +1,69 @@ +//go:build windows + +// Package vpci provides a controller for managing virtual PCI (vPCI) device +// assignments on a Utility VM (UVM). It handles assigning and removing +// PCI devices from the UVM via HCS modify calls. +// +// # Lifecycle +// +// [Controller] tracks active device assignments by VMBus GUID (device identifier +// within UVM) in an internal map. Each assignment is reference-counted to +// support shared access by multiple callers. +// +// A device follows the state machine below. +// +// ┌─────────────────┐ +// │ StateReserved │ +// └────────┬────────┘ +// │ AddToVM host ok +// ▼ +// ┌─────────────────┐ AddToVM host fails ┌─────────────────┐ +// │ StateAssigned │──────────────────────────►│ StateRemoved │ +// └────────┬────────┘ └────────┬────────┘ +// ┌───────────┤ │ RemoveFromVM +// │ │ waitGuest ok ▼ +// │ ▼ (untracked) +// │ ┌─────────────────┐ +// │ │ StateReady │◄── AddToVM (refCount++) +// │ └────────┬────────┘ +// │ │ RemoveFromVM ok +// │ ▼ +// │ (untracked) +// │ +// │ ┌──────────────────────┐ +// └──waitGuest fail───────────────►│ StateAssignedInvalid │◄── RemoveFromVM host fails +// └──────────┬───────────┘ +// │ RemoveFromVM ok +// ▼ +// (untracked) +// +// - [Controller.Reserve] generates a unique VMBus GUID for a device and +// records the reservation. If the same device is already reserved, the +// existing GUID is returned. +// - [Controller.AddToVM] assigns a previously reserved device to the VM +// using the VMBus GUID returned by Reserve. If the device is already +// ready for use in the VM, the reference count is incremented. +// - [Controller.RemoveFromVM] decrements the reference count for the device +// identified by VMBus GUID. When it reaches zero, the device is removed +// from the VM. It also handles cleanup for devices that were reserved +// but never assigned, and for devices in an invalid state. +// +// # Invalid Devices +// +// The device is marked invalid if the host-side assignment succeeds but the +// guest-side notification fails or if the host-side remove call fails. +// The device remains tracked as Invalid so that the caller can call +// [Controller.RemoveFromVM] to perform host-side cleanup. +// +// # Virtual Functions +// +// Each Virtual Function is assigned as an independent guest device with its own +// VMBus GUID. Multiple Virtual Functions on the same physical device are treated +// as separate devices in the guest. +// +// # Guest Requests +// +// On LCOW, assigning a vPCI device requires a guest-side notification so the +// GCS can wait for the required device paths to become available. +// WCOW does not require a guest request as part of device assignment. +package vpci diff --git a/internal/controller/device/vpci/mocks/mock_vpci.go b/internal/controller/device/vpci/mocks/mock_vpci.go new file mode 100644 index 0000000000..c541fcc4bc --- /dev/null +++ b/internal/controller/device/vpci/mocks/mock_vpci.go @@ -0,0 +1,110 @@ +// Code generated by MockGen. DO NOT EDIT. +// Source: types.go +// +// Generated by this command: +// +// mockgen -source types.go -package mocks -destination mocks/mock_vpci.go +// + +// Package mocks is a generated GoMock package. +package mocks + +import ( + context "context" + reflect "reflect" + + guid "github.com/Microsoft/go-winio/pkg/guid" + hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + guestresource "github.com/Microsoft/hcsshim/internal/protocol/guestresource" + gomock "go.uber.org/mock/gomock" +) + +// MockvmVPCI is a mock of vmVPCI interface. +type MockvmVPCI struct { + ctrl *gomock.Controller + recorder *MockvmVPCIMockRecorder + isgomock struct{} +} + +// MockvmVPCIMockRecorder is the mock recorder for MockvmVPCI. +type MockvmVPCIMockRecorder struct { + mock *MockvmVPCI +} + +// NewMockvmVPCI creates a new mock instance. +func NewMockvmVPCI(ctrl *gomock.Controller) *MockvmVPCI { + mock := &MockvmVPCI{ctrl: ctrl} + mock.recorder = &MockvmVPCIMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockvmVPCI) EXPECT() *MockvmVPCIMockRecorder { + return m.recorder +} + +// AddDevice mocks base method. +func (m *MockvmVPCI) AddDevice(ctx context.Context, vmbusGUID guid.GUID, settings hcsschema.VirtualPciDevice) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "AddDevice", ctx, vmbusGUID, settings) + ret0, _ := ret[0].(error) + return ret0 +} + +// AddDevice indicates an expected call of AddDevice. +func (mr *MockvmVPCIMockRecorder) AddDevice(ctx, vmbusGUID, settings any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddDevice", reflect.TypeOf((*MockvmVPCI)(nil).AddDevice), ctx, vmbusGUID, settings) +} + +// RemoveDevice mocks base method. +func (m *MockvmVPCI) RemoveDevice(ctx context.Context, vmbusGUID guid.GUID) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "RemoveDevice", ctx, vmbusGUID) + ret0, _ := ret[0].(error) + return ret0 +} + +// RemoveDevice indicates an expected call of RemoveDevice. +func (mr *MockvmVPCIMockRecorder) RemoveDevice(ctx, vmbusGUID any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RemoveDevice", reflect.TypeOf((*MockvmVPCI)(nil).RemoveDevice), ctx, vmbusGUID) +} + +// MocklinuxGuestVPCI is a mock of linuxGuestVPCI interface. +type MocklinuxGuestVPCI struct { + ctrl *gomock.Controller + recorder *MocklinuxGuestVPCIMockRecorder + isgomock struct{} +} + +// MocklinuxGuestVPCIMockRecorder is the mock recorder for MocklinuxGuestVPCI. +type MocklinuxGuestVPCIMockRecorder struct { + mock *MocklinuxGuestVPCI +} + +// NewMocklinuxGuestVPCI creates a new mock instance. +func NewMocklinuxGuestVPCI(ctrl *gomock.Controller) *MocklinuxGuestVPCI { + mock := &MocklinuxGuestVPCI{ctrl: ctrl} + mock.recorder = &MocklinuxGuestVPCIMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MocklinuxGuestVPCI) EXPECT() *MocklinuxGuestVPCIMockRecorder { + return m.recorder +} + +// AddVPCIDevice mocks base method. +func (m *MocklinuxGuestVPCI) AddVPCIDevice(ctx context.Context, settings guestresource.LCOWMappedVPCIDevice) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "AddVPCIDevice", ctx, settings) + ret0, _ := ret[0].(error) + return ret0 +} + +// AddVPCIDevice indicates an expected call of AddVPCIDevice. +func (mr *MocklinuxGuestVPCIMockRecorder) AddVPCIDevice(ctx, settings any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddVPCIDevice", reflect.TypeOf((*MocklinuxGuestVPCI)(nil).AddVPCIDevice), ctx, settings) +} diff --git a/internal/controller/device/vpci/state.go b/internal/controller/device/vpci/state.go new file mode 100644 index 0000000000..dcecdcbb52 --- /dev/null +++ b/internal/controller/device/vpci/state.go @@ -0,0 +1,97 @@ +//go:build windows + +package vpci + +// State represents the current state of a vPCI device assignment lifecycle. +// +// The normal progression is: +// +// StateReserved → StateAssigned → StateReady → StateRemoved+untracked +// +// [StateAssigned] is a transient state set within a single [Controller.AddToVM] call +// after the host-side HCS modify succeeds. [waitGuestDeviceReady] is then called; on +// success the device moves to [StateReady], on failure to [StateAssignedInvalid]. +// +// A device transitions to [StateAssignedInvalid] when an operation partially succeeds +// and leaves the VM in an inconsistent state. This can happen in two ways: +// - [Controller.AddToVM]: host-side assignment succeeds but guest-side notification fails. +// - [Controller.RemoveFromVM]: the host-side remove call fails. +// +// A device in [StateAssignedInvalid] can only be cleaned up via [Controller.RemoveFromVM]. +// +// Full state-transition table: +// +// Current State │ Trigger │ Next State +// ─────────────────────┼────────────────────────────────────────────────────┼────────────────────── +// StateReserved │ AddToVM host-side succeeds │ StateAssigned +// StateReserved │ AddToVM host-side fails │ StateRemoved +// StateReserved │ RemoveFromVM called │ (untracked) +// StateAssigned │ waitGuestDeviceReady succeeds │ StateReady +// StateAssigned │ waitGuestDeviceReady fails │ StateAssignedInvalid +// StateReady │ AddToVM called (refCount++) │ StateReady +// StateReady │ RemoveFromVM refCount drops to 0, succeeds │ (untracked) +// StateReady │ RemoveFromVM refCount drops to 0, host-side fails │ StateAssignedInvalid +// StateAssignedInvalid │ RemoveFromVM succeeds │ (untracked) +// StateAssignedInvalid │ RemoveFromVM host-side fails │ StateAssignedInvalid +// StateRemoved │ AddToVM called │ error (call RemoveFromVM) +// StateRemoved │ RemoveFromVM called │ (untracked) +type State int32 + +const ( + // StateReserved indicates that a VMBus GUID has been generated and the + // device has been recorded in the Controller, but it has not yet been + // assigned to the VM via a host-side HCS modify call. + // This is the initial state set by [Controller.Reserve]. + StateReserved State = iota + + // StateAssigned is a transient state that indicates the host-side HCS modify + // has succeeded but [waitGuestDeviceReady] has not yet been called/completed + // within a single [Controller.AddToVM] invocation. + // External callers should never observe this state. + StateAssigned + + // StateReady indicates the device has been fully assigned to the VM: + // the host-side HCS modify succeeded and the guest-side device is ready. + // The reference count may be greater than one when multiple callers share + // the same device. + StateReady + + // StateAssignedInvalid indicates the device is in an inconsistent state due to a + // partial failure. This state is reached in two ways: + // - [Controller.AddToVM]: the host-side assignment succeeded but the + // guest-side notification failed; the host-side assignment still exists + // but the guest-side device is not in a usable state. + // - [Controller.RemoveFromVM]: the host-side remove call failed; the + // host-side assignment still exists but the reference count has been + // decremented to zero. + // In either case the device must be cleaned up by calling [Controller.RemoveFromVM]. + StateAssignedInvalid + + // StateRemoved indicates that no host-side VM assignment exists for this device. + // This state is reached in two ways: + // - [Controller.AddToVM]: the host-side add call failed. The device is still + // tracked in the Controller and the caller must call [Controller.RemoveFromVM] + // to clean up the reservation. No further [Controller.AddToVM] calls are allowed. + // - [Controller.untrack]: set as a safety marker immediately before the device + // is deleted from the tracking maps. In this case the state is never externally + // observable — the device is gone from the map by the time the lock is released. + StateRemoved +) + +// String returns a human-readable string representation of the device State. +func (s State) String() string { + switch s { + case StateReserved: + return "Reserved" + case StateAssigned: + return "Assigned" + case StateReady: + return "Ready" + case StateAssignedInvalid: + return "AssignedInvalid" + case StateRemoved: + return "Removed" + default: + return "Unknown" + } +} diff --git a/internal/controller/device/vpci/types.go b/internal/controller/device/vpci/types.go new file mode 100644 index 0000000000..ca2446971c --- /dev/null +++ b/internal/controller/device/vpci/types.go @@ -0,0 +1,61 @@ +//go:build windows + +package vpci + +import ( + "context" + + "github.com/Microsoft/go-winio/pkg/guid" + + hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + "github.com/Microsoft/hcsshim/internal/protocol/guestresource" +) + +// Device holds the configuration required to assign a vPCI device to the VM. +type Device struct { + // DeviceInstanceID is the host device instance path of the vPCI device. + DeviceInstanceID string + + // VirtualFunctionIndex is the SR-IOV virtual function index to assign. + VirtualFunctionIndex uint16 +} + +// vmVPCI manages adding and removing vPCI devices for a Utility VM. +// Implemented by [vmmanager.UtilityVM]. +type vmVPCI interface { + // AddDevice adds a vPCI device identified by `vmBusGUID` to the Utility VM with the provided settings. + AddDevice(ctx context.Context, vmbusGUID guid.GUID, settings hcsschema.VirtualPciDevice) error + + // RemoveDevice removes the vPCI device identified by `vmBusGUID` from the Utility VM. + RemoveDevice(ctx context.Context, vmbusGUID guid.GUID) error +} + +// linuxGuestVPCI exposes vPCI device operations in the LCOW guest. +// Implemented by [guestmanager.Guest]. +type linuxGuestVPCI interface { + // AddVPCIDevice adds a vPCI device to the guest. + AddVPCIDevice(ctx context.Context, settings guestresource.LCOWMappedVPCIDevice) error +} + +// ============================================================================== +// INTERNAL DATA STRUCTURES +// ============================================================================== + +// deviceInfo records one vPCI device's assignment state and reference count. +type deviceInfo struct { + // device is the immutable host device identifier used to detect duplicate + // assignment requests. + device Device + + // vmBusGUID identifies the vPCI device (backed by a VMBus channel) + // inside the UVM. + vmBusGUID guid.GUID + + // state is the current lifecycle state of this device assignment. + // Access must be guarded by [Controller.mu]. + state State + + // refCount is the number of active callers sharing this device. + // Access must be guarded by [Controller.mu]. + refCount uint32 +} diff --git a/internal/controller/device/vpci/utils.go b/internal/controller/device/vpci/utils.go index 044764057d..0605d89b20 100644 --- a/internal/controller/device/vpci/utils.go +++ b/internal/controller/device/vpci/utils.go @@ -3,6 +3,7 @@ package vpci import ( + "fmt" "path/filepath" "strconv" ) @@ -17,6 +18,16 @@ const ( DeviceIDType = "vpci-instance-id" ) +const ( + // vmBusChannelTypeGUIDFormatted is the well-known channel type GUID defined by + // VMBus for all assigned devices. + vmBusChannelTypeGUIDFormatted = "{44c4f61d-4444-4400-9d52-802e27ede19f}" + + // assignedDeviceEnumerator is the VMBus enumerator prefix used in device + // instance IDs for assigned devices. + assignedDeviceEnumerator = "VMBUS" +) + // IsValidDeviceType returns true if the device type is valid i.e. supported by the runtime. func IsValidDeviceType(deviceType string) bool { return (deviceType == DeviceIDType) || @@ -30,9 +41,28 @@ func GetDeviceInfoFromPath(rawDevicePath string) (string, uint16) { indexString := filepath.Base(rawDevicePath) index, err := strconv.ParseUint(indexString, 10, 16) if err == nil { - // we have a vf index + // We have a VF index. return filepath.Dir(rawDevicePath), uint16(index) } - // otherwise, just use default index and full device ID given + // Otherwise, just use default index and the full device ID as given. return rawDevicePath, 0 } + +// GetAssignedDeviceVMBUSInstanceID returns the instance ID of the VMBus channel +// device node created when a device is assigned to a UVM via vPCI. +// +// When a device is assigned to a UVM via vPCI support in HCS, a new VMBus channel device node is +// created in the UVM. The actual device that was assigned in is exposed as a child on this VMBus +// channel device node. +// +// A device node's instance ID is an identifier that distinguishes that device from other devices +// on the system. The GUID of a VMBus channel device node refers to that channel's unique +// identifier used internally by VMBus and can be used to determine the VMBus channel +// device node's instance ID. +// +// A VMBus channel device node's instance ID is in the form: +// +// "VMBUS\{channelTypeGUID}\{vmBusChannelGUID}" +func GetAssignedDeviceVMBUSInstanceID(vmBusChannelGUID string) string { + return fmt.Sprintf("%s\\%s\\{%s}", assignedDeviceEnumerator, vmBusChannelTypeGUIDFormatted, vmBusChannelGUID) +} diff --git a/internal/controller/device/vpci/vpci.go b/internal/controller/device/vpci/vpci.go new file mode 100644 index 0000000000..aa8a675150 --- /dev/null +++ b/internal/controller/device/vpci/vpci.go @@ -0,0 +1,248 @@ +//go:build windows + +package vpci + +import ( + "context" + "fmt" + "sync" + + "github.com/Microsoft/go-winio/pkg/guid" + hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + "github.com/Microsoft/hcsshim/internal/logfields" + "github.com/sirupsen/logrus" + + "github.com/Microsoft/hcsshim/internal/log" +) + +// Controller manages vPCI device assignments for a Utility VM. +type Controller struct { + mu sync.Mutex + + // devices tracks currently assigned vPCI devices, keyed by VMBus GUID. + // Guarded by mu. + devices map[guid.GUID]*deviceInfo + + // deviceToGUID maps a [Device] to its VMBus GUID for duplicate detection + // during [Controller.Reserve]. Guarded by mu. + deviceToGUID map[Device]guid.GUID + + // vmVPCI performs host-side vPCI device add/remove on the VM. + vmVPCI vmVPCI + + // linuxGuestVPCI performs guest-side vPCI device setup for LCOW. + linuxGuestVPCI linuxGuestVPCI +} + +// New creates a ready-to-use [Controller]. +func New( + vmVPCI vmVPCI, + linuxGuestVPCI linuxGuestVPCI, +) *Controller { + return &Controller{ + vmVPCI: vmVPCI, + linuxGuestVPCI: linuxGuestVPCI, + devices: make(map[guid.GUID]*deviceInfo), + deviceToGUID: make(map[Device]guid.GUID), + } +} + +// Reserve generates a unique VMBus GUID for the given vPCI device and records +// the reservation. The returned GUID can later be passed to [Controller.AddToVM] +// to actually assign the device to the VM. +// +// If the same device (identified by DeviceInstanceID and VirtualFunctionIndex) has +// already been reserved, the existing GUID is returned. +// +// Each Virtual Function is assigned as an independent guest device with its own +// VMBus GUID. Multiple Virtual Functions on the same physical device are treated +// as separate devices. +func (c *Controller) Reserve(ctx context.Context, device Device) (guid.GUID, error) { + ctx, _ = log.WithContext(ctx, logrus.WithFields(logrus.Fields{ + logfields.DeviceID: device.DeviceInstanceID, + logfields.VFIndex: device.VirtualFunctionIndex, + })) + + c.mu.Lock() + defer c.mu.Unlock() + + // If this device is already reserved, return the existing GUID. + if existingGUID, ok := c.deviceToGUID[device]; ok { + log.G(ctx).WithField(logfields.VMBusGUID, existingGUID).Debug("vPCI device already reserved, reusing existing GUID") + return existingGUID, nil + } + + // Generate a new VMBus GUID for this device. + vmBusGUID, err := guid.NewV4() + if err != nil { + return guid.GUID{}, fmt.Errorf("generate vmbus guid for device %s: %w", device.DeviceInstanceID, err) + } + + c.devices[vmBusGUID] = &deviceInfo{ + device: device, + vmBusGUID: vmBusGUID, + state: StateReserved, + } + c.deviceToGUID[device] = vmBusGUID + + log.G(ctx).WithField(logfields.VMBusGUID, vmBusGUID).Debug("reserved vPCI device with new VMBus GUID") + return vmBusGUID, nil +} + +// AddToVM assigns a previously reserved vPCI device to the VM. +// The vmBusGUID must have been obtained from a prior call to [Controller.Reserve]. +// If the device is already ready for use in the VM, the reference count is incremented. +// +// On failure the caller should call [Controller.RemoveFromVM] to clean up. +func (c *Controller) AddToVM(ctx context.Context, vmBusGUID guid.GUID) error { + // Set vmBusGUID in logging context. + ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.VMBusGUID, vmBusGUID)) + + c.mu.Lock() + defer c.mu.Unlock() + + dev, ok := c.devices[vmBusGUID] + if !ok { + return fmt.Errorf("no reservation found for vmBusGUID %s; call Reserve first", vmBusGUID) + } + + ctx, _ = log.WithContext(ctx, logrus.WithFields(logrus.Fields{ + logfields.DeviceID: dev.device.DeviceInstanceID, + logfields.VFIndex: dev.device.VirtualFunctionIndex, + })) + + switch dev.state { + case StateReady: + // Device is already fully assigned and guest-ready; just bump the ref count. + dev.refCount++ + log.G(ctx).Debug("vPCI device already ready, reusing existing assignment") + + case StateReserved: + // Device not yet attached to VM — perform the host-side add. + log.G(ctx).Debug("assigning vPCI device to VM") + + // NUMA affinity is always propagated for assigned devices. + // This feature is available on WS2025 and later. + // Since the V2 shims only support WS2025 and later, this is set as true. + propagateAffinity := true + + settings := hcsschema.VirtualPciDevice{ + Functions: []hcsschema.VirtualPciFunction{ + { + DeviceInstancePath: dev.device.DeviceInstanceID, + VirtualFunction: dev.device.VirtualFunctionIndex, + }, + }, + PropagateNumaAffinity: &propagateAffinity, + } + + // Host-side: add the vPCI device to the VM. + if err := c.vmVPCI.AddDevice(ctx, vmBusGUID, settings); err != nil { + // Set state to removed on failure. + // The caller can call RemoveFromVM to clean up the reservation. + dev.state = StateRemoved + return fmt.Errorf("add vpci device %s to vm: %w", dev.device.DeviceInstanceID, err) + } + + // Host-side succeeded; mark as assigned (transient state) before + // waiting for the guest. + dev.state = StateAssigned + + // Guest-side: wait for the device to be ready inside the guest. + if err := c.waitGuestDeviceReady(ctx, vmBusGUID); err != nil { + // Host assignment is in place but guest is not ready. + // Mark StateAssignedInvalid so the caller can call RemoveFromVM + // to clean up the host-side assignment. + dev.state = StateAssignedInvalid + return fmt.Errorf("wait for guest vpci device with vmBusGUID %s to become ready: %w", vmBusGUID, err) + } + + // Both host and guest succeeded; device is fully ready. + dev.refCount++ + dev.state = StateReady + + log.G(ctx).Info("vPCI device assigned to VM") + + case StateAssignedInvalid: + // The device add failed in a previous attempt after the host-side assignment + // succeeded. Call RemoveFromVM to clean up the host-side assignment before retrying. + return fmt.Errorf("vpci device with vmBusGUID %s is in an invalid state; call RemoveFromVM first", vmBusGUID) + + case StateRemoved: + // The device failed to be added to the VM and hence was moved to state removed. + return fmt.Errorf("vpci device with vmBusGUID %s was removed due to a prior failure; call RemoveFromVM first", vmBusGUID) + + default: + // StateAssigned should never be observed by callers (it is a transient + // within-call state). + return fmt.Errorf("vpci device with vmBusGUID %s is in an unexpected state %s", vmBusGUID, dev.state) + } + + return nil +} + +// RemoveFromVM removes a vPCI device from the VM. +// If the device is shared (reference count > 1), the reference count is +// decremented without actually removing the device from the VM. +func (c *Controller) RemoveFromVM(ctx context.Context, vmBusGUID guid.GUID) error { + c.mu.Lock() + defer c.mu.Unlock() + + ctx, _ = log.WithContext(ctx, logrus.WithField(logfields.VMBusGUID, vmBusGUID)) + + dev, ok := c.devices[vmBusGUID] + if !ok { + return fmt.Errorf("no vpci device with vmBusGUID %s is assigned to the vm", vmBusGUID) + } + + switch dev.state { + case StateReserved, StateRemoved: + // Device was reserved but never assigned to the VM (or never assigned). + // No host-side state to clean up — just drop the tracking entry. + log.G(ctx).WithField("state", dev.state).Debug("vPCI device has no host-side assignment, cleaning up reservation") + c.untrack(vmBusGUID, dev) + return nil + + case StateReady: + // Decrement ref count; only remove from the host when the last reference is dropped. + if dev.refCount > 1 { + dev.refCount-- + log.G(ctx).WithField("refCount", dev.refCount).Debug("vPCI device still in use, decremented ref count") + return nil + } + + // Last reference — fall through to host-side remove below. + dev.refCount = 0 + + case StateAssignedInvalid: + // Host-side assignment exists but device is in an inconsistent state. + // Proceed directly to host-side remove (refCount is always 0 here). + + default: + // StateAssigned is a transient within-call state and should not be seen here. + return fmt.Errorf("vpci device with vmBusGUID %s is in an unexpected state %s", vmBusGUID, dev.state) + } + + log.G(ctx).Debug("removing vPCI device from VM") + + // Host-side: remove the vPCI device from the VM. + if err := c.vmVPCI.RemoveDevice(ctx, vmBusGUID); err != nil { + // The host-side remove failed; the device is still partially assigned. + // Mark it StateAssignedInvalid so that callers can retry via RemoveFromVM. + dev.state = StateAssignedInvalid + return fmt.Errorf("remove vpci device %s from vm: %w", vmBusGUID, err) + } + + c.untrack(vmBusGUID, dev) + log.G(ctx).Info("vPCI device removed from VM") + return nil +} + +// untrack removes a device from the controller's tracking maps and sets its +// state to [StateRemoved] as a safety marker. +// Must be called with c.mu held. +func (c *Controller) untrack(vmBusGUID guid.GUID, dev *deviceInfo) { + dev.state = StateRemoved + delete(c.devices, vmBusGUID) + delete(c.deviceToGUID, dev.device) +} diff --git a/internal/controller/device/vpci/vpci_lcow.go b/internal/controller/device/vpci/vpci_lcow.go new file mode 100644 index 0000000000..762893da1e --- /dev/null +++ b/internal/controller/device/vpci/vpci_lcow.go @@ -0,0 +1,19 @@ +//go:build windows && lcow + +package vpci + +import ( + "context" + + "github.com/Microsoft/go-winio/pkg/guid" + + "github.com/Microsoft/hcsshim/internal/protocol/guestresource" +) + +// waitGuestDeviceReady notifies the guest about the new device and blocks until +// the required sysfs/device paths are available before workloads use them. +func (c *Controller) waitGuestDeviceReady(ctx context.Context, vmBusGUID guid.GUID) error { + return c.linuxGuestVPCI.AddVPCIDevice(ctx, guestresource.LCOWMappedVPCIDevice{ + VMBusGUID: vmBusGUID.String(), + }) +} diff --git a/internal/controller/device/vpci/vpci_test.go b/internal/controller/device/vpci/vpci_test.go new file mode 100644 index 0000000000..8010505510 --- /dev/null +++ b/internal/controller/device/vpci/vpci_test.go @@ -0,0 +1,700 @@ +//go:build windows && lcow + +package vpci + +import ( + "context" + "errors" + "testing" + + "github.com/Microsoft/go-winio/pkg/guid" + "go.uber.org/mock/gomock" + + "github.com/Microsoft/hcsshim/internal/controller/device/vpci/mocks" + hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + "github.com/Microsoft/hcsshim/internal/protocol/guestresource" +) + +func newTestDevice() Device { + return Device{ + DeviceInstanceID: "PCI\\VEN_1234&DEV_5678\\0", + VirtualFunctionIndex: 0, + } +} + +var ( + errHostAdd = errors.New("host add failed") + errHostRemove = errors.New("host remove failed") + errGuestAdd = errors.New("guest add failed") +) + +// ───────────────────────────────────────────────────────────────────────────── +// Reserve tests +// ───────────────────────────────────────────────────────────────────────────── + +// TestReserve_NewDevice verifies that reserving a device returns a non-zero GUID +// and that the device appears in both tracking maps. +func TestReserve_NewDevice(t *testing.T) { + ctrl := gomock.NewController(t) + c := New(mocks.NewMockvmVPCI(ctrl), mocks.NewMocklinuxGuestVPCI(ctrl)) + ctx := context.Background() + + dev := newTestDevice() + g, err := c.Reserve(ctx, dev) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if g == (guid.GUID{}) { + t.Fatal("expected non-zero GUID") + } + if _, ok := c.devices[g]; !ok { + t.Error("device not in c.devices after Reserve") + } + if storedGUID, ok := c.deviceToGUID[dev]; !ok || storedGUID != g { + t.Error("device not in c.deviceToGUID after Reserve") + } + if c.devices[g].state != StateReserved { + t.Errorf("expected StateReserved, got %v", c.devices[g].state) + } +} + +// TestReserve_SameDeviceTwice verifies idempotency: a second Reserve for the +// same device returns the exact same GUID without creating a new entry. +func TestReserve_SameDeviceTwice(t *testing.T) { + ctrl := gomock.NewController(t) + c := New(mocks.NewMockvmVPCI(ctrl), mocks.NewMocklinuxGuestVPCI(ctrl)) + ctx := context.Background() + + dev := newTestDevice() + g1, err := c.Reserve(ctx, dev) + if err != nil { + t.Fatalf("first Reserve: %v", err) + } + g2, err := c.Reserve(ctx, dev) + if err != nil { + t.Fatalf("second Reserve: %v", err) + } + if g1 != g2 { + t.Errorf("expected same GUID, got %v vs %v", g1, g2) + } + if len(c.devices) != 1 { + t.Errorf("expected 1 device entry, got %d", len(c.devices)) + } +} + +// TestReserve_DifferentVF verifies that two VFs on the same physical device are +// treated as independent reservations. +func TestReserve_DifferentVF(t *testing.T) { + ctrl := gomock.NewController(t) + c := New(mocks.NewMockvmVPCI(ctrl), mocks.NewMocklinuxGuestVPCI(ctrl)) + ctx := context.Background() + + dev0 := Device{DeviceInstanceID: "PCI\\VEN_1234&DEV_5678\\0", VirtualFunctionIndex: 0} + dev1 := Device{DeviceInstanceID: "PCI\\VEN_1234&DEV_5678\\0", VirtualFunctionIndex: 1} + + g0, _ := c.Reserve(ctx, dev0) + g1, _ := c.Reserve(ctx, dev1) + + if g0 == g1 { + t.Error("different VFs should get different GUIDs") + } + if len(c.devices) != 2 { + t.Errorf("expected 2 device entries, got %d", len(c.devices)) + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// AddToVM tests +// ───────────────────────────────────────────────────────────────────────────── + +// TestAddToVM_NoReservation verifies that AddToVM returns an error when called +// with an unknown GUID. +func TestAddToVM_NoReservation(t *testing.T) { + ctrl := gomock.NewController(t) + c := New(mocks.NewMockvmVPCI(ctrl), mocks.NewMocklinuxGuestVPCI(ctrl)) + ctx := context.Background() + + unknownGUID, _ := guid.NewV4() + err := c.AddToVM(ctx, unknownGUID) + if err == nil { + t.Fatal("expected error for unknown GUID") + } +} + +// TestAddToVM_HappyPath verifies a normal Reserve → AddToVM flow. +func TestAddToVM_HappyPath(t *testing.T) { + ctrl := gomock.NewController(t) + vm := mocks.NewMockvmVPCI(ctrl) + guest := mocks.NewMocklinuxGuestVPCI(ctrl) + c := New(vm, guest) + ctx := context.Background() + + dev := newTestDevice() + g, _ := c.Reserve(ctx, dev) + + vm.EXPECT().AddDevice(gomock.Any(), g, gomock.Any()).Return(nil) + guest.EXPECT().AddVPCIDevice(gomock.Any(), gomock.Any()).Return(nil) + + if err := c.AddToVM(ctx, g); err != nil { + t.Fatalf("unexpected error: %v", err) + } + + di := c.devices[g] + if di.state != StateReady { + t.Errorf("expected StateReady, got %v", di.state) + } + if di.refCount != 1 { + t.Errorf("expected refCount=1, got %d", di.refCount) + } +} + +// TestAddToVM_Idempotent verifies that calling AddToVM twice on the same device +// increments the refCount but does not make a second host-side call. +func TestAddToVM_Idempotent(t *testing.T) { + ctrl := gomock.NewController(t) + vm := mocks.NewMockvmVPCI(ctrl) + guest := mocks.NewMocklinuxGuestVPCI(ctrl) + c := New(vm, guest) + ctx := context.Background() + + dev := newTestDevice() + g, _ := c.Reserve(ctx, dev) + + // Host and guest calls must happen exactly once. + vm.EXPECT().AddDevice(gomock.Any(), g, gomock.Any()).Return(nil).Times(1) + guest.EXPECT().AddVPCIDevice(gomock.Any(), gomock.Any()).Return(nil).Times(1) + + _ = c.AddToVM(ctx, g) + if err := c.AddToVM(ctx, g); err != nil { + t.Fatalf("second AddToVM: %v", err) + } + + di := c.devices[g] + if di.refCount != 2 { + t.Errorf("expected refCount=2, got %d", di.refCount) + } + if di.state != StateReady { + t.Errorf("expected StateReady, got %v", di.state) + } +} + +// TestAddToVM_HostFails verifies that a host-side failure transitions the device +// to StateRemoved (still tracked in the map) without bumping the refCount. +// The device must be cleaned up via RemoveFromVM. +func TestAddToVM_HostFails(t *testing.T) { + ctrl := gomock.NewController(t) + vm := mocks.NewMockvmVPCI(ctrl) + guest := mocks.NewMocklinuxGuestVPCI(ctrl) + c := New(vm, guest) + ctx := context.Background() + + dev := newTestDevice() + g, _ := c.Reserve(ctx, dev) + + vm.EXPECT().AddDevice(gomock.Any(), g, gomock.Any()).Return(errHostAdd) + + err := c.AddToVM(ctx, g) + if err == nil { + t.Fatal("expected error") + } + + // Device must still be tracked (StateRemoved, awaiting RemoveFromVM cleanup). + di, ok := c.devices[g] + if !ok { + t.Fatal("expected device to still be tracked after host failure") + } + if di.state != StateRemoved { + t.Errorf("expected StateRemoved after host failure, got %v", di.state) + } + if di.refCount != 0 { + t.Errorf("expected refCount=0 after host failure, got %d", di.refCount) + } +} + +// TestAddToVM_StateRemoved verifies that calling AddToVM on a StateRemoved device +// returns an error and does not make any host or guest calls. +func TestAddToVM_StateRemoved(t *testing.T) { + ctrl := gomock.NewController(t) + vm := mocks.NewMockvmVPCI(ctrl) + guest := mocks.NewMocklinuxGuestVPCI(ctrl) + c := New(vm, guest) + ctx := context.Background() + + dev := newTestDevice() + g, _ := c.Reserve(ctx, dev) + + // Host-side add fails → StateRemoved. + vm.EXPECT().AddDevice(gomock.Any(), g, gomock.Any()).Return(errHostAdd) + _ = c.AddToVM(ctx, g) + + // Second AddToVM: must NOT call host or guest again. + err := c.AddToVM(ctx, g) + if err == nil { + t.Fatal("expected error for StateRemoved device") + } +} + +// TestAddToVM_GuestFails verifies that a guest-side failure marks the device +// StateAssignedInvalid and does not bump the refCount. +func TestAddToVM_GuestFails(t *testing.T) { + ctrl := gomock.NewController(t) + vm := mocks.NewMockvmVPCI(ctrl) + guest := mocks.NewMocklinuxGuestVPCI(ctrl) + c := New(vm, guest) + ctx := context.Background() + + dev := newTestDevice() + g, _ := c.Reserve(ctx, dev) + + vm.EXPECT().AddDevice(gomock.Any(), g, gomock.Any()).Return(nil) + guest.EXPECT().AddVPCIDevice(gomock.Any(), gomock.Any()).Return(errGuestAdd) + + err := c.AddToVM(ctx, g) + if err == nil { + t.Fatal("expected error") + } + + di := c.devices[g] + if di.state != StateAssignedInvalid { + t.Errorf("expected StateAssignedInvalid after guest failure, got %v", di.state) + } + if di.refCount != 0 { + t.Errorf("expected refCount=0 after guest failure, got %d", di.refCount) + } +} + +// TestAddToVM_InvalidDevice verifies that AddToVM on a StateAssignedInvalid device +// returns an error and does not attempt a new host/guest call. +func TestAddToVM_InvalidDevice(t *testing.T) { + ctrl := gomock.NewController(t) + vm := mocks.NewMockvmVPCI(ctrl) + guest := mocks.NewMocklinuxGuestVPCI(ctrl) + c := New(vm, guest) + ctx := context.Background() + + dev := newTestDevice() + g, _ := c.Reserve(ctx, dev) + + // First AddToVM: host succeeds, guest fails → StateAssignedInvalid. + vm.EXPECT().AddDevice(gomock.Any(), g, gomock.Any()).Return(nil) + guest.EXPECT().AddVPCIDevice(gomock.Any(), gomock.Any()).Return(errGuestAdd) + _ = c.AddToVM(ctx, g) + + // Second AddToVM: must NOT call host or guest again. + err := c.AddToVM(ctx, g) + if err == nil { + t.Fatal("expected error for StateAssignedInvalid device") + } +} + +// TestAddToVM_ReservedTwice_ThenAdd exercises the scenario where the same +// device is reserved (getting the same GUID back), then AddToVM is called. +// The reservation itself is idempotent, so AddToVM should be called only once +// for the underlying host/guest pair. +func TestAddToVM_ReservedTwice_ThenAdd(t *testing.T) { + ctrl := gomock.NewController(t) + vm := mocks.NewMockvmVPCI(ctrl) + guest := mocks.NewMocklinuxGuestVPCI(ctrl) + c := New(vm, guest) + ctx := context.Background() + + dev := newTestDevice() + g1, _ := c.Reserve(ctx, dev) + g2, _ := c.Reserve(ctx, dev) + + if g1 != g2 { + t.Fatal("expected same GUID from double Reserve") + } + + vm.EXPECT().AddDevice(gomock.Any(), g1, gomock.Any()).Return(nil).Times(1) + guest.EXPECT().AddVPCIDevice(gomock.Any(), gomock.Any()).Return(nil).Times(1) + + if err := c.AddToVM(ctx, g1); err != nil { + t.Fatalf("unexpected error: %v", err) + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// RemoveFromVM tests +// ───────────────────────────────────────────────────────────────────────────── + +// TestRemoveFromVM_NoDevice verifies that removing an unknown GUID returns an error. +func TestRemoveFromVM_NoDevice(t *testing.T) { + ctrl := gomock.NewController(t) + c := New(mocks.NewMockvmVPCI(ctrl), mocks.NewMocklinuxGuestVPCI(ctrl)) + ctx := context.Background() + + unknownGUID, _ := guid.NewV4() + err := c.RemoveFromVM(ctx, unknownGUID) + if err == nil { + t.Fatal("expected error for unknown GUID") + } +} + +// TestRemoveFromVM_ReservedButNeverAdded verifies that removing a device that +// was reserved but never added cleans up the maps without touching the host. +func TestRemoveFromVM_ReservedButNeverAdded(t *testing.T) { + ctrl := gomock.NewController(t) + vm := mocks.NewMockvmVPCI(ctrl) + guest := mocks.NewMocklinuxGuestVPCI(ctrl) + c := New(vm, guest) + ctx := context.Background() + + dev := newTestDevice() + g, _ := c.Reserve(ctx, dev) + + // RemoveDevice must NOT be called. + if err := c.RemoveFromVM(ctx, g); err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if _, ok := c.devices[g]; ok { + t.Error("device still tracked after removal of reserved-only device") + } + if _, ok := c.deviceToGUID[dev]; ok { + t.Error("deviceToGUID still has entry after removal of reserved-only device") + } +} + +// TestRemoveFromVM_AfterHostAddFails verifies that a device in StateRemoved (due to +// a failed host-side add in AddToVM) can be cleaned up via RemoveFromVM without +// making any host call. +func TestRemoveFromVM_AfterHostAddFails(t *testing.T) { + ctrl := gomock.NewController(t) + vm := mocks.NewMockvmVPCI(ctrl) + guest := mocks.NewMocklinuxGuestVPCI(ctrl) + c := New(vm, guest) + ctx := context.Background() + + dev := newTestDevice() + g, _ := c.Reserve(ctx, dev) + + // Host-side add fails → StateRemoved. + vm.EXPECT().AddDevice(gomock.Any(), g, gomock.Any()).Return(errHostAdd) + _ = c.AddToVM(ctx, g) + + // RemoveFromVM must NOT call RemoveDevice (no host-side state to clean up). + if err := c.RemoveFromVM(ctx, g); err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if _, ok := c.devices[g]; ok { + t.Error("device still tracked after RemoveFromVM on StateRemoved device") + } + if _, ok := c.deviceToGUID[dev]; ok { + t.Error("deviceToGUID still has entry after RemoveFromVM on StateRemoved device") + } +} + +// TestRemoveFromVM_HappyPath verifies a full Reserve → AddToVM → RemoveFromVM cycle. +func TestRemoveFromVM_HappyPath(t *testing.T) { + ctrl := gomock.NewController(t) + vm := mocks.NewMockvmVPCI(ctrl) + guest := mocks.NewMocklinuxGuestVPCI(ctrl) + c := New(vm, guest) + ctx := context.Background() + + dev := newTestDevice() + g, _ := c.Reserve(ctx, dev) + + vm.EXPECT().AddDevice(gomock.Any(), g, gomock.Any()).Return(nil) + guest.EXPECT().AddVPCIDevice(gomock.Any(), gomock.Any()).Return(nil) + _ = c.AddToVM(ctx, g) + + vm.EXPECT().RemoveDevice(gomock.Any(), g).Return(nil) + if err := c.RemoveFromVM(ctx, g); err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if _, ok := c.devices[g]; ok { + t.Error("device still tracked after successful removal") + } +} + +// TestRemoveFromVM_RefCounting verifies that the device is only removed from +// the host when the last reference is dropped. +func TestRemoveFromVM_RefCounting(t *testing.T) { + ctrl := gomock.NewController(t) + vm := mocks.NewMockvmVPCI(ctrl) + guest := mocks.NewMocklinuxGuestVPCI(ctrl) + c := New(vm, guest) + ctx := context.Background() + + dev := newTestDevice() + g, _ := c.Reserve(ctx, dev) + + vm.EXPECT().AddDevice(gomock.Any(), g, gomock.Any()).Return(nil).Times(1) + guest.EXPECT().AddVPCIDevice(gomock.Any(), gomock.Any()).Return(nil).Times(1) + _ = c.AddToVM(ctx, g) + _ = c.AddToVM(ctx, g) // refCount → 2 + + // First remove: decrements ref to 1, no host call. + if err := c.RemoveFromVM(ctx, g); err != nil { + t.Fatalf("first remove error: %v", err) + } + if c.devices[g].refCount != 1 { + t.Errorf("expected refCount=1 after first remove, got %d", c.devices[g].refCount) + } + + // Second remove: drops to 0, triggers host remove. + vm.EXPECT().RemoveDevice(gomock.Any(), g).Return(nil).Times(1) + if err := c.RemoveFromVM(ctx, g); err != nil { + t.Fatalf("second remove error: %v", err) + } + if _, ok := c.devices[g]; ok { + t.Error("device still tracked after last ref dropped") + } +} + +// TestRemoveFromVM_HostFails verifies that a failed host-side remove marks the +// device StateAssignedInvalid so it can be retried. +func TestRemoveFromVM_HostFails(t *testing.T) { + ctrl := gomock.NewController(t) + vm := mocks.NewMockvmVPCI(ctrl) + guest := mocks.NewMocklinuxGuestVPCI(ctrl) + c := New(vm, guest) + ctx := context.Background() + + dev := newTestDevice() + g, _ := c.Reserve(ctx, dev) + + vm.EXPECT().AddDevice(gomock.Any(), g, gomock.Any()).Return(nil) + guest.EXPECT().AddVPCIDevice(gomock.Any(), gomock.Any()).Return(nil) + _ = c.AddToVM(ctx, g) + + // First remove attempt fails. + vm.EXPECT().RemoveDevice(gomock.Any(), g).Return(errHostRemove) + err := c.RemoveFromVM(ctx, g) + if err == nil { + t.Fatal("expected error on host remove failure") + } + + di := c.devices[g] + if di.state != StateAssignedInvalid { + t.Errorf("expected StateAssignedInvalid after failed remove, got %v", di.state) + } + if di.refCount != 0 { + t.Errorf("expected refCount=0 after failed remove, got %d", di.refCount) + } +} + +// TestRemoveFromVM_HostFails_ThenRetry verifies that after a failed host remove +// (device is now StateAssignedInvalid with refCount=0), a retry via RemoveFromVM +// succeeds and cleans up the maps. +func TestRemoveFromVM_HostFails_ThenRetry(t *testing.T) { + ctrl := gomock.NewController(t) + vm := mocks.NewMockvmVPCI(ctrl) + guest := mocks.NewMocklinuxGuestVPCI(ctrl) + c := New(vm, guest) + ctx := context.Background() + + dev := newTestDevice() + g, _ := c.Reserve(ctx, dev) + + vm.EXPECT().AddDevice(gomock.Any(), g, gomock.Any()).Return(nil) + guest.EXPECT().AddVPCIDevice(gomock.Any(), gomock.Any()).Return(nil) + _ = c.AddToVM(ctx, g) + + // First remove: host fails. + vm.EXPECT().RemoveDevice(gomock.Any(), g).Return(errHostRemove) + _ = c.RemoveFromVM(ctx, g) + + // Retry: host succeeds. + vm.EXPECT().RemoveDevice(gomock.Any(), g).Return(nil) + if err := c.RemoveFromVM(ctx, g); err != nil { + t.Fatalf("retry RemoveFromVM failed: %v", err) + } + + if _, ok := c.devices[g]; ok { + t.Error("device still tracked after successful retry removal") + } +} + +// TestRemoveFromVM_InvalidDevice_AfterGuestFail verifies that a device stuck in +// StateAssignedInvalid (due to guest failure in AddToVM) can be cleaned up via RemoveFromVM. +func TestRemoveFromVM_InvalidDevice_AfterGuestFail(t *testing.T) { + ctrl := gomock.NewController(t) + vm := mocks.NewMockvmVPCI(ctrl) + guest := mocks.NewMocklinuxGuestVPCI(ctrl) + c := New(vm, guest) + ctx := context.Background() + + dev := newTestDevice() + g, _ := c.Reserve(ctx, dev) + + // AddToVM: host succeeds, guest fails. + vm.EXPECT().AddDevice(gomock.Any(), g, gomock.Any()).Return(nil) + guest.EXPECT().AddVPCIDevice(gomock.Any(), gomock.Any()).Return(errGuestAdd) + _ = c.AddToVM(ctx, g) + + // RemoveFromVM should issue a host remove and clean up. + vm.EXPECT().RemoveDevice(gomock.Any(), g).Return(nil) + if err := c.RemoveFromVM(ctx, g); err != nil { + t.Fatalf("RemoveFromVM on invalid device failed: %v", err) + } + + if _, ok := c.devices[g]; ok { + t.Error("device still tracked after cleanup of invalid device") + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Scenario: Reserve deduplication vs. state transitions +// ───────────────────────────────────────────────────────────────────────────── + +// TestReserve_AfterRemove verifies what happens when Reserve is called for a +// device that was previously removed. +// After RemoveFromVM the device is untracked from deviceToGUID, so Reserve +// should treat it as a brand-new device and return a fresh GUID. +func TestReserve_AfterRemove(t *testing.T) { + ctrl := gomock.NewController(t) + vm := mocks.NewMockvmVPCI(ctrl) + guest := mocks.NewMocklinuxGuestVPCI(ctrl) + c := New(vm, guest) + ctx := context.Background() + + dev := newTestDevice() + g1, _ := c.Reserve(ctx, dev) + + vm.EXPECT().AddDevice(gomock.Any(), g1, gomock.Any()).Return(nil) + guest.EXPECT().AddVPCIDevice(gomock.Any(), gomock.Any()).Return(nil) + _ = c.AddToVM(ctx, g1) + + vm.EXPECT().RemoveDevice(gomock.Any(), g1).Return(nil) + _ = c.RemoveFromVM(ctx, g1) + + // Reserve again after full removal: should get a new GUID. + g2, err := c.Reserve(ctx, dev) + if err != nil { + t.Fatalf("Reserve after remove failed: %v", err) + } + if g2 == g1 { + t.Error("expected a new GUID after re-reserving a fully removed device") + } +} + +// TestReserve_AfterGuestFailure verifies what Reserve returns for a device that +// is currently StateAssignedInvalid (guest failed, host succeeded). +// Since the device is still in deviceToGUID, Reserve should return the SAME GUID. +func TestReserve_AfterGuestFailure(t *testing.T) { + ctrl := gomock.NewController(t) + vm := mocks.NewMockvmVPCI(ctrl) + guest := mocks.NewMocklinuxGuestVPCI(ctrl) + c := New(vm, guest) + ctx := context.Background() + + dev := newTestDevice() + g1, _ := c.Reserve(ctx, dev) + + vm.EXPECT().AddDevice(gomock.Any(), g1, gomock.Any()).Return(nil) + guest.EXPECT().AddVPCIDevice(gomock.Any(), gomock.Any()).Return(errGuestAdd) + _ = c.AddToVM(ctx, g1) + + // Device is now StateAssignedInvalid and still in deviceToGUID. + g2, err := c.Reserve(ctx, dev) + if err != nil { + t.Fatalf("Reserve after guest failure: %v", err) + } + if g2 != g1 { + t.Errorf("expected same GUID for invalid device on re-reserve, got different") + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Scenario: RemoveFromVM on a StateRemoved device (already untracked) +// ───────────────────────────────────────────────────────────────────────────── + +// TestRemoveFromVM_AlreadyRemoved verifies that calling RemoveFromVM twice +// returns an error on the second call (device no longer in map). +func TestRemoveFromVM_AlreadyRemoved(t *testing.T) { + ctrl := gomock.NewController(t) + vm := mocks.NewMockvmVPCI(ctrl) + guest := mocks.NewMocklinuxGuestVPCI(ctrl) + c := New(vm, guest) + ctx := context.Background() + + dev := newTestDevice() + g, _ := c.Reserve(ctx, dev) + + vm.EXPECT().AddDevice(gomock.Any(), g, gomock.Any()).Return(nil) + guest.EXPECT().AddVPCIDevice(gomock.Any(), gomock.Any()).Return(nil) + _ = c.AddToVM(ctx, g) + + vm.EXPECT().RemoveDevice(gomock.Any(), g).Return(nil) + _ = c.RemoveFromVM(ctx, g) + + // Second call: device is no longer tracked. + err := c.RemoveFromVM(ctx, g) + if err == nil { + t.Fatal("expected error when removing an already-removed device") + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Scenario: AddToVM propagates correct HCS settings +// ───────────────────────────────────────────────────────────────────────────── + +// TestAddToVM_HCSSettings verifies that AddToVM passes the correct VirtualPciDevice +// settings including PropagateNumaAffinity=true and the device instance path. +func TestAddToVM_HCSSettings(t *testing.T) { + ctrl := gomock.NewController(t) + vm := mocks.NewMockvmVPCI(ctrl) + guest := mocks.NewMocklinuxGuestVPCI(ctrl) + c := New(vm, guest) + ctx := context.Background() + + dev := Device{ + DeviceInstanceID: "PCI\\VEN_ABCD&DEV_1234\\99", + VirtualFunctionIndex: 3, + } + g, _ := c.Reserve(ctx, dev) + + vm.EXPECT(). + AddDevice(gomock.Any(), g, gomock.AssignableToTypeOf(hcsschema.VirtualPciDevice{})). + DoAndReturn(func(_ context.Context, _ guid.GUID, settings hcsschema.VirtualPciDevice) error { + if settings.PropagateNumaAffinity == nil || !*settings.PropagateNumaAffinity { + t.Error("expected PropagateNumaAffinity=true") + } + if len(settings.Functions) != 1 { + t.Errorf("expected 1 function, got %d", len(settings.Functions)) + } + fn := settings.Functions[0] + if fn.DeviceInstancePath != dev.DeviceInstanceID { + t.Errorf("expected DeviceInstancePath=%q, got %q", dev.DeviceInstanceID, fn.DeviceInstancePath) + } + if fn.VirtualFunction != dev.VirtualFunctionIndex { + t.Errorf("expected VirtualFunction=%d, got %d", dev.VirtualFunctionIndex, fn.VirtualFunction) + } + return nil + }) + guest.EXPECT().AddVPCIDevice(gomock.Any(), gomock.Any()).Return(nil) + + if err := c.AddToVM(ctx, g); err != nil { + t.Fatalf("unexpected error: %v", err) + } +} + +// TestAddToVM_GuestVMBusGUIDForwarded verifies that the correct vmBusGUID string +// is forwarded to the guest-side AddVPCIDevice call. +func TestAddToVM_GuestVMBusGUIDForwarded(t *testing.T) { + ctrl := gomock.NewController(t) + vm := mocks.NewMockvmVPCI(ctrl) + guest := mocks.NewMocklinuxGuestVPCI(ctrl) + c := New(vm, guest) + ctx := context.Background() + + dev := newTestDevice() + g, _ := c.Reserve(ctx, dev) + + vm.EXPECT().AddDevice(gomock.Any(), g, gomock.Any()).Return(nil) + guest.EXPECT(). + AddVPCIDevice(gomock.Any(), gomock.AssignableToTypeOf(guestresource.LCOWMappedVPCIDevice{})). + DoAndReturn(func(_ context.Context, req guestresource.LCOWMappedVPCIDevice) error { + if req.VMBusGUID != g.String() { + t.Errorf("expected VMBusGUID=%q, got %q", g.String(), req.VMBusGUID) + } + return nil + }) + + _ = c.AddToVM(ctx, g) +} diff --git a/internal/controller/device/vpci/vpci_unsupported.go b/internal/controller/device/vpci/vpci_unsupported.go new file mode 100644 index 0000000000..31729ac881 --- /dev/null +++ b/internal/controller/device/vpci/vpci_unsupported.go @@ -0,0 +1,15 @@ +//go:build windows && !wcow && !lcow + +package vpci + +import ( + "context" + "fmt" + + "github.com/Microsoft/go-winio/pkg/guid" +) + +// waitGuestDeviceReady is a not supported for unsupported guests. +func (c *Controller) waitGuestDeviceReady(_ context.Context, _ guid.GUID) error { + return fmt.Errorf("unsupported guest") +} diff --git a/internal/controller/device/vpci/vpci_wcow.go b/internal/controller/device/vpci/vpci_wcow.go new file mode 100644 index 0000000000..db7846a938 --- /dev/null +++ b/internal/controller/device/vpci/vpci_wcow.go @@ -0,0 +1,15 @@ +//go:build windows && wcow + +package vpci + +import ( + "context" + + "github.com/Microsoft/go-winio/pkg/guid" +) + +// waitGuestDeviceReady is a no-op for Windows guests. WCOW does not require a +// guest-side notification as part of vPCI device assignment. +func (c *Controller) waitGuestDeviceReady(_ context.Context, _ guid.GUID) error { + return nil +} diff --git a/internal/devices/assigned_devices.go b/internal/devices/assigned_devices.go index 97db77e0d6..3b751ad57a 100644 --- a/internal/devices/assigned_devices.go +++ b/internal/devices/assigned_devices.go @@ -10,6 +10,7 @@ import ( "strconv" "github.com/Microsoft/hcsshim/internal/cmd" + vpciCtrl "github.com/Microsoft/hcsshim/internal/controller/device/vpci" "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/uvm" "github.com/pkg/errors" @@ -45,7 +46,7 @@ func AddDevice(ctx context.Context, vm *uvm.UtilityVM, idType, deviceID string, if err != nil { return vpci, nil, errors.Wrapf(err, "failed to assign device %s of type %s to pod %s", deviceID, idType, vm.ID()) } - vmBusInstanceID := vm.GetAssignedDeviceVMBUSInstanceID(vpci.VMBusGUID) + vmBusInstanceID := vpciCtrl.GetAssignedDeviceVMBUSInstanceID(vpci.VMBusGUID) log.G(ctx).WithField("vmbus id", vmBusInstanceID).Info("vmbus instance ID") locationPaths, err = getChildrenDeviceLocationPaths(ctx, vm, vmBusInstanceID, deviceUtilPath) diff --git a/internal/logfields/fields.go b/internal/logfields/fields.go index dac5a708e5..3f077cd6f8 100644 --- a/internal/logfields/fields.go +++ b/internal/logfields/fields.go @@ -70,6 +70,12 @@ const ( ShimPid = "shim-pid" TaskPid = "task-pid" + // vpci device + + VMBusGUID = "vmBusGUID" + DeviceID = "deviceInstanceID" + VFIndex = "virtualFunctionIndex" + // sandbox NetNsPath = "net-ns-path" diff --git a/internal/uvm/virtual_device.go b/internal/uvm/virtual_device.go index 7a0518f926..1fc4078397 100644 --- a/internal/uvm/virtual_device.go +++ b/internal/uvm/virtual_device.go @@ -24,10 +24,6 @@ const ( VPCIDeviceIDType = "vpci-instance-id" ) -// this is the well known channel type GUID defined by VMBUS for all assigned devices -const vmbusChannelTypeGUIDFormatted = "{44c4f61d-4444-4400-9d52-802e27ede19f}" -const assignedDeviceEnumerator = "VMBUS" - type VPCIDeviceID struct { deviceInstanceID string virtualFunctionIndex uint16 @@ -55,23 +51,6 @@ type VPCIDevice struct { refCount uint32 } -// GetAssignedDeviceVMBUSInstanceID returns the instance ID of the VMBUS channel device node created. -// -// When a device is assigned to a UVM via VPCI support in HCS, a new VMBUS channel device node is -// created in the UVM. The actual device that was assigned in is exposed as a child on this VMBUS -// channel device node. -// -// A device node's instance ID is an identifier that distinguishes that device from other devices -// on the system. The GUID of a VMBUS channel device node refers to that channel's unique -// identifier used internally by VMBUS and can be used to determine the VMBUS channel -// device node's instance ID. -// -// A VMBUS channel device node's instance ID is in the form: -// "VMBUS\vmbusChannelTypeGUIDFormatted\{vmBusChannelGUID}" -func (uvm *UtilityVM) GetAssignedDeviceVMBUSInstanceID(vmBusChannelGUID string) string { - return fmt.Sprintf("%s\\%s\\{%s}", assignedDeviceEnumerator, vmbusChannelTypeGUIDFormatted, vmBusChannelGUID) -} - // Release frees the resources of the corresponding vpci device func (vpci *VPCIDevice) Release(ctx context.Context) error { if err := vpci.vm.RemoveDevice(ctx, vpci.deviceInstanceID, vpci.virtualFunctionIndex); err != nil { diff --git a/internal/vm/guestmanager/device_lcow.go b/internal/vm/guestmanager/device_lcow.go index c6cece1fa0..61524a0ccc 100644 --- a/internal/vm/guestmanager/device_lcow.go +++ b/internal/vm/guestmanager/device_lcow.go @@ -11,18 +11,6 @@ import ( "github.com/Microsoft/hcsshim/internal/protocol/guestresource" ) -// LCOWDeviceManager exposes VPCI and VPMem device operations in the LCOW guest. -type LCOWDeviceManager interface { - // AddVPCIDevice adds a VPCI device to the guest. - AddVPCIDevice(ctx context.Context, settings guestresource.LCOWMappedVPCIDevice) error - // AddVPMemDevice adds a VPMem device to the guest. - AddVPMemDevice(ctx context.Context, settings guestresource.LCOWMappedVPMemDevice) error - // RemoveVPMemDevice removes a VPMem device from the guest. - RemoveVPMemDevice(ctx context.Context, settings guestresource.LCOWMappedVPMemDevice) error -} - -var _ LCOWDeviceManager = (*Guest)(nil) - // AddVPCIDevice adds a VPCI device in the guest. func (gm *Guest) AddVPCIDevice(ctx context.Context, settings guestresource.LCOWMappedVPCIDevice) error { request := &hcsschema.ModifySettingRequest{ diff --git a/internal/vm/vmmanager/pci.go b/internal/vm/vmmanager/pci.go index fbb72c4c04..f3d12a3ad3 100644 --- a/internal/vm/vmmanager/pci.go +++ b/internal/vm/vmmanager/pci.go @@ -6,23 +6,14 @@ import ( "context" "fmt" + "github.com/Microsoft/go-winio/pkg/guid" + "github.com/Microsoft/hcsshim/internal/hcs/resourcepaths" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/protocol/guestrequest" ) -// PCIManager manages assiging pci devices to a Utility VM. This is Windows specific at the moment. -type PCIManager interface { - // AddDevice adds a pci device identified by `vmbusGUID` to the Utility VM with the provided settings. - AddDevice(ctx context.Context, vmbusGUID string, settings hcsschema.VirtualPciDevice) error - - // RemoveDevice removes the pci device identified by `vmbusGUID` from the Utility VM. - RemoveDevice(ctx context.Context, vmbusGUID string) error -} - -var _ PCIManager = (*UtilityVM)(nil) - -func (uvm *UtilityVM) AddDevice(ctx context.Context, vmbusGUID string, settings hcsschema.VirtualPciDevice) error { +func (uvm *UtilityVM) AddDevice(ctx context.Context, vmbusGUID guid.GUID, settings hcsschema.VirtualPciDevice) error { request := &hcsschema.ModifySettingRequest{ ResourcePath: fmt.Sprintf(resourcepaths.VirtualPCIResourceFormat, vmbusGUID), RequestType: guestrequest.RequestTypeAdd, @@ -34,7 +25,7 @@ func (uvm *UtilityVM) AddDevice(ctx context.Context, vmbusGUID string, settings return nil } -func (uvm *UtilityVM) RemoveDevice(ctx context.Context, vmbusGUID string) error { +func (uvm *UtilityVM) RemoveDevice(ctx context.Context, vmbusGUID guid.GUID) error { request := &hcsschema.ModifySettingRequest{ ResourcePath: fmt.Sprintf(resourcepaths.VirtualPCIResourceFormat, vmbusGUID), RequestType: guestrequest.RequestTypeRemove,