From 7def2554968833781459fd018ba3aa3db0cc1e57 Mon Sep 17 00:00:00 2001 From: Hamza El-Saawy Date: Fri, 27 Mar 2026 14:19:53 -0400 Subject: [PATCH] Bug fix for leaked LCOW resources; expand logs Incorrect resource closures did not cause errors in non-virtual pod scenarios since the uVM was closed after sandbox exit, so leaked resources were released then. Fix bug where LCOW adapter removal incorrectly used the endpoint ID as the namespace ID. Don't attempt to remove virtual pod endpoints before host removes adapters from host. Unmount and delete virtual pod directories and mounts. Add more logs to networking and resource code paths to help debug leaked vPCI endpoint devices. Signed-off-by: Hamza El-Saawy --- cmd/containerd-shim-runhcs-v1/task_hcs.go | 77 ++++++++---- internal/guest/network/netns.go | 6 +- internal/guest/runtime/hcsv2/uvm.go | 86 ++++++++----- internal/guest/storage/mount.go | 20 ++- internal/hcsoci/create.go | 70 +++++++---- internal/log/hook.go | 4 +- internal/logfields/fields.go | 1 + internal/resources/resources.go | 6 +- internal/uvm/network.go | 144 +++++++++++++++++----- internal/uvm/virtual_device.go | 32 +++++ 10 files changed, 327 insertions(+), 119 deletions(-) diff --git a/cmd/containerd-shim-runhcs-v1/task_hcs.go b/cmd/containerd-shim-runhcs-v1/task_hcs.go index 1b12777e4c..9fbb1faf35 100644 --- a/cmd/containerd-shim-runhcs-v1/task_hcs.go +++ b/cmd/containerd-shim-runhcs-v1/task_hcs.go @@ -188,9 +188,11 @@ func newHcsTask( ownsParent bool, req *task.CreateTaskRequest, s *specs.Spec) (_ shimTask, err error) { - log.G(ctx).WithFields(logrus.Fields{ - "tid": req.ID, - "ownsParent": ownsParent, + ctx, entry := log.SetEntry(ctx, logrus.Fields{logfields.TaskID: req.ID}) + entry.WithFields(logrus.Fields{ + "hasHost": parent != nil, + "isVirtualPod": s.Annotations != nil && s.Annotations[annotations.VirtualPodID] != "", + "ownsHost": ownsParent, }).Debug("newHcsTask") owner := filepath.Base(os.Args[0]) @@ -495,22 +497,30 @@ func (ht *hcsTask)
DeleteExec(ctx context.Context, eid string) (int, uint32, tim } if eid == "" { + entry := log.G(ctx).WithFields(logrus.Fields{ + logfields.TaskID: ht.id, + logfields.ExecID: eid, + }) + // We are killing the init task, so we expect the container to be // stopped after this. // // The task process may have already exited, and the status set to // shimExecStateExited, but resources may still be in the process // of being cleaned up. Wait for ht.closed to be closed. This signals - // that waitInitExit() has finished destroying container resources, - // and layers were umounted. + // that [hcsTask.waitInitExit]/[hcsTask.waitForHostExit] has finished destroying + // container resources and that layers were umounted. // If the shim exits before resources are cleaned up, those resources // will remain locked and untracked, which leads to lingering sandboxes // and container resources like base vhdx. + const timeout = 30 * time.Second + entry.WithField(logfields.Timeout, timeout).Trace("waiting for task to be closed") select { - case <-time.After(30 * time.Second): - log.G(ctx).Error("timed out waiting for resource cleanup") + case <-time.After(timeout): + entry.WithField(logfields.Timeout, timeout).Error("timed out waiting for task to close while deleting init exec") return 0, 0, time.Time{}, errors.Wrap(hcs.ErrTimeout, "waiting for container resource cleanup") case <-ht.closed: + entry.Trace("received task close signal") } // The init task has now exited. 
A ForceExit() has already been sent to @@ -608,7 +618,10 @@ func (ht *hcsTask) Wait() *task.StateResponse { func (ht *hcsTask) waitInitExit() { ctx, span := oc.StartSpan(context.Background(), "hcsTask::waitInitExit") defer span.End() - span.AddAttributes(trace.StringAttribute("tid", ht.id)) + span.AddAttributes( + trace.StringAttribute("tid", ht.id), + trace.BoolAttribute("host", ht.host != nil), + trace.BoolAttribute("ownsHost", ht.ownsHost)) // Wait for it to exit on its own ht.init.Wait() @@ -627,7 +640,10 @@ func (ht *hcsTask) waitInitExit() { func (ht *hcsTask) waitForHostExit() { ctx, span := oc.StartSpan(context.Background(), "hcsTask::waitForHostExit") defer span.End() - span.AddAttributes(trace.StringAttribute("tid", ht.id)) + span.AddAttributes( + trace.StringAttribute("tid", ht.id), + trace.BoolAttribute("host", ht.host != nil), + trace.BoolAttribute("ownsHost", ht.ownsHost)) err := ht.host.WaitCtx(ctx) if err != nil { @@ -655,13 +671,16 @@ func (ht *hcsTask) waitForHostExit() { // == nil`. func (ht *hcsTask) close(ctx context.Context) { ht.closeOnce.Do(func() { - log.G(ctx).Debug("hcsTask::closeOnce") + entry := log.G(ctx) + entry.Debug("hcsTask::closeOnce") // ht.c should never be nil for a real task but in testing we stub // this to avoid a nil dereference. We really should introduce a // method or interface for ht.c operations that we can stub for // testing. if ht.c != nil { + const tearDownTimeout = 30 * time.Second + // Do our best attempt to tear down the container. 
// TODO: unify timeout select statements and use [ht.c.WaitCtx] and [context.WithTimeout] var werr error @@ -670,52 +689,63 @@ func (ht *hcsTask) close(ctx context.Context) { werr = ht.c.Wait() close(ch) }() + err := ht.c.Shutdown(ctx) if err != nil { - log.G(ctx).WithError(err).Error("failed to shutdown container") + entry.WithError(err).Error("failed to shutdown container") } else { - t := time.NewTimer(time.Second * 30) + t := time.NewTimer(tearDownTimeout) select { case <-ch: err = werr t.Stop() if err != nil { - log.G(ctx).WithError(err).Error("failed to wait for container shutdown") + entry.WithError(err).Error("failed to wait for container shutdown") } case <-t.C: err = hcs.ErrTimeout - log.G(ctx).WithError(err).Error("failed to wait for container shutdown") + entry.WithFields(logrus.Fields{ + logfields.Timeout: tearDownTimeout, + logrus.ErrorKey: err, + }).Error("timed out while waiting for container shutdown") } } if err != nil { err = ht.c.Terminate(ctx) if err != nil { - log.G(ctx).WithError(err).Error("failed to terminate container") + entry.WithError(err).Error("failed to terminate container") } else { - t := time.NewTimer(time.Second * 30) + t := time.NewTimer(tearDownTimeout) select { case <-ch: err = werr t.Stop() if err != nil { - log.G(ctx).WithError(err).Error("failed to wait for container terminate") + entry.WithError(err).Error("failed to wait for container terminate") } case <-t.C: - log.G(ctx).WithError(hcs.ErrTimeout).Error("failed to wait for container terminate") + entry.WithFields(logrus.Fields{ + logfields.Timeout: tearDownTimeout, + logrus.ErrorKey: hcs.ErrTimeout, + }).Error("timed out while waiting for container terminate") } } } // Release any resources associated with the container. 
+ entry.Trace("starting task resource cleanup") + if err := resources.ReleaseResources(ctx, ht.cr, ht.host, true); err != nil { - log.G(ctx).WithError(err).Error("failed to release container resources") + entry.WithError(err).Error("failed to release container resources") } // Close the container handle invalidating all future access. if err := ht.c.Close(); err != nil { - log.G(ctx).WithError(err).Error("failed to close container") + entry.WithError(err).Error("failed to close container") } + + entry.Trace("task resource cleanup completed") } ht.closeHost(ctx) }) @@ -730,11 +760,12 @@ func (ht *hcsTask) close(ctx context.Context) { // This call is idempotent and safe to call multiple times. func (ht *hcsTask) closeHost(ctx context.Context) { ht.closeHostOnce.Do(func() { - log.G(ctx).Debug("hcsTask::closeHostOnce") + entry := log.G(ctx) + entry.Debug("hcsTask::closeHostOnce") if ht.ownsHost && ht.host != nil { if err := ht.host.Close(); err != nil { - log.G(ctx).WithError(err).Error("failed host vm shutdown") + entry.WithError(err).Error("failed host vm shutdown") } } // Send the `init` exec exit notification always. 
@@ -750,7 +781,7 @@ func (ht *hcsTask) closeHost(ctx context.Context) { ExitStatus: exit.ExitStatus, ExitedAt: exit.ExitedAt, }); err != nil { - log.G(ctx).WithError(err).Error("failed to publish TaskExitEventTopic") + entry.WithError(err).Error("failed to publish TaskExitEventTopic") } close(ht.closed) }) diff --git a/internal/guest/network/netns.go b/internal/guest/network/netns.go index e809a6ea04..ca128f2cc6 100644 --- a/internal/guest/network/netns.go +++ b/internal/guest/network/netns.go @@ -181,7 +181,7 @@ func NetNSConfig(ctx context.Context, ifStr string, nsPid int, adapter *guestres addrsStr = append(addrsStr, fmt.Sprintf("%v", addr)) } - entry.WithField("addresses", addrsStr).Debugf("%v: %s[idx=%d,type=%s] is %v", + entry.WithField("addresses", strings.Join(addrsStr, "; ")).Debugf("%v: %s[idx=%d,type=%s] is %v", curNS, attr.Name, attr.Index, link.Type(), attr.OperState) } @@ -209,7 +209,7 @@ func configureLink(ctx context.Context, addr.IP = ip log.G(ctx).WithFields(logrus.Fields{ "allocatedIP": ip, - "IP": addr, + "IP": addr.String(), }).Debugf("parsed ip address %s/%d", ipConfig.IPAddress, ipConfig.PrefixLength) ipAddr := &netlink.Addr{IPNet: addr, Label: ""} if err := netlinkAddrAdd(link, ipAddr); err != nil { @@ -237,7 +237,7 @@ func configureLink(ctx context.Context, } for _, r := range adapter.Routes { - log.G(ctx).WithField("route", r).Debugf("adding a route to interface %s", link.Attrs().Name) + log.G(ctx).WithField("route", log.Format(ctx, r)).Debugf("adding a route to interface %s", link.Attrs().Name) if (r.DestinationPrefix == ipv4GwDestination || r.DestinationPrefix == ipv6GwDestination) && (r.NextHop == ipv4EmptyGw || r.NextHop == ipv6EmptyGw) { diff --git a/internal/guest/runtime/hcsv2/uvm.go b/internal/guest/runtime/hcsv2/uvm.go index 48c09b2018..5699e98298 100644 --- a/internal/guest/runtime/hcsv2/uvm.go +++ b/internal/guest/runtime/hcsv2/uvm.go @@ -1368,7 +1368,14 @@ func modifyNetwork(ctx context.Context, rt guestrequest.RequestType, na 
*guestre // container or not so it must always call `Sync`. return ns.Sync(ctx) case guestrequest.RequestTypeRemove: - ns := GetOrAddNetworkNamespace(na.ID) + ns, err := getNetworkNamespace(na.NamespaceID) + if err != nil { + log.G(ctx).WithFields(logrus.Fields{ + logfields.NamespaceID: na.NamespaceID, + "adapterID": na.ID, + }).WithError(err).Warn("namespace not found for adapter removal, skipping") + return nil + } if err := ns.RemoveAdapter(ctx, na.ID); err != nil { return err } @@ -1540,56 +1547,75 @@ func (h *Host) RemoveContainerFromVirtualPod(containerID string) { return // Container not in any virtual pod } + ctx, entry := log.SetEntry(context.Background(), logrus.Fields{ + "virtualSandboxID": virtualSandboxID, + logfields.ContainerID: containerID, + }) + // Remove from virtual pod if vp, vpExists := h.virtualPods[virtualSandboxID]; vpExists { delete(vp.Containers, containerID) - // If this is the sandbox container, delete the network namespace + // If this is the sandbox container, clean up the network namespace adapters first, + // then remove the namespace. + + // NOTE: Do NOT call [RemoveNetworkNamespace] here. + // When cleaning up the virtual pod's sandbox container on the host, + // [UtilityVM.TearDownNetworking] calls [UtilityVM.RemoveNetNS], + // which, via [UtilityVM.removeNIC], removes the NICs from the uVM and + // sends [guestresource.ResourceTypeNetwork] RPCs that ultimately call [modifyNetwork] + // to remove adapters from guest tracking. + // However, that happens AFTER this function runs. + // Calling [RemoveNetworkNamespace] here would + // fail with "contains adapters" since the host hasn't removed them yet. + // + // The host-driven path handles cleanup in the correct order. if containerID == virtualSandboxID && vp.NetworkNamespace != "" { - if err := RemoveNetworkNamespace(context.Background(), vp.NetworkNamespace); err != nil { - logrus.WithError(err).WithField("virtualSandboxID", virtualSandboxID).
- Warn("Failed to remove virtual pod network namespace (sandbox container removal)") - } + entry.WithField(logfields.NamespaceID, vp.NetworkNamespace).Debug("skipping virtual pod network namespace removal") } // If this was the last container, cleanup the virtual pod if len(vp.Containers) == 0 { - h.cleanupVirtualPod(virtualSandboxID) + h.cleanupVirtualPod(ctx, virtualSandboxID) } } delete(h.containerToVirtualPod, containerID) - logrus.WithFields(logrus.Fields{ - "containerID": containerID, - "virtualSandboxID": virtualSandboxID, - }).Info("Container removed from virtual pod") + entry.Info("Container removed from virtual pod") } // cleanupVirtualPod removes a virtual pod and its cgroup (should be called with mutex held) -func (h *Host) cleanupVirtualPod(virtualSandboxID string) { - if vp, exists := h.virtualPods[virtualSandboxID]; exists { - // Delete the cgroup - if err := vp.CgroupControl.Delete(); err != nil { - logrus.WithError(err).WithField("virtualSandboxID", virtualSandboxID). - Warn("Failed to delete virtual pod cgroup") - } +func (h *Host) cleanupVirtualPod(ctx context.Context, virtualSandboxID string) { + vp, exists := h.virtualPods[virtualSandboxID] + if !exists { + return // virtual pod does not exist + } - // Clean up network namespace if this is the last virtual pod using it - // Only remove if this virtual pod was managing the network namespace - if vp.NetworkNamespace != "" { - // For virtual pods, the network namespace is shared, so we only clean it up - // when the virtual pod itself is being destroyed - if err := RemoveNetworkNamespace(context.Background(), vp.NetworkNamespace); err != nil { - logrus.WithError(err).WithField("virtualSandboxID", virtualSandboxID). 
- Warn("Failed to remove virtual pod network namespace") - } - } + entry := log.G(ctx) - delete(h.virtualPods, virtualSandboxID) + // Delete the cgroup + if err := vp.CgroupControl.Delete(); err != nil { + entry.Warn("failed to delete virtual pod cgroup") + } - logrus.WithField("virtualSandboxID", virtualSandboxID).Info("Virtual pod cleaned up") + // NOTE: Do NOT call [RemoveNetworkNamespace] here. + // See comment in [Host.RemoveContainerFromVirtualPod] for more info. + + // Clean up the virtual pod root directory which contains hostname, hosts, + // resolv.conf, and logs/. These are NOT cleaned by Container.Delete() because + // they live under /run/gcs/c/virtual-pods// which is separate from the + // container's ociBundlePath (/run/gcs/c//). + if vpRootDir := specGuest.VirtualPodRootDir(virtualSandboxID); vpRootDir != "" { + if err := os.RemoveAll(vpRootDir); err != nil { + entry.WithField("path", vpRootDir).WithError(err).Warn("Failed to remove virtual pod root directory") + } else { + entry.WithField("path", vpRootDir).Debug("Removed virtual pod root directory") + } } + + delete(h.virtualPods, virtualSandboxID) + entry.Info("Virtual pod cleaned up") } // setupVirtualPodMountsPath creates mount directories for virtual pods diff --git a/internal/guest/storage/mount.go b/internal/guest/storage/mount.go index a3d10a3b25..15664dd693 100644 --- a/internal/guest/storage/mount.go +++ b/internal/guest/storage/mount.go @@ -16,6 +16,7 @@ import ( "go.opencensus.io/trace" "golang.org/x/sys/unix" + "github.com/Microsoft/hcsshim/internal/log" "github.com/Microsoft/hcsshim/internal/oc" ) @@ -131,13 +132,24 @@ func UnmountPath(ctx context.Context, target string, removeTarget bool) (err err return errors.Wrapf(err, "failed to determine if path '%s' exists", target) } - if err := unixUnmount(target, 0); err != nil { - // If `Unmount` returns `EINVAL` it's not mounted. Just delete the - // folder. - if !gerrors.Is(err, unix.EINVAL) { + // Loop to handle stacked mounts (e.g. 
tmpfs + bind mount from MountRShared). + // Each Unmount call only peels one layer off the stack, so we must keep + // unmounting until EINVAL tells us nothing is mounted anymore. + const maxUnmountAttempts = 5 + unmounted := false + for i := 0; i < maxUnmountAttempts; i++ { + if err := unixUnmount(target, 0); err != nil { + // EINVAL means it's not mounted — we're done unmounting. + if gerrors.Is(err, unix.EINVAL) { + unmounted = true + break + } return errors.Wrapf(err, "failed to unmount path '%s'", target) } } + if !unmounted { + log.G(ctx).WithField("target", target).Warnf("path still mounted after %d unmount attempts", maxUnmountAttempts) + } if removeTarget { return osRemoveAll(target) } diff --git a/internal/hcsoci/create.go b/internal/hcsoci/create.go index e0db4c9c09..3e6606a15f 100644 --- a/internal/hcsoci/create.go +++ b/internal/hcsoci/create.go @@ -129,8 +129,8 @@ func initializeCreateOptions(ctx context.Context, createOptions *CreateOptions) } log.G(ctx).WithFields(logrus.Fields{ - "options": fmt.Sprintf("%+v", createOptions), - "schema": coi.actualSchemaVersion, + "options": log.Format(ctx, createOptions), + "schema": log.Format(ctx, coi.actualSchemaVersion), }).Debug("hcsshim::initializeCreateOptions") return coi, nil @@ -139,43 +139,61 @@ func initializeCreateOptions(ctx context.Context, createOptions *CreateOptions) // configureSandboxNetwork creates a new network namespace for the pod (sandbox) // if required and then adds that namespace to the pod. 
func configureSandboxNetwork(ctx context.Context, coi *createOptionsInternal, r *resources.Resources, ct oci.KubernetesContainerType) error { + virtualPodID := coi.Spec.Annotations[annotations.VirtualPodID] + ctx, entry := log.SetEntry(ctx, logrus.Fields{ + "containerType": ct, + "virtualPodID": virtualPodID, + logfields.NamespaceID: coi.NetworkNamespace, + }) + entry.Debug("hcsoci::configureSandboxNetwork") + if coi.NetworkNamespace != "" { r.SetNetNS(coi.NetworkNamespace) + entry.Trace("using existing network namespace") } else { err := createNetworkNamespace(ctx, coi, r) if err != nil { + entry.WithError(err).Error("failed to create network namespace") return err } } coi.actualNetworkNamespace = r.NetNS() - if coi.HostingSystem != nil { - // Check for virtual pod first containers: if containerID == virtualPodID, treat as sandbox for networking configuration - virtualPodID := coi.Spec.Annotations[annotations.VirtualPodID] - isVirtualPodFirstContainer := virtualPodID != "" && coi.actualID == virtualPodID - - // Only add the network namespace to a standalone or sandbox - // container but not a workload container in a sandbox that inherits - // the namespace. - if ct == oci.KubernetesContainerTypeNone || ct == oci.KubernetesContainerTypeSandbox || isVirtualPodFirstContainer { - if err := coi.HostingSystem.ConfigureNetworking(ctx, coi.actualNetworkNamespace); err != nil { - // No network setup type was specified for this UVM. Create and assign one here unless - // we received a different error. 
- if errors.Is(err, uvm.ErrNoNetworkSetup) { - if err := coi.HostingSystem.CreateAndAssignNetworkSetup(ctx, "", ""); err != nil { - return err - } - if err := coi.HostingSystem.ConfigureNetworking(ctx, coi.actualNetworkNamespace); err != nil { - return err - } - } else { - return err - } - } - r.SetAddedNetNSToVM(true) + ctx, entry = log.SetEntry(ctx, logrus.Fields{logfields.NamespaceID: coi.actualNetworkNamespace}) + + // Check for virtual pod first containers: if containerID == virtualPodID, treat as sandbox for networking configuration + isVirtualPodFirstContainer := virtualPodID != "" && coi.actualID == virtualPodID + + if coi.HostingSystem == nil { + entry.Trace("skipping sandbox network configuration - no hosting system") + return nil + } else if ct != oci.KubernetesContainerTypeNone && ct != oci.KubernetesContainerTypeSandbox && !isVirtualPodFirstContainer { + // Only add the network namespace to a standalone or sandbox container, + // but not a workload container in a sandbox that inherits the namespace. + entry.Trace("skipping sandbox network configuration - not a sandbox/standalone/virtual pod first container") + return nil + } + + entry.WithField("isVirtualPodFirstContainer", isVirtualPodFirstContainer).Trace("configuring uVM network for sandbox") + + err := coi.HostingSystem.ConfigureNetworking(ctx, coi.actualNetworkNamespace) + if errors.Is(err, uvm.ErrNoNetworkSetup) { + // No network setup type was specified for this UVM. 
+ // Create and assign one here and then retry + entry.Debug("retrying configuring sandbox network after creating and assigning uVM network setup") + + if err := coi.HostingSystem.CreateAndAssignNetworkSetup(ctx, "", ""); err != nil { + entry.WithError(err).Error("failed to create and assign network setup for sandbox network") + return err } + err = coi.HostingSystem.ConfigureNetworking(ctx, coi.actualNetworkNamespace) + } + if err != nil { + entry.WithError(err).Error("failed to configure sandbox network within uVM") + return err } + r.SetAddedNetNSToVM(true) return nil } diff --git a/internal/log/hook.go b/internal/log/hook.go index da81023b65..7a91d5e68a 100644 --- a/internal/log/hook.go +++ b/internal/log/hook.go @@ -107,8 +107,8 @@ func (h *Hook) encode(e *logrus.Entry) { switch vv := v.(type) { // built in types - // "json" marshals errors as "{}", so leave alone here - case bool, string, error, uintptr, + case bool, string, uintptr, + error, // "json" marshals errors as "{}", so leave alone here int8, int16, int32, int64, int, uint8, uint32, uint64, uint, float32, float64: diff --git a/internal/logfields/fields.go b/internal/logfields/fields.go index dac5a708e5..b53625edae 100644 --- a/internal/logfields/fields.go +++ b/internal/logfields/fields.go @@ -10,6 +10,7 @@ const ( ID = "id" ContainerID = "cid" ExecID = "eid" + NamespaceID = "namespace-id" ProcessID = "pid" TaskID = "tid" UVMID = "uvm-id" diff --git a/internal/resources/resources.go b/internal/resources/resources.go index a111506f62..c12f9fa7fa 100644 --- a/internal/resources/resources.go +++ b/internal/resources/resources.go @@ -121,6 +121,8 @@ func ReleaseResources(ctx context.Context, r *Resources, vm *uvm.UtilityVM, all log.G(ctx).Warn(err) } r.addedNetNSToVM = false + } else { + log.G(ctx).Debug("skipping network teardown during resource release") } } @@ -134,14 +136,14 @@ func ReleaseResources(ctx context.Context, r *Resources, vm *uvm.UtilityVM, all case *uvm.NetworkEndpoints: if 
r.createdNetNS { if err := r.resources[i].Release(ctx); err != nil { - log.G(ctx).WithError(err).Error("failed to release container resource") + log.G(ctx).WithError(err).Error("failed to release container network endpoints resource") releaseErr = true } r.createdNetNS = false } case *credentials.CCGResource: if err := r.resources[i].Release(ctx); err != nil { - log.G(ctx).WithError(err).Error("failed to release container resource") + log.G(ctx).WithError(err).Error("failed to release container credential resource") releaseErr = true } default: diff --git a/internal/uvm/network.go b/internal/uvm/network.go index 7836e1a7f1..5a468e33fa 100644 --- a/internal/uvm/network.go +++ b/internal/uvm/network.go @@ -22,6 +22,7 @@ import ( hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/hns" "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" "github.com/Microsoft/hcsshim/internal/ncproxyttrpc" "github.com/Microsoft/hcsshim/internal/protocol/guestrequest" "github.com/Microsoft/hcsshim/internal/protocol/guestresource" @@ -58,6 +59,12 @@ func sortEndpoints(endpoints []*hcn.HostComputeEndpoint) { // UVM. We hot add the namespace. We get the endpoints associated with this namespace // and then hot add those endpoints. func (uvm *UtilityVM) SetupNetworkNamespace(ctx context.Context, nsid string) error { + ctx, entry := log.SetEntry(ctx, logrus.Fields{ + logfields.UVMID: uvm.ID(), + logfields.NamespaceID: nsid, + }) + entry.Trace("setting up uVM network namespace") + nsidInsideUVM := nsid // Query endpoints with actual nsid @@ -66,14 +73,18 @@ func (uvm *UtilityVM) SetupNetworkNamespace(ctx context.Context, nsid string) er return err } + entry.WithField("endpoints", log.Format(ctx, endpoints)).Debug("retrieved endpoints from HCN") + // Add the network namespace inside the UVM. // Get the namespace struct from the actual nsid. 
hcnNamespace, err := hcn.GetNamespaceByID(nsid) if err != nil { + entry.WithError(err).Error("failed to get HCN namespace") return err } if err = uvm.AddNetNS(ctx, hcnNamespace); err != nil { + entry.WithError(err).Error("failed to add namespace to UVM") return err } @@ -85,12 +96,15 @@ func (uvm *UtilityVM) SetupNetworkNamespace(ctx context.Context, nsid string) er sortEndpoints(endpoints) if err = uvm.AddEndpointsToNS(ctx, nsidInsideUVM, endpoints); err != nil { + entry.WithError(err).Error("failed to add endpoints to namespace") + // Best effort clean up the NS if removeErr := uvm.RemoveNetNS(ctx, nsidInsideUVM); removeErr != nil { - log.G(ctx).Warn(removeErr) + entry.WithError(removeErr).Error("failed to clean up namespace after failure to add endpoints") } return err } + return nil } @@ -157,16 +171,27 @@ var ErrNoNetworkSetup = errors.New("no network setup present for UVM") // // `addr` is an optional parameter func (uvm *UtilityVM) CreateAndAssignNetworkSetup(ctx context.Context, addr, containerID string) (err error) { + entry := log.G(ctx).WithFields(logrus.Fields{ + logfields.UVMID: uvm.ID(), + logfields.ContainerID: containerID, + "ncProxyAddr": addr, + "ncProxyEnabled": uvm.NCProxyEnabled(), + }) + if uvm.NCProxyEnabled() { if addr == "" || containerID == "" { return errors.New("received empty field(s) for external network setup") } + entry.Debug("creating external network setup with NCProxy") + setup, err := NewExternalNetworkSetup(ctx, uvm, addr, containerID) if err != nil { return err } uvm.networkSetup = setup } else { + entry.Debug("creating internal network setup") + uvm.networkSetup = NewInternalNetworkSetup(uvm) } return nil @@ -175,18 +200,34 @@ func (uvm *UtilityVM) CreateAndAssignNetworkSetup(ctx context.Context, addr, con // ConfigureNetworking configures the utility VMs networking setup using the namespace ID // `nsid`. 
func (uvm *UtilityVM) ConfigureNetworking(ctx context.Context, nsid string) error { + ctx, entry := log.SetEntry(ctx, logrus.Fields{ + logfields.UVMID: uvm.ID(), + logfields.NamespaceID: nsid, + }) + entry.Trace("starting network configuration for namespace") + if uvm.networkSetup != nil { return uvm.networkSetup.ConfigureNetworking(ctx, nsid, NetworkRequestSetup) } + + entry.Error("no network setup present") return ErrNoNetworkSetup } // TearDownNetworking tears down the utility VMs networking setup using the namespace ID // `nsid`. func (uvm *UtilityVM) TearDownNetworking(ctx context.Context, nsid string) error { + ctx, entry := log.SetEntry(ctx, logrus.Fields{ + logfields.UVMID: uvm.ID(), + logfields.NamespaceID: nsid, + }) + entry.Trace("starting network teardown for namespace") + if uvm.networkSetup != nil { return uvm.networkSetup.ConfigureNetworking(ctx, nsid, NetworkRequestTearDown) } + + entry.Error("no network setup present") return ErrNoNetworkSetup } @@ -207,6 +248,11 @@ func NewInternalNetworkSetup(vm *UtilityVM) NetworkSetup { } func (i *internalNetworkSetup) ConfigureNetworking(ctx context.Context, namespaceID string, configType NetworkConfigType) error { + log.G(ctx).WithFields(logrus.Fields{ + "configType": configType, + logfields.NamespaceID: namespaceID, + }).Trace("configuring internal network") + switch configType { case NetworkRequestSetup: if err := i.vm.SetupNetworkNamespace(ctx, namespaceID); err != nil { @@ -247,6 +293,11 @@ func NewExternalNetworkSetup(ctx context.Context, vm *UtilityVM, caAddr, contain } func (e *externalNetworkSetup) ConfigureNetworking(ctx context.Context, namespaceID string, configType NetworkConfigType) error { + log.G(ctx).WithFields(logrus.Fields{ + "configType": configType, + logfields.NamespaceID: namespaceID, + }).Trace("configuring external network") + client, err := e.vm.GetNCProxyClient() if err != nil { return errors.Wrapf(err, "no ncproxy client for UVM %q", e.vm.ID()) @@ -331,6 +382,11 @@ func (endpoints 
*NetworkEndpoints) Release(ctx context.Context) error { // // If a namespace with the same id already exists this returns `ErrNetNSAlreadyAttached`. func (uvm *UtilityVM) AddNetNS(ctx context.Context, hcnNamespace *hcn.HostComputeNamespace) error { + log.G(ctx).WithFields(logrus.Fields{ + logfields.UVMID: uvm.ID(), + logfields.Namespace: log.Format(ctx, hcnNamespace), + }).Trace("adding network namespace to uVM") + uvm.m.Lock() defer uvm.m.Unlock() if _, ok := uvm.namespaces[hcnNamespace.Id]; ok { @@ -420,6 +476,12 @@ func (uvm *UtilityVM) AddEndpointToNSWithID(ctx context.Context, nsID, nicID str // // If no network namespace matches `id` returns `ErrNetNSNotFound`. func (uvm *UtilityVM) AddEndpointsToNS(ctx context.Context, id string, endpoints []*hcn.HostComputeEndpoint) error { + log.G(ctx).WithFields(logrus.Fields{ + logfields.UVMID: uvm.ID(), + "nic-id": id, + "endpoints": log.Format(ctx, endpoints), + }).Trace("adding endpoints to uVM's network namespace") + uvm.m.Lock() defer uvm.m.Unlock() @@ -451,36 +513,51 @@ func (uvm *UtilityVM) AddEndpointsToNS(ctx context.Context, id string, endpoints // // If a namespace matching `id` is not found this command silently succeeds. 
func (uvm *UtilityVM) RemoveNetNS(ctx context.Context, id string) error { + entry := log.G(ctx).WithFields(logrus.Fields{ + logfields.UVMID: uvm.ID(), + logfields.NamespaceID: id, + }) + + entry.Trace("removing network namespace") + uvm.m.Lock() defer uvm.m.Unlock() - if ns, ok := uvm.namespaces[id]; ok { - for _, ninfo := range ns.nics { - if err := uvm.removeNIC(ctx, ninfo.ID, ninfo.Endpoint); err != nil { + + ns, ok := uvm.namespaces[id] + if !ok { + entry.Warn("cannot remove non-existant namespace from uVM") + return nil + } + + entry.WithField("nics", log.Format(ctx, ns.nics)).Debug("removing NICs from namespace") + for _, ninfo := range ns.nics { + if err := uvm.removeNIC(ctx, ninfo.ID, ninfo.Endpoint); err != nil { + return err + } + ns.nics[ninfo.Endpoint.Id] = nil + } + + // Remove the Guest Network namespace + if uvm.isNetworkNamespaceSupported() { + if uvm.operatingSystem == "windows" { + hcnNamespace, err := hcn.GetNamespaceByID(id) + if err != nil { return err } - ns.nics[ninfo.Endpoint.Id] = nil - } - // Remove the Guest Network namespace - if uvm.isNetworkNamespaceSupported() { - if uvm.operatingSystem == "windows" { - hcnNamespace, err := hcn.GetNamespaceByID(id) - if err != nil { - return err - } - guestNamespace := hcsschema.ModifySettingRequest{ - GuestRequest: guestrequest.ModificationRequest{ - ResourceType: guestresource.ResourceTypeNetworkNamespace, - RequestType: guestrequest.RequestTypeRemove, - Settings: hcnNamespace, - }, - } - if err := uvm.modify(ctx, &guestNamespace); err != nil { - return err - } + guestNamespace := hcsschema.ModifySettingRequest{ + GuestRequest: guestrequest.ModificationRequest{ + ResourceType: guestresource.ResourceTypeNetworkNamespace, + RequestType: guestrequest.RequestTypeRemove, + Settings: hcnNamespace, + }, + } + entry.Debug("removing namespace from guest") + if err := uvm.modify(ctx, &guestNamespace); err != nil { + return err } } - delete(uvm.namespaces, id) } + delete(uvm.namespaces, id) return nil } @@ 
-621,6 +698,12 @@ func convertToLCOWReq(id string, endpoint *hcn.HostComputeEndpoint, policyBasedR // addNIC adds a nic to the Utility VM. func (uvm *UtilityVM) addNIC(ctx context.Context, id string, endpoint *hcn.HostComputeEndpoint) error { + log.G(ctx).WithFields(logrus.Fields{ + logfields.UVMID: uvm.ID(), + "nic-id": id, + "endpoint": log.Format(ctx, endpoint), + }).Trace("adding NIC to uVM") + // First a pre-add. This is a guest-only request and is only done on Windows. if uvm.operatingSystem == "windows" { preAddRequest := hcsschema.ModifySettingRequest{ @@ -681,6 +764,12 @@ func (uvm *UtilityVM) addNIC(ctx context.Context, id string, endpoint *hcn.HostC } func (uvm *UtilityVM) removeNIC(ctx context.Context, id string, endpoint *hcn.HostComputeEndpoint) error { + log.G(ctx).WithFields(logrus.Fields{ + logfields.UVMID: uvm.ID(), + "nic-id": id, + "endpoint": log.Format(ctx, endpoint), + }).Trace("removing NIC from uVM") + request := hcsschema.ModifySettingRequest{ RequestType: guestrequest.RequestTypeRemove, ResourcePath: fmt.Sprintf(resourcepaths.NetworkResourceFormat, id), @@ -706,16 +795,13 @@ func (uvm *UtilityVM) removeNIC(ctx context.Context, id string, endpoint *hcn.Ho RequestType: guestrequest.RequestTypeRemove, Settings: &guestresource.LCOWNetworkAdapter{ NamespaceID: endpoint.HostComputeNamespace, - ID: endpoint.Id, + ID: id, }, } } } - if err := uvm.modify(ctx, &request); err != nil { - return err - } - return nil + return uvm.modify(ctx, &request) } // Removes all NICs added to this uvm. 
diff --git a/internal/uvm/virtual_device.go b/internal/uvm/virtual_device.go index 7a0518f926..89c3356b91 100644 --- a/internal/uvm/virtual_device.go +++ b/internal/uvm/virtual_device.go @@ -7,9 +7,12 @@ import ( "fmt" "github.com/Microsoft/go-winio/pkg/guid" + "github.com/sirupsen/logrus" "github.com/Microsoft/hcsshim/internal/hcs/resourcepaths" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" "github.com/Microsoft/hcsshim/internal/protocol/guestrequest" "github.com/Microsoft/hcsshim/internal/protocol/guestresource" "github.com/Microsoft/hcsshim/osversion" @@ -102,6 +105,12 @@ func (uvm *UtilityVM) AssignDevice(ctx context.Context, deviceID string, index u vmBusGUID = guid.String() } + ctx, entry := log.SetEntry(ctx, logrus.Fields{ + logfields.UVMID: uvm.ID(), + "deviceID": deviceID, + "funcIndex": index, + }) + key := VPCIDeviceID{ deviceInstanceID: deviceID, virtualFunctionIndex: index, @@ -113,6 +122,10 @@ func (uvm *UtilityVM) AssignDevice(ctx context.Context, deviceID string, index u existingVPCIDevice := uvm.vpciDevices[key] if existingVPCIDevice != nil { existingVPCIDevice.refCount++ + entry.WithFields(logrus.Fields{ + "vmBusGUID": existingVPCIDevice.VMBusGUID, + "refCount": existingVPCIDevice.refCount, + }).Debug("reusing existing vPCI device") return existingVPCIDevice, nil } @@ -153,6 +166,8 @@ func (uvm *UtilityVM) AssignDevice(ctx context.Context, deviceID string, index u } } + entry.WithField("vmBusGUID", vmBusGUID).Debug("assigning vPCI device") + if err := uvm.modify(ctx, request); err != nil { return nil, err } @@ -164,6 +179,7 @@ func (uvm *UtilityVM) AssignDevice(ctx context.Context, deviceID string, index u refCount: 1, } uvm.vpciDevices[key] = device + return device, nil } @@ -176,21 +192,37 @@ func (uvm *UtilityVM) RemoveDevice(ctx context.Context, deviceInstanceID string, virtualFunctionIndex: index, } + ctx, entry := 
log.SetEntry(ctx, logrus.Fields{ + logfields.UVMID: uvm.ID(), + "deviceID": deviceInstanceID, + "funcIndex": index, + }) + uvm.m.Lock() defer uvm.m.Unlock() vpci := uvm.vpciDevices[key] if vpci == nil { + entry.Error("failed to remove nonexistant vPCI device ") return fmt.Errorf("no device with ID %s and index %d is present on the uvm %s", deviceInstanceID, index, uvm.ID()) } vpci.refCount-- + entry = entry.WithFields(logrus.Fields{ + "vmBusGUID": vpci.VMBusGUID, + "refCount": vpci.refCount, + }) + + entry.Trace("vPCI device refcount updated") if vpci.refCount == 0 { + entry.Debug("removing vPCI device from uVM") + delete(uvm.vpciDevices, key) return uvm.modify(ctx, &hcsschema.ModifySettingRequest{ ResourcePath: fmt.Sprintf(resourcepaths.VirtualPCIResourceFormat, vpci.VMBusGUID), RequestType: guestrequest.RequestTypeRemove, }) } + return nil }