diff --git a/cmd/containerd-shim-runhcs-v1/task_hcs.go b/cmd/containerd-shim-runhcs-v1/task_hcs.go index 1b12777e4c..9fbb1faf35 100644 --- a/cmd/containerd-shim-runhcs-v1/task_hcs.go +++ b/cmd/containerd-shim-runhcs-v1/task_hcs.go @@ -188,9 +188,11 @@ func newHcsTask( ownsParent bool, req *task.CreateTaskRequest, s *specs.Spec) (_ shimTask, err error) { - log.G(ctx).WithFields(logrus.Fields{ - "tid": req.ID, - "ownsParent": ownsParent, + ctx, entry := log.SetEntry(ctx, logrus.Fields{logfields.TaskID: req.ID}) + entry.WithFields(logrus.Fields{ + "hasHost": parent != nil, + "isVirtualPod": s.Annotations != nil && s.Annotations[annotations.VirtualPodID] != "", + "ownsHost": ownsParent, }).Debug("newHcsTask") owner := filepath.Base(os.Args[0]) @@ -495,22 +497,30 @@ func (ht *hcsTask) DeleteExec(ctx context.Context, eid string) (int, uint32, tim } if eid == "" { + entry := log.G(ctx).WithFields(logrus.Fields{ + logfields.TaskID: ht.id, + logfields.ExecID: eid, + }) + // We are killing the init task, so we expect the container to be // stopped after this. // // The task process may have already exited, and the status set to // shimExecStateExited, but resources may still be in the process // of being cleaned up. Wait for ht.closed to be closed. This signals - // that waitInitExit() has finished destroying container resources, - // and layers were umounted. + // that [hcsTask.waitInitExit]/[hcsTask.waitForHostExit] has finished destroying + // container resources and that layers were umounted. // If the shim exits before resources are cleaned up, those resources // will remain locked and untracked, which leads to lingering sandboxes // and container resources like base vhdx. 
+ const timeout = 30 * time.Second + entry.WithField(logfields.Timeout, timeout).Trace("waiting for task to be closed") select { - case <-time.After(30 * time.Second): - log.G(ctx).Error("timed out waiting for resource cleanup") + case <-time.After(timeout): + entry.WithField(logfields.Timeout, timeout).Error("timed out waiting for task to close while deleting init exec") return 0, 0, time.Time{}, errors.Wrap(hcs.ErrTimeout, "waiting for container resource cleanup") case <-ht.closed: + entry.Trace("received task close signal") } // The init task has now exited. A ForceExit() has already been sent to @@ -608,7 +618,10 @@ func (ht *hcsTask) Wait() *task.StateResponse { func (ht *hcsTask) waitInitExit() { ctx, span := oc.StartSpan(context.Background(), "hcsTask::waitInitExit") defer span.End() - span.AddAttributes(trace.StringAttribute("tid", ht.id)) + span.AddAttributes( + trace.StringAttribute("tid", ht.id), + trace.BoolAttribute("host", ht.host != nil), + trace.BoolAttribute("ownsHost", ht.ownsHost)) // Wait for it to exit on its own ht.init.Wait() @@ -627,7 +640,10 @@ func (ht *hcsTask) waitInitExit() { func (ht *hcsTask) waitForHostExit() { ctx, span := oc.StartSpan(context.Background(), "hcsTask::waitForHostExit") defer span.End() - span.AddAttributes(trace.StringAttribute("tid", ht.id)) + span.AddAttributes( + trace.StringAttribute("tid", ht.id), + trace.BoolAttribute("host", ht.host != nil), + trace.BoolAttribute("ownsHost", ht.ownsHost)) err := ht.host.WaitCtx(ctx) if err != nil { @@ -655,13 +671,16 @@ func (ht *hcsTask) waitForHostExit() { // == nil`. func (ht *hcsTask) close(ctx context.Context) { ht.closeOnce.Do(func() { - log.G(ctx).Debug("hcsTask::closeOnce") + entry := log.G(ctx) + entry.Debug("hcsTask::closeOnce") // ht.c should never be nil for a real task but in testing we stub // this to avoid a nil dereference. We really should introduce a // method or interface for ht.c operations that we can stub for // testing. 
if ht.c != nil { + const tearDownTimeout = 30 * time.Second + // Do our best attempt to tear down the container. // TODO: unify timeout select statements and use [ht.c.WaitCtx] and [context.WithTimeout] var werr error @@ -670,52 +689,63 @@ func (ht *hcsTask) close(ctx context.Context) { werr = ht.c.Wait() close(ch) }() + err := ht.c.Shutdown(ctx) if err != nil { - log.G(ctx).WithError(err).Error("failed to shutdown container") + entry.WithError(err).Error("failed to shutdown container") } else { - t := time.NewTimer(time.Second * 30) + t := time.NewTimer(tearDownTimeout) select { case <-ch: err = werr t.Stop() if err != nil { - log.G(ctx).WithError(err).Error("failed to wait for container shutdown") + entry.WithError(err).Error("failed to wait for container shutdown") } case <-t.C: err = hcs.ErrTimeout - log.G(ctx).WithError(err).Error("failed to wait for container shutdown") + entry.WithFields(logrus.Fields{ + logfields.Timeout: tearDownTimeout, + logrus.ErrorKey: err, + }).Error("timed out while waiting for container shutdown") } } if err != nil { err = ht.c.Terminate(ctx) if err != nil { - log.G(ctx).WithError(err).Error("failed to terminate container") + entry.WithError(err).Error("failed to terminate container") } else { - t := time.NewTimer(time.Second * 30) + t := time.NewTimer(tearDownTimeout) select { case <-ch: err = werr t.Stop() if err != nil { - log.G(ctx).WithError(err).Error("failed to wait for container terminate") + entry.WithError(err).Error("failed to wait for container terminate") } case <-t.C: - log.G(ctx).WithError(hcs.ErrTimeout).Error("failed to wait for container terminate") + entry.WithFields(logrus.Fields{ + logfields.Timeout: tearDownTimeout, + logrus.ErrorKey: hcs.ErrTimeout, + }).Error("timed out while waiting for container terminate") } } } // Release any resources associated with the container. 
+ entry.Trace("starting task resource cleanup") + if err := resources.ReleaseResources(ctx, ht.cr, ht.host, true); err != nil { - log.G(ctx).WithError(err).Error("failed to release container resources") + entry.WithError(err).Error("failed to release container resources") } // Close the container handle invalidating all future access. if err := ht.c.Close(); err != nil { - log.G(ctx).WithError(err).Error("failed to close container") + entry.WithError(err).Error("failed to close container") } + + entry.Trace("task resource cleanup completed") } ht.closeHost(ctx) }) @@ -730,11 +760,12 @@ func (ht *hcsTask) close(ctx context.Context) { // This call is idempotent and safe to call multiple times. func (ht *hcsTask) closeHost(ctx context.Context) { ht.closeHostOnce.Do(func() { - log.G(ctx).Debug("hcsTask::closeHostOnce") + entry := log.G(ctx) + entry.Debug("hcsTask::closeHostOnce") if ht.ownsHost && ht.host != nil { if err := ht.host.Close(); err != nil { - log.G(ctx).WithError(err).Error("failed host vm shutdown") + entry.WithError(err).Error("failed host vm shutdown") } } // Send the `init` exec exit notification always. 
@@ -750,7 +781,7 @@ func (ht *hcsTask) closeHost(ctx context.Context) { ExitStatus: exit.ExitStatus, ExitedAt: exit.ExitedAt, }); err != nil { - log.G(ctx).WithError(err).Error("failed to publish TaskExitEventTopic") + entry.WithError(err).Error("failed to publish TaskExitEventTopic") } close(ht.closed) }) diff --git a/internal/guest/network/netns.go b/internal/guest/network/netns.go index e809a6ea04..ca128f2cc6 100644 --- a/internal/guest/network/netns.go +++ b/internal/guest/network/netns.go @@ -181,7 +181,7 @@ func NetNSConfig(ctx context.Context, ifStr string, nsPid int, adapter *guestres addrsStr = append(addrsStr, fmt.Sprintf("%v", addr)) } - entry.WithField("addresses", addrsStr).Debugf("%v: %s[idx=%d,type=%s] is %v", + entry.WithField("addresses", strings.Join(addrsStr, "; ")).Debugf("%v: %s[idx=%d,type=%s] is %v", curNS, attr.Name, attr.Index, link.Type(), attr.OperState) } @@ -209,7 +209,7 @@ func configureLink(ctx context.Context, addr.IP = ip log.G(ctx).WithFields(logrus.Fields{ "allocatedIP": ip, - "IP": addr, + "IP": addr.String(), }).Debugf("parsed ip address %s/%d", ipConfig.IPAddress, ipConfig.PrefixLength) ipAddr := &netlink.Addr{IPNet: addr, Label: ""} if err := netlinkAddrAdd(link, ipAddr); err != nil { @@ -237,7 +237,7 @@ func configureLink(ctx context.Context, } for _, r := range adapter.Routes { - log.G(ctx).WithField("route", r).Debugf("adding a route to interface %s", link.Attrs().Name) + log.G(ctx).WithField("route", log.Format(ctx, r)).Debugf("adding a route to interface %s", link.Attrs().Name) if (r.DestinationPrefix == ipv4GwDestination || r.DestinationPrefix == ipv6GwDestination) && (r.NextHop == ipv4EmptyGw || r.NextHop == ipv6EmptyGw) { diff --git a/internal/guest/runtime/hcsv2/uvm.go b/internal/guest/runtime/hcsv2/uvm.go index 48c09b2018..5699e98298 100644 --- a/internal/guest/runtime/hcsv2/uvm.go +++ b/internal/guest/runtime/hcsv2/uvm.go @@ -1368,7 +1368,14 @@ func modifyNetwork(ctx context.Context, rt guestrequest.RequestType, na 
*guestre
 		// container or not so it must always call `Sync`.
 		return ns.Sync(ctx)
 	case guestrequest.RequestTypeRemove:
-		ns := GetOrAddNetworkNamespace(na.ID)
+		ns, err := getNetworkNamespace(na.NamespaceID)
+		if err != nil {
+			log.G(ctx).WithFields(logrus.Fields{
+				logfields.NamespaceID: na.NamespaceID,
+				"adapterID":           na.ID,
+			}).WithError(err).Warn("namespace not found for adapter removal, skipping")
+			return nil
+		}
 		if err := ns.RemoveAdapter(ctx, na.ID); err != nil {
 			return err
 		}
@@ -1540,56 +1547,75 @@ func (h *Host) RemoveContainerFromVirtualPod(containerID string) {
 		return // Container not in any virtual pod
 	}
 
+	ctx, entry := log.SetEntry(context.Background(), logrus.Fields{
+		"virtualSandboxID":    virtualSandboxID,
+		logfields.ContainerID: containerID,
+	})
+
 	// Remove from virtual pod
 	if vp, vpExists := h.virtualPods[virtualSandboxID]; vpExists {
 		delete(vp.Containers, containerID)
 
-		// If this is the sandbox container, delete the network namespace
+		// If this is the sandbox container, clean up the network namespace adapters first,
+		// then remove the namespace.
+
+		// NOTE: Do NOT call [RemoveNetworkNamespace] here.
+		// When cleaning up the virtual pod's sandbox container on the host,
+		// [UtilityVM.TearDownNetworking] calls [UtilityVM.RemoveNetNS],
+		// which, via [UtilityVM.removeNIC], removes the NICs from the uVM and
+		// sends [guestresource.ResourceTypeNetwork] RPCs that ultimately call [modifyNetwork]
+		// to remove adapters from guest tracking.
+		// However, that happens AFTER this function runs.
+		// Calling [RemoveNetworkNamespace] here would
+		// fail with "contains adapters" since the host hasn't removed them yet.
+		//
+		// The host-driven path handles cleanup in the correct order.
 		if containerID == virtualSandboxID && vp.NetworkNamespace != "" {
-			if err := RemoveNetworkNamespace(context.Background(), vp.NetworkNamespace); err != nil {
-				logrus.WithError(err).WithField("virtualSandboxID", virtualSandboxID).
- Warn("Failed to remove virtual pod network namespace (sandbox container removal)") - } + entry.WithField(logfields.NamespaceID, vp.NetworkNamespace).Debug("skipping virtual pod network namespace removal") } // If this was the last container, cleanup the virtual pod if len(vp.Containers) == 0 { - h.cleanupVirtualPod(virtualSandboxID) + h.cleanupVirtualPod(ctx, virtualSandboxID) } } delete(h.containerToVirtualPod, containerID) - logrus.WithFields(logrus.Fields{ - "containerID": containerID, - "virtualSandboxID": virtualSandboxID, - }).Info("Container removed from virtual pod") + entry.Info("Container removed from virtual pod") } // cleanupVirtualPod removes a virtual pod and its cgroup (should be called with mutex held) -func (h *Host) cleanupVirtualPod(virtualSandboxID string) { - if vp, exists := h.virtualPods[virtualSandboxID]; exists { - // Delete the cgroup - if err := vp.CgroupControl.Delete(); err != nil { - logrus.WithError(err).WithField("virtualSandboxID", virtualSandboxID). - Warn("Failed to delete virtual pod cgroup") - } +func (h *Host) cleanupVirtualPod(ctx context.Context, virtualSandboxID string) { + vp, exists := h.virtualPods[virtualSandboxID] + if !exists { + return // virtual pod does not exist + } - // Clean up network namespace if this is the last virtual pod using it - // Only remove if this virtual pod was managing the network namespace - if vp.NetworkNamespace != "" { - // For virtual pods, the network namespace is shared, so we only clean it up - // when the virtual pod itself is being destroyed - if err := RemoveNetworkNamespace(context.Background(), vp.NetworkNamespace); err != nil { - logrus.WithError(err).WithField("virtualSandboxID", virtualSandboxID). 
-					Warn("Failed to remove virtual pod network namespace")
-			}
-		}
+	entry := log.G(ctx)
 
-		delete(h.virtualPods, virtualSandboxID)
+	// Delete the cgroup
+	if err := vp.CgroupControl.Delete(); err != nil {
+		entry.WithError(err).Warn("failed to delete virtual pod cgroup")
+	}
 
-		logrus.WithField("virtualSandboxID", virtualSandboxID).Info("Virtual pod cleaned up")
+	// NOTE: Do NOT call [RemoveNetworkNamespace] here.
+	// See comment in [Host.RemoveContainerFromVirtualPod] for more info.
+
+	// Clean up the virtual pod root directory which contains hostname, hosts,
+	// resolv.conf, and logs/. These are NOT cleaned by Container.Delete() because
+	// they live under /run/gcs/c/virtual-pods/<id>/ which is separate from the
+	// container's ociBundlePath (/run/gcs/c/<id>/).
+	if vpRootDir := specGuest.VirtualPodRootDir(virtualSandboxID); vpRootDir != "" {
+		if err := os.RemoveAll(vpRootDir); err != nil {
+			entry.WithField("path", vpRootDir).WithError(err).Warn("Failed to remove virtual pod root directory")
+		} else {
+			entry.WithField("path", vpRootDir).Debug("Removed virtual pod root directory")
+		}
 	}
+
+	delete(h.virtualPods, virtualSandboxID)
+	entry.Info("Virtual pod cleaned up")
 }
 
 // setupVirtualPodMountsPath creates mount directories for virtual pods
diff --git a/internal/guest/storage/mount.go b/internal/guest/storage/mount.go
index a3d10a3b25..15664dd693 100644
--- a/internal/guest/storage/mount.go
+++ b/internal/guest/storage/mount.go
@@ -16,6 +16,7 @@ import (
 	"go.opencensus.io/trace"
 	"golang.org/x/sys/unix"
 
+	"github.com/Microsoft/hcsshim/internal/log"
 	"github.com/Microsoft/hcsshim/internal/oc"
 )
 
@@ -131,13 +132,24 @@ func UnmountPath(ctx context.Context, target string, removeTarget bool) (err err
 		return errors.Wrapf(err, "failed to determine if path '%s' exists", target)
 	}
 
-	if err := unixUnmount(target, 0); err != nil {
-		// If `Unmount` returns `EINVAL` it's not mounted. Just delete the
-		// folder.
-		if !gerrors.Is(err, unix.EINVAL) {
+	// Loop to handle stacked mounts (e.g. tmpfs + bind mount from MountRShared).
+	// Each Unmount call only peels one layer off the stack, so we must keep
+	// unmounting until EINVAL tells us nothing is mounted anymore.
+	const maxUnmountAttempts = 5
+	unmounted := false
+	for i := 0; i < maxUnmountAttempts; i++ {
+		if err := unixUnmount(target, 0); err != nil {
+			// EINVAL means it's not mounted — we're done unmounting.
+			if gerrors.Is(err, unix.EINVAL) {
+				unmounted = true
+				break
+			}
 			return errors.Wrapf(err, "failed to unmount path '%s'", target)
 		}
 	}
+	if !unmounted {
+		log.G(ctx).WithField("target", target).Warnf("path still mounted after %d unmount attempts", maxUnmountAttempts)
+	}
 	if removeTarget {
 		return osRemoveAll(target)
 	}
diff --git a/internal/hcsoci/create.go b/internal/hcsoci/create.go
index e0db4c9c09..3e6606a15f 100644
--- a/internal/hcsoci/create.go
+++ b/internal/hcsoci/create.go
@@ -129,8 +129,8 @@ func initializeCreateOptions(ctx context.Context, createOptions *CreateOptions)
 	}
 
 	log.G(ctx).WithFields(logrus.Fields{
-		"options": fmt.Sprintf("%+v", createOptions),
-		"schema":  coi.actualSchemaVersion,
+		"options": log.Format(ctx, createOptions),
+		"schema":  log.Format(ctx, coi.actualSchemaVersion),
 	}).Debug("hcsshim::initializeCreateOptions")
 
 	return coi, nil
@@ -139,43 +139,61 @@ func initializeCreateOptions(ctx context.Context, createOptions *CreateOptions)
 
 // configureSandboxNetwork creates a new network namespace for the pod (sandbox)
 // if required and then adds that namespace to the pod.
func configureSandboxNetwork(ctx context.Context, coi *createOptionsInternal, r *resources.Resources, ct oci.KubernetesContainerType) error { + virtualPodID := coi.Spec.Annotations[annotations.VirtualPodID] + ctx, entry := log.SetEntry(ctx, logrus.Fields{ + "containerType": ct, + "virtualPodID": virtualPodID, + logfields.NamespaceID: coi.NetworkNamespace, + }) + entry.Debug("hcsoci::configureSandboxNetwork") + if coi.NetworkNamespace != "" { r.SetNetNS(coi.NetworkNamespace) + entry.Trace("using existing network namespace") } else { err := createNetworkNamespace(ctx, coi, r) if err != nil { + entry.WithError(err).Error("failed to create network namespace") return err } } coi.actualNetworkNamespace = r.NetNS() - if coi.HostingSystem != nil { - // Check for virtual pod first containers: if containerID == virtualPodID, treat as sandbox for networking configuration - virtualPodID := coi.Spec.Annotations[annotations.VirtualPodID] - isVirtualPodFirstContainer := virtualPodID != "" && coi.actualID == virtualPodID - - // Only add the network namespace to a standalone or sandbox - // container but not a workload container in a sandbox that inherits - // the namespace. - if ct == oci.KubernetesContainerTypeNone || ct == oci.KubernetesContainerTypeSandbox || isVirtualPodFirstContainer { - if err := coi.HostingSystem.ConfigureNetworking(ctx, coi.actualNetworkNamespace); err != nil { - // No network setup type was specified for this UVM. Create and assign one here unless - // we received a different error. 
- if errors.Is(err, uvm.ErrNoNetworkSetup) { - if err := coi.HostingSystem.CreateAndAssignNetworkSetup(ctx, "", ""); err != nil { - return err - } - if err := coi.HostingSystem.ConfigureNetworking(ctx, coi.actualNetworkNamespace); err != nil { - return err - } - } else { - return err - } - } - r.SetAddedNetNSToVM(true) + ctx, entry = log.SetEntry(ctx, logrus.Fields{logfields.NamespaceID: coi.actualNetworkNamespace}) + + // Check for virtual pod first containers: if containerID == virtualPodID, treat as sandbox for networking configuration + isVirtualPodFirstContainer := virtualPodID != "" && coi.actualID == virtualPodID + + if coi.HostingSystem == nil { + entry.Trace("skipping sandbox network configuration - no hosting system") + return nil + } else if ct != oci.KubernetesContainerTypeNone && ct != oci.KubernetesContainerTypeSandbox && !isVirtualPodFirstContainer { + // Only add the network namespace to a standalone or sandbox container, + // but not a workload container in a sandbox that inherits the namespace. + entry.Trace("skipping sandbox network configuration - not a sandbox/standalone/virtual pod first container") + return nil + } + + entry.WithField("isVirtualPodFirstContainer", isVirtualPodFirstContainer).Trace("configuring uVM network for sandbox") + + err := coi.HostingSystem.ConfigureNetworking(ctx, coi.actualNetworkNamespace) + if errors.Is(err, uvm.ErrNoNetworkSetup) { + // No network setup type was specified for this UVM. 
+ // Create and assign one here and then retry + entry.Debug("retrying configuring sandbox network after creating and assigning uVM network setup") + + if err := coi.HostingSystem.CreateAndAssignNetworkSetup(ctx, "", ""); err != nil { + entry.WithError(err).Error("failed to create and assign network setup for sandbox network") + return err } + err = coi.HostingSystem.ConfigureNetworking(ctx, coi.actualNetworkNamespace) + } + if err != nil { + entry.WithError(err).Error("failed to configure sandbox network within uVM") + return err } + r.SetAddedNetNSToVM(true) return nil } diff --git a/internal/log/hook.go b/internal/log/hook.go index da81023b65..7a91d5e68a 100644 --- a/internal/log/hook.go +++ b/internal/log/hook.go @@ -107,8 +107,8 @@ func (h *Hook) encode(e *logrus.Entry) { switch vv := v.(type) { // built in types - // "json" marshals errors as "{}", so leave alone here - case bool, string, error, uintptr, + case bool, string, uintptr, + error, // "json" marshals errors as "{}", so leave alone here int8, int16, int32, int64, int, uint8, uint32, uint64, uint, float32, float64: diff --git a/internal/logfields/fields.go b/internal/logfields/fields.go index dac5a708e5..b53625edae 100644 --- a/internal/logfields/fields.go +++ b/internal/logfields/fields.go @@ -10,6 +10,7 @@ const ( ID = "id" ContainerID = "cid" ExecID = "eid" + NamespaceID = "namespace-id" ProcessID = "pid" TaskID = "tid" UVMID = "uvm-id" diff --git a/internal/resources/resources.go b/internal/resources/resources.go index a111506f62..c12f9fa7fa 100644 --- a/internal/resources/resources.go +++ b/internal/resources/resources.go @@ -121,6 +121,8 @@ func ReleaseResources(ctx context.Context, r *Resources, vm *uvm.UtilityVM, all log.G(ctx).Warn(err) } r.addedNetNSToVM = false + } else { + log.G(ctx).Debug("skipping network teardown during resource release") } } @@ -134,14 +136,14 @@ func ReleaseResources(ctx context.Context, r *Resources, vm *uvm.UtilityVM, all case *uvm.NetworkEndpoints: if 
r.createdNetNS { if err := r.resources[i].Release(ctx); err != nil { - log.G(ctx).WithError(err).Error("failed to release container resource") + log.G(ctx).WithError(err).Error("failed to release container network endpoints resource") releaseErr = true } r.createdNetNS = false } case *credentials.CCGResource: if err := r.resources[i].Release(ctx); err != nil { - log.G(ctx).WithError(err).Error("failed to release container resource") + log.G(ctx).WithError(err).Error("failed to release container credential resource") releaseErr = true } default: diff --git a/internal/uvm/network.go b/internal/uvm/network.go index 7836e1a7f1..5a468e33fa 100644 --- a/internal/uvm/network.go +++ b/internal/uvm/network.go @@ -22,6 +22,7 @@ import ( hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/hns" "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" "github.com/Microsoft/hcsshim/internal/ncproxyttrpc" "github.com/Microsoft/hcsshim/internal/protocol/guestrequest" "github.com/Microsoft/hcsshim/internal/protocol/guestresource" @@ -58,6 +59,12 @@ func sortEndpoints(endpoints []*hcn.HostComputeEndpoint) { // UVM. We hot add the namespace. We get the endpoints associated with this namespace // and then hot add those endpoints. func (uvm *UtilityVM) SetupNetworkNamespace(ctx context.Context, nsid string) error { + ctx, entry := log.SetEntry(ctx, logrus.Fields{ + logfields.UVMID: uvm.ID(), + logfields.NamespaceID: nsid, + }) + entry.Trace("setting up uVM network namespace") + nsidInsideUVM := nsid // Query endpoints with actual nsid @@ -66,14 +73,18 @@ func (uvm *UtilityVM) SetupNetworkNamespace(ctx context.Context, nsid string) er return err } + entry.WithField("endpoints", log.Format(ctx, endpoints)).Debug("retrieved endpoints from HCN") + // Add the network namespace inside the UVM. // Get the namespace struct from the actual nsid. 
hcnNamespace, err := hcn.GetNamespaceByID(nsid) if err != nil { + entry.WithError(err).Error("failed to get HCN namespace") return err } if err = uvm.AddNetNS(ctx, hcnNamespace); err != nil { + entry.WithError(err).Error("failed to add namespace to UVM") return err } @@ -85,12 +96,15 @@ func (uvm *UtilityVM) SetupNetworkNamespace(ctx context.Context, nsid string) er sortEndpoints(endpoints) if err = uvm.AddEndpointsToNS(ctx, nsidInsideUVM, endpoints); err != nil { + entry.WithError(err).Error("failed to add endpoints to namespace") + // Best effort clean up the NS if removeErr := uvm.RemoveNetNS(ctx, nsidInsideUVM); removeErr != nil { - log.G(ctx).Warn(removeErr) + entry.WithError(removeErr).Error("failed to clean up namespace after failure to add endpoints") } return err } + return nil } @@ -157,16 +171,27 @@ var ErrNoNetworkSetup = errors.New("no network setup present for UVM") // // `addr` is an optional parameter func (uvm *UtilityVM) CreateAndAssignNetworkSetup(ctx context.Context, addr, containerID string) (err error) { + entry := log.G(ctx).WithFields(logrus.Fields{ + logfields.UVMID: uvm.ID(), + logfields.ContainerID: containerID, + "ncProxyAddr": addr, + "ncProxyEnabled": uvm.NCProxyEnabled(), + }) + if uvm.NCProxyEnabled() { if addr == "" || containerID == "" { return errors.New("received empty field(s) for external network setup") } + entry.Debug("creating external network setup with NCProxy") + setup, err := NewExternalNetworkSetup(ctx, uvm, addr, containerID) if err != nil { return err } uvm.networkSetup = setup } else { + entry.Debug("creating internal network setup") + uvm.networkSetup = NewInternalNetworkSetup(uvm) } return nil @@ -175,18 +200,34 @@ func (uvm *UtilityVM) CreateAndAssignNetworkSetup(ctx context.Context, addr, con // ConfigureNetworking configures the utility VMs networking setup using the namespace ID // `nsid`. 
func (uvm *UtilityVM) ConfigureNetworking(ctx context.Context, nsid string) error { + ctx, entry := log.SetEntry(ctx, logrus.Fields{ + logfields.UVMID: uvm.ID(), + logfields.NamespaceID: nsid, + }) + entry.Trace("starting network configuration for namespace") + if uvm.networkSetup != nil { return uvm.networkSetup.ConfigureNetworking(ctx, nsid, NetworkRequestSetup) } + + entry.Error("no network setup present") return ErrNoNetworkSetup } // TearDownNetworking tears down the utility VMs networking setup using the namespace ID // `nsid`. func (uvm *UtilityVM) TearDownNetworking(ctx context.Context, nsid string) error { + ctx, entry := log.SetEntry(ctx, logrus.Fields{ + logfields.UVMID: uvm.ID(), + logfields.NamespaceID: nsid, + }) + entry.Trace("starting network teardown for namespace") + if uvm.networkSetup != nil { return uvm.networkSetup.ConfigureNetworking(ctx, nsid, NetworkRequestTearDown) } + + entry.Error("no network setup present") return ErrNoNetworkSetup } @@ -207,6 +248,11 @@ func NewInternalNetworkSetup(vm *UtilityVM) NetworkSetup { } func (i *internalNetworkSetup) ConfigureNetworking(ctx context.Context, namespaceID string, configType NetworkConfigType) error { + log.G(ctx).WithFields(logrus.Fields{ + "configType": configType, + logfields.NamespaceID: namespaceID, + }).Trace("configuring internal network") + switch configType { case NetworkRequestSetup: if err := i.vm.SetupNetworkNamespace(ctx, namespaceID); err != nil { @@ -247,6 +293,11 @@ func NewExternalNetworkSetup(ctx context.Context, vm *UtilityVM, caAddr, contain } func (e *externalNetworkSetup) ConfigureNetworking(ctx context.Context, namespaceID string, configType NetworkConfigType) error { + log.G(ctx).WithFields(logrus.Fields{ + "configType": configType, + logfields.NamespaceID: namespaceID, + }).Trace("configuring external network") + client, err := e.vm.GetNCProxyClient() if err != nil { return errors.Wrapf(err, "no ncproxy client for UVM %q", e.vm.ID()) @@ -331,6 +382,11 @@ func (endpoints 
*NetworkEndpoints) Release(ctx context.Context) error { // // If a namespace with the same id already exists this returns `ErrNetNSAlreadyAttached`. func (uvm *UtilityVM) AddNetNS(ctx context.Context, hcnNamespace *hcn.HostComputeNamespace) error { + log.G(ctx).WithFields(logrus.Fields{ + logfields.UVMID: uvm.ID(), + logfields.Namespace: log.Format(ctx, hcnNamespace), + }).Trace("adding network namespace to uVM") + uvm.m.Lock() defer uvm.m.Unlock() if _, ok := uvm.namespaces[hcnNamespace.Id]; ok { @@ -420,6 +476,12 @@ func (uvm *UtilityVM) AddEndpointToNSWithID(ctx context.Context, nsID, nicID str // // If no network namespace matches `id` returns `ErrNetNSNotFound`. func (uvm *UtilityVM) AddEndpointsToNS(ctx context.Context, id string, endpoints []*hcn.HostComputeEndpoint) error { + log.G(ctx).WithFields(logrus.Fields{ + logfields.UVMID: uvm.ID(), + "nic-id": id, + "endpoints": log.Format(ctx, endpoints), + }).Trace("adding endpoints to uVM's network namespace") + uvm.m.Lock() defer uvm.m.Unlock() @@ -451,36 +513,51 @@ func (uvm *UtilityVM) AddEndpointsToNS(ctx context.Context, id string, endpoints // // If a namespace matching `id` is not found this command silently succeeds. 
func (uvm *UtilityVM) RemoveNetNS(ctx context.Context, id string) error {
+	entry := log.G(ctx).WithFields(logrus.Fields{
+		logfields.UVMID:       uvm.ID(),
+		logfields.NamespaceID: id,
+	})
+
+	entry.Trace("removing network namespace")
+
 	uvm.m.Lock()
 	defer uvm.m.Unlock()
-	if ns, ok := uvm.namespaces[id]; ok {
-		for _, ninfo := range ns.nics {
-			if err := uvm.removeNIC(ctx, ninfo.ID, ninfo.Endpoint); err != nil {
+
+	ns, ok := uvm.namespaces[id]
+	if !ok {
+		entry.Warn("cannot remove non-existent namespace from uVM")
+		return nil
+	}
+
+	entry.WithField("nics", log.Format(ctx, ns.nics)).Debug("removing NICs from namespace")
+	for _, ninfo := range ns.nics {
+		if err := uvm.removeNIC(ctx, ninfo.ID, ninfo.Endpoint); err != nil {
+			return err
+		}
+		ns.nics[ninfo.Endpoint.Id] = nil
+	}
+
+	// Remove the Guest Network namespace
+	if uvm.isNetworkNamespaceSupported() {
+		if uvm.operatingSystem == "windows" {
+			hcnNamespace, err := hcn.GetNamespaceByID(id)
+			if err != nil {
 				return err
 			}
-			ns.nics[ninfo.Endpoint.Id] = nil
-		}
-		// Remove the Guest Network namespace
-		if uvm.isNetworkNamespaceSupported() {
-			if uvm.operatingSystem == "windows" {
-				hcnNamespace, err := hcn.GetNamespaceByID(id)
-				if err != nil {
-					return err
-				}
-				guestNamespace := hcsschema.ModifySettingRequest{
-					GuestRequest: guestrequest.ModificationRequest{
-						ResourceType: guestresource.ResourceTypeNetworkNamespace,
-						RequestType:  guestrequest.RequestTypeRemove,
-						Settings:     hcnNamespace,
-					},
-				}
-				if err := uvm.modify(ctx, &guestNamespace); err != nil {
-					return err
-				}
+			guestNamespace := hcsschema.ModifySettingRequest{
+				GuestRequest: guestrequest.ModificationRequest{
+					ResourceType: guestresource.ResourceTypeNetworkNamespace,
+					RequestType:  guestrequest.RequestTypeRemove,
+					Settings:     hcnNamespace,
+				},
+			}
+			entry.Debug("removing namespace from guest")
+			if err := uvm.modify(ctx, &guestNamespace); err != nil {
+				return err
 			}
 		}
-		delete(uvm.namespaces, id)
 	}
+	delete(uvm.namespaces, id)
 	return nil
 }
@@
-621,6 +698,12 @@ func convertToLCOWReq(id string, endpoint *hcn.HostComputeEndpoint, policyBasedR // addNIC adds a nic to the Utility VM. func (uvm *UtilityVM) addNIC(ctx context.Context, id string, endpoint *hcn.HostComputeEndpoint) error { + log.G(ctx).WithFields(logrus.Fields{ + logfields.UVMID: uvm.ID(), + "nic-id": id, + "endpoint": log.Format(ctx, endpoint), + }).Trace("adding NIC to uVM") + // First a pre-add. This is a guest-only request and is only done on Windows. if uvm.operatingSystem == "windows" { preAddRequest := hcsschema.ModifySettingRequest{ @@ -681,6 +764,12 @@ func (uvm *UtilityVM) addNIC(ctx context.Context, id string, endpoint *hcn.HostC } func (uvm *UtilityVM) removeNIC(ctx context.Context, id string, endpoint *hcn.HostComputeEndpoint) error { + log.G(ctx).WithFields(logrus.Fields{ + logfields.UVMID: uvm.ID(), + "nic-id": id, + "endpoint": log.Format(ctx, endpoint), + }).Trace("removing NIC from uVM") + request := hcsschema.ModifySettingRequest{ RequestType: guestrequest.RequestTypeRemove, ResourcePath: fmt.Sprintf(resourcepaths.NetworkResourceFormat, id), @@ -706,16 +795,13 @@ func (uvm *UtilityVM) removeNIC(ctx context.Context, id string, endpoint *hcn.Ho RequestType: guestrequest.RequestTypeRemove, Settings: &guestresource.LCOWNetworkAdapter{ NamespaceID: endpoint.HostComputeNamespace, - ID: endpoint.Id, + ID: id, }, } } } - if err := uvm.modify(ctx, &request); err != nil { - return err - } - return nil + return uvm.modify(ctx, &request) } // Removes all NICs added to this uvm. 
diff --git a/internal/uvm/virtual_device.go b/internal/uvm/virtual_device.go index 7a0518f926..89c3356b91 100644 --- a/internal/uvm/virtual_device.go +++ b/internal/uvm/virtual_device.go @@ -7,9 +7,12 @@ import ( "fmt" "github.com/Microsoft/go-winio/pkg/guid" + "github.com/sirupsen/logrus" "github.com/Microsoft/hcsshim/internal/hcs/resourcepaths" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" "github.com/Microsoft/hcsshim/internal/protocol/guestrequest" "github.com/Microsoft/hcsshim/internal/protocol/guestresource" "github.com/Microsoft/hcsshim/osversion" @@ -102,6 +105,12 @@ func (uvm *UtilityVM) AssignDevice(ctx context.Context, deviceID string, index u vmBusGUID = guid.String() } + ctx, entry := log.SetEntry(ctx, logrus.Fields{ + logfields.UVMID: uvm.ID(), + "deviceID": deviceID, + "funcIndex": index, + }) + key := VPCIDeviceID{ deviceInstanceID: deviceID, virtualFunctionIndex: index, @@ -113,6 +122,10 @@ func (uvm *UtilityVM) AssignDevice(ctx context.Context, deviceID string, index u existingVPCIDevice := uvm.vpciDevices[key] if existingVPCIDevice != nil { existingVPCIDevice.refCount++ + entry.WithFields(logrus.Fields{ + "vmBusGUID": existingVPCIDevice.VMBusGUID, + "refCount": existingVPCIDevice.refCount, + }).Debug("reusing existing vPCI device") return existingVPCIDevice, nil } @@ -153,6 +166,8 @@ func (uvm *UtilityVM) AssignDevice(ctx context.Context, deviceID string, index u } } + entry.WithField("vmBusGUID", vmBusGUID).Debug("assigning vPCI device") + if err := uvm.modify(ctx, request); err != nil { return nil, err } @@ -164,6 +179,7 @@ func (uvm *UtilityVM) AssignDevice(ctx context.Context, deviceID string, index u refCount: 1, } uvm.vpciDevices[key] = device + return device, nil } @@ -176,21 +192,37 @@ func (uvm *UtilityVM) RemoveDevice(ctx context.Context, deviceInstanceID string, virtualFunctionIndex: index, } + ctx, entry := 
log.SetEntry(ctx, logrus.Fields{
+		logfields.UVMID: uvm.ID(),
+		"deviceID":      deviceInstanceID,
+		"funcIndex":     index,
+	})
+
 	uvm.m.Lock()
 	defer uvm.m.Unlock()
 	vpci := uvm.vpciDevices[key]
 	if vpci == nil {
+		entry.Error("failed to remove nonexistent vPCI device")
 		return fmt.Errorf("no device with ID %s and index %d is present on the uvm %s", deviceInstanceID, index, uvm.ID())
 	}
 
 	vpci.refCount--
+	entry = entry.WithFields(logrus.Fields{
+		"vmBusGUID": vpci.VMBusGUID,
+		"refCount":  vpci.refCount,
+	})
+
+	entry.Trace("vPCI device refcount updated")
 	if vpci.refCount == 0 {
+		entry.Debug("removing vPCI device from uVM")
+
 		delete(uvm.vpciDevices, key)
 		return uvm.modify(ctx, &hcsschema.ModifySettingRequest{
 			ResourcePath: fmt.Sprintf(resourcepaths.VirtualPCIResourceFormat, vpci.VMBusGUID),
 			RequestType:  guestrequest.RequestTypeRemove,
 		})
 	}
+
 	return nil
 }