Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 54 additions & 23 deletions cmd/containerd-shim-runhcs-v1/task_hcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,9 +188,11 @@ func newHcsTask(
ownsParent bool,
req *task.CreateTaskRequest,
s *specs.Spec) (_ shimTask, err error) {
log.G(ctx).WithFields(logrus.Fields{
"tid": req.ID,
"ownsParent": ownsParent,
ctx, entry := log.SetEntry(ctx, logrus.Fields{logfields.TaskID: req.ID})
entry.WithFields(logrus.Fields{
"hasHost": parent != nil,
"isVirtualPod": s.Annotations != nil && s.Annotations[annotations.VirtualPodID] != "",
"ownsHost": ownsParent,
}).Debug("newHcsTask")

owner := filepath.Base(os.Args[0])
Expand Down Expand Up @@ -495,22 +497,30 @@ func (ht *hcsTask) DeleteExec(ctx context.Context, eid string) (int, uint32, tim
}

if eid == "" {
entry := log.G(ctx).WithFields(logrus.Fields{
logfields.TaskID: ht.id,
logfields.ExecID: eid,
})

// We are killing the init task, so we expect the container to be
// stopped after this.
//
// The task process may have already exited, and the status set to
// shimExecStateExited, but resources may still be in the process
// of being cleaned up. Wait for ht.closed to be closed. This signals
// that waitInitExit() has finished destroying container resources,
// and layers were umounted.
// that [hcsTask.waitInitExit]/[hcsTask.waitForHostExit] has finished destroying
// container resources and that layers were unmounted.
// If the shim exits before resources are cleaned up, those resources
// will remain locked and untracked, which leads to lingering sandboxes
// and container resources like base vhdx.
const timeout = 30 * time.Second
entry.WithField(logfields.Timeout, timeout).Trace("waiting for task to be closed")
select {
case <-time.After(30 * time.Second):
log.G(ctx).Error("timed out waiting for resource cleanup")
case <-time.After(timeout):
entry.WithField(logfields.Timeout, timeout).Error("timed out waiting for task to close while deleting init exec")
return 0, 0, time.Time{}, errors.Wrap(hcs.ErrTimeout, "waiting for container resource cleanup")
case <-ht.closed:
entry.Trace("received task close signal")
}

// The init task has now exited. A ForceExit() has already been sent to
Expand Down Expand Up @@ -608,7 +618,10 @@ func (ht *hcsTask) Wait() *task.StateResponse {
func (ht *hcsTask) waitInitExit() {
ctx, span := oc.StartSpan(context.Background(), "hcsTask::waitInitExit")
defer span.End()
span.AddAttributes(trace.StringAttribute("tid", ht.id))
span.AddAttributes(
trace.StringAttribute("tid", ht.id),
trace.BoolAttribute("host", ht.host != nil),
trace.BoolAttribute("ownsHost", ht.ownsHost))

// Wait for it to exit on its own
ht.init.Wait()
Expand All @@ -627,7 +640,10 @@ func (ht *hcsTask) waitInitExit() {
func (ht *hcsTask) waitForHostExit() {
ctx, span := oc.StartSpan(context.Background(), "hcsTask::waitForHostExit")
defer span.End()
span.AddAttributes(trace.StringAttribute("tid", ht.id))
span.AddAttributes(
trace.StringAttribute("tid", ht.id),
trace.BoolAttribute("host", ht.host != nil),
trace.BoolAttribute("ownsHost", ht.ownsHost))

err := ht.host.WaitCtx(ctx)
if err != nil {
Expand Down Expand Up @@ -655,13 +671,16 @@ func (ht *hcsTask) waitForHostExit() {
// == nil`.
func (ht *hcsTask) close(ctx context.Context) {
ht.closeOnce.Do(func() {
log.G(ctx).Debug("hcsTask::closeOnce")
entry := log.G(ctx)
entry.Debug("hcsTask::closeOnce")

// ht.c should never be nil for a real task but in testing we stub
// this to avoid a nil dereference. We really should introduce a
// method or interface for ht.c operations that we can stub for
// testing.
if ht.c != nil {
const tearDownTimeout = 30 * time.Second

// Do our best attempt to tear down the container.
// TODO: unify timeout select statements and use [ht.c.WaitCtx] and [context.WithTimeout]
var werr error
Expand All @@ -670,52 +689,63 @@ func (ht *hcsTask) close(ctx context.Context) {
werr = ht.c.Wait()
close(ch)
}()

err := ht.c.Shutdown(ctx)
if err != nil {
log.G(ctx).WithError(err).Error("failed to shutdown container")
entry.WithError(err).Error("failed to shutdown container")
} else {
t := time.NewTimer(time.Second * 30)
t := time.NewTimer(tearDownTimeout)
select {
case <-ch:
err = werr
t.Stop()
if err != nil {
log.G(ctx).WithError(err).Error("failed to wait for container shutdown")
entry.WithError(err).Error("failed to wait for container shutdown")
}
case <-t.C:
err = hcs.ErrTimeout
log.G(ctx).WithError(err).Error("failed to wait for container shutdown")
entry.WithFields(logrus.Fields{
logfields.Timeout: tearDownTimeout,
logrus.ErrorKey: err,
}).Error("timed out while waiting for container shutdown")
}
}

if err != nil {
err = ht.c.Terminate(ctx)
if err != nil {
log.G(ctx).WithError(err).Error("failed to terminate container")
entry.WithError(err).Error("failed to terminate container")
} else {
t := time.NewTimer(time.Second * 30)
t := time.NewTimer(tearDownTimeout)
select {
case <-ch:
err = werr
t.Stop()
if err != nil {
log.G(ctx).WithError(err).Error("failed to wait for container terminate")
entry.WithError(err).Error("failed to wait for container terminate")
}
case <-t.C:
log.G(ctx).WithError(hcs.ErrTimeout).Error("failed to wait for container terminate")
entry.WithFields(logrus.Fields{
logfields.Timeout: tearDownTimeout,
logrus.ErrorKey: hcs.ErrTimeout,
}).Error("timed out while waiting for container terminate")
}
}
}

// Release any resources associated with the container.
entry.Trace("starting task resource cleanup")

if err := resources.ReleaseResources(ctx, ht.cr, ht.host, true); err != nil {
log.G(ctx).WithError(err).Error("failed to release container resources")
entry.WithError(err).Error("failed to release container resources")
}

// Close the container handle invalidating all future access.
if err := ht.c.Close(); err != nil {
log.G(ctx).WithError(err).Error("failed to close container")
entry.WithError(err).Error("failed to close container")
}

entry.Trace("task resource cleanup completed")
}
ht.closeHost(ctx)
})
Expand All @@ -730,11 +760,12 @@ func (ht *hcsTask) close(ctx context.Context) {
// This call is idempotent and safe to call multiple times.
func (ht *hcsTask) closeHost(ctx context.Context) {
ht.closeHostOnce.Do(func() {
log.G(ctx).Debug("hcsTask::closeHostOnce")
entry := log.G(ctx)
entry.Debug("hcsTask::closeHostOnce")

if ht.ownsHost && ht.host != nil {
if err := ht.host.Close(); err != nil {
log.G(ctx).WithError(err).Error("failed host vm shutdown")
entry.WithError(err).Error("failed host vm shutdown")
}
}
// Send the `init` exec exit notification always.
Expand All @@ -750,7 +781,7 @@ func (ht *hcsTask) closeHost(ctx context.Context) {
ExitStatus: exit.ExitStatus,
ExitedAt: exit.ExitedAt,
}); err != nil {
log.G(ctx).WithError(err).Error("failed to publish TaskExitEventTopic")
entry.WithError(err).Error("failed to publish TaskExitEventTopic")
}
close(ht.closed)
})
Expand Down
6 changes: 3 additions & 3 deletions internal/guest/network/netns.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ func NetNSConfig(ctx context.Context, ifStr string, nsPid int, adapter *guestres
addrsStr = append(addrsStr, fmt.Sprintf("%v", addr))
}

entry.WithField("addresses", addrsStr).Debugf("%v: %s[idx=%d,type=%s] is %v",
entry.WithField("addresses", strings.Join(addrsStr, "; ")).Debugf("%v: %s[idx=%d,type=%s] is %v",
curNS, attr.Name, attr.Index, link.Type(), attr.OperState)
}

Expand Down Expand Up @@ -209,7 +209,7 @@ func configureLink(ctx context.Context,
addr.IP = ip
log.G(ctx).WithFields(logrus.Fields{
"allocatedIP": ip,
"IP": addr,
"IP": addr.String(),
}).Debugf("parsed ip address %s/%d", ipConfig.IPAddress, ipConfig.PrefixLength)
ipAddr := &netlink.Addr{IPNet: addr, Label: ""}
if err := netlinkAddrAdd(link, ipAddr); err != nil {
Expand Down Expand Up @@ -237,7 +237,7 @@ func configureLink(ctx context.Context,
}

for _, r := range adapter.Routes {
log.G(ctx).WithField("route", r).Debugf("adding a route to interface %s", link.Attrs().Name)
log.G(ctx).WithField("route", log.Format(ctx, r)).Debugf("adding a route to interface %s", link.Attrs().Name)

if (r.DestinationPrefix == ipv4GwDestination || r.DestinationPrefix == ipv6GwDestination) &&
(r.NextHop == ipv4EmptyGw || r.NextHop == ipv6EmptyGw) {
Expand Down
86 changes: 56 additions & 30 deletions internal/guest/runtime/hcsv2/uvm.go
Original file line number Diff line number Diff line change
Expand Up @@ -1368,7 +1368,14 @@ func modifyNetwork(ctx context.Context, rt guestrequest.RequestType, na *guestre
// container or not so it must always call `Sync`.
return ns.Sync(ctx)
case guestrequest.RequestTypeRemove:
ns := GetOrAddNetworkNamespace(na.ID)
ns, err := getNetworkNamespace(na.NamespaceID)
if err != nil {
log.G(ctx).WithFields(logrus.Fields{
logfields.NamespaceID: na.NamespaceID,
"adapterID": na.ID,
}).WithError(err).Warn("namespace not found for adapter removal, skipping")
return nil
}
if err := ns.RemoveAdapter(ctx, na.ID); err != nil {
return err
}
Expand Down Expand Up @@ -1540,56 +1547,75 @@ func (h *Host) RemoveContainerFromVirtualPod(containerID string) {
return // Container not in any virtual pod
}

ctx, entry := log.SetEntry(context.Background(), logrus.Fields{
"virtualSandboxID": virtualSandboxID,
logfields.ContainerID: containerID,
})

// Remove from virtual pod
if vp, vpExists := h.virtualPods[virtualSandboxID]; vpExists {
delete(vp.Containers, containerID)

// If this is the sandbox container, delete the network namespace
// If this is the sandbox container, clean up the network namespace adapters first,
// then remove the namespace.

// NOTE: Do NOT call [RemoveNetworkNamespace] here.
// When cleaning up the virtual pod's sandbox container on the host,
// [UtilityVM.TearDownNetworking] calls [UtilityVM.RemoveNetNS],
// which, via [UtilityVM.removeNIC], removes the NICs from the uVM and
// sends [guestresource.ResourceTypeNetwork] RPCs that ultimately call [modifyNetwork]
// to remove adapters from guest tracking.
// However, that happens AFTER this function runs.
// Calling [RemoveNetworkNamespace] here would
// fail with "contains adapters" since the host hasn't removed them yet.
//
// The host-driven path handles cleanup in the correct order.
if containerID == virtualSandboxID && vp.NetworkNamespace != "" {
if err := RemoveNetworkNamespace(context.Background(), vp.NetworkNamespace); err != nil {
logrus.WithError(err).WithField("virtualSandboxID", virtualSandboxID).
Warn("Failed to remove virtual pod network namespace (sandbox container removal)")
}
entry.WithField(logfields.NamespaceID, vp.NetworkNamespace).Debug("skipping virtual pod network namespace removal")
}

// If this was the last container, cleanup the virtual pod
if len(vp.Containers) == 0 {
h.cleanupVirtualPod(virtualSandboxID)
h.cleanupVirtualPod(ctx, virtualSandboxID)
}
}

delete(h.containerToVirtualPod, containerID)

logrus.WithFields(logrus.Fields{
"containerID": containerID,
"virtualSandboxID": virtualSandboxID,
}).Info("Container removed from virtual pod")
entry.Info("Container removed from virtual pod")
}

// cleanupVirtualPod removes a virtual pod and its cgroup (should be called with mutex held)
func (h *Host) cleanupVirtualPod(virtualSandboxID string) {
if vp, exists := h.virtualPods[virtualSandboxID]; exists {
// Delete the cgroup
if err := vp.CgroupControl.Delete(); err != nil {
logrus.WithError(err).WithField("virtualSandboxID", virtualSandboxID).
Warn("Failed to delete virtual pod cgroup")
}
func (h *Host) cleanupVirtualPod(ctx context.Context, virtualSandboxID string) {
vp, exists := h.virtualPods[virtualSandboxID]
if !exists {
return // virtual pod does not exist
}

// Clean up network namespace if this is the last virtual pod using it
// Only remove if this virtual pod was managing the network namespace
if vp.NetworkNamespace != "" {
// For virtual pods, the network namespace is shared, so we only clean it up
// when the virtual pod itself is being destroyed
if err := RemoveNetworkNamespace(context.Background(), vp.NetworkNamespace); err != nil {
logrus.WithError(err).WithField("virtualSandboxID", virtualSandboxID).
Warn("Failed to remove virtual pod network namespace")
}
}
entry := log.G(ctx)

delete(h.virtualPods, virtualSandboxID)
// Delete the cgroup
if err := vp.CgroupControl.Delete(); err != nil {
entry.Warn("failed to delete virtual pod cgroup")
}

logrus.WithField("virtualSandboxID", virtualSandboxID).Info("Virtual pod cleaned up")
// NOTE: Do NOT call [RemoveNetworkNamespace] here.
// See comment in [Host.RemoveContainerFromVirtualPod] for more info.

// Clean up the virtual pod root directory which contains hostname, hosts,
// resolv.conf, and logs/. These are NOT cleaned by Container.Delete() because
// they live under /run/gcs/c/virtual-pods/<id>/ which is separate from the
// container's ociBundlePath (/run/gcs/c/<id>/).
if vpRootDir := specGuest.VirtualPodRootDir(virtualSandboxID); vpRootDir != "" {
if err := os.RemoveAll(vpRootDir); err != nil {
entry.WithField("path", vpRootDir).WithError(err).Warn("Failed to remove virtual pod root directory")
} else {
entry.WithField("path", vpRootDir).Debug("Removed virtual pod root directory")
}
}

delete(h.virtualPods, virtualSandboxID)
entry.Info("Virtual pod cleaned up")
}

// setupVirtualPodMountsPath creates mount directories for virtual pods
Expand Down
20 changes: 16 additions & 4 deletions internal/guest/storage/mount.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"go.opencensus.io/trace"
"golang.org/x/sys/unix"

"github.com/Microsoft/hcsshim/internal/log"
"github.com/Microsoft/hcsshim/internal/oc"
)

Expand Down Expand Up @@ -131,13 +132,24 @@ func UnmountPath(ctx context.Context, target string, removeTarget bool) (err err
return errors.Wrapf(err, "failed to determine if path '%s' exists", target)
}

if err := unixUnmount(target, 0); err != nil {
// If `Unmount` returns `EINVAL` it's not mounted. Just delete the
// folder.
if !gerrors.Is(err, unix.EINVAL) {
// Loop to handle stacked mounts (e.g. tmpfs + bind mount from MountRShared).
// Each Unmount call only peels one layer off the stack, so we must keep
// unmounting until EINVAL tells us nothing is mounted anymore.
const maxUnmountAttempts = 5
unmounted := false
for i := 0; i < maxUnmountAttempts; i++ {
if err := unixUnmount(target, 0); err != nil {
// EINVAL means it's not mounted — we're done unmounting.
if gerrors.Is(err, unix.EINVAL) {
unmounted = true
break
}
return errors.Wrapf(err, "failed to unmount path '%s'", target)
}
}
if !unmounted {
log.G(ctx).WithField("target", target).Warnf("path still mounted after %d unmount attempts", maxUnmountAttempts)
}
if removeTarget {
return osRemoveAll(target)
}
Expand Down
Loading
Loading