From 6cdb79cbb76942f90f823b8755aac832c9cdce82 Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Sat, 21 Mar 2026 04:03:05 +0000 Subject: [PATCH] fix(vmm): preserve orphan VM workdir when no .removing marker Only delete the VM workdir during cleanup if the .removing marker file is present (set by user-initiated remove_vm). Orphaned supervisor processes discovered at startup without the marker now have their workdir preserved, preventing accidental data loss on VMM restart. --- vmm/src/app.rs | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/vmm/src/app.rs b/vmm/src/app.rs index 71ab5448..c61d2a34 100644 --- a/vmm/src/app.rs +++ b/vmm/src/app.rs @@ -292,17 +292,17 @@ impl App { // Persist the removing marker so crash recovery can resume let work_dir = self.work_dir(id); if let Err(err) = work_dir.set_removing() { - warn!("Failed to write .removing marker for {id}: {err:?}"); + warn!("failed to write .removing marker for {id}: {err:?}"); } // Clean up port forwarding immediately self.cleanup_port_forward(id).await; - // Spawn background cleanup coroutine + // User-initiated removal always deletes the workdir let app = self.clone(); let id = id.to_string(); tokio::spawn(async move { - if let Err(err) = app.finish_remove_vm(&id).await { + if let Err(err) = app.finish_remove_vm(&id, true).await { error!("Background cleanup failed for {id}: {err:?}"); } }); @@ -311,8 +311,10 @@ impl App { } /// Background cleanup: stop supervisor process, wait for it to exit, - /// remove from supervisor, delete workdir, and free CID. - async fn finish_remove_vm(&self, id: &str) -> Result<()> { + /// remove from supervisor, optionally delete workdir, and free CID. + /// + /// `delete_workdir`: true for user-initiated removal, false for orphan cleanup. + async fn finish_remove_vm(&self, id: &str, delete_workdir: bool) -> Result<()> { // Stop the supervisor process (idempotent if already stopped) if let Err(err) = self.supervisor.stop(id).await { debug!("supervisor.stop({id}) during removal: {err:?}"); @@ -351,12 +353,20 @@ impl App { } } - // Delete the workdir (may already be gone, e.g. manual deletion before reload) + // Only delete the workdir for user-initiated removal or if .removing marker exists. + // Orphaned supervisor processes without the marker keep their data intact. let vm_path = self.work_dir(id); - if vm_path.path().exists() { - if let Err(err) = fs::remove_dir_all(&vm_path) { - error!("Failed to remove VM directory for {id}: {err:?}"); + if delete_workdir || vm_path.is_removing() { + if vm_path.path().exists() { + if let Err(err) = fs::remove_dir_all(&vm_path) { + error!("failed to remove VM directory for {id}: {err:?}"); + } } + } else if vm_path.path().exists() { + info!( + "VM {id} workdir preserved (orphan cleanup): {}", + vm_path.path().display() + ); } // Free CID and remove from memory (last step) @@ -371,7 +381,8 @@ impl App { Ok(()) } - /// Spawn a background task to clean up a VM (stop + remove from supervisor + delete workdir). + /// Spawn a background task to clean up a VM (stop + remove from supervisor). + /// Workdir deletion is based on the `.removing` marker (only present for user-initiated removal). /// Returns false if a cleanup task is already running for this VM. fn spawn_finish_remove(&self, id: &str) -> bool { { @@ -384,12 +395,13 @@ impl App { vm.state.removing = true; } // If VM is not in memory (e.g. orphaned supervisor process), no entry to guard - // but we still need to clean up the supervisor process and workdir. + // but we still need to clean up the supervisor process. } let app = self.clone(); let id = id.to_string(); tokio::spawn(async move { - if let Err(err) = app.finish_remove_vm(&id).await { + // Don't pass delete_workdir=true; rely on .removing marker check inside + if let Err(err) = app.finish_remove_vm(&id, false).await { error!("Background cleanup failed for {id}: {err:?}"); } });