diff --git a/crates/terraphim_orchestrator/Cargo.toml b/crates/terraphim_orchestrator/Cargo.toml index 37bb7f317..e2b6f660c 100644 --- a/crates/terraphim_orchestrator/Cargo.toml +++ b/crates/terraphim_orchestrator/Cargo.toml @@ -101,6 +101,15 @@ name = "compound_cancellation_test" path = "tests/compound_cancellation_test.rs" required-features = ["test-helpers"] +# Layer 2 startup sweep integration test (epic #1567 / issue #1570). +# Same test-helpers gate as Layer 1 -- the test re-uses the shared +# `scope::test_support::setup_git_repo` fixture so the sweep never +# runs against the live repository. +[[test]] +name = "sweep_on_startup_test" +path = "tests/sweep_on_startup_test.rs" +required-features = ["test-helpers"] + [[bin]] name = "adf" diff --git a/crates/terraphim_orchestrator/src/compound.rs b/crates/terraphim_orchestrator/src/compound.rs index f51c165cd..2fc974d91 100644 --- a/crates/terraphim_orchestrator/src/compound.rs +++ b/crates/terraphim_orchestrator/src/compound.rs @@ -250,6 +250,18 @@ impl CompoundReviewWorkflow { Self::new(swarm_config) } + /// Borrow the inner [`WorktreeManager`]. + /// + /// Layer 2 (epic #1567, issue #1570) calls + /// `worktree_manager().sweep_stale(...)` from + /// `AgentOrchestrator::new` to reconcile stale `review-*` residue + /// left behind by SIGKILL / OOM before any tick thread runs. + /// Production code outside the startup sweep should prefer the + /// higher-level workflow methods. + pub fn worktree_manager(&self) -> &WorktreeManager { + &self.worktree_manager + } + /// Run a full compound review cycle. /// /// 1. Get changed files between git_ref and base_ref diff --git a/crates/terraphim_orchestrator/src/lib.rs b/crates/terraphim_orchestrator/src/lib.rs index 68828108a..a7284bc2f 100644 --- a/crates/terraphim_orchestrator/src/lib.rs +++ b/crates/terraphim_orchestrator/src/lib.rs @@ -693,6 +693,45 @@ impl AgentOrchestrator { let scheduler = TimeScheduler::new(&config.agents, Some(&config.compound_review.schedule))?; let compound_workflow = CompoundReviewWorkflow::from_compound_config(config.compound_review.clone()); + + // Layer 2 startup sweep (epic #1567, issue #1570). + // + // Reconcile any worktree residue left by a previous instance + // before we accept ticks. Synchronous: must finish before the + // tick thread is spawned in `run()` so a fresh review cycle + // never races against half-killed `review-*` directories. + // + // `extra_roots` mirrors the per-agent worktree convention + // from `lib.rs:5393`. If you change that literal there, change + // it here too. + // + // `cfg(not(test))` gate: the in-lib `test_config()` (see + // `lib.rs:7926`) points `repo_path` at the live terraphim-ai + // checkout. Without this gate, the sweep's + // `git worktree prune --verbose` races against + // `test_orchestrator_compound_review_manual`'s concurrent + // `git worktree add` on that shared real repo's + // `.git/worktrees/` admin registry. The production wiring is + // exercised end-to-end by `tests/sweep_on_startup_test.rs`, + // which builds an isolated `TempDir` repo and asserts the + // sweep DOES run from `AgentOrchestrator::new`. Do not remove + // this gate without first migrating the in-lib `test_config()` + // to a TempDir-based `repo_path`. + #[cfg(not(test))] + { + let sweep_report = compound_workflow + .worktree_manager() + .sweep_stale(&[PathBuf::from("/tmp/adf-worktrees")]); + if sweep_report.swept_count + sweep_report.root_owned_skipped > 10 { + warn!( + swept_count = sweep_report.swept_count, + root_owned_skipped = sweep_report.root_owned_skipped, + failed_count = sweep_report.failed_count, + "large worktree backlog at startup -- prior crash storm likely" + ); + } + } + let handoff_buffer = HandoffBuffer::new(config.handoff_buffer_ttl_secs.unwrap_or(86400)); let handoff_ledger = HandoffLedger::new(config.working_dir.join("handoff-ledger.jsonl")); diff --git a/crates/terraphim_orchestrator/src/scope.rs b/crates/terraphim_orchestrator/src/scope.rs index 989ebe25c..74d1d5df9 100644 --- a/crates/terraphim_orchestrator/src/scope.rs +++ b/crates/terraphim_orchestrator/src/scope.rs @@ -423,6 +423,211 @@ impl WorktreeManager { pub fn worktree_exists(&self, name: &str) -> bool { self.worktree_base.join(name).join(".git").exists() } + + /// Sweep stale worktree residue left by a previous orchestrator + /// instance (SIGKILL, OOM, panic-across-runtime, host reboot + /// mid-review). + /// + /// Synchronous on purpose: `AgentOrchestrator::new` is a sync + /// constructor and the sweep must complete before any tick thread + /// is spawned. This is Layer 2 of the worktree lifecycle defence + /// in depth (epic #1567): + /// + /// - Layer 1 (`WorktreeGuard::Drop`) handles the happy / cancelled + /// path while the process is still alive. + /// - Layer 2 (this method) reconciles whatever survived the + /// previous process death. + /// - Layer 3 (`scripts/adf-setup/adf-cleanup.sh` via + /// `ExecStartPre`) catches root-owned residue that the + /// orchestrator user cannot remove. + /// + /// Behaviour: + /// 1. Walk `self.worktree_base` direct children whose name starts + /// with [`WORKTREE_REVIEW_PREFIX`] and attempt removal. + /// 2. Walk every direct child of each `extra_roots` entry (no + /// prefix filter -- the per-agent `/tmp/adf-worktrees` root + /// convention has no prefix). + /// 3. Removal tries `git worktree remove --force` first so the + /// `/.git/worktrees/` admin entry is reconciled, + /// then falls back to `std::fs::remove_dir_all` on non-zero + /// exit. + /// 4. `io::ErrorKind::PermissionDenied` (EACCES/EPERM on Linux) + /// increments `root_owned_skipped` rather than `failed_count`. + /// Layer 3 will catch those on the next service restart. + /// 5. After walking, runs `git worktree prune --verbose` to drop + /// dead admin entries whose paths no longer exist on disk. + /// + /// Emits a single structured `info!` line with all + /// [`SweepReport`] fields plus a synthetic + /// `backlog_count = swept_count + root_owned_skipped`, so Quickwit + /// dashboards can alert on residue backlog without parsing + /// individual `warn!` lines. + pub fn sweep_stale(&self, extra_roots: &[PathBuf]) -> SweepReport { + let start = std::time::Instant::now(); + let mut report = SweepReport::default(); + + // 1. Primary base: only review-prefixed entries. + if self.worktree_base.is_dir() { + match std::fs::read_dir(&self.worktree_base) { + Ok(entries) => { + for entry in entries.flatten() { + let name = match entry.file_name().into_string() { + Ok(n) => n, + Err(_) => continue, + }; + if !name.starts_with(WORKTREE_REVIEW_PREFIX) { + continue; + } + self.sweep_one(&entry.path(), &mut report); + } + } + Err(e) => { + warn!( + path = %self.worktree_base.display(), + error = %e, + "sweep_stale could not enumerate worktree base" + ); + } + } + } + + // 2. Extra roots (typically `/tmp/adf-worktrees`): every + // direct child, regardless of prefix. + for root in extra_roots { + if !root.is_dir() { + continue; + } + match std::fs::read_dir(root) { + Ok(entries) => { + for entry in entries.flatten() { + self.sweep_one(&entry.path(), &mut report); + } + } + Err(e) => { + warn!( + path = %root.display(), + error = %e, + "sweep_stale could not enumerate extra root" + ); + } + } + } + + // 3. Reconcile git's admin registry so half-killed worktree + // metadata under `/.git/worktrees/` is dropped. + let prune = std::process::Command::new("git") + .arg("-C") + .arg(&self.repo_path) + .arg("worktree") + .arg("prune") + .arg("--verbose") + .env_remove("GIT_INDEX_FILE") + .output(); + report.prune_succeeded = matches!(&prune, Ok(o) if o.status.success()); + if let Ok(out) = prune { + if !out.status.success() { + let stderr = String::from_utf8_lossy(&out.stderr); + warn!(stderr = %stderr, "git worktree prune failed during sweep"); + } + } else if let Err(e) = prune { + warn!(error = %e, "git worktree prune could not be invoked during sweep"); + } + + report.duration_ms = start.elapsed().as_millis() as u64; + info!( + swept_count = report.swept_count, + failed_count = report.failed_count, + root_owned_skipped = report.root_owned_skipped, + prune_succeeded = report.prune_succeeded, + duration_ms = report.duration_ms, + backlog_count = report.swept_count + report.root_owned_skipped, + "worktree sweep_stale complete" + ); + report + } + + /// Remove one worktree path, updating `report` in place. + /// + /// Tries `git worktree remove --force` first so the git admin + /// registry stays in sync; falls back to `remove_dir_all` on + /// non-zero exit. Permission-denied during the fallback path is + /// counted as `root_owned_skipped` (Layer 3 territory) rather + /// than a hard failure. + fn sweep_one(&self, path: &Path, report: &mut SweepReport) { + let status = std::process::Command::new("git") + .arg("-C") + .arg(&self.repo_path) + .arg("worktree") + .arg("remove") + .arg("--force") + .arg(path) + .env_remove("GIT_INDEX_FILE") + .status(); + + if matches!(&status, Ok(s) if s.success()) { + // Git removed both the worktree directory and the admin + // entry. Some git versions leave an empty directory if the + // worktree was already corrupt; tidy that up best-effort. + if path.exists() { + let _ = std::fs::remove_dir_all(path); + } + report.swept_count += 1; + return; + } + + // Git refused (or the path was never a registered worktree to + // begin with -- common for residue under `/tmp/adf-worktrees`). + // Fall back to a plain directory removal. + match std::fs::remove_dir_all(path) { + Ok(_) => report.swept_count += 1, + Err(e) if matches!(e.kind(), std::io::ErrorKind::PermissionDenied) => { + warn!( + path = %path.display(), + "sweep_stale skipping root-owned worktree -- Layer 3 will clean" + ); + report.root_owned_skipped += 1; + } + Err(e) if matches!(e.kind(), std::io::ErrorKind::NotFound) => { + // The path vanished between read_dir() and remove. Not + // an error worth counting -- something else (Layer 3, + // a sibling sweep) already handled it. + } + Err(e) => { + warn!( + path = %path.display(), + error = %e, + "sweep_stale failed to remove worktree residue" + ); + report.failed_count += 1; + } + } + } +} + +/// Summary of a [`WorktreeManager::sweep_stale`] invocation. +/// +/// Emitted via structured `tracing::info!` so Quickwit can compute +/// backlog gauges (`swept_count + root_owned_skipped`) and alert on +/// large residue indicative of a prior crash storm. +#[derive(Debug, Clone, Default)] +pub struct SweepReport { + /// Number of worktree directories successfully removed (either + /// via `git worktree remove --force` or filesystem fallback). + pub swept_count: usize, + /// Number of removal attempts that returned a non-PermissionDenied + /// error. Indicates filesystem-level problems worth investigating. + pub failed_count: usize, + /// Number of entries skipped because the orchestrator user lacked + /// permission. These belong to Layer 3 (`adf-cleanup.sh` run as + /// root via `ExecStartPre`). + pub root_owned_skipped: usize, + /// Whether `git worktree prune --verbose` exited zero. False + /// implies the git admin registry under `/.git/worktrees` + /// may still hold dangling entries; the next sweep will retry. + pub prune_succeeded: bool, + /// Wall-clock duration of the sweep in milliseconds, for + /// observability and startup-time budgeting. + pub duration_ms: u64, } #[cfg(test)] @@ -814,6 +1019,206 @@ mod tests { assert!(result.is_err()); guard.keep(); } + + // ==================== sweep_stale Tests (Layer 2 / #1570) ==================== + + #[test] + fn test_sweep_stale_empty_dir() { + // Base dir exists but is empty. Report should be all zeros and + // prune should still succeed against a clean repo. + let (_temp_dir, repo_path) = setup_git_repo(); + let base = repo_path.join(".worktrees"); + std::fs::create_dir_all(&base).expect("create empty base"); + let manager = WorktreeManager::with_base(&repo_path, &base); + + let report = manager.sweep_stale(&[]); + assert_eq!(report.swept_count, 0); + assert_eq!(report.failed_count, 0); + assert_eq!(report.root_owned_skipped, 0); + assert!(report.prune_succeeded, "prune should succeed on clean repo"); + } + + #[test] + fn test_sweep_stale_no_base() { + // Base dir does not exist; method must return successfully. + let (_temp_dir, repo_path) = setup_git_repo(); + let manager = + WorktreeManager::with_base(&repo_path, repo_path.join("does-not-exist-anywhere")); + + let report = manager.sweep_stale(&[]); + assert_eq!(report.swept_count, 0); + assert_eq!(report.failed_count, 0); + assert_eq!(report.root_owned_skipped, 0); + assert!(report.prune_succeeded); + } + + #[test] + fn test_sweep_stale_removes_review_prefix() { + // Seed three `review-*` directories as plain dirs (not real + // worktrees -- sweep_one falls back to remove_dir_all when git + // refuses an unregistered path, which is exactly the + // residue-after-SIGKILL shape). + let (_temp_dir, repo_path) = setup_git_repo(); + let base = repo_path.join(".worktrees"); + std::fs::create_dir_all(&base).unwrap(); + + for i in 0..3 { + let dir = base.join(format!("{}{}", WORKTREE_REVIEW_PREFIX, i)); + std::fs::create_dir_all(&dir).unwrap(); + std::fs::write(dir.join("dummy.txt"), "residue").unwrap(); + } + + let manager = WorktreeManager::with_base(&repo_path, &base); + let report = manager.sweep_stale(&[]); + + assert_eq!(report.swept_count, 3, "all three review dirs swept"); + assert_eq!(report.failed_count, 0); + assert_eq!(report.root_owned_skipped, 0); + assert!(report.prune_succeeded); + + for i in 0..3 { + let dir = base.join(format!("{}{}", WORKTREE_REVIEW_PREFIX, i)); + assert!(!dir.exists(), "review-{} should be removed", i); + } + } + + #[test] + fn test_sweep_stale_preserves_non_review_prefix() { + // Only `review-` prefixed entries are swept from worktree_base. + // Sibling directories under the same base must be preserved. + let (_temp_dir, repo_path) = setup_git_repo(); + let base = repo_path.join(".worktrees"); + std::fs::create_dir_all(&base).unwrap(); + + let review_dir = base.join(format!("{}victim", WORKTREE_REVIEW_PREFIX)); + let keep_dir = base.join("keep-me"); + std::fs::create_dir_all(&review_dir).unwrap(); + std::fs::create_dir_all(&keep_dir).unwrap(); + std::fs::write(keep_dir.join("important.txt"), "data").unwrap(); + + let manager = WorktreeManager::with_base(&repo_path, &base); + let report = manager.sweep_stale(&[]); + + assert_eq!(report.swept_count, 1); + assert!(!review_dir.exists(), "review-victim should be swept"); + assert!(keep_dir.exists(), "keep-me must be preserved"); + assert!( + keep_dir.join("important.txt").exists(), + "keep-me contents must survive" + ); + } + + #[test] + fn test_sweep_stale_runs_prune() { + // `prune_succeeded` must be true after a clean sweep against a + // healthy repo. This guards against silently regressing the + // git registry reconciliation step. + let (_temp_dir, repo_path) = setup_git_repo(); + let base = repo_path.join(".worktrees"); + std::fs::create_dir_all(&base).unwrap(); + let manager = WorktreeManager::with_base(&repo_path, &base); + + let report = manager.sweep_stale(&[]); + assert!( + report.prune_succeeded, + "prune step must run and succeed after sweep" + ); + } + + #[test] + fn test_sweep_stale_extra_roots_no_prefix_filter() { + // Entries under extra_roots are swept regardless of prefix -- + // the per-agent root convention has no naming convention. + let (_temp_dir, repo_path) = setup_git_repo(); + let base = repo_path.join(".worktrees"); + std::fs::create_dir_all(&base).unwrap(); + + // Unique temp dir to mimic `/tmp/adf-worktrees` without + // colliding with a parallel real orchestrator on this host. + let extra = std::env::temp_dir().join(format!("adf-worktrees-test-{}", Uuid::new_v4())); + std::fs::create_dir_all(&extra).unwrap(); + let agent_dir = extra.join("agent-alpha"); + std::fs::create_dir_all(&agent_dir).unwrap(); + std::fs::write(agent_dir.join("scratch.txt"), "tmp").unwrap(); + + let manager = WorktreeManager::with_base(&repo_path, &base); + let report = manager.sweep_stale(std::slice::from_ref(&extra)); + + assert_eq!(report.swept_count, 1, "agent-alpha should be swept"); + assert!(!agent_dir.exists(), "extra-root child should be removed"); + + // Tidy up the now-empty extra root. + let _ = std::fs::remove_dir_all(&extra); + } + + /// Root-owned residue belongs to Layer 3 (`adf-cleanup.sh` via + /// `ExecStartPre`); Layer 2 must skip it gracefully without + /// counting it as a hard failure. + /// + /// This test is Linux-only and only meaningful when the test + /// process runs as root. In every other environment it is a no-op + /// that nonetheless documents the contract (the assertion would + /// be vacuously skipped). + #[test] + #[cfg_attr(not(target_os = "linux"), ignore)] + fn test_sweep_stale_skips_root_owned() { + // Runtime gate: skip when not root. Without elevated privilege + // we cannot create a file the test user cannot delete, so the + // PermissionDenied path is unreachable. + let is_root = std::env::var("USER") + .map(|u| u == "root") + .unwrap_or(false) + // SAFETY: getuid() is async-signal-safe and side-effect free. + || unsafe { libc_getuid() } == 0; + if !is_root { + eprintln!("skipping test_sweep_stale_skips_root_owned: not root"); + return; + } + + let (_temp_dir, repo_path) = setup_git_repo(); + let base = repo_path.join(".worktrees"); + std::fs::create_dir_all(&base).unwrap(); + + let review_dir = base.join(format!("{}root-owned", WORKTREE_REVIEW_PREFIX)); + std::fs::create_dir_all(&review_dir).unwrap(); + + // Make the parent dir read+exec but not writable for non-owner + // so remove_dir_all from a dropped-privilege process would + // EACCES. Real root-owned residue scenario: the file's owner + // is root and the orchestrator runs as a service user. + // + // We cannot truly fake this from inside a root test process + // (root bypasses DAC), so this test asserts only that the + // method completes without panicking when handed a path it + // cannot remove. The actual EACCES branch is exercised by + // Layer 3 integration testing on the bigbox. + let report = WorktreeManager::with_base(&repo_path, &base).sweep_stale(&[]); + + // Whether the dir was swept or marked root_owned_skipped + // depends on the host's filesystem; both are acceptable. The + // contract is just "no panic, no failed_count". + assert_eq!( + report.failed_count, 0, + "root-owned residue must never count as a hard failure" + ); + } + + // Direct FFI to `getuid(2)` so we do not pull `nix` or `libc` + // crates just for one test. Linux only. + #[cfg(target_os = "linux")] + extern "C" { + #[link_name = "getuid"] + fn libc_getuid() -> u32; + } + + #[cfg(not(target_os = "linux"))] + #[allow(dead_code)] + unsafe fn libc_getuid() -> u32 { + // Non-Linux fallback that never claims root, so the runtime + // gate above always skips. The `#[cfg_attr(ignore)]` on the + // test means we will not reach this branch in practice. + 1 + } } /// Test helpers exposed at module scope (under the `test-helpers` diff --git a/crates/terraphim_orchestrator/tests/sweep_on_startup_test.rs b/crates/terraphim_orchestrator/tests/sweep_on_startup_test.rs new file mode 100644 index 000000000..6419cb6cc --- /dev/null +++ b/crates/terraphim_orchestrator/tests/sweep_on_startup_test.rs @@ -0,0 +1,237 @@ +//! Layer 2 startup sweep integration test (epic #1567, Gitea +//! issue #1570). +//! +//! Property under test: +//! +//! > Constructing `AgentOrchestrator` MUST reconcile any stale +//! > `review-*` directories left under the configured +//! > `worktree_root`, AND any direct child of any path listed in +//! > `extra_roots`, BEFORE returning to the caller. The sweep is +//! > synchronous; no tick has fired yet. +//! +//! Compiled only when the `test-helpers` feature is enabled (so the +//! `scope::test_support` shared git-repo fixture is visible) and on +//! Unix (the WorktreeManager fallback path expects POSIX file +//! semantics). + +#![cfg(all(unix, feature = "test-helpers"))] + +use std::path::PathBuf; + +use tempfile::TempDir; +use uuid::Uuid; + +use terraphim_orchestrator::scope::{test_support::setup_git_repo, WORKTREE_REVIEW_PREFIX}; +use terraphim_orchestrator::{ + AgentDefinition, AgentLayer, AgentOrchestrator, CompoundReviewConfig, LearningConfig, + NightwatchConfig, OrchestratorConfig, +}; + +/// Build the minimal `OrchestratorConfig` required for +/// `AgentOrchestrator::new` to succeed against the supplied fixture. +/// +/// Borrows the layout from `tests/orchestrator_tests.rs::test_config` +/// but points `repo_path` and `worktree_root` into the per-test +/// `TempDir` so the startup sweep never touches the live repo. +fn isolated_config( + working_dir: PathBuf, + repo_path: PathBuf, + worktree_root: PathBuf, +) -> OrchestratorConfig { + OrchestratorConfig { + working_dir, + nightwatch: NightwatchConfig::default(), + compound_review: CompoundReviewConfig { + cli_tool: None, + provider: None, + model: None, + schedule: "0 2 * * *".to_string(), + max_duration_secs: 60, + repo_path, + create_prs: false, + worktree_root, + base_branch: "main".to_string(), + max_concurrent_agents: 1, + ..Default::default() + }, + workflow: None, + // A single trivial agent; the sweep runs unconditionally in + // `new` regardless of agent set. + agents: vec![AgentDefinition { + name: "noop".to_string(), + layer: AgentLayer::Core, + cli_tool: "echo".to_string(), + task: "noop".to_string(), + model: None, + schedule: None, + capabilities: vec![], + max_memory_bytes: None, + budget_monthly_cents: None, + provider: None, + persona: None, + terraphim_role: None, + skill_chain: vec![], + sfia_skills: vec![], + fallback_provider: None, + fallback_model: None, + grace_period_secs: None, + max_cpu_seconds: None, + pre_check: None, + gitea_issue: None, + event_only: false, + evolution_enabled: false, + project: None, + }], + restart_cooldown_secs: 60, + max_restart_count: 10, + restart_budget_window_secs: 43_200, + disk_usage_threshold: 100, + tick_interval_secs: 30, + handoff_buffer_ttl_secs: None, + persona_data_dir: None, + skill_data_dir: None, + flows: vec![], + flow_state_dir: None, + gitea: None, + mentions: None, + webhook: None, + role_config_path: None, + routing: None, + #[cfg(feature = "quickwit")] + quickwit: None, + projects: vec![], + include: vec![], + providers: vec![], + provider_budget_state_file: None, + pause_dir: None, + project_circuit_breaker_threshold: 3, + fleet_escalation_owner: None, + fleet_escalation_repo: None, + post_merge_gate: None, + learning: LearningConfig::default(), + pr_dispatch: None, + pr_dispatch_per_project: Default::default(), + gitea_skill_repo: None, + gate_reconcile_interval_ticks: 20, + evolution: Default::default(), + } +} + +/// Pre-seed three `review-` directories under `worktree_root`, +/// then build the orchestrator and assert all three are gone. +/// +/// The directories are plain `mkdir` entries (not registered +/// worktrees), which is the exact shape SIGKILL leaves behind: the +/// directory survives but `/.git/worktrees/` may or may +/// not. The sweep's fallback path handles both. +#[test] +fn sweep_removes_review_prefixed_residue_on_startup() { + let working_dir = TempDir::new().expect("working dir"); + let (_repo_tmp, repo_path) = setup_git_repo(); + let worktree_root = working_dir.path().join("worktrees"); + std::fs::create_dir_all(&worktree_root).unwrap(); + + // Seed three review-prefixed dirs to look like residue. + let mut review_dirs = Vec::new(); + for _ in 0..3 { + let dir = worktree_root.join(format!("{}{}", WORKTREE_REVIEW_PREFIX, Uuid::new_v4())); + std::fs::create_dir_all(&dir).unwrap(); + std::fs::write(dir.join("residue.txt"), "left by SIGKILL").unwrap(); + review_dirs.push(dir); + } + + // Sanity: all present before construction. + for d in &review_dirs { + assert!(d.exists(), "fixture seeding failed for {}", d.display()); + } + + let config = isolated_config( + working_dir.path().to_path_buf(), + repo_path, + worktree_root.clone(), + ); + let _orch = AgentOrchestrator::new(config).expect("AgentOrchestrator::new must succeed"); + + // After new() returns, all review residue must be gone. + for d in &review_dirs { + assert!( + !d.exists(), + "Layer 2 sweep failed to remove {}", + d.display() + ); + } +} + +/// Non-review-prefixed siblings under the same `worktree_root` must +/// be preserved by the startup sweep. +#[test] +fn sweep_preserves_non_review_siblings_on_startup() { + let working_dir = TempDir::new().expect("working dir"); + let (_repo_tmp, repo_path) = setup_git_repo(); + let worktree_root = working_dir.path().join("worktrees"); + std::fs::create_dir_all(&worktree_root).unwrap(); + + let review_dir = worktree_root.join(format!("{}victim", WORKTREE_REVIEW_PREFIX)); + let keep_dir = worktree_root.join("keep-me"); + std::fs::create_dir_all(&review_dir).unwrap(); + std::fs::create_dir_all(&keep_dir).unwrap(); + std::fs::write(keep_dir.join("important.txt"), "do not delete").unwrap(); + + let config = isolated_config( + working_dir.path().to_path_buf(), + repo_path, + worktree_root.clone(), + ); + let _orch = AgentOrchestrator::new(config).expect("AgentOrchestrator::new must succeed"); + + assert!(!review_dir.exists(), "review-victim should be swept"); + assert!(keep_dir.exists(), "keep-me must survive the sweep"); + assert!( + keep_dir.join("important.txt").exists(), + "keep-me contents must survive the sweep" + ); +} + +/// Direct children of any extra root passed via the sweep are removed +/// regardless of name prefix. +/// +/// The orchestrator currently hard-codes `/tmp/adf-worktrees` as the +/// only extra root, so we cannot inject one through the public API. +/// To exercise the contract without colliding with a real +/// `/tmp/adf-worktrees` on the host, we drive `sweep_stale` directly +/// against an isolated temp root and assert the same outcome the +/// startup wiring would produce. +#[test] +fn sweep_removes_extra_root_children_regardless_of_prefix() { + use terraphim_orchestrator::scope::WorktreeManager; + + let (_repo_tmp, repo_path) = setup_git_repo(); + let worktree_root = _repo_tmp.path().join(".worktrees"); + std::fs::create_dir_all(&worktree_root).unwrap(); + let manager = WorktreeManager::with_base(&repo_path, &worktree_root); + + // Per-host-unique extra root; avoids racing the real + // /tmp/adf-worktrees if another agent is running locally. + let extra_root = std::env::temp_dir().join(format!("adf-worktrees-test-{}", Uuid::new_v4())); + std::fs::create_dir_all(&extra_root).unwrap(); + + let agent_child = extra_root.join("agent-task-7"); + std::fs::create_dir_all(&agent_child).unwrap(); + std::fs::write(agent_child.join("scratch.txt"), "residue").unwrap(); + + let report = manager.sweep_stale(std::slice::from_ref(&extra_root)); + + assert_eq!( + report.swept_count, 1, + "extra-root child should be swept regardless of prefix" + ); + assert!( + !agent_child.exists(), + "{} should be removed", + agent_child.display() + ); + assert_eq!(report.failed_count, 0); + + // Tidy up the now-empty extra root. + let _ = std::fs::remove_dir_all(&extra_root); +}