diff --git a/crates/terraphim_orchestrator/src/scope.rs b/crates/terraphim_orchestrator/src/scope.rs index 74d1d5df9..e9795d909 100644 --- a/crates/terraphim_orchestrator/src/scope.rs +++ b/crates/terraphim_orchestrator/src/scope.rs @@ -6,6 +6,102 @@ use uuid::Uuid; use crate::worktree_guard::WorktreeGuard; +/// Filename of the ownership manifest written at the root of every ADF +/// worktree. Presence of this file with valid contents is the gate for +/// cleanup: sweep only deletes entries carrying this sentinel. +pub const WORKTREE_MANIFEST_FILENAME: &str = ".adf-worktree-manifest.json"; + +/// Ownership manifest stored at the root of each ADF worktree. +/// +/// Existence of this file with matching repo and path fields is the +/// single gate for sweep/cleanup. Directories without a valid manifest +/// are preserved regardless of name prefix or location. +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct WorktreeManifest { + /// Schema version for forward compatibility. + pub version: u32, + /// Git repository this worktree belongs to. + pub repo_path: String, + /// Absolute path of this worktree (self-referential). + pub worktree_path: String, + /// Name of the agent or component that created this worktree. + pub creator: String, + /// Session or correlation ID linking this worktree to its task. + pub session_id: String, + /// Process ID that performed the `git worktree add`. + pub pid: u32, + /// ISO-8601 timestamp of creation. + pub created_at: String, +} + +impl WorktreeManifest { + /// Current schema version. Increment when the struct changes + /// incompatibly. + pub const CURRENT_VERSION: u32 = 1; + + /// Write a manifest to `dir / WORKTREE_MANIFEST_FILENAME`. + pub fn write_to_dir(&self, dir: &Path) -> Result<(), std::io::Error> { + let path = dir.join(WORKTREE_MANIFEST_FILENAME); + let json = serde_json::to_string_pretty(self) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?; + std::fs::write(&path, json)?; + debug!(path = %path.display(), "worktree manifest written"); + Ok(()) + } + + /// Read and validate a manifest from a directory. + /// + /// Returns `None` if the file is absent, unreadable, unparseable, + /// or fails validation (wrong repo, wrong path, or future version). + pub fn read_from_dir(dir: &Path) -> Option { + let path = dir.join(WORKTREE_MANIFEST_FILENAME); + let bytes = std::fs::read(&path).ok()?; + let m: WorktreeManifest = serde_json::from_slice(&bytes).ok()?; + if m.version > Self::CURRENT_VERSION { + warn!( + path = %path.display(), + manifest_version = m.version, + current_version = Self::CURRENT_VERSION, + "worktree manifest version too new, skipping" + ); + return None; + } + Some(m) + } + + /// Check that the manifest's embedded paths match the expected + /// repository and the actual directory on disk. + pub fn validate(&self, expected_repo: &Path, dir_on_disk: &Path) -> bool { + if self.repo_path != expected_repo.to_string_lossy() { + warn!( + manifest_repo = %self.repo_path, + expected_repo = %expected_repo.display(), + "worktree manifest repo mismatch" + ); + return false; + } + if self.worktree_path != dir_on_disk.to_string_lossy() { + warn!( + manifest_path = %self.worktree_path, + dir_on_disk = %dir_on_disk.display(), + "worktree manifest path mismatch" + ); + return false; + } + true + } + + /// Convenience: read from `dir` and validate against `expected_repo`. + pub fn read_valid(dir: &Path, expected_repo: &Path) -> Option { + let m = Self::read_from_dir(dir)?; + if m.validate(expected_repo, dir) { + Some(m) + } else { + None + } + } +} + /// Directory-name prefix for compound-review worktrees. /// /// Single source of truth referenced by: @@ -315,6 +411,27 @@ impl WorktreeManager { } info!(name = %name, path = %worktree_path.display(), "worktree created"); + + // Write ownership manifest so sweep can safely identify this + // worktree as ADF-managed. Best-effort; a missing or invalid + // manifest inhibits cleanup rather than breaking the worktree. + let manifest = WorktreeManifest { + version: WorktreeManifest::CURRENT_VERSION, + repo_path: self.repo_path.to_string_lossy().to_string(), + worktree_path: worktree_path.to_string_lossy().to_string(), + creator: "orchestrator".to_string(), + session_id: name.to_string(), + pid: std::process::id(), + created_at: chrono::Utc::now().to_rfc3339(), + }; + if let Err(e) = manifest.write_to_dir(&worktree_path) { + warn!( + path = %worktree_path.display(), + error = %e, + "failed to write worktree manifest; cleanup will skip this entry" + ); + } + Ok(WorktreeGuard::for_managed(&self.repo_path, worktree_path)) } @@ -538,6 +655,7 @@ impl WorktreeManager { swept_count = report.swept_count, failed_count = report.failed_count, root_owned_skipped = report.root_owned_skipped, + no_manifest_skipped = report.no_manifest_skipped, prune_succeeded = report.prune_succeeded, duration_ms = report.duration_ms, backlog_count = report.swept_count + report.root_owned_skipped, @@ -548,12 +666,36 @@ impl WorktreeManager { /// Remove one worktree path, updating `report` in place. /// + /// Before deletion, checks for a valid [`WorktreeManifest`]. If the + /// manifest is absent, invalid, or mismatched, the directory is + /// preserved regardless of naming convention. This prevents + /// accidental deletion of non-ADF directories that happen to share + /// the `review-` prefix or live under `/tmp/adf-worktrees`. + /// /// Tries `git worktree remove --force` first so the git admin /// registry stays in sync; falls back to `remove_dir_all` on /// non-zero exit. Permission-denied during the fallback path is /// counted as `root_owned_skipped` (Layer 3 territory) rather /// than a hard failure. fn sweep_one(&self, path: &Path, report: &mut SweepReport) { + // Reject entries that do not carry a valid manifest. + let manifest = match WorktreeManifest::read_valid(path, &self.repo_path) { + Some(m) => m, + None => { + warn!( + path = %path.display(), + "sweep_stale skipping directory without valid ADF manifest" + ); + report.no_manifest_skipped += 1; + return; + } + }; + debug!( + path = %path.display(), + creator = %manifest.creator, + session_id = %manifest.session_id, + "sweep_stale found valid manifest, proceeding with removal" + ); let status = std::process::Command::new("git") .arg("-C") .arg(&self.repo_path) @@ -621,6 +763,10 @@ pub struct SweepReport { /// permission. These belong to Layer 3 (`adf-cleanup.sh` run as /// root via `ExecStartPre`). pub root_owned_skipped: usize, + /// Number of entries skipped because no valid ADF worktree + /// manifest was found in the directory. Protects non-ADF data + /// from accidental deletion. + pub no_manifest_skipped: usize, /// Whether `git worktree prune --verbose` exited zero. False /// implies the git admin registry under `/.git/worktrees` /// may still hold dangling entries; the next sweep will retry. @@ -888,6 +1034,22 @@ mod tests { (temp_dir, repo_path) } + /// Write a valid ADF worktree manifest into `dir` for testing. + /// Used by sweep tests to ensure test directories are recognised + /// as ADF-managed worktrees. + fn write_test_manifest(dir: &Path, repo_path: &Path) { + let manifest = WorktreeManifest { + version: WorktreeManifest::CURRENT_VERSION, + repo_path: repo_path.to_string_lossy().to_string(), + worktree_path: dir.to_string_lossy().to_string(), + creator: "test".to_string(), + session_id: "test-session".to_string(), + pid: std::process::id(), + created_at: chrono::Utc::now().to_rfc3339(), + }; + manifest.write_to_dir(dir).expect("write test manifest"); + } + #[tokio::test] async fn test_create_worktree() { let (_temp_dir, repo_path) = setup_git_repo(); @@ -1057,7 +1219,8 @@ mod tests { // Seed three `review-*` directories as plain dirs (not real // worktrees -- sweep_one falls back to remove_dir_all when git // refuses an unregistered path, which is exactly the - // residue-after-SIGKILL shape). + // residue-after-SIGKILL shape). Each must carry a valid + // manifest or the sweep will preserve it. let (_temp_dir, repo_path) = setup_git_repo(); let base = repo_path.join(".worktrees"); std::fs::create_dir_all(&base).unwrap(); @@ -1066,6 +1229,7 @@ mod tests { let dir = base.join(format!("{}{}", WORKTREE_REVIEW_PREFIX, i)); std::fs::create_dir_all(&dir).unwrap(); std::fs::write(dir.join("dummy.txt"), "residue").unwrap(); + write_test_manifest(&dir, &repo_path); } let manager = WorktreeManager::with_base(&repo_path, &base); @@ -1074,7 +1238,7 @@ mod tests { assert_eq!(report.swept_count, 3, "all three review dirs swept"); assert_eq!(report.failed_count, 0); assert_eq!(report.root_owned_skipped, 0); - assert!(report.prune_succeeded); + assert_eq!(report.no_manifest_skipped, 0); for i in 0..3 { let dir = base.join(format!("{}{}", WORKTREE_REVIEW_PREFIX, i)); @@ -1084,8 +1248,9 @@ mod tests { #[test] fn test_sweep_stale_preserves_non_review_prefix() { - // Only `review-` prefixed entries are swept from worktree_base. - // Sibling directories under the same base must be preserved. + // Only `review-` prefixed entries with valid manifests are + // swept from worktree_base. `keep-me` lacks BOTH a review + // prefix AND a manifest, so it must survive. let (_temp_dir, repo_path) = setup_git_repo(); let base = repo_path.join(".worktrees"); std::fs::create_dir_all(&base).unwrap(); @@ -1095,6 +1260,7 @@ mod tests { std::fs::create_dir_all(&review_dir).unwrap(); std::fs::create_dir_all(&keep_dir).unwrap(); std::fs::write(keep_dir.join("important.txt"), "data").unwrap(); + write_test_manifest(&review_dir, &repo_path); let manager = WorktreeManager::with_base(&repo_path, &base); let report = manager.sweep_stale(&[]); @@ -1129,6 +1295,7 @@ mod tests { fn test_sweep_stale_extra_roots_no_prefix_filter() { // Entries under extra_roots are swept regardless of prefix -- // the per-agent root convention has no naming convention. + // Each must carry a valid manifest. let (_temp_dir, repo_path) = setup_git_repo(); let base = repo_path.join(".worktrees"); std::fs::create_dir_all(&base).unwrap(); @@ -1140,6 +1307,7 @@ mod tests { let agent_dir = extra.join("agent-alpha"); std::fs::create_dir_all(&agent_dir).unwrap(); std::fs::write(agent_dir.join("scratch.txt"), "tmp").unwrap(); + write_test_manifest(&agent_dir, &repo_path); let manager = WorktreeManager::with_base(&repo_path, &base); let report = manager.sweep_stale(std::slice::from_ref(&extra)); diff --git a/scripts/adf-setup/adf-cleanup.sh b/scripts/adf-setup/adf-cleanup.sh index 9daee1fe3..38725cad9 100755 --- a/scripts/adf-setup/adf-cleanup.sh +++ b/scripts/adf-setup/adf-cleanup.sh @@ -5,9 +5,12 @@ # Runs as root so it can reclaim worktree contents owned by # sub-process container builds and other elevated agents. # -# Cross-reference: WORKTREE_REVIEW_PREFIX in -# crates/terraphim_orchestrator/src/scope.rs. The literal "review-" -# below must stay in sync with that constant. +# SAFETY: Only deletes directories that contain a valid +# .adf-worktree-manifest.json file matching this repository. +# Unknown directories are preserved regardless of naming convention. +# +# Cross-reference: WORKTREE_REVIEW_PREFIX and WORKTREE_MANIFEST_FILENAME in +# crates/terraphim_orchestrator/src/scope.rs. set -eu umask 022 @@ -18,12 +21,35 @@ ADF_AGENT_TMP_ROOT="${ADF_AGENT_TMP_ROOT:-/tmp/adf-worktrees}" swept=0 failed=0 +skipped=0 + +MANIFEST=".adf-worktree-manifest.json" + +# Check that a directory carries a valid ADF worktree manifest. +# Validates repo_path and worktree_path fields against reality. +valid_manifest() { + dir="$1" + mf="${dir}/${MANIFEST}" + [ -f "$mf" ] || return 1 + # Extract repo_path and worktree_path from the JSON manifest. + # POSIX grep + sed fallback; jq is not guaranteed on minimal hosts. + mf_repo=$(grep -o '"repo_path"[[:space:]]*:[[:space:]]*"[^"]*"' "$mf" 2>/dev/null | sed 's/.*"\([^"]*\)"$/\1/' | head -1) + mf_path=$(grep -o '"worktree_path"[[:space:]]*:[[:space:]]*"[^"]*"' "$mf" 2>/dev/null | sed 's/.*"\([^"]*\)"$/\1/' | head -1) + [ "$mf_repo" = "$ADF_REPO_PATH" ] || return 1 + [ "$mf_path" = "$dir" ] || return 1 + return 0 +} sweep_one() { target="$1" if [ ! -e "$target" ]; then return 0 fi + if ! valid_manifest "$target"; then + printf 'adf-cleanup: skipping %s (no valid ADF manifest)\n' "$target" >&2 + skipped=$((skipped + 1)) + return 0 + fi if git -C "$ADF_REPO_PATH" worktree remove --force "$target" >/dev/null 2>&1; then swept=$((swept + 1)) return 0 @@ -56,7 +82,7 @@ fi # 3. Reconcile git's admin registry. Failure here is not fatal. git -C "$ADF_REPO_PATH" worktree prune --verbose 2>&1 || true -printf 'adf-cleanup: swept=%d failed=%d repo=%s\n' \ - "$swept" "$failed" "$ADF_REPO_PATH" +printf 'adf-cleanup: swept=%d failed=%d skipped=%d repo=%s\n' \ + "$swept" "$failed" "$skipped" "$ADF_REPO_PATH" exit 0 diff --git a/scripts/adf-setup/tests/test_adf_cleanup.sh b/scripts/adf-setup/tests/test_adf_cleanup.sh index 4031fbd48..fe422d5c5 100755 --- a/scripts/adf-setup/tests/test_adf_cleanup.sh +++ b/scripts/adf-setup/tests/test_adf_cleanup.sh @@ -1,10 +1,10 @@ #!/bin/sh # test_adf_cleanup.sh -- POSIX shell driver for adf-cleanup.sh. # -# Seeds three review-* worktrees plus a keep-me/ directory in a -# disposable git repo, runs the sweep script, and asserts that -# review-* entries are removed while keep-me/ is preserved. A -# second run verifies idempotency. +# Seeds three review-* worktrees (with ADF manifests) plus a keep-me/ +# directory (without manifest) in a disposable git repo, runs the +# sweep script, and asserts that review-* entries are removed while +# keep-me/ is preserved. A second run verifies idempotency. set -eu @@ -21,10 +21,28 @@ git -C "$REPO" -c init.defaultBranch=main init -q git -C "$REPO" -c user.email=test@example.com -c user.name=Test \ commit --allow-empty -m "seed" -q +# Worktree manifest helper: writes a valid .adf-worktree-manifest.json +# into the target directory. +write_manifest() { + dir="$1" + cat > "${dir}/.adf-worktree-manifest.json" <