diff --git a/memory/CARDS.md b/memory/CARDS.md deleted file mode 100644 index c586658e..00000000 --- a/memory/CARDS.md +++ /dev/null @@ -1,499 +0,0 @@ - - -# Scope cards — FE-764 petri-sync-server - -Slices 1 + 2 + 3a + 3b + 4 done — FE-764 now supports `brunch cook ---petrinaut-stream` end-to-end: identity-fold default via `--petrinaut-fold`, -in-process stream bus with replay-on-subscribe, ephemeral localhost SSE -server bound to the run, multi-tier base-URL resolution (CLI flag > -`PETRINAUT_BASE_URL` env > hard fail before any cook side effect), URL -composition via `new URL()` + `searchParams`, auto-open via the `open` npm -dep (suppressed by `--no-petrinaut-open` or `CI`), and `server.stop()` in -`finally`. All work lives on `ka/fe-764-petri-sync-server` (stacked on -`ka/fe-784`). - -Slice 5 (brunch web-UI button + endpoint discovery) stays sketched below; -promote to a full card once a real Petrinaut client has consumed the -stream end-to-end at the Bristol demo or a discovery mechanism is chosen. - ---- - -## Slice 5 (sketch — promote to full card after slice 4 ships) - -### Slice 5 — brunch web-UI button + endpoint discovery - -A persistent UI affordance that opens the live run in Petrinaut. Needs a discovery mechanism: the cook process advertises `{ sessionId, url, port }` somewhere the brunch SPA can read (proposed: `/petrinaut-stream.json` written by slice 3 + watched by the SPA; alternative: a brunch-server endpoint `/runs/{runId}/petrinaut` that proxies to the live cook). Decision pending — open coordination item in PLAN.md. - ---- - -## Slice 1: export reducer — `BrunchExecutionExport` from run artifacts - -**Status:** done — commit `a41db69f`. `reduceBrunchExecutionExport` + locked contract types live in `src/orchestrator/src/petrinaut-stream-export.ts` with 11 passing tests (schema, markings, frame-replay oracle, type pins). 
`createIdentityFolding(blueprint)` landed in `petrinaut-fold.ts` as sibling to `createNetFolding` per the slice queue's note — slice 2 only needs to add CLI surface + SPEC lexicon entry. - -**Deferred to slice 2:** engine-driven version of the frame-replay oracle. `serializeBlueprint` currently hard-codes `createNetFolding`; slice 2 widens it to accept a folding opt so an identity-fold end-to-end run becomes round-trippable through the same path. - -### Target Behavior - -A pure function `reduceBrunchExecutionExport({ sdcpnFile, events })` returns a `BrunchExecutionExport` matching the schema locked in PLAN.md §petri-sync-server: `definition` is a tight `NetDefinition` projection of the input SDCPN file (keeps `version`, `meta`, `title`, `places`, `transitions`, `types`; drops `scenarios`, `differentialEquations`, `parameters`, `metrics`); `initialState` is the marking from the `initial_marking` event reduced into the `Marking` shape (count arm under identity folding; coloured arm passthrough when present); `transitionFirings` are the per-place deltas from every `transition_fired` event in arrival order, with `ts: string` preserved verbatim. - -### Boundary Crossings - -``` -→ src/orchestrator/src/petrinaut-stream-export.ts (new — public reducer) -→ consumes: - src/orchestrator/src/petrinaut-sdcpn.ts (SdcpnFile type) - src/orchestrator/src/petrinaut-events.ts (PetrinautEvent union: initial_marking | transition_fired | terminal) -→ exits at: - src/orchestrator/src/petrinaut-stream-export.test.ts (new — unit + replay oracle) -``` - -No cook-process / filesystem / SSE wiring in this slice. The function is consumed in-memory in tests; later slices wire it into the cook event stream and the SSE response body. - -### Risks and Assumptions - -``` -- ASSUMPTION: Marking ≡ Petrinaut's runtime InitialMarking — the no-colour arm is exactly Record<PlaceId, number>. 
- → VALIDATE: TS type alias compiles against @hashintel/petrinaut-core's exported InitialMarking shape (mirror the type locally; cross-check via the reducer producing values that pass through Petrinaut's loader). For this slice the validation is the type declaration + the frame-replay oracle proving counts behave like additive multisets. - → memory/PLAN.md §petri-sync-server (Marking ≡ InitialMarking; count arm for the identity fold) - -- ASSUMPTION: petrinaut-events.jsonl preserves arrival order (single producer; append-only) — so reducing events as a sequence reconstructs firing order faithfully. - → VALIDATE: replay oracle reconstructs every frame from initialState + ordered deltas; a single negative-marking violation falsifies the assumption. - -- RISK: PetrinautEvent shape on fe-784 may differ from /tmp/reduce-export.mjs's expectations (the mjs prototype was written against the unfolded fe-763 shape; fe-784's adapter folds concrete→folded, so firing input/output reference folded place ids when color-fold is active). - → MITIGATION: this slice runs against IDENTITY-folded artifacts only (test fixtures use createIdentityFolding so place ids are concrete and the reducer logic from /tmp matches 1:1). The color-fold variant is exercised in slice 2's tests. - -- RISK: `definition = Omit` keeps inert top-level fields (metrics, parameters, differentialEquations) that Petrinaut's loader may flag. - → MITIGATION: keep them — they're explicit placeholders in petrinaut-sdcpn.ts; removing them is a separate decision. -``` - -### Acceptance Criteria - -``` -✓ All public types (`PlaceId`, `TokenColour`, `Marking`, `SdcpnInputArc`, `SdcpnOutputArc`, `SdcpnPlace`, `SdcpnTransition`, `NetDefinition`, `TransitionFiring`, `BrunchExecutionExport`) exported from petrinaut-stream-export.ts and match the schema locked in PLAN.md §petri-sync-server byte-for-byte. 
- -✓ `Marking = Record` — sum type preserved (count + colour arms); identity-fold runs only populate the count arm, but the type permits the colour arm for future colour-fold consumers feeding the same reducer. - -✓ `TransitionFiring.ts: string` (preserved verbatim from `PetrinautTransitionFiredEvent.ts` — confirmed in petrinaut-events.ts:60). - -✓ `reduceBrunchExecutionExport(input)` is a pure function — no filesystem, no process exits, no globals. - -✓ `definition` projection is the tight 6-field NetDefinition: keeps `version`, `meta`, `title`, `places`, `transitions`, `types`; explicitly drops `scenarios`, `differentialEquations`, `parameters`, `metrics`. Not `Omit` — an explicit constructor that names every kept field. - -✓ Reducer count-reduces every per-place token array to a number under the count arm of `Marking` (identity-fold case); per-place keys with zero tokens are not synthesized (empty places stay absent). - -✓ test: round-trip on a synthetic 2-slice plan under IDENTITY folding — compile blueprint, drive firings through the live engine seam with `createIdentityFolding`, capture events, reduce, then frame-replay: reconstruct every marking from initialState + transitionFirings deltas, assert no negative marking and final marking equals the live PetriNet's terminal marking. (Note: createIdentityFolding doesn't exist yet at slice 1's start — see slice 2. Prefer landing the constructor as a single-file helper here so slice 2 only adds CLI surface; alternative is an inline NetFolding identity mock in the test.) - -✓ test: referential integrity — every place id in initialState and every firing's input/output is present in `definition.places`; every `transitionId` is present in `definition.transitions`. - -✓ test: definition projection — given an SdcpnFile populated with non-empty `scenarios`, `differentialEquations`, `parameters`, `metrics`, the returned `NetDefinition` contains none of those keys and is structurally equal to the input on the 6 kept fields. 
- -✓ test: ts roundtrip — `TransitionFiring.ts` strings match the source events 1:1 (no Date coercion, no number conversion). - -✓ `npm run check` clean (0 errors); `npm run test` includes the new test file and it passes. -``` - -### Verification Approach - -``` -- Inner: synthetic-fixture unit tests + frame-replay oracle (already validated on real run 904d205d via /tmp/reduce-export.mjs — port the oracle into the test file, drive it off a deterministic in-test plan instead of disk). -- Middle: (deferred) wire the reducer into the cook event stream in a follow-up slice, then snapshot a real-run export and validate against Petrinaut's loader. -- Outer: (deferred) end-to-end Petrinaut import — cross-team validation against a real run; happens after the SSE transport slice lands. -``` - ---- - -## Slice 2: identity fold wiring + `--petrinaut-fold` cook CLI flag - -**Status:** done. `--petrinaut-fold=color|identity` (default `identity`) parsed in `cook-cli.ts` → `OrchestratorInput.petrinautFold` → `engine.ts` constructs one folding (identity or color) and shares it between `serializeBlueprint` (now requires `folding: NetFolding` opt) and `createPetrinautEventStream`. SPEC §Lexicon gained `identity fold`. Engine-driven frame-replay oracle landed in `engine-contract.test.ts` covering both modes. `npm run verify` green (1525 tests). - -### Target Behavior - -`brunch cook` accepts `--petrinaut-fold=color|identity` (default `identity`); when `identity`, the run uses a new `createIdentityFolding(blueprint)` constructor that returns a `NetFolding` mapping every concrete id to itself with no token-colour decoration; when `color`, the run uses the existing `createNetFolding(blueprint)`. 
- -### Boundary Crossings - -``` -→ src/server/cli.ts (or wherever brunch cook parses argv — confirm at build time) -→ src/orchestrator/src/petrinaut-fold.ts (new public constructor: createIdentityFolding) -→ cook entry that today calls createNetFolding (one site — confirm at build time) -→ consumers (serializeBlueprint, createPetrinautEventStream) UNCHANGED — they still receive a NetFolding and never branch on which constructor produced it. -``` - -### Risks and Assumptions - -``` -- ASSUMPTION: NetFolding's interface as established in petrinaut-fold.ts is expressive enough to encode the identity case without escape hatches (id→id maps, pass-through token decoration, places/transitions = blueprint.places / blueprint.transitions verbatim). - → VALIDATE: createIdentityFolding implementation fits in <20 lines with no internal branches in serializeBlueprint or createPetrinautEventStream — if either consumer needs a new conditional to handle identity output, the interface is wrong and the slice promotes to refactor the interface first. - -- RISK: Flipping the default to identity is a posture change that affects fixtures and tests that currently snapshot folded output (petrinaut-export.test.ts, petrinaut-events.test.ts, petrinaut-fold.test.ts, petrinaut-sdcpn.test.ts). - → MITIGATION: cook CLI default flips; tests that exercise the color-fold path explicitly construct createNetFolding (don't rely on a default). Existing fold-targeted tests should already be explicit; verify and fix any that implicitly relied on the default. Snapshot updates limited to the cook entry's default behaviour. - -- RISK: Flag name and the SPEC §Lexicon entries (`color fold`, `folded net`) need a matching `identity fold` term to stay tight per AGENTS.md. - → MITIGATION: extend SPEC §Lexicon with `identity fold` in this slice; reference it in the cook CLI help text and in PLAN.md. 
-``` - -### Acceptance Criteria - -``` -✓ `createIdentityFolding(blueprint)` exported from petrinaut-fold.ts; returns the same NetFolding type as createNetFolding; no token-colour decoration; place/transition id maps are identity. - -✓ `serializeBlueprint(blueprint, opts)` and `createPetrinautEventStream(opts)` are byte-identical to before this slice — no new branches, no new conditionals, no new opts fields. - -✓ test: applying createIdentityFolding to a 2-slice blueprint produces a net.json whose places and transitions match the unfolded blueprint shape (42 places, 37 transitions on the standard 2-slice plan — confirm exact counts at build time). - -✓ test: applying createIdentityFolding to a 2-slice blueprint produces an event stream whose transition_fired events reference concrete place ids (no `slice:` prefix stripped). - -✓ `brunch cook` parses `--petrinaut-fold=color|identity`; default is `identity`; unknown values exit non-zero with a clear error. - -✓ test: the cook entry's default code path constructs createIdentityFolding (assert via the cook entry's injection seam, not by invoking the binary). - -✓ SPEC §Lexicon gains `identity fold`; CLI `--help` describes both modes. - -✓ `npm run verify` green (fmt, lint, test, build). -``` - -### Verification Approach - -``` -- Inner: unit tests on createIdentityFolding (id maps + place/transition lists); snapshot/structural tests on serializeBlueprint + event stream under each fold. -- Middle: cook entry test that confirms the default constructor is identity and the flag flips to color. -- Outer: (deferred) cross-team check that Petrinaut's loader accepts both modes' net.json against H-6519 readiness. -``` - -### Notes - -- Slice 2 IS sequentially obvious from slice 1 — its scope wouldn't change based on what slice 1 finds. Keeping both pre-scoped per the prepared-queue rule. 
-- Once both land, `/tmp/reduce-export.mjs` and `HANDOFF.md` can be deleted (HANDOFF retirement rule fires: branch decision recorded, FE-764 work committed, next-slice scope card exists). - ---- - -## Slice 3a: event-bus + replay buffer + incremental frame translator - -**Status:** done. `petrinaut-stream-bus.ts` + `petrinaut-stream-bus.test.ts` -(12 unit tests). `eventToTransitionFiring`, `reduceMarking`, -`projectNetDefinition` extracted from `petrinaut-stream-export.ts` and -shared by the static reducer + the bus. `OrchestratorInput.onPetrinautEvent` -fan-out hook plumbed through `engine.ts` (no engine-level branching — just -threads the callback into `createPetrinautEventStream`'s existing `onEvent`). -Engine-driven replay-equivalence oracle in `engine-contract.test.ts` runs a -real cook with the bus subscribed pre-publish and a late subscriber. `npm -run verify` green (1538 tests). - -### Target Behavior - -A new pure module `petrinaut-stream-bus.ts` exposes -`createPetrinautStreamBus({ runId, sdcpnFile })` returning -`{ publish(event: PetrinautEvent), subscribe(handler): unsubscribe }` where -every subscriber — including a late subscriber that attaches after firings -have already published — observes the full ordered sequence of -`BrunchExecutionExportFrame` values: exactly one `definition` frame, then -exactly one `initial_state` frame, then zero or more `transition_firing` -frames in publish order, then at most one `terminal` frame after which the -subscriber receives no further frames. 
- -### Boundary Crossings - -``` -→ src/orchestrator/src/petrinaut-stream-bus.ts (new — pure pub/sub + replay buffer + frame translator) -→ src/orchestrator/src/petrinaut-stream-export.ts (existing — extract per-event `eventToTransitionFiring(event): TransitionFiring` so the bus and the static reducer share one transform) -→ src/orchestrator/src/types.ts (existing — add `onPetrinautEvent?: (event: PetrinautEvent) => void` fan-out hook on OrchestratorInput) -→ src/orchestrator/src/engine.ts (existing — pass `input.onPetrinautEvent` into createPetrinautEventStream as the `onEvent` callback so engine-emitted events fan out to the bus without engine knowing the bus exists) -→ exits at: - src/orchestrator/src/petrinaut-stream-bus.test.ts (new — pub/sub + replay-on-subscribe + frame translation + terminal closure) - src/orchestrator/src/engine-contract.test.ts (existing — extend FE-764 block: drive engine with `onPetrinautEvent` set, attach bus subscriber, assert replay-on-connect invariant on a real run) -``` - -### Risks and Assumptions - -``` -- ASSUMPTION: The `BrunchExecutionExportFrame` discriminated-union shape is the right SSE wire format — one logical frame per SSE `data:` payload, frame kinds disjoint from PetrinautEvent kinds. - → VALIDATE: `Frame = { kind: 'definition'; definition: NetDefinition } | { kind: 'initial_state'; initialState: Marking } | { kind: 'transition_firing'; firing: TransitionFiring } | { kind: 'terminal' }` is structurally equivalent to walking `BrunchExecutionExport` field-by-field. Type-level check in tests: reducing the captured frame sequence reconstructs a `BrunchExecutionExport` byte-equal to `reduceBrunchExecutionExport({ sdcpnFile, events })`. - → If the cross-team Petrinaut team specifies a different envelope (e.g. always-named `event:` SSE field, batched frames), slice 3b adapts the HTTP serializer; this slice's shape stays. - -- ASSUMPTION: A late subscriber gets the full buffered timeline *before* any further live frames. 
No interleaving — replay completes synchronously on subscribe, then live frames flow. - → VALIDATE: test where subscriber attaches after 3 firings have published; subscriber receives `definition`, `initial_state`, all 3 `transition_firing` frames synchronously, then live firings appear in order. - -- ASSUMPTION: The bus owns the timeline. The engine remains the sole publisher; subscribers are read-only. No backpressure (a slow subscriber doesn't pause the engine — it queues per-subscriber). - → VALIDATE: per-subscriber queue is unbounded for now (in-process, small N); slice 3b revisits if necessary when real HTTP backpressure matters. - -- RISK: Mixing replay and live publish risks a race — if `publish()` is called concurrently with `subscribe()`, the late subscriber could either double-receive a frame or miss one at the boundary. - → MITIGATION: bus is single-threaded JS; replay and the subscribe-mark-live happen in one synchronous tick before publish() can next fire. Test explicitly: `subscribe()` immediately followed by `publish()` produces exactly one delivery of the new frame. - -- RISK: The static reducer (`reduceBrunchExecutionExport`) currently does its own per-event transform inline; factoring it into a shared helper risks regressing slice-1 tests. - → MITIGATION: extract `eventToTransitionFiring` first as a no-op refactor, run all 11 slice-1 tests green, then build the bus on top. -``` - -### Acceptance Criteria - -``` -✓ `createPetrinautStreamBus({ runId, sdcpnFile })` exported from petrinaut-stream-bus.ts; returns `{ publish, subscribe }`; pure (no I/O, no globals, no timers). - -✓ `BrunchExecutionExportFrame` discriminated union exported from petrinaut-stream-bus.ts: `definition` | `initial_state` | `transition_firing` | `terminal`. Type-level pin test mirroring slice-1's locked-schema test. 
- -✓ test: subscriber attached *before* any publish observes — in order — exactly one `definition` frame, exactly one `initial_state` frame after the first `initial_marking` PetrinautEvent, one `transition_firing` per `transition_fired` PetrinautEvent in publish order, and exactly one `terminal` frame after the first `net_halted` or `net_deadlocked` PetrinautEvent. - -✓ test: subscriber attached *after* N firings and a terminal have published receives the full back-buffer (`definition`, `initial_state`, N × `transition_firing`, `terminal`) synchronously on subscribe, then no further frames. - -✓ test: subscriber attached between firings receives the buffered frames synchronously, then the subsequent live firings; no firing is dropped, no firing is delivered twice. - -✓ test: `unsubscribe()` halts delivery to that handler; other subscribers continue receiving frames. - -✓ test: replay-equivalence oracle — collect every frame from one subscriber attached pre-publish, fold them back into a `BrunchExecutionExport`, assert byte-equal to `reduceBrunchExecutionExport({ sdcpnFile, events })`. - -✓ `OrchestratorInput.onPetrinautEvent?: (event: PetrinautEvent) => void` added to types.ts; engine.ts threads it into createPetrinautEventStream's `onEvent` opt (no other engine changes). - -✓ test: engine-driven integration in engine-contract.test.ts — run cook with `onPetrinautEvent` wired to the bus, subscribe before run, assert replay-equivalence after run completes. - -✓ Refactor: `eventToTransitionFiring(event)` extracted into petrinaut-stream-export.ts and reused by both `reduceBrunchExecutionExport` and the bus; all 11 existing slice-1 tests still pass. - -✓ `npm run verify` green. -``` - -### Verification Approach - -``` -- Inner: unit tests on createPetrinautStreamBus (pre-subscribe, post-subscribe, mid-stream subscribe, unsubscribe, terminal closure, replay-equivalence oracle). 
-- Middle: engine-contract.test.ts integration — real cook run with onPetrinautEvent wired into the bus; assert replay-equivalence on captured frames. -- Outer: deferred to slice 3b (real HTTP client reading SSE). -``` - ---- - -## Slice 3b: HTTP/SSE server mounted on the 3a bus - -**Status:** done. `petrinaut-stream-server.ts` + 13-test suite. Real -`http.createServer` + `listen(0)` per test; covers wire conformance, -replay-on-connect (both terminated and mid-stream buses), concurrent -connections, AbortController-disconnect-unsubscribes, -404-on-unknown-routes, CORS preflight, idempotent `stop()`, in-flight -response cleanup on `stop()`. `runCook` integration deferred to slice 4 -as planned. `npm run verify` green (1551 tests). - -### Target Behavior - -A new module `petrinaut-stream-server.ts` exposes -`createPetrinautStreamServer({ bus, host?, port? })` returning -`{ start(): Promise<{ host: string; port: number; streamUrl: string }>, stop(): Promise<void> }` -where `start()` binds a Node `http.Server` (default `host: '127.0.0.1'`, -default `port: 0`) and a single route `GET /stream` returns -`Content-Type: text/event-stream` with one SSE event per -`BrunchExecutionExportFrame` (`event: <kind>\ndata: <json>\n\n`); each -connection subscribes to the bus on open, replays the buffered timeline -synchronously, streams live frames, and closes the response with -`res.end()` immediately after writing the `terminal` frame. 
- -### Boundary Crossings - -``` -→ src/orchestrator/src/petrinaut-stream-server.ts (new — pure HTTP shell over the 3a bus) -→ consumes: - src/orchestrator/src/petrinaut-stream-bus.ts (createPetrinautStreamBus → subscribe / publish, BrunchExecutionExportFrame) - node:http (createServer, Server, IncomingMessage, ServerResponse) -→ exits at: - src/orchestrator/src/petrinaut-stream-server.test.ts (new — real fetch() against listen(0); SSE wire conformance + lifecycle) -→ NOT touched in this slice: - src/orchestrator/src/cook-cli.ts (server boot stays opt-in via slice 4's --petrinaut-stream flag) - src/orchestrator/src/engine.ts (no engine changes — slice 3a already added the onPetrinautEvent fan-out hook) -``` - -### Risks and Assumptions - -``` -- ASSUMPTION: One bus subscription per HTTP connection is the right model — replay-on-subscribe happens once per connect, live frames flow until terminal or client disconnect. - → VALIDATE: tests cover (a) single connection sees definition → initial_state → N firings → terminal, (b) two concurrent connections each see the full timeline independently, (c) client-disconnect mid-stream unsubscribes cleanly. - -- ASSUMPTION: SSE wire shape per frame is `event: <kind>\ndata: <json>\n\n` with UTF-8 — matches what every SSE client (Petrinaut included) parses out of the box. No `id:` field (we own the timeline; `Last-Event-ID` resume is out of scope for v1 since the buffer is the timeline and a new connection just re-replays). - → VALIDATE: test parses the raw response body and asserts the `event:` / `data:` / blank-line framing per frame. - -- ASSUMPTION: Localhost-only bind (`127.0.0.1`) makes auth and CORS posture irrelevant for v1. `Access-Control-Allow-Origin: *` is safe because nothing outside this host can connect. - → VALIDATE: server defaults to `host: '127.0.0.1'` (not `0.0.0.0`); test explicitly asserts the bound host. CORS header sent unconditionally on `/stream` and `OPTIONS`. 
- -- ASSUMPTION: Keep-alive comment frames are unnecessary for v1 — Petrinaut consumes the stream over localhost in seconds-to-minutes, well under any reasonable proxy idle timeout. If Petrinaut later loses connection on long-idle runs, slice 4 or later adds `setInterval(() => res.write(': keep-alive\n\n'), 15_000)`. - → VALIDATE: slice 3b ships without keep-alive; revisit when a real Petrinaut client connects to a real run. - -- RISK: Closing the response immediately after the terminal frame may race with the client's read of the terminal — if `res.end()` happens before the OS has flushed, the client could see a truncated body. - → MITIGATION: Node's `res.write()` + `res.end()` flushes through the kernel buffer; for SSE that's effectively atomic. Test explicitly: client reads the full body after `terminal`, then the connection closes. - -- RISK: Multiple connections share one bus → if a slow subscriber blocks, every subscriber stalls. - → MITIGATION: 3a uses synchronous `for (handler of subscribers) handler(frame)`; the bus does not await. v1 accepts that a misbehaving HTTP write would back up the Node event loop briefly — acceptable for localhost / single-client demos. Production hardening (per-subscriber queue + drop policy) deferred. - -- RISK: Test approach for HTTP — fake `Server` / `ServerResponse` mocks tend to drift from real Node behavior. - → MITIGATION: tests use real `http.createServer` with `listen(0)` + Node's built-in `fetch()` against `http://127.0.0.1:<port>/stream`. Each test starts and stops one server in `beforeEach` / `afterEach`. No mocks of the HTTP layer. -``` - -### Acceptance Criteria - -``` -✓ `createPetrinautStreamServer({ bus, host?, port? })` exported from petrinaut-stream-server.ts; returns `{ start, stop }`. - -✓ Defaults: `host: '127.0.0.1'`, `port: 0`. `start()` resolves with `{ host, port, streamUrl }` where `streamUrl` = `http://${host}:${port}/stream` and `port` is the kernel-chosen ephemeral port. 
- -✓ test: GET /stream returns 200, Content-Type: text/event-stream, Cache-Control: no-cache, Connection: keep-alive. - -✓ test: a pre-subscribed connection (connect before any publish) receives — in order — exactly one `event: definition`, one `event: initial_state`, one `event: transition_firing` per published transition_fired event, and exactly one `event: terminal`, then the response stream closes. - -✓ test: each SSE frame's `data:` line parses as JSON whose shape matches the corresponding BrunchExecutionExportFrame variant. - -✓ test: a connection opened AFTER the bus has published N firings and a terminal receives the full back-buffer synchronously on connect, then the response closes. - -✓ test: two concurrent connections each see the full ordered frame sequence independently; one closing doesn't affect the other. - -✓ test: client closing the connection mid-stream (`AbortController.abort()` on the fetch) unsubscribes from the bus (assert via bus internals — e.g. publish one more event after abort, count subscribers). - -✓ test: requests to any path other than `/stream` return 404. - -✓ test: `OPTIONS /stream` returns 204 with CORS headers (`Access-Control-Allow-Origin: *`, `Access-Control-Allow-Methods: GET, OPTIONS`). - -✓ test: `start()` rejects if called twice; `stop()` is idempotent. - -✓ test: `stop()` ends any in-flight responses and closes the server (any outstanding fetch reads end cleanly). - -✓ `npm run verify` green. -``` - -### Verification Approach - -``` -- Inner: petrinaut-stream-server.test.ts — real http.createServer + listen(0) + Node fetch() per test, no HTTP mocks. Covers wire conformance, lifecycle, concurrent connections, disconnect, 404, OPTIONS, idempotent stop. -- Middle: deferred to slice 4 (cook-CLI integration — boot via --petrinaut-stream flag, real cook run streams real frames to a real Petrinaut client mock). 
-- Outer: deferred to slice 4 / 5 cross-team check — a real Petrinaut client (Chris's repo) consumes a real cook run end-to-end. -``` - -### Notes - -- Slice 3b is sequentially obvious from 3a — its scope wouldn't change based on what 3a finds (and didn't). The HTTP shell composes 3a's `subscribe` callback into a `res.write()` per frame; that's it. -- Slice 4's job is the trigger surface: `--petrinaut-stream` flag flips server boot on, `--petrinaut-base-url` + env + auto-open compose the Petrinaut launcher URL, `runCook` orchestrates the lifecycle. -- After 3b ships, `/tmp/reduce-export.mjs` is fully obsolete (the bus's replay-equivalence oracle subsumes its frame-replay role). - ---- - -## Slice 4: `--petrinaut-stream` cook wiring + URL composition + auto-open - -**Status:** done. `petrinaut-launcher-url.ts` (pure resolver + composer, 8 tests), `OrchestratorInput.setupPetrinautStream` awaited hook in engine, `createPetrinautStreamSetup` factory in `cook-cli.ts` (bus + server + open lifecycle, 5 tests + 1 await-ordering invariant test in engine-contract.test.ts), `loadLocalEnvShellWins` local `.env` loader, three new flags (`--petrinaut-stream`, `--petrinaut-base-url=<url>`, `--no-petrinaut-open`) with companion-flag validation, `.env.example` lines for `PETRINAUT_BASE_URL`. Engine refactor decouples stream setup from the FE-762 best-effort file-write block so the stream survives a disk-write failure. `npm run verify` green (1571 tests). 
- -### Target Behavior - -`brunch cook --petrinaut-stream` ensures an ephemeral SSE server is **listening on `127.0.0.1` (kernel-chosen port) before the engine emits the first Petrinaut event (`initial_marking`)**, subscribes the engine's `PetrinautEvent` stream to the bus, prints the composed Petrinaut launcher URL, auto-opens it in the default browser unless `--no-petrinaut-open` or `process.env.CI` is set, leaves the SSE server alive for the entire `engine.run()`, and stops the server in `finally` after the run completes (success, failure, or thrown). Without `--petrinaut-stream`, cook behaviour is **byte-identical to today** (no server, no URL, no open, no `.env` load). When `--petrinaut-stream` is set, base URL resolution (CLI flag > env var > hard fail) runs **before** any cook side effects — no banner, no plan load, no sandbox, no run dir. - -### Boundary Crossings - -``` -→ src/orchestrator/src/cook-cli.ts (existing) - - new fields on CookOptions: petrinautStream, petrinautBaseUrl, petrinautOpen - - new flag parsing: --petrinaut-stream, --petrinaut-base-url=<url>, --no-petrinaut-open - - companion-flag validation: --petrinaut-base-url and --no-petrinaut-open without --petrinaut-stream → hard error in parseCookArgs - - usage string + help text update - - runCook (only when opts.petrinautStream === true): load .env via loadLocalEnvFile(launchCwd); resolve base URL; on error, print and exit(1) BEFORE banner/loadPlan/createSandbox. Then pass setupPetrinautStream into engine input; setup hook creates bus + server, awaits server.start(), composes URL via new URL() + searchParams, prints URL, auto-opens (unless suppressed), and RETURNS a publisher callback. Ensure server.stop() in finally. 
-→ src/orchestrator/src/petrinaut-launcher-url.ts (new — pure URL composer + base-URL resolver) - - export resolvePetrinautBaseUrl({ cliFlag, env }) → { baseUrl } | { error: string } - - export composeLauncherUrl({ baseUrl, runId, streamUrl }) → string (uses new URL() + searchParams.set so existing query params and encoding are correct) -→ src/orchestrator/src/types.ts (existing) - - REPLACE the existing onPetrinautEvent posture with an awaited setup hook: - setupPetrinautStream?: (input: { runId: string; sdcpnFile: SdcpnFile }) => Promise<((event: PetrinautEvent) => void) | undefined> - - The hook is awaited BEFORE the engine emits initial_marking. The returned callback (if any) becomes the onEvent for createPetrinautEventStream. (onPetrinautEvent from slice 3a stays as-is for callers that want a fire-and-forget hook without lifecycle setup; setupPetrinautStream is additive — when both are set, both fire.) -→ src/orchestrator/src/engine.ts (existing) - - DECOUPLE stream setup from the FE-762 best-effort file-write block. Compute serialized + sdcpnFile ONCE in memory; FE-762 file writes stay best-effort and independent. Then: await input.setupPetrinautStream?.({ runId, sdcpnFile }) before createPetrinautEventStream, and merge the returned callback (if any) into onEvent alongside any existing onPetrinautEvent. Then emitInitialMarking. When setupPetrinautStream is unset, behaviour is byte-identical to today. -→ src/server/runtime-config.ts (existing) - - loadLocalEnvFile(cwd) already exported; reuse from cook-cli.ts. (Helper currently overrides set env values silently — see Risks; document the precedence in cook-cli or copy/adapt the helper locally if precedence needs to flip.) -→ .env.example (existing) - - add line: PETRINAUT_BASE_URL= -→ package.json (existing) - - 'open' dependency already declared (open ^11). No change. 
-→ exits at: - src/orchestrator/src/petrinaut-launcher-url.test.ts (new — pure tests for base-URL resolution + URL composition) - src/orchestrator/src/cook-cli.test.ts (existing — extend: flag parsing (all 3 flags + companion validation), runCook lifecycle via injected seams (openUrl, createServer / setupPetrinautStream), env-load gating, server stop in finally) - src/orchestrator/src/engine-contract.test.ts (existing — extend FE-764 block ONLY to assert: setupPetrinautStream is awaited before initial_marking; returned callback receives full event sequence. NO real HTTP in this test — transport coverage stays in petrinaut-stream-server.test.ts.) -``` - -### Risks and Assumptions - -``` -- ASSUMPTION: The bus needs the SdcpnFile, which is built inside engine.ts after compileTopology. Engine is the source of truth for compile/fold/sdcpn; CLI must not duplicate that. Seam choice: an **awaited setup hook** (`setupPetrinautStream`) — engine builds sdcpnFile, awaits the hook, the hook stands the bus + HTTP server up and resolves with a publisher callback. This guarantees the server is listening BEFORE `initial_marking` is emitted. A sync `onPetrinautBus(bus): void` callback was rejected because it cannot await `server.start()`, creating a race where the first events publish before the HTTP server accepts connections. - → VALIDATE: engine awaits the hook before any `createPetrinautEventStream` / `emitInitialMarking` call; tests verify the await ordering. Existing callers without the hook see byte-identical behavior. - -- ASSUMPTION: `loadLocalEnvFile(launchCwd)` (currently used by `src/server/cli.ts`) is the right reuse target for cook-cli. Note: it **overrides** existing env vars silently (it sets `process.env[key] = value` unconditionally for any non-empty parsed value), which means `.env` wins over the shell. That's the opposite of what most tools do (shell wins over `.env`). 
- → DECIDE in build: either (a) accept current precedence and document it in cook-cli help text + risk surface, or (b) copy a tiny local helper (`loadLocalEnvFile` ~10 lines) into orchestrator that only sets unset keys (shell wins). Recommendation: (b) — shell-wins matches every common dotenv tool and prevents the surprise of a stale `.env` clobbering an explicit `PETRINAUT_BASE_URL=...` shell prefix. Either way, **`.env` loading happens ONLY when `--petrinaut-stream` is set** (no behavior change for non-stream runs). - -- ASSUMPTION: The `open` npm package (already a dep) launches the system browser cross-platform and resolves whether or not the browser actually opens. - → VALIDATE: invoke `open(url)` inside a try/catch; failure prints a "Couldn't auto-open browser; visit {url}" warning and continues. Test: auto-open is suppressed in tests by injecting an `openUrl` seam (default = npm `open`) so we never spawn a real browser in CI. - -- ASSUMPTION: Server lifetime ≡ engine.run lifetime. Starting before engine.run guarantees the URL is openable as soon as the user sees it; stopping in `finally` guarantees cleanup on success, failure, and uncaught exception. - → VALIDATE: test: server.stop() is called whether engine.run resolves or rejects; connectionCount() drops to 0 after stop. - -- ASSUMPTION: URL param shape `{baseUrl}?runId={runId}&mode=actual&sse={streamUrl}` is speculative (Chris hasn't locked names). v1 emits these names; slice 5 / coordination revisits. - → VALIDATE: URL composer is a pure function with a single source of truth; renaming a param is a one-line change confined to composeLauncherUrl. - -- ASSUMPTION: CI detection via `process.env.CI` (any truthy value) is the right signal — matches `gh actions`, `circleci`, `vercel`, etc. - → VALIDATE: test: with `process.env.CI = '1'`, auto-open is skipped even without `--no-petrinaut-open`; URL still prints. 
- -- RISK: If `--petrinaut-stream` is set but base URL resolution fails, cook should hard-fail BEFORE creating the sandbox / writing artifacts (avoid orphan run dirs). - → MITIGATION: resolve base URL as the first step in runCook when `petrinautStream === true`; on failure, print the locked message and exit(1) before resolveCookMode / createSandbox. - -- RISK: Test for engine-contract.test.ts end-to-end may flake on port allocation / connection timing. - → MITIGATION: rely on listen(0) (already done in 3b tests) + await server.start() before composing URL; use Node fetch() with explicit signal; if flake materialises, gate the e2e test on `process.env.PETRINAUT_E2E` and keep inner-loop tests deterministic (unit-level URL composer + cook-cli flag parsing). - -- RISK: Cross-mini-library import (orchestrator → server) might violate the codebase's compartmentalization posture. - → MITIGATION: if so, lift loadLocalEnvFile to `src/shared/` (or copy the ~10-line helper into orchestrator) — both are acceptable; pick the simpler one at build time. Don't block the slice on this. -``` - -### Acceptance Criteria - -``` -✓ `parseCookArgs` accepts `--petrinaut-stream` (boolean), `--petrinaut-base-url=` (string), `--no-petrinaut-open` (boolean). Usage string updated. Unknown values exit non-zero with a clear error (existing pattern). - -✓ Companion-flag validation in parseCookArgs: `--petrinaut-base-url` or `--no-petrinaut-open` set WITHOUT `--petrinaut-stream` → hard error ("--petrinaut-base-url requires --petrinaut-stream" / "--no-petrinaut-open requires --petrinaut-stream"). Test: each combination raises. - -✓ `CookOptions` gains: `petrinautStream: boolean`, `petrinautBaseUrl?: string`, `petrinautOpen: boolean` (CI env check stays in runCook so parseCookArgs remains pure). - -✓ test: parseCookArgs round-trips each new flag with correct precedence (no flag → defaults; `--no-petrinaut-open` → petrinautOpen=false; etc.). 
- -✓ `resolvePetrinautBaseUrl({ cliFlag, env })` exported from petrinaut-launcher-url.ts: returns `{ baseUrl }` from CLI flag if set, else from `env.PETRINAUT_BASE_URL` if set, else `{ error: 'Petrinaut base URL required: set PETRINAUT_BASE_URL in .env or pass --petrinaut-base-url=' }`. Pure function. - -✓ test: all three resolution branches (CLI > env > error) verified, including the exact error message string. - -✓ `composeLauncherUrl({ baseUrl, runId, streamUrl })` exported from petrinaut-launcher-url.ts: uses `new URL(baseUrl)` + `searchParams.set('runId', runId)` + `searchParams.set('mode', 'actual')` + `searchParams.set('sse', streamUrl)` so existing query params on `baseUrl` survive and encoding is correct. Pure function. - -✓ test: composer handles baseUrl with and without trailing slash; baseUrl with pre-existing query params (they survive); URL-encodes the sse parameter automatically via searchParams; preserves runId verbatim. - -✓ `runCook` (when `opts.petrinautStream === true`): - 1. Calls `loadLocalEnvFile(launchCwd)` BEFORE base-URL resolution. (Skipped entirely when petrinautStream is false.) - 2. Resolves base URL via `resolvePetrinautBaseUrl({ cliFlag: opts.petrinautBaseUrl, env: process.env })`. On error: prints message to stderr and `process.exit(1)` — BEFORE banner, loadPlan, createSandbox. - 3. Constructs a `setupPetrinautStream` hook (closure capturing the openUrl seam and shouldOpen flag) and passes it as `OrchestratorInput.setupPetrinautStream`. The hook: creates bus, creates server, awaits `server.start()` → `{ streamUrl }`. If `server.start()` rejects, the hook rejects → engine.run rejects → exit non-zero before any firing. - 4. Inside the hook (after server.start resolves): composes launcher URL via `composeLauncherUrl(...)`, prints to stderr, calls openUrl unless `opts.petrinautOpen === false` or `process.env.CI` truthy. On openUrl failure: warns and continues (doesn't fail the cook run). Returns `bus.publish` as the publisher callback. 
- 5. Calls `server.stop()` in a `finally` (success, failure, or thrown error all cleanup). connectionCount() returns 0 after stop. - -✓ test (cook-cli.test.ts): with `--petrinaut-stream` and a base URL set via env, runCook (via injected `openUrl` seam) calls openUrl with the composed launcher URL. With `--no-petrinaut-open`, openUrl is not called but the URL is printed. With `process.env.CI='1'`, openUrl is not called. - -✓ test (cook-cli.test.ts): without `--petrinaut-stream`, loadLocalEnvFile is NOT called; no setupPetrinautStream is passed to engine; no URL is printed. Behavior byte-identical to today (regression guard). - -✓ test (cook-cli.test.ts): `server.start()` rejection (simulated via injected seam) hard-fails the run before any firing; server.stop() is still attempted in finally (idempotent). - -✓ test (engine-contract.test.ts): `setupPetrinautStream` is **awaited before** the first PetrinautEvent (no firings or initial_marking observed before the hook resolves); the returned callback receives the full event sequence (initial_marking → N transition_fired → terminal). NO real HTTP in this test — transport coverage stays in `petrinaut-stream-server.test.ts`. - -✓ `OrchestratorInput.setupPetrinautStream?: (input: { runId: string; sdcpnFile: SdcpnFile }) => Promise<((event: PetrinautEvent) => void) | undefined>` added to types.ts. Existing `onPetrinautEvent` stays additive (when both are present, both fire). engine.ts decouples stream setup from the FE-762 best-effort file-write block. - -✓ Fold interaction: `--petrinaut-stream --petrinaut-fold=color` and `--petrinaut-stream --petrinaut-fold=identity` both work; the stream carries whatever fold the engine produces. Test: parse both combos cleanly; no extra coupling. - -✓ runDir outputs: slice 4 writes NO new file under runDir (no `petrinaut-stream.json` discovery file — that's slice 5). - -✓ `.env.example` gains `PETRINAUT_BASE_URL=` (empty placeholder so first-run setup is obvious). 
- -✓ `npm run verify` green (fmt, lint, test, build). -``` - -### Verification Approach - -``` -- Inner: petrinaut-launcher-url.test.ts unit tests for base-URL resolution + URL composition (pure); cook-cli.test.ts for flag parsing + companion validation + runCook lifecycle via injected seams (openUrl, setupPetrinautStream factory); engine.ts unchanged for callers without setupPetrinautStream. -- Middle: engine-contract.test.ts extension — assert await-ordering invariant (hook resolves before first event) using a direct setupPetrinautStream that records ordering; NO real HTTP (that lives in petrinaut-stream-server.test.ts from slice 3b). -- Outer: deferred to slice 5 / cross-team validation — Chris's Petrinaut client connects to a real cook run and renders the "actual" view. Bristol-demo readiness gate. -``` - -### Notes - -- The `--petrinaut-fold` flag from slice 2 stays orthogonal: identity-fold is the demo default, and the stream just carries whatever fold the engine produced. Stream tests should not re-litigate fold behaviour. -- After slice 4 lands, the only blocker to running the Bristol demo is Chris's Petrinaut client wiring the `?sse=` query param to its SSE consumer — coordination item, not brunch code. -- Slice 5 (web-UI button) needs a discovery mechanism (`/petrinaut-stream.json` advertised by the cook process is the leading candidate); deferred per PLAN.md. diff --git a/memory/PLAN.md b/memory/PLAN.md index 9e4f61dd..5f00ef88 100644 --- a/memory/PLAN.md +++ b/memory/PLAN.md @@ -28,6 +28,7 @@ The May 2026 intent-spec, multi-chat, changeset-ledger, prompt/context, and agen 1. `agent-fixture-substrate` — branch-complete off main, reconciling — FE-705 integration substrate for JSONL agent capability CLI and LLM-as-user probes. 2. `chat-runtime-secondary-chats` — FE-716; V1 done — PR #141 merged to main. 3. **Petrinaut integration sub-track** — umbrella **FE-760** (Orchestrator ⇄ Petrinaut). 
FE-761 (semantics), FE-762 (`net.json` + SDCPN export), FE-763 (event stream), and FE-784 (colour fold) have **landed**. **`petri-sync-server` (FE-764)** is the active piece, reshaped (2026-06-01 meeting) into an **ephemeral cook-hosted SSE live stream** for the Bristol demo — no-colour, replay-on-connect, brunch-initiated session, supersedes the dropped static-bundle idea. Replaces the POC interpreter's visualization role with Petrinaut as canonical surface. +4. `spec-to-cook-plan` — **FE-800**; **done — branch-complete off FE-764**, PR #167 pending re-description. Six slices landed: 1 (deterministic projection) + 2 (LLM planning pass) + 3 (deterministic reconciliation) + 4 (CLI wiring) + 5 (warning-model hardening) + 6 (read from spec id — `brunch plan `, server-side snapshot builder `buildCompletedSpecSnapshot` over `getEntitiesForSpecificationOnActivePath`, plan driver moved into `src/server/plan-runner.ts`, orchestrator `plan-cli.ts` deleted). Bristol-demo front half (`brunch plan ` → `.brunch/cook/plan.yaml` → `brunch cook --petrinaut-stream`) is now operational against any completed spec in the project DB. Two proving spikes done 2026-06-03. Move to **Recently Completed** on PR merge. ### Recently Completed @@ -39,7 +40,7 @@ The May 2026 intent-spec, multi-chat, changeset-ledger, prompt/context, and agen #### Follow-ons surfaced by the 2026-05-26 cook-codebase-mode smoke - **pi-actions evaluate-done collapses the TDD workflow** — `pi-actions.ts:70` passes `--tools read,write,edit,bash` to every action including `evaluate-done`. Real pi fixed the buggy file *during evaluation* and reported `done: true` on the first call; write-tests / write-code / run-tests never executed. Affects both modes but is more visible in brownfield. Either restrict evaluator tools to `read` or accept this as the intended pi-as-agent behavior. Worth its own frontier. 
-- **`cook-artifact-lifecycle` frontier (proposed, not yet authored)** — slice 3's hybrid mechanism creates real slice branches (`cook-slice//`) but never commits to them; the cook branch (`cook/`) still has HEAD === source HEAD and the modification lives in untracked subdirs of the cook branch's working tree. To close the loop: (a) commit slice work to the slice branch on slice completion, (b) replace `mergeSlicesIntoEpicSandbox`'s file-copy with `git merge` of slice branches into an epic branch surfacing real conflicts (today's file-copy is silent last-slice-wins), (c) merge epic branches back to `cook/` so `git merge cook/` from main becomes the promotion path. Pairs with worktree + branch GC story. ~2-3 days of structural work; slice 3 set up the substrate (real branches per slice) so this frontier can land cleanly on top. +- **cook output promotion (follow-on)** — slice 3 creates real slice branches (`cook-slice//`) but never commits; `cook/` HEAD === source HEAD with modifications in untracked subdirs, so there is no promotion path into the user's checkout. To close: commit slice work, `git merge` slice→epic→`cook/`, then `git merge cook/` from the working branch. Pairs with worktree/branch GC. Quality-of-life; the run worktree is already inspectable by hand. ### Next @@ -106,9 +107,10 @@ The May 2026 intent-spec, multi-chat, changeset-ledger, prompt/context, and agen - **Name:** Petri graph compilation — compile nets from plan-graph + relation policy - **Linear:** unassigned in this plan snapshot - **Kind:** structural -- **Status:** horizon (blocked on `intent-graph-semantics` FE-700) +- **Status:** horizon (blocked on `intent-graph-semantics` FE-700) — **premise weakened, partially subsumed by `spec-to-cook-plan` (FE-800); see Reconciliation** - **Objective:** Compile Petri nets from workspace plan-graph nodes and relation-policy edges rather than from YAML plan fixtures. 
Relation kinds (`plan.depends_on`, `plan.verified_by_oracle`, `plan.introduces_design`, etc.) compile into topology-level requirements (prerequisite tokens, guard predicates, semantic-lane join conditions). Extends the FE-700 relation-policy registry. - **Why now / unlocks:** Without graph compilation, the Petri engine only runs hand-authored YAML plans. Graph compilation makes the engine a planning oracle (simulate before executing) and connects execution to the semantic workspace. +- **Reconciliation with `spec-to-cook-plan` (FE-800, 2026-06-03):** This frontier's premise — compile from `plan.depends_on` relation-policy edges — quietly assumed those execution-order edges exist in the graph (to be supplied by FE-700). The FE-800 spikes proved **execution order is not spec truth and FE-700 will not conjure it** (the observer captures only epistemic deps; requirements are pure sinks of `depends_on`). So the ordering this frontier wanted to read must be **synthesized** — exactly what FE-800's LLM planning pass does at the `plan.yaml` layer, after which the existing `net-compiler.ts` (plan.yaml → net) already produces the net. Net effect: FE-800 + the existing compiler cover the graph→executable-net path; this frontier's remaining **distinct** value is the **Phase-4 simulation oracle** (analyze/simulate the net before running) and richer synthesized token/gate payloads, *not* a separate graph→net compiler. Reframe or fold accordingly before scheduling; do not treat as independent of FE-800. - **Open design constraints (from PR #143 / FE-743 review):** - **Declarative output arcs:** Extracted to its own frontier `petri-declarative-routing` (lands ahead of Phase 3; independent of FE-700). - **Token state enrichment:** Open question whether more metadata should move from reports into tokens (richer typed token payloads per spec §3). 
FE-738 added `reworkCount`, FE-743 added pool tokens with `agentPoolSize`, but the boundary between control state (tokens) and substantive handoff state (reports) is a design choice this frontier needs to resolve as the token taxonomy gets richer. @@ -284,6 +286,23 @@ The May 2026 intent-spec, multi-chat, changeset-ledger, prompt/context, and agen - **Artifacts:** contract `src/orchestrator/src/petrinaut-stream-contract.ts` + `docs/petrinaut-stream-contract.md`; validated sample export from run `904d205d`. - **Traceability:** §Lexicon `folded net` (export reuse; demo deviates via identity fold); I122-K (tokens are pointers → per-place counts suffice for marking deltas); execution-authority posture (Petrinaut renders; brunch's interpreter runs the net). +### spec-to-cook-plan + +- **Name:** Spec → orchestrator plan emitter — project + plan a `brunch cook` plan.yaml from a completed intent graph +- **Linear:** FE-800 (standalone; not parented under FE-760) +- **Kind:** structural +- **Status:** done — branch-complete off FE-764, PR #167 pending re-description. 
Six slices landed: 1 (deterministic projection), 2 (LLM planning pass), 3 (deterministic reconciliation — id existence, self-loops, cycle break via Kahn lex-tie-break, non-buildable slice + dep dropping, epic grouping with default-epic fallback, synthesized unit-test verification targets, all transformations surfaced as typed `ReconciliationWarning[]`), 4 (CLI wiring composing the three stages, writes `.brunch/cook/plan.yaml`, surfaces warnings on stderr; emitter falls back to empty enrichment when the LLM throws so a usable orderless plan still emits), 5 (warning-model hardening — single `EmitterWarning` audit stream, synthesis demoted to verbose-only, formatter co-located), 6 (read from spec id — `brunch plan [--out=] [--verbose]`, server-side snapshot builder `buildCompletedSpecSnapshot(db, specId)` over `getEntitiesForSpecificationOnActivePath` mapping accepted requirements/criteria + active-path relationships filtered to accepted ids, plan driver moved to `src/server/plan-runner.ts`, orchestrator `plan-cli.ts` deleted). Two proving spikes done 2026-06-03 (see memory `spec-to-cook-plan-spike`); branch stacks on FE-764. Bristol-demo end-to-end path (`brunch plan ` → `brunch cook --petrinaut-stream`) is now operational. +- **Objective:** Emit a `brunch cook` plan.yaml from a completed brunch specification's intent graph. Three-stage emitter: **projection** (deterministic) — `requirement` items → slices, `criterion --verifies--> requirement` edges → per-slice verification linkage, stable slice ids; **planning pass** (LLM) — infer the execution-order `depends_on` DAG + epic grouping + non-buildable-constraint detection, since execution order is not spec truth and reads as zero from the graph; **reconciliation** (deterministic) — validate the LLM output for cook (drop/redirect deps onto non-buildable constraints, guarantee acyclicity, synthesize conventional verification targets, flag contradictions). Output is a reviewable artifact, not a silent input.
+- **Why now / unlocks:** The missing front-half of the Bristol end-to-end demo (SPEC → generated plan → cook → Petri → Petrinaut). TRACK F execution + Petrinaut visualization are done/active (FE-760 umbrella, FE-764 streaming) and `cook-codebase-mode` runs brownfield, but every cook run still starts from a hand-authored plan.yaml. This is the smallest bridge from "fixture-driven orchestrator" to "brunch spec drives the orchestrator." +- **Spike findings (2026-06-03, against real completed spec 2 "brunch_graphs"):** (1) projection works today; verification linkage fully covered (every requirement has ≥1 verifying criterion). (2) graph-read dependency synthesis yields **zero** — requirements are only sinks of epistemic `depends_on`; **not fixable by FE-700** (it types relations, it doesn't make the observer emit execution order). (3) one `generateObject` call (claude-sonnet-4, ~900/640 tokens) produced a credible acyclic DAG + free non-buildable detection, but dangled deps onto constraints → requires the reconciliation stage. Not blocked by FE-700/FE-701/FE-705; spec 2 is a usable demo input that exists now. +- **Acceptance:** (1) `brunch` emits `/.brunch/cook/plan.yaml` from a completed specification (all phases confirmed). (2) Projection is deterministic: requirements → slices, verifies edges → verification linkage, stable slice ids. (3) Planning pass produces an acyclic `depends_on` DAG and flags non-buildable constraint-style requirements. (4) Reconciliation guarantees no dangling/cyclic deps and emits cook-valid schema (epics/slices/depends_on/verification). (5) The generated plan round-trips through `loadPlan` and drives a `brunch cook --petrinaut-stream` run end-to-end against a brownfield fixture. (6) Demo mode: ordering can be authored/overridden deterministically (reviewable) instead of LLM-generated, for a controlled Bristol run. 
+- **Open / pending decisions:** ordering LLM-by-default vs authored-by-default for the demo; whether the emitter lives server-side (capability contract) or in the orchestrator package; brownfield verification-target convention (criterion prose → runnable test path is synthesized, agent authors the test). +- **Follow-on — integration-blind verification (2026-06-04):** the first brownfield cook of `spatial_graph_layout` produced *orphan* feature modules (+ a Ladle story) that satisfied criteria like AC1 ("toggling the layout switch swaps between list and canvas") **without the feature existing in the running app**. Root cause sits in this emitter: the convention-synthesized `verification.target` is integration-blind, so the agent authored a test that passes in isolation. Productizing brownfield cook ("a cooked feature is real and visible in brunch") needs (a) the emitter to emit *integration-shaped* slices + verification that demands host-wiring — an **integration oracle** (product reachability, enforced in the FE-738 semantic lane; distinct from `petri-simulation-oracle`'s *net* reachability), and (b) run-output **promotion** into the checkout (see the cook-codebase-mode promotion follow-on). Not on the demo critical path — the Bristol path shows execution/visualization, which orphan-but-executed does not break. Revisit when brownfield cook moves from "executes a plan" to "ships a feature." +- **Relationship to `petri-graph-compilation` (Phase 3):** these are NOT independent. This frontier projects graph → `plan.yaml` then reuses the working `net-compiler.ts` (plan.yaml → net); Phase 3 wanted to compile graph → net directly from `plan.depends_on` relation edges. The spikes showed those execution-order edges don't exist and FE-700 won't supply them, so Phase 3's ordering input must itself be synthesized — i.e. FE-800 is the grounded source of what Phase 3 assumed it could read. 
FE-800 partially **subsumes** Phase 3; Phase 3's residual value is the simulation oracle (Phase 4), not the compile path. Keep the two reconciled. +- **Verification:** projection golden tests (spec fixture → plan.yaml); planning-pass acyclicity/contract tests (mock + opt-in real-provider); reconciliation tests (dangling-dep redirect, cycle break, non-buildable handling); end-to-end integration feeding a generated plan into the existing brownfield-smoke harness. +- **Traceability:** Requirements 46–50; D155-K–D160-K (new D160-K); A97 (validated); resolves SPEC §Constraints non-goal tension via D160-K. Spike memory: `spec-to-cook-plan-spike`. +- **Design docs:** `docs/design/orchestrator.md`; `docs/next/architecture/plan-graph-petri-orchestration.md`; umbrella H-6476. + ### petrinaut-colour-fold - **Name:** Petrinaut export — colour-fold per-slice subnet @@ -685,7 +704,8 @@ orchestrator-poc (Phase 0: compiler extraction — done) │ └──→ petri-event-stream (FE-763: initial markings + transition firings — done) │ ├──→ petrinaut-colour-fold (FE-784: colour-fold export projection — done; set aside for the no-colour demo) │ └──→ petri-sync-server (FE-764: ACTIVE — ephemeral cook-hosted SSE live stream; replay-on-connect; brunch-initiated session; Bristol demo) - ├──→ petri-graph-compilation (Phase 3: compile from plan-graph + relation policy; needs FE-700) + ├──→ spec-to-cook-plan (demo front-half: completed intent graph → cook plan.yaml; projection + LLM planning pass + reconciliation; spikes done; feeds FE-764 stream; NOT blocked by FE-700) + ├──→ petri-graph-compilation (Phase 3: compile from plan-graph + relation policy; needs FE-700; premise weakened — partially subsumed by spec-to-cook-plan; residual value = Phase 4 sim oracle) └──→ petri-simulation-oracle (Phase 4: reachability, deadlock, resume; declarative-routing structural prerequisite now satisfied; Phase 3 still needed for graph-derived gates) LOWER-PRIORITY / DEFERRED diff --git a/memory/SPEC.md 
b/memory/SPEC.md index ddcdb909..3ab82552 100644 --- a/memory/SPEC.md +++ b/memory/SPEC.md @@ -156,6 +156,7 @@ Brunch operates inside a **workspace**: the cwd-backed software context whose lo | A94 | Durable secondary chats can replace independent side-chat persistence while preserving reloadable side, reconciliation, qa, and strategy conversations inside one workspace surface without introducing a `thread` table yet. | medium | open | D138, D153, Requirement 45 | In-stream secondary-chat rendering/reload walkthroughs over the existing chat/turn substrate. | | A95 | Transcript-first context with explicit context snapshots on turn rows plus active graph-item handles on chats can keep secondary chats useful across multi-chat item changes without a persisted context-spec table. Handles only need re-snapshotting when the referenced item's version/fingerprint advances. | medium | open | D139, D140, D154, Requirement 45 | Context-provision tests for snapshot insertion, item-list/neighborhood/economic-graph snapshot builders, stale-handle refresh, and prompt/context-pack rendering. | | A96 | Async-by-default reconciliation can move Pending review into an in-stream target-grouped reconciliation chat without hiding judgment work or surfacing auto-confirmed noise. | medium | open | D135, D137, D138, D146, D153 | Track 3 classifier scheduling, target-ordering tests, and dense reconciliation walkthroughs. | +| A97 | A completed intent graph can be projected + planned into a valid `brunch cook` plan.yaml: `requirement` items and `criterion --verifies--> requirement` edges read deterministically, but execution-order `depends_on` is **not** spec truth (the observer captures only epistemic deps; FE-700 does not change this) and must come from an LLM planning pass plus a deterministic reconciliation stage, not a graph read. 
| high | validated | D160-K, Requirements 46–50 | Two spikes 2026-06-03 against real completed spec 2 ("brunch_graphs"): projection clean + verification fully covered; graph-read req→req deps = 0; one `generateObject` call yielded a credible acyclic DAG + free non-buildable-constraint detection, but dangled deps onto constraints (needs reconciliation). | ### Active Decisions @@ -206,6 +207,7 @@ Brunch operates inside a **workspace**: the cwd-backed software context whose lo 157. **Action dispatch is name-keyed and extensible** — engines orchestrate which action fires when; handlers own how. POC uses inline dispatch per engine; promote to a real `ActionRegistry` when a 3rd action type lands. Depends on: Requirement 46. 158. **Plan model is two-level (epics → slices), no milestones in POC** — schema is provisional pending canonical brunch plan emission. Forward-compatible for intent/design/oracle pointers. 159. **Worktree isolation per run** — agents write freely inside `/.brunch/cook/runs//worktree/` (cwd-scoped, not fixture-scoped); fixture dir and source repo untouched. Fixtures stay byte-identical before and after a run. Depends on: Requirement 49. +160. **Spec→cook-plan emission is a CLI/orchestrator-track seam, not a V1 product UI surface** — projecting and planning a cook `plan.yaml` from a completed intent graph is dev-layer orchestrator capability extending Requirements 46–50, so it does not breach the V1 product non-goal "Brunch elicits specs and stops at the handoff/export boundary," which governs interactive product UX. The emitter is three-stage: projection (deterministic graph read of requirements + verifies edges) + planning pass (LLM-inferred execution-order DAG, epic grouping, non-buildable detection) + reconciliation (deterministic validation: no dangling/cyclic deps, cook-valid schema, synthesized verification targets). Generated plans are reviewable artifacts, not silent inputs. Depends on: Requirements 46–50; A97. 
#### Provider, prompt/context, and agent substrate diff --git a/src/orchestrator/src/__fixtures__/brunch-graphs-snapshot.json b/src/orchestrator/src/__fixtures__/brunch-graphs-snapshot.json new file mode 100644 index 00000000..3fe19a58 --- /dev/null +++ b/src/orchestrator/src/__fixtures__/brunch-graphs-snapshot.json @@ -0,0 +1,61 @@ +{ + "_provenance": "Hand-curated subset of completed spec 2 ('brunch_graphs', referenced in memory/PLAN.md §spec-to-cook-plan spike findings, 2026-06-03). Five requirements + five criteria with full verification coverage and a single non-verifies edge that the projector must drop. Slice 2 (LLM planning pass) will own a richer fixture sourced from the server-side snapshot builder; this one exists solely to pin the spike's positive finding ('every requirement has ≥1 verifying criterion') as a regression oracle.", + "requirements": [ + { + "id": 101, + "content": "Brunch grows from one intent graph into four trace-connected graph planes.", + "kindOrdinal": 1 + }, + { + "id": 102, + "content": "Criteria remain intent nodes; oracle nodes operationalize criteria.", + "kindOrdinal": 2 + }, + { "id": 103, "content": "Oracle graph starts with strategy and check node kinds.", "kindOrdinal": 3 }, + { + "id": 104, + "content": "Design graph represents modules, interfaces, seams, and adapters.", + "kindOrdinal": 4 + }, + { + "id": 105, + "content": "Plan graph contains fractal milestone, frontier, and slice nodes.", + "kindOrdinal": 5 + } + ], + "criteria": [ + { + "id": 201, + "content": "Spec workspace exposes four distinct graph planes that share semantic edge vocabulary.", + "kindOrdinal": 1 + }, + { + "id": 202, + "content": "A criterion verifies a requirement via a verifies edge; an oracle check verifies the criterion.", + "kindOrdinal": 2 + }, + { + "id": 203, + "content": "Oracle graph contains no result nodes; raw run output lives in implementation storage.", + "kindOrdinal": 3 + }, + { + "id": 204, + "content": "Two adapters at one seam 
read as a real seam; one adapter is hypothetical only.", + "kindOrdinal": 4 + }, + { + "id": 205, + "content": "A frontier's first slice is a tracer-bullet that establishes an end-to-end invariant.", + "kindOrdinal": 5 + } + ], + "edges": [ + { "fromItemId": 201, "toItemId": 101, "relation": "verifies" }, + { "fromItemId": 202, "toItemId": 102, "relation": "verifies" }, + { "fromItemId": 203, "toItemId": 103, "relation": "verifies" }, + { "fromItemId": 204, "toItemId": 104, "relation": "verifies" }, + { "fromItemId": 205, "toItemId": 105, "relation": "verifies" }, + { "fromItemId": 102, "toItemId": 101, "relation": "depends_on" } + ] +} diff --git a/src/orchestrator/src/cook-cli.test.ts b/src/orchestrator/src/cook-cli.test.ts index 371b3015..10483331 100644 --- a/src/orchestrator/src/cook-cli.test.ts +++ b/src/orchestrator/src/cook-cli.test.ts @@ -1,5 +1,5 @@ import { execFileSync } from 'node:child_process'; -import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs'; +import { mkdirSync, mkdtempSync, rmSync, utimesSync, writeFileSync } from 'node:fs'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; @@ -109,6 +109,21 @@ describe('parseCookArgs', () => { ); }); + it('parses --spec= and exposes it on opts', () => { + expect(parseCookArgs(['./f', '--spec=42']).specId).toBe(42); + }); + + it('omits specId when --spec is not passed', () => { + expect(parseCookArgs(['./f']).specId).toBeUndefined(); + }); + + it('rejects non-integer, zero, and negative --spec values', () => { + expect(() => parseCookArgs(['./f', '--spec=abc'])).toThrow(/--spec/i); + expect(() => parseCookArgs(['./f', '--spec=0'])).toThrow(/--spec/i); + expect(() => parseCookArgs(['./f', '--spec=-3'])).toThrow(/--spec/i); + expect(() => parseCookArgs(['./f', '--spec=1.5'])).toThrow(/--spec/i); + }); + it('allows --petrinaut-stream + --petrinaut-fold=color together', () => { const opts = parseCookArgs([ './f', @@ -403,4 +418,78 @@ describe('resolveCookMode', () => { 
expect(result.message).toMatch(/plan/i); } }); + + it('resolves explicit --spec= from .brunch/cook/specs//plan.yaml', () => { + const d = makeTmpDir(); + initCleanGitRepo(d); + const specDir = join(d, '.brunch', 'cook', 'specs', '7'); + mkdirSync(specDir, { recursive: true }); + writeFileSync(join(specDir, 'plan.yaml'), 'epics: []\nslices: []\n'); + + const result = resolveCookMode(d, 7); + expect(result.mode).toBe('codebase'); + if (result.mode === 'codebase') { + expect(result.planPath).toBe(join(specDir, 'plan.yaml')); + } + }); + + it('errors when explicit --spec= plan is missing', () => { + const d = makeTmpDir(); + initCleanGitRepo(d); + + const result = resolveCookMode(d, 99); + expect(result.mode).toBe('error'); + if (result.mode === 'error') { + expect(result.message).toMatch(/spec 99/); + } + }); + + it('auto-picks the newest spec plan by mtime when no --spec is given', () => { + const d = makeTmpDir(); + initCleanGitRepo(d); + const older = join(d, '.brunch', 'cook', 'specs', '1'); + const newer = join(d, '.brunch', 'cook', 'specs', '2'); + mkdirSync(older, { recursive: true }); + mkdirSync(newer, { recursive: true }); + writeFileSync(join(older, 'plan.yaml'), 'epics: []\nslices: []\n'); + writeFileSync(join(newer, 'plan.yaml'), 'epics: []\nslices: []\n'); + // Force mtime ordering deterministically: older = 60s ago. 
+ const past = new Date(Date.now() - 60_000); + utimesSync(join(older, 'plan.yaml'), past, past); + + const result = resolveCookMode(d); + expect(result.mode).toBe('codebase'); + if (result.mode === 'codebase') { + expect(result.planPath).toBe(join(newer, 'plan.yaml')); + } + }); + + it('falls back to legacy .brunch/cook/plan.yaml when no spec plans exist', () => { + const d = makeTmpDir(); + initCleanGitRepo(d); + mkdirSync(join(d, '.brunch', 'cook'), { recursive: true }); + writeFileSync(join(d, '.brunch', 'cook', 'plan.yaml'), 'epics: []\nslices: []\n'); + + const result = resolveCookMode(d); + expect(result.mode).toBe('codebase'); + if (result.mode === 'codebase') { + expect(result.planPath).toBe(join(d, '.brunch', 'cook', 'plan.yaml')); + } + }); + + it('prefers a newer spec plan over the legacy top-level plan', () => { + const d = makeTmpDir(); + initCleanGitRepo(d); + mkdirSync(join(d, '.brunch', 'cook'), { recursive: true }); + writeFileSync(join(d, '.brunch', 'cook', 'plan.yaml'), 'epics: []\nslices: []\n'); + const specDir = join(d, '.brunch', 'cook', 'specs', '5'); + mkdirSync(specDir, { recursive: true }); + writeFileSync(join(specDir, 'plan.yaml'), 'epics: []\nslices: []\n'); + + const result = resolveCookMode(d); + expect(result.mode).toBe('codebase'); + if (result.mode === 'codebase') { + expect(result.planPath).toBe(join(specDir, 'plan.yaml')); + } + }); }); diff --git a/src/orchestrator/src/cook-cli.ts b/src/orchestrator/src/cook-cli.ts index b0728d30..ebbc5663 100644 --- a/src/orchestrator/src/cook-cli.ts +++ b/src/orchestrator/src/cook-cli.ts @@ -11,6 +11,7 @@ import { createPetrinautStreamBus, type PetrinautStreamBus } from './petrinaut-s import { createPetrinautStreamServer, type PetrinautStreamServer } from './petrinaut-stream-server.js'; import { createPiActions } from './pi-actions.js'; import { loadPlan } from './plan-loader.js'; +import { parseSpecId, resolveLatestSpecPlanPath, specPlanPath, specsRootDir } from './spec-plan-paths.js'; 
import { BunTestRunner } from './test-runner.js'; import { createSandbox } from './worktree.js'; @@ -35,6 +36,13 @@ export type CookOptions = { petrinautBaseUrl?: string; /** Whether to auto-launch the system browser; CI=true also suppresses at runtime. */ petrinautOpen: boolean; + /** + * Explicit specification id whose emitted plan (under + * `/.brunch/cook/specs//plan.yaml`) should be cooked. + * When omitted, `resolveCookMode` auto-picks the most recently + * emitted spec plan (or falls back to legacy paths). + */ + specId?: number; }; export function parseCookArgs(args: string[]): CookOptions { @@ -46,12 +54,15 @@ export function parseCookArgs(args: string[]): CookOptions { let petrinautStream = false; let petrinautBaseUrl: string | undefined; let petrinautOpen = true; + let specId: number | undefined; let sawNoOpen = false; let sawBaseUrl = false; for (let i = 0; i < args.length; i++) { const arg = args[i]!; - if (arg.startsWith('--policy=')) { + if (arg.startsWith('--spec=')) { + specId = parseSpecId(arg.split('=').slice(1).join('='), '--spec'); + } else if (arg.startsWith('--policy=')) { const val = arg.split('=')[1]!; if (val !== 'serial' && val !== 'parallel') { throw new Error(`Unknown policy: ${val}. Use serial or parallel.`); @@ -86,7 +97,7 @@ export function parseCookArgs(args: string[]): CookOptions { if (!dir) { throw new Error( - 'Usage: brunch cook [--policy=serial|parallel] [--max-retries=N] [--petrinaut-fold=color|identity] [--petrinaut-stream [--petrinaut-base-url=] [--no-petrinaut-open]] [--verbose]', + 'Usage: brunch cook [--spec=] [--policy=serial|parallel] [--max-retries=N] [--petrinaut-fold=color|identity] [--petrinaut-stream [--petrinaut-base-url=] [--no-petrinaut-open]] [--verbose]', ); } @@ -107,6 +118,7 @@ export function parseCookArgs(args: string[]): CookOptions { petrinautStream, petrinautBaseUrl, petrinautOpen, + ...(specId !== undefined ? 
{ specId } : {}), }; } @@ -251,23 +263,41 @@ export type ResolvedCookMode = | { mode: 'error'; message: string }; /** - * Resolve cook's run mode by inspecting ``: - * - `/plan.yaml` exists → fixture mode (greenfield). - * - `/.brunch/cook/plan.yaml` → codebase mode (brownfield); requires - * `` to be a git repo with a clean - * working tree. - * - neither → error. + * Resolve cook's run mode by inspecting `` in precedence order: + * + * 1. `/plan.yaml` exists → fixture mode (greenfield). + * 2. Explicit `specId`: + * `/.brunch/cook/specs//plan.yaml` exists → codebase mode. + * missing → error. + * 3. No `specId`, any `/.brunch/cook/specs//plan.yaml` → newest by mtime, codebase mode. + * 4. Legacy `/.brunch/cook/plan.yaml` → codebase mode. + * 5. None of the above → error. + * + * Codebase modes additionally require `` to be a git repo with a clean + * working tree (untracked files ignored). * * Pure function — no process exits, no side effects beyond filesystem reads. */ -export function resolveCookMode(dir: string): ResolvedCookMode { +export function resolveCookMode(dir: string, specId?: number): ResolvedCookMode { const fixturePath = join(dir, 'plan.yaml'); if (existsSync(fixturePath)) { return { mode: 'fixture', planPath: fixturePath }; } - const codebasePath = join(dir, '.brunch', 'cook', 'plan.yaml'); - if (existsSync(codebasePath)) { + const legacyPath = join(dir, '.brunch', 'cook', 'plan.yaml'); + + let codebasePath: string | undefined; + if (specId !== undefined) { + const explicit = specPlanPath(dir, specId); + if (!existsSync(explicit)) { + return { mode: 'error', message: `No plan emitted for spec ${specId}: ${explicit}` }; + } + codebasePath = explicit; + } else { + codebasePath = resolveLatestSpecPlanPath(dir) ?? (existsSync(legacyPath) ? 
legacyPath : undefined); + } + + if (codebasePath) { const gitCheck = isCleanGitWorkingTree(dir); if (gitCheck.kind === 'not-git') { return { mode: 'error', message: `Codebase mode requires to be a git repo: ${dir}` }; @@ -281,7 +311,10 @@ export function resolveCookMode(dir: string): ResolvedCookMode { return { mode: 'codebase', planPath: codebasePath, sourceDir: dir }; } - return { mode: 'error', message: `No plan found at ${fixturePath} or ${codebasePath}` }; + return { + mode: 'error', + message: `No plan found at ${fixturePath}, ${specsRootDir(dir)}//plan.yaml, or ${legacyPath}`, + }; } type GitWorkingTreeCheck = { kind: 'clean' } | { kind: 'dirty'; status: string } | { kind: 'not-git' }; @@ -323,7 +356,7 @@ export async function runCook(opts: CookOptions): Promise { petrinautBaseUrl = resolvedBaseUrl.baseUrl; } - const resolved = resolveCookMode(opts.dir); + const resolved = resolveCookMode(opts.dir, opts.specId); if (resolved.mode === 'error') { console.error(resolved.message); process.exit(1); diff --git a/src/orchestrator/src/pi-actions.ts b/src/orchestrator/src/pi-actions.ts index 29e25be9..12862a87 100644 --- a/src/orchestrator/src/pi-actions.ts +++ b/src/orchestrator/src/pi-actions.ts @@ -6,6 +6,7 @@ import { promisify } from 'node:util'; const execAsync = promisify(exec); import { createReport } from './report-helpers.js'; +import { sliceLabel } from './slice-label.js'; import type { ActionContext, ActionHandlers } from './types.js'; const __dirname = dirname(fileURLToPath(import.meta.url)); @@ -163,12 +164,13 @@ export function createPiActions(opts?: { verbose?: boolean; runStart?: number }) return { 'evaluate-done': async (ctx: ActionContext) => { - log('?', `evaluate ${ctx.slice.id}`); + const label = sliceLabel(ctx.slice); + log('?', `evaluate ${label}`); const task = `Evaluate slice "${ctx.slice.id}": ${ctx.slice.definition}\nVerification targets: ${ctx.slice.verification.map((v) => v.target).join(', ')}\nDetermine if all verification targets are 
satisfied. Respond with a JSON object: { "done": true/false, "reasoning": "..." }`; try { const raw = await runPi({ - label: `evaluate ${ctx.slice.id}`, + label: `evaluate ${label}`, model: 'claude-haiku-4-5', promptFile: join(promptsDir, 'evaluator.md'), task, @@ -176,13 +178,13 @@ export function createPiActions(opts?: { verbose?: boolean; runStart?: number }) }); const parsed = extractJson(raw) as { done?: boolean; reasoning?: string } | undefined; const done = !!parsed?.done; - log(done ? '●' : '○', `verdict ${ctx.slice.id} → ${done ? 'DONE' : 'NEEDS WORK'}`); + log(done ? '●' : '○', `verdict ${label} → ${done ? 'DONE' : 'NEEDS WORK'}`); return report(ctx, 'evaluator', 'eval-done', { done, reasoning: parsed?.reasoning ?? raw.slice(0, 200), }); } catch (err) { - log('✗', `evaluate ${ctx.slice.id} — ${err instanceof Error ? err.message : err}`); + log('✗', `evaluate ${label} — ${err instanceof Error ? err.message : err}`); return report(ctx, 'evaluator', 'eval-done', { done: false, reasoning: `evaluation failed: ${err instanceof Error ? err.message : String(err)}`, @@ -191,11 +193,12 @@ export function createPiActions(opts?: { verbose?: boolean; runStart?: number }) }, 'write-tests': async (ctx: ActionContext) => { - log('▸', `tests ${ctx.slice.id}`); + const label = sliceLabel(ctx.slice); + log('▸', `tests ${label}`); const task = `Write failing tests for slice "${ctx.slice.id}": ${ctx.slice.definition}\nVerification targets: ${ctx.slice.verification.map((v) => `${v.kind}: ${v.target}`).join(', ')}\nWrite test files that will initially fail. 
Use bun test conventions.`; await runPi({ - label: `tests ${ctx.slice.id}`, + label: `tests ${label}`, model: 'claude-sonnet-4-6', promptFile: join(promptsDir, 'test-writer.md'), task, @@ -209,11 +212,12 @@ export function createPiActions(opts?: { verbose?: boolean; runStart?: number }) }, 'write-code': async (ctx: ActionContext) => { - log('▸', `code ${ctx.slice.id}`); + const label = sliceLabel(ctx.slice); + log('▸', `code ${label}`); const task = `Write code to make tests pass for slice "${ctx.slice.id}": ${ctx.slice.definition}\nVerification targets: ${ctx.slice.verification.map((v) => `${v.kind}: ${v.target}`).join(', ')}\nImplement the minimum code to make all tests pass.`; await runPi({ - label: `code ${ctx.slice.id}`, + label: `code ${label}`, model: 'claude-sonnet-4-6', promptFile: join(promptsDir, 'code-writer.md'), task, @@ -226,7 +230,8 @@ export function createPiActions(opts?: { verbose?: boolean; runStart?: number }) }, 'assess-semantic': async (ctx: ActionContext) => { - log('?', `semantic ${ctx.slice.id}`); + const label = sliceLabel(ctx.slice); + log('?', `semantic ${label}`); // POC: auto-satisfy — real semantic assessment requires graph-derived gates (Phase 3) return report(ctx, 'semantic-assessor', 'semantic-assessed', { satisfied: true }); }, diff --git a/src/orchestrator/src/plan-emitter.test.ts b/src/orchestrator/src/plan-emitter.test.ts new file mode 100644 index 00000000..9381ade0 --- /dev/null +++ b/src/orchestrator/src/plan-emitter.test.ts @@ -0,0 +1,127 @@ +import { mkdtempSync, writeFileSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; + +import { describe, expect, it } from 'vitest'; +import { stringify as stringifyYaml } from 'yaml'; + +import { emitPlanFromSnapshot, emitterWarningCategory, formatEmitterWarning } from './plan-emitter.js'; +import type { PlanningEnrichment, RunModel } from './plan-llm-planning.js'; +import { loadPlan } from './plan-loader.js'; +import type { 
CompletedSpecSnapshot } from './plan-projection.js'; + +const snapshot: CompletedSpecSnapshot = { + requirements: [ + { id: 10, content: 'First requirement', kindOrdinal: 1 }, + { id: 11, content: 'Second requirement', kindOrdinal: 2 }, + ], + criteria: [{ id: 20, content: 'A criterion', kindOrdinal: 1 }], + edges: [{ fromItemId: 20, toItemId: 10, relation: 'verifies' }], +}; + +describe('emitPlanFromSnapshot', () => { + it('composes projection + planning + reconciliation with an injected runModel', async () => { + const enrichment: PlanningEnrichment = { + sliceDependencies: [{ sliceId: 'req-2', dependsOn: ['req-1'] }], + epics: [{ id: 'core', summary: 'Core', sliceIds: ['req-1', 'req-2'] }], + nonBuildableSliceIds: [], + }; + const runModel: RunModel = async () => enrichment; + + const result = await emitPlanFromSnapshot(snapshot, { runModel }); + + expect(result.planningResult.status).toBe('succeeded'); + expect(result.plan.slices.map((s) => s.id)).toEqual(['req-1', 'req-2']); + expect(result.plan.epics.map((e) => e.id)).toEqual(['core']); + for (const slice of result.plan.slices) { + expect(slice.verification).toEqual([{ kind: 'unit-test', target: `tests/${slice.id}.test.ts` }]); + } + // Reconciliation warning surfaces synthesis events at minimum. + expect(result.warnings.some((w) => w.code === 'synthesized-verification-target')).toBe(true); + }); + + it('falls back to an empty enrichment when the runModel throws — plan still emits, planningResult is failed', async () => { + const runModel: RunModel = async () => { + throw new Error('boom'); + }; + + const result = await emitPlanFromSnapshot(snapshot, { runModel }); + + expect(result.planningResult.status).toBe('failed'); + if (result.planningResult.status === 'failed') { + expect(result.planningResult.reason).toContain('boom'); + } + // Plan still usable — slices present, synthesized verification, no deps. 
+ expect(result.plan.slices.map((s) => s.id)).toEqual(['req-1', 'req-2']); + for (const slice of result.plan.slices) { + expect(slice.depends_on).toEqual([]); + expect(slice.verification).toEqual([{ kind: 'unit-test', target: `tests/${slice.id}.test.ts` }]); + } + }); + + it('pushes exactly one planning-failed warning when the runModel throws (single audit stream)', async () => { + const runModel: RunModel = async () => { + throw new Error('llm-down'); + }; + + const result = await emitPlanFromSnapshot(snapshot, { runModel }); + + const failures = result.warnings.filter((w) => w.code === 'planning-failed'); + expect(failures).toHaveLength(1); + expect(failures[0]!.code).toBe('planning-failed'); + if (failures[0]!.code === 'planning-failed') { + expect(failures[0]!.reason).toContain('llm-down'); + } + }); + + it('does not push a planning-failed warning when the runModel succeeds', async () => { + const runModel: RunModel = async () => ({ + sliceDependencies: [], + epics: [], + nonBuildableSliceIds: [], + }); + + const result = await emitPlanFromSnapshot(snapshot, { runModel }); + + expect(result.warnings.some((w) => w.code === 'planning-failed')).toBe(false); + }); + + it('categorizes planning-failed as failure and delegates other codes to reconciliation', async () => { + const failure = await emitPlanFromSnapshot(snapshot, { + runModel: async () => { + throw new Error('x'); + }, + }); + const failureWarning = failure.warnings.find((w) => w.code === 'planning-failed')!; + expect(emitterWarningCategory(failureWarning)).toBe('failure'); + expect(formatEmitterWarning(failureWarning)).toContain('planning-failed'); + + const success = await emitPlanFromSnapshot(snapshot, { + runModel: async () => ({ sliceDependencies: [], epics: [], nonBuildableSliceIds: [] }), + }); + const synthesis = success.warnings.find((w) => w.code === 'synthesized-verification-target')!; + expect(emitterWarningCategory(synthesis)).toBe('synthesis'); + }); + + it('round-trips through loadPlan 
after YAML serialization', async () => { + const runModel: RunModel = async () => ({ + sliceDependencies: [], + epics: [], + nonBuildableSliceIds: [], + }); + + const result = await emitPlanFromSnapshot(snapshot, { runModel }); + + const dir = mkdtempSync(join(tmpdir(), 'plan-emitter-')); + const yamlPath = join(dir, 'plan.yaml'); + writeFileSync(yamlPath, stringifyYaml(result.plan)); + const reloaded = loadPlan(yamlPath); + + expect(reloaded).toEqual(result.plan); + + const epicIds = new Set(reloaded.epics.map((e) => e.id)); + for (const slice of reloaded.slices) { + expect(epicIds.has(slice.epic_id)).toBe(true); + } + }); +}); diff --git a/src/orchestrator/src/plan-emitter.ts b/src/orchestrator/src/plan-emitter.ts new file mode 100644 index 00000000..8cfefdad --- /dev/null +++ b/src/orchestrator/src/plan-emitter.ts @@ -0,0 +1,103 @@ +// FE-800 slice 4: end-to-end composition. +// FE-800 slice 5: single warning stream (`EmitterWarning` widens +// `ReconciliationWarning` with `planning-failed` so callers have one +// audit-ready source instead of forking on `planningResult.status`). +// +// Glue function that walks one `CompletedSpecSnapshot` through all +// three FE-800 stages — deterministic projection (slice 1), LLM +// planning (slice 2), deterministic reconciliation (slice 3) — and +// returns the cook-runnable Plan plus every warning surfaced along +// the way. Pure modulo the injected `runModel`; defaults to the +// production anthropic seam (`defaultRunModel`) but tests pass a stub. +// +// On LLM failure the planning result is preserved as +// `{ status: 'failed', reason }` for callers that want the raw stage +// status, AND a `{ code: 'planning-failed', reason }` warning is +// pushed onto `warnings` so iterating one stream is sufficient. +// Reconciliation still runs against an empty enrichment so the caller +// receives a usable orderless plan rather than no plan at all. 
+ +import { + defaultRunModel, + planExecutionOrdering, + type PlanningEnrichment, + type PlanningResult, + type RunModel, +} from './plan-llm-planning.js'; +import { projectPlanFromSpec, type CompletedSpecSnapshot } from './plan-projection.js'; +import { + formatReconciliationWarning, + reconcilePlan, + reconciliationWarningCategory, + type ReconciliationWarning, +} from './plan-reconciliation.js'; +import type { Plan } from './types.js'; + +const EMPTY_ENRICHMENT: PlanningEnrichment = { + sliceDependencies: [], + epics: [], + nonBuildableSliceIds: [], +}; + +/** + * Single warning union for the emitter. Widens `ReconciliationWarning` + * with `planning-failed` so a caller iterating `warnings` sees both + * reconciliation transformations and LLM-stage failures in one stream. + */ +export type EmitterWarning = ReconciliationWarning | { code: 'planning-failed'; reason: string }; + +export type EmitPlanResult = { + plan: Plan; + warnings: EmitterWarning[]; + planningResult: PlanningResult; +}; + +export type EmitPlanOptions = { + /** + * LLM seam used by the planning stage. Defaults to the production + * anthropic adapter (`defaultRunModel`). Tests inject a stub. + */ + runModel?: RunModel; +}; + +export async function emitPlanFromSnapshot( + snapshot: CompletedSpecSnapshot, + options: EmitPlanOptions = {}, +): Promise { + const runModel = options.runModel ?? defaultRunModel; + + const projected = projectPlanFromSpec(snapshot); + const planningResult = await planExecutionOrdering(projected, runModel); + const enrichment = planningResult.status === 'succeeded' ? 
planningResult.enrichment : EMPTY_ENRICHMENT; + const { plan, warnings: reconciliationWarnings } = reconcilePlan(projected, enrichment); + + const warnings: EmitterWarning[] = []; + if (planningResult.status === 'failed') { + warnings.push({ code: 'planning-failed', reason: planningResult.reason }); + } + warnings.push(...reconciliationWarnings); + + return { plan, warnings, planningResult }; +} + +/** + * Audit-weight classification for an `EmitterWarning`. Mirrors + * `reconciliationWarningCategory` and adds `'failure'` for + * `planning-failed`. Exhaustive — adding a new emitter-level warning + * forces an update here. + */ +export function emitterWarningCategory(warning: EmitterWarning): 'transformation' | 'synthesis' | 'failure' { + if (warning.code === 'planning-failed') return 'failure'; + return reconciliationWarningCategory(warning); +} + +/** + * Render an `EmitterWarning` as one human-readable line. Delegates + * to `formatReconciliationWarning` for reconciliation codes. + */ +export function formatEmitterWarning(warning: EmitterWarning): string { + if (warning.code === 'planning-failed') { + return `planning-failed ${warning.reason}`; + } + return formatReconciliationWarning(warning); +} diff --git a/src/orchestrator/src/plan-llm-planning.test.ts b/src/orchestrator/src/plan-llm-planning.test.ts new file mode 100644 index 00000000..e19f7973 --- /dev/null +++ b/src/orchestrator/src/plan-llm-planning.test.ts @@ -0,0 +1,168 @@ +import { readFileSync } from 'node:fs'; +import { dirname, join } from 'node:path'; +import { fileURLToPath } from 'node:url'; + +import { describe, expect, it } from 'vitest'; + +import { defaultRunModel, planExecutionOrdering } from './plan-llm-planning.js'; +import { projectPlanFromSpec, type CompletedSpecSnapshot } from './plan-projection.js'; +import type { Plan } from './types.js'; + +const samplePlan: Plan = { + epics: [{ id: 'default', summary: 'All requirements', depends_on: [], verification: [] }], + slices: [ + { id: 
'req-1', epic_id: 'default', definition: 'First requirement', depends_on: [], verification: [] }, + { id: 'req-2', epic_id: 'default', definition: 'Second requirement', depends_on: [], verification: [] }, + ], +}; + +describe('planExecutionOrdering', () => { + it('returns succeeded with a parsed enrichment for a well-formed LLM response', async () => { + const stubModelOutput = { + sliceDependencies: [ + { sliceId: 'req-1', dependsOn: [] }, + { sliceId: 'req-2', dependsOn: ['req-1'] }, + ], + epics: [ + { id: 'foundation', summary: 'Foundational work', sliceIds: ['req-1'] }, + { id: 'follow-on', summary: 'Built on the foundation', sliceIds: ['req-2'] }, + ], + nonBuildableSliceIds: [], + }; + + const result = await planExecutionOrdering(samplePlan, async () => stubModelOutput); + + expect(result).toEqual({ + status: 'succeeded', + enrichment: stubModelOutput, + }); + }); + + it('returns failed with the error message when runModel throws', async () => { + const result = await planExecutionOrdering(samplePlan, async () => { + throw new Error('upstream timeout'); + }); + + expect(result.status).toBe('failed'); + if (result.status === 'failed') { + expect(result.reason).toContain('upstream timeout'); + } + }); + + it('returns failed when the LLM output misses a required field', async () => { + // Missing `nonBuildableSliceIds` — schema-required. 
+ const malformed = { + sliceDependencies: [], + epics: [], + }; + + const result = await planExecutionOrdering(samplePlan, async () => malformed); + + expect(result.status).toBe('failed'); + if (result.status === 'failed') { + expect(result.reason.length).toBeGreaterThan(0); + } + }); + + it('returns failed when the LLM output has a wrongly-typed field', async () => { + const malformed = { + sliceDependencies: [{ sliceId: 'req-1', dependsOn: 'not-an-array' }], + epics: [], + nonBuildableSliceIds: [], + }; + + const result = await planExecutionOrdering(samplePlan, async () => malformed); + + expect(result.status).toBe('failed'); + }); + + it('does NOT semantically validate referenced ids — hallucinated slice ids parse as succeeded', async () => { + // Slice 2 deliberately defers id-existence / cycle / dangle checks + // to slice 3 (deterministic reconciliation). The schema must accept + // a well-typed but semantically wrong response; this regression pin + // protects that contract. + const semanticallyWrong = { + sliceDependencies: [{ sliceId: 'req-1', dependsOn: ['req-999-does-not-exist'] }], + epics: [{ id: 'ghost-epic', summary: '', sliceIds: ['req-1', 'never-projected'] }], + nonBuildableSliceIds: ['also-never-projected'], + }; + + const result = await planExecutionOrdering(samplePlan, async () => semanticallyWrong); + + expect(result.status).toBe('succeeded'); + if (result.status === 'succeeded') { + expect(result.enrichment.sliceDependencies[0]!.dependsOn).toEqual(['req-999-does-not-exist']); + expect(result.enrichment.nonBuildableSliceIds).toEqual(['also-never-projected']); + } + }); + + it('includes every slice id and definition in the prompt the model receives', async () => { + let capturedPrompt = ''; + await planExecutionOrdering(samplePlan, async (prompt) => { + capturedPrompt = prompt; + return { + sliceDependencies: [], + epics: [], + nonBuildableSliceIds: [], + }; + }); + + for (const slice of samplePlan.slices) { + 
expect(capturedPrompt).toContain(slice.id); + expect(capturedPrompt).toContain(slice.definition); + } + }); + + it('short-circuits on an empty Plan without calling runModel', async () => { + const emptyPlan: Plan = { + epics: [{ id: 'default', summary: 'All requirements', depends_on: [], verification: [] }], + slices: [], + }; + let runModelCalled = false; + + const result = await planExecutionOrdering(emptyPlan, async () => { + runModelCalled = true; + return {}; + }); + + expect(runModelCalled).toBe(false); + expect(result).toEqual({ + status: 'succeeded', + enrichment: { sliceDependencies: [], epics: [], nonBuildableSliceIds: [] }, + }); + }); + + // Opt-in middle-loop test. Skipped unless both PLANNING_REAL_LLM=1 and + // ANTHROPIC_API_KEY are set, so it stays out of CI and the default + // local `npm run verify`. Run with: + // PLANNING_REAL_LLM=1 ANTHROPIC_API_KEY=… npx vitest run \ + // src/orchestrator/src/plan-llm-planning.test.ts + const realLlmEnabled = process.env.PLANNING_REAL_LLM === '1' && Boolean(process.env.ANTHROPIC_API_KEY); + const itReal = realLlmEnabled ? it : it.skip; + + itReal( + 'real LLM: brunch_graphs fixture yields well-formed enrichment with non-trivial signal', + async () => { + const fixturePath = join( + dirname(fileURLToPath(import.meta.url)), + '__fixtures__', + 'brunch-graphs-snapshot.json', + ); + const fixture = JSON.parse(readFileSync(fixturePath, 'utf8')) as CompletedSpecSnapshot; + const projected = projectPlanFromSpec(fixture); + + const result = await planExecutionOrdering(projected, defaultRunModel); + + expect(result.status).toBe('succeeded'); + if (result.status === 'succeeded') { + const hasOrdering = result.enrichment.sliceDependencies.some((entry) => entry.dependsOn.length > 0); + const hasNonBuildable = result.enrichment.nonBuildableSliceIds.length > 0; + // The model should produce SOME signal on a non-trivial spec — + // either an ordering edge or a non-buildable flag. 
If both come + // back empty, the prompt is failing to convey what we want. + expect(hasOrdering || hasNonBuildable).toBe(true); + } + }, + 30_000, + ); +}); diff --git a/src/orchestrator/src/plan-llm-planning.ts b/src/orchestrator/src/plan-llm-planning.ts new file mode 100644 index 00000000..37ab3b1f --- /dev/null +++ b/src/orchestrator/src/plan-llm-planning.ts @@ -0,0 +1,129 @@ +// FE-800 slice 2: LLM planning pass. +// +// Pure function that takes a slice-1 projected Plan plus an injected +// `runModel` LLM seam, performs one structured LLM round-trip, and +// returns a typed enrichment with per-slice depends_on, epic grouping, +// and non-buildable slice ids. +// +// Slice 2 enforces SHAPE only — id existence, cycles, dangling deps +// onto constraint slices, and epic-coverage gaps are slice 3's +// deterministic reconciliation. Failures (thrown LLM, parse error, +// malformed shape) collapse to a recoverable `{ status: 'failed' }` +// result so slice 3 can fall back instead of crashing. 
+ +import { anthropic } from '@ai-sdk/anthropic'; +import { generateText, Output } from 'ai'; +import * as z from 'zod/v4'; + +import type { Plan } from './types.js'; + +export const planningEnrichmentSchema = z.object({ + sliceDependencies: z.array( + z.object({ + sliceId: z.string(), + dependsOn: z.array(z.string()), + }), + ), + epics: z.array( + z.object({ + id: z.string(), + summary: z.string(), + sliceIds: z.array(z.string()), + }), + ), + nonBuildableSliceIds: z.array(z.string()), +}); + +export type PlanningEnrichment = z.infer; + +export type PlanningResult = + | { status: 'succeeded'; enrichment: PlanningEnrichment } + | { status: 'failed'; reason: string }; + +export type RunModel = (prompt: string) => Promise; + +export async function planExecutionOrdering(plan: Plan, runModel: RunModel): Promise { + if (plan.slices.length === 0) { + return { + status: 'succeeded', + enrichment: { sliceDependencies: [], epics: [], nonBuildableSliceIds: [] }, + }; + } + + const prompt = buildPlanningPrompt(plan); + + let raw: unknown; + try { + raw = await runModel(prompt); + } catch (error) { + return { status: 'failed', reason: errorMessage(error) }; + } + + const parsed = planningEnrichmentSchema.safeParse(raw); + if (!parsed.success) { + return { + status: 'failed', + reason: `Parse error: ${parsed.error.issues.map((issue) => issue.message).join('; ')}`, + }; + } + + return { status: 'succeeded', enrichment: parsed.data }; +} + +function buildPlanningPrompt(plan: Plan): string { + const sliceLines = plan.slices.map((slice) => `- ${slice.id}: ${slice.definition}`).join('\n'); + const allSliceIds = plan.slices.map((slice) => slice.id).join(', '); + + return [ + 'You are sequencing a software build plan derived from a product specification.', + '', + 'Each slice corresponds to one product requirement. Your job is to produce three things:', + '', + '1. 
`sliceDependencies`: for each slice, the list of OTHER slice ids it must be built AFTER.', + ' Only emit ordering edges that are real engineering prerequisites (e.g. a slice that', + ' establishes a schema must precede slices that query it). Avoid edges that just reflect', + ' topical similarity. Aim for a sparse, acyclic DAG. If a slice is independent, emit an', + ' empty `dependsOn` array.', + '', + '2. `epics`: a grouping of slices into a small number (typically 2–5) of named epics.', + ' Each epic has an id (kebab-case slug), a short human summary, and the list of slice', + ' ids it contains. Every slice should appear in exactly one epic.', + '', + '3. `nonBuildableSliceIds`: slice ids whose requirement text reads as a CONSTRAINT or', + ' architectural policy rather than a buildable unit of work (e.g. "the system must', + ' never lose data" is a constraint; "implement durable storage for X" is buildable).', + ' Constraints should NOT be built directly; they shape how buildable slices are', + ' implemented. Be conservative — only flag a slice as non-buildable if its definition', + ' clearly describes a policy, invariant, or constraint rather than an action.', + '', + `Available slice ids: ${allSliceIds}`, + '', + 'Plan slices:', + sliceLines, + ].join('\n'); +} + +function errorMessage(error: unknown): string { + if (error instanceof Error) return error.message; + return String(error); +} + +/** + * Production LLM seam. Threads the prompt through the AI SDK adapter + * (`generateText` + `Output.object`) using the same model knob shape + * as the server-side reconciliation classifier. Single-shot, no tools, + * no multi-turn. + * + * Returned value is the raw structured object from the model; + * `planExecutionOrdering` still parses it through the Zod schema so a + * model that bypasses `Output.object` cannot smuggle past the contract. 
+ */ +export const defaultRunModel: RunModel = async (prompt) => { + const result = await generateText({ + model: anthropic(process.env.SPEC_TO_COOK_PLAN_MODEL || 'claude-sonnet-4-20250514'), + maxOutputTokens: 2048, + prompt, + output: Output.object({ schema: planningEnrichmentSchema }), + }); + return result.output; +}; diff --git a/src/orchestrator/src/plan-projection.test.ts b/src/orchestrator/src/plan-projection.test.ts new file mode 100644 index 00000000..0c961332 --- /dev/null +++ b/src/orchestrator/src/plan-projection.test.ts @@ -0,0 +1,181 @@ +import { mkdtempSync, readFileSync, writeFileSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { dirname, join } from 'node:path'; +import { fileURLToPath } from 'node:url'; + +import { describe, expect, it } from 'vitest'; +import { stringify as stringifyYaml } from 'yaml'; + +import { loadPlan } from './plan-loader.js'; +import { projectPlanFromSpec, type CompletedSpecSnapshot } from './plan-projection.js'; + +describe('projectPlanFromSpec', () => { + it('returns a single default epic and zero slices for an empty snapshot', () => { + const snapshot: CompletedSpecSnapshot = { + requirements: [], + criteria: [], + edges: [], + }; + + const plan = projectPlanFromSpec(snapshot); + + expect(plan.epics).toHaveLength(1); + expect(plan.epics[0]!.id).toBe('default'); + expect(plan.epics[0]!.depends_on).toEqual([]); + expect(plan.epics[0]!.verification).toEqual([]); + expect(plan.slices).toEqual([]); + }); + + it('produces one slice per requirement, ordered by kindOrdinal, with stable ids', () => { + const snapshot: CompletedSpecSnapshot = { + requirements: [ + { id: 11, content: 'Second requirement', kindOrdinal: 2 }, + { id: 10, content: 'First requirement', kindOrdinal: 1 }, + { id: 12, content: 'Third requirement', kindOrdinal: 3 }, + ], + criteria: [], + edges: [], + }; + + const plan = projectPlanFromSpec(snapshot); + + expect(plan.slices).toHaveLength(3); + expect(plan.slices.map((slice) => 
slice.id)).toEqual(['req-1', 'req-2', 'req-3']); + expect(plan.slices.map((slice) => slice.definition)).toEqual([ + 'First requirement', + 'Second requirement', + 'Third requirement', + ]); + for (const slice of plan.slices) { + expect(slice.epic_id).toBe('default'); + expect(slice.depends_on).toEqual([]); + expect(slice.verification).toEqual([]); + } + }); + + it('populates a slice verification from `criterion --verifies--> requirement` edges', () => { + const snapshot: CompletedSpecSnapshot = { + requirements: [{ id: 10, content: 'A requirement', kindOrdinal: 1 }], + criteria: [ + { id: 20, content: 'Second criterion text', kindOrdinal: 2 }, + { id: 21, content: 'First criterion text', kindOrdinal: 1 }, + ], + edges: [ + { fromItemId: 20, toItemId: 10, relation: 'verifies' }, + { fromItemId: 21, toItemId: 10, relation: 'verifies' }, + ], + }; + + const plan = projectPlanFromSpec(snapshot); + + expect(plan.slices).toHaveLength(1); + expect(plan.slices[0]!.verification).toEqual([ + { kind: 'criterion', target: 'First criterion text' }, + { kind: 'criterion', target: 'Second criterion text' }, + ]); + }); + + it('does NOT project requirement→requirement `depends_on` edges into slice.depends_on', () => { + // Slice 1 intentionally drops graph-read execution ordering — the LLM + // planning pass (slice 2) owns that. We assert the drop is silent and + // deliberate so a future regression that quietly re-introduces a + // graph-read ordering rule will be caught. 
+ const snapshot: CompletedSpecSnapshot = { + requirements: [ + { id: 10, content: 'First requirement', kindOrdinal: 1 }, + { id: 11, content: 'Second requirement', kindOrdinal: 2 }, + ], + criteria: [], + edges: [{ fromItemId: 11, toItemId: 10, relation: 'depends_on' }], + }; + + const plan = projectPlanFromSpec(snapshot); + + for (const slice of plan.slices) { + expect(slice.depends_on).toEqual([]); + } + }); + + it('is deterministic — same snapshot yields structurally equal Plans', () => { + const snapshot: CompletedSpecSnapshot = { + requirements: [ + { id: 11, content: 'Req B', kindOrdinal: 2 }, + { id: 10, content: 'Req A', kindOrdinal: 1 }, + ], + criteria: [{ id: 20, content: 'Crit', kindOrdinal: 1 }], + edges: [{ fromItemId: 20, toItemId: 10, relation: 'verifies' }], + }; + + expect(projectPlanFromSpec(snapshot)).toEqual(projectPlanFromSpec(snapshot)); + }); + + it('round-trips through loadPlan — projected Plan survives YAML serialise + parse', () => { + const snapshot: CompletedSpecSnapshot = { + requirements: [ + { id: 10, content: 'First requirement', kindOrdinal: 1 }, + { id: 11, content: 'Second requirement', kindOrdinal: 2 }, + ], + criteria: [{ id: 20, content: 'A criterion', kindOrdinal: 1 }], + edges: [{ fromItemId: 20, toItemId: 10, relation: 'verifies' }], + }; + + const projected = projectPlanFromSpec(snapshot); + + const dir = mkdtempSync(join(tmpdir(), 'plan-projection-')); + const yamlPath = join(dir, 'plan.yaml'); + writeFileSync(yamlPath, stringifyYaml(projected)); + + const reloaded = loadPlan(yamlPath); + + expect(reloaded).toEqual(projected); + + // Schema-conformance pin: every slice.epic_id resolves to an existing epic. 
+ const epicIds = new Set(reloaded.epics.map((epic) => epic.id)); + for (const slice of reloaded.slices) { + expect(epicIds.has(slice.epic_id)).toBe(true); + } + }); + + it('emits a non-empty definition on every slice (display + LLM-prompt invariant)', () => { + // sliceLabel and the pi-agent task prompts both read slice.definition; + // pin that projection never emits an empty one so cook progress + // lines stay legible and pi never receives a content-free task. + const fixturePath = join( + dirname(fileURLToPath(import.meta.url)), + '__fixtures__', + 'brunch-graphs-snapshot.json', + ); + const fixture = JSON.parse(readFileSync(fixturePath, 'utf8')) as CompletedSpecSnapshot; + + const plan = projectPlanFromSpec(fixture); + + expect(plan.slices.length).toBeGreaterThan(0); + for (const slice of plan.slices) { + expect(slice.definition.trim().length).toBeGreaterThan(0); + } + }); + + it('preserves the brunch_graphs spike oracle — every requirement gets ≥1 verifying criterion', () => { + // Pin the spike's positive finding (2026-06-03 against completed + // spec 2 'brunch_graphs', memory/PLAN.md §spec-to-cook-plan): + // projection works AND verification linkage is fully covered. The + // fixture is a hand-curated subset; the eventual server-side + // snapshot builder (separate slice) will obviate it. 
+ const fixturePath = join( + dirname(fileURLToPath(import.meta.url)), + '__fixtures__', + 'brunch-graphs-snapshot.json', + ); + const fixture = JSON.parse(readFileSync(fixturePath, 'utf8')) as CompletedSpecSnapshot; + + const plan = projectPlanFromSpec(fixture); + + expect(plan.slices).toHaveLength(fixture.requirements.length); + for (const slice of plan.slices) { + expect(slice.verification.length).toBeGreaterThanOrEqual(1); + for (const verification of slice.verification) { + expect(verification.kind).toBe('criterion'); + } + } + }); +}); diff --git a/src/orchestrator/src/plan-projection.ts b/src/orchestrator/src/plan-projection.ts new file mode 100644 index 00000000..e8948d88 --- /dev/null +++ b/src/orchestrator/src/plan-projection.ts @@ -0,0 +1,73 @@ +import type { Plan } from './types.js'; + +/** + * Structural snapshot of the relevant portion of a completed brunch + * specification's intent graph. Declared locally so the orchestrator + * package does not import from `@/server/*`; the server-side snapshot + * builder is a separate slice. 
+ */ +export interface CompletedSpecSnapshot { + requirements: readonly KnowledgeItemSnapshot[]; + criteria: readonly KnowledgeItemSnapshot[]; + edges: readonly KnowledgeEdgeSnapshot[]; +} + +export interface KnowledgeItemSnapshot { + id: number; + content: string; + kindOrdinal: number; +} + +export interface KnowledgeEdgeSnapshot { + fromItemId: number; + toItemId: number; + relation: 'depends_on' | 'derived_from' | 'constrains' | 'verifies' | 'refines'; +} + +const DEFAULT_EPIC_ID = 'default'; +const DEFAULT_EPIC_SUMMARY = 'All requirements'; + +export function projectPlanFromSpec(snapshot: CompletedSpecSnapshot): Plan { + const orderedRequirements = [...snapshot.requirements].sort(byKindOrdinal); + const criteriaById = new Map(snapshot.criteria.map((criterion) => [criterion.id, criterion])); + + const verifiersByRequirementId = new Map(); + for (const edge of snapshot.edges) { + if (edge.relation !== 'verifies') continue; + const criterion = criteriaById.get(edge.fromItemId); + if (!criterion) continue; + const existing = verifiersByRequirementId.get(edge.toItemId) ?? []; + existing.push(criterion); + verifiersByRequirementId.set(edge.toItemId, existing); + } + + const slices = orderedRequirements.map((requirement) => { + const verifiers = (verifiersByRequirementId.get(requirement.id) ?? 
[]).sort(byKindOrdinal); + return { + id: `req-${requirement.kindOrdinal}`, + epic_id: DEFAULT_EPIC_ID, + definition: requirement.content, + depends_on: [], + verification: verifiers.map((criterion) => ({ + kind: 'criterion', + target: criterion.content, + })), + }; + }); + + return { + epics: [ + { + id: DEFAULT_EPIC_ID, + summary: DEFAULT_EPIC_SUMMARY, + depends_on: [], + verification: [], + }, + ], + slices, + }; +} + +function byKindOrdinal(a: KnowledgeItemSnapshot, b: KnowledgeItemSnapshot): number { + return a.kindOrdinal - b.kindOrdinal; +} diff --git a/src/orchestrator/src/plan-reconciliation.test.ts b/src/orchestrator/src/plan-reconciliation.test.ts new file mode 100644 index 00000000..984f3b79 --- /dev/null +++ b/src/orchestrator/src/plan-reconciliation.test.ts @@ -0,0 +1,471 @@ +import { mkdtempSync, readFileSync, writeFileSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { dirname, join } from 'node:path'; +import { fileURLToPath } from 'node:url'; + +import { describe, expect, it } from 'vitest'; +import { stringify as stringifyYaml } from 'yaml'; + +import type { PlanningEnrichment } from './plan-llm-planning.js'; +import { loadPlan } from './plan-loader.js'; +import { projectPlanFromSpec, type CompletedSpecSnapshot } from './plan-projection.js'; +import { + formatReconciliationWarning, + reconcilePlan, + reconciliationWarningCategory, + type ReconciliationWarning, +} from './plan-reconciliation.js'; +import type { Plan } from './types.js'; + +const emptyEnrichment: PlanningEnrichment = { + sliceDependencies: [], + epics: [], + nonBuildableSliceIds: [], +}; + +const emptyPlan: Plan = { + epics: [{ id: 'default', summary: 'All requirements', depends_on: [], verification: [] }], + slices: [], +}; + +describe('reconcilePlan', () => { + it('returns an empty plan and zero warnings when both inputs are empty', () => { + const result = reconcilePlan(emptyPlan, emptyEnrichment); + + expect(result.warnings).toEqual([]); + 
expect(result.plan.slices).toEqual([]); + // Default epic survives so loadPlan round-trips don't break — an + // empty epics array would still be valid YAML but cook conventions + // keep at least the fallback epic visible. + expect(result.plan.epics).toHaveLength(1); + expect(result.plan.epics[0]!.id).toBe('default'); + }); + + it('synthesizes one unit-test verification per surviving slice at tests/.test.ts', () => { + const projected: Plan = { + epics: [{ id: 'default', summary: 'All requirements', depends_on: [], verification: [] }], + slices: [ + { id: 'req-1', epic_id: 'default', definition: 'First', depends_on: [], verification: [] }, + { id: 'req-2', epic_id: 'default', definition: 'Second', depends_on: [], verification: [] }, + ], + }; + + const result = reconcilePlan(projected, emptyEnrichment); + + expect(result.plan.slices).toHaveLength(2); + for (const slice of result.plan.slices) { + expect(slice.verification).toEqual([{ kind: 'unit-test', target: `tests/${slice.id}.test.ts` }]); + } + const synthesisWarnings = result.warnings.filter((w) => w.code === 'synthesized-verification-target'); + expect(synthesisWarnings).toHaveLength(2); + }); + + it('enriches slice.definition with verifying-criteria text from the projected verification array', () => { + const projected: Plan = { + epics: [{ id: 'default', summary: 'All requirements', depends_on: [], verification: [] }], + slices: [ + { + id: 'req-1', + epic_id: 'default', + definition: 'First requirement', + depends_on: [], + verification: [ + { kind: 'criterion', target: 'First criterion text' }, + { kind: 'criterion', target: 'Second criterion text' }, + ], + }, + ], + }; + + const result = reconcilePlan(projected, emptyEnrichment); + + const slice = result.plan.slices[0]!; + expect(slice.definition).toContain('First requirement'); + expect(slice.definition).toContain('First criterion text'); + expect(slice.definition).toContain('Second criterion text'); + // Original criterion entries are gone; only the 
synthesized unit-test remains. + expect(slice.verification).toHaveLength(1); + expect(slice.verification[0]!.kind).toBe('unit-test'); + }); + + it('drops dependsOn references to nonexistent slice ids with a warning', () => { + const projected: Plan = { + epics: [{ id: 'default', summary: 'All requirements', depends_on: [], verification: [] }], + slices: [{ id: 'req-1', epic_id: 'default', definition: 'A', depends_on: [], verification: [] }], + }; + const enrichment: PlanningEnrichment = { + sliceDependencies: [{ sliceId: 'req-1', dependsOn: ['ghost-req-99'] }], + epics: [], + nonBuildableSliceIds: [], + }; + + const result = reconcilePlan(projected, enrichment); + + expect(result.plan.slices[0]!.depends_on).toEqual([]); + expect(result.warnings).toContainEqual({ + code: 'dropped-dependency-nonexistent-id', + sliceId: 'req-1', + missingId: 'ghost-req-99', + }); + }); + + it('drops self-loops (slice depending on itself) with a warning', () => { + const projected: Plan = { + epics: [{ id: 'default', summary: 'All requirements', depends_on: [], verification: [] }], + slices: [{ id: 'req-1', epic_id: 'default', definition: 'A', depends_on: [], verification: [] }], + }; + const enrichment: PlanningEnrichment = { + sliceDependencies: [{ sliceId: 'req-1', dependsOn: ['req-1'] }], + epics: [], + nonBuildableSliceIds: [], + }; + + const result = reconcilePlan(projected, enrichment); + + expect(result.plan.slices[0]!.depends_on).toEqual([]); + expect(result.warnings).toContainEqual({ + code: 'dropped-self-loop', + sliceId: 'req-1', + }); + }); + + it('drops a non-buildable slice with a warning and drops incoming deps onto it', () => { + const projected: Plan = { + epics: [{ id: 'default', summary: 'All requirements', depends_on: [], verification: [] }], + slices: [ + { + id: 'req-1', + epic_id: 'default', + definition: 'Build A', + depends_on: [], + verification: [], + }, + { + id: 'req-2', + epic_id: 'default', + definition: 'Never lose data', + depends_on: [], + 
verification: [], + }, + ], + }; + const enrichment: PlanningEnrichment = { + sliceDependencies: [{ sliceId: 'req-1', dependsOn: ['req-2'] }], + epics: [], + nonBuildableSliceIds: ['req-2'], + }; + + const result = reconcilePlan(projected, enrichment); + + expect(result.plan.slices.map((s) => s.id)).toEqual(['req-1']); + expect(result.plan.slices[0]!.depends_on).toEqual([]); + expect(result.warnings).toContainEqual({ + code: 'dropped-non-buildable-slice', + sliceId: 'req-2', + definition: 'Never lose data', + }); + expect(result.warnings).toContainEqual({ + code: 'dropped-dependency-on-non-buildable', + sliceId: 'req-1', + nonBuildableId: 'req-2', + }); + }); + + it('breaks a 2-cycle by dropping the incoming edges of the lex-smallest sliceId', () => { + const projected: Plan = { + epics: [{ id: 'default', summary: 'All requirements', depends_on: [], verification: [] }], + slices: [ + { id: 'req-a', epic_id: 'default', definition: 'A', depends_on: [], verification: [] }, + { id: 'req-b', epic_id: 'default', definition: 'B', depends_on: [], verification: [] }, + ], + }; + const enrichment: PlanningEnrichment = { + sliceDependencies: [ + { sliceId: 'req-a', dependsOn: ['req-b'] }, + { sliceId: 'req-b', dependsOn: ['req-a'] }, + ], + epics: [], + nonBuildableSliceIds: [], + }; + + const result = reconcilePlan(projected, enrichment); + + const bySliceId = new Map(result.plan.slices.map((s) => [s.id, s] as const)); + // Lex-smallest is 'req-a': drop its incoming dep edge (req-a depends on req-b). 
+ expect(bySliceId.get('req-a')!.depends_on).toEqual([]); + expect(bySliceId.get('req-b')!.depends_on).toEqual(['req-a']); + expect(result.warnings).toContainEqual({ + code: 'cycle-break-dropped-edge', + sliceId: 'req-a', + droppedDependsOn: 'req-b', + }); + }); + + it('breaks a 3-cycle deterministically across re-runs', () => { + const projected: Plan = { + epics: [{ id: 'default', summary: 'All requirements', depends_on: [], verification: [] }], + slices: [ + { id: 'req-a', epic_id: 'default', definition: 'A', depends_on: [], verification: [] }, + { id: 'req-b', epic_id: 'default', definition: 'B', depends_on: [], verification: [] }, + { id: 'req-c', epic_id: 'default', definition: 'C', depends_on: [], verification: [] }, + ], + }; + const enrichment: PlanningEnrichment = { + sliceDependencies: [ + { sliceId: 'req-a', dependsOn: ['req-c'] }, + { sliceId: 'req-b', dependsOn: ['req-a'] }, + { sliceId: 'req-c', dependsOn: ['req-b'] }, + ], + epics: [], + nonBuildableSliceIds: [], + }; + + const first = reconcilePlan(projected, enrichment); + const second = reconcilePlan(projected, enrichment); + + expect(first).toEqual(second); + + // Output must be acyclic — assert with a fresh Kahn pass. + const depsById = new Map(first.plan.slices.map((s) => [s.id, s.depends_on] as const)); + const remaining = new Set(depsById.keys()); + let processedSomething = true; + while (remaining.size > 0 && processedSomething) { + processedSomething = false; + for (const id of [...remaining]) { + if ((depsById.get(id) ?? 
[]).every((d) => !remaining.has(d))) { + remaining.delete(id); + processedSomething = true; + } + } + } + expect(remaining.size).toBe(0); + }); + + it('drops an empty epic and assigns orphan slices to a synthesized default epic', () => { + const projected: Plan = { + epics: [{ id: 'default', summary: 'All requirements', depends_on: [], verification: [] }], + slices: [ + { id: 'req-1', epic_id: 'default', definition: 'A', depends_on: [], verification: [] }, + { id: 'req-2', epic_id: 'default', definition: 'B', depends_on: [], verification: [] }, + ], + }; + const enrichment: PlanningEnrichment = { + sliceDependencies: [], + epics: [ + // Covers req-1. + { id: 'core', summary: 'Core', sliceIds: ['req-1'] }, + // Refers only to a nonexistent slice → empty after filtering. + { id: 'orphan-epic', summary: 'Orphan', sliceIds: ['ghost-req-99'] }, + ], + nonBuildableSliceIds: [], + }; + + const result = reconcilePlan(projected, enrichment); + + const epicIds = result.plan.epics.map((e) => e.id); + expect(epicIds).toContain('core'); + expect(epicIds).toContain('default'); + expect(epicIds).not.toContain('orphan-epic'); + + const sliceById = new Map(result.plan.slices.map((s) => [s.id, s] as const)); + expect(sliceById.get('req-1')!.epic_id).toBe('core'); + expect(sliceById.get('req-2')!.epic_id).toBe('default'); + + expect(result.warnings).toContainEqual({ + code: 'dropped-empty-epic', + epicId: 'orphan-epic', + epicSummary: 'Orphan', + }); + expect(result.warnings).toContainEqual({ + code: 'orphan-slice-assigned-to-default-epic', + sliceId: 'req-2', + }); + }); + + it('does not synthesize a default epic when every surviving slice is covered', () => { + const projected: Plan = { + epics: [{ id: 'default', summary: 'All requirements', depends_on: [], verification: [] }], + slices: [{ id: 'req-1', epic_id: 'default', definition: 'A', depends_on: [], verification: [] }], + }; + const enrichment: PlanningEnrichment = { + sliceDependencies: [], + epics: [{ id: 'core', summary: 
'Core', sliceIds: ['req-1'] }], + nonBuildableSliceIds: [], + }; + + const result = reconcilePlan(projected, enrichment); + + expect(result.plan.epics.map((e) => e.id)).toEqual(['core']); + expect(result.warnings.some((w) => w.code === 'orphan-slice-assigned-to-default-epic')).toBe(false); + }); + + it('returns structurally-equal outputs across two identical calls (determinism pin)', () => { + const projected: Plan = { + epics: [{ id: 'default', summary: 'All requirements', depends_on: [], verification: [] }], + slices: [ + { + id: 'req-1', + epic_id: 'default', + definition: 'First', + depends_on: [], + verification: [{ kind: 'criterion', target: 'Crit' }], + }, + { id: 'req-2', epic_id: 'default', definition: 'Second', depends_on: [], verification: [] }, + ], + }; + const enrichment: PlanningEnrichment = { + sliceDependencies: [{ sliceId: 'req-2', dependsOn: ['req-1'] }], + epics: [{ id: 'core', summary: 'Core', sliceIds: ['req-1', 'req-2'] }], + nonBuildableSliceIds: [], + }; + + expect(reconcilePlan(projected, enrichment)).toEqual(reconcilePlan(projected, enrichment)); + }); + + it('brunch_graphs corpus end-to-end — round-trips through loadPlan after reconciliation', () => { + const fixturePath = join( + dirname(fileURLToPath(import.meta.url)), + '__fixtures__', + 'brunch-graphs-snapshot.json', + ); + const snapshot = JSON.parse(readFileSync(fixturePath, 'utf8')) as CompletedSpecSnapshot; + const projected = projectPlanFromSpec(snapshot); + expect(projected.slices.length).toBeGreaterThan(2); + + // Hand-craft a representative enrichment: one dep edge, one non-buildable + // slice, no epic coverage so every surviving slice falls through to the + // default epic. 
+ const [firstSlice, secondSlice, thirdSlice, ...rest] = projected.slices; + const enrichment: PlanningEnrichment = { + sliceDependencies: [{ sliceId: secondSlice!.id, dependsOn: [firstSlice!.id] }], + epics: [], + nonBuildableSliceIds: [thirdSlice!.id], + }; + + const result = reconcilePlan(projected, enrichment); + + // (a) Non-buildable slice removed. + expect(result.plan.slices.map((s) => s.id)).not.toContain(thirdSlice!.id); + // (b) Every surviving slice carries the synthesized unit-test target. + for (const slice of result.plan.slices) { + expect(slice.verification).toEqual([{ kind: 'unit-test', target: `tests/${slice.id}.test.ts` }]); + } + // (c) Warnings non-empty (synthesis + non-buildable removal at minimum). + expect(result.warnings.length).toBeGreaterThan(0); + expect(result.warnings).toContainEqual({ + code: 'dropped-non-buildable-slice', + sliceId: thirdSlice!.id, + definition: thirdSlice!.definition, + }); + + // (d) YAML round-trip via loadPlan preserves the reconciled plan. + const dir = mkdtempSync(join(tmpdir(), 'plan-reconciliation-')); + const yamlPath = join(dir, 'plan.yaml'); + writeFileSync(yamlPath, stringifyYaml(result.plan)); + const reloaded = loadPlan(yamlPath); + expect(reloaded).toEqual(result.plan); + + // Schema-conformance pin: every slice.epic_id resolves to an existing epic. + const epicIds = new Set(reloaded.epics.map((e) => e.id)); + for (const slice of reloaded.slices) { + expect(epicIds.has(slice.epic_id)).toBe(true); + } + + // Silence unused-var lint for `rest` — we intentionally only need three + // representative slices for this corpus check. + void rest; + }); +}); + +describe('reconciliationWarningCategory', () => { + // One example per code so a new warning code in the union forces an + // exhaustive-switch update in `reconciliationWarningCategory` AND a + // matching expectation here. 
+ const examples: { warning: ReconciliationWarning; expected: 'transformation' | 'synthesis' }[] = [ + { + warning: { code: 'synthesized-verification-target', sliceId: 'req-1', target: 'tests/req-1.test.ts' }, + expected: 'synthesis', + }, + { + warning: { code: 'dropped-dependency-nonexistent-id', sliceId: 'req-1', missingId: 'ghost' }, + expected: 'transformation', + }, + { + warning: { code: 'dropped-self-loop', sliceId: 'req-1' }, + expected: 'transformation', + }, + { + warning: { code: 'cycle-break-dropped-edge', sliceId: 'req-a', droppedDependsOn: 'req-b' }, + expected: 'transformation', + }, + { + warning: { code: 'dropped-dependency-on-non-buildable', sliceId: 'req-1', nonBuildableId: 'req-2' }, + expected: 'transformation', + }, + { + warning: { code: 'dropped-non-buildable-slice', sliceId: 'req-2', definition: 'Never lose data' }, + expected: 'transformation', + }, + { + warning: { code: 'dropped-empty-epic', epicId: 'orphan', epicSummary: 'Orphan' }, + expected: 'transformation', + }, + { + warning: { code: 'orphan-slice-assigned-to-default-epic', sliceId: 'req-1' }, + expected: 'transformation', + }, + ]; + + for (const { warning, expected } of examples) { + it(`classifies '${warning.code}' as '${expected}'`, () => { + expect(reconciliationWarningCategory(warning)).toBe(expected); + }); + } +}); + +describe('formatReconciliationWarning', () => { + const examples: { warning: ReconciliationWarning; mustContain: string[] }[] = [ + { + warning: { code: 'synthesized-verification-target', sliceId: 'req-1', target: 'tests/req-1.test.ts' }, + mustContain: ['synthesized-verification-target', 'req-1', 'tests/req-1.test.ts'], + }, + { + warning: { code: 'dropped-dependency-nonexistent-id', sliceId: 'req-1', missingId: 'ghost' }, + mustContain: ['dropped-dependency-nonexistent-id', 'req-1', 'ghost'], + }, + { + warning: { code: 'dropped-self-loop', sliceId: 'req-1' }, + mustContain: ['dropped-self-loop', 'req-1'], + }, + { + warning: { code: 
'cycle-break-dropped-edge', sliceId: 'req-a', droppedDependsOn: 'req-b' }, + mustContain: ['cycle-break-dropped-edge', 'req-a', 'req-b'], + }, + { + warning: { code: 'dropped-dependency-on-non-buildable', sliceId: 'req-1', nonBuildableId: 'req-2' }, + mustContain: ['dropped-dependency-on-non-buildable', 'req-1', 'req-2'], + }, + { + warning: { code: 'dropped-non-buildable-slice', sliceId: 'req-2', definition: 'Never lose data' }, + mustContain: ['dropped-non-buildable-slice', 'req-2'], + }, + { + warning: { code: 'dropped-empty-epic', epicId: 'orphan', epicSummary: 'Orphan' }, + mustContain: ['dropped-empty-epic', 'orphan', 'Orphan'], + }, + { + warning: { code: 'orphan-slice-assigned-to-default-epic', sliceId: 'req-1' }, + mustContain: ['orphan-slice-assigned-to-default-epic', 'req-1'], + }, + ]; + + for (const { warning, mustContain } of examples) { + it(`renders '${warning.code}' as a non-empty line containing code + key fields`, () => { + const line = formatReconciliationWarning(warning); + expect(line.length).toBeGreaterThan(0); + for (const fragment of mustContain) { + expect(line).toContain(fragment); + } + }); + } +}); diff --git a/src/orchestrator/src/plan-reconciliation.ts b/src/orchestrator/src/plan-reconciliation.ts new file mode 100644 index 00000000..60ce4fb0 --- /dev/null +++ b/src/orchestrator/src/plan-reconciliation.ts @@ -0,0 +1,245 @@ +// FE-800 slice 3: deterministic reconciliation. +// +// Takes slice 1's projected Plan plus slice 2's LLM PlanningEnrichment +// and produces a cook-runnable Plan + structured warnings. Pure — no +// I/O, no LLM, no randomness. Every transformation that drops, +// redirects, breaks, or synthesizes a value surfaces as a typed +// ReconciliationWarning so the reviewer can audit slice 2's output. 
import type { PlanningEnrichment } from './plan-llm-planning.js';
import type { Epic, Plan, Slice } from './types.js';

/**
 * One auditable reconciliation step. Every value that `reconcilePlan`
 * drops, redirects, or synthesizes is reported as exactly one of these
 * variants, discriminated by `code`.
 */
export type ReconciliationWarning =
  | { code: 'synthesized-verification-target'; sliceId: string; target: string }
  | { code: 'dropped-dependency-nonexistent-id'; sliceId: string; missingId: string }
  | { code: 'dropped-self-loop'; sliceId: string }
  | { code: 'cycle-break-dropped-edge'; sliceId: string; droppedDependsOn: string }
  | { code: 'dropped-dependency-on-non-buildable'; sliceId: string; nonBuildableId: string }
  | { code: 'dropped-non-buildable-slice'; sliceId: string; definition: string }
  | { code: 'dropped-empty-epic'; epicId: string; epicSummary: string }
  | { code: 'orphan-slice-assigned-to-default-epic'; sliceId: string };

const DEFAULT_EPIC_ID = 'default';
const DEFAULT_EPIC_SUMMARY = 'All requirements';

/**
 * Merge the projected plan with the LLM planning enrichment into a plan
 * whose dependency graph is acyclic and whose slices each belong to an
 * emitted epic. Pure: no I/O and no randomness; identical inputs give
 * structurally equal outputs.
 *
 * Steps, in order (warnings are appended in the same order):
 *  1. Remove slices the enrichment marks non-buildable.
 *  2. Filter each slice's proposed dependencies (self-loops, non-buildable
 *     targets, unknown ids).
 *  3. Break dependency cycles deterministically.
 *  4. Assign slices to epics; uncovered slices go to the default epic.
 *  5. Replace each slice's verification with a synthesized unit-test
 *     target and fold criterion text into the definition.
 *  6. Emit epics, falling back to the default epic when none survive.
 *
 * @param projected  Plan produced by the projection step; its slice order
 *                   is preserved in the output.
 * @param enrichment Dependencies, epics, and non-buildable ids proposed
 *                   by the planning model.
 * @returns The reconciled plan plus every warning raised along the way.
 */
export function reconcilePlan(
  projected: Plan,
  enrichment: PlanningEnrichment,
): { plan: Plan; warnings: ReconciliationWarning[] } {
  const warnings: ReconciliationWarning[] = [];

  // 1. Partition projected slices into surviving vs non-buildable.
  //    Non-buildable ids that do not name a projected slice are ignored.
  const projectedSliceIds = new Set(projected.slices.map((slice) => slice.id));
  const nonBuildableIds = new Set(enrichment.nonBuildableSliceIds.filter((id) => projectedSliceIds.has(id)));
  const survivingSlices: Slice[] = [];
  for (const slice of projected.slices) {
    if (nonBuildableIds.has(slice.id)) {
      warnings.push({
        code: 'dropped-non-buildable-slice',
        sliceId: slice.id,
        definition: slice.definition,
      });
      continue;
    }
    survivingSlices.push(slice);
  }
  const survivingIds = new Set(survivingSlices.map((slice) => slice.id));

  // 2. Project the enrichment's per-slice depends_on through the survivor set.
  const dependsOnBySliceId = resolveDependencies(
    enrichment.sliceDependencies,
    { projected: projectedSliceIds, surviving: survivingIds, nonBuildable: nonBuildableIds },
    warnings,
  );

  // 3. Make the dependency graph acyclic.
  breakCycles(dependsOnBySliceId, survivingIds, warnings);

  // 4. Resolve epic grouping. LLM-proposed epics with zero surviving slices
  //    are dropped (warning). Surviving slices not covered by any epic land
  //    in a synthesized default epic (warning per orphan). A slice claimed
  //    by several epics stays with the first one that claimed it.
  const epicAssignment = new Map<string, string>(); // sliceId -> epicId
  const epicOrder: string[] = [];
  const epicSummaryById = new Map<string, string>();
  for (const epic of enrichment.epics) {
    const includedSliceIds = epic.sliceIds.filter((id) => survivingIds.has(id) && !epicAssignment.has(id));
    if (includedSliceIds.length === 0) {
      warnings.push({
        code: 'dropped-empty-epic',
        epicId: epic.id,
        epicSummary: epic.summary,
      });
      continue;
    }
    // A repeated epic id is merged into the first occurrence so the output
    // never contains two epics with the same id; the first summary is kept.
    if (!epicSummaryById.has(epic.id)) {
      epicOrder.push(epic.id);
      epicSummaryById.set(epic.id, epic.summary);
    }
    for (const sid of includedSliceIds) epicAssignment.set(sid, epic.id);
  }
  const orphans: string[] = [];
  for (const slice of survivingSlices) {
    if (!epicAssignment.has(slice.id)) {
      orphans.push(slice.id);
      warnings.push({ code: 'orphan-slice-assigned-to-default-epic', sliceId: slice.id });
    }
  }
  if (orphans.length > 0) {
    // If the enrichment already proposed an epic called 'default', orphans
    // join it rather than creating a second epic with the same id.
    if (!epicSummaryById.has(DEFAULT_EPIC_ID)) {
      epicOrder.push(DEFAULT_EPIC_ID);
      epicSummaryById.set(DEFAULT_EPIC_ID, DEFAULT_EPIC_SUMMARY);
    }
    for (const sid of orphans) epicAssignment.set(sid, DEFAULT_EPIC_ID);
  }

  // 5. Construct output slices in projected order with synthesized verification.
  const outputSlices: Slice[] = survivingSlices.map((slice) => {
    const target = `tests/${slice.id}.test.ts`;
    warnings.push({ code: 'synthesized-verification-target', sliceId: slice.id, target });
    return {
      id: slice.id,
      epic_id: epicAssignment.get(slice.id) ?? slice.epic_id,
      definition: enrichDefinitionWithCriteria(slice),
      depends_on: dependsOnBySliceId.get(slice.id) ?? [],
      verification: [{ kind: 'unit-test', target }],
    };
  });

  // 6. Construct output epics. Fallback to default epic if nothing was built.
  let outputEpics: Epic[] = epicOrder.map((id) => ({
    id,
    summary: epicSummaryById.get(id) ?? DEFAULT_EPIC_SUMMARY,
    depends_on: [],
    verification: [],
  }));
  if (outputEpics.length === 0) {
    outputEpics = [
      {
        id: DEFAULT_EPIC_ID,
        summary: DEFAULT_EPIC_SUMMARY,
        depends_on: [],
        verification: [],
      },
    ];
  }

  return { plan: { epics: outputEpics, slices: outputSlices }, warnings };
}

/**
 * Build the filtered dependency list for every surviving slice.
 *
 * Entries for slices that did not survive are skipped. Several entries for
 * the same `sliceId` are merged, and a dependency id is examined at most
 * once per slice, so repeated ids yield neither duplicate `depends_on`
 * values nor duplicate warnings. Rejected ids each raise one warning.
 */
function resolveDependencies(
  sliceDependencies: PlanningEnrichment['sliceDependencies'],
  ids: {
    projected: ReadonlySet<string>;
    surviving: ReadonlySet<string>;
    nonBuildable: ReadonlySet<string>;
  },
  warnings: ReconciliationWarning[],
): Map<string, string[]> {
  const dependsOnBySliceId = new Map<string, string[]>();
  const examinedBySliceId = new Map<string, Set<string>>();
  for (const id of ids.surviving) {
    dependsOnBySliceId.set(id, []);
    examinedBySliceId.set(id, new Set<string>());
  }

  for (const entry of sliceDependencies) {
    const accepted = dependsOnBySliceId.get(entry.sliceId);
    const examined = examinedBySliceId.get(entry.sliceId);
    if (accepted === undefined || examined === undefined) continue; // not a surviving slice

    for (const dep of entry.dependsOn) {
      if (examined.has(dep)) continue;
      examined.add(dep);

      if (dep === entry.sliceId) {
        warnings.push({ code: 'dropped-self-loop', sliceId: entry.sliceId });
      } else if (ids.nonBuildable.has(dep)) {
        warnings.push({
          code: 'dropped-dependency-on-non-buildable',
          sliceId: entry.sliceId,
          nonBuildableId: dep,
        });
      } else if (!ids.projected.has(dep)) {
        warnings.push({
          code: 'dropped-dependency-nonexistent-id',
          sliceId: entry.sliceId,
          missingId: dep,
        });
      } else {
        accepted.push(dep);
      }
    }
  }
  return dependsOnBySliceId;
}

/**
 * Remove just enough edges from `dependsOnBySliceId` (mutated in place) to
 * make the graph acyclic, raising one warning per removed edge.
 *
 * Runs Kahn-style peeling: slices whose dependencies are all resolved are
 * removed from `remaining`. When nothing can be peeled, the lex-smallest
 * slice that actually lies on a cycle loses exactly those dependencies
 * that lead back to it. Slices that merely wait on a cycle keep their
 * edges, so no acyclic ordering information is discarded.
 */
function breakCycles(
  dependsOnBySliceId: Map<string, string[]>,
  sliceIds: ReadonlySet<string>,
  warnings: ReconciliationWarning[],
): void {
  const remaining = new Set(sliceIds);
  while (remaining.size > 0) {
    const ready = [...remaining].filter((id) =>
      (dependsOnBySliceId.get(id) ?? []).every((dep) => !remaining.has(dep)),
    );
    if (ready.length > 0) {
      for (const id of ready) remaining.delete(id);
      continue;
    }

    // Every remaining slice waits on another remaining slice, so at least
    // one cycle exists and some candidate below has a cycle-closing edge.
    let brokeCycle = false;
    for (const candidate of [...remaining].sort()) {
      const deps = dependsOnBySliceId.get(candidate) ?? [];
      const cycleClosing = new Set(
        deps.filter((dep) => remaining.has(dep) && canReach(dep, candidate, dependsOnBySliceId, remaining)),
      );
      if (cycleClosing.size === 0) continue;

      for (const dep of deps) {
        if (cycleClosing.has(dep)) {
          warnings.push({
            code: 'cycle-break-dropped-edge',
            sliceId: candidate,
            droppedDependsOn: dep,
          });
        }
      }
      dependsOnBySliceId.set(
        candidate,
        deps.filter((dep) => !cycleClosing.has(dep)),
      );
      brokeCycle = true;
      break;
    }
    // Defensive: unreachable per the argument above, but never spin forever.
    if (!brokeCycle) break;
  }
}

/**
 * Report whether `goal` is reachable from `start` by following dependency
 * edges, visiting only ids contained in `within`.
 */
function canReach(
  start: string,
  goal: string,
  dependsOnBySliceId: ReadonlyMap<string, readonly string[]>,
  within: ReadonlySet<string>,
): boolean {
  const visited = new Set<string>([start]);
  const stack: string[] = [start];
  for (let current = stack.pop(); current !== undefined; current = stack.pop()) {
    if (current === goal) return true;
    for (const next of dependsOnBySliceId.get(current) ?? []) {
      if (within.has(next) && !visited.has(next)) {
        visited.add(next);
        stack.push(next);
      }
    }
  }
  return false;
}

/**
 * Classify a warning by audit weight so the CLI / display layer can
 * route `'transformation'` (something happened to the LLM output the
 * reviewer should see) versus `'synthesis'` (deterministic completion
 * that happens for every surviving slice and is predictable from the
 * slice id alone). Exhaustive switch — adding a new warning code is
 * a build break here.
 */
export function reconciliationWarningCategory(
  warning: ReconciliationWarning,
): 'transformation' | 'synthesis' {
  switch (warning.code) {
    case 'synthesized-verification-target':
      return 'synthesis';
    case 'dropped-dependency-nonexistent-id':
    case 'dropped-self-loop':
    case 'cycle-break-dropped-edge':
    case 'dropped-dependency-on-non-buildable':
    case 'dropped-non-buildable-slice':
    case 'dropped-empty-epic':
    case 'orphan-slice-assigned-to-default-epic':
      return 'transformation';
  }
}

/**
 * Render a warning as a single human-readable line. Co-located with
 * the warning union so a new code adds its formatter in the same diff
 * as its type definition.
 */
export function formatReconciliationWarning(warning: ReconciliationWarning): string {
  switch (warning.code) {
    case 'synthesized-verification-target':
      return `synthesized-verification-target ${warning.sliceId} → ${warning.target}`;
    case 'dropped-dependency-nonexistent-id':
      return `dropped-dependency-nonexistent-id ${warning.sliceId} → ${warning.missingId}`;
    case 'dropped-self-loop':
      return `dropped-self-loop ${warning.sliceId}`;
    case 'cycle-break-dropped-edge':
      return `cycle-break-dropped-edge ${warning.sliceId} → ${warning.droppedDependsOn}`;
    case 'dropped-dependency-on-non-buildable':
      return `dropped-dependency-on-non-buildable ${warning.sliceId} → ${warning.nonBuildableId}`;
    case 'dropped-non-buildable-slice':
      return `dropped-non-buildable-slice ${warning.sliceId}`;
    case 'dropped-empty-epic':
      return `dropped-empty-epic ${warning.epicId} (${warning.epicSummary})`;
    case 'orphan-slice-assigned-to-default-epic':
      return `orphan-slice-assigned-to-default-epic ${warning.sliceId}`;
  }
}

/**
 * Append the text of every `criterion` verification entry to the slice
 * definition as a bullet list. Returns the definition unchanged when the
 * slice carries no criterion entries.
 */
function enrichDefinitionWithCriteria(slice: Slice): string {
  const criterionTexts = slice.verification
    .filter((entry) => entry.kind === 'criterion')
    .map((entry) => entry.target);
  if (criterionTexts.length === 0) return slice.definition;
  const bulletList = criterionTexts.map((text) => `- ${text}`).join('\n');
  return `${slice.definition}\n\nVerifying criteria:\n${bulletList}`;
}

// Trailing diff header and leading comments of the next file in the patch, preserved verbatim:
// diff --git a/src/orchestrator/src/slice-label.test.ts b/src/orchestrator/src/slice-label.test.ts new file mode 100644 index 00000000..58f67fb6 --- /dev/null +++ b/src/orchestrator/src/slice-label.test.ts @@ -0,0 +1,72 @@
// FE-800 display polish: short, deterministic label derived from a
// slice's definition. Used purely for cook progress logging — slice
// ids stay canonical for branches, depends_on, reports, and tests.
+ +import { describe, expect, it } from 'vitest'; + +import { sliceLabel } from './slice-label.js'; + +describe('sliceLabel', () => { + it('appends a slug derived from the first significant words of the definition', () => { + expect( + sliceLabel({ + id: 'req-4', + definition: + 'Users can drag nodes to reposition them; positions persist per specification and are restored on reload.', + }), + ).toBe('req-4 · users-can-drag-nodes'); + }); + + it('drops leading stop words so the slug starts with content', () => { + expect( + sliceLabel({ + id: 'req-2', + definition: + 'The spatial canvas renders every intent item as a positioned node and every typed relationship as a drawn edge.', + }), + ).toBe('req-2 · spatial-canvas-renders-every'); + }); + + it('cuts at the first clause boundary (comma, semicolon, period, colon)', () => { + expect( + sliceLabel({ + id: 'req-1', + definition: 'Graph mode exposes a layout switch, toggling between list and canvas.', + }), + ).toBe('req-1 · graph-mode-exposes-layout'); + }); + + it('caps the slug at ~32 characters on a word boundary', () => { + expect( + sliceLabel({ + id: 'req-5', + definition: 'Canvas edges are visually distinguished by relation kind, documented by a legend.', + }), + ).toBe('req-5 · canvas-edges-visually'); + }); + + it('returns just the id when the definition is empty or only stop words', () => { + expect(sliceLabel({ id: 'req-7', definition: '' })).toBe('req-7'); + expect(sliceLabel({ id: 'req-7', definition: ' ' })).toBe('req-7'); + expect(sliceLabel({ id: 'req-7', definition: 'The a an to' })).toBe('req-7'); + }); + + it('returns just the id when no definition is provided', () => { + expect(sliceLabel({ id: 'req-99' })).toBe('req-99'); + }); + + it('strips non-alphanumeric runs and drops sub-3-char fragments', () => { + // "em-dash" splits into [`em`, `dash`]; `em` is too short to keep. 
// Cook display polish: derive a short, deterministic, human-readable
// suffix from a slice's `definition` so live progress lines read
// `req-4 · users-can-drag-nodes` instead of bare `req-4`.
//
// Display-only. The slice id stays the canonical key for branches,
// `depends_on`, `reports.jsonl`, and any log scraper — none of which
// should churn when a requirement's text is edited.

/** Words that carry no meaning in a slug; dropped wherever they appear. */
const SLUG_STOP_WORDS = new Set([
  'a',
  'an',
  'and',
  'are',
  'as',
  'be',
  'by',
  'for',
  'in',
  'is',
  'of',
  'on',
  'or',
  'the',
  'to',
  'with',
]);

/** Maximum number of words kept in a slug. */
const MAX_WORDS = 4;
/** Maximum slug length in characters, hyphens included. */
const MAX_SLUG_CHARS = 32;
/** Tokens shorter than this are dropped (e.g. the `em` in `em-dash`). */
const MIN_TOKEN_CHARS = 3;
/** Punctuation that ends a clause. */
const CLAUSE_BOUNDARY = /[,;.:]/;
/** Any run of characters that cannot appear inside a slug word. */
const NON_ALPHANUMERIC_RUN = /[^a-z0-9]+/;
/** Combining diacritical marks left behind by NFKD decomposition. */
const COMBINING_MARKS = /[\u0300-\u036f]/g;

/**
 * Build the display label for a slice.
 *
 * @param slice - The slice id plus its optional free-text definition.
 * @returns `${slice.id}` when the definition yields no usable slug;
 *   otherwise `${slice.id} · ${slug}`, where the slug is built from the
 *   first clause of `slice.definition` that contains significant words.
 *   Pure function — the same input always returns the same output.
 */
export function sliceLabel(slice: { id: string; definition?: string }): string {
  const slug = deriveSlug(slice.definition);
  return slug ? `${slice.id} · ${slug}` : slice.id;
}

/**
 * Derive the slug for a definition, or `undefined` when nothing usable
 * can be extracted (missing/blank definition, only stop words, or a
 * first word that alone exceeds {@link MAX_SLUG_CHARS}).
 */
function deriveSlug(definition: string | undefined): string | undefined {
  if (!definition) return undefined;

  // Walk clauses in order and use the first one that has significant
  // words. Normally that is the lead clause; the walk only matters when
  // the text starts with a boundary or an insignificant fragment, such as
  // the "1" of a numbered requirement "1. Users can ...".
  for (const clause of definition.split(CLAUSE_BOUNDARY)) {
    // Fold accents to their base letters before tokenising so that
    // "café" becomes "cafe" instead of being cut at the accented letter.
    // Folding happens per clause, after the split, so compatibility
    // decompositions cannot introduce new clause boundaries.
    const folded = clause.toLowerCase().normalize('NFKD').replace(COMBINING_MARKS, '');

    const words = folded
      .split(NON_ALPHANUMERIC_RUN)
      .filter((token) => token.length >= MIN_TOKEN_CHARS && !SLUG_STOP_WORDS.has(token))
      .slice(0, MAX_WORDS);
    if (words.length === 0) continue;

    return capOnWordBoundary(words);
  }
  return undefined;
}

/**
 * Join words with `-`, keeping as many leading words as fit within
 * {@link MAX_SLUG_CHARS}. Never emits a partial trailing word; returns
 * `undefined` when even the first word does not fit.
 */
function capOnWordBoundary(words: readonly string[]): string | undefined {
  const kept: string[] = [];
  let length = 0;
  for (const word of words) {
    // +1 accounts for the joining hyphen once a word is already kept.
    const next = length === 0 ? word.length : length + 1 + word.length;
    if (next > MAX_SLUG_CHARS) break;
    kept.push(word);
    length = next;
  }
  return kept.length > 0 ? kept.join('-') : undefined;
}
+ +import { mkdirSync, mkdtempSync, rmSync, utimesSync, writeFileSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; + +import { afterEach, describe, expect, it } from 'vitest'; + +import { parseSpecId, resolveLatestSpecPlanPath, specPlanPath } from './spec-plan-paths.js'; + +const dirs: string[] = []; +afterEach(() => { + for (const d of dirs) rmSync(d, { recursive: true, force: true }); + dirs.length = 0; +}); + +function makeTmpDir(prefix = 'spec-plan-paths-'): string { + const d = mkdtempSync(join(tmpdir(), prefix)); + dirs.push(d); + return d; +} + +describe('specPlanPath', () => { + it('joins .brunch/cook/specs//plan.yaml under dir', () => { + expect(specPlanPath('/x', 23)).toBe(join('/x', '.brunch', 'cook', 'specs', '23', 'plan.yaml')); + expect(specPlanPath('/x', 1)).toBe(join('/x', '.brunch', 'cook', 'specs', '1', 'plan.yaml')); + }); +}); + +describe('resolveLatestSpecPlanPath', () => { + it('returns undefined when the specs root is absent', () => { + const d = makeTmpDir(); + expect(resolveLatestSpecPlanPath(d)).toBeUndefined(); + }); + + it('returns undefined when the specs root is empty', () => { + const d = makeTmpDir(); + mkdirSync(join(d, '.brunch', 'cook', 'specs'), { recursive: true }); + expect(resolveLatestSpecPlanPath(d)).toBeUndefined(); + }); + + it('returns the only spec plan when exactly one exists', () => { + const d = makeTmpDir(); + const specDir = join(d, '.brunch', 'cook', 'specs', '7'); + mkdirSync(specDir, { recursive: true }); + writeFileSync(join(specDir, 'plan.yaml'), 'epics: []\nslices: []\n'); + + expect(resolveLatestSpecPlanPath(d)).toBe(join(specDir, 'plan.yaml')); + }); + + it('picks the newest plan by mtime when several exist', () => { + const d = makeTmpDir(); + const older = join(d, '.brunch', 'cook', 'specs', '1'); + const newer = join(d, '.brunch', 'cook', 'specs', '2'); + mkdirSync(older, { recursive: true }); + mkdirSync(newer, { recursive: true }); + writeFileSync(join(older, 
'plan.yaml'), 'epics: []\nslices: []\n'); + writeFileSync(join(newer, 'plan.yaml'), 'epics: []\nslices: []\n'); + + // Force mtime ordering deterministically: older = 60s ago. + const past = new Date(Date.now() - 60_000); + utimesSync(join(older, 'plan.yaml'), past, past); + + expect(resolveLatestSpecPlanPath(d)).toBe(join(newer, 'plan.yaml')); + }); + + it('ignores spec subdirs whose names are not positive integers', () => { + const d = makeTmpDir(); + const valid = join(d, '.brunch', 'cook', 'specs', '5'); + const bogus = join(d, '.brunch', 'cook', 'specs', 'scratch'); + const zero = join(d, '.brunch', 'cook', 'specs', '0'); + mkdirSync(valid, { recursive: true }); + mkdirSync(bogus, { recursive: true }); + mkdirSync(zero, { recursive: true }); + writeFileSync(join(valid, 'plan.yaml'), 'x: 1\n'); + writeFileSync(join(bogus, 'plan.yaml'), 'x: 1\n'); + writeFileSync(join(zero, 'plan.yaml'), 'x: 1\n'); + + expect(resolveLatestSpecPlanPath(d)).toBe(join(valid, 'plan.yaml')); + }); + + it('skips spec subdirs that have no plan.yaml inside them', () => { + const d = makeTmpDir(); + const empty = join(d, '.brunch', 'cook', 'specs', '3'); + const populated = join(d, '.brunch', 'cook', 'specs', '4'); + mkdirSync(empty, { recursive: true }); + mkdirSync(populated, { recursive: true }); + writeFileSync(join(populated, 'plan.yaml'), 'x: 1\n'); + + expect(resolveLatestSpecPlanPath(d)).toBe(join(populated, 'plan.yaml')); + }); +}); + +describe('parseSpecId', () => { + it('accepts positive integers and returns the number', () => { + expect(parseSpecId('1', '--spec')).toBe(1); + expect(parseSpecId('42', '--spec')).toBe(42); + }); + + it('rejects zero, negatives, non-numerics, and fractional values', () => { + expect(() => parseSpecId('0', '--spec')).toThrow(/--spec/); + expect(() => parseSpecId('-3', '--spec')).toThrow(/--spec/); + expect(() => parseSpecId('abc', '--spec')).toThrow(/--spec/); + expect(() => parseSpecId('1.5', '--spec')).toThrow(/--spec/); + expect(() => 
// Single owner for the spec-scoped plan-emission layout used by
// `brunch plan` (writer) and `brunch cook [--spec=]` (resolver).
//
// Layout: `DIR/.brunch/cook/specs/SPEC_ID/plan.yaml`. Each spec gets
// its own subdirectory so multiple completed specifications can coexist
// on the same project without overwriting each other; cook resolves
// either by explicit `--spec=` or by auto-picking the newest plan.
//
// Lives in `src/orchestrator/` (not `src/server/`) so the server-side
// plan-runner can import it without inverting the established
// orchestrator-pure-of-server dependency direction.

import { existsSync, readdirSync, statSync } from 'node:fs';
import { join } from 'node:path';

/** One or more ASCII decimal digits and nothing else. */
const DECIMAL_DIGITS = /^[0-9]+$/;

/**
 * Strictly parse a positive integer written in plain decimal digits.
 *
 * `Number(raw)` alone is too permissive for ids: it also accepts
 * exponent (`1e3`), radix-prefixed (`0x10`), signed (`+5`), padded
 * (`" 7"`) and trailing-zero decimal (`5.0`) spellings. Those are
 * rejected here, as are values too large to be represented exactly.
 *
 * @returns The parsed value, or `undefined` when `raw` is not a
 *   positive, safely representable decimal integer.
 */
function toPositiveInteger(raw: string): number | undefined {
  if (!DECIMAL_DIGITS.test(raw)) return undefined;
  const parsed = Number(raw);
  return Number.isSafeInteger(parsed) && parsed > 0 ? parsed : undefined;
}

/** Path to `plan.yaml` for `specId` under `dir`'s specs root. */
export function specPlanPath(dir: string, specId: number): string {
  return join(specsRootDir(dir), String(specId), 'plan.yaml');
}

/** Path to the `.brunch/cook/specs` directory under `dir`. */
export function specsRootDir(dir: string): string {
  return join(dir, '.brunch', 'cook', 'specs');
}

/**
 * Scan the per-spec subdirectories of the specs root and return the
 * path of the most recently modified `plan.yaml`, or `undefined` when
 * the root is absent or holds no plan.
 *
 * Only subdirectories whose name is the canonical `String(id)` spelling
 * of a positive integer are considered — that is the only form
 * {@link specPlanPath} produces — so look-alikes such as `1e3`, `0x10`
 * or `007` sitting next to the spec stores cannot win the auto-pick.
 * Entries without a regular `plan.yaml` file are skipped. When two
 * plans share the same mtime the higher spec id wins, so the result
 * does not depend on directory enumeration order.
 *
 * @param dir - Project directory containing `.brunch/cook/specs`.
 */
export function resolveLatestSpecPlanPath(dir: string): string | undefined {
  const root = specsRootDir(dir);
  if (!existsSync(root)) return undefined;

  let newest: { path: string; mtimeMs: number; specId: number } | undefined;
  for (const entry of readdirSync(root, { withFileTypes: true })) {
    if (!entry.isDirectory()) continue;

    const specId = toPositiveInteger(entry.name);
    // Reject non-canonical spellings (leading zeros) the writer never creates.
    if (specId === undefined || String(specId) !== entry.name) continue;

    const planPath = join(root, entry.name, 'plan.yaml');
    // One stat call instead of exists-then-stat: a plan removed between
    // the two calls would otherwise make statSync throw.
    const stats = statSync(planPath, { throwIfNoEntry: false });
    if (stats === undefined || !stats.isFile()) continue;

    const isNewer =
      newest === undefined ||
      stats.mtimeMs > newest.mtimeMs ||
      (stats.mtimeMs === newest.mtimeMs && specId > newest.specId);
    if (isNewer) {
      newest = { path: planPath, mtimeMs: stats.mtimeMs, specId };
    }
  }
  return newest?.path;
}

/**
 * Parse a CLI-supplied spec id.
 *
 * @param raw - The raw argument text. Must consist solely of decimal
 *   digits and denote a positive integer.
 * @param flagLabel - Appears verbatim in the error message; pass
 *   `--spec`, a positional-argument label, or whatever the calling
 *   surface uses so the error reads naturally.
 * @returns The spec id as a number.
 * @throws Error when `raw` is not a positive decimal integer.
 */
export function parseSpecId(raw: string, flagLabel: string): number {
  const parsed = toPositiveInteger(raw);
  if (parsed === undefined) {
    throw new Error(`Invalid ${flagLabel} value: "${raw}". Must be a positive integer.`);
  }
  return parsed;
}
assertCompletedSpecReadyForPlanning, + buildCompletedSpecSnapshot, +} from './db/completed-spec-snapshot.js'; import { launch } from './launcher.js'; import { resolveBrunchProject } from './project.js'; import { loadLocalEnvFile } from './runtime-config.js'; @@ -29,13 +33,23 @@ if (args.has('--help') || args.has('-h') || args.has('help')) { console.log('Launch the Brunch web UI in the current project directory.'); console.log(''); console.log('Commands:'); - console.log(' agent Run a JSONL capability session on stdin/stdout.'); - console.log(' cook [flags] Run the orchestrator on a plan directory.'); + console.log(' agent Run a JSONL capability session on stdin/stdout.'); + console.log(' cook [flags] Run the orchestrator on a plan directory.'); + console.log( + ' plan [flags] Emit .brunch/cook/specs//plan.yaml from a completed specification.', + ); console.log(''); console.log('Cook flags:'); + console.log(' --spec= Pick .brunch/cook/specs//plan.yaml (default: newest spec)'); console.log(' --policy=serial|parallel Firing policy (default: serial)'); - console.log(' --max-retries=N Retry budget per slice (default: 3)'); - console.log(' --verbose, -v Show raw pi-agent output'); + console.log(' --max-retries=N Retry budget per slice (default: 3)'); + console.log(' --verbose, -v Show raw pi-agent output'); + console.log(''); + console.log('Plan flags:'); + console.log( + ' --out= Output directory (default: cwd); plan lands under .brunch/cook/specs//', + ); + console.log(' --verbose, -v Verbose output'); process.exit(0); } @@ -46,6 +60,31 @@ if (rawArgs[0] === 'cook') { console.error('Failed to run brunch cook:', error); process.exit(1); }); +} else if (rawArgs[0] === 'plan') { + const { parsePlanArgs, runPlan } = await import('./plan-runner.js'); + let db: ReturnType | undefined; + try { + const opts = parsePlanArgs(rawArgs.slice(1)); + const project = resolveBrunchProject(launchCwd); + db = createDb(project.dbPath); + if (!getSpecification(db, opts.specificationId)) { 
+ throw new Error(`specification ${opts.specificationId} not found`); + } + const snapshot = buildCompletedSpecSnapshot(db, opts.specificationId); + assertCompletedSpecReadyForPlanning(db, opts.specificationId, snapshot); + await runPlan({ + specificationId: opts.specificationId, + snapshot, + outDir: opts.outDir, + verbose: opts.verbose, + }); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + console.error(`Failed to run brunch plan: ${message}`); + process.exit(1); + } finally { + db?.$client.close(); + } } else if (rawArgs[0] === 'agent') { const project = resolveBrunchProject(launchCwd); const db = createDb(project.dbPath); diff --git a/src/server/db/completed-spec-snapshot.test.ts b/src/server/db/completed-spec-snapshot.test.ts new file mode 100644 index 00000000..f6b72088 --- /dev/null +++ b/src/server/db/completed-spec-snapshot.test.ts @@ -0,0 +1,216 @@ +// FE-800 slice 6: unit tests for `buildCompletedSpecSnapshot`. +// +// Seeds a minimal completed-spec scenario in an in-memory DB: +// - one specification with confirmed grounding + design phases +// - two requirements; one accepted, one captured-only +// - two criteria; one accepted, one captured-only +// - one `verifies` edge whose source AND target are both accepted +// - one `verifies` edge whose source is not accepted (must be filtered) +// - one `depends_on` edge between two accepted requirements +// +// Then asserts the resulting `CompletedSpecSnapshot` contains only the +// accepted items, drops edges that reference non-accepted items, and +// preserves the edge relation enum verbatim. 
+ +import { beforeEach, afterEach, describe, expect, it } from 'vitest'; + +import { + addKnowledgeRelationship, + advanceHead, + createConfirmedPhaseOutcome, + createDb, + createKnowledgeItem, + createTurn, + getOrCreateSpecification, + linkKnowledgeItemToTurn, + type DB, +} from '../db.js'; +import { + assertCompletedSpecReadyForPlanning, + buildCompletedSpecSnapshot, +} from './completed-spec-snapshot.js'; + +let db: DB; + +beforeEach(() => { + db = createDb(); +}); + +afterEach(() => { + db.$client.close(); +}); + +function seedCompletedSpec() { + const specification = getOrCreateSpecification(db); + + const groundingTurn = createTurn(db, specification.id, { + phase: 'grounding', + question: 'What is the goal?', + answer: 'A demo spec', + }); + advanceHead(db, specification.id, groundingTurn.id); + createConfirmedPhaseOutcome(db, { + specificationId: specification.id, + phase: 'grounding', + proposal_turn_id: groundingTurn.id, + confirmation_turn_id: groundingTurn.id, + summary: 'Grounding captured.', + }); + + const designTurn = createTurn(db, specification.id, { + phase: 'design', + parent_turn_id: groundingTurn.id, + question: 'Any design notes?', + answer: 'None.', + }); + advanceHead(db, specification.id, designTurn.id); + createConfirmedPhaseOutcome(db, { + specificationId: specification.id, + phase: 'design', + proposal_turn_id: designTurn.id, + confirmation_turn_id: designTurn.id, + summary: 'Design captured.', + }); + + // Two requirements; only the first one is accepted in the reviewed set. 
+ const acceptedReq1 = createKnowledgeItem(db, specification.id, 'requirement', 'Requirement one'); + const acceptedReq2 = createKnowledgeItem(db, specification.id, 'requirement', 'Requirement two'); + const draftReq = createKnowledgeItem(db, specification.id, 'requirement', 'Draft requirement'); + linkKnowledgeItemToTurn(db, acceptedReq1.id, designTurn.id, 'captured'); + linkKnowledgeItemToTurn(db, acceptedReq2.id, designTurn.id, 'captured'); + linkKnowledgeItemToTurn(db, draftReq.id, designTurn.id, 'captured'); + + const requirementsReviewTurn = createTurn(db, specification.id, { + phase: 'requirements', + parent_turn_id: designTurn.id, + question: 'Please review the current requirement set.', + answer: 'Accept review', + }); + linkKnowledgeItemToTurn(db, acceptedReq1.id, requirementsReviewTurn.id, 'reviewed'); + linkKnowledgeItemToTurn(db, acceptedReq2.id, requirementsReviewTurn.id, 'reviewed'); + // draftReq intentionally NOT reviewed → stays out of the accepted set. + advanceHead(db, specification.id, requirementsReviewTurn.id); + createConfirmedPhaseOutcome(db, { + specificationId: specification.id, + phase: 'requirements', + proposal_turn_id: requirementsReviewTurn.id, + confirmation_turn_id: requirementsReviewTurn.id, + summary: 'Requirements accepted.', + }); + + const acceptedCrit = createKnowledgeItem(db, specification.id, 'criterion', 'Verifying criterion'); + const draftCrit = createKnowledgeItem(db, specification.id, 'criterion', 'Draft criterion'); + linkKnowledgeItemToTurn(db, acceptedCrit.id, requirementsReviewTurn.id, 'captured'); + linkKnowledgeItemToTurn(db, draftCrit.id, requirementsReviewTurn.id, 'captured'); + + const criteriaReviewTurn = createTurn(db, specification.id, { + phase: 'criteria', + parent_turn_id: requirementsReviewTurn.id, + question: 'Please review the current criterion set.', + answer: 'Accept review', + }); + linkKnowledgeItemToTurn(db, acceptedCrit.id, criteriaReviewTurn.id, 'reviewed'); + advanceHead(db, specification.id, 
criteriaReviewTurn.id); + createConfirmedPhaseOutcome(db, { + specificationId: specification.id, + phase: 'criteria', + proposal_turn_id: criteriaReviewTurn.id, + confirmation_turn_id: criteriaReviewTurn.id, + summary: 'Criteria accepted.', + }); + + // Edges: + // accepted criterion `verifies` accepted requirement 1 → keep + // draft criterion `verifies` accepted requirement 1 → drop (source not accepted) + // accepted req 2 `depends_on` accepted req 1 → keep + addKnowledgeRelationship(db, acceptedCrit.id, acceptedReq1.id, 'verifies'); + addKnowledgeRelationship(db, draftCrit.id, acceptedReq1.id, 'verifies'); + addKnowledgeRelationship(db, acceptedReq2.id, acceptedReq1.id, 'depends_on'); + + return { + specificationId: specification.id, + acceptedReq1, + acceptedReq2, + acceptedCrit, + draftReq, + draftCrit, + }; +} + +describe('buildCompletedSpecSnapshot', () => { + it('includes only accepted requirements and criteria with stable kindOrdinal mapping', () => { + const { specificationId, acceptedReq1, acceptedReq2, acceptedCrit } = seedCompletedSpec(); + + const snapshot = buildCompletedSpecSnapshot(db, specificationId); + + expect(snapshot.requirements).toEqual([ + { id: acceptedReq1.id, content: 'Requirement one', kindOrdinal: 1 }, + { id: acceptedReq2.id, content: 'Requirement two', kindOrdinal: 2 }, + ]); + expect(snapshot.criteria).toEqual([ + { id: acceptedCrit.id, content: 'Verifying criterion', kindOrdinal: 1 }, + ]); + }); + + it('drops edges whose endpoints reference non-accepted items and preserves the relation enum', () => { + const { specificationId, acceptedReq1, acceptedReq2, acceptedCrit } = seedCompletedSpec(); + + const snapshot = buildCompletedSpecSnapshot(db, specificationId); + + expect(snapshot.edges).toEqual( + expect.arrayContaining([ + { fromItemId: acceptedCrit.id, toItemId: acceptedReq1.id, relation: 'verifies' }, + { fromItemId: acceptedReq2.id, toItemId: acceptedReq1.id, relation: 'depends_on' }, + ]), + ); + 
expect(snapshot.edges).toHaveLength(2); + }); + + it('returns an empty snapshot when no items are accepted for the specification', () => { + const specification = getOrCreateSpecification(db); + + const snapshot = buildCompletedSpecSnapshot(db, specification.id); + + expect(snapshot).toEqual({ requirements: [], criteria: [], edges: [] }); + }); + + it('rejects planning when the criteria phase has not been confirmed', () => { + const specification = getOrCreateSpecification(db); + const snapshot = { + requirements: [{ id: 10, content: 'Requirement one', kindOrdinal: 1 }], + criteria: [{ id: 20, content: 'Criterion one', kindOrdinal: 1 }], + edges: [], + }; + + expect(() => assertCompletedSpecReadyForPlanning(db, specification.id, snapshot)).toThrow( + /confirm the criteria phase before planning/, + ); + }); + + it('rejects planning when confirmed criteria contain no accepted criteria', () => { + const specification = getOrCreateSpecification(db); + const turn = createTurn(db, specification.id, { + phase: 'criteria', + question: 'Please review the current criterion set.', + answer: 'Accept review', + }); + advanceHead(db, specification.id, turn.id); + createConfirmedPhaseOutcome(db, { + specificationId: specification.id, + phase: 'criteria', + proposal_turn_id: turn.id, + confirmation_turn_id: turn.id, + summary: 'Criteria accepted.', + }); + + const snapshot = { + requirements: [{ id: 10, content: 'Requirement one', kindOrdinal: 1 }], + criteria: [], + edges: [], + }; + + expect(() => assertCompletedSpecReadyForPlanning(db, specification.id, snapshot)).toThrow( + /has no accepted criteria/, + ); + }); +}); diff --git a/src/server/db/completed-spec-snapshot.ts b/src/server/db/completed-spec-snapshot.ts new file mode 100644 index 00000000..6c6b94f4 --- /dev/null +++ b/src/server/db/completed-spec-snapshot.ts @@ -0,0 +1,80 @@ +// FE-800 slice 6: server-side adapter that turns a completed brunch +// specification into the orchestrator's `CompletedSpecSnapshot`. 
+// +// Uses the active-confirmation-path projection — accepted requirements +// + accepted criteria + relationships filtered to the active turn path +// — because the orchestrator emitter is meant to plan from the spec +// the user has confirmed, not from every draft item in the project. +// +// Edges are filtered to those whose source AND target are in the +// snapshot's accepted-id set so the orchestrator never sees dangling +// references to non-snapshot items (goals, terms, decisions, etc.). +// +// Pure type-time dependency on the orchestrator: imports +// `CompletedSpecSnapshot` as a `type` only. No runtime import path +// flows from orchestrator → server. + +import type { CompletedSpecSnapshot } from '../../orchestrator/src/plan-projection.js'; +import { + getAcceptedCriterionEntitiesForSpecification, + getAcceptedRequirementEntitiesForSpecification, + getEntitiesForSpecificationOnActivePath, + getCurrentWorkflowState, +} from '../db.js'; +import type { DB } from '../db.js'; + +export function buildCompletedSpecSnapshot(db: DB, specificationId: number): CompletedSpecSnapshot { + const requirements = getAcceptedRequirementEntitiesForSpecification(db, specificationId); + const criteria = getAcceptedCriterionEntitiesForSpecification(db, specificationId); + const acceptedIds = new Set([ + ...requirements.map((requirement) => requirement.id), + ...criteria.map((criterion) => criterion.id), + ]); + const { relationships } = getEntitiesForSpecificationOnActivePath(db, specificationId); + + return { + requirements: requirements.map((requirement) => ({ + id: requirement.id, + content: requirement.content, + kindOrdinal: requirement.kind_ordinal, + })), + criteria: criteria.map((criterion) => ({ + id: criterion.id, + content: criterion.content, + kindOrdinal: criterion.kind_ordinal, + })), + edges: relationships + .filter( + (relationship) => acceptedIds.has(relationship.source.id) && acceptedIds.has(relationship.target.id), + ) + .map((relationship) => ({ + 
fromItemId: relationship.source.id, + toItemId: relationship.target.id, + relation: relationship.type, + })), + }; +} + +export function assertCompletedSpecReadyForPlanning( + db: DB, + specificationId: number, + snapshot: CompletedSpecSnapshot, +): void { + if (snapshot.requirements.length === 0) { + throw new Error( + `specification ${specificationId} has no accepted requirements — confirm the requirements phase before planning`, + ); + } + + if (getCurrentWorkflowState(db, specificationId).phases.criteria.status !== 'closed') { + throw new Error( + `specification ${specificationId} criteria are not confirmed — confirm the criteria phase before planning`, + ); + } + + if (snapshot.criteria.length === 0) { + throw new Error( + `specification ${specificationId} has no accepted criteria — confirm the criteria phase before planning`, + ); + } +} diff --git a/src/server/plan-runner.test.ts b/src/server/plan-runner.test.ts new file mode 100644 index 00000000..5909faa6 --- /dev/null +++ b/src/server/plan-runner.test.ts @@ -0,0 +1,164 @@ +// FE-800 slice 6: `parsePlanArgs` + `runPlan` against an in-memory +// `CompletedSpecSnapshot`. Ports the slice-4/5 plan-cli test shapes +// (cycle-break transformation always printed, synthesis hidden by +// default, planning-failed in the audit stream) to the new spec-id +// surface — the snapshot is supplied directly so these tests stay +// pure of DB seeding (covered separately in +// `db/completed-spec-snapshot.test.ts`). 

import { mkdtempSync, readFileSync, rmSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { join } from 'node:path';

import { afterEach, describe, expect, it } from 'vitest';
import { parse as parseYaml } from 'yaml';

import type { PlanningEnrichment, RunModel } from '../orchestrator/src/plan-llm-planning.js';
import type { CompletedSpecSnapshot } from '../orchestrator/src/plan-projection.js';
import type { Plan } from '../orchestrator/src/types.js';
import { parsePlanArgs, runPlan } from './plan-runner.js';

describe('parsePlanArgs', () => {
  it('parses <specId>, --out=<dir>, --verbose', () => {
    const opts = parsePlanArgs(['2', '--out=/tmp/x', '--verbose']);

    expect(opts.specificationId).toBe(2);
    // `outDir` is resolved to an absolute path, so accept either separator.
    expect(opts.outDir.endsWith('/tmp/x') || opts.outDir.endsWith('\\tmp\\x')).toBe(true);
    expect(opts.verbose).toBe(true);
  });

  it('defaults outDir to current working directory and verbose to false', () => {
    const opts = parsePlanArgs(['2']);

    expect(opts.outDir).toBe(process.cwd());
    expect(opts.verbose).toBe(false);
  });

  it('supports short -v alias for verbose', () => {
    const opts = parsePlanArgs(['2', '-v']);
    expect(opts.verbose).toBe(true);
  });

  it('throws a usage error mentioning spec id when the argument is missing', () => {
    expect(() => parsePlanArgs([])).toThrow(/spec id|specId/i);
    expect(() => parsePlanArgs(['--out=/tmp/x'])).toThrow(/spec id|specId/i);
  });

  it('rejects non-numeric and non-positive spec ids', () => {
    expect(() => parsePlanArgs(['abc'])).toThrow(/spec id/i);
    expect(() => parsePlanArgs(['0'])).toThrow(/spec id/i);
  });

  it('rejects unknown flags instead of silently swallowing them', () => {
    expect(() => parsePlanArgs(['2', '--bogus'])).toThrow(/--bogus/);
    expect(() => parsePlanArgs(['2', '--out'])).toThrow(/--out/);
    // `-1` looks like a flag, not a positional — caught by the unknown-flag arm.
    expect(() => parsePlanArgs(['-1'])).toThrow(/-1/);
  });

  it('rejects a second positional argument instead of overwriting the first', () => {
    expect(() => parsePlanArgs(['2', '3'])).toThrow(/positional|"3"/);
  });
});

describe('runPlan', () => {
  const SPEC_ID = 2;

  // Every temp dir created by a test is tracked here and removed in
  // `afterEach`, so repeated runs do not pile directories up in tmpdir.
  const tempDirs: string[] = [];

  afterEach(() => {
    for (const dir of tempDirs.splice(0)) {
      rmSync(dir, { recursive: true, force: true });
    }
  });

  /** Creates a fresh output dir that is cleaned up after the current test. */
  function makeTempDir(prefix: string): string {
    const dir = mkdtempSync(join(tmpdir(), prefix));
    tempDirs.push(dir);
    return dir;
  }

  /** Loads the plan that `runPlan` wrote for `SPEC_ID` under `dir`. */
  function readEmittedPlan(dir: string): Plan {
    const planPath = join(dir, '.brunch', 'cook', 'specs', String(SPEC_ID), 'plan.yaml');
    return parseYaml(readFileSync(planPath, 'utf8')) as Plan;
  }

  /** Runs `runPlan` with a capturing logger and returns every logged line. */
  async function runAndCapture(opts: {
    snapshot: CompletedSpecSnapshot;
    dir: string;
    runModel: RunModel;
    verbose: boolean;
  }): Promise<string[]> {
    const stderrLines: string[] = [];
    await runPlan({
      specificationId: SPEC_ID,
      snapshot: opts.snapshot,
      outDir: opts.dir,
      verbose: opts.verbose,
      runModel: opts.runModel,
      log: (line) => stderrLines.push(line),
    });
    return stderrLines;
  }

  function makeRunWithCycle() {
    const snapshot: CompletedSpecSnapshot = {
      requirements: [
        { id: 10, content: 'A', kindOrdinal: 1 },
        { id: 11, content: 'B', kindOrdinal: 2 },
      ],
      criteria: [],
      edges: [],
    };
    const dir = makeTempDir('plan-runner-');

    // 2-cycle req-1 ↔ req-2: reconciliation drops req-1's incoming edge.
    const enrichment: PlanningEnrichment = {
      sliceDependencies: [
        { sliceId: 'req-1', dependsOn: ['req-2'] },
        { sliceId: 'req-2', dependsOn: ['req-1'] },
      ],
      epics: [{ id: 'core', summary: 'Core', sliceIds: ['req-1', 'req-2'] }],
      nonBuildableSliceIds: [],
    };
    const runModel: RunModel = async () => enrichment;

    return { snapshot, dir, runModel };
  }

  it('writes .brunch/cook/specs/<specId>/plan.yaml and hides synthesis events at default verbosity', async () => {
    const { snapshot, dir, runModel } = makeRunWithCycle();

    const stderrLines = await runAndCapture({ snapshot, dir, runModel, verbose: false });

    const reloaded = readEmittedPlan(dir);
    expect(reloaded.slices.map((slice) => slice.id)).toEqual(['req-1', 'req-2']);

    // Transformation warning (cycle break) is always printed.
    expect(stderrLines.some((line) => line.includes('cycle-break-dropped-edge'))).toBe(true);
    // Synthesis warning is suppressed at default verbosity.
    expect(stderrLines.some((line) => line.includes('synthesized-verification-target'))).toBe(false);
    // Header echoes the spec id. Match the whole header line: the
    // "✓ plan …/specs/2/plan.yaml" line also contains "spec" and "2",
    // so a bare `includes` check could never fail.
    const specHeader = new RegExp(`^\\s*spec\\s+${SPEC_ID}\\s*$`);
    expect(stderrLines.some((line) => specHeader.test(line))).toBe(true);
  });

  it('shows synthesis events when --verbose is set', async () => {
    const { snapshot, dir, runModel } = makeRunWithCycle();

    const stderrLines = await runAndCapture({ snapshot, dir, runModel, verbose: true });

    expect(stderrLines.some((line) => line.includes('cycle-break-dropped-edge'))).toBe(true);
    const synth = stderrLines.filter((line) => line.includes('synthesized-verification-target'));
    expect(synth.length).toBe(2);
  });

  it('surfaces planning-failed as a stderr warning line when the LLM throws', async () => {
    const snapshot: CompletedSpecSnapshot = {
      requirements: [{ id: 10, content: 'Only req', kindOrdinal: 1 }],
      criteria: [],
      edges: [],
    };
    const dir = makeTempDir('plan-runner-fail-');

    const runModel: RunModel = async () => {
      throw new Error('llm-boom');
    };

    const stderrLines = await runAndCapture({ snapshot, dir, runModel, verbose: false });

    // A plan is still written even though the LLM step failed.
    const reloaded = readEmittedPlan(dir);
    expect(reloaded.slices.map((slice) => slice.id)).toEqual(['req-1']);

    expect(stderrLines.some((line) => line.startsWith(' ! ') && line.includes('planning-failed'))).toBe(
      true,
    );
    expect(stderrLines.some((line) => line.includes('llm-boom'))).toBe(true);
  });
});

// diff --git a/src/server/plan-runner.ts b/src/server/plan-runner.ts
// new file mode 100644
// index 00000000..a99c8dab
// --- /dev/null
// +++ b/src/server/plan-runner.ts
// @@ -0,0 +1,118 @@
// FE-800 slice 6: server-side `brunch plan <specId>` driver.
//
// Replaces the orchestrator-side `plan-cli.ts` (snapshot JSON file path).
// Lives in `src/server/` because it needs DB access to resolve the
// `<specId>` argument into a `CompletedSpecSnapshot`. The orchestrator
// package remains pure: this file imports the emitter + warning
// helpers but the reverse never happens.
//
// Display rules unchanged from slice 5: failure + transformation
// warnings always printed; synthesis warnings only with `--verbose`.

import { mkdirSync, writeFileSync } from 'node:fs';
import { dirname, resolve } from 'node:path';

import { stringify as stringifyYaml } from 'yaml';

import {
  emitPlanFromSnapshot,
  emitterWarningCategory,
  formatEmitterWarning,
  type EmitterWarning,
} from '../orchestrator/src/plan-emitter.js';
import type { RunModel } from '../orchestrator/src/plan-llm-planning.js';
import type { CompletedSpecSnapshot } from '../orchestrator/src/plan-projection.js';
import { parseSpecId, specPlanPath } from '../orchestrator/src/spec-plan-paths.js';

/** Parsed command-line options for `brunch plan`. */
export type PlanOptions = {
  /** Spec id taken from the single positional argument. */
  specificationId: number;
  /** Absolute output root; defaults to the current working directory. */
  outDir: string;
  /** When true, synthesis warnings are printed as well. */
  verbose: boolean;
};

const USAGE = 'Usage: brunch plan <specId> [--out=<dir>] [--verbose]';

/**
 * Parses the argument list that follows `brunch plan`.
 *
 * Accepts exactly one positional argument (the spec id), an optional
 * `--out=<dir>` and an optional `--verbose` / `-v`.
 *
 * @param args - Raw arguments, without the `brunch plan` prefix.
 * @returns The parsed options with `outDir` resolved to an absolute path.
 * @throws Error with the usage string appended when the spec id is
 *   missing, a flag is unknown, `--out=` has no value, or a second
 *   positional is given. Spec id validation itself is delegated to
 *   `parseSpecId`.
 */
export function parsePlanArgs(args: string[]): PlanOptions {
  const outPrefix = '--out=';
  let specIdRaw: string | undefined;
  let outDir = process.cwd();
  let verbose = false;

  for (const arg of args) {
    if (arg.startsWith(outPrefix)) {
      const value = arg.slice(outPrefix.length);
      // `resolve('')` is the cwd, so an empty value would silently fall
      // back to the default; reject it like any other malformed flag.
      if (value === '') {
        throw new Error(`Missing value for --out. ${USAGE}`);
      }
      outDir = resolve(value);
    } else if (arg === '--verbose' || arg === '-v') {
      verbose = true;
    } else if (arg.startsWith('-')) {
      // Also catches a bare `--out` and negative numbers such as `-1`.
      throw new Error(`Unknown flag "${arg}". ${USAGE}`);
    } else if (specIdRaw === undefined) {
      specIdRaw = arg;
    } else {
      throw new Error(`Unexpected positional argument "${arg}". ${USAGE}`);
    }
  }

  if (specIdRaw === undefined) {
    throw new Error(`Missing spec id. ${USAGE}`);
  }

  const specificationId = parseSpecId(specIdRaw, 'spec id');

  return { specificationId, outDir, verbose };
}

/** Inputs for {@link runPlan}. */
export type RunPlanArgs = {
  specificationId: number;
  snapshot: CompletedSpecSnapshot;
  outDir: string;
  verbose: boolean;
  /** Injectable LLM seam. Defaults to the production anthropic adapter via the emitter. */
  runModel?: RunModel;
  /** Injectable stderr writer. Defaults to `console.error`. */
  log?: (line: string) => void;
};

/**
 * Emits a plan from `args.snapshot`, writes it as YAML to the
 * spec-scoped path under `args.outDir`, and reports progress and
 * warnings through `args.log`.
 *
 * @param args - Snapshot, output location, verbosity and injectable seams.
 * @returns Resolves once the plan file is written and the report logged.
 */
export async function runPlan(args: RunPlanArgs): Promise<void> {
  const log = args.log ?? ((line: string) => console.error(line));

  log('');
  log(' brunch plan');
  log(' ──────────────────────────────────────');
  log(` spec ${args.specificationId}`);
  log(` out ${args.outDir}`);
  log('');

  // Only pass `runModel` when the caller supplied one so the emitter's
  // own default stays in effect otherwise.
  const emitOptions = args.runModel ? { runModel: args.runModel } : {};
  const result = await emitPlanFromSnapshot(args.snapshot, emitOptions);

  // Spec-scoped output path. Each spec gets its own subdir so multiple
  // specs can live side-by-side on the same project / branch. `brunch
  // cook` resolves either by `--spec=<id>` or by auto-picking the most
  // recently emitted plan; the legacy `<outDir>/.brunch/cook/plan.yaml`
  // path stays in cook's resolver as the authored-single-plan fallback
  // (this command never writes there). Layout owned by spec-plan-paths.
  const planPath = specPlanPath(args.outDir, args.specificationId);
  mkdirSync(dirname(planPath), { recursive: true });
  writeFileSync(planPath, stringifyYaml(result.plan));

  log(` ✓ plan ${planPath}`);
  log(` ${result.plan.epics.length} epics, ${result.plan.slices.length} slices`);
  log('');

  // Audit-weight display: failure + transformation always; synthesis
  // only when --verbose. The header counts only what we print so the
  // number on screen matches the lines below it.
  const printed = result.warnings.filter((warning) => shouldPrint(warning, args.verbose));
  if (printed.length > 0) {
    const noun = printed.length === 1 ? 'warning' : 'warnings';
    log(` ${printed.length} ${noun}:`);
    for (const warning of printed) {
      log(` ! ${formatEmitterWarning(warning)}`);
    }
    log('');
  }
}

/**
 * Decides whether a warning is shown: `synthesis` warnings only when
 * `verbose` is set, every other category always.
 */
function shouldPrint(warning: EmitterWarning, verbose: boolean): boolean {
  const category = emitterWarningCategory(warning);
  if (category === 'synthesis') return verbose;
  return true;
}