From 8b572019000bfbff9a4d059cda0cced567ffb861 Mon Sep 17 00:00:00 2001 From: Hanwen Cheng Date: Sun, 31 May 2026 16:49:00 +0800 Subject: [PATCH 1/6] mcp+memory: bind namespace into the signed cap service (#147, approach B) memory.put/get now mint the cap with service="memory:<namespace>" instead of a static "memory". Because the broker signs `service` and the worker already derives the S3 key, AAD, and on-chain scope check from cap.payload.service, this makes the namespace: - tamper-proof (signed into the cap), - authorized via the existing isServiceInScope gate, - storage-segregated (bots/<actor>/memory/memory:<namespace>.enc), - AAD-bound, with NO CapPayload change, NO broker change, and no byte-exact broker<->worker signature risk. Also fixes a latent bug where every namespace collided at the single memory.enc key. No worker behavior change (it already keys/scopes/AADs off the signed service); added a test proving namespace-folded services segregate storage. Verified: cargo test -p agentkeys-mcp-server (35) + -p agentkeys-worker-memory green. --- .../agentkeys-mcp-server/src/tools/memory.rs | 26 ++++++++++++------- .../agentkeys-worker-memory/src/handlers.rs | 15 +++++++++++ 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/crates/agentkeys-mcp-server/src/tools/memory.rs b/crates/agentkeys-mcp-server/src/tools/memory.rs index 91f735f5..dbf85385 100644 --- a/crates/agentkeys-mcp-server/src/tools/memory.rs +++ b/crates/agentkeys-mcp-server/src/tools/memory.rs @@ -63,11 +63,14 @@ pub async fn put( "device_key_hash", config.default_device_key_hash.as_deref(), )?; - let service = params - .get("service") - .and_then(|v| v.as_str()) - .unwrap_or("memory") - .to_string(); + // Issue #147 (approach B): fold the namespace into the SIGNED `service`, + // so the cap is cryptographically bound to exactly one namespace and + // authorized via the existing on-chain `isServiceInScope` check. 
A + // `memory:travel` cap cannot touch `memory:personal` — different service + // ⇒ different scope entry, different S3 key, different AAD. No CapPayload + // change, no broker change: the broker already signs whatever `service` + // it's given and the worker already keys storage + scope + AAD off it. + let service = format!("memory:{namespace}"); let ttl_seconds = params .get("ttl_seconds") .and_then(|v| v.as_u64()) @@ -140,11 +143,14 @@ pub async fn get( "device_key_hash", config.default_device_key_hash.as_deref(), )?; - let service = params - .get("service") - .and_then(|v| v.as_str()) - .unwrap_or("memory") - .to_string(); + // Issue #147 (approach B): fold the namespace into the SIGNED `service`, + // so the cap is cryptographically bound to exactly one namespace and + // authorized via the existing on-chain `isServiceInScope` check. A + // `memory:travel` cap cannot touch `memory:personal` — different service + // ⇒ different scope entry, different S3 key, different AAD. No CapPayload + // change, no broker change: the broker already signs whatever `service` + // it's given and the worker already keys storage + scope + AAD off it. + let service = format!("memory:{namespace}"); let ttl_seconds = params .get("ttl_seconds") .and_then(|v| v.as_u64()) diff --git a/crates/agentkeys-worker-memory/src/handlers.rs b/crates/agentkeys-worker-memory/src/handlers.rs index b11997b9..6fb5eb63 100644 --- a/crates/agentkeys-worker-memory/src/handlers.rs +++ b/crates/agentkeys-worker-memory/src/handlers.rs @@ -281,4 +281,19 @@ mod tests { fn s3_prefix_uses_memory_path() { assert_eq!(s3_prefix("0xABCDEF"), "bots/abcdef/memory/"); } + + #[test] + fn namespace_folded_service_segregates_storage() { + // Issue #147 (approach B): the MCP mints memory caps with + // service="memory:<namespace>". Because the worker keys S3 off the + // SIGNED service, two namespaces land at distinct keys — a + // `memory:travel` cap physically cannot read/write the + // `memory:personal` object. 
This is the namespace-isolation gate, + // enforced by construction (signed service ⇒ key + scope + AAD). + let travel = s3_key("0xabc", "memory:travel"); + let personal = s3_key("0xabc", "memory:personal"); + assert_ne!(travel, personal); + assert_eq!(travel, "bots/abc/memory/memory:travel.enc"); + assert!(personal.contains("memory:personal")); + } } From f8d7c51dea8769bacb7db494b956abc430e4f7f3 Mon Sep 17 00:00:00 2001 From: Hanwen Cheng Date: Tue, 2 Jun 2026 16:46:54 +0800 Subject: [PATCH 2/6] =?UTF-8?q?docs:=20memory=20engine=20integration=20?= =?UTF-8?q?=E2=80=94=20Hermes=20providers=20+=20adapter=20seam=20(#147)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Answers 'Hermes lists many memory providers — which to pick, how to stay compatible' and folds the strategy into the source of truth. plan/agentkeys-memory-design.md: - New §6a 'Engine integration — Hermes providers + the adapter seam': - Reframe: the ~9 Hermes providers bundle engine+store+delivery; they slot into AgentKeys' pluggable ENGINE axis, not as peers. - Delivery stays at the pre_llm_call hook (#141), NOT the runtime memory.provider interface (its lifecycle step 6 hands the LLM memory-enumeration tools — breaks the no-whole-context invariant). - Canonical engine = OpenViking (self-hosted, deterministic, zero third-party egress); Holographic second; cloud providers = tier 3. - Adapter seam = one MemoryEngine trait (extract/rank/synthesize); compatibility = one conformance test with the engine swapped and store+gate+delivery held constant. - Two compatibility tiers, one gate: local=own-store+gate-read, cloud=gate-egress+audit. - New engine stage E0 (the recommended start): MemoryEngine trait + OpenViking reference adapter + swap-the-engine conformance test. arch.md: - §22 pluggable surfaces: add 'Memory engine' axis row (fix stale 'six'->'eight' axes count). 
- §15.2 memory-service: document namespace = signed service 'memory:<namespace>' (#147) and the pluggable-engine posture; add the previously-missing outward links to the plan + research docs. --- docs/arch.md | 5 +- docs/plan/agentkeys-memory-design.md | 77 +++++++++++++++++++++++++++- 2 files changed, 79 insertions(+), 3 deletions(-) diff --git a/docs/arch.md b/docs/arch.md index 94f2ea5d..7d3fa0cd 100644 --- a/docs/arch.md +++ b/docs/arch.md @@ -893,6 +893,8 @@ Each data class gets its own worker — independent IAM, independent deploy life - **`master_wallet` on chain?** No - **Operations:** R/W agent state at high frequency. **STS session policies enable direct S3 access** from the agent process for the duration of the session — the worker is NOT in the LLM-call hot path. The worker mints a TTL-bounded STS session at session start; the agent's localhost SDK uses STS creds for many ops within the TTL. - **OIDC federation (issue #90):** Same `X-Aws-*` header passthrough as creds. Each data-class has its own IAM role (`agentkeys-memory-role`); memory-role STS creds are rejected at the vault bucket and vice versa. See §17.5. +- **Namespace = signed service (issue #147):** the memory `service` carries the namespace as **`memory:<namespace>`** (e.g. `memory:travel`). Because `service` is a signed cap field, the namespace is tamper-proof and is authorized by the existing on-chain `isServiceInScope(operator, actor, keccak("memory:<namespace>"))` gate. The worker keys storage (`bots/<actor>/memory/memory:<namespace>.enc`), the envelope AAD, and the scope check all off that one signed field — so two namespaces are physically segregated with no new mechanism. Minted in `crates/agentkeys-mcp-server/src/tools/memory.rs`; enforced in `crates/agentkeys-worker-memory/src/handlers.rs`. +- **Memory engine — pluggable, not built in v0 (Position C):** the worker is **store + gate only** (deterministic, no ranking, no LLM). 
Ranking / extraction / consolidation is delegated to an external engine via an adapter trait (`extract` / `rank` / `synthesize`); canonical reference engine **OpenViking**; delivery via the `pre_llm_call` hook (#141), never a runtime `memory.provider`. Full design + Hermes-provider compatibility strategy: [`plan/agentkeys-memory-design.md`](plan/agentkeys-memory-design.md) (§6a engine seam; §22 pluggable-axis row). Background: [`research/ai-memory-systems-survey.md`](research/ai-memory-systems-survey.md), decision record [`research/memory-build-vs-gate-decision.md`](research/memory-build-vs-gate-decision.md), [`research/universal-gate-pattern.md`](research/universal-gate-pattern.md). ### 15.3 audit-service @@ -1645,7 +1647,7 @@ The only things that change: `K3EpochCounter.current_epoch` (1 chain tx), signer ## 22. Pluggable surfaces -The architecture is intentionally pluggable on six axes. Each axis has a default v2 implementation and a documented swap-in path. +The architecture is intentionally pluggable on eight axes. Each axis has a default v2 implementation and a documented swap-in path. | Axis | v2 default | Future swap | Swap mechanism | |---|---|---|---| @@ -1656,6 +1658,7 @@ The architecture is intentionally pluggable on six axes. 
Each axis has a default | **Worker runtime** | AWS Lambda + API Gateway | axum microservice (vendor-neutral); Cloudflare Worker (edge); Tencent SCF (China) | Worker shape per §15 is uniform across runtimes | | **Payment rail** | Per mode: P-1 service-pool / P-2 escrow / P-3 direct | Mode + upstream (Stripe, USDC, SOL, fiat) | Per-mode plugins layer on the §15.5 wire shape | | **Clear-signing metadata** (issue #82) | Bundled ERC-7730 v2 set under `agentkeys-core::clear_signing::fixtures/` (USDC permit + curated DEX routers + permit2) | Registry fetch from `github.com/ethereum/clear-signing-erc7730-registry` at daemon startup; on-chain registry / IPFS-pinned + signature-verified | `ClearSigningCatalog` trait in [`crates/agentkeys-core/src/clear_signing/`](../crates/agentkeys-core/src/clear_signing/); bundled → registry-cached → on-chain progression. Operator-custom files via `$AGENTKEYS_7730_DIR` env var | +| **Memory engine** (issue #147) | None in v0 — `agentkeys-worker-memory` is **store + gate only** (deterministic, no ranking, no LLM) | OpenViking (canonical) / Holographic / mem0-self-hosted / Hermes-native / Claude memory tool / agentmemory | External-engine adapter trait (`extract` / `rank` / `synthesize`) over the cap-gated store. Delivery via the `pre_llm_call` injection hook (#141), **not** a runtime `memory.provider` (which would hand the LLM memory-enumeration tools). Local engines: own-store + gate-the-read; cloud engines: gate-the-egress + audit. See [`plan/agentkeys-memory-design.md` §6a](plan/agentkeys-memory-design.md) | **Pluggability is the point.** No single backend is load-bearing for the architecture; the contracts (auth-plugin trait, signer-protocol, audit trait, worker shape, chain ABI) are. 
This is what lets: diff --git a/docs/plan/agentkeys-memory-design.md b/docs/plan/agentkeys-memory-design.md index 930ba5b7..ad2d1b2b 100644 --- a/docs/plan/agentkeys-memory-design.md +++ b/docs/plan/agentkeys-memory-design.md @@ -87,7 +87,7 @@ - Stay backward-compatible with the current `memory_put` / `memory_get` blob primitive (one operator's "service" might genuinely want raw blob KV). - Land **zero** changes to: broker cap-mint protocol, the data_class isolation gate (`DataClass::Memory`), the per-data-class IAM bucket separation (arch.md §17.5), K3-derived KEK, AES-256-GCM envelope format. -**Explicitly delegated (NOT an AgentKeys goal, per decision record Position C):** the memory *engine* — embeddings, vector/BM25/graph ranking, extraction, consolidation, decay. These run in a pluggable external engine in front of the store, or in the optional E1/E2 stages (§9) only if an operator demands in-worker ranking. +**Explicitly delegated (NOT an AgentKeys goal, per decision record Position C):** the memory *engine* — embeddings, vector/BM25/graph ranking, extraction, consolidation, decay. These run in a pluggable external engine in front of the store, or in the optional E1/E2 stages (§9) only if an operator demands in-worker ranking. **How an external engine plugs in — the adapter seam, the canonical engine pick (OpenViking), and Hermes-provider compatibility — is specified in §6a.** **Non-goals (v0):** @@ -494,6 +494,78 @@ The reference implementation ships §6.1; §6.2 is a documented hook with the sc --- +## 6a. Engine integration — Hermes providers + the adapter seam + +> **🔌 ENGINE — pluggable, not built in v0.** This section specifies *how* an external engine plugs onto the store+gate, using the [Hermes runtime's memory-provider ecosystem](https://hermes-agent.nousresearch.com/docs/user-guide/features/memory-providers) as the worked example. It adds nothing to the v0 build; it defines the adapter contract the first engine milestone (§9 stage **E0**) implements. 
This is the answer to "Hermes lists many memory providers — which do we pick, and how do we stay compatible with the rest?" + +### 6a.1 The reframe: Hermes "memory providers" are *engines*, not peers + +Hermes ships ~9 memory providers — Honcho, Mem0, Hindsight, Holographic, OpenViking, RetainDB, ByteRover, Supermemory, Memori. **Each bundles three things this design deliberately splits**: an *engine* (extract / rank / synthesize), a *store* (where the bytes live), and a *delivery* path (how memory reaches the LLM). AgentKeys owns the **store** (K3-encrypted per-actor S3) and the **gate** (cap + scope + namespace + audit). So a Hermes provider is not a peer of AgentKeys — it slots into AgentKeys' **engine** axis. The integration question is "which engine ranks the lines our store holds and our gate authorizes," never "which provider replaces AgentKeys." + +### 6a.2 Delivery stays at the hook layer, NOT the provider interface + +The Hermes provider lifecycle's step 6 is *"adds provider-specific tools for memory management"* — it hands the LLM tools to query/enumerate memory. That breaks invariant #2 (LLM never sees the whole memory) and weakens invariant #3 (LLM pluggable). So AgentKeys delivers memory through the **`pre_llm_call` hook** (`agentkeys wire hermes`, issue #141), **not** by registering as a Hermes `memory.provider`. The hook (`crates/agentkeys-cli/src/hook.rs` → `memory-inject`) *injects* a namespaced block into the prompt and deliberately exposes no query tool to the model (it does not even read the host's prompt from stdin). The privacy thesis, in code: + +| Integration surface | Who controls retrieval | LLM gets memory tools? 
| Verdict | +|---|---|---|---| +| Hermes `memory.provider: ` | the provider | **yes** (lifecycle step 6) | ✗ violates invariant #2 / #3 | +| AgentKeys `pre_llm_call` hook (#141) | the gate + engine, off-LLM | no — passive injection only | ✓ canonical delivery | + +**Coexistence rule:** a wired AgentKeys runtime keeps `memory.provider` unset (or `none`) — the AgentKeys hook is the *sole* memory delivery. A Hermes provider running in addition would double-inject from a second source of truth. `agentkeys wire` already owns the `hooks:` block (see [`../user-manual.md`](../user-manual.md)); it intentionally leaves `memory.provider` untouched. + +### 6a.3 How to start: pick a canonical engine by one axis + +The axis that protects the two load-bearing properties (own-the-bytes + LLM-pluggable) is **store-locality + determinism + zero third-party egress**: + +| Tier | Providers | Why this tier | Action | +|---|---|---|---| +| **1 — canonical** | **OpenViking** (self-hosted, `OPENVIKING_ENDPOINT`, tiered retrieval over a hierarchy); **Holographic** (local SQLite, HRR algebra — no LLM in the loop) | bytes stay on operator infra; ranking is deterministic; config is one endpoint/path we control. OpenViking's "filesystem hierarchy + tiered retrieval" is ~1:1 with our namespaced S3 store. | **Build the adapter against OpenViking first.** Holographic second — it proves the no-LLM-call ranking property. | +| **2 — extraction-local** | ByteRover (local pre-compression extraction); Hindsight (local mode) | local-ish; useful for the `extract` call, not just `rank` | after Tier 1 | +| **3 — gate-the-egress only** | Mem0, Honcho, Supermemory, RetainDB, Memori (cloud-bundled store) | their cloud sees the bytes — fights own-store. We cannot *store*, but the gate still controls the *call*. 
| support as "operator accepts egress"; the cap authorizes whether the egress happens, audit records it | + +**Recommendation: OpenViking is the canonical engine to test.** Self-hosted single endpoint, no cloud account, maps onto the store, privacy thesis intact out of the box. Confirm its exact interface with a ½–1 day spike before writing the adapter (the provider doc is a summary, not a contract). + +### 6a.4 The adapter seam — one trait, three calls + +Compatibility does **not** mean matching Hermes' provider API. It means normalizing every engine onto **AgentKeys' own narrow seam**, with store + gate + delivery held invariant and only the engine swapping: + +```rust +trait MemoryEngine { + // optional — many engines extract server-side; deterministic engines skip it + fn extract(&self, turn: &Turn) -> Vec; + // the load-bearing call: order gate-authorized line IDs for this query + fn rank(&self, query: &Query, candidates: &[LineId], budget: Budget) -> Vec; + // optional — summary/consolidation, when the engine offers it + fn synthesize(&self, facts: &[Fact]) -> Option; +} +``` + +`rank` is load-bearing: the engine sees only **line IDs + metadata** from `/v1/memory/list` (already namespace-filtered by the gate), orders them, then the caller reads the winners via `/v1/memory/get`. The engine never holds the plaintext store — it ranks references the gate already authorized. `extract` / `synthesize` are optional (cloud engines extract server-side; Holographic skips extraction entirely). + +### 6a.5 Compatibility = one conformance test, engine swapped + +An engine **"is compatible"** iff it passes a single golden-path conformance test with store / gate / delivery constant and only the engine swapped: + +> seed the Chengdu fixture → gated `append` → engine `rank` over `list` output → `pre_llm_call` injects the top-K block → assert the injected text. + +Same test, swap the `MemoryEngine` impl. 
That is the testable definition of "fits the others" — behavioral conformance over a fixed store+gate+delivery, not API-shape matching. + +### 6a.6 Two compatibility tiers, one gate + +| Engine class | Store posture | Gate posture | What the cap authorizes | +|---|---|---|---| +| **Local** (OpenViking, Holographic, ByteRover-local) | own-the-store (S3) | gate-the-read | which actor / namespace may `get` / `list` | +| **Cloud** (Mem0, Honcho, …) | can't own (egress) | gate-the-egress + audit | *whether* actor / namespace may call out at all | + +The same cap-token + scope contract drives both; only the enforcement point moves (read-time vs. call-time). This is the [universal gate pattern](../research/universal-gate-pattern.md) applied to the engine axis — the gate stays deterministic and policy-carrying whether or not we hold the bytes. + +### 6a.7 Relationship to existing sections + +- **§7.4 (Mem0 / Letta / LangMem export adapter)** is the *data-portability* bridge — move bytes between runtimes at rest. **This section** is the *live-ranking* bridge — let an external engine rank our at-rest store per turn. Same delegation philosophy, different verb (migrate vs. rank). +- **§5 / §6** describe an engine's *internal* concerns (index, extraction) if one is ever built in-worker (stages E1 / E2). This section describes the *boundary* to an engine running outside the worker — the common case under Position C. + +--- + ## 7. Portability — `agentkeys memory export` / `import` ### 7.1 Export bundle format @@ -628,7 +700,8 @@ Core path: **M-1 → M0 → M1 → M1.5 → M2** is the v0 gated-backend ship (~ | Stage | Deliverable | Status | |---|---|---| -| **E1** | `/v1/memory/rebuild-index` + `/v1/memory/search` (caller embeds, worker scores cosine; optionally BM25 + RRF per the agentmemory-followup research). Index format per §5. Microbench at 10K/100K/1M. Adds the `search`/`rebuild_index` handler modules deferred in M0. 
| **Deferred / pluggable.** Most operators use an external engine (mem0-self-hosted / Claude memory tool / Hermes-native) instead. Build E1 only if "ranking inside the AgentKeys worker, no external engine" is an explicit operator requirement. | +| **E0** | **External-engine adapter seam** (§6a): the `MemoryEngine` trait (`extract` / `rank` / `synthesize`) + an **OpenViking** reference adapter + the swap-the-engine conformance test (Chengdu golden path over a fixed store+gate+`pre_llm_call` delivery). Depends on the core gate (M1.5 namespaces) being green so `rank` operates over gate-authorized `list` output. | **First engine milestone — the recommended start.** Proves "external engine ranks, AgentKeys store+gate holds + authorizes, hook injects." No in-worker ranking; delivery stays at the hook layer, never the runtime's `memory.provider` interface. | +| **E1** | `/v1/memory/rebuild-index` + `/v1/memory/search` (caller embeds, worker scores cosine; optionally BM25 + RRF per the agentmemory-followup research). Index format per §5. Microbench at 10K/100K/1M. Adds the `search`/`rebuild_index` handler modules deferred in M0. | **Deferred / pluggable.** Most operators use an external engine (mem0-self-hosted / Claude memory tool / Hermes-native) instead. Build E1 only if "ranking *inside* the AgentKeys worker, no external engine" is an explicit operator requirement — i.e. the in-worker alternative to E0. | | **E2** | Extractor sidecar reference (§6.2) — client-side extraction, never in the worker. | **Deferred / pluggable.** External engines bring their own extraction. | The engine stages are the part the decision record says the ecosystem already does well — buildable fallback, not the plan of record. E1/E2 fork independently of the core trunk if ever taken. 
From d9351d85fd6496cdfd0c3dbe613bd69b0e24b45d Mon Sep 17 00:00:00 2001 From: Hanwen Cheng Date: Tue, 2 Jun 2026 17:09:36 +0800 Subject: [PATCH 3/6] =?UTF-8?q?feat(memory):=20pluggable=20engine=20seam?= =?UTF-8?q?=20(E0)=20=E2=80=94=20MemoryEngine=20trait=20+=20engines=20(#14?= =?UTF-8?q?7)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Starts implementing plan §6a / arch.md §22 'Memory engine' axis: the caller-side, deterministic engine seam that ranks/selects gate-authorized memory lines before injection (never in the worker, never an LLM in the gate). crates/agentkeys-core/src/memory_engine.rs (new): - MemoryEngine trait: select(query, lines, budget) -> lines. - PassthroughEngine: identity when unbounded (today's full-blob inject); recency-trim when a budget is set. - LexicalEngine: deterministic term-overlap ranking with a query (stopword filtered), recency fallback without one. Real reference engine, no LLM, no external service. - select_blob(): the blob->blob seam contract — swapping the engine never changes the signature, only the selected subset (plan §6a.5 conformance). - SelectionBudget + env config (AGENTKEYS_MEMORY_ENGINE / _MAX_LINES / _MAX_BYTES). 8 unit + conformance tests. crates/agentkeys-cli/src/hook.rs: - memory-inject (pre_llm_call) now runs the configured engine over each namespace blob. Default passthrough + unbounded budget = byte-identical to prior behavior (the Chengdu single-line fixture is unchanged). OpenViking/Holographic adapters implement the SAME trait and are the next adapters — deferred pending the API spike (plan §6a.3); not fabricated here. Tests: cargo test -p agentkeys-core -p agentkeys-cli green (core 38 incl. 8 new engine tests; cli unchanged-green). 
--- crates/agentkeys-cli/src/hook.rs | 17 +- crates/agentkeys-core/src/lib.rs | 1 + crates/agentkeys-core/src/memory_engine.rs | 293 +++++++++++++++++++++ 3 files changed, 310 insertions(+), 1 deletion(-) create mode 100644 crates/agentkeys-core/src/memory_engine.rs diff --git a/crates/agentkeys-cli/src/hook.rs b/crates/agentkeys-cli/src/hook.rs index 40cef97d..4cb5de96 100644 --- a/crates/agentkeys-cli/src/hook.rs +++ b/crates/agentkeys-cli/src/hook.rs @@ -246,6 +246,13 @@ pub async fn memory_inject( // pipe a payload (EOF arrives) so they were unaffected; direct calls were not. let client = HookClient::resolve(mcp_url, vendor_token, actor, operator); + // Pluggable engine seam (plan §6a): the gate already authorized these bytes; + // the engine — caller-side, deterministic, no LLM — selects which lines to + // inject within a budget. Default `passthrough` + unbounded budget injects + // the whole namespace unchanged. Passive injection carries no query (None). + let engine = agentkeys_core::memory_engine::engine_from_env(); + let budget = agentkeys_core::memory_engine::SelectionBudget::from_env(); + let mut chunks = Vec::new(); for ns in namespaces .split(',') @@ -258,7 +265,15 @@ pub async fn memory_inject( { Ok(result) => { if let Some(text) = extract_memory_content(&result) { - chunks.push(format!("## Memory: {ns}\n{text}")); + let selected = agentkeys_core::memory_engine::select_blob( + engine.as_ref(), + None, + &text, + &budget, + ); + if !selected.is_empty() { + chunks.push(format!("## Memory: {ns}\n{selected}")); + } } } Err(e) => { diff --git a/crates/agentkeys-core/src/lib.rs b/crates/agentkeys-core/src/lib.rs index 5b5926c4..009996fe 100644 --- a/crates/agentkeys-core/src/lib.rs +++ b/crates/agentkeys-core/src/lib.rs @@ -6,6 +6,7 @@ pub mod chain_profile; pub mod clear_signing; pub mod device_crypto; pub mod init_flow; +pub mod memory_engine; pub mod mock_client; pub mod otp; pub mod payment; diff --git a/crates/agentkeys-core/src/memory_engine.rs 
b/crates/agentkeys-core/src/memory_engine.rs new file mode 100644 index 00000000..8fd6abac --- /dev/null +++ b/crates/agentkeys-core/src/memory_engine.rs @@ -0,0 +1,293 @@ +//! Pluggable memory-engine seam — plan `docs/plan/agentkeys-memory-design.md` +//! §6a, arch.md §22 "Memory engine" axis. +//! +//! The engine runs CALLER-SIDE over already-gate-authorized memory lines — +//! never inside the worker, never with an LLM in the gate. `select` is the +//! load-bearing call: passive `pre_llm_call` injection passes `query = None` +//! plus a budget (which lines to inject when a namespace grows large); a +//! future `memory.search` tool passes `query = Some(..)`. +//! +//! External engines (OpenViking, Holographic, mem0-self-hosted, …) implement +//! this same trait — that is the compatibility seam (plan §6a.4): swap the +//! engine, hold store + gate + delivery constant. The two engines shipped +//! here are deterministic and need no external service. + +use std::collections::HashSet; + +/// One unit the engine ranks/selects over. In the v0 single-blob store a line +/// is a `\n`-split segment of a namespace blob; under the future per-line store +/// (plan M1) it maps 1:1 to a stored line. `seq` is blob position — higher is +/// later, treated as more recent for recency ranking. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct MemoryLine { + pub text: String, + pub seq: usize, +} + +impl MemoryLine { + pub fn from_blob(blob: &str) -> Vec<MemoryLine> { + blob.lines() + .map(str::trim_end) + .filter(|line| !line.is_empty()) + .enumerate() + .map(|(seq, text)| MemoryLine { + text: text.to_string(), + seq, + }) + .collect() + } +} + +/// Upper bounds on what gets injected. Unbounded (both `None`) means the engine +/// is an identity passthrough — preserving today's full-blob injection. 
+#[derive(Debug, Clone, Default)] +pub struct SelectionBudget { + pub max_lines: Option<usize>, + pub max_bytes: Option<usize>, +} + +impl SelectionBudget { + pub fn is_unbounded(&self) -> bool { + self.max_lines.is_none() && self.max_bytes.is_none() + } + + pub fn from_env() -> SelectionBudget { + SelectionBudget { + max_lines: env_usize("AGENTKEYS_MEMORY_MAX_LINES"), + max_bytes: env_usize("AGENTKEYS_MEMORY_MAX_BYTES"), + } + } +} + +fn env_usize(key: &str) -> Option<usize> { + std::env::var(key).ok().and_then(|raw| raw.trim().parse().ok()) +} + +/// The pluggable engine. Input is gate-authorized lines; output is the ordered +/// subset to inject. Implementations MUST be pure (no LLM, no I/O in the gate +/// path) so the selection stays deterministic and auditable. +pub trait MemoryEngine: Send + Sync { + fn name(&self) -> &'static str; + fn select( + &self, + query: Option<&str>, + lines: Vec<MemoryLine>, + budget: &SelectionBudget, + ) -> Vec<MemoryLine>; +} + +/// Keep a prefix of a priority-ordered list within the budget. +fn apply_budget(ordered: Vec<MemoryLine>, budget: &SelectionBudget) -> Vec<MemoryLine> { + let line_capped = match budget.max_lines { + Some(max) => ordered.into_iter().take(max).collect(), + None => ordered, + }; + let Some(max_bytes) = budget.max_bytes else { + return line_capped; + }; + let mut used = 0usize; + let mut kept = Vec::new(); + for line in line_capped { + let cost = line.text.len() + 1; + if used + cost > max_bytes && !kept.is_empty() { + break; + } + used += cost; + kept.push(line); + } + kept +} + +/// Identity engine — preserves the current behavior. Unbounded budget returns +/// every line untouched; a bounded budget keeps the most recent lines. 
+pub struct PassthroughEngine; + +impl MemoryEngine for PassthroughEngine { + fn name(&self) -> &'static str { + "passthrough" + } + + fn select( + &self, + _query: Option<&str>, + lines: Vec<MemoryLine>, + budget: &SelectionBudget, + ) -> Vec<MemoryLine> { + if budget.is_unbounded() { + return lines; + } + let mut by_recency = lines; + by_recency.sort_by(|a, b| b.seq.cmp(&a.seq)); + apply_budget(by_recency, budget) + } +} + +/// Deterministic lexical engine. With a query it ranks by term overlap (recency +/// breaks ties); without a query it falls back to pure recency. No LLM, no +/// embeddings, no external service — a real reference engine for the seam. +pub struct LexicalEngine; + +const STOPWORDS: &[&str] = &[ + "a", "an", "and", "are", "as", "at", "be", "but", "by", "did", "do", "does", "for", "from", + "had", "has", "have", "in", "is", "it", "my", "of", "on", "or", "the", "to", "was", "what", + "when", "where", "which", "who", "will", "with", "you", "your", +]; + +fn tokenize(text: &str) -> HashSet<String> { + text.to_lowercase() + .split(|c: char| !c.is_alphanumeric()) + .filter(|token| token.len() > 1 && !STOPWORDS.contains(token)) + .map(|token| token.to_string()) + .collect() +} + +impl MemoryEngine for LexicalEngine { + fn name(&self) -> &'static str { + "lexical" + } + + fn select( + &self, + query: Option<&str>, + lines: Vec<MemoryLine>, + budget: &SelectionBudget, + ) -> Vec<MemoryLine> { + let query_terms = query.map(tokenize).unwrap_or_default(); + let mut scored: Vec<(i64, usize, MemoryLine)> = lines + .into_iter() + .map(|line| { + let score = if query_terms.is_empty() { + 0 + } else { + let line_terms = tokenize(&line.text); + query_terms + .iter() + .filter(|term| line_terms.contains(*term)) + .count() as i64 + }; + (score, line.seq, line) + }) + .collect(); + scored.sort_by(|a, b| b.0.cmp(&a.0).then(b.1.cmp(&a.1))); + let ordered = scored.into_iter().map(|(_, _, line)| line).collect(); + apply_budget(ordered, budget) + } +} + +pub fn engine_from_name(name: &str) -> Box<dyn MemoryEngine> { + match 
name.trim().to_lowercase().as_str() { + "lexical" => Box::new(LexicalEngine), + _ => Box::new(PassthroughEngine), + } +} + +pub fn engine_from_env() -> Box<dyn MemoryEngine> { + engine_from_name(&std::env::var("AGENTKEYS_MEMORY_ENGINE").unwrap_or_default()) +} + +/// Apply an engine to one namespace blob and return the injection-ready text. +/// Selected lines are re-sorted to chronological (`seq`) order so the injected +/// block reads naturally regardless of how the engine ranked internally. This +/// `blob -> blob` contract is the seam: swapping the engine never changes the +/// signature, only the selected subset. +pub fn select_blob( + engine: &dyn MemoryEngine, + query: Option<&str>, + blob: &str, + budget: &SelectionBudget, +) -> String { + let lines = MemoryLine::from_blob(blob); + if lines.is_empty() { + return blob.trim().to_string(); + } + let mut selected = engine.select(query, lines, budget); + selected.sort_by_key(|line| line.seq); + selected + .into_iter() + .map(|line| line.text) + .collect::<Vec<_>>() + .join("\n") +} + +#[cfg(test)] +mod tests { + use super::*; + + const BLOB: &str = "\ +Chengdu trip — Apr 12 to 16, hotpot at Yulin. +Allergic to peanuts. +Prefers window seats on flights. +Tokyo conference in March, stayed in Shibuya."; + + fn budget(max_lines: Option<usize>) -> SelectionBudget { + SelectionBudget { + max_lines, + max_bytes: None, + } + } + + #[test] + fn passthrough_unbounded_is_identity() { + let lines = MemoryLine::from_blob(BLOB); + let out = PassthroughEngine.select(None, lines.clone(), &SelectionBudget::default()); + assert_eq!(out, lines); + } + + #[test] + fn passthrough_budget_keeps_most_recent() { + let out = select_blob(&PassthroughEngine, None, BLOB, &budget(Some(2))); + // most-recent two lines, re-sorted chronologically + assert_eq!( + out, + "Prefers window seats on flights.\nTokyo conference in March, stayed in Shibuya." 
+ ); + } + + #[test] + fn lexical_with_query_selects_relevant_line() { + let out = select_blob(&LexicalEngine, Some("where did I go in Chengdu"), BLOB, &budget(Some(1))); + assert_eq!(out, "Chengdu trip — Apr 12 to 16, hotpot at Yulin."); + } + + #[test] + fn lexical_without_query_is_recency() { + let out = select_blob(&LexicalEngine, None, BLOB, &budget(Some(1))); + assert_eq!(out, "Tokyo conference in March, stayed in Shibuya."); + } + + #[test] + fn single_line_blob_unchanged_across_engines() { + let single = "Chengdu trip — Apr 12 to 16, hotpot at Yulin."; + let unbounded = SelectionBudget::default(); + assert_eq!(select_blob(&PassthroughEngine, None, single, &unbounded), single); + assert_eq!(select_blob(&LexicalEngine, Some("chengdu"), single, &unbounded), single); + } + + #[test] + fn conformance_swap_engine_same_contract_different_selection() { + // The seam definition (plan §6a.5): same blob + budget + query, swap + // only the engine. The String->String contract holds for both; the + // engine is the sole variable, so the selected subset differs. 
+ let query = Some("peanuts allergic"); + let b = budget(Some(1)); + let passthrough = select_blob(&PassthroughEngine, query, BLOB, &b); + let lexical = select_blob(&LexicalEngine, query, BLOB, &b); + assert_eq!(passthrough, "Tokyo conference in March, stayed in Shibuya."); // recency + assert_eq!(lexical, "Allergic to peanuts."); // relevance + assert_ne!(passthrough, lexical); + } + + #[test] + fn from_name_defaults_to_passthrough() { + assert_eq!(engine_from_name("lexical").name(), "lexical"); + assert_eq!(engine_from_name("passthrough").name(), "passthrough"); + assert_eq!(engine_from_name("nonsense").name(), "passthrough"); + assert_eq!(engine_from_name("").name(), "passthrough"); + } + + #[test] + fn empty_blob_stays_empty() { + assert_eq!(select_blob(&PassthroughEngine, None, " ", &SelectionBudget::default()), ""); + assert_eq!(select_blob(&LexicalEngine, Some("x"), "", &SelectionBudget::default()), ""); + } +} From 1768b0aa896bd95395be0ce7d7555f684642ede3 Mon Sep 17 00:00:00 2001 From: Hanwen Cheng Date: Tue, 2 Jun 2026 17:18:41 +0800 Subject: [PATCH 4/6] feat(wire+demo): bake memory engine into wired hook; demo over real worker (#147) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Task 2 — make phase1-wire-demo prove memory against the REAL provider, now through the pluggable engine (plan §6a). The engine runs caller-side in the wired pre_llm_call hook, so it must be baked at wire time (Hermes invokes the hook in the Phase-4 chat, where the demo's env can't reach). agentkeys wire (main.rs + wire.rs): - New --memory-engine (passthrough|lexical, default passthrough) and --memory-max-lines flags. They bake AGENTKEYS_MEMORY_ENGINE / AGENTKEYS_MEMORY_MAX_LINES into the generated memory-inject script. - Default passthrough + no budget emits NO engine env → the generated script is byte-identical to before (idempotency + existing tests hold). - 2 new tests: omitted-by-default, and baked-when-set (env precedes exec). 
harness/phase1-wire-demo.sh: - MEMORY_ENGINE / MEMORY_MAX_LINES config (default passthrough = unchanged). - Passes them to , so in --real the wired hook runs the engine over the REAL memory worker's lines. - 3.1 now surfaces engine + source ('engine=lexical via REAL worker → …'). - bash -n clean. docs/operator-runbook-wire.md: document the MEMORY_ENGINE / MEMORY_MAX_LINES knobs in the env-override list (runbook-fix-fold-back). Note: the full --real run needs the operator's live stack (broker + Heima + WebAuthn + sandbox), not runnable from this worktree; verified via cargo test (core+cli green) + bash -n. To SEE selection, run --real with a multi-line SEED_MEMORY_CONTENT and MEMORY_ENGINE=lexical MEMORY_MAX_LINES=N. --- crates/agentkeys-cli/src/main.rs | 14 ++++++++++ crates/agentkeys-cli/src/wire.rs | 47 +++++++++++++++++++++++++++++++- docs/operator-runbook-wire.md | 1 + harness/phase1-wire-demo.sh | 12 ++++++-- 4 files changed, 71 insertions(+), 3 deletions(-) diff --git a/crates/agentkeys-cli/src/main.rs b/crates/agentkeys-cli/src/main.rs index 5b18815f..c4af2fdd 100644 --- a/crates/agentkeys-cli/src/main.rs +++ b/crates/agentkeys-cli/src/main.rs @@ -348,6 +348,16 @@ enum Commands { /// empty for the in-memory backend. JWTs expire — re-run wire to refresh. #[arg(long, env = "AGENTKEYS_SESSION_BEARER", default_value = "")] session_bearer: String, + + /// Memory engine baked into the pre_llm_call hook: `passthrough` + /// (inject the whole namespace, default) or `lexical` (deterministic + /// recency/relevance selection). Plan §6a / arch.md §22. + #[arg(long, env = "AGENTKEYS_MEMORY_ENGINE", default_value = "passthrough")] + memory_engine: String, + + /// Cap how many memory lines the engine injects (omit = unbounded). 
+ #[arg(long, env = "AGENTKEYS_MEMORY_MAX_LINES")] + memory_max_lines: Option, }, #[command( @@ -1087,6 +1097,8 @@ async fn main() { mcp_url, vendor_token, session_bearer, + memory_engine, + memory_max_lines, } => agentkeys_cli::wire::cmd_wire( runtime, agentkeys_cli::wire::WireRequest { @@ -1097,6 +1109,8 @@ async fn main() { mcp_url: mcp_url.clone(), vendor_token: vendor_token.clone(), session_bearer: session_bearer.clone(), + memory_engine: memory_engine.clone(), + memory_max_lines: *memory_max_lines, check_only: *check_only, }, ), diff --git a/crates/agentkeys-cli/src/wire.rs b/crates/agentkeys-cli/src/wire.rs index 76f5402e..1791791b 100644 --- a/crates/agentkeys-cli/src/wire.rs +++ b/crates/agentkeys-cli/src/wire.rs @@ -38,6 +38,12 @@ pub struct WireRequest { /// (TTL ≤ 5h) — re-run `agentkeys wire` to refresh, or point the demo at /// a fresh session. pub session_bearer: String, + /// Memory engine baked into the pre_llm_call hook (`passthrough` | `lexical`, + /// plan §6a). `passthrough`/empty injects the whole namespace and emits no + /// engine env, so the generated script stays byte-identical to the default. + pub memory_engine: String, + /// Optional cap on how many memory lines the engine injects (None = all). + pub memory_max_lines: Option, /// When true, report drift without writing (drift-check / dry-run). 
pub check_only: bool, } @@ -124,6 +130,19 @@ impl HermesAdapter { body = body, ) }; + let memory_engine_exports = { + let mut exports = String::new(); + if !req.memory_engine.is_empty() && req.memory_engine != "passthrough" { + exports.push_str(&format!( + "export AGENTKEYS_MEMORY_ENGINE={}\n", + shell_quote(&req.memory_engine) + )); + } + if let Some(max_lines) = req.memory_max_lines { + exports.push_str(&format!("export AGENTKEYS_MEMORY_MAX_LINES={max_lines}\n")); + } + exports + }; vec![ ( "agentkeys-pretool-permission-gate.sh".to_string(), @@ -139,7 +158,7 @@ impl HermesAdapter { ( "agentkeys-prellm-memory-inject.sh".to_string(), header(&format!( - "exec {bin} hook memory-inject --namespaces {ns}", + "{memory_engine_exports}exec {bin} hook memory-inject --namespaces {ns}", ns = shell_quote(&req.namespaces), )), ), @@ -513,6 +532,8 @@ mod tests { mcp_url: "http://localhost:8088/mcp".into(), vendor_token: "demo-tok".into(), session_bearer: String::new(), + memory_engine: "passthrough".into(), + memory_max_lines: None, check_only: false, } } @@ -559,6 +580,30 @@ mod tests { .contains("hook memory-inject --namespaces 'travel,personal'")); } + #[test] + fn scripts_omit_memory_engine_by_default() { + let a = HermesAdapter; + // Default passthrough + no budget → no engine env, byte-identical script. 
+ let scripts = a.scripts("/usr/local/bin/agentkeys", &req()); + assert!(!scripts[2].1.contains("AGENTKEYS_MEMORY_ENGINE")); + assert!(!scripts[2].1.contains("AGENTKEYS_MEMORY_MAX_LINES")); + } + + #[test] + fn scripts_bake_memory_engine_when_set() { + let a = HermesAdapter; + let mut r = req(); + r.memory_engine = "lexical".into(); + r.memory_max_lines = Some(3); + let prellm = &a.scripts("/usr/local/bin/agentkeys", &r)[2].1; + assert!(prellm.contains("export AGENTKEYS_MEMORY_ENGINE='lexical'")); + assert!(prellm.contains("export AGENTKEYS_MEMORY_MAX_LINES=3")); + // engine env precedes the exec line so it is in scope for the hook + let engine_at = prellm.find("AGENTKEYS_MEMORY_ENGINE").unwrap(); + let exec_at = prellm.find("hook memory-inject").unwrap(); + assert!(engine_at < exec_at); + } + #[test] fn write_if_changed_is_idempotent() { let dir = std::env::temp_dir().join(format!("agentkeys-wire-{}", std::process::id())); diff --git a/docs/operator-runbook-wire.md b/docs/operator-runbook-wire.md index abdfb3f5..a55c7e09 100644 --- a/docs/operator-runbook-wire.md +++ b/docs/operator-runbook-wire.md @@ -273,6 +273,7 @@ Env overrides: `SANDBOX_URL`, `MCP_PORT`, `SESSION_ID` (default `alice`), `BUILDER_IMAGE` / `CARGO_REGISTRY_VOL` / `CARGO_GIT_VOL` / `RUSTUP_VOL` (build cache), `SBX_EXEC_MAXTIME` (per-sandbox-call ceiling, default 600s), `SEED_MEMORY_CONTENT` / `SEED_SCOPE_SERVICES` (real-mode 1.5 seed), +`MEMORY_ENGINE` (default `passthrough`; set `lexical` for deterministic recency/relevance selection) / `MEMORY_MAX_LINES` (cap injected lines) — the engine is baked into the wired `pre_llm_call` hook and runs over the **real worker's** lines (plan §6a); a multi-line `SEED_MEMORY_CONTENT` makes the selection visible, `OPERATOR_KEY_FILE` (master key for the 0.7 operator-session mint), `AGENTKEYS_REUSE_AGENT=1` (skip Phase P fresh pairing; reuse a master-side agent) · `AGENTKEYS_AGENT_SESSION_BEARER` (override the agent session) · diff --git 
a/harness/phase1-wire-demo.sh b/harness/phase1-wire-demo.sh index cb16477a..7bc88d3e 100755 --- a/harness/phase1-wire-demo.sh +++ b/harness/phase1-wire-demo.sh @@ -48,6 +48,12 @@ LLM_API_KEY="${LLM_API_KEY:-${OPENROUTER_API_KEY:-}}" LLM_BASE_URL="${LLM_BASE_URL:-https://openrouter.ai/api/v1}" LLM_MODEL="${LLM_MODEL:-deepseek/deepseek-v4-flash}" # OpenRouter slug; ':free' tier is 429-throttled MEMORY_NS="${MEMORY_NS:-travel}" +# Memory engine baked into the wired pre_llm_call hook (plan §6a / arch.md §22). +# Default `passthrough` injects the whole namespace (demo unchanged). Set +# MEMORY_ENGINE=lexical (+ optional MEMORY_MAX_LINES, + a multi-line +# SEED_MEMORY_CONTENT) to demo deterministic selection over the REAL worker. +MEMORY_ENGINE="${MEMORY_ENGINE:-passthrough}" +MEMORY_MAX_LINES="${MEMORY_MAX_LINES:-}" PAYMENT_SCOPE="${PAYMENT_SCOPE:-payment.spend}" ENV_FILE="${ENV_FILE:-$REPO_ROOT/scripts/operator-workstation.env}" AGENT_FILE="${AGENT_FILE:-$HOME/.agentkeys/agents/${AGENT_LABEL}.json}" @@ -996,8 +1002,9 @@ phase2_wire() { skip_phase 2 && { log "Phase 2 — wire: skip (--skip-2)"; return; } log "Phase 2 — wire (#141 core)" resolve_sbx_paths || return - local wire_args="hermes --actor-omni $ACTOR_OMNI --operator-omni $OPERATOR_OMNI --namespaces $MEMORY_NS --payment-scope $PAYMENT_SCOPE --mcp-url $MCP_URL_IN_SANDBOX --vendor-token $VENDOR_TOKEN" + local wire_args="hermes --actor-omni $ACTOR_OMNI --operator-omni $OPERATOR_OMNI --namespaces $MEMORY_NS --payment-scope $PAYMENT_SCOPE --mcp-url $MCP_URL_IN_SANDBOX --vendor-token $VENDOR_TOKEN --memory-engine $MEMORY_ENGINE" [[ -n "$SESSION_BEARER" ]] && wire_args="$wire_args --session-bearer $SESSION_BEARER" + [[ -n "$MEMORY_MAX_LINES" ]] && wire_args="$wire_args --memory-max-lines $MEMORY_MAX_LINES" # 2.1 check-only (read-only) sbx_exec "$AGENT_BIN_DST wire $wire_args --check-only" | sed 's/^/ /' @@ -1029,7 +1036,8 @@ phase3_acts() { # 3.1 Act 1 — memory inject (pre_llm_call) local a1; a1="$(sbx_hook 
'agentkeys-prellm-memory-inject.sh' '{"hook_event_name":"pre_llm_call"}')" if echo "$a1" | jq -e '.context' >/dev/null 2>&1; then - ok "3.1 Act1 memory" "$(echo "$a1" | jq -r '.context' | tr '\n' ' ' | cut -c1-60)…" + local mem_src; mem_src="$([[ "$MODE" == real ]] && echo 'REAL worker' || echo 'in-mem fixture')" + ok "3.1 Act1 memory" "engine=$MEMORY_ENGINE via $mem_src → $(echo "$a1" | jq -r '.context' | tr '\n' ' ' | cut -c1-44)…" else fail "3.1 Act1 memory" "no context returned: $a1" fi From 21f617e3986d50e0386beba2f2f98e9638a3abb8 Mon Sep 17 00:00:00 2001 From: Hanwen Cheng Date: Tue, 2 Jun 2026 18:25:59 +0800 Subject: [PATCH 5/6] test(harness): idempotent self-contained storage-solution test (#147) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit harness/storage-test.sh — proves the memory STORAGE solution from a fresh checkout with NO external infra (no AWS, no chain, no broker, no network): step 0 env+cache resolves CARGO_TARGET_DIR/CARGO_HOME (build cache → fast re-runs); unsets AGENTKEYS_BROKER_URL/_DATA_ROLE_ARN so tests can never reach a live broker (the env leak that makes cli provision tests hit prod). step 1 prereqs cargo / jq / curl, fail-loud with install hints. step 2 build cargo build CLI + MCP server; reports cache-hit vs compiled. step 3 suites runs the REAL storage code paths — envelope AES-256-GCM (encrypt-at-rest), per-actor S3 key derivation, namespace isolation (#147), pluggable engine. cli uses --lib to skip the env-dependent provision integration tests. step 4 roundtrip starts an in-process in-memory MCP server and drives put → get → inject end-to-end, plus an engine-selection check (lexical + max_lines budget → 1 of N lines). Idempotent: cargo no-op when unchanged; the MCP server is killed + restarted fresh each run (ephemeral state). Verified: two consecutive runs both ALL GREEN, 2nd reports build cache-hit. NOT a real-S3 proof (in-memory backend = plumbing only). 
For the authoritative live-worker proof, harness/phase1-wire-demo.sh --real remains the path; the header documents this. --- harness/storage-test.sh | 186 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 186 insertions(+) create mode 100755 harness/storage-test.sh diff --git a/harness/storage-test.sh b/harness/storage-test.sh new file mode 100755 index 00000000..9e2860ab --- /dev/null +++ b/harness/storage-test.sh @@ -0,0 +1,186 @@ +#!/usr/bin/env bash +# harness/storage-test.sh — idempotent, self-contained test of the AgentKeys +# memory STORAGE solution. +# +# What it proves, from a FRESH checkout, with NO external infra (no AWS, no +# chain, no broker, no network): +# 1. env + cache — resolves the cargo build cache; sanitizes broker env. +# 2. prereqs — cargo / jq / curl present (fails loud with install hints). +# 3. build — builds the CLI + MCP server (cargo cache → fast re-runs). +# 4. test suites — runs the REAL storage code paths: envelope crypto +# (encrypt-at-rest), per-actor S3 key derivation, +# namespace isolation (#147), and the pluggable engine. +# 5. live roundtrip — starts an in-process MCP server (in-memory backend) and +# drives put → get → inject end-to-end, plus an engine +# selection check (lexical + budget). +# +# Idempotent: every run is a cargo no-op when nothing changed; the MCP server +# is killed + restarted fresh (ephemeral state) each run. Re-run safely. +# +# NOT a real-S3 proof. The in-memory backend exercises the put/get/engine +# PLUMBING without AWS. 
For the authoritative real-worker proof (broker cap-mint +# → per-actor STS → memory.litentry.org → S3), run: +# bash harness/phase1-wire-demo.sh --real +# +# Usage: bash harness/storage-test.sh [--release] [--no-build] [--keep-server] +# --release build + test in release profile (default: debug, faster) +# --no-build skip the build step (use existing binaries) +# --keep-server leave the MCP server running after exit (for manual poking) +# +# Env overrides (no hardcoded values — all have sane defaults): +# CARGO_TARGET_DIR / CARGO_HOME build cache locations +# STORAGE_TEST_PORT (18099) MCP listen port +# STORAGE_TEST_ACTOR / _OPERATOR / _DEVICE demo identities (mirror +# crates/agentkeys-mcp-server/src/backend/in_memory.rs) +# STORAGE_TEST_VENDOR (magiclick) / _TOKEN (demo-tok) + +set -uo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +cd "$REPO_ROOT" || exit 1 + +# ─── output (CLAUDE.md ok/skip/fail convention) ────────────────────────────── +log() { printf '\n[storage-test] %s\n' "$*"; } +ok() { printf ' %-28s ok proceeding (%s)\n' "$1" "$2"; } +skip() { printf ' %-28s skip %s\n' "$1" "$2"; } +fail() { printf ' %-28s FAIL %s\n' "$1" "$2" >&2; FAILED=$((FAILED + 1)); } +FAILED=0 + +# ─── flags ─────────────────────────────────────────────────────────────────── +PROFILE="debug" +CARGO_PROFILE_FLAG="" +DO_BUILD=1 +KEEP_SERVER=0 +for arg in "$@"; do + case "$arg" in + --release) PROFILE="release"; CARGO_PROFILE_FLAG="--release" ;; + --no-build) DO_BUILD=0 ;; + --keep-server) KEEP_SERVER=1 ;; + -h|--help) grep '^#' "$0" | sed 's/^#\{1,\} \{0,1\}//'; exit 0 ;; + *) echo "unknown arg: $arg (try --help)" >&2; exit 2 ;; + esac +done + +# ─── step 0 — env + cache ──────────────────────────────────────────────────── +log "step 0 — env + cache" +export CARGO_TARGET_DIR="${CARGO_TARGET_DIR:-$REPO_ROOT/target}" +export CARGO_HOME="${CARGO_HOME:-$HOME/.cargo}" +# Sanitize broker env so the storage tests can 
NEVER reach a live broker — the +# AGENTKEYS_BROKER_URL leak that otherwise makes provision tests hit prod. +unset AGENTKEYS_BROKER_URL AGENTKEYS_DATA_ROLE_ARN 2>/dev/null || true + +ACTOR="${STORAGE_TEST_ACTOR:-0xa0c701a0c701a0c701a0c701a0c701a0c701a0c701a0c701a0c701a0c701a0c7}" +OPERATOR="${STORAGE_TEST_OPERATOR:-0x07e8a107e8a107e8a107e8a107e8a107e8a107e8a107e8a107e8a107e8a107e8}" +DEVICE="${STORAGE_TEST_DEVICE:-0xdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef}" +PORT="${STORAGE_TEST_PORT:-18099}" +VENDOR="${STORAGE_TEST_VENDOR:-magiclick}" +TOKEN="${STORAGE_TEST_TOKEN:-demo-tok}" +MCP_URL="http://127.0.0.1:$PORT/mcp" +ok "cache" "CARGO_TARGET_DIR=$CARGO_TARGET_DIR" +ok "env" "profile=$PROFILE port=$PORT actor=${ACTOR:0:12}…" + +# ─── step 1 — prereqs ──────────────────────────────────────────────────────── +log "step 1 — prereqs" +have() { command -v "$1" >/dev/null 2>&1; } +if have cargo; then ok "cargo" "$(cargo --version 2>/dev/null | cut -d' ' -f1-2)"; else fail "cargo" "not found — install rust via https://rustup.rs"; fi +if have jq; then ok "jq" "present"; else fail "jq" "not found — brew install jq / apt-get install jq"; fi +if have curl; then ok "curl" "present"; else fail "curl" "not found"; fi +if [[ $FAILED -gt 0 ]]; then log "prereqs missing — aborting ($FAILED)"; exit 1; fi + +# ─── step 2 — build (cached) ───────────────────────────────────────────────── +BIN_DIR="$CARGO_TARGET_DIR/$PROFILE" +AGENTKEYS_BIN="$BIN_DIR/agentkeys" +MCP_BIN="$BIN_DIR/agentkeys-mcp-server" +if [[ "$DO_BUILD" == 1 ]]; then + log "step 2 — build (cargo cache → fast re-runs)" + build_out="$(cargo build $CARGO_PROFILE_FLAG -p agentkeys-cli -p agentkeys-mcp-server 2>&1)" + build_rc=$? 
+ if [[ $build_rc -ne 0 ]]; then + echo "$build_out" | tail -25 >&2 + fail "build" "cargo build failed (rc=$build_rc)" + log "summary: $FAILED failure(s)"; exit 1 + fi + if echo "$build_out" | grep -q "Compiling"; then ok "build" "compiled — cache updated"; else ok "build" "up to date — cache hit (no rebuild)"; fi +else + log "step 2 — build skipped (--no-build)" +fi +[[ -x "$AGENTKEYS_BIN" ]] || { fail "build" "missing binary $AGENTKEYS_BIN (drop --no-build)"; log "summary: $FAILED failure(s)"; exit 1; } +[[ -x "$MCP_BIN" ]] || { fail "build" "missing binary $MCP_BIN (drop --no-build)"; log "summary: $FAILED failure(s)"; exit 1; } + +# ─── step 3 — storage test suites (real code paths) ────────────────────────── +log "step 3 — storage test suites (envelope crypto · per-actor key · namespace isolation · engine)" +run_suite() { + local crate="$1"; shift + local out rc passed failed + out="$(cargo test $CARGO_PROFILE_FLAG -p "$crate" "$@" 2>&1)"; rc=$? + passed="$(echo "$out" | grep -oE '[0-9]+ passed' | awk '{s+=$1} END{print s+0}')" + failed="$(echo "$out" | grep -oE '[0-9]+ failed' | awk '{s+=$1} END{print s+0}')" + if [[ $rc -eq 0 ]]; then ok "test:$crate" "$passed passed"; else echo "$out" | tail -30 >&2; fail "test:$crate" "$failed failed (rc=$rc)"; fi +} +run_suite agentkeys-core # envelope (AES-256-GCM, AAD), s3_backend, memory_engine +run_suite agentkeys-worker-memory # s3_key derivation, memory/credentials prefix split, namespace segregation (#147) +run_suite agentkeys-mcp-server # memory.put / memory.get tools +run_suite agentkeys-cli --lib # engine wiring: wire-bake + hook (--lib skips env-dependent provision integration tests) + +# ─── step 4 — live storage roundtrip (in-memory backend) ───────────────────── +log "step 4 — live roundtrip: put → get → inject → engine-select (in-memory MCP; no AWS/chain/broker)" +# idempotent: clear any prior storage-test server on this port, then start fresh +pkill -f "agentkeys-mcp-server.*--listen 127.0.0.1:$PORT" 
2>/dev/null || true +sleep 0.3 +SERVER_LOG="$(mktemp -t storage-test-mcp.XXXXXX 2>/dev/null || echo /tmp/storage-test-mcp.$$.log)" +"$MCP_BIN" --backend in-memory --transport http --listen "127.0.0.1:$PORT" \ + --vendor-tokens "$VENDOR:$TOKEN" \ + --default-actor "$ACTOR" --default-operator-omni "$OPERATOR" --default-device-key-hash "$DEVICE" \ + >"$SERVER_LOG" 2>&1 & +SERVER_PID=$! +cleanup() { if [[ "$KEEP_SERVER" != 1 && -n "${SERVER_PID:-}" ]]; then kill "$SERVER_PID" 2>/dev/null || true; fi; } +trap cleanup EXIT + +healthy=0 +for _ in $(seq 1 50); do + if curl -fsS -m 2 "http://127.0.0.1:$PORT/healthz" >/dev/null 2>&1; then healthy=1; break; fi + sleep 0.2 +done +if [[ "$healthy" != 1 ]]; then + echo "--- mcp server log ---" >&2; tail -20 "$SERVER_LOG" >&2 + fail "4.0 mcp up" "server not healthy on :$PORT" + log "summary: $FAILED failure(s)"; exit 1 +fi +ok "4.0 mcp up" "in-memory MCP on :$PORT (pid $SERVER_PID)" + +export AGENTKEYS_MCP_URL="$MCP_URL" +export AGENTKEYS_MCP_VENDOR_TOKEN="$TOKEN" +export AGENTKEYS_ACTOR_OMNI="$ACTOR" +export AGENTKEYS_OPERATOR_OMNI="$OPERATOR" + +# 4.1 READ a pre-seeded namespace (proves read from storage) +seeded="$("$AGENTKEYS_BIN" hook memory-inject --namespaces travel /dev/null | jq -r '.context // ""')" +if echo "$seeded" | grep -q "Chengdu"; then ok "4.1 read seeded" "travel → $(echo "$seeded" | tr '\n' ' ' | cut -c1-40)…"; else fail "4.1 read seeded" "expected 'Chengdu', got: $(echo "$seeded" | cut -c1-80)"; fi + +# 4.2 WRITE a fresh multi-line namespace (proves write to storage) +NS="storagetest" +MARKER="roundtrip-$$" +CONTENT=$'Booked Chengdu flight CA4515 on Apr 12.\nPeanut allergy noted for inflight meals.\nHotel in Yulin district near hotpot street.\nMarker '"$MARKER" +put_out="$("$AGENTKEYS_BIN" memory put --namespace "$NS" --content "$CONTENT" 2>&1)" +if echo "$put_out" | grep -q "s3_key"; then ok "4.2 put" "wrote 4-line '$NS'"; else fail "4.2 put" "$(echo "$put_out" | tr '\n' ' ' | cut -c1-140)"; fi + +# 4.3 
READ-BACK via inject, default passthrough engine (proves the round trip) +got="$("$AGENTKEYS_BIN" hook memory-inject --namespaces "$NS" /dev/null | jq -r '.context // ""')" +got_body_lines="$(echo "$got" | grep -vc '^## Memory:')" +if echo "$got" | grep -q "$MARKER"; then ok "4.3 get roundtrip" "read back marker; $got_body_lines body lines (passthrough = all)"; else fail "4.3 get roundtrip" "marker '$MARKER' missing: $(echo "$got" | tr '\n' ' ' | cut -c1-100)"; fi + +# 4.4 ENGINE selection over storage: lexical + max_lines=1 → exactly 1 body line +sel="$(AGENTKEYS_MEMORY_ENGINE=lexical AGENTKEYS_MEMORY_MAX_LINES=1 "$AGENTKEYS_BIN" hook memory-inject --namespaces "$NS" /dev/null | jq -r '.context // ""')" +sel_body="$(echo "$sel" | grep -v '^## Memory:')" +sel_lines="$(echo "$sel_body" | grep -c .)" +if [[ "$sel_lines" == 1 ]]; then ok "4.4 engine select" "lexical/max_lines=1 → 1 of $got_body_lines lines: $(echo "$sel_body" | cut -c1-44)"; else fail "4.4 engine select" "expected 1 body line, got $sel_lines: $(echo "$sel_body" | tr '\n' ' ' | cut -c1-80)"; fi + +# ─── summary ───────────────────────────────────────────────────────────────── +if [[ $FAILED -eq 0 ]]; then + log "ALL GREEN — storage solution verified (build · suites · roundtrip · engine)" + exit 0 +else + log "$FAILED FAILURE(S) — see above" + exit 1 +fi From 7579fd42f95e7b23c198dcd84b8068b33f244cbf Mon Sep 17 00:00:00 2001 From: Hanwen Cheng Date: Tue, 2 Jun 2026 19:04:37 +0800 Subject: [PATCH 6/6] =?UTF-8?q?fix(memory=5Fengine):=20cargo=20fmt=20+=20c?= =?UTF-8?q?lippy=20unnecessary=5Fsort=5Fby=20=E2=80=94=20green=20CI=20(#14?= =?UTF-8?q?7)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI (harness-ci.yml + mcp-server.yml) failed on the new memory_engine.rs: - cargo fmt --all -- --check: long test-call lines + env_usize chain needed rustfmt wrapping. 
- cargo clippy --workspace -- -D warnings: clippy::unnecessary_sort_by on the PassthroughEngine recency sort → sort_by(|a,b| b.seq.cmp(&a.seq)) → sort_by_key(|l| std::cmp::Reverse(l.seq)). Verified locally against the exact CI commands: - cargo fmt --all -- --check (exit 0) - cargo clippy --workspace --all-targets -- -D warnings (exit 0) - cargo test --workspace -- --test-threads=1 (735 passed) - cargo test -p agentkeys-mcp-server --all-features (35 passed) --- crates/agentkeys-core/src/memory_engine.rs | 33 +++++++++++++++++----- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/crates/agentkeys-core/src/memory_engine.rs b/crates/agentkeys-core/src/memory_engine.rs index 8fd6abac..8dfd85a0 100644 --- a/crates/agentkeys-core/src/memory_engine.rs +++ b/crates/agentkeys-core/src/memory_engine.rs @@ -60,7 +60,9 @@ impl SelectionBudget { } fn env_usize(key: &str) -> Option { - std::env::var(key).ok().and_then(|raw| raw.trim().parse().ok()) + std::env::var(key) + .ok() + .and_then(|raw| raw.trim().parse().ok()) } /// The pluggable engine. 
Input is gate-authorized lines; output is the ordered @@ -117,7 +119,7 @@ impl MemoryEngine for PassthroughEngine { return lines; } let mut by_recency = lines; - by_recency.sort_by(|a, b| b.seq.cmp(&a.seq)); + by_recency.sort_by_key(|line| std::cmp::Reverse(line.seq)); apply_budget(by_recency, budget) } } @@ -245,7 +247,12 @@ Tokyo conference in March, stayed in Shibuya."; #[test] fn lexical_with_query_selects_relevant_line() { - let out = select_blob(&LexicalEngine, Some("where did I go in Chengdu"), BLOB, &budget(Some(1))); + let out = select_blob( + &LexicalEngine, + Some("where did I go in Chengdu"), + BLOB, + &budget(Some(1)), + ); assert_eq!(out, "Chengdu trip — Apr 12 to 16, hotpot at Yulin."); } @@ -259,8 +266,14 @@ Tokyo conference in March, stayed in Shibuya."; fn single_line_blob_unchanged_across_engines() { let single = "Chengdu trip — Apr 12 to 16, hotpot at Yulin."; let unbounded = SelectionBudget::default(); - assert_eq!(select_blob(&PassthroughEngine, None, single, &unbounded), single); - assert_eq!(select_blob(&LexicalEngine, Some("chengdu"), single, &unbounded), single); + assert_eq!( + select_blob(&PassthroughEngine, None, single, &unbounded), + single + ); + assert_eq!( + select_blob(&LexicalEngine, Some("chengdu"), single, &unbounded), + single + ); } #[test] @@ -287,7 +300,13 @@ Tokyo conference in March, stayed in Shibuya."; #[test] fn empty_blob_stays_empty() { - assert_eq!(select_blob(&PassthroughEngine, None, " ", &SelectionBudget::default()), ""); - assert_eq!(select_blob(&LexicalEngine, Some("x"), "", &SelectionBudget::default()), ""); + assert_eq!( + select_blob(&PassthroughEngine, None, " ", &SelectionBudget::default()), + "" + ); + assert_eq!( + select_blob(&LexicalEngine, Some("x"), "", &SelectionBudget::default()), + "" + ); } }