From 8b572019000bfbff9a4d059cda0cced567ffb861 Mon Sep 17 00:00:00 2001
From: Hanwen Cheng <heawen.cheng@gmail.com>
Date: Sun, 31 May 2026 16:49:00 +0800
Subject: [PATCH 1/6] mcp+memory: bind namespace into the signed cap service
 (#147, approach B)

memory.put/get now mint the cap with service="memory:<namespace>" instead
of a static "memory". Because the broker signs `service` and the worker
already derives the S3 key, AAD, and on-chain scope check from
cap.payload.service, this makes the namespace:
  - tamper-proof (signed into the cap),
  - authorized via the existing isServiceInScope gate,
  - storage-segregated (bots/<actor>/memory/memory:<ns>.enc),
  - AAD-bound,
with NO CapPayload change, NO broker change, and no byte-exact
broker<->worker signature risk. Also fixes a latent bug where every
namespace collided at the single memory.enc key.

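No worker behavior change (it already keys/scopes/AADs off the signed
service); added a test proving namespace-folded services segregate
storage.

Compatibility: memory.put/get no longer read a caller-supplied `service`
param. Scope entries and blobs created under the old static "memory"
service (memory.enc) are keyed differently from "memory:<ns>", so
existing deployments need a re-grant / migration to stay reachable.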

Verified: cargo test -p agentkeys-mcp-server (35) + -p agentkeys-worker-memory green.
---
 .../agentkeys-mcp-server/src/tools/memory.rs  | 26 ++++++++++++-------
 .../agentkeys-worker-memory/src/handlers.rs   | 15 +++++++++++
 2 files changed, 31 insertions(+), 10 deletions(-)
diff --git a/crates/agentkeys-mcp-server/src/tools/memory.rs b/crates/agentkeys-mcp-server/src/tools/memory.rs
index 91f735f5..dbf85385 100644
--- a/crates/agentkeys-mcp-server/src/tools/memory.rs
+++ b/crates/agentkeys-mcp-server/src/tools/memory.rs
@@ -63,11 +63,14 @@ pub async fn put(
         "device_key_hash",
         config.default_device_key_hash.as_deref(),
     )?;
-    let service = params
-        .get("service")
-        .and_then(|v| v.as_str())
-        .unwrap_or("memory")
-        .to_string();
+    // Issue #147 (approach B): fold the namespace into the SIGNED `service`,
+    // so the cap is cryptographically bound to exactly one namespace and
+    // authorized via the existing on-chain `isServiceInScope` check. A
+    // `memory:travel` cap cannot touch `memory:personal` — different service
+    // ⇒ different scope entry, different S3 key, different AAD. No CapPayload
+    // change, no broker change: the broker already signs whatever `service`
+    // it's given and the worker already keys storage + scope + AAD off it.
+    let service = format!("memory:{namespace}");
     let ttl_seconds = params
         .get("ttl_seconds")
         .and_then(|v| v.as_u64())
@@ -140,11 +143,14 @@ pub async fn get(
         "device_key_hash",
         config.default_device_key_hash.as_deref(),
     )?;
-    let service = params
-        .get("service")
-        .and_then(|v| v.as_str())
-        .unwrap_or("memory")
-        .to_string();
+    // Issue #147 (approach B): fold the namespace into the SIGNED `service`,
+    // so the cap is cryptographically bound to exactly one namespace and
+    // authorized via the existing on-chain `isServiceInScope` check. A
+    // `memory:travel` cap cannot touch `memory:personal` — different service
+    // ⇒ different scope entry, different S3 key, different AAD. No CapPayload
+    // change, no broker change: the broker already signs whatever `service`
+    // it's given and the worker already keys storage + scope + AAD off it.
+    let service = format!("memory:{namespace}");
     let ttl_seconds = params
         .get("ttl_seconds")
         .and_then(|v| v.as_u64())
diff --git a/crates/agentkeys-worker-memory/src/handlers.rs b/crates/agentkeys-worker-memory/src/handlers.rs
index b11997b9..6fb5eb63 100644
--- a/crates/agentkeys-worker-memory/src/handlers.rs
+++ b/crates/agentkeys-worker-memory/src/handlers.rs
@@ -281,4 +281,19 @@ mod tests {
     fn s3_prefix_uses_memory_path() {
         assert_eq!(s3_prefix("0xABCDEF"), "bots/abcdef/memory/");
     }
+
+    #[test]
+    fn namespace_folded_service_segregates_storage() {
+        // Issue #147 (approach B): the MCP mints memory caps with
+        // service="memory:<namespace>". Because the worker keys S3 off the
+        // SIGNED service, two namespaces land at distinct keys — a
+        // `memory:travel` cap physically cannot read/write the
+        // `memory:personal` object. This is the namespace-isolation gate,
+        // enforced by construction (signed service ⇒ key + scope + AAD).
+        let travel = s3_key("0xabc", "memory:travel");
+        let personal = s3_key("0xabc", "memory:personal");
+        assert_ne!(travel, personal);
+        assert_eq!(travel, "bots/abc/memory/memory:travel.enc");
+        assert_eq!(personal, "bots/abc/memory/memory:personal.enc");
+    }
 }

From f8d7c51dea8769bacb7db494b956abc430e4f7f3 Mon Sep 17 00:00:00 2001
From: Hanwen Cheng <heawen.cheng@gmail.com>
Date: Tue, 2 Jun 2026 16:46:54 +0800
Subject: [PATCH 2/6] =?UTF-8?q?docs:=20memory=20engine=20integration=20?=
 =?UTF-8?q?=E2=80=94=20Hermes=20providers=20+=20adapter=20seam=20(#147)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Answers 'Hermes lists many memory providers — which to pick, how to stay
compatible' and folds the strategy into the source of truth.

plan/agentkeys-memory-design.md:
- New §6a 'Engine integration — Hermes providers + the adapter seam':
  - Reframe: the ~9 Hermes providers bundle engine+store+delivery; they
    slot into AgentKeys' pluggable ENGINE axis, not as peers.
  - Delivery stays at the pre_llm_call hook (#141), NOT the runtime
    memory.provider interface (its lifecycle step 6 hands the LLM
    memory-enumeration tools — breaks the no-whole-context invariant).
  - Canonical engine = OpenViking (self-hosted, deterministic, zero
    third-party egress); Holographic second; cloud providers = tier 3.
  - Adapter seam = one MemoryEngine trait (extract/rank/synthesize);
    compatibility = one conformance test with the engine swapped and
    store+gate+delivery held constant.
  - Two compatibility tiers, one gate: local=own-store+gate-read,
    cloud=gate-egress+audit.
- New engine stage E0 (the recommended start): MemoryEngine trait +
  OpenViking reference adapter + swap-the-engine conformance test.

arch.md:
- §22 pluggable surfaces: add 'Memory engine' axis row (fix stale
  'six'->'eight' axes count).
- §15.2 memory-service: document namespace = signed service
  'memory:<namespace>' (#147) and the pluggable-engine posture; add the
  previously-missing outward links to the plan + research docs.
---
 docs/arch.md                         |  5 +-
 docs/plan/agentkeys-memory-design.md | 77 +++++++++++++++++++++++++++-
 2 files changed, 79 insertions(+), 3 deletions(-)

diff --git a/docs/arch.md b/docs/arch.md
index 94f2ea5d..7d3fa0cd 100644
--- a/docs/arch.md
+++ b/docs/arch.md
@@ -893,6 +893,8 @@ Each data class gets its own worker — independent IAM, independent deploy life
 - **`master_wallet` on chain?** No
 - **Operations:** R/W agent state at high frequency. **STS session policies enable direct S3 access** from the agent process for the duration of the session — the worker is NOT in the LLM-call hot path. The worker mints a TTL-bounded STS session at session start; the agent's localhost SDK uses STS creds for many ops within the TTL.
 - **OIDC federation (issue #90):** Same `X-Aws-*` header passthrough as creds. Each data-class has its own IAM role (`agentkeys-memory-role`); memory-role STS creds are rejected at the vault bucket and vice versa. See §17.5.
+- **Namespace = signed service (issue #147):** the memory `service` carries the namespace as **`memory:<namespace>`** (e.g. `memory:travel`). Because `service` is a signed cap field, the namespace is tamper-proof and is authorized by the existing on-chain `isServiceInScope(operator, actor, keccak("memory:<ns>"))` gate. The worker keys storage (`bots/<actor_omni_hex>/memory/memory:<ns>.enc`), the envelope AAD, and the scope check all off that one signed field — so two namespaces are physically segregated with no new mechanism. Minted in `crates/agentkeys-mcp-server/src/tools/memory.rs`; enforced in `crates/agentkeys-worker-memory/src/handlers.rs`.
+- **Memory engine — pluggable, not built in v0 (Position C):** the worker is **store + gate only** (deterministic, no ranking, no LLM). Ranking / extraction / consolidation is delegated to an external engine via an adapter trait (`extract` / `rank` / `synthesize`); canonical reference engine **OpenViking**; delivery via the `pre_llm_call` hook (#141), never a runtime `memory.provider`. Full design + Hermes-provider compatibility strategy: [`plan/agentkeys-memory-design.md`](plan/agentkeys-memory-design.md) (§6a engine seam); see also the "Memory engine" pluggable-axis row in §22 of this document. Background: [`research/ai-memory-systems-survey.md`](research/ai-memory-systems-survey.md), decision record [`research/memory-build-vs-gate-decision.md`](research/memory-build-vs-gate-decision.md), [`research/universal-gate-pattern.md`](research/universal-gate-pattern.md).
 
 ### 15.3 audit-service
 
@@ -1645,7 +1647,7 @@ The only things that change: `K3EpochCounter.current_epoch` (1 chain tx), signer
 
 ## 22. Pluggable surfaces
 
-The architecture is intentionally pluggable on six axes. Each axis has a default v2 implementation and a documented swap-in path.
+The architecture is intentionally pluggable on eight axes. Each axis has a default v2 implementation and a documented swap-in path.
 
 | Axis | v2 default | Future swap | Swap mechanism |
 |---|---|---|---|
@@ -1656,6 +1658,7 @@ The architecture is intentionally pluggable on six axes. Each axis has a default
 | **Worker runtime** | AWS Lambda + API Gateway | axum microservice (vendor-neutral); Cloudflare Worker (edge); Tencent SCF (China) | Worker shape per §15 is uniform across runtimes |
 | **Payment rail** | Per mode: P-1 service-pool / P-2 escrow / P-3 direct | Mode + upstream (Stripe, USDC, SOL, fiat) | Per-mode plugins layer on the §15.5 wire shape |
 | **Clear-signing metadata** (issue #82) | Bundled ERC-7730 v2 set under `agentkeys-core::clear_signing::fixtures/` (USDC permit + curated DEX routers + permit2) | Registry fetch from `github.com/ethereum/clear-signing-erc7730-registry` at daemon startup; on-chain registry / IPFS-pinned + signature-verified | `ClearSigningCatalog` trait in [`crates/agentkeys-core/src/clear_signing/`](../crates/agentkeys-core/src/clear_signing/); bundled → registry-cached → on-chain progression. Operator-custom files via `$AGENTKEYS_7730_DIR` env var |
+| **Memory engine** (issue #147) | None in v0 — `agentkeys-worker-memory` is **store + gate only** (deterministic, no ranking, no LLM) | OpenViking (canonical) / Holographic / mem0-self-hosted / Hermes-native / Claude memory tool / agentmemory | External-engine adapter trait (`extract` / `rank` / `synthesize`) over the cap-gated store. Delivery via the `pre_llm_call` injection hook (#141), **not** a runtime `memory.provider` (which would hand the LLM memory-enumeration tools). Local engines: own-store + gate-the-read; cloud engines: gate-the-egress + audit. See [`plan/agentkeys-memory-design.md` §6a](plan/agentkeys-memory-design.md) |
 
 **Pluggability is the point.** No single backend is load-bearing for the architecture; the contracts (auth-plugin trait, signer-protocol, audit trait, worker shape, chain ABI) are. This is what lets:
 
diff --git a/docs/plan/agentkeys-memory-design.md b/docs/plan/agentkeys-memory-design.md
index 930ba5b7..ad2d1b2b 100644
--- a/docs/plan/agentkeys-memory-design.md
+++ b/docs/plan/agentkeys-memory-design.md
@@ -87,7 +87,7 @@
 - Stay backward-compatible with the current `memory_put` / `memory_get` blob primitive (one operator's "service" might genuinely want raw blob KV).
 - Land **zero** changes to: broker cap-mint protocol, the data_class isolation gate (`DataClass::Memory`), the per-data-class IAM bucket separation (arch.md §17.5), K3-derived KEK, AES-256-GCM envelope format.
 
-**Explicitly delegated (NOT an AgentKeys goal, per decision record Position C):** the memory *engine* — embeddings, vector/BM25/graph ranking, extraction, consolidation, decay. These run in a pluggable external engine in front of the store, or in the optional E1/E2 stages (§9) only if an operator demands in-worker ranking.
+**Explicitly delegated (NOT an AgentKeys goal, per decision record Position C):** the memory *engine* — embeddings, vector/BM25/graph ranking, extraction, consolidation, decay. These run in a pluggable external engine in front of the store, or in the optional E1/E2 stages (§9) only if an operator demands in-worker ranking. **How an external engine plugs in — the adapter seam, the canonical engine pick (OpenViking), and Hermes-provider compatibility — is specified in §6a.**
 
 **Non-goals (v0):**
 
@@ -494,6 +494,78 @@ The reference implementation ships §6.1; §6.2 is a documented hook with the sc
 
 ---
 
+## 6a. Engine integration — Hermes providers + the adapter seam
+
+> **🔌 ENGINE — pluggable, not built in v0.** This section specifies *how* an external engine plugs onto the store+gate, using the [Hermes runtime's memory-provider ecosystem](https://hermes-agent.nousresearch.com/docs/user-guide/features/memory-providers) as the worked example. It adds nothing to the v0 build; it defines the adapter contract the first engine milestone (§9 stage **E0**) implements. This is the answer to "Hermes lists many memory providers — which do we pick, and how do we stay compatible with the rest?"
+
+### 6a.1 The reframe: Hermes "memory providers" are *engines*, not peers
+
+Hermes ships ~9 memory providers — Honcho, Mem0, Hindsight, Holographic, OpenViking, RetainDB, ByteRover, Supermemory, Memori. **Each bundles three things this design deliberately splits**: an *engine* (extract / rank / synthesize), a *store* (where the bytes live), and a *delivery* path (how memory reaches the LLM). AgentKeys owns the **store** (K3-encrypted per-actor S3) and the **gate** (cap + scope + namespace + audit). So a Hermes provider is not a peer of AgentKeys — it slots into AgentKeys' **engine** axis. The integration question is "which engine ranks the lines our store holds and our gate authorizes," never "which provider replaces AgentKeys."
+
+### 6a.2 Delivery stays at the hook layer, NOT the provider interface
+
+The Hermes provider lifecycle's step 6 is *"adds provider-specific tools for memory management"* — it hands the LLM tools to query/enumerate memory. That breaks invariant #2 (LLM never sees the whole memory) and weakens invariant #3 (LLM pluggable). So AgentKeys delivers memory through the **`pre_llm_call` hook** (`agentkeys wire hermes`, issue #141), **not** by registering as a Hermes `memory.provider`. The hook (`crates/agentkeys-cli/src/hook.rs` → `memory-inject`) *injects* a namespaced block into the prompt and deliberately exposes no query tool to the model (it does not even read the host's prompt from stdin). The privacy thesis, in code:
+
+| Integration surface | Who controls retrieval | LLM gets memory tools? | Verdict |
+|---|---|---|---|
+| Hermes `memory.provider: <name>` | the provider | **yes** (lifecycle step 6) | ✗ violates invariant #2 / #3 |
+| AgentKeys `pre_llm_call` hook (#141) | the gate + engine, off-LLM | no — passive injection only | ✓ canonical delivery |
+
+**Coexistence rule:** a wired AgentKeys runtime keeps `memory.provider` unset (or `none`) — the AgentKeys hook is the *sole* memory delivery. A Hermes provider running in addition would double-inject from a second source of truth. `agentkeys wire` already owns the `hooks:` block (see [`../user-manual.md`](../user-manual.md)); it intentionally leaves `memory.provider` untouched.
+
+### 6a.3 How to start: pick a canonical engine by one axis
+
+The axis that protects the two load-bearing properties (own-the-bytes + LLM-pluggable) is **store-locality + determinism + zero third-party egress**:
+
+| Tier | Providers | Why this tier | Action |
+|---|---|---|---|
+| **1 — canonical** | **OpenViking** (self-hosted, `OPENVIKING_ENDPOINT`, tiered retrieval over a hierarchy); **Holographic** (local SQLite, HRR algebra — no LLM in the loop) | bytes stay on operator infra; ranking is deterministic; config is one endpoint/path we control. OpenViking's "filesystem hierarchy + tiered retrieval" is ~1:1 with our namespaced S3 store. | **Build the adapter against OpenViking first.** Holographic second — it proves the no-LLM-call ranking property. |
+| **2 — extraction-local** | ByteRover (local pre-compression extraction); Hindsight (local mode) | local-ish; useful for the `extract` call, not just `rank` | after Tier 1 |
+| **3 — gate-the-egress only** | Mem0, Honcho, Supermemory, RetainDB, Memori (cloud-bundled store) | their cloud sees the bytes — fights own-store. We cannot *store*, but the gate still controls the *call*. | support as "operator accepts egress"; the cap authorizes whether the egress happens, audit records it |
+
+**Recommendation: OpenViking is the canonical engine to test.** Self-hosted single endpoint, no cloud account, maps onto the store, privacy thesis intact out of the box. Confirm its exact interface with a ½–1 day spike before writing the adapter (the provider doc is a summary, not a contract).
+
+### 6a.4 The adapter seam — one trait, three calls
+
+Compatibility does **not** mean matching Hermes' provider API. It means normalizing every engine onto **AgentKeys' own narrow seam**, with store + gate + delivery held invariant and only the engine swapping:
+
+```rust
+trait MemoryEngine {
+    // optional — many engines extract server-side; deterministic engines skip it
+    fn extract(&self, turn: &Turn) -> Vec<Fact>;
+    // the load-bearing call: order gate-authorized line IDs for this query
+    fn rank(&self, query: &Query, candidates: &[LineId], budget: Budget) -> Vec<LineId>;
+    // optional — summary/consolidation, when the engine offers it
+    fn synthesize(&self, facts: &[Fact]) -> Option<Summary>;
+}
+```
+
+`rank` is load-bearing: the engine sees only **line IDs + metadata** from `/v1/memory/list` (already namespace-filtered by the gate), orders them, then the caller reads the winners via `/v1/memory/get`. The engine never holds the plaintext store — it ranks references the gate already authorized. `extract` / `synthesize` are optional (cloud engines extract server-side; Holographic skips extraction entirely).
+
+### 6a.5 Compatibility = one conformance test, engine swapped
+
+An engine **"is compatible"** iff it passes a single golden-path conformance test with store / gate / delivery constant and only the engine swapped:
+
+> seed the Chengdu fixture → gated `append` → engine `rank` over `list` output → `pre_llm_call` injects the top-K block → assert the injected text.
+
+Same test, swap the `MemoryEngine` impl. That is the testable definition of "fits the others" — behavioral conformance over a fixed store+gate+delivery, not API-shape matching.
+
+### 6a.6 Two compatibility tiers, one gate
+
+| Engine class | Store posture | Gate posture | What the cap authorizes |
+|---|---|---|---|
+| **Local** (OpenViking, Holographic, ByteRover-local) | own-the-store (S3) | gate-the-read | which actor / namespace may `get` / `list` |
+| **Cloud** (Mem0, Honcho, …) | can't own (egress) | gate-the-egress + audit | *whether* actor / namespace may call out at all |
+
+The same cap-token + scope contract drives both; only the enforcement point moves (read-time vs. call-time). This is the [universal gate pattern](../research/universal-gate-pattern.md) applied to the engine axis — the gate stays deterministic and policy-carrying whether or not we hold the bytes.
+
+### 6a.7 Relationship to existing sections
+
+- **§7.4 (Mem0 / Letta / LangMem export adapter)** is the *data-portability* bridge — move bytes between runtimes at rest. **This section** is the *live-ranking* bridge — let an external engine rank our at-rest store per turn. Same delegation philosophy, different verb (migrate vs. rank).
+- **§5 / §6** describe an engine's *internal* concerns (index, extraction) if one is ever built in-worker (stages E1 / E2). This section describes the *boundary* to an engine running outside the worker — the common case under Position C.
+
+---
+
 ## 7. Portability — `agentkeys memory export` / `import`
 
 ### 7.1 Export bundle format
@@ -628,7 +700,8 @@ Core path: **M-1 → M0 → M1 → M1.5 → M2** is the v0 gated-backend ship (~
 
 | Stage | Deliverable | Status |
 |---|---|---|
-| **E1** | `/v1/memory/rebuild-index` + `/v1/memory/search` (caller embeds, worker scores cosine; optionally BM25 + RRF per the agentmemory-followup research). Index format per §5. Microbench at 10K/100K/1M. Adds the `search`/`rebuild_index` handler modules deferred in M0. | **Deferred / pluggable.** Most operators use an external engine (mem0-self-hosted / Claude memory tool / Hermes-native) instead. Build E1 only if "ranking inside the AgentKeys worker, no external engine" is an explicit operator requirement. |
+| **E0** | **External-engine adapter seam** (§6a): the `MemoryEngine` trait (`extract` / `rank` / `synthesize`) + an **OpenViking** reference adapter + the swap-the-engine conformance test (Chengdu golden path over a fixed store+gate+`pre_llm_call` delivery). Depends on the core gate (M1.5 namespaces) being green so `rank` operates over gate-authorized `list` output. | **First engine milestone — the recommended start.** Proves "external engine ranks, AgentKeys store+gate holds + authorizes, hook injects." No in-worker ranking; delivery stays at the hook layer, never the runtime's `memory.provider` interface. |
+| **E1** | `/v1/memory/rebuild-index` + `/v1/memory/search` (caller embeds, worker scores cosine; optionally BM25 + RRF per the agentmemory-followup research). Index format per §5. Microbench at 10K/100K/1M. Adds the `search`/`rebuild_index` handler modules deferred in M0. | **Deferred / pluggable.** Most operators use an external engine (mem0-self-hosted / Claude memory tool / Hermes-native) instead. Build E1 only if "ranking *inside* the AgentKeys worker, no external engine" is an explicit operator requirement — i.e. the in-worker alternative to E0. |
 | **E2** | Extractor sidecar reference (§6.2) — client-side extraction, never in the worker. | **Deferred / pluggable.** External engines bring their own extraction. |
 
 The engine stages are the part the decision record says the ecosystem already does well — buildable fallback, not the plan of record. E1/E2 fork independently of the core trunk if ever taken.

From d9351d85fd6496cdfd0c3dbe613bd69b0e24b45d Mon Sep 17 00:00:00 2001
From: Hanwen Cheng <heawen.cheng@gmail.com>
Date: Tue, 2 Jun 2026 17:09:36 +0800
Subject: [PATCH 3/6] =?UTF-8?q?feat(memory):=20pluggable=20engine=20seam?=
 =?UTF-8?q?=20(E0)=20=E2=80=94=20MemoryEngine=20trait=20+=20engines=20(#14?=
 =?UTF-8?q?7)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Starts implementing plan §6a / arch.md §22 'Memory engine' axis: the
caller-side, deterministic engine seam that ranks/selects gate-authorized
memory lines before injection (never in the worker, never an LLM in the gate).

crates/agentkeys-core/src/memory_engine.rs (new):
- MemoryEngine trait: select(query, lines, budget) -> lines.
- PassthroughEngine: identity when unbounded (today's full-blob inject);
  recency-trim when a budget is set.
- LexicalEngine: deterministic term-overlap ranking with a query (stopword
  filtered), recency fallback without one. Real reference engine, no LLM,
  no external service.
- select_blob(): the blob->blob seam contract — swapping the engine never
  changes the signature, only the selected subset (plan §6a.5 conformance).
- SelectionBudget + env config (AGENTKEYS_MEMORY_ENGINE / _MAX_LINES /
  _MAX_BYTES). 8 unit + conformance tests.

crates/agentkeys-cli/src/hook.rs:
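- memory-inject (pre_llm_call) now runs the configured engine over each
  namespace blob. Default passthrough + unbounded budget keeps every
  non-empty line in order (the Chengdu single-line fixture is unchanged).
  It is not byte-identical for every blob: MemoryLine::from_blob drops
  blank lines and trailing whitespace, and an empty selection now skips
  the "## Memory" header instead of emitting it.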

OpenViking/Holographic adapters implement the SAME trait and are the next
adapters — deferred pending the API spike (plan §6a.3); not fabricated here.

Tests: cargo test -p agentkeys-core -p agentkeys-cli green (core 38 incl. 8
new engine tests; cli unchanged-green).
---
 crates/agentkeys-cli/src/hook.rs           |  17 +-
 crates/agentkeys-core/src/lib.rs           |   1 +
 crates/agentkeys-core/src/memory_engine.rs | 293 +++++++++++++++++++++
 3 files changed, 310 insertions(+), 1 deletion(-)
 create mode 100644 crates/agentkeys-core/src/memory_engine.rs

diff --git a/crates/agentkeys-cli/src/hook.rs b/crates/agentkeys-cli/src/hook.rs
index 40cef97d..4cb5de96 100644
--- a/crates/agentkeys-cli/src/hook.rs
+++ b/crates/agentkeys-cli/src/hook.rs
@@ -246,6 +246,13 @@ pub async fn memory_inject(
     // pipe a payload (EOF arrives) so they were unaffected; direct calls were not.
     let client = HookClient::resolve(mcp_url, vendor_token, actor, operator);
 
+    // Pluggable engine seam (plan §6a): the gate already authorized these bytes;
+    // the engine — caller-side, deterministic, no LLM — selects which lines to
+    // inject within a budget. Default `passthrough` + unbounded budget injects
+    // every non-empty line, in order. Passive injection carries no query (None).
+    let engine = agentkeys_core::memory_engine::engine_from_env();
+    let budget = agentkeys_core::memory_engine::SelectionBudget::from_env();
+
     let mut chunks = Vec::new();
     for ns in namespaces
         .split(',')
@@ -258,7 +265,15 @@ pub async fn memory_inject(
         {
             Ok(result) => {
                 if let Some(text) = extract_memory_content(&result) {
-                    chunks.push(format!("## Memory: {ns}\n{text}"));
+                    let selected = agentkeys_core::memory_engine::select_blob(
+                        engine.as_ref(),
+                        None,
+                        &text,
+                        &budget,
+                    );
+                    if !selected.is_empty() {
+                        chunks.push(format!("## Memory: {ns}\n{selected}"));
+                    }
                 }
             }
             Err(e) => {
diff --git a/crates/agentkeys-core/src/lib.rs b/crates/agentkeys-core/src/lib.rs
index 5b5926c4..009996fe 100644
--- a/crates/agentkeys-core/src/lib.rs
+++ b/crates/agentkeys-core/src/lib.rs
@@ -6,6 +6,7 @@ pub mod chain_profile;
 pub mod clear_signing;
 pub mod device_crypto;
 pub mod init_flow;
+pub mod memory_engine;
 pub mod mock_client;
 pub mod otp;
 pub mod payment;
diff --git a/crates/agentkeys-core/src/memory_engine.rs b/crates/agentkeys-core/src/memory_engine.rs
new file mode 100644
index 00000000..8fd6abac
--- /dev/null
+++ b/crates/agentkeys-core/src/memory_engine.rs
@@ -0,0 +1,293 @@
+//! Pluggable memory-engine seam — plan `docs/plan/agentkeys-memory-design.md`
+//! §6a, arch.md §22 "Memory engine" axis.
+//!
+//! The engine runs CALLER-SIDE over already-gate-authorized memory lines —
+//! never inside the worker, never with an LLM in the gate. `select` is the
+//! load-bearing call: passive `pre_llm_call` injection passes `query = None`
+//! plus a budget (which lines to inject when a namespace grows large); a
+//! future `memory.search` tool passes `query = Some(..)`.
+//!
+//! External engines (OpenViking, Holographic, mem0-self-hosted, …) implement
+//! this same trait — that is the compatibility seam (plan §6a.4, which sketches
+//! the call as `rank` over line IDs): swap the engine, hold store + gate +
+//! delivery constant. Both engines shipped here are deterministic, no external service.
+
+use std::collections::HashSet;
+
+/// One unit the engine ranks/selects over. In the v0 single-blob store a line
+/// is a `\n`-split segment of a namespace blob; under the future per-line store
+/// (plan M1) it maps 1:1 to a stored line. `seq` is the index among the kept
+/// (non-empty, end-trimmed) lines — higher is later, treated as more recent.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct MemoryLine {
+    pub text: String,
+    pub seq: usize,
+}
+
+impl MemoryLine {
+    pub fn from_blob(blob: &str) -> Vec<MemoryLine> {
+        blob.lines()
+            .map(str::trim_end)
+            .filter(|line| !line.is_empty())
+            .enumerate()
+            .map(|(seq, text)| MemoryLine {
+                text: text.to_string(),
+                seq,
+            })
+            .collect()
+    }
+}
+
+/// Upper bounds on what gets injected. Unbounded (both `None`) means no line is
+/// cut: every parsed line is selected, preserving today's full-blob injection.
+#[derive(Debug, Clone, Default)]
+pub struct SelectionBudget {
+    pub max_lines: Option<usize>,
+    pub max_bytes: Option<usize>,
+}
+
+impl SelectionBudget {
+    pub fn is_unbounded(&self) -> bool {
+        self.max_lines.is_none() && self.max_bytes.is_none()
+    }
+
+    pub fn from_env() -> SelectionBudget {
+        SelectionBudget {
+            max_lines: env_usize("AGENTKEYS_MEMORY_MAX_LINES"),
+            max_bytes: env_usize("AGENTKEYS_MEMORY_MAX_BYTES"),
+        }
+    }
+}
+
+fn env_usize(key: &str) -> Option<usize> {
+    std::env::var(key).ok().and_then(|raw| raw.trim().parse().ok())
+}
+
+/// The pluggable engine. Input is gate-authorized lines; output is the ordered
+/// subset to inject. Implementations MUST be pure (no LLM, no I/O in the gate
+/// path) so the selection stays deterministic and auditable.
+pub trait MemoryEngine: Send + Sync {
+    fn name(&self) -> &'static str;
+    fn select(
+        &self,
+        query: Option<&str>,
+        lines: Vec<MemoryLine>,
+        budget: &SelectionBudget,
+    ) -> Vec<MemoryLine>;
+}
+
+/// Budgeted prefix of a priority-ordered list; the first line is kept even if over `max_bytes`.
+fn apply_budget(ordered: Vec<MemoryLine>, budget: &SelectionBudget) -> Vec<MemoryLine> {
+    let line_capped = match budget.max_lines {
+        Some(max) => ordered.into_iter().take(max).collect(),
+        None => ordered,
+    };
+    let Some(max_bytes) = budget.max_bytes else {
+        return line_capped;
+    };
+    let mut used = 0usize;
+    let mut kept = Vec::new();
+    for line in line_capped {
+        let cost = line.text.len() + 1;
+        if used + cost > max_bytes && !kept.is_empty() {
+            break;
+        }
+        used += cost;
+        kept.push(line);
+    }
+    kept
+}
+
+/// Identity engine — preserves the current behavior. Unbounded budget returns
+/// every line untouched; a bounded budget keeps the most recent lines.
+pub struct PassthroughEngine;
+
+impl MemoryEngine for PassthroughEngine {
+    fn name(&self) -> &'static str {
+        "passthrough"
+    }
+
+    fn select(
+        &self,
+        _query: Option<&str>,
+        lines: Vec<MemoryLine>,
+        budget: &SelectionBudget,
+    ) -> Vec<MemoryLine> {
+        if budget.is_unbounded() {
+            return lines;
+        }
+        let mut by_recency = lines;
+        by_recency.sort_by(|a, b| b.seq.cmp(&a.seq));
+        apply_budget(by_recency, budget)
+    }
+}
+
+/// Deterministic lexical engine. With a query it ranks by term overlap (recency
+/// breaks ties; zero-overlap lines rank last, not dropped); without one it is pure
+/// recency. No LLM, no embeddings, no external service — a real reference engine.
+pub struct LexicalEngine;
+
+const STOPWORDS: &[&str] = &[
+    "a", "an", "and", "are", "as", "at", "be", "but", "by", "did", "do", "does", "for", "from",
+    "had", "has", "have", "in", "is", "it", "my", "of", "on", "or", "the", "to", "was", "what",
+    "when", "where", "which", "who", "will", "with", "you", "your",
+];
+
+fn tokenize(text: &str) -> HashSet<String> {
+    text.to_lowercase()
+        .split(|c: char| !c.is_alphanumeric())
+        .filter(|token| token.len() > 1 && !STOPWORDS.contains(token))
+        .map(|token| token.to_string())
+        .collect()
+}
+
+impl MemoryEngine for LexicalEngine {
+    fn name(&self) -> &'static str {
+        "lexical"
+    }
+
+    fn select(
+        &self,
+        query: Option<&str>,
+        lines: Vec<MemoryLine>,
+        budget: &SelectionBudget,
+    ) -> Vec<MemoryLine> {
+        let query_terms = query.map(tokenize).unwrap_or_default();
+        let mut scored: Vec<(i64, usize, MemoryLine)> = lines
+            .into_iter()
+            .map(|line| {
+                let score = if query_terms.is_empty() {
+                    0
+                } else {
+                    let line_terms = tokenize(&line.text);
+                    query_terms
+                        .iter()
+                        .filter(|term| line_terms.contains(*term))
+                        .count() as i64
+                };
+                (score, line.seq, line)
+            })
+            .collect();
+        scored.sort_by(|a, b| b.0.cmp(&a.0).then(b.1.cmp(&a.1)));
+        let ordered = scored.into_iter().map(|(_, _, line)| line).collect();
+        apply_budget(ordered, budget)
+    }
+}
+
+pub fn engine_from_name(name: &str) -> Box<dyn MemoryEngine> {
+    match name.trim().to_lowercase().as_str() {
+        "lexical" => Box::new(LexicalEngine),
+        _ => Box::new(PassthroughEngine),
+    }
+}
+
+pub fn engine_from_env() -> Box<dyn MemoryEngine> {
+    engine_from_name(&std::env::var("AGENTKEYS_MEMORY_ENGINE").unwrap_or_default())
+}
+
+/// Apply an engine to one namespace blob and return the injection-ready text.
+/// Selected lines are re-sorted to chronological (`seq`) order so the injected
+/// block reads naturally regardless of how the engine ranked internally. This
+/// `blob -> blob` contract is the seam: swapping the engine never changes the
+/// signature, only the selected subset.
+pub fn select_blob(
+    engine: &dyn MemoryEngine,
+    query: Option<&str>,
+    blob: &str,
+    budget: &SelectionBudget,
+) -> String {
+    let lines = MemoryLine::from_blob(blob);
+    if lines.is_empty() {
+        return blob.trim().to_string();
+    }
+    let mut selected = engine.select(query, lines, budget);
+    selected.sort_by_key(|line| line.seq);
+    selected
+        .into_iter()
+        .map(|line| line.text)
+        .collect::<Vec<_>>()
+        .join("\n")
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    const BLOB: &str = "\
+Chengdu trip — Apr 12 to 16, hotpot at Yulin.
+Allergic to peanuts.
+Prefers window seats on flights.
+Tokyo conference in March, stayed in Shibuya.";
+
+    fn budget(max_lines: Option<usize>) -> SelectionBudget {
+        SelectionBudget {
+            max_lines,
+            max_bytes: None,
+        }
+    }
+
+    #[test]
+    fn passthrough_unbounded_is_identity() {
+        let lines = MemoryLine::from_blob(BLOB);
+        let out = PassthroughEngine.select(None, lines.clone(), &SelectionBudget::default());
+        assert_eq!(out, lines);
+    }
+
+    #[test]
+    fn passthrough_budget_keeps_most_recent() {
+        let out = select_blob(&PassthroughEngine, None, BLOB, &budget(Some(2)));
+        // most-recent two lines, re-sorted chronologically
+        assert_eq!(
+            out,
+            "Prefers window seats on flights.\nTokyo conference in March, stayed in Shibuya."
+        );
+    }
+
+    #[test]
+    fn lexical_with_query_selects_relevant_line() {
+        let out = select_blob(&LexicalEngine, Some("where did I go in Chengdu"), BLOB, &budget(Some(1)));
+        assert_eq!(out, "Chengdu trip — Apr 12 to 16, hotpot at Yulin.");
+    }
+
+    #[test]
+    fn lexical_without_query_is_recency() {
+        let out = select_blob(&LexicalEngine, None, BLOB, &budget(Some(1)));
+        assert_eq!(out, "Tokyo conference in March, stayed in Shibuya.");
+    }
+
+    #[test]
+    fn single_line_blob_unchanged_across_engines() {
+        let single = "Chengdu trip — Apr 12 to 16, hotpot at Yulin.";
+        let unbounded = SelectionBudget::default();
+        assert_eq!(select_blob(&PassthroughEngine, None, single, &unbounded), single);
+        assert_eq!(select_blob(&LexicalEngine, Some("chengdu"), single, &unbounded), single);
+    }
+
+    #[test]
+    fn conformance_swap_engine_same_contract_different_selection() {
+        // The seam definition (plan §6a.5): same blob + budget + query, swap
+        // only the engine. The String->String contract holds for both; the
+        // engine is the sole variable, so the selected subset differs.
+        let query = Some("peanuts allergic");
+        let b = budget(Some(1));
+        let passthrough = select_blob(&PassthroughEngine, query, BLOB, &b);
+        let lexical = select_blob(&LexicalEngine, query, BLOB, &b);
+        assert_eq!(passthrough, "Tokyo conference in March, stayed in Shibuya."); // recency
+        assert_eq!(lexical, "Allergic to peanuts."); // relevance
+        assert_ne!(passthrough, lexical);
+    }
+
+    #[test]
+    fn from_name_defaults_to_passthrough() {
+        assert_eq!(engine_from_name("lexical").name(), "lexical");
+        assert_eq!(engine_from_name("passthrough").name(), "passthrough");
+        assert_eq!(engine_from_name("nonsense").name(), "passthrough");
+        assert_eq!(engine_from_name("").name(), "passthrough");
+    }
+
+    #[test]
+    fn empty_blob_stays_empty() {
+        assert_eq!(select_blob(&PassthroughEngine, None, "   ", &SelectionBudget::default()), "");
+        assert_eq!(select_blob(&LexicalEngine, Some("x"), "", &SelectionBudget::default()), "");
+    }
+}

From 1768b0aa896bd95395be0ce7d7555f684642ede3 Mon Sep 17 00:00:00 2001
From: Hanwen Cheng <heawen.cheng@gmail.com>
Date: Tue, 2 Jun 2026 17:18:41 +0800
Subject: [PATCH 4/6] feat(wire+demo): bake memory engine into wired hook; demo
 over real worker (#147)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Task 2 — make phase1-wire-demo prove memory against the REAL provider, now
through the pluggable engine (plan §6a). The engine runs caller-side in the
wired pre_llm_call hook, so it must be baked at wire time (Hermes invokes the
hook in the Phase-4 chat, where the demo's env can't reach).

agentkeys wire (main.rs + wire.rs):
- New --memory-engine (passthrough|lexical, default passthrough) and
  --memory-max-lines flags. They bake AGENTKEYS_MEMORY_ENGINE /
  AGENTKEYS_MEMORY_MAX_LINES into the generated memory-inject script.
- Default passthrough + no budget emits NO engine env → the generated
  script is byte-identical to before (idempotency + existing tests hold).
- 2 new tests: omitted-by-default, and baked-when-set (env precedes exec).

harness/phase1-wire-demo.sh:
- MEMORY_ENGINE / MEMORY_MAX_LINES config (default passthrough = unchanged).
- Passes them to `agentkeys wire`, so in --real the wired hook runs the
  engine over the REAL memory worker's lines.
- 3.1 now surfaces engine + source ('engine=lexical via REAL worker → …').
- bash -n clean.

docs/operator-runbook-wire.md: document the MEMORY_ENGINE / MEMORY_MAX_LINES
knobs in the env-override list (runbook-fix-fold-back).

Note: the full --real run needs the operator's live stack (broker + Heima +
WebAuthn + sandbox), not runnable from this worktree; verified via cargo test
(core+cli green) + bash -n. To SEE selection, run --real with a multi-line
SEED_MEMORY_CONTENT and MEMORY_ENGINE=lexical MEMORY_MAX_LINES=N.
---
 crates/agentkeys-cli/src/main.rs | 14 ++++++++++
 crates/agentkeys-cli/src/wire.rs | 47 +++++++++++++++++++++++++++++++-
 docs/operator-runbook-wire.md    |  1 +
 harness/phase1-wire-demo.sh      | 12 ++++++--
 4 files changed, 71 insertions(+), 3 deletions(-)

diff --git a/crates/agentkeys-cli/src/main.rs b/crates/agentkeys-cli/src/main.rs
index 5b18815f..c4af2fdd 100644
--- a/crates/agentkeys-cli/src/main.rs
+++ b/crates/agentkeys-cli/src/main.rs
@@ -348,6 +348,16 @@ enum Commands {
         /// empty for the in-memory backend. JWTs expire — re-run wire to refresh.
         #[arg(long, env = "AGENTKEYS_SESSION_BEARER", default_value = "")]
         session_bearer: String,
+
+        /// Memory engine baked into the pre_llm_call hook: `passthrough`
+        /// (inject the whole namespace, default) or `lexical` (deterministic
+        /// recency/relevance selection). Plan §6a / arch.md §22.
+        #[arg(long, env = "AGENTKEYS_MEMORY_ENGINE", default_value = "passthrough")]
+        memory_engine: String,
+
+        /// Cap how many memory lines the engine injects (omit = unbounded).
+        #[arg(long, env = "AGENTKEYS_MEMORY_MAX_LINES")]
+        memory_max_lines: Option<u32>,
     },
 
     #[command(
@@ -1087,6 +1097,8 @@ async fn main() {
             mcp_url,
             vendor_token,
             session_bearer,
+            memory_engine,
+            memory_max_lines,
         } => agentkeys_cli::wire::cmd_wire(
             runtime,
             agentkeys_cli::wire::WireRequest {
@@ -1097,6 +1109,8 @@ async fn main() {
                 mcp_url: mcp_url.clone(),
                 vendor_token: vendor_token.clone(),
                 session_bearer: session_bearer.clone(),
+                memory_engine: memory_engine.clone(),
+                memory_max_lines: *memory_max_lines,
                 check_only: *check_only,
             },
         ),
diff --git a/crates/agentkeys-cli/src/wire.rs b/crates/agentkeys-cli/src/wire.rs
index 76f5402e..1791791b 100644
--- a/crates/agentkeys-cli/src/wire.rs
+++ b/crates/agentkeys-cli/src/wire.rs
@@ -38,6 +38,12 @@ pub struct WireRequest {
     /// (TTL ≤ 5h) — re-run `agentkeys wire` to refresh, or point the demo at
     /// a fresh session.
     pub session_bearer: String,
+    /// Memory engine baked into the pre_llm_call hook (`passthrough` | `lexical`,
+    /// plan §6a). `passthrough`/empty injects the whole namespace and emits no
+    /// engine env, so the generated script stays byte-identical to the default.
+    pub memory_engine: String,
+    /// Optional cap on how many memory lines the engine injects (None = all).
+    pub memory_max_lines: Option<u32>,
     /// When true, report drift without writing (drift-check / dry-run).
     pub check_only: bool,
 }
@@ -124,6 +130,19 @@ impl HermesAdapter {
                 body = body,
             )
         };
+        let memory_engine_exports = {
+            let mut exports = String::new();
+            if !req.memory_engine.is_empty() && req.memory_engine != "passthrough" {
+                exports.push_str(&format!(
+                    "export AGENTKEYS_MEMORY_ENGINE={}\n",
+                    shell_quote(&req.memory_engine)
+                ));
+            }
+            if let Some(max_lines) = req.memory_max_lines {
+                exports.push_str(&format!("export AGENTKEYS_MEMORY_MAX_LINES={max_lines}\n"));
+            }
+            exports
+        };
         vec![
             (
                 "agentkeys-pretool-permission-gate.sh".to_string(),
@@ -139,7 +158,7 @@ impl HermesAdapter {
             (
                 "agentkeys-prellm-memory-inject.sh".to_string(),
                 header(&format!(
-                    "exec {bin} hook memory-inject --namespaces {ns}",
+                    "{memory_engine_exports}exec {bin} hook memory-inject --namespaces {ns}",
                     ns = shell_quote(&req.namespaces),
                 )),
             ),
@@ -513,6 +532,8 @@ mod tests {
             mcp_url: "http://localhost:8088/mcp".into(),
             vendor_token: "demo-tok".into(),
             session_bearer: String::new(),
+            memory_engine: "passthrough".into(),
+            memory_max_lines: None,
             check_only: false,
         }
     }
@@ -559,6 +580,30 @@ mod tests {
             .contains("hook memory-inject --namespaces 'travel,personal'"));
     }
 
+    #[test]
+    fn scripts_omit_memory_engine_by_default() {
+        let a = HermesAdapter;
+        // Default passthrough + no budget → no engine env, byte-identical script.
+        let scripts = a.scripts("/usr/local/bin/agentkeys", &req());
+        assert!(!scripts[2].1.contains("AGENTKEYS_MEMORY_ENGINE"));
+        assert!(!scripts[2].1.contains("AGENTKEYS_MEMORY_MAX_LINES"));
+    }
+
+    #[test]
+    fn scripts_bake_memory_engine_when_set() {
+        let a = HermesAdapter;
+        let mut r = req();
+        r.memory_engine = "lexical".into();
+        r.memory_max_lines = Some(3);
+        let prellm = &a.scripts("/usr/local/bin/agentkeys", &r)[2].1;
+        assert!(prellm.contains("export AGENTKEYS_MEMORY_ENGINE='lexical'"));
+        assert!(prellm.contains("export AGENTKEYS_MEMORY_MAX_LINES=3"));
+        // engine env precedes the exec line so it is in scope for the hook
+        let engine_at = prellm.find("AGENTKEYS_MEMORY_ENGINE").unwrap();
+        let exec_at = prellm.find("hook memory-inject").unwrap();
+        assert!(engine_at < exec_at);
+    }
+
     #[test]
     fn write_if_changed_is_idempotent() {
         let dir = std::env::temp_dir().join(format!("agentkeys-wire-{}", std::process::id()));
diff --git a/docs/operator-runbook-wire.md b/docs/operator-runbook-wire.md
index abdfb3f5..a55c7e09 100644
--- a/docs/operator-runbook-wire.md
+++ b/docs/operator-runbook-wire.md
@@ -273,6 +273,7 @@ Env overrides: `SANDBOX_URL`, `MCP_PORT`, `SESSION_ID` (default `alice`),
 `BUILDER_IMAGE` / `CARGO_REGISTRY_VOL` / `CARGO_GIT_VOL` / `RUSTUP_VOL` (build cache),
 `SBX_EXEC_MAXTIME` (per-sandbox-call ceiling, default 600s),
 `SEED_MEMORY_CONTENT` / `SEED_SCOPE_SERVICES` (real-mode 1.5 seed),
+`MEMORY_ENGINE` (default `passthrough`; set `lexical` for deterministic recency/relevance selection) / `MEMORY_MAX_LINES` (cap injected lines) — the engine is baked into the wired `pre_llm_call` hook and runs over the **real worker's** lines (plan §6a); a multi-line `SEED_MEMORY_CONTENT` makes the selection visible,
 `OPERATOR_KEY_FILE` (master key for the 0.7 operator-session mint),
 `AGENTKEYS_REUSE_AGENT=1` (skip Phase P fresh pairing; reuse a master-side agent) ·
 `AGENTKEYS_AGENT_SESSION_BEARER` (override the agent session) ·
diff --git a/harness/phase1-wire-demo.sh b/harness/phase1-wire-demo.sh
index cb16477a..7bc88d3e 100755
--- a/harness/phase1-wire-demo.sh
+++ b/harness/phase1-wire-demo.sh
@@ -48,6 +48,12 @@ LLM_API_KEY="${LLM_API_KEY:-${OPENROUTER_API_KEY:-}}"
 LLM_BASE_URL="${LLM_BASE_URL:-https://openrouter.ai/api/v1}"
 LLM_MODEL="${LLM_MODEL:-deepseek/deepseek-v4-flash}"   # OpenRouter slug; ':free' tier is 429-throttled
 MEMORY_NS="${MEMORY_NS:-travel}"
+# Memory engine baked into the wired pre_llm_call hook (plan §6a / arch.md §22).
+# Default `passthrough` injects the whole namespace (demo unchanged). Set
+# MEMORY_ENGINE=lexical (+ optional MEMORY_MAX_LINES, + a multi-line
+# SEED_MEMORY_CONTENT) to demo deterministic selection over the REAL worker.
+MEMORY_ENGINE="${MEMORY_ENGINE:-passthrough}"
+MEMORY_MAX_LINES="${MEMORY_MAX_LINES:-}"
 PAYMENT_SCOPE="${PAYMENT_SCOPE:-payment.spend}"
 ENV_FILE="${ENV_FILE:-$REPO_ROOT/scripts/operator-workstation.env}"
 AGENT_FILE="${AGENT_FILE:-$HOME/.agentkeys/agents/${AGENT_LABEL}.json}"
@@ -996,8 +1002,9 @@ phase2_wire() {
   skip_phase 2 && { log "Phase 2 — wire: skip (--skip-2)"; return; }
   log "Phase 2 — wire (#141 core)"
   resolve_sbx_paths || return
-  local wire_args="hermes --actor-omni $ACTOR_OMNI --operator-omni $OPERATOR_OMNI --namespaces $MEMORY_NS --payment-scope $PAYMENT_SCOPE --mcp-url $MCP_URL_IN_SANDBOX --vendor-token $VENDOR_TOKEN"
+  local wire_args="hermes --actor-omni $ACTOR_OMNI --operator-omni $OPERATOR_OMNI --namespaces $MEMORY_NS --payment-scope $PAYMENT_SCOPE --mcp-url $MCP_URL_IN_SANDBOX --vendor-token $VENDOR_TOKEN --memory-engine $MEMORY_ENGINE"
   [[ -n "$SESSION_BEARER" ]] && wire_args="$wire_args --session-bearer $SESSION_BEARER"
+  [[ -n "$MEMORY_MAX_LINES" ]] && wire_args="$wire_args --memory-max-lines $MEMORY_MAX_LINES"
 
   # 2.1 check-only (read-only)
   sbx_exec "$AGENT_BIN_DST wire $wire_args --check-only" | sed 's/^/    /'
@@ -1029,7 +1036,8 @@ phase3_acts() {
   # 3.1 Act 1 — memory inject (pre_llm_call)
   local a1; a1="$(sbx_hook 'agentkeys-prellm-memory-inject.sh' '{"hook_event_name":"pre_llm_call"}')"
   if echo "$a1" | jq -e '.context' >/dev/null 2>&1; then
-    ok "3.1 Act1 memory" "$(echo "$a1" | jq -r '.context' | tr '\n' ' ' | cut -c1-60)…"
+    local mem_src; mem_src="$([[ "$MODE" == real ]] && echo 'REAL worker' || echo 'in-mem fixture')"
+    ok "3.1 Act1 memory" "engine=$MEMORY_ENGINE via $mem_src → $(echo "$a1" | jq -r '.context' | tr '\n' ' ' | cut -c1-44)…"
   else
     fail "3.1 Act1 memory" "no context returned: $a1"
   fi

From 21f617e3986d50e0386beba2f2f98e9638a3abb8 Mon Sep 17 00:00:00 2001
From: Hanwen Cheng <heawen.cheng@gmail.com>
Date: Tue, 2 Jun 2026 18:25:59 +0800
Subject: [PATCH 5/6] test(harness): idempotent self-contained storage-solution
 test (#147)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

harness/storage-test.sh — proves the memory STORAGE solution from a fresh
checkout with NO external infra (no AWS, no chain, no broker, no network):

  step 0 env+cache   resolves CARGO_TARGET_DIR/CARGO_HOME (build cache → fast
                     re-runs); unsets AGENTKEYS_BROKER_URL/_DATA_ROLE_ARN so
                     tests can never reach a live broker (the env leak that
                     makes cli provision tests hit prod).
  step 1 prereqs     cargo / jq / curl, fail-loud with install hints.
  step 2 build       cargo build CLI + MCP server; reports cache-hit vs compiled.
  step 3 suites      runs the REAL storage code paths — envelope AES-256-GCM
                     (encrypt-at-rest), per-actor S3 key derivation, namespace
                     isolation (#147), pluggable engine. cli uses --lib to skip
                     the env-dependent provision integration tests.
  step 4 roundtrip   starts a local in-memory MCP server process and drives
                     put → get → inject end-to-end, plus an engine-selection
                     check (lexical + max_lines budget → 1 of N lines).

Idempotent: cargo no-op when unchanged; the MCP server is killed + restarted
fresh each run (ephemeral state). Verified: two consecutive runs both ALL GREEN,
2nd reports build cache-hit.

NOT a real-S3 proof (in-memory backend = plumbing only). For the authoritative
live-worker proof, harness/phase1-wire-demo.sh --real remains the path; the
header documents this.
---
 harness/storage-test.sh | 186 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 186 insertions(+)
 create mode 100755 harness/storage-test.sh

diff --git a/harness/storage-test.sh b/harness/storage-test.sh
new file mode 100755
index 00000000..9e2860ab
--- /dev/null
+++ b/harness/storage-test.sh
@@ -0,0 +1,186 @@
+#!/usr/bin/env bash
+# harness/storage-test.sh — idempotent, self-contained test of the AgentKeys
+# memory STORAGE solution.
+#
+# What it proves, from a FRESH checkout, with NO external infra (no AWS, no
+# chain, no broker, no network):
+#   0. env + cache    — resolves the cargo build cache; sanitizes broker env.
+#   1. prereqs        — cargo / jq / curl present (fails loud with install hints).
+#   2. build          — builds the CLI + MCP server (cargo cache → fast re-runs).
+#   3. test suites    — runs the REAL storage code paths: envelope crypto
+#                       (encrypt-at-rest), per-actor S3 key derivation,
+#                       namespace isolation (#147), and the pluggable engine.
+#   4. live roundtrip — starts a local MCP server process (in-memory backend) and
+#                       drives put → get → inject end-to-end, plus an engine
+#                       selection check (lexical + budget).
+#
+# Idempotent: every run is a cargo no-op when nothing changed; the MCP server
+# is killed + restarted fresh (ephemeral state) each run. Re-run safely.
+#
+# NOT a real-S3 proof. The in-memory backend exercises the put/get/engine
+# PLUMBING without AWS. For the authoritative real-worker proof (broker cap-mint
+# → per-actor STS → memory.litentry.org → S3), run:
+#     bash harness/phase1-wire-demo.sh --real
+#
+# Usage: bash harness/storage-test.sh [--release] [--no-build] [--keep-server]
+#   --release       build + test in release profile (default: debug, faster)
+#   --no-build      skip the build step (use existing binaries)
+#   --keep-server   leave the MCP server running after exit (for manual poking)
+#
+# Env overrides (no hardcoded values — all have sane defaults):
+#   CARGO_TARGET_DIR / CARGO_HOME          build cache locations
+#   STORAGE_TEST_PORT (18099)              MCP listen port
+#   STORAGE_TEST_ACTOR / _OPERATOR / _DEVICE   demo identities (mirror
+#                                          crates/agentkeys-mcp-server/src/backend/in_memory.rs)
+#   STORAGE_TEST_VENDOR (magiclick) / _TOKEN (demo-tok)
+
+set -uo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
+cd "$REPO_ROOT" || exit 1
+
+# ─── output (CLAUDE.md ok/skip/fail convention) ──────────────────────────────
+log()  { printf '\n[storage-test] %s\n' "$*"; }
+ok()   { printf '  %-28s ok proceeding (%s)\n' "$1" "$2"; }
+skip() { printf '  %-28s skip %s\n' "$1" "$2"; }
+fail() { printf '  %-28s FAIL %s\n' "$1" "$2" >&2; FAILED=$((FAILED + 1)); }
+FAILED=0
+
+# ─── flags ───────────────────────────────────────────────────────────────────
+PROFILE="debug"
+CARGO_PROFILE_FLAG=""
+DO_BUILD=1
+KEEP_SERVER=0
+for arg in "$@"; do
+  case "$arg" in
+    --release)     PROFILE="release"; CARGO_PROFILE_FLAG="--release" ;;
+    --no-build)    DO_BUILD=0 ;;
+    --keep-server) KEEP_SERVER=1 ;;
+    -h|--help)     grep '^#' "$0" | sed 's/^#\{1,\} \{0,1\}//'; exit 0 ;;
+    *)             echo "unknown arg: $arg (try --help)" >&2; exit 2 ;;
+  esac
+done
+
+# ─── step 0 — env + cache ────────────────────────────────────────────────────
+log "step 0 — env + cache"
+export CARGO_TARGET_DIR="${CARGO_TARGET_DIR:-$REPO_ROOT/target}"
+export CARGO_HOME="${CARGO_HOME:-$HOME/.cargo}"
+# Sanitize broker env so the storage tests can NEVER reach a live broker — the
+# AGENTKEYS_BROKER_URL leak that otherwise makes provision tests hit prod.
+unset AGENTKEYS_BROKER_URL AGENTKEYS_DATA_ROLE_ARN 2>/dev/null || true
+
+ACTOR="${STORAGE_TEST_ACTOR:-0xa0c701a0c701a0c701a0c701a0c701a0c701a0c701a0c701a0c701a0c701a0c7}"
+OPERATOR="${STORAGE_TEST_OPERATOR:-0x07e8a107e8a107e8a107e8a107e8a107e8a107e8a107e8a107e8a107e8a107e8}"
+DEVICE="${STORAGE_TEST_DEVICE:-0xdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef}"
+PORT="${STORAGE_TEST_PORT:-18099}"
+VENDOR="${STORAGE_TEST_VENDOR:-magiclick}"
+TOKEN="${STORAGE_TEST_TOKEN:-demo-tok}"
+MCP_URL="http://127.0.0.1:$PORT/mcp"
+ok "cache" "CARGO_TARGET_DIR=$CARGO_TARGET_DIR"
+ok "env" "profile=$PROFILE port=$PORT actor=${ACTOR:0:12}…"
+
+# ─── step 1 — prereqs ────────────────────────────────────────────────────────
+log "step 1 — prereqs"
+have() { command -v "$1" >/dev/null 2>&1; }
+if have cargo; then ok "cargo" "$(cargo --version 2>/dev/null | cut -d' ' -f1-2)"; else fail "cargo" "not found — install rust via https://rustup.rs"; fi
+if have jq;    then ok "jq" "present"; else fail "jq" "not found — brew install jq / apt-get install jq"; fi
+if have curl;  then ok "curl" "present"; else fail "curl" "not found"; fi
+if [[ $FAILED -gt 0 ]]; then log "prereqs missing — aborting ($FAILED)"; exit 1; fi
+
+# ─── step 2 — build (cached) ─────────────────────────────────────────────────
+BIN_DIR="$CARGO_TARGET_DIR/$PROFILE"
+AGENTKEYS_BIN="$BIN_DIR/agentkeys"
+MCP_BIN="$BIN_DIR/agentkeys-mcp-server"
+if [[ "$DO_BUILD" == 1 ]]; then
+  log "step 2 — build (cargo cache → fast re-runs)"
+  build_out="$(cargo build $CARGO_PROFILE_FLAG -p agentkeys-cli -p agentkeys-mcp-server 2>&1)"
+  build_rc=$?
+  if [[ $build_rc -ne 0 ]]; then
+    echo "$build_out" | tail -25 >&2
+    fail "build" "cargo build failed (rc=$build_rc)"
+    log "summary: $FAILED failure(s)"; exit 1
+  fi
+  if echo "$build_out" | grep -q "Compiling"; then ok "build" "compiled — cache updated"; else ok "build" "up to date — cache hit (no rebuild)"; fi
+else
+  log "step 2 — build skipped (--no-build)"
+fi
+[[ -x "$AGENTKEYS_BIN" ]] || { fail "build" "missing binary $AGENTKEYS_BIN (drop --no-build)"; log "summary: $FAILED failure(s)"; exit 1; }
+[[ -x "$MCP_BIN" ]]       || { fail "build" "missing binary $MCP_BIN (drop --no-build)"; log "summary: $FAILED failure(s)"; exit 1; }
+
+# ─── step 3 — storage test suites (real code paths) ──────────────────────────
+log "step 3 — storage test suites (envelope crypto · per-actor key · namespace isolation · engine)"
+run_suite() {
+  local crate="$1"; shift
+  local out rc passed failed
+  out="$(cargo test $CARGO_PROFILE_FLAG -p "$crate" "$@" 2>&1)"; rc=$?
+  passed="$(echo "$out" | grep -oE '[0-9]+ passed' | awk '{s+=$1} END{print s+0}')"
+  failed="$(echo "$out" | grep -oE '[0-9]+ failed' | awk '{s+=$1} END{print s+0}')"
+  if [[ $rc -eq 0 ]]; then ok "test:$crate" "$passed passed"; else echo "$out" | tail -30 >&2; fail "test:$crate" "$failed failed (rc=$rc)"; fi
+}
+run_suite agentkeys-core              # envelope (AES-256-GCM, AAD), s3_backend, memory_engine
+run_suite agentkeys-worker-memory     # s3_key derivation, memory/credentials prefix split, namespace segregation (#147)
+run_suite agentkeys-mcp-server        # memory.put / memory.get tools
+run_suite agentkeys-cli --lib         # engine wiring: wire-bake + hook (--lib skips env-dependent provision integration tests)
+
+# ─── step 4 — live storage roundtrip (in-memory backend) ─────────────────────
+log "step 4 — live roundtrip: put → get → inject → engine-select (in-memory MCP; no AWS/chain/broker)"
+# idempotent: clear any prior storage-test server on this port, then start fresh
+pkill -f "agentkeys-mcp-server.*--listen 127.0.0.1:$PORT" 2>/dev/null || true
+sleep 0.3
+SERVER_LOG="$(mktemp -t storage-test-mcp.XXXXXX 2>/dev/null || echo /tmp/storage-test-mcp.$$.log)"
+"$MCP_BIN" --backend in-memory --transport http --listen "127.0.0.1:$PORT" \
+  --vendor-tokens "$VENDOR:$TOKEN" \
+  --default-actor "$ACTOR" --default-operator-omni "$OPERATOR" --default-device-key-hash "$DEVICE" \
+  >"$SERVER_LOG" 2>&1 &
+SERVER_PID=$!
+cleanup() { if [[ "$KEEP_SERVER" != 1 && -n "${SERVER_PID:-}" ]]; then kill "$SERVER_PID" 2>/dev/null || true; fi; }
+trap cleanup EXIT
+
+healthy=0
+for _ in $(seq 1 50); do
+  if curl -fsS -m 2 "http://127.0.0.1:$PORT/healthz" >/dev/null 2>&1; then healthy=1; break; fi
+  sleep 0.2
+done
+if [[ "$healthy" != 1 ]]; then
+  echo "--- mcp server log ---" >&2; tail -20 "$SERVER_LOG" >&2
+  fail "4.0 mcp up" "server not healthy on :$PORT"
+  log "summary: $FAILED failure(s)"; exit 1
+fi
+ok "4.0 mcp up" "in-memory MCP on :$PORT (pid $SERVER_PID)"
+
+export AGENTKEYS_MCP_URL="$MCP_URL"
+export AGENTKEYS_MCP_VENDOR_TOKEN="$TOKEN"
+export AGENTKEYS_ACTOR_OMNI="$ACTOR"
+export AGENTKEYS_OPERATOR_OMNI="$OPERATOR"
+
+# 4.1 READ a pre-seeded namespace (proves read from storage)
+seeded="$("$AGENTKEYS_BIN" hook memory-inject --namespaces travel </dev/null 2>/dev/null | jq -r '.context // ""')"
+if echo "$seeded" | grep -q "Chengdu"; then ok "4.1 read seeded" "travel → $(echo "$seeded" | tr '\n' ' ' | cut -c1-40)…"; else fail "4.1 read seeded" "expected 'Chengdu', got: $(echo "$seeded" | cut -c1-80)"; fi
+
+# 4.2 WRITE a fresh multi-line namespace (proves write to storage)
+NS="storagetest"
+MARKER="roundtrip-$$"
+CONTENT=$'Booked Chengdu flight CA4515 on Apr 12.\nPeanut allergy noted for inflight meals.\nHotel in Yulin district near hotpot street.\nMarker '"$MARKER"
+put_out="$("$AGENTKEYS_BIN" memory put --namespace "$NS" --content "$CONTENT" 2>&1)"
+if echo "$put_out" | grep -q "s3_key"; then ok "4.2 put" "wrote 4-line '$NS'"; else fail "4.2 put" "$(echo "$put_out" | tr '\n' ' ' | cut -c1-140)"; fi
+
+# 4.3 READ-BACK via inject, default passthrough engine (proves the round trip)
+got="$("$AGENTKEYS_BIN" hook memory-inject --namespaces "$NS" </dev/null 2>/dev/null | jq -r '.context // ""')"
+got_body_lines="$(echo "$got" | grep -vc '^## Memory:')"
+if echo "$got" | grep -q "$MARKER"; then ok "4.3 get roundtrip" "read back marker; $got_body_lines body lines (passthrough = all)"; else fail "4.3 get roundtrip" "marker '$MARKER' missing: $(echo "$got" | tr '\n' ' ' | cut -c1-100)"; fi
+
+# 4.4 ENGINE selection over storage: lexical + max_lines=1 → exactly 1 body line
+sel="$(AGENTKEYS_MEMORY_ENGINE=lexical AGENTKEYS_MEMORY_MAX_LINES=1 "$AGENTKEYS_BIN" hook memory-inject --namespaces "$NS" </dev/null 2>/dev/null | jq -r '.context // ""')"
+sel_body="$(echo "$sel" | grep -v '^## Memory:')"
+sel_lines="$(echo "$sel_body" | grep -c .)"
+if [[ "$sel_lines" == 1 ]]; then ok "4.4 engine select" "lexical/max_lines=1 → 1 of $got_body_lines lines: $(echo "$sel_body" | cut -c1-44)"; else fail "4.4 engine select" "expected 1 body line, got $sel_lines: $(echo "$sel_body" | tr '\n' ' ' | cut -c1-80)"; fi
+
+# ─── summary ─────────────────────────────────────────────────────────────────
+if [[ $FAILED -eq 0 ]]; then
+  log "ALL GREEN — storage solution verified (build · suites · roundtrip · engine)"
+  exit 0
+else
+  log "$FAILED FAILURE(S) — see above"
+  exit 1
+fi

From 7579fd42f95e7b23c198dcd84b8068b33f244cbf Mon Sep 17 00:00:00 2001
From: Hanwen Cheng <heawen.cheng@gmail.com>
Date: Tue, 2 Jun 2026 19:04:37 +0800
Subject: [PATCH 6/6] =?UTF-8?q?fix(memory=5Fengine):=20cargo=20fmt=20+=20c?=
 =?UTF-8?q?lippy=20unnecessary=5Fsort=5Fby=20=E2=80=94=20green=20CI=20(#14?=
 =?UTF-8?q?7)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CI (harness-ci.yml + mcp-server.yml) failed on the new memory_engine.rs:
- cargo fmt --all -- --check: long test-call lines + env_usize chain needed
  rustfmt wrapping.
- cargo clippy --workspace -- -D warnings: clippy::unnecessary_sort_by on the
  PassthroughEngine recency sort →
  sort_by(|a,b| b.seq.cmp(&a.seq))  →  sort_by_key(|l| std::cmp::Reverse(l.seq)).

Verified locally against the exact CI commands:
- cargo fmt --all -- --check                              (exit 0)
- cargo clippy --workspace --all-targets -- -D warnings   (exit 0)
- cargo test --workspace -- --test-threads=1              (735 passed)
- cargo test -p agentkeys-mcp-server --all-features       (35 passed)
---
 crates/agentkeys-core/src/memory_engine.rs | 33 +++++++++++++++++-----
 1 file changed, 26 insertions(+), 7 deletions(-)

diff --git a/crates/agentkeys-core/src/memory_engine.rs b/crates/agentkeys-core/src/memory_engine.rs
index 8fd6abac..8dfd85a0 100644
--- a/crates/agentkeys-core/src/memory_engine.rs
+++ b/crates/agentkeys-core/src/memory_engine.rs
@@ -60,7 +60,9 @@ impl SelectionBudget {
 }
 
 fn env_usize(key: &str) -> Option<usize> {
-    std::env::var(key).ok().and_then(|raw| raw.trim().parse().ok())
+    std::env::var(key)
+        .ok()
+        .and_then(|raw| raw.trim().parse().ok())
 }
 
 /// The pluggable engine. Input is gate-authorized lines; output is the ordered
@@ -117,7 +119,7 @@ impl MemoryEngine for PassthroughEngine {
             return lines;
         }
         let mut by_recency = lines;
-        by_recency.sort_by(|a, b| b.seq.cmp(&a.seq));
+        by_recency.sort_by_key(|line| std::cmp::Reverse(line.seq));
         apply_budget(by_recency, budget)
     }
 }
@@ -245,7 +247,12 @@ Tokyo conference in March, stayed in Shibuya.";
 
     #[test]
     fn lexical_with_query_selects_relevant_line() {
-        let out = select_blob(&LexicalEngine, Some("where did I go in Chengdu"), BLOB, &budget(Some(1)));
+        let out = select_blob(
+            &LexicalEngine,
+            Some("where did I go in Chengdu"),
+            BLOB,
+            &budget(Some(1)),
+        );
         assert_eq!(out, "Chengdu trip — Apr 12 to 16, hotpot at Yulin.");
     }
 
@@ -259,8 +266,14 @@ Tokyo conference in March, stayed in Shibuya.";
     fn single_line_blob_unchanged_across_engines() {
         let single = "Chengdu trip — Apr 12 to 16, hotpot at Yulin.";
         let unbounded = SelectionBudget::default();
-        assert_eq!(select_blob(&PassthroughEngine, None, single, &unbounded), single);
-        assert_eq!(select_blob(&LexicalEngine, Some("chengdu"), single, &unbounded), single);
+        assert_eq!(
+            select_blob(&PassthroughEngine, None, single, &unbounded),
+            single
+        );
+        assert_eq!(
+            select_blob(&LexicalEngine, Some("chengdu"), single, &unbounded),
+            single
+        );
     }
 
     #[test]
@@ -287,7 +300,13 @@ Tokyo conference in March, stayed in Shibuya.";
 
     #[test]
     fn empty_blob_stays_empty() {
-        assert_eq!(select_blob(&PassthroughEngine, None, "   ", &SelectionBudget::default()), "");
-        assert_eq!(select_blob(&LexicalEngine, Some("x"), "", &SelectionBudget::default()), "");
+        assert_eq!(
+            select_blob(&PassthroughEngine, None, "   ", &SelectionBudget::default()),
+            ""
+        );
+        assert_eq!(
+            select_blob(&LexicalEngine, Some("x"), "", &SelectionBudget::default()),
+            ""
+        );
     }
 }