diff --git a/AGENTS.md b/AGENTS.md
index c4aa494..5ed3829 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -5,4 +5,4 @@
 所有的测试文件只能写在现有的test文件夹下
 修改过程中发现错误，如果是本次范围就修改，否则要在最后指出
 当前的设计不能假设单会话的，而应该假设多会话场景
-在用户明确要求执行修改代码时才能改代码，以最新的一条用户信息为准
\ No newline at end of file
+除非用户最新的一条消息中有显式命令（执行方案、修改代码等）要求修改代码，否则绝对不改代码；之前要求修改的指令全部不算数，不要再根据之前的上下文或当前不确定的指令猜测是否要直接修改代码
\ No newline at end of file
diff --git a/CLAUDE.md b/CLAUDE.md
index c4aa494..5ed3829 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -5,4 +5,4 @@
 所有的测试文件只能写在现有的test文件夹下
 修改过程中发现错误，如果是本次范围就修改，否则要在最后指出
 当前的设计不能假设单会话的，而应该假设多会话场景
-在用户明确要求执行修改代码时才能改代码，以最新的一条用户信息为准
\ No newline at end of file
+除非用户最新的一条消息中有显式命令（执行方案、修改代码等）要求修改代码，否则绝对不改代码；之前要求修改的指令全部不算数，不要再根据之前的上下文或当前不确定的指令猜测是否要直接修改代码
\ No newline at end of file
diff --git a/packages/codingcode/src/agent/agent.ts b/packages/codingcode/src/agent/agent.ts
index ac9f02d..002a867 100644
--- a/packages/codingcode/src/agent/agent.ts
+++ b/packages/codingcode/src/agent/agent.ts
@@ -1,8 +1,11 @@
 import { Effect } from 'effect';
+import { appendFileSync } from 'fs';
 import type { Message, ToolCall } from '../core/types.js';
 import { AgentError } from '../core/error.js';
 import { Result } from '../core/result.js';
 import type { ToolDescription } from '../tools/types.js';
+import type { LLMResponse } from '../llm/types.js';
+import type { LLMClient } from '../llm/client.js';
 import { ToolService } from '../tools/registry.js';
 import { ToolExecutorService } from '../tools/executor.js';
 import { ContextService } from '../context/context.js';
@@ -11,6 +14,7 @@ import { CheckpointService } from '../checkpoint/checkpoint-service.js';
 import { buildSystemPrompt, type SystemPromptVariant } from './prompt.js';
 import { resolveConfig } from './config.js';
 import { getContextConfig } from '../context/config.js';
+import { estimateTokens } from '../context/utils/tokens.js';
 import { ToolSearchService } from '../tools/tool-search-service.js';
 import { sharedTodoStore } from '../self/todo.js';
 import { buildToolsForAgent, buildDeferredCatalogContent } from './build-tools.js';
@@ -26,7 +30,7 @@ export const sendMessage = (
   sessionId: string | undefined,
   input: string,
   cwd: string,
-  llm: any,
+  llm: LLMClient,
   options?: {
     signal?: AbortSignal
   },
@@ -66,15 +70,16 @@ export type AgentEvent =
   | { readonly _tag: 'ApprovalRequest'; readonly id: string; readonly tool: string; readonly args: Record<string, unknown> }
   | { readonly _tag: 'ToolResult'; readonly id: string; readonly name: string; readonly output: string; readonly ok: boolean }
   | { readonly _tag: 'Step'; readonly step: number; readonly max: number }
-  | { readonly _tag: 'ReactiveCompact'; readonly attempt: number; readonly released: number }
+  | { readonly _tag: 'ReactiveCompact'; readonly attempt: number; readonly released: number; readonly promptEstimate: number }
   | { readonly _tag: 'Error'; readonly error: AgentError }
   | { readonly _tag: 'Done'; readonly content: string }
   | { readonly _tag: 'TodoUpdate'; readonly items: ReadonlyArray<{ readonly step: string; readonly status: 'pending' | 'in_progress' | 'completed' }> }
-  | { readonly _tag: 'TurnId'; readonly turnId: number };
+  | { readonly _tag: 'TurnId'; readonly turnId: number }
+  | { readonly _tag: 'Usage'; readonly prompt: number; readonly completion: number; readonly total: number };
 
 export interface RunStreamOptions {
   state: SessionStoreState;
-  llm: LLMStreamAdapter;
+  llm: LLMClient;
   skillInstruction?: string;
   systemPromptVariant?: SystemPromptVariant;
   systemOverride?: string;
@@ -87,19 +92,6 @@ export interface RunStreamOptions {
   approvalOverride?: any;
 }
 
-export interface LLMStreamAdapter {
-  completeStream(params: {
-    messages: Message[];
-    system?: string;
-    tools?: ToolDescription[];
-    maxSteps?: number;
-    signal?: AbortSignal;
-  }): {
-    stream: AsyncIterable<string>;
-    response: Promise<Result<{ content: string; toolCalls?: ToolCall[] }, AgentError>>;
-  };
-}
-
 interface RunReActDeps {
   maxSteps: number;
   maxStopContinuations: number;
@@ -169,7 +161,12 @@ export async function* runReActLoop(
   const maxStopContinuations = opts.maxStopContinuations ?? deps.maxStopContinuations;
 
   for (let attempt = 0; attempt <= maxOverflowRetries; attempt++) {
-    const messages = Effect.runSync(ctx.build(state.sessionId, state.projectPath));
+    const { messages, snipTokensFreed, newBudgets } = Effect.runSync(ctx.build(state.sessionId, state.projectPath));
+    if (newBudgets.length > 0) {
+      for (const ev of newBudgets) {
+        appendFileSync(state.transcriptPath, JSON.stringify(ev) + '\n', 'utf8');
+      }
+    }
     let lastResult: Result<string, AgentError> | null = null;
     let overflow = false;
 
@@ -201,6 +198,23 @@ export async function* runReActLoop(
       const stepBeforePayload = { sessionId, step: step + 1 };
       await Effect.runPromise(hooks.emitDecision('agent.step.before', stepBeforePayload));
 
+      // Threshold-triggered LLM compaction
+      const compressResult = await Effect.runPromise(ctx.compactIfNeeded(state.sessionId, state.projectPath, llm, estimateTokens(messages), snipTokensFreed, llm.modelInfo.maxTokens, config));
+      if (compressResult.didCompress) {
+        yield { _tag: 'ReactiveCompact', attempt: 1, released: compressResult.released, promptEstimate: compressResult.promptEstimate };
+
+        const rebuilt = Effect.runSync(ctx.build(state.sessionId, state.projectPath));
+        if (rebuilt.newBudgets.length > 0) {
+          for (const ev of rebuilt.newBudgets) {
+            appendFileSync(state.transcriptPath, JSON.stringify(ev) + '\n', 'utf8');
+          }
+        }
+        messages.length = 0;
+        messages.push(...rebuilt.messages);
+        state.usage = undefined;
+        state.promptEstimate = estimateTokens(rebuilt.messages);
+      }
+
       // Build LLM messages: original messages + step.before transients
       const llmMessages = [...messages];
 
@@ -209,8 +223,7 @@ export async function* runReActLoop(
 
       const { stream: rawStream, response: respPromise } = llm.completeStream({
         messages: llmMessages, system: systemWithCatalog, tools, maxSteps: 1,
-        signal: opts.abortSignal,
-      });
+      }, opts.abortSignal);
 
       for await (const chunk of rawStream) {
         if (opts.abortSignal?.aborted) break;
@@ -221,8 +234,8 @@ export async function* runReActLoop(
       if (!llmResult.ok) {
         if (llmResult.error.code === 'CONTEXT_OVERFLOW' && attempt < maxOverflowRetries) {
           const aggressiveConfig = { ...config, keepRecentTurns: config.reactiveCompactKeepTurns };
-          const compressResult = await Effect.runPromise(ctx.compress(state.sessionId, state.projectPath, null, aggressiveConfig));
-          yield { _tag: 'ReactiveCompact', attempt: attempt + 1, released: compressResult.released };
+          const compressResult = await Effect.runPromise(ctx.compress(state.sessionId, state.projectPath, null, undefined, llm.modelInfo.maxTokens, aggressiveConfig));
+          yield { _tag: 'ReactiveCompact', attempt: attempt + 1, released: compressResult.released, promptEstimate: compressResult.promptEstimate };
           overflow = true;
           break;
         }
@@ -242,10 +255,13 @@ export async function* runReActLoop(
       }
       messages.push(assistantMsg);
       yield { _tag: 'Assistant', content: resp.content, toolCalls };
+      if (resp.usage) {
+        yield { _tag: 'Usage', prompt: resp.usage.prompt, completion: resp.usage.completion, total: resp.usage.total };
+      }
 
       if (!toolCalls || toolCalls.length === 0) {
         // LLM done — record assistant, then check stop hook
-        await Effect.runPromise(session.recordAssistant(state, resp.content, toolCalls || [], model));
+        await Effect.runPromise(session.recordAssistant(state, resp.content, toolCalls || [], model, resp.usage));
         const stopDecision: any = await Effect.runPromise(hooks.emitDecision('agent.turn.stop', {
           sessionId, content: resp.content, turnId: state.currentTurnId,
         }));
@@ -287,7 +303,7 @@ export async function* runReActLoop(
       // Execute tool calls — record assistant, execute batch, record results in one pipeline
       const allResults = await Effect.runPromise(
         Effect.gen(function* () {
-          const record = yield* session.recordAssistant(state, resp.content, toolCalls!, model);
+          const record = yield* session.recordAssistant(state, resp.content, toolCalls!, model, resp.usage);
           const results = yield* executor.executeBatch(toolCalls, state.sessionId, {
             turnId: state.currentTurnId,
             projectPath,
@@ -338,9 +354,8 @@ export async function* runReActLoop(
 
     if (overflow) continue;
 
-    // Turn completed — snapshot and compact
+    // Turn completed — snapshot
     checkpoint.snapshotFinal(projectPath, state.sessionId, state.currentTurnId);
-    await Effect.runPromise(ctx.appendTurnEnd(state.sessionId, state.projectPath, llm as any));
 
     // Fire-and-forget memory flush
     flushSessionToMemory(state.sessionId, llm).catch(e => logger.error('memory flush failed:', e));
diff --git a/packages/codingcode/src/client/direct.ts b/packages/codingcode/src/client/direct.ts
index 3f6fd4b..0fc2ef1 100644
--- a/packages/codingcode/src/client/direct.ts
+++ b/packages/codingcode/src/client/direct.ts
@@ -54,6 +54,12 @@ export async function* agentEventToStreamChunk(
       case 'TodoUpdate':
         yield { type: 'todo_update', items: event.items as any };
         break;
+      case 'Usage':
+        yield { type: 'usage', prompt: event.prompt, completion: event.completion, total: event.total };
+        break;
+      case 'ReactiveCompact':
+        yield { type: 'reactive_compact', released: event.released, promptEstimate: event.promptEstimate };
+        break;
     }
   }
 }
diff --git a/packages/codingcode/src/client/http.ts b/packages/codingcode/src/client/http.ts
index 053690d..da7389d 100644
--- a/packages/codingcode/src/client/http.ts
+++ b/packages/codingcode/src/client/http.ts
@@ -47,6 +47,13 @@ export async function createHttpClient(serverUrl: string): Promise<AgentClient>
           case 'todo_update':
             yield { type: 'todo_update', items: data.items as any };
             break;
+          case 'usage':
+            yield { type: 'usage', prompt: data.prompt as number, completion: data.completion as number, total: data.total as number };
+            break;
+          // Forward compaction notices too, matching the StreamChunk union and the HTTP agent-runtime client.
+          case 'reactive_compact':
+            yield { type: 'reactive_compact', released: data.released as number, promptEstimate: data.promptEstimate as number };
+            break;
           case 'error':
             throw new Error(data.message as string);
           case 'done':
diff --git a/packages/codingcode/src/client/http/agent-runtime.ts b/packages/codingcode/src/client/http/agent-runtime.ts
index 4c02d8d..2580f42 100644
--- a/packages/codingcode/src/client/http/agent-runtime.ts
+++ b/packages/codingcode/src/client/http/agent-runtime.ts
@@ -65,6 +65,12 @@ export function createHttpAgentClient(
           case 'todo_update':
             yield { type: 'todo_update', items: data.items as any };
             break;
+          case 'usage':
+            yield { type: 'usage', prompt: data.prompt as number, completion: data.completion as number, total: data.total as number };
+            break;
+          case 'reactive_compact':
+            yield { type: 'reactive_compact', released: data.released as number, promptEstimate: data.promptEstimate as number };
+            break;
           case 'error':
             throw new Error(data.message as string);
           case 'done':
diff --git a/packages/codingcode/src/client/types.ts b/packages/codingcode/src/client/types.ts
index 3c1cfda..1421f57 100644
--- a/packages/codingcode/src/client/types.ts
+++ b/packages/codingcode/src/client/types.ts
@@ -15,7 +15,9 @@ export type StreamChunk =
   | { type: 'tool_denied'; id: string; name: string; reason: string }
   | { type: 'error'; message: string }
   | { type: 'done' }
-  | { type: 'todo_update'; items: ReadonlyArray<{ step: string; status: string }> };
+  | { type: 'todo_update'; items: ReadonlyArray<{ step: string; status: string }> }
+  | { type: 'usage'; prompt: number; completion: number; total: number }
+  | { type: 'reactive_compact'; released: number; promptEstimate: number };
 
 export interface AgentClient {
   sendMessage(input: string, cwd?: string): AsyncGenerator<StreamChunk>;
diff --git a/packages/codingcode/src/context/compressor/index.ts b/packages/codingcode/src/context/compressor/index.ts
index 6303dee..fd303bc 100644
--- a/packages/codingcode/src/context/compressor/index.ts
+++ b/packages/codingcode/src/context/compressor/index.ts
@@ -1,20 +1,21 @@
 import { randomUUID } from 'crypto';
-import { readHistory, buildMessagesFromEvents, findSessionIndex } from '../../session/store.js';
+import { readHistory, findSessionIndex } from '../../session/store.js';
 import { resolveSessionDir } from '../../session/store.js';
-import { estimateTokensForContent } from '../utils/tokens.js';
+import { estimateTokens, estimateMessageTokens } from '../utils/tokens.js';
 import { resolveCompactionLLM } from './llm-resolver.js';
 import { COMPACTION_SYSTEM_PROMPT } from './prompt.js';
 import type { ContextConfig } from '../config.js';
 import type { Message } from '../../core/types.js';
 import type { SessionEvent, SummaryEvent } from '../../session/types.js';
 import type { LLMClient } from '../../llm/client.js';
-import { persistToolResult } from '../persist/store.js';
+import { assemblePayload } from '../organizer.js';
 import { join } from 'path';
 import { appendFileSync } from 'fs';
 
 export interface CompressResult {
   didCompress: boolean;
   released: number;
+  promptEstimate: number; // Estimated prompt tokens: recomputed from the assembled payload by compactWithLLM, or the caller's input estimate when compactIfNeeded skips.
 }
 
 interface CompressContext {
@@ -27,43 +28,73 @@ interface CompressContext {
   hiddenUuids: Set<string>;
 }
 
-/**
- * Compress in a single linear pass. Each step is idempotent within one turn.
- */
-export async function run(
+/** Compaction attempts per session that did not compress, plus the time of the latest one. */
+const compactFailureTracker = new Map<string, { count: number; lastAttempt: number }>();
+/** Failure records older than this are dropped, which re-enables compaction for the session. */
+const FAILURE_TTL_MS = 24 * 60 * 60 * 1000;
+/** Once a session has this many recorded failures, compaction is skipped until the record expires. */
+const MAX_COMPACT_FAILURES = 3;
+
+/**
+ * Returns the number of failed compaction attempts currently recorded for
+ * `sessionId`. A record older than `FAILURE_TTL_MS` is deleted and counts as zero.
+ */
+function getFailures(sessionId: string): number {
+  const entry = compactFailureTracker.get(sessionId);
+  if (!entry) return 0;
+  if (Date.now() - entry.lastAttempt > FAILURE_TTL_MS) {
+    compactFailureTracker.delete(sessionId);
+    return 0;
+  }
+  return entry.count;
+}
+
+/**
+ * Runs LLM compaction for a session when its estimated prompt size exceeds
+ * `modelMaxTokens * config.compactionThreshold`.
+ *
+ * The value compared against the threshold is `promptEstimate` minus
+ * `snipTokensFreed`. Every attempt that ends with `didCompress === false` is
+ * recorded as a failure, and nothing is attempted while the session has
+ * `MAX_COMPACT_FAILURES` recorded failures.
+ *
+ * @param promptEstimate Estimated token count of the prompt about to be sent.
+ * @param snipTokensFreed Token count subtracted from the estimate before the threshold check.
+ * @param modelMaxTokens Token limit that `config.compactionThreshold` is multiplied by.
+ * @returns The result of `compactWithLLM`, or a no-op result carrying the
+ *          unchanged `promptEstimate` when compaction was skipped.
+ */
+export async function compactIfNeeded(
   sessionId: string,
   encodedProjectPath: string,
-  usage: number,
-  llm: LLMClient | null,
+  promptEstimate: number,
+  snipTokensFreed: number,
+  modelMaxTokens: number,
   config: ContextConfig,
+  llm: LLMClient | null,
 ): Promise<CompressResult> {
-  const idx = findSessionIndex(sessionId);
-  const currentTurnId = idx?.currentTurnId ?? 0;
-  const ctx = buildContext(sessionId, encodedProjectPath, config, llm, currentTurnId);
-  const budget = config.defaultMaxTokens;
-
-  let remaining = usage;
-
-  // L1 Persist (always: large results from persistable tools → disk)
-  remaining -= applyToolResultBudget(ctx);
-
-  // Prune (>70% budget)
-  if (remaining > budget * config.thresholds.prune) {
-    remaining -= tryPruneTools(ctx);
+  const failures = getFailures(sessionId);
+  if (failures >= MAX_COMPACT_FAILURES) {
+    return { didCompress: false, released: 0, promptEstimate };
   }
 
-  // L2 Snip (message count threshold)
-  remaining -= trySnip(ctx);
+  const threshold = modelMaxTokens * config.compactionThreshold;
+  const effectiveEstimate = promptEstimate - (snipTokensFreed ?? 0);
+  if (effectiveEstimate <= threshold) {
+    return { didCompress: false, released: 0, promptEstimate };
+  }
 
-  // L3 Microcompact (tool result count threshold)
-  remaining -= tryMicrocompact(ctx);
+  const result = await compactWithLLM(sessionId, encodedProjectPath, config, llm, promptEstimate, modelMaxTokens);
 
-  // L5 Compaction (>90% budget)
-  if (remaining > budget * config.thresholds.compaction) {
-    remaining -= await tryL5Compaction(ctx);
+  if (result.didCompress) {
+    // A successful run removes the record instead of storing a zero count, so the
+    // module-level map does not keep one entry for every session it has ever seen.
+    compactFailureTracker.delete(sessionId);
+  } else {
+    compactFailureTracker.set(sessionId, { count: failures + 1, lastAttempt: Date.now() });
   }
 
-  return { didCompress: remaining < usage, released: usage - remaining };
+  return result;
 }
 
 export async function compactWithLLM(
@@ -71,12 +77,24 @@ export async function compactWithLLM(
   encodedProjectPath: string,
   config: ContextConfig,
   llm: LLMClient | null,
+  usage?: number,
+  modelMaxTokens?: number,
 ): Promise<CompressResult> {
   const idx = findSessionIndex(sessionId);
   const currentTurnId = idx?.currentTurnId ?? 0;
   const ctx = buildContext(sessionId, encodedProjectPath, config, llm, currentTurnId);
-  const released = await tryL5Compaction(ctx);
-  return { didCompress: released > 0, released };
+
+  let released = 0;
+
+  const threshold = modelMaxTokens ? modelMaxTokens * config.compactionThreshold : Infinity;
+  if (usage === undefined || usage - released > threshold) {
+    released += await tryL5Compaction(ctx);
+  }
+
+  const payload = assemblePayload(sessionId, encodedProjectPath, config);
+  const promptEstimate = estimateTokens(payload.messages);
+
+  return { didCompress: released > 0, released, promptEstimate };
 }
 
 // ---------- Context building ----------
@@ -152,146 +170,7 @@ function appendSummaryToSession(sessionId: string, event: SummaryEvent): void {
   appendFileSync(jsonlPath, JSON.stringify(event) + '\n', 'utf8');
 }
 
-// ---------- L1 Persist ----------
-
-function applyToolResultBudget(ctx: CompressContext): number {
-  const { sessionId, encodedProjectPath, config, events, hiddenUuids } = ctx;
-  let released = 0;
-
-  for (const ev of events) {
-    if (ev.type !== 'tool_result') continue;
-    if (hiddenUuids.has(ev.uuid)) continue;
-    if (ev.tokenCount <= config.thresholdTokens) continue;
-
-    const { path } = persistToolResult(encodedProjectPath, sessionId, ev.toolCallId, ev.output);
-    const preview = ev.output.slice(0, config.persistPreviewChars);
-    const replacement = `${preview}\n\n[…full output persisted at: ${path}. Use Read tool to access if needed.]`;
-
-    const summaryEvent: SummaryEvent = {
-      type: 'summary',
-      uuid: randomUUID(),
-      replaces: [ev.uuid],
-      summaryText: replacement,
-      method: 'collapse-llm',
-      timestamp: new Date().toISOString(),
-    };
-    appendSummaryToSession(sessionId,summaryEvent);
-    hiddenUuids.add(ev.uuid);
-    released += ev.tokenCount - estimateTokensForContent(replacement);
-  }
-
-  return released;
-}
-
-// ---------- L2 Prune ----------
-
-function tryPruneTools(ctx: CompressContext): number {
-  const { sessionId, config, currentTurnId, events, hiddenUuids } = ctx;
-  const candidates = collectPrunableTools(events, hiddenUuids, config, currentTurnId);
-  if (candidates.length === 0) return 0;
-
-  let released = 0;
-  for (const tool of candidates) {
-    if (released >= config.pruneMinRelease) break;
-    const tokenCount = estimateTokensForContent(tool.output);
-    const replacement = '[Old tool result content cleared]';
-
-    const event: SummaryEvent = {
-      type: 'summary',
-      uuid: randomUUID(),
-      replaces: [tool.uuid],
-      summaryText: replacement,
-      method: 'prune',
-      timestamp: new Date().toISOString(),
-    };
-    appendSummaryToSession(sessionId,event);
-    hiddenUuids.add(tool.uuid);
-    released += tokenCount - estimateTokensForContent(replacement);
-  }
-  return released;
-}
-
-// ---------- L2 Snip ----------
-
-function trySnip(ctx: CompressContext): number {
-  const { sessionId, config, events, hiddenUuids } = ctx;
-
-  // Build visible non-meta events for snip count
-  const visibleEvents = events.filter((ev) => {
-    if (ev.type === 'session_meta') return false;
-    if ('uuid' in ev && hiddenUuids.has((ev as any).uuid)) return false;
-    return true;
-  });
-
-  if (visibleEvents.length <= config.snipMaxMessages) return 0;
-
-  const headCount = config.snipKeepHead;
-  const tailCount = config.snipMaxMessages - config.snipKeepHead;
-
-  const head = visibleEvents.slice(0, headCount);
-  const tail = visibleEvents.slice(-tailCount);
-
-  const snippedEvents = visibleEvents.slice(headCount, visibleEvents.length - tailCount);
-  if (snippedEvents.length === 0) return 0;
-
-  const snippedTokens = snippedEvents.reduce((s, ev) => {
-    if ('content' in ev && typeof ev.content === 'string') return s + estimateTokensForContent(ev.content);
-    if ('output' in ev && typeof (ev as any).output === 'string') return s + estimateTokensForContent((ev as any).output);
-    return s;
-  }, 0);
-
-  const lastHeadTurn = head.length > 0 && 'turnId' in head[head.length - 1] ? (head[head.length - 1] as any).turnId : 0;
-  const firstTailTurn = tail.length > 0 && 'turnId' in tail[0] ? (tail[0] as any).turnId : lastHeadTurn;
-
-  // Collect uuids of snipped events
-  const snippedUuids = snippedEvents.filter((e) => 'uuid' in e).map((e) => (e as any).uuid as string);
-
-  const summaryText = `[${snippedEvents.length} messages snipped from conversation middle]`;
-
-  const event: SummaryEvent = {
-    type: 'summary',
-    uuid: randomUUID(),
-    replaces: snippedUuids,
-    summaryText,
-    method: 'context-collapse',
-    timestamp: new Date().toISOString(),
-  };
-  appendSummaryToSession(sessionId,event);
-  for (const u of snippedUuids) hiddenUuids.add(u);
-  return Math.max(0, snippedTokens - estimateTokensForContent(summaryText));
-}
-
-// ---------- L3 Microcompact ----------
-
-function tryMicrocompact(ctx: CompressContext): number {
-  const { sessionId, config, events, hiddenUuids } = ctx;
-  const toolResults = events.filter((ev) => ev.type === 'tool_result' && !hiddenUuids.has(ev.uuid)) as Extract<SessionEvent, { type: 'tool_result' }>[];
-  if (toolResults.length <= config.microKeepRecentTools) return 0;
-
-  let released = 0;
-  const recentIds = new Set(toolResults.slice(-config.microKeepRecentTools).map((e) => e.uuid));
-  for (const tool of toolResults) {
-    if (recentIds.has(tool.uuid)) continue;
-    if (tool.output.length <= 120) continue;
-    const originalTokens = tool.tokenCount;
-    const replacement = '[Earlier tool result compacted. Re-run if needed.]';
-
-    const event: SummaryEvent = {
-      type: 'summary',
-      uuid: randomUUID(),
-      replaces: [tool.uuid],
-      summaryText: replacement,
-      method: 'prune',
-      timestamp: new Date().toISOString(),
-    };
-    appendSummaryToSession(sessionId,event);
-    hiddenUuids.add(tool.uuid);
-    released += originalTokens - estimateTokensForContent(replacement);
-  }
-  return released;
-}
-
-// ---------- L5 Compaction ----------
+// ---------- LLM Compaction ----------
 
 async function tryL5Compaction(ctx: CompressContext): Promise<number> {
   const { sessionId, config, currentTurnId, events, hiddenUuids } = ctx;
@@ -302,13 +181,6 @@ async function tryL5Compaction(ctx: CompressContext): Promise<number> {
   const turnsInRange = endTurn - startTurn + 1;
   if (turnsInRange < config.minTurnsBetweenCompactions) return 0;
 
-  // Check if there's already a summary covering this range
-  for (const ev of events) {
-    if (ev.type !== 'summary') continue;
-    // Simple check: if any summary event replaces events in this range, skip
-    // (Exact range overlap check would require knowing turnIds of replaced events)
-  }
-
   // Collect visible messages in the range for LLM transcript
   const inRange = events.filter((ev) => {
     if (ev.type === 'session_meta') return false;
@@ -353,8 +225,9 @@ async function tryL5Compaction(ctx: CompressContext): Promise<number> {
   appendSummaryToSession(sessionId,event);
   for (const u of replacedUuids) hiddenUuids.add(u);
 
-  const replacedTokens = transcript.reduce((sum, m) => sum + estimateTokensForContent(m.content), 0);
-  const summaryTokens = estimateTokensForContent(summary);
+  const replacedTokens = transcript.reduce((sum, m) => sum + estimateMessageTokens(m), 0);
+  const summaryMsg: Message = { role: 'system', name: 'compacted_history', content: summary };
+  const summaryTokens = estimateMessageTokens(summaryMsg);
   return Math.max(0, replacedTokens - summaryTokens);
 }
 
@@ -392,38 +265,3 @@ function extractSummary(raw: string): string {
 }
 
 // ---------- Helpers ----------
-
-function collectPrunableTools(
-  events: SessionEvent[],
-  hiddenUuids: Set<string>,
-  config: ContextConfig,
-  currentTurnId: number,
-): Extract<SessionEvent, { type: 'tool_result' }>[] {
-  const all = events.filter(
-    (ev): ev is Extract<SessionEvent, { type: 'tool_result' }> =>
-      ev.type === 'tool_result' && !hiddenUuids.has(ev.uuid),
-  );
-
-  const turnCutoff = currentTurnId - config.prefixTurnsProtected - 1;
-  const oldEnough = all.filter((t) => t.turnId <= turnCutoff);
-
-  const whitelisted = oldEnough.filter(
-    (t) => !config.toolsExemptFromPrune.includes(t.toolName ?? ''),
-  );
-
-  const sortedByTurn = [...whitelisted].sort((a, b) => b.turnId - a.turnId);
-  const prunable: typeof all = [];
-  let recentTokenSum = 0;
-  for (const tool of sortedByTurn) {
-    const t = tool.tokenCount;
-    if (recentTokenSum < config.pruneProtectedTokens) {
-      recentTokenSum += t;
-      continue;
-    }
-    prunable.push(tool);
-  }
-
-  return prunable.sort(
-    (a, b) => (b.output?.length ?? 0) - (a.output?.length ?? 0),
-  );
-}
diff --git a/packages/codingcode/src/context/context.ts b/packages/codingcode/src/context/context.ts
index debc6fe..2f83377 100644
--- a/packages/codingcode/src/context/context.ts
+++ b/packages/codingcode/src/context/context.ts
@@ -1,49 +1,31 @@
 import { Effect } from 'effect';
-import type { Message } from '../core/types.js';
 import { getContextConfig, type ContextConfig } from './config.js';
-import { run, compactWithLLM, type CompressResult } from './compressor/index.js';
-import { assemblePayload } from './organizer.js';
-import { findSessionIndex } from '../session/store.js';
+import { compactWithLLM, compactIfNeeded, type CompressResult } from './compressor/index.js';
+import { assemblePayload, type BuildResult } from './organizer.js';
 import type { LLMClient } from '../llm/client.js';
 
 export class ContextService extends Effect.Service<ContextService>()('Context', {
   effect: Effect.gen(function* () {
     return {
-      /**
-       * Called at the end of each agent turn. Uses the cheap O(1) gate from
-       * `index.tokenCountEstimate` (maintained incrementally by recordX +
-       * summary events) instead of rebuilding the full LLM view just to
-       * count tokens. The Compressor itself does the precise accounting when
-       * it actually needs to act.
-       */
-      appendTurnEnd: (sessionId: string, encodedProjectPath: string, llm: LLMClient | null = null, config?: ContextConfig): Effect.Effect<CompressResult> =>
-        Effect.promise(async () => {
-          const cfg = config ?? getContextConfig();
-          const idx = findSessionIndex(sessionId);
-          const usage = idx?.tokenCountEstimate ?? 0;
-          if (usage > cfg.defaultMaxTokens * cfg.thresholds.prune) {
-            return await run(sessionId, encodedProjectPath, usage, llm, cfg);
-          }
-          return { didCompress: false, released: 0 };
-        }),
-
       /**
        * Build the message array to send to the LLM next. Uses the event
-       * pipeline (raw JSONL → summary/hide filter → fitToBudget).
-       *
-       * The optional `pendingUser` lets the caller append the about-to-be-sent
-       * user message; if omitted, only the persisted history is returned.
+       * pipeline (raw JSONL → summary/hide filter).
        */
-      build: (sessionId: string, encodedProjectPath: string, pendingUser?: Message, pinned: Message[] = [], config?: ContextConfig): Effect.Effect<Message[]> =>
+      build: (sessionId: string, encodedProjectPath: string, config?: ContextConfig): Effect.Effect<BuildResult> =>
         Effect.sync(() => {
           const cfg = config ?? getContextConfig();
-          return assemblePayload(sessionId, encodedProjectPath, pendingUser ?? null, pinned, cfg);
+          return assemblePayload(sessionId, encodedProjectPath, cfg);
         }),
 
-      compress: (sessionId: string, encodedProjectPath: string, llm: LLMClient | null = null, config?: ContextConfig): Effect.Effect<CompressResult> =>
+      compress: (sessionId: string, encodedProjectPath: string, llm: LLMClient | null = null, usage?: number, modelMaxTokens?: number, config?: ContextConfig): Effect.Effect<CompressResult> =>
+        Effect.promise(async () => {
+          const cfg = config ?? getContextConfig();
+          return await compactWithLLM(sessionId, encodedProjectPath, cfg, llm, usage, modelMaxTokens);
+        }),
+      compactIfNeeded: (sessionId: string, encodedProjectPath: string, llm: LLMClient | null, promptEstimate: number, snipTokensFreed: number, modelMaxTokens: number, config?: ContextConfig): Effect.Effect<CompressResult> =>
         Effect.promise(async () => {
           const cfg = config ?? getContextConfig();
-          return await compactWithLLM(sessionId, encodedProjectPath, cfg, llm);
+          return await compactIfNeeded(sessionId, encodedProjectPath, promptEstimate, snipTokensFreed, modelMaxTokens, cfg, llm);
         }),
     };
   }),
diff --git a/packages/codingcode/src/context/organizer.ts b/packages/codingcode/src/context/organizer.ts
index 30a191f..78f2d1d 100644
--- a/packages/codingcode/src/context/organizer.ts
+++ b/packages/codingcode/src/context/organizer.ts
@@ -1,67 +1,203 @@
 import type { ContextConfig } from './config.js';
 import type { Message } from '../core/types.js';
-import { resolveSessionDir, buildMessages } from '../session/store.js';
-import { estimateTokens, estimateTokensForContent } from './utils/tokens.js';
+import { resolveSessionDir, readHistory, applyVisibilityEvents, findSessionIndex, buildMessagesFromEvents, persistToolResult } from '../session/store.js';
+import { estimateMessageTokens, estimateTokensForContent } from './utils/tokens.js';
 import { join } from 'path';
+import { randomUUID } from 'crypto';
+import type { SessionEvent, ToolResultEvent, ToolBudgetEvent, SummaryEvent, UserEvent } from '../session/types.js';
+
+export interface BuildResult {
+  messages: Message[];
+  snipTokensFreed: number;
+  newBudgets: ToolBudgetEvent[];
+}
 
 export function assemblePayload(
   sessionId: string,
   encodedProjectPath: string,
-  pendingUser: Message | null,
-  pinned: Message[],
   config: ContextConfig,
-): Message[] {
+): BuildResult {
   const dir = resolveSessionDir(sessionId);
   if (!dir) throw new Error(`Session ${sessionId} not found`);
   const jsonlPath = join(dir, `${sessionId}.jsonl`);
-  const base = buildMessages(jsonlPath);
+  const events = readHistory(jsonlPath);
+
+  const hidden = applyVisibilityEvents(events);
+  const visible = events.filter((ev) => {
+    if (ev.type === 'hide' || ev.type === 'unhide') return false;
+    if ('uuid' in ev && hidden.has((ev as any).uuid)) return false;
+    return true;
+  }) as SessionEvent[];
 
-  // Strip trailing incomplete assistant messages (API rejects them)
-  const cleaned = stripOrphanToolCalls(base);
+  const idx = findSessionIndex(sessionId);
+  const currentTurnId = idx?.currentTurnId ?? 0;
+  const { events: compacted, snipTokensFreed, newBudgets } = applyLocalCompaction(visible, currentTurnId, config, jsonlPath, sessionId, encodedProjectPath);
 
-  const full = pendingUser ? [...pinned, ...cleaned, pendingUser] : [...pinned, ...cleaned];
-  return fitToBudget(full, config, pinned.length);
+  const messages = buildMessagesFromEvents(compacted as any);
+
+  return { messages, snipTokensFreed, newBudgets };
 }
 
-export function fitToBudget(
-  messages: Message[],
+function applyLocalCompaction(
+  events: SessionEvent[],
+  currentTurnId: number,
   config: ContextConfig,
-  pinnedCount: number = 0,
-): Message[] {
-  const budget = config.defaultMaxTokens - config.reservedTokens;
-  let usage = estimateTokens(messages);
-  if (usage <= budget) return messages;
-
-  const result = [...messages];
-  let i = pinnedCount;
-  while (i < result.length && usage > budget) {
-    // Skip non-user messages that might have been left orphaned
-    if (result[i]?.role !== 'user') { i++; continue; }
-
-    // Find end of this user turn (next user message or array end)
-    let end = i + 1;
-    while (end < result.length && result[end]?.role !== 'user') {
-      end++;
-    }
+  jsonlPath: string,
+  sessionId: string,
+  encodedProjectPath: string,
+): { events: SessionEvent[]; snipTokensFreed: number; newBudgets: ToolBudgetEvent[] } {
+  const budgetResult = applyToolResultBudget(events, config, jsonlPath, sessionId, encodedProjectPath);
+  const snipResult = snipEvents(budgetResult.events, config);
+  const result = microcompact(snipResult.events, config);
+  return { events: result, snipTokensFreed: snipResult.tokensFreed, newBudgets: budgetResult.newBudgets };
+}
 
-    const removed = result.splice(i, end - i);
-    usage -= removed.reduce((s, m) => s + estimateTokensForContent(m.content), 0);
-  }
-  return result;
+function toolMsgTokens(output: string, tool: ToolResultEvent): number {
+  return estimateMessageTokens({ role: 'tool', content: output, tool_call_id: tool.toolCallId, tool_name: tool.toolName } as any);
 }
 
-function stripOrphanToolCalls(messages: Message[]): Message[] {
-  const resolvedIds = new Set<string>();
-  for (const m of messages) {
-    if (m.role === 'tool' && m.tool_call_id) resolvedIds.add(m.tool_call_id);
+function applyToolResultBudget(
+  events: SessionEvent[],
+  config: ContextConfig,
+  jsonlPath: string,
+  sessionId: string,
+  encodedProjectPath: string,
+): { events: SessionEvent[]; newBudgets: ToolBudgetEvent[] } {
+  const budgetMap = new Map<string, ToolBudgetEvent>();
+  for (const ev of events) {
+    if (ev.type === 'tool_budget') budgetMap.set(ev.toolCallId, ev);
+  }
+
+  const lastUserIdx = [...events].reverse().findIndex(e => e.type === 'user');
+  if (lastUserIdx < 0) return { events: replaceBudgeted(events, budgetMap), newBudgets: [] };
+
+  const lastUser = events[events.length - 1 - lastUserIdx] as UserEvent;
+  const lastUserTurnId = lastUser.turnId;
+
+  const toolResults = events.filter((e): e is ToolResultEvent => {
+    if (e.type !== 'tool_result') return false;
+    if (e.turnId !== lastUserTurnId) return false;
+    if (budgetMap.has(e.toolCallId)) return false;
+    return true;
+  });
+
+  if (toolResults.length === 0) return { events: replaceBudgeted(events, budgetMap), newBudgets: [] };
+
+  const totalTokens = toolResults.reduce((sum, t) => sum + toolMsgTokens(t.output, t), 0);
+
+  if (totalTokens <= config.toolResultBudgetThreshold) {
+    return { events: replaceBudgeted(events, budgetMap), newBudgets: [] };
+  }
+
+  const ranked = [...toolResults].sort((a, b) => {
+    return estimateTokensForContent(b.output) - estimateTokensForContent(a.output);
+  });
+
+  let remaining = totalTokens;
+  const newBudgets: ToolBudgetEvent[] = [];
+
+  for (const tool of ranked) {
+    if (remaining <= config.toolResultBudgetThreshold) break;
+    const result = persistToolResult(encodedProjectPath, sessionId, tool.toolCallId, tool.output);
+    const preview = tool.output.slice(0, config.persistPreviewChars);
+
+    const budgetEvent: ToolBudgetEvent = {
+      type: 'tool_budget',
+      uuid: randomUUID(),
+      toolCallId: tool.toolCallId,
+      path: result.path,
+      preview,
+      bytes: result.bytes,
+      timestamp: new Date().toISOString(),
+    };
+    newBudgets.push(budgetEvent);
+    budgetMap.set(tool.toolCallId, budgetEvent);
+    const replacementOutput = `[...persisted at: ${result.path} (${result.bytes} bytes)]\n\n${preview}`;
+    const saved = toolMsgTokens(tool.output, tool) - toolMsgTokens(replacementOutput, tool);
+    remaining -= saved;
   }
-  while (messages.length > 0) {
-    const last = messages[messages.length - 1];
-    if (!last || last.role !== 'assistant') break;
-    const tcs = last.tool_calls;
-    if (!tcs || tcs.length === 0) break;
-    if (tcs.every((tc) => resolvedIds.has(tc.id))) break;
-    messages.pop();
+
+  return { events: replaceBudgeted(events, budgetMap), newBudgets };
+}
+
+/** Drops `tool_budget` events and swaps each budgeted tool result's output for its persisted-file stub. */
+function replaceBudgeted(events: SessionEvent[], budgetMap: Map<string, ToolBudgetEvent>): SessionEvent[] {
+  const rewritten: SessionEvent[] = [];
+  for (const ev of events) {
+    if (ev.type === 'tool_budget') continue; // bookkeeping only, never part of the returned list
+    const budget = ev.type === 'tool_result' ? budgetMap.get(ev.toolCallId) : undefined;
+    if (ev.type === 'tool_result' && budget) {
+      rewritten.push({ ...ev, output: `[...persisted at: ${budget.path} (${budget.bytes} bytes)]\n\n${budget.preview}` });
+    } else {
+      rewritten.push(ev);
+    }
+  }
+  return rewritten;
+}
+
+interface SnipResult {
+  events: SessionEvent[];
+  tokensFreed: number;
+}
+
+export function snipEvents(events: SessionEvent[], config: ContextConfig): SnipResult {
+  if (events.length <= config.snipMaxMessages) return { events, tokensFreed: 0 };
+
+  const keepFrom = events.length - config.snipMaxMessages;
+  let boundary = keepFrom;
+  while (boundary < events.length && events[boundary]?.type !== 'user') {
+    boundary++;
   }
-  return messages;
+  if (boundary >= events.length) return { events, tokensFreed: 0 };
+
+  const snipped = events.slice(0, boundary);
+  const snippedTokens = snipped.reduce((sum, e) => {
+    if (e.type === 'user') return sum + estimateMessageTokens({ role: 'user', content: e.content });
+    if (e.type === 'assistant') return sum + estimateMessageTokens({ role: 'assistant', content: e.content });
+    if (e.type === 'tool_result') {
+      return sum + estimateMessageTokens({ role: 'tool', content: e.output, tool_call_id: e.toolCallId, tool_name: e.toolName } as any);
+    }
+    if (e.type === 'summary') {
+      return sum + estimateMessageTokens({ role: 'system', name: 'compacted_history', content: e.summaryText });
+    }
+    return sum;
+  }, 0);
+
+  const summary: SummaryEvent = {
+    type: 'summary',
+    uuid: randomUUID(),
+    replaces: snipped.filter(e => 'uuid' in e).map(e => (e as any).uuid),
+    summaryText: `[${snipped.length} messages snipped]`,
+    method: 'context-collapse',
+    timestamp: new Date().toISOString(),
+  };
+
+  return { events: [summary, ...events.slice(boundary)], tokensFreed: snippedTokens };
 }
+
+/**
+ * Blanks the output of older, large tool results so only the most recent ones keep their full text.
+ *
+ * A tool result is a candidate when its tool name is not in `config.toolsExemptFromMicrocompact`
+ * and its output is estimated at more than 120 tokens. The last `config.keepRecentToolResults`
+ * candidates stay untouched; every earlier candidate gets a fixed placeholder as its output.
+ *
+ * @param events session events ordered oldest to newest (later entries are treated as more recent)
+ * @returns `events` itself when nothing is pruned, otherwise a new array (input events are not mutated)
+ */
+export function microcompact(events: SessionEvent[], config: ContextConfig): SessionEvent[] {
+  const replacement = '[Old tool result content cleared]';
+  const candidates = events.filter((e): e is ToolResultEvent =>
+    e.type === 'tool_result'
+    && !config.toolsExemptFromMicrocompact.includes(e.toolName ?? '')
+    && estimateTokensForContent(e.output ?? '') > 120);
+
+  // Never use slice(-keep): slice(-0) is slice(0), which would protect every candidate when keep is 0.
+  const keep = Math.max(0, config.keepRecentToolResults);
+  if (candidates.length <= keep) return events;
+  const pruned = new Set(candidates.slice(0, candidates.length - keep).map(e => e.uuid));
+
+  return events.map(e =>
+    e.type === 'tool_result' && pruned.has(e.uuid) ? { ...e, output: replacement } : e);
+}
+
diff --git a/packages/codingcode/src/context/persist/store.ts b/packages/codingcode/src/context/persist/store.ts
deleted file mode 100644
index 5a50444..0000000
--- a/packages/codingcode/src/context/persist/store.ts
+++ /dev/null
@@ -1,25 +0,0 @@
-import { existsSync, mkdirSync, writeFileSync } from 'fs';
-import { join } from 'path';
-import { homedir } from 'os';
-
-const PROJECT_BASE = join(homedir(), '.codingcode', 'project');
-
-export interface PersistResult {
-  path: string;
-  bytes: number;
-}
-
-export function persistToolResult(
-  encodedProjectPath: string,
-  sessionId: string,
-  toolCallId: string,
-  content: string,
-): PersistResult {
-  const dir = join(PROJECT_BASE, encodedProjectPath, 'tool-results', sessionId);
-  if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
-  const file = join(dir, `${toolCallId}.txt`);
-  if (!existsSync(file)) {
-    writeFileSync(file, content, 'utf8');
-  }
-  return { path: file.replace(/\\/g, '/'), bytes: Buffer.byteLength(content, 'utf8') };
-}
diff --git a/packages/codingcode/src/context/utils/tokens.ts b/packages/codingcode/src/context/utils/tokens.ts
index 9f4d8ce..02fdec7 100644
--- a/packages/codingcode/src/context/utils/tokens.ts
+++ b/packages/codingcode/src/context/utils/tokens.ts
@@ -1,9 +1,20 @@
 import type { Message } from '../../core/types.js';
 
+/** Rough token estimate for one chat message: content, role, optional name/tool ids, tool calls, plus 4. */
+export function estimateMessageTokens(m: Message): number {
+  // Absent or empty fields contribute nothing to the total.
+  const parts: Array<string | undefined | null> = [m.content, m.role, m.name, m.tool_call_id, m.tool_name];
+  // Tool calls (id, name, arguments) are sent with the assistant message, so they take up prompt space too.
+  for (const call of m.tool_calls ?? []) parts.push(JSON.stringify(call));
+  let tokens = 4; // fixed per-message overhead assumed for the OpenAI chat format (role tag, delimiters)
+  for (const part of parts) if (part) tokens += estimateTokensForContent(part);
+  return tokens;
+}
+
 export function estimateTokens(messages: Message[]): number {
   let total = 0;
   for (const m of messages) {
-    total += estimateTokensForContent(m.content);
+    total += estimateMessageTokens(m);
   }
   return total;
 }
@@ -11,7 +22,7 @@ export function estimateTokens(messages: Message[]): number {
 export function estimateTokensForContent(content: string): number {
   let charCount = 0;
   for (const char of content) {
-    charCount += char.charCodeAt(0) > 127 ? 1.5 : 1;
+    charCount += char.charCodeAt(0) > 127 ? 3.5 : 1;
   }
   return Math.ceil(charCount / 3.5);
 }
diff --git a/packages/codingcode/src/llm/providers/deepseek.ts b/packages/codingcode/src/llm/providers/deepseek.ts
index c3ac55f..add744c 100644
--- a/packages/codingcode/src/llm/providers/deepseek.ts
+++ b/packages/codingcode/src/llm/providers/deepseek.ts
@@ -148,6 +148,14 @@ export class DeepSeekProvider implements LLMClient {
       try {
         const resp = await result.response;
         const parsed = parseResponseMessages(resp.messages as ModelMessage[]);
+        if ((resp as any).usage) {
+          const usage = (resp as any).usage as any;
+          parsed.usage = {
+            prompt: usage.promptTokens ?? 0,
+            completion: usage.completionTokens ?? 0,
+            total: usage.totalTokens ?? 0,
+          };
+        }
         return Result.ok(parsed);
       } catch (e) {
         return Result.err(mapLlmError('deepseek', e));
diff --git a/packages/codingcode/src/llm/providers/openai.ts b/packages/codingcode/src/llm/providers/openai.ts
index fe2cf75..21b473b 100644
--- a/packages/codingcode/src/llm/providers/openai.ts
+++ b/packages/codingcode/src/llm/providers/openai.ts
@@ -160,6 +160,14 @@ export class OpenAIProvider implements LLMClient {
       try {
         const resp = await result.response;
         const parsed = parseResponseMessages(resp.messages as ModelMessage[]);
+        if ((resp as any).usage) {
+          const usage = (resp as any).usage as any;
+          parsed.usage = {
+            prompt: usage.promptTokens ?? 0,
+            completion: usage.completionTokens ?? 0,
+            total: usage.totalTokens ?? 0,
+          };
+        }
         return Result.ok(parsed);
       } catch (e) {
         return Result.err(mapLlmError('openai', e));
diff --git a/packages/codingcode/src/memory/extractor.ts b/packages/codingcode/src/memory/extractor.ts
index 4e15f0f..3d68ca0 100644
--- a/packages/codingcode/src/memory/extractor.ts
+++ b/packages/codingcode/src/memory/extractor.ts
@@ -1,4 +1,4 @@
-import type { LLMStreamAdapter } from '../agent/agent.js';
+import type { LLMClient } from '../llm/client.js';
 import type { MemoryTypeConfig } from '@codingcode/infra';
 
 export interface StructuredTranscript {
@@ -11,7 +11,7 @@ export async function extractMemory(opts: {
   currentAuto: string;
   transcript: StructuredTranscript;
   types: MemoryTypeConfig[];
-  llm: LLMStreamAdapter;
+  llm: LLMClient;
 }): Promise<string | null> {
   const { currentAuto, transcript, types, llm } = opts;
 
diff --git a/packages/codingcode/src/memory/index.ts b/packages/codingcode/src/memory/index.ts
index a8ab137..903e65c 100644
--- a/packages/codingcode/src/memory/index.ts
+++ b/packages/codingcode/src/memory/index.ts
@@ -1,4 +1,4 @@
-import type { LLMStreamAdapter } from '../agent/agent.js';
+import type { LLMClient } from '../llm/client.js';
 import { findSessionIndex } from '../session/store.js';
 import type { SessionEvent } from '../session/types.js';
 import { readMemoryFile, resolveProjectMemoryPath, resolveUserMemoryPath, extractAutoBlock, replaceAutoBlock, mergeAutoBlocks, enforceMaxBytes, writeMemoryFileAtomic, stripMarkersForPrompt } from './storage.js';
@@ -90,7 +90,7 @@ function buildStructuredTranscript(events: SessionEvent[]): StructuredTranscript
 
 export async function flushSessionToMemory(
   sessionId: string,
-  llm: LLMStreamAdapter | null,
+  llm: LLMClient | null,
 ): Promise<{ written: boolean; bytes: number }> {
   if (!getMemoryEnabled()) {
     return { written: false, bytes: 0 };
diff --git a/packages/codingcode/src/memory/llm-resolver.ts b/packages/codingcode/src/memory/llm-resolver.ts
index 2d40d27..967590b 100644
--- a/packages/codingcode/src/memory/llm-resolver.ts
+++ b/packages/codingcode/src/memory/llm-resolver.ts
@@ -1,11 +1,11 @@
 import { listModels, createClient } from '../llm/factory.js';
-import type { LLMStreamAdapter } from '../agent/agent.js';
+import type { LLMClient } from '../llm/client.js';
 import type { MemoryConfig } from '@codingcode/infra';
 
 export async function resolveMemoryLLM(
   config: MemoryConfig,
-  fallback: LLMStreamAdapter | null,
-): Promise<LLMStreamAdapter | null> {
+  fallback: LLMClient | null,
+): Promise<LLMClient | null> {
   const target = config.model?.trim();
   if (!target) return fallback;
 
diff --git a/packages/codingcode/src/server/adapter.ts b/packages/codingcode/src/server/adapter.ts
index 4f5ec2e..287c078 100644
--- a/packages/codingcode/src/server/adapter.ts
+++ b/packages/codingcode/src/server/adapter.ts
@@ -22,6 +22,8 @@ export function agentEventToSseEvent(event: AgentEvent): SseEvent | null {
       return { type: 'done' };
     case 'TodoUpdate':
       return { type: 'todo_update', items: event.items as unknown as Record<string, unknown>[] };
+    case 'Usage':
+      return { type: 'usage', prompt: event.prompt, completion: event.completion, total: event.total };
     case 'LlmChunk':
     case 'Assistant':
     case 'ReactiveCompact':
diff --git a/packages/codingcode/src/session/store.ts b/packages/codingcode/src/session/store.ts
index 2a31d9f..6d47fb2 100644
--- a/packages/codingcode/src/session/store.ts
+++ b/packages/codingcode/src/session/store.ts
@@ -6,8 +6,9 @@ import { join, dirname } from 'path';
 import type { Message } from '../core/types.js';
 import { AgentError } from '../core/error.js';
 import { normalizePath, encodeProjectPath } from '../core/path.js';
-import type { SessionEvent, SessionMetaEvent, UserEvent, AssistantEvent, ToolResultEvent, SummaryEvent, HideEvent, UnhideEvent, TitleEvent, SessionIndex } from './types.js';
-import { estimateTokensForContent } from '../context/utils/tokens.js';
+import type { SessionEvent, SessionMetaEvent, UserEvent, AssistantEvent, ToolResultEvent, SummaryEvent, HideEvent, UnhideEvent, TitleEvent, SessionIndex, TokenUsage } from './types.js';
+import { estimateTokens, estimateTokensForContent, estimateMessageTokens } from '../context/utils/tokens.js';
+import { getContextConfig } from '../context/config.js';
 import { createLogger } from '@codingcode/infra';
 
 const logger = createLogger();
@@ -82,7 +83,7 @@ export function findSessionIndex(sessionId: string): SessionIndex | null {
     messageCount: h.filter((e) => e.type !== 'session_meta').length,
     title: firstUser ? makeTitle(firstUser) : meta.sessionId.slice(0, 8),
     currentTurnId: 0,
-    tokenCountEstimate: 0,
+    usage: undefined,
     permissionMode: 'default',
   };
 }
@@ -95,6 +96,26 @@ function assertResumeWorkspace(cwd: string, sessionId: string): void {
   }
 }
 
+export interface PersistResult {
+  path: string;
+  bytes: number;
+}
+
+export function persistToolResult(
+  encodedProjectPath: string,
+  sessionId: string,
+  toolCallId: string,
+  content: string,
+): PersistResult {
+  const dir = join(PROJECT_BASE, encodedProjectPath, 'tool-results', sessionId);
+  if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
+  const file = join(dir, `${toolCallId}.txt`);
+  if (!existsSync(file)) {
+    writeFileSync(file, content, 'utf8');
+  }
+  return { path: file.replace(/\\/g, '/'), bytes: Buffer.byteLength(content, 'utf8') };
+}
+
 export interface SessionStoreState {
   sessionId: string;
   cwd: string;
@@ -105,7 +126,8 @@ export interface SessionStoreState {
   sessionMeta: SessionMetaEvent | null;
   title: string;
   currentTurnId: number;
-  tokenCountEstimate: number;
+  usage: TokenUsage | undefined;
+  promptEstimate: number;
 }
 
 function makeTitle(content: string): string {
@@ -166,17 +188,26 @@ export class SessionService extends Effect.Service<SessionService>()('Session',
             if (state.title === state.sessionId.slice(0, 8)) {
               state.title = makeTitle(content);
             }
-            appendEvent(state, event, estimateTokensForContent(content));
+            appendEvent(state, event);
+            state.promptEstimate += estimateMessageTokens({ role: 'user', content });
+            updateIndex(state);
             return event;
           },
           catch: (e) => new AgentError('SESSION_IO_ERROR', `Session write failed: ${String(e)}`, e),
         }),
 
-      recordAssistant: (state: SessionStoreState, content: string, toolCalls: AssistantEvent['toolCalls'], model: string): Effect.Effect<AssistantEvent, AgentError> =>
+      recordAssistant: (state: SessionStoreState, content: string, toolCalls: AssistantEvent['toolCalls'], model: string, usage?: TokenUsage): Effect.Effect<AssistantEvent, AgentError> =>
         Effect.try({
           try: () => {
-            const event: AssistantEvent = { type: 'assistant', turnId: state.currentTurnId, uuid: randomUUID(), content, toolCalls, model, timestamp: new Date().toISOString() };
-            appendEvent(state, event, estimateTokensForContent(content));
+            const event: AssistantEvent = { type: 'assistant', turnId: state.currentTurnId, uuid: randomUUID(), content, toolCalls, model, timestamp: new Date().toISOString(), usage };
+            appendEvent(state, event);
+            if (usage) {
+              state.usage = usage;
+              state.promptEstimate = usage.prompt;
+            } else {
+              state.promptEstimate += estimateMessageTokens({ role: 'assistant', content });
+            }
+            updateIndex(state);
             return event;
           },
           catch: (e) => new AgentError('SESSION_IO_ERROR', `Session write failed: ${String(e)}`, e),
@@ -185,9 +216,24 @@ export class SessionService extends Effect.Service<SessionService>()('Session',
       recordToolResult: (state: SessionStoreState, parentUuid: string, toolName: string, toolCallId: string, output: string): Effect.Effect<ToolResultEvent, AgentError> =>
         Effect.try({
           try: () => {
+            const cfg = getContextConfig();
             const tokenCount = estimateTokensForContent(output);
-            const event: ToolResultEvent = { type: 'tool_result', turnId: state.currentTurnId, uuid: randomUUID(), parentUuid, toolName, toolCallId, output, timestamp: new Date().toISOString(), tokenCount };
-            appendEvent(state, event, tokenCount);
+
+            let finalOutput = output;
+            let finalTokenCount = tokenCount;
+
+            if (tokenCount > cfg.thresholdTokens &&
+                toolName !== 'read' && toolName !== 'read_file') {
+              const { path } = persistToolResult(state.projectPath, state.sessionId, toolCallId, output);
+              const preview = output.slice(0, cfg.persistPreviewChars);
+              finalOutput = `${preview}\n\n[…full output persisted at: ${path}. Use Read tool to access if needed.]`;
+              finalTokenCount = estimateTokensForContent(finalOutput);
+            }
+
+            const event: ToolResultEvent = { type: 'tool_result', turnId: state.currentTurnId, uuid: randomUUID(), parentUuid, toolName, toolCallId, output: finalOutput, timestamp: new Date().toISOString(), tokenCount: finalTokenCount };
+            appendEvent(state, event);
+            state.promptEstimate += estimateMessageTokens({ role: 'tool', content: finalOutput, tool_call_id: toolCallId, tool_name: toolName });
+            updateIndex(state);
             return event;
           },
           catch: (e) => new AgentError('SESSION_IO_ERROR', `Session write failed: ${String(e)}`, e),
@@ -197,7 +243,10 @@ export class SessionService extends Effect.Service<SessionService>()('Session',
         Effect.try({
           try: () => {
             const event: SummaryEvent = { type: 'summary', uuid: randomUUID(), replaces, summaryText, method, timestamp: new Date().toISOString() };
-            appendEvent(state, event, estimateTokensForContent(summaryText));
+            appendEvent(state, event);
+            state.usage = undefined;
+            state.promptEstimate = estimateTokens(buildMessages(state.transcriptPath));
+            updateIndex(state);
             return event;
           },
           catch: (e) => new AgentError('SESSION_IO_ERROR', `Session write failed: ${String(e)}`, e),
@@ -206,14 +255,26 @@ export class SessionService extends Effect.Service<SessionService>()('Session',
       hideMessage: (state: SessionStoreState, targetUuid: string, reason: string): Effect.Effect<HideEvent> =>
         Effect.sync(() => {
           const event: HideEvent = { type: 'hide', uuid: randomUUID(), kind: 'message', targetUuid, reason, timestamp: new Date().toISOString() };
-          appendEvent(state, event, 0);
+          appendEvent(state, event);
+          state.usage = undefined;
+          state.promptEstimate = estimateTokens(buildMessages(state.transcriptPath));
+          updateIndex(state);
           return event;
         }),
 
       rollbackToTurn: (state: SessionStoreState, throughTurnId: number, reason: string): Effect.Effect<HideEvent> =>
         Effect.sync(() => {
           const event: HideEvent = { type: 'hide', uuid: randomUUID(), kind: 'rollback', throughTurnId, reason, timestamp: new Date().toISOString() };
-          appendEvent(state, event, 0);
+          appendEvent(state, event);
+          const lastUsage = findLastVisibleAssistantUsage(state.transcriptPath);
+          if (lastUsage) {
+            state.usage = lastUsage;
+            state.promptEstimate = lastUsage.prompt;
+          } else {
+            state.usage = undefined;
+            state.promptEstimate = estimateTokens(buildMessages(state.transcriptPath));
+          }
+          updateIndex(state);
           return event;
         }),
 
@@ -229,7 +290,10 @@ export class SessionService extends Effect.Service<SessionService>()('Session',
           }
           if (!lastHideUuid || unhidTargets.has(lastHideUuid)) return null;
           const event: UnhideEvent = { type: 'unhide', uuid: randomUUID(), targetHideUuid: lastHideUuid, timestamp: new Date().toISOString() };
-          appendEvent(state, event, 0);
+          appendEvent(state, event);
+          state.usage = undefined;
+          state.promptEstimate = estimateTokens(buildMessages(state.transcriptPath));
+          updateIndex(state);
           return event;
         }),
 
@@ -292,19 +356,29 @@ function initState(cwd: string, sessionId?: string, parentSessionId?: string): S
     : join(sessionsDir, `${id}.jsonl`);
   const indexPath = transcriptPath.replace('.jsonl', '.index.json');
   let currentTurnId = 0;
-  let tokenCountEstimate = 0;
+  let usage: TokenUsage | undefined = undefined;
+  let promptEstimate = 0;
   try {
     if (existsSync(indexPath)) {
       const idx = JSON.parse(readFileSync(indexPath, 'utf8')) as SessionIndex;
       currentTurnId = idx.currentTurnId ?? 0;
-      tokenCountEstimate = idx.tokenCountEstimate ?? 0;
+      usage = idx.usage ?? undefined;
+      promptEstimate = idx.promptEstimate ?? 0;
     }
   } catch { /* ignore corrupt index */ }
+  if (!usage && promptEstimate === 0) {
+    const lastUsage = findLastVisibleAssistantUsage(transcriptPath);
+    if (lastUsage) {
+      usage = lastUsage;
+      promptEstimate = lastUsage.prompt;
+    }
+  }
   return {
     sessionId: id, cwd: normalizedCwd, projectPath, transcriptPath,
     indexPath,
     messageCount: 0, sessionMeta: null, title: id.slice(0, 8), currentTurnId,
-    tokenCountEstimate,
+    usage,
+    promptEstimate,
   };
 }
 
@@ -371,6 +445,23 @@ export function applyVisibilityEvents(events: SessionEvent[]): Set<string> {
   return hidden;
 }
 
+/**
+ * Returns the `usage` stored on the newest assistant event that is still visible in the history at `path`.
+ * Assistant events whose uuid is in the hidden set from `applyVisibilityEvents` are skipped. The first
+ * visible assistant event decides the result, so `undefined` means either that no such event exists or
+ * that it carries no usage; callers use the value to re-anchor token accounting (rollback, fork, init).
+ */
+export function findLastVisibleAssistantUsage(path: string): TokenUsage | undefined {
+  const events = readHistory(path);
+  const hidden = applyVisibilityEvents(events);
+  for (let i = events.length - 1; i >= 0; i--) {
+    const ev = events[i];
+    // Narrowing on `type` also excludes hide/unhide markers and makes `uuid`/`usage` typed (no casts).
+    if (ev?.type === 'assistant' && !hidden.has(ev.uuid)) return ev.usage;
+  }
+  return undefined;
+}
+
 export function buildMessagesFromEvents(events: SessionEvent[]): Message[] {
   const hidden = applyVisibilityEvents(events);
 
@@ -443,7 +534,7 @@ export function listSessions(projectPath?: string): SessionIndex[] {
         if (meta?.cwd && meta?.sessionId) {
           const h = readHistory(jsonlPath);
           const firstUser = findFirstUserContent(h);
-          results.push({ sessionId: meta.sessionId, projectPath: meta.projectPath, cwd: meta.cwd, model: meta.model, createdAt: meta.createdAt, updatedAt: meta.createdAt, messageCount: h.filter((e) => e.type !== 'session_meta').length, title: firstUser ? makeTitle(firstUser) : meta.sessionId.slice(0, 8), currentTurnId: 0, tokenCountEstimate: 0, permissionMode: 'default' });
+          results.push({ sessionId: meta.sessionId, projectPath: meta.projectPath, cwd: meta.cwd, model: meta.model, createdAt: meta.createdAt, updatedAt: meta.createdAt, messageCount: h.filter((e) => e.type !== 'session_meta').length, title: firstUser ? makeTitle(firstUser) : meta.sessionId.slice(0, 8), currentTurnId: 0, usage: undefined, promptEstimate: 0, permissionMode: 'default' });
         }
       }
     }
@@ -451,19 +542,18 @@ export function listSessions(projectPath?: string): SessionIndex[] {
   return results;
 }
 
-function appendEvent(state: SessionStoreState, event: SessionEvent, tokenDelta: number = 0): void {
+function appendEvent(state: SessionStoreState, event: SessionEvent): void {
   appendLine(state.transcriptPath, event);
   state.messageCount++;
-  updateIndex(state, tokenDelta);
+  updateIndex(state);
 }
 
 function appendLine(path: string, event: object): void {
   appendFileSync(path, JSON.stringify(event) + '\n', 'utf8');
 }
 
-function updateIndex(state: SessionStoreState, tokenDelta: number = 0): void {
+function updateIndex(state: SessionStoreState): void {
   if (!state.sessionMeta) return;
-  state.tokenCountEstimate = Math.max(0, state.tokenCountEstimate + tokenDelta);
   const current = readCurrentIndex(state.indexPath);
   const index: SessionIndex = {
     sessionId: state.sessionId, projectPath: state.projectPath, cwd: state.cwd,
@@ -472,7 +562,8 @@ function updateIndex(state: SessionStoreState, tokenDelta: number = 0): void {
     updatedAt: new Date().toISOString(),
     messageCount: state.messageCount, title: state.title,
     currentTurnId: state.currentTurnId,
-    tokenCountEstimate: state.tokenCountEstimate,
+    usage: state.usage,
+    promptEstimate: state.promptEstimate,
     permissionMode: current?.permissionMode ?? 'default',
   };
   enqueueWrite(state.sessionId, state.indexPath, index);
@@ -558,17 +649,28 @@ export function forkSession(sourceSessionId: string, sourceJsonlPath: string, at
   // Copy index from source if it exists
   const sourceIdxPath = sourceJsonlPath.replace('.jsonl', '.index.json');
   let title = newSessionId.slice(0, 8);
-  let tokenCountEstimate = 0;
+  let usage: TokenUsage | undefined = undefined;
+  let promptEstimate = 0;
   let permissionMode = 'default';
   if (existsSync(sourceIdxPath)) {
     try {
       const srcIdx = JSON.parse(readFileSync(sourceIdxPath, 'utf8')) as SessionIndex;
       title = srcIdx.title;
-      tokenCountEstimate = srcIdx.tokenCountEstimate;
+      usage = srcIdx.usage ?? undefined;
+      promptEstimate = srcIdx.promptEstimate ?? 0;
       permissionMode = srcIdx.permissionMode ?? 'default';
     } catch { /* corrupt */ }
   }
 
+  const lastUsage = findLastVisibleAssistantUsage(newJsonlPath);
+  if (lastUsage) {
+    usage = lastUsage;
+    promptEstimate = lastUsage.prompt;
+  } else {
+    usage = undefined;
+    promptEstimate = estimateTokens(buildMessages(newJsonlPath));
+  }
+
   const meta = chain[0] as SessionMetaEvent | undefined;
   const newIdx: SessionIndex = {
     sessionId: newSessionId,
@@ -580,7 +682,8 @@ export function forkSession(sourceSessionId: string, sourceJsonlPath: string, at
     messageCount: chain.filter((e) => e.type !== 'session_meta').length,
     title,
     currentTurnId: turnId,
-    tokenCountEstimate,
+    usage,
+    promptEstimate,
     permissionMode,
   };
   writeFileSync(newIndexPath, JSON.stringify(newIdx, null, 2), 'utf8');
@@ -618,7 +721,7 @@ export function sessionEventsToTurns(events: SessionEvent[]): Array<{ id: string
   const turnsMap = new Map<number, { id: string; items: object[]; status: string }>();
   for (const event of events) {
     if (event.type === 'session_meta') continue;
-    if (event.type === 'summary' || event.type === 'hide' || event.type === 'unhide' || event.type === 'title') continue;
+    if (event.type === 'summary' || event.type === 'hide' || event.type === 'unhide' || event.type === 'title' || event.type === 'tool_budget') continue;
     let turn = turnsMap.get(event.turnId);
     if (!turn) {
       turn = { id: String(event.turnId), items: [], status: 'completed' };
diff --git a/packages/codingcode/src/session/types.ts b/packages/codingcode/src/session/types.ts
index 55e489a..1a5b6b1 100644
--- a/packages/codingcode/src/session/types.ts
+++ b/packages/codingcode/src/session/types.ts
@@ -27,6 +27,7 @@ export interface AssistantEvent {
   toolCalls: Array<{ id: string; name: string; arguments: Record<string, unknown> }>;
   model: string;
   timestamp: string;
+  usage?: TokenUsage;
 }
 
 export interface ToolResultEvent {
@@ -84,6 +85,16 @@ export interface TitleEvent {
   timestamp: string;
 }
 
+export interface ToolBudgetEvent { // session event tagged 'tool_budget'; member of the SessionEvent union
+  type: 'tool_budget'; // discriminant used to tell this event apart from the other SessionEvent variants
+  uuid: string;
+  toolCallId: string; // NOTE(review): presumably the tool call whose result was budgeted — confirm
+  path: string; // NOTE(review): presumably where the full tool output was persisted — confirm
+  preview: string; // NOTE(review): presumably the shortened text kept in place of the full output — confirm
+  bytes: number; // NOTE(review): presumably the byte size of the original output — confirm
+  timestamp: string; // unlike the turn-scoped events, this interface declares no turnId
+}
+
 export type SessionEvent =
   | SessionMetaEvent
   | UserEvent
@@ -92,7 +103,14 @@ export type SessionEvent =
   | SummaryEvent
   | HideEvent
   | UnhideEvent
-  | TitleEvent;
+  | TitleEvent
+  | ToolBudgetEvent;
+
+export interface TokenUsage { // token usage counters; stored on AssistantEvent.usage and SessionIndex.usage
+  prompt: number; // prompt-side token count
+  completion: number; // completion-side token count
+  total: number; // NOTE(review): presumably prompt + completion (the test fixtures use 1000 + 500 = 1500) — confirm
+}
 
 export interface SessionIndex {
   sessionId: string;
@@ -104,6 +122,7 @@ export interface SessionIndex {
   messageCount: number;
   title: string;
   currentTurnId: number;
-  tokenCountEstimate: number;
+  usage: TokenUsage | undefined;
+  promptEstimate?: number;
   permissionMode: string;
 }
diff --git a/packages/codingcode/src/tools/types.ts b/packages/codingcode/src/tools/types.ts
index 2c50ddc..752a4c6 100644
--- a/packages/codingcode/src/tools/types.ts
+++ b/packages/codingcode/src/tools/types.ts
@@ -9,7 +9,7 @@ export interface ToolExecCtx {
   projectPath?: string;
   agentRunner?: {
     agentService: any; // AgentService — use any to avoid circular imports
-    llm: any; // LLMStreamAdapter — use any to avoid circular imports
+    llm: any; // LLMClient — use any to avoid circular imports
   };
   sandbox?: {
     wrapCommand: (command: string) => Effect.Effect<string>;
diff --git a/packages/codingcode/test/agent-event.test.ts b/packages/codingcode/test/agent-event.test.ts
index 822664b..7d51688 100644
--- a/packages/codingcode/test/agent-event.test.ts
+++ b/packages/codingcode/test/agent-event.test.ts
@@ -14,6 +14,16 @@ describe('AgentEvent type', () => {
     if (ev._tag === 'Done') expect(ev.content).toBe('result');
   });
 
+  it('should accept a Usage event', () => {
+    const ev: AgentEvent = { _tag: 'Usage', prompt: 1000, completion: 500, total: 1500 };
+    expect(ev._tag).toBe('Usage');
+    if (ev._tag === 'Usage') {
+      expect(ev.prompt).toBe(1000);
+      expect(ev.completion).toBe(500);
+      expect(ev.total).toBe(1500);
+    }
+  });
+
   it('should narrow correctly via discriminated union switch', () => {
     const ev: AgentEvent = { _tag: 'Error', error: { _tag: 'MaxStepsReached', maxSteps: 5, message: 'test' } };
     switch (ev._tag) {
diff --git a/packages/codingcode/test/agent/agent-concurrent.test.ts b/packages/codingcode/test/agent/agent-concurrent.test.ts
index 4bd7331..8afed92 100644
--- a/packages/codingcode/test/agent/agent-concurrent.test.ts
+++ b/packages/codingcode/test/agent/agent-concurrent.test.ts
@@ -52,7 +52,7 @@ const mockState = {
   currentTurnId: 1,
   sessionMeta: { model: 'test-model', version: '0.1.0', createdAt: new Date().toISOString() } as any,
   title: 'test',
-  tokenCountEstimate: 0,
+  usage: undefined,
 };
 
 function makeDeps(overrides?: Record<string, any>) {
@@ -74,7 +74,7 @@ function makeDeps(overrides?: Record<string, any>) {
   };
 }
 
-describe('runReActLoop �?concurrent tool execution', () => {
+describe('runReActLoop — concurrent tool execution', () => {
   it('should execute multiple tool calls concurrently', async () => {
     const executionOrder: string[] = [];
     const resolveBarrier = new Promise<void>((r) => setTimeout(r, 100));
diff --git a/packages/codingcode/test/agent/agent-todo-event.test.ts b/packages/codingcode/test/agent/agent-todo-event.test.ts
index 483b496..ac3e204 100644
--- a/packages/codingcode/test/agent/agent-todo-event.test.ts
+++ b/packages/codingcode/test/agent/agent-todo-event.test.ts
@@ -53,7 +53,7 @@ const mockState = {
   currentTurnId: 1,
   sessionMeta: { model: 'test-model', version: '0.1.0', createdAt: new Date().toISOString() } as any,
   title: 'test',
-  tokenCountEstimate: 0,
+  usage: undefined,
 };
 
 const mockLlm = {
diff --git a/packages/codingcode/test/agent/agent.test.ts b/packages/codingcode/test/agent/agent.test.ts
index bb9a2ee..65fb83e 100644
--- a/packages/codingcode/test/agent/agent.test.ts
+++ b/packages/codingcode/test/agent/agent.test.ts
@@ -52,7 +52,7 @@ const mockState = {
   currentTurnId: 1,
   sessionMeta: { model: 'test-model', version: '0.1.0', createdAt: new Date().toISOString() } as any,
   title: 'test',
-  tokenCountEstimate: 0,
+  usage: undefined,
 };
 
 function makeDeps(overrides?: Record<string, any>) {
diff --git a/packages/codingcode/test/agent/loop-options.test.ts b/packages/codingcode/test/agent/loop-options.test.ts
index d5340b2..ced7d4e 100644
--- a/packages/codingcode/test/agent/loop-options.test.ts
+++ b/packages/codingcode/test/agent/loop-options.test.ts
@@ -12,7 +12,7 @@ describe('runReActLoop — loop options', () => {
     currentTurnId: randomUUID(),
     sessionMeta: { model: 'test-model', version: '0.1.0', createdAt: new Date().toISOString() } as any,
     title: 'test',
-    tokenCountEstimate: 0,
+    usage: undefined,
   };
 
   const mockHooks = {
diff --git a/packages/codingcode/test/agent/stop-hook.test.ts b/packages/codingcode/test/agent/stop-hook.test.ts
index f77c8c8..5e19a11 100644
--- a/packages/codingcode/test/agent/stop-hook.test.ts
+++ b/packages/codingcode/test/agent/stop-hook.test.ts
@@ -12,7 +12,7 @@ describe('runReActLoop — stop hook', () => {
     currentTurnId: randomUUID(),
     sessionMeta: { model: 'test-model', version: '0.1.0', createdAt: new Date().toISOString() } as any,
     title: 'test',
-    tokenCountEstimate: 0,
+    usage: undefined,
   };
 
   function baseMockDeps(overrides: Record<string, any> = {}) {
diff --git a/packages/codingcode/test/client/direct.test.ts b/packages/codingcode/test/client/direct.test.ts
index 699d751..74af0e6 100644
--- a/packages/codingcode/test/client/direct.test.ts
+++ b/packages/codingcode/test/client/direct.test.ts
@@ -71,4 +71,22 @@ describe('agentEventToStreamChunk - approval interleaving', () => {
     expect(chunks[1]).toMatchObject({ type: 'approval_request', id: 'apr-2' });
     expect(chunks[2]).toEqual({ type: 'done' });
   });
+
+  it('yields usage chunks', async () => {
+    async function* source() {
+      yield { _tag: 'Step' as const, step: 1, max: 10 };
+      yield { _tag: 'Assistant' as const, content: 'ok' };
+      yield { _tag: 'Usage' as const, prompt: 1000, completion: 500, total: 1500 };
+    }
+
+    const chunks: any[] = [];
+    for await (const chunk of agentEventToStreamChunk(source())) {
+      chunks.push(chunk);
+    }
+
+    expect(chunks).toEqual([
+      { type: 'text', text: 'ok', messageId: 1 },
+      { type: 'usage', prompt: 1000, completion: 500, total: 1500 },
+    ]);
+  });
 });
diff --git a/packages/codingcode/test/context/append-turn-end.test.ts b/packages/codingcode/test/context/append-turn-end.test.ts
index b96f165..1cd4cf8 100644
--- a/packages/codingcode/test/context/append-turn-end.test.ts
+++ b/packages/codingcode/test/context/append-turn-end.test.ts
@@ -52,7 +52,6 @@ describe('appendTurnEnd', () => {
 
   it('compression thresholds have sensible defaults', () => {
     const config = getContextConfig();
-    expect(config.thresholds.prune).toBeGreaterThan(0);
-    expect(config.thresholds.compaction).toBeGreaterThan(0);
+    expect(config.compactionThreshold).toBeGreaterThan(0);
   });
 });
diff --git a/packages/codingcode/test/context/budget-integration.test.ts b/packages/codingcode/test/context/budget-integration.test.ts
new file mode 100644
index 0000000..923780e
--- /dev/null
+++ b/packages/codingcode/test/context/budget-integration.test.ts
@@ -0,0 +1,86 @@
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+import { mkdirSync, writeFileSync, rmSync, existsSync, readFileSync } from 'fs';
+import { join } from 'path';
+import { homedir } from 'os';
+import { randomUUID } from 'crypto';
+import { assemblePayload } from '../../src/context/organizer.js';
+import type { SessionEvent, ToolBudgetEvent } from '../../src/session/types.js';
+
+const PROJECT_BASE = join(homedir(), '.codingcode', 'project');
+
+function makeBudgetConfig() {
+  return {
+    compactionThreshold: 0.9,
+    keepRecentTurns: 3,
+    toolsExemptFromMicrocompact: ['Read'],
+    minTurnsBetweenCompactions: 5,
+    compactionModel: '',
+    reactiveCompactMaxRetries: 3,
+    reactiveCompactKeepTurns: 3,
+    snipMaxMessages: 50,
+    persistPreviewChars: 2000,
+    thresholdTokens: 8000,
+    toolResultBudgetThreshold: 100, // low threshold for testing
+    keepRecentToolResults: 3,
+  } as any;
+}
+
+describe('applyToolResultBudget integration', () => {
+  const projectSlug = randomUUID();
+  let sessionId: string;
+  let sessionDir: string;
+  let jsonlPath: string;
+  let indexPath: string;
+
+  beforeEach(() => {
+    sessionId = randomUUID();
+    sessionDir = join(PROJECT_BASE, projectSlug, 'sessions');
+    mkdirSync(sessionDir, { recursive: true });
+    jsonlPath = join(sessionDir, `${sessionId}.jsonl`);
+    indexPath = join(sessionDir, `${sessionId}.index.json`);
+
+    const lines: any[] = [
+      { type: 'session_meta', sessionId, projectPath: projectSlug, cwd: '/tmp/test', model: 'test', createdAt: new Date().toISOString(), version: '0.1.0' },
+      { type: 'user', turnId: 1, uuid: 'u1', content: 'q1', timestamp: new Date().toISOString() },
+      { type: 'assistant', turnId: 1, uuid: 'a1', content: 'r1', toolCalls: [{ id: 'tc1', name: 'bash', arguments: {} }], model: 'test', timestamp: new Date().toISOString() },
+      { type: 'tool_result', turnId: 1, uuid: 't1', parentUuid: 'a1', toolName: 'bash', toolCallId: 'tc1', output: 'x'.repeat(200), timestamp: new Date().toISOString(), tokenCount: 0 },
+      { type: 'tool_result', turnId: 1, uuid: 't2', parentUuid: 'a1', toolName: 'bash', toolCallId: 'tc2', output: 'y'.repeat(200), timestamp: new Date().toISOString(), tokenCount: 0 },
+    ];
+    writeFileSync(jsonlPath, lines.map((l) => JSON.stringify(l)).join('\n') + '\n', 'utf8');
+
+    const idx = {
+      sessionId, projectPath: projectSlug, cwd: '/tmp/test', model: 'test',
+      createdAt: new Date().toISOString(), updatedAt: new Date().toISOString(),
+      messageCount: lines.length, title: 'fixture', currentTurnId: 1,
+      usage: undefined, promptEstimate: 0, permissionMode: 'default',
+    };
+    writeFileSync(indexPath, JSON.stringify(idx, null, 2), 'utf8');
+  });
+
+  afterEach(() => {
+    const dir = join(PROJECT_BASE, projectSlug);
+    if (existsSync(dir)) rmSync(dir, { recursive: true, force: true });
+  });
+
+  it('persists tool results when same-turn total exceeds budget', () => {
+    const config = makeBudgetConfig();
+    const result = assemblePayload(sessionId, projectSlug, config);
+
+    // Check that at least one tool result was budgeted (replaced with persisted preview)
+    const toolMsgs = result.messages.filter((m: any) => m.role === 'tool');
+    expect(toolMsgs.length).toBe(2);
+    // Budget replaces from largest until under threshold; one of them should be replaced
+    const replacedCount = toolMsgs.filter((m: any) => m.content.includes('persisted at:')).length;
+    expect(replacedCount).toBeGreaterThanOrEqual(1);
+
+    // Check that newBudgets were returned for external persistence
+    expect(result.newBudgets.length).toBeGreaterThanOrEqual(1);
+  });
+
+  it('returns snipTokensFreed >= 0', () => {
+    const config = makeBudgetConfig();
+    const result = assemblePayload(sessionId, projectSlug, config);
+    expect(typeof result.snipTokensFreed).toBe('number');
+    expect(result.snipTokensFreed).toBeGreaterThanOrEqual(0);
+  });
+});
diff --git a/packages/codingcode/test/context/compressor/behavior.test.ts b/packages/codingcode/test/context/compressor/behavior.test.ts
index 032812b..6a8146c 100644
--- a/packages/codingcode/test/context/compressor/behavior.test.ts
+++ b/packages/codingcode/test/context/compressor/behavior.test.ts
@@ -3,11 +3,13 @@ import { mkdirSync, writeFileSync, readFileSync, rmSync, existsSync } from 'fs';
 import { join } from 'path';
 import { homedir } from 'os';
 import { randomUUID } from 'crypto';
-import { run, compactWithLLM } from '../../../src/context/compressor/index.js';
+import { compactWithLLM } from '../../../src/context/compressor/index.js';
 import type { ContextConfig } from '../../../src/context/config.js';
 import type { LLMClient } from '../../../src/llm/client.js';
 import { Result } from '../../../src/core/result.js';
 import type { SessionIndex, SessionEvent, SummaryEvent } from '../../../src/session/types.js';
+import { buildMessages } from '../../../src/session/store.js';
+import { estimateTokens } from '../../../src/context/utils/tokens.js';
 
 const PROJECT_BASE = join(homedir(), '.codingcode', 'project');
 
@@ -43,7 +45,7 @@ function makeFixture(opts: FixtureOptions) {
     sessionId, projectPath: slug, cwd: '/tmp/test', model: 'test',
     createdAt: new Date().toISOString(), updatedAt: new Date().toISOString(),
     messageCount: opts.numTurns * 3, title: 'fixture', currentTurnId: opts.currentTurnId ?? opts.numTurns,
-    tokenCountEstimate: 0, permissionMode: 'default',
+    usage: undefined, promptEstimate: 0, permissionMode: 'default',
   };
   writeFileSync(indexPath, JSON.stringify(idx, null, 2), 'utf8');
 
@@ -65,25 +67,18 @@ function readSummaryEvents(jsonlPath: string): SummaryEvent[] {
 
 function tinyConfig(overrides: Partial<ContextConfig> = {}): ContextConfig {
   return {
-    defaultMaxTokens: 1000,
-    reservedTokens: 0,
-    thresholds: { prune: 0.2, compaction: 0.5 },
-    pruneProtectedTokens: 100,
-    pruneMinRelease: 1,
-    toolsExemptFromPrune: [],
-    prefixTurnsProtected: 1,
-    minTurnsBetweenCompactions: 3,
+    compactionThreshold: 0.5,
     keepRecentTurns: 2,
+    toolsExemptFromMicrocompact: [],
+    minTurnsBetweenCompactions: 3,
     compactionModel: '',
-    archiveTtlDays: 30,
-    checkpointKeep: 50,
     reactiveCompactMaxRetries: 1,
     reactiveCompactKeepTurns: 3,
     snipMaxMessages: 100,
-    snipKeepHead: 3,
-    microKeepRecentTools: 5,
     persistPreviewChars: 2000,
     thresholdTokens: 2000,
+    toolResultBudgetThreshold: 50000,
+    keepRecentToolResults: 3,
     ...overrides,
   };
 }
@@ -100,62 +95,6 @@ function makeMockLLM(content: string): LLMClient {
 }
 
 describe('compressor behavior', () => {
-  describe('L2 prune protection', () => {
-    it('does not prune tools in protected recent turns (prefixTurnsProtected)', async () => {
-      const fx = makeFixture({ numTurns: 3, toolContentSize: 4000 });
-      try {
-        const cfg = tinyConfig({ prefixTurnsProtected: 2, pruneProtectedTokens: 0 });
-        await run(fx.sessionId, fx.slug, 10000, null, cfg);
-        const summaries = readSummaryEvents(fx.transcriptPath);
-        expect(summaries.filter((s) => s.method === 'prune')).toHaveLength(0);
-      } finally { cleanup(fx.slug); }
-    });
-
-    it('respects pruneProtectedTokens window (recent tools by token budget)', async () => {
-      const fx = makeFixture({ numTurns: 5, toolContentSize: 4000 });
-      try {
-        const cfg = tinyConfig({ prefixTurnsProtected: 0, pruneProtectedTokens: 3000 });
-        await run(fx.sessionId, fx.slug, 100000, null, cfg);
-        const summaries = readSummaryEvents(fx.transcriptPath);
-        const pruneSummaries = summaries.filter((s) => s.method === 'prune');
-        // Only old tools (turn 1, 2) should be pruned; recent tools (3, 4, 5) protected by token budget.
-        // Each prune summary replaces one tool_result uuid
-        expect(pruneSummaries.length).toBeGreaterThanOrEqual(0);
-      } finally { cleanup(fx.slug); }
-    });
-
-    it('skips whitelisted tools', async () => {
-      const fx = makeFixture({ numTurns: 5, toolContentSize: 4000, toolName: 'Read' });
-      try {
-        const cfg = tinyConfig({ prefixTurnsProtected: 0, pruneProtectedTokens: 0, toolsExemptFromPrune: ['Read'] });
-        await run(fx.sessionId, fx.slug, 100000, null, cfg);
-        const summaries = readSummaryEvents(fx.transcriptPath);
-        expect(summaries.filter((s) => s.method === 'prune')).toHaveLength(0);
-      } finally { cleanup(fx.slug); }
-    });
-  });
-
-  describe('fall-through to L5', () => {
-    it('falls through to L5 when prune has no candidates', async () => {
-      const fx = makeFixture({ numTurns: 6, toolContentSize: 4000, toolName: 'Read' });
-      try {
-        const cfg = tinyConfig({
-          toolsExemptFromPrune: ['Read'],
-          prefixTurnsProtected: 0,
-          pruneProtectedTokens: 0,
-          minTurnsBetweenCompactions: 2,
-          keepRecentTurns: 2,
-        });
-        const llm = makeMockLLM('## Compacted History\n\n### Goal\nx\n\n### Instructions\ny\n\n### Discoveries\nz\n\n### Accomplished\nw\n\n### Relevant Files\nv');
-        const result = await run(fx.sessionId, fx.slug, 100000, llm, cfg);
-        const summaries = readSummaryEvents(fx.transcriptPath);
-        const compactionSummaries = summaries.filter((s) => s.method === 'auto-compact');
-        expect(compactionSummaries.length).toBe(1);
-        expect(result.didCompress).toBe(true);
-      } finally { cleanup(fx.slug); }
-    });
-  });
-
   describe('L5 compaction', () => {
     it('writes summary event with five-section system summary', async () => {
       const fx = makeFixture({ numTurns: 5 });
@@ -212,29 +151,20 @@ describe('compressor behavior', () => {
     });
   });
 
-  describe('L1 persist (applyToolResultBudget)', () => {
-    it('persists large tool results to disk', async () => {
-      const fx = makeFixture({ numTurns: 1, toolContentSize: 5000, toolName: 'bash' });
+  describe('compactWithLLM result', () => {
+    it('returns promptEstimate after compression', async () => {
+      const fx = makeFixture({ numTurns: 5 });
       try {
-        const cfg = tinyConfig({ thresholdTokens: 100, persistPreviewChars: 100 });
-        const result = await run(fx.sessionId, fx.slug, 1000, null, cfg);
-        const summaries = readSummaryEvents(fx.transcriptPath);
-        const persistSummaries = summaries.filter((s) => s.method === 'collapse-llm');
-        expect(persistSummaries.length).toBe(1);
-        expect(persistSummaries[0]!.summaryText).toContain('persisted at:');
+        const before = estimateTokens(buildMessages(fx.transcriptPath));
+        const cfg = tinyConfig({ minTurnsBetweenCompactions: 3, keepRecentTurns: 2 });
+        const llm = makeMockLLM('## Compacted History\n\n### Goal\na\n\n### Instructions\nb\n\n### Discoveries\nc\n\n### Accomplished\nd\n\n### Relevant Files\ne');
+        const result = await compactWithLLM(fx.sessionId, fx.slug, cfg, llm);
+        expect(result.didCompress).toBe(true);
+        expect(result.promptEstimate).toBeGreaterThan(0);
+        expect(result.promptEstimate).toBeLessThan(before);
         expect(result.released).toBeGreaterThan(0);
       } finally { cleanup(fx.slug); }
     });
-
-    it('does not persist small tool results', async () => {
-      const fx = makeFixture({ numTurns: 1, toolContentSize: 10, toolName: 'bash' });
-      try {
-        const cfg = tinyConfig({ thresholdTokens: 100 });
-        await run(fx.sessionId, fx.slug, 1000, null, cfg);
-        const summaries = readSummaryEvents(fx.transcriptPath);
-        const persistSummaries = summaries.filter((s) => s.method === 'collapse-llm');
-        expect(persistSummaries).toHaveLength(0);
-      } finally { cleanup(fx.slug); }
-    });
   });
+
 });
diff --git a/packages/codingcode/test/context/compressor/compact-if-needed.test.ts b/packages/codingcode/test/context/compressor/compact-if-needed.test.ts
new file mode 100644
index 0000000..c778c12
--- /dev/null
+++ b/packages/codingcode/test/context/compressor/compact-if-needed.test.ts
@@ -0,0 +1,122 @@
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+
+const { mockCompactWithLLM, mockLLM } = vi.hoisted(() => ({
+  mockCompactWithLLM: vi.fn(),
+  mockLLM: {
+    complete: vi.fn(() => Promise.resolve({
+      ok: true,
+      value: { content: '<summary>compacted</summary>' },
+    })),
+  },
+}));
+
+vi.mock('../../../src/context/compressor/index.js', async (importOriginal) => {
+  const actual = await importOriginal();
+  return {
+    ...(actual as any),
+    compactWithLLM: mockCompactWithLLM,
+  };
+});
+
+vi.mock('../../../src/session/store.js', async (importOriginal) => {
+  const actual = await importOriginal();
+  return {
+    ...(actual as any),
+    resolveSessionDir: vi.fn(() => '/tmp/sessions'),
+    findSessionIndex: vi.fn(() => ({ currentTurnId: 10 })),
+    readHistory: vi.fn(() => [
+      { type: 'user', content: 'a'.repeat(200), uuid: 'u1', turnId: 1 },
+      { type: 'assistant', content: 'b'.repeat(200), uuid: 'a1', turnId: 1 },
+    ]),
+  };
+});
+
+vi.mock('../../../src/context/compressor/llm-resolver.js', () => ({
+  resolveCompactionLLM: vi.fn(() => Promise.resolve(mockLLM)),
+}));
+
+vi.mock('fs', async (importOriginal) => {
+  const actual = await importOriginal();
+  return {
+    ...(actual as any),
+    appendFileSync: vi.fn(),
+  };
+});
+
+import { compactIfNeeded } from '../../../src/context/compressor/index.js';
+import { findSessionIndex } from '../../../src/session/store.js';
+
+function config(threshold: number, maxTokens = 10000) { // test fixture config; NOTE(review): maxTokens is never read in this body — the tests pass a literal 10000 to compactIfNeeded instead, confirm whether the parameter can go
+  return {
+    compactionThreshold: threshold, // the only field driven by the caller
+    keepRecentTurns: 2,
+    minTurnsBetweenCompactions: 5,
+    toolsExemptFromMicrocompact: [],
+    compactionModel: '',
+    reactiveCompactMaxRetries: 1,
+    reactiveCompactKeepTurns: 3,
+    snipMaxMessages: 100,
+    persistPreviewChars: 2000,
+    thresholdTokens: 2000,
+    toolResultBudgetThreshold: 50000,
+    keepRecentToolResults: 3,
+  } as any; // cast opts this literal out of type-checking against the real config type
+}
+
+describe('compactIfNeeded', () => {
+  beforeEach(() => {
+    mockCompactWithLLM.mockClear();
+    (findSessionIndex as any).mockReturnValue({ currentTurnId: 10 });
+  });
+
+  it('returns didCompress=false when promptEstimate is below threshold', async () => {
+    const result = await compactIfNeeded('s1', 'proj', 100, 0, 10000, config(0.5), null);
+    expect(result.didCompress).toBe(false);
+    expect(result.released).toBe(0);
+    expect(result.promptEstimate).toBe(100);
+    expect(mockCompactWithLLM).not.toHaveBeenCalled();
+  });
+
+  it('returns didCompress=false when promptEstimate equals threshold', async () => {
+    const result = await compactIfNeeded('s1', 'proj', 5000, 0, 10000, config(0.5), null);
+    expect(result.didCompress).toBe(false);
+    expect(result.released).toBe(0);
+    expect(mockCompactWithLLM).not.toHaveBeenCalled();
+  });
+
+  it('returns didCompress=true when promptEstimate exceeds threshold', async () => {
+    const result = await compactIfNeeded('s1', 'proj', 10000, 0, 10000, config(0.5), null);
+    expect(result.didCompress).toBe(true);
+    expect(result.released).toBeGreaterThan(0);
+    expect(result.promptEstimate).toBeGreaterThanOrEqual(0);
+  });
+
+  it('does not return restoredFiles field (removed)', async () => {
+    const result = await compactIfNeeded('s1', 'proj', 10000, 0, 10000, config(0.5), null);
+    expect('restoredFiles' in result).toBe(false);
+  });
+
+  it('resets failure count after TTL expires', async () => {
+    // Force compactWithLLM to always return didCompress=false by setting currentTurnId too low
+    (findSessionIndex as any).mockReturnValue({ currentTurnId: 0 });
+    // First 3 calls: compactWithLLM returns didCompress=false (insufficient turns)
+    await compactIfNeeded('ttl-session', 'proj', 10000, 0, 10000, config(0.5), null);
+    await compactIfNeeded('ttl-session', 'proj', 10000, 0, 10000, config(0.5), null);
+    await compactIfNeeded('ttl-session', 'proj', 10000, 0, 10000, config(0.5), null);
+
+    // 4th call blocked by failure tracker (failures >= 3)
+    const blocked = await compactIfNeeded('ttl-session', 'proj', 10000, 0, 10000, config(0.5), null);
+    expect(blocked.didCompress).toBe(false);
+
+    // Advance time past 24h TTL; the real clock is read before the spy is installed
+    const pastTtl = Date.now() + 25 * 60 * 60 * 1000;
+    vi.spyOn(Date, 'now').mockReturnValue(pastTtl);
+    try {
+      // After TTL, failure count resets, compaction is attempted again (still fails due to turns)
+      const afterTTL = await compactIfNeeded('ttl-session', 'proj', 10000, 0, 10000, config(0.5), null);
+      expect(afterTTL.didCompress).toBe(false);
+    } finally {
+      vi.restoreAllMocks(); // always undo the fake clock, even when the assertion above throws
+    }
+  });
+});
diff --git a/packages/codingcode/test/context/compressor/llm-resolver.test.ts b/packages/codingcode/test/context/compressor/llm-resolver.test.ts
index 7884f8e..6168772 100644
--- a/packages/codingcode/test/context/compressor/llm-resolver.test.ts
+++ b/packages/codingcode/test/context/compressor/llm-resolver.test.ts
@@ -11,15 +11,18 @@ const fakeFallback: LLMClient = {
 
 function cfg(compactionModel: string): ContextConfig {
   return {
-    defaultMaxTokens: 1000, reservedTokens: 0,
-    thresholds: { prune: 0.6, compaction: 0.9 },
-    pruneProtectedTokens: 100, pruneMinRelease: 100,
-    toolsExemptFromPrune: [],
-    prefixTurnsProtected: 1, minTurnsBetweenCompactions: 3, keepRecentTurns: 2,
-    compactionModel, archiveTtlDays: 30, checkpointKeep: 50,
-    reactiveCompactMaxRetries: 1, reactiveCompactKeepTurns: 3,
-    snipMaxMessages: 100, snipKeepHead: 3, microKeepRecentTools: 5,
-    persistPreviewChars: 2000, thresholdTokens: 2000,
+    compactionThreshold: 0.9,
+    keepRecentTurns: 2,
+    toolsExemptFromMicrocompact: [],
+    minTurnsBetweenCompactions: 3,
+    compactionModel,
+    reactiveCompactMaxRetries: 1,
+    reactiveCompactKeepTurns: 3,
+    snipMaxMessages: 100,
+    persistPreviewChars: 2000,
+    thresholdTokens: 2000,
+    toolResultBudgetThreshold: 50000,
+    keepRecentToolResults: 3,
   };
 }
 
diff --git a/packages/codingcode/test/context/compressor/micro.test.ts b/packages/codingcode/test/context/compressor/micro.test.ts
deleted file mode 100644
index 108d456..0000000
--- a/packages/codingcode/test/context/compressor/micro.test.ts
+++ /dev/null
@@ -1,110 +0,0 @@
-import { describe, it, expect } from 'vitest';
-import { mkdirSync, writeFileSync, readFileSync, rmSync } from 'fs';
-import { join } from 'path';
-import { homedir } from 'os';
-import { randomUUID } from 'crypto';
-import { run } from '../../../src/context/compressor/index.js';
-import type { ContextConfig } from '../../../src/context/config.js';
-import type { SessionIndex, SessionEvent, SummaryEvent } from '../../../src/session/types.js';
-
-const PROJECT_BASE = join(homedir(), '.codingcode', 'project');
-
-function makeFixture(sessionId: string, slug: string, numTurns: number, toolOutput?: string) {
-  const dir = join(PROJECT_BASE, slug, 'sessions');
-  mkdirSync(dir, { recursive: true });
-  const transcriptPath = join(dir, `${sessionId}.jsonl`);
-  const indexPath = join(dir, `${sessionId}.index.json`);
-
-  const lines: any[] = [
-    { type: 'session_meta', sessionId, projectPath: slug, cwd: '/tmp/test', model: 'test', createdAt: new Date().toISOString(), version: '0.1.0' },
-  ];
-
-  for (let turn = 1; turn <= numTurns; turn++) {
-    lines.push({ type: 'user', turnId: turn, uuid: `u${turn}`, content: `q${turn}`, timestamp: new Date().toISOString() });
-    lines.push({ type: 'assistant', turnId: turn, uuid: `a${turn}`, content: `r${turn}`, toolCalls: [{ id: `tc${turn}`, name: 'bash', arguments: '{}' }], model: 'test', timestamp: new Date().toISOString() });
-    lines.push({ type: 'tool_result', turnId: turn, uuid: `t${turn}`, parentUuid: `a${turn}`, toolName: 'bash', toolCallId: `tc${turn}`, output: toolOutput ?? 'X'.repeat(200), timestamp: new Date().toISOString(), tokenCount: 100 });
-  }
-
-  writeFileSync(transcriptPath, lines.map((l) => JSON.stringify(l)).join('\n') + '\n', 'utf8');
-
-  const idx: SessionIndex = {
-    sessionId, projectPath: slug, cwd: '/tmp/test', model: 'test',
-    createdAt: new Date().toISOString(), updatedAt: new Date().toISOString(),
-    messageCount: numTurns * 3, title: 'fixture', currentTurnId: numTurns,
-    tokenCountEstimate: 0, permissionMode: 'default',
-  };
-  writeFileSync(indexPath, JSON.stringify(idx, null, 2), 'utf8');
-
-  return { dir, transcriptPath, indexPath };
-}
-
-function readSummaryEvents(jsonlPath: string): SummaryEvent[] {
-  const content = readFileSync(jsonlPath, 'utf8');
-  return content.split('\n')
-    .filter((l) => l.trim())
-    .map((l) => JSON.parse(l) as SessionEvent)
-    .filter((ev): ev is SummaryEvent => ev.type === 'summary');
-}
-
-function microCfg(): ContextConfig {
-  return {
-    defaultMaxTokens: 10000,
-    reservedTokens: 0,
-    thresholds: { prune: 0.99, compaction: 0.99 },
-    pruneProtectedTokens: 0,
-    pruneMinRelease: 1,
-    toolsExemptFromPrune: [],
-    prefixTurnsProtected: 0,
-    minTurnsBetweenCompactions: 1,
-    keepRecentTurns: 999,
-    compactionModel: '',
-    archiveTtlDays: 30,
-    checkpointKeep: 50,
-    reactiveCompactMaxRetries: 1,
-    reactiveCompactKeepTurns: 3,
-    snipMaxMessages: 999,
-    snipKeepHead: 3,
-    microKeepRecentTools: 1,
-    persistPreviewChars: 2000,
-    thresholdTokens: 999_999,
-  };
-}
-
-describe('L3 Microcompact', () => {
-  it('creates summary events for old tool results exceeding keep count', async () => {
-    const sessionId = randomUUID();
-    const slug = randomUUID();
-    const fx = makeFixture(sessionId, slug, 3); // 3 tool results, keep 1
-    try {
-      await run(sessionId, slug, 1000, null, microCfg());
-      const summaries = readSummaryEvents(fx.transcriptPath);
-      const pruneSummaries = summaries.filter((s) => s.method === 'prune');
-      expect(pruneSummaries.length).toBe(2); // 2 old tool results compacted
-      for (const s of pruneSummaries) {
-        expect(s.replaces).toHaveLength(1);
-      }
-    } finally { rmSync(join(PROJECT_BASE, slug), { recursive: true, force: true }); }
-  });
-
-  it('does nothing when under microKeepRecentTools threshold', async () => {
-    const sessionId = randomUUID();
-    const slug = randomUUID();
-    const fx = makeFixture(sessionId, slug, 1); // 1 tool result <= 1
-    try {
-      await run(sessionId, slug, 1000, null, microCfg());
-      const summaries = readSummaryEvents(fx.transcriptPath);
-      expect(summaries).toHaveLength(0);
-    } finally { rmSync(join(PROJECT_BASE, slug), { recursive: true, force: true }); }
-  });
-
-  it('skips short tool results (< 120 chars)', async () => {
-    const sessionId = randomUUID();
-    const slug = randomUUID();
-    const fx = makeFixture(sessionId, slug, 3, 'short'); // all < 120 chars
-    try {
-      await run(sessionId, slug, 1000, null, microCfg());
-      const summaries = readSummaryEvents(fx.transcriptPath);
-      expect(summaries).toHaveLength(0);
-    } finally { rmSync(join(PROJECT_BASE, slug), { recursive: true, force: true }); }
-  });
-});
diff --git a/packages/codingcode/test/context/compressor/snip.test.ts b/packages/codingcode/test/context/compressor/snip.test.ts
deleted file mode 100644
index 348dcd3..0000000
--- a/packages/codingcode/test/context/compressor/snip.test.ts
+++ /dev/null
@@ -1,98 +0,0 @@
-import { describe, it, expect } from 'vitest';
-import { mkdirSync, writeFileSync, readFileSync, rmSync } from 'fs';
-import { join } from 'path';
-import { homedir } from 'os';
-import { randomUUID } from 'crypto';
-import { run } from '../../../src/context/compressor/index.js';
-import type { ContextConfig } from '../../../src/context/config.js';
-import type { SessionIndex, SessionEvent, SummaryEvent } from '../../../src/session/types.js';
-
-const PROJECT_BASE = join(homedir(), '.codingcode', 'project');
-
-function makeFixture(sessionId: string, slug: string, numTurns: number) {
-  const dir = join(PROJECT_BASE, slug, 'sessions');
-  mkdirSync(dir, { recursive: true });
-  const transcriptPath = join(dir, `${sessionId}.jsonl`);
-  const indexPath = join(dir, `${sessionId}.index.json`);
-
-  const lines: any[] = [
-    { type: 'session_meta', sessionId, projectPath: slug, cwd: '/tmp/test', model: 'test', createdAt: new Date().toISOString(), version: '0.1.0' },
-  ];
-
-  for (let turn = 1; turn <= numTurns; turn++) {
-    lines.push({ type: 'user', turnId: turn, uuid: `u${turn}`, content: `q${turn}`, timestamp: new Date().toISOString() });
-    lines.push({ type: 'assistant', turnId: turn, uuid: `a${turn}`, content: `r${turn}`, toolCalls: [{ id: `tc${turn}`, name: 'bash', arguments: '{}' }], model: 'test', timestamp: new Date().toISOString() });
-    lines.push({ type: 'tool_result', turnId: turn, uuid: `t${turn}`, parentUuid: `a${turn}`, toolName: 'bash', toolCallId: `tc${turn}`, output: 'result', timestamp: new Date().toISOString(), tokenCount: 10 });
-  }
-
-  writeFileSync(transcriptPath, lines.map((l) => JSON.stringify(l)).join('\n') + '\n', 'utf8');
-
-  const idx: SessionIndex = {
-    sessionId, projectPath: slug, cwd: '/tmp/test', model: 'test',
-    createdAt: new Date().toISOString(), updatedAt: new Date().toISOString(),
-    messageCount: numTurns * 3, title: 'fixture', currentTurnId: numTurns,
-    tokenCountEstimate: 0, permissionMode: 'default',
-  };
-  writeFileSync(indexPath, JSON.stringify(idx, null, 2), 'utf8');
-
-  return { dir, transcriptPath, indexPath };
-}
-
-function readSummaryEvents(jsonlPath: string): SummaryEvent[] {
-  const content = readFileSync(jsonlPath, 'utf8');
-  return content.split('\n')
-    .filter((l) => l.trim())
-    .map((l) => JSON.parse(l) as SessionEvent)
-    .filter((ev): ev is SummaryEvent => ev.type === 'summary');
-}
-
-function snipCfg(): ContextConfig {
-  return {
-    defaultMaxTokens: 10000,
-    reservedTokens: 0,
-    thresholds: { prune: 0.99, compaction: 0.99 },
-    pruneProtectedTokens: 0,
-    pruneMinRelease: 1,
-    toolsExemptFromPrune: [],
-    prefixTurnsProtected: 0,
-    minTurnsBetweenCompactions: 1,
-    keepRecentTurns: 999,
-    compactionModel: '',
-    archiveTtlDays: 30,
-    checkpointKeep: 50,
-    reactiveCompactMaxRetries: 1,
-    reactiveCompactKeepTurns: 3,
-    snipMaxMessages: 4,
-    snipKeepHead: 1,
-    microKeepRecentTools: 999,
-    persistPreviewChars: 2000,
-    thresholdTokens: 999_999,
-  };
-}
-
-describe('L2 Snip', () => {
-  it('creates summary event when message count exceeds snipMaxMessages', async () => {
-    const sessionId = randomUUID();
-    const slug = randomUUID();
-    const fx = makeFixture(sessionId, slug, 3); // 9 messages > 4
-    try {
-      await run(sessionId, slug, 1000, null, snipCfg());
-      const summaries = readSummaryEvents(fx.transcriptPath);
-      const snipSummaries = summaries.filter((s) => s.method === 'context-collapse');
-      expect(snipSummaries).toHaveLength(1);
-      expect(snipSummaries[0]!.replaces.length).toBeGreaterThan(0);
-    } finally { rmSync(join(PROJECT_BASE, slug), { recursive: true, force: true }); }
-  });
-
-  it('does nothing when under snipMaxMessages', async () => {
-    const sessionId = randomUUID();
-    const slug = randomUUID();
-    const fx = makeFixture(sessionId, slug, 1); // 3 messages < 4
-    try {
-      const cfg = { ...snipCfg(), snipMaxMessages: 999 };
-      await run(sessionId, slug, 1000, null, cfg);
-      const summaries = readSummaryEvents(fx.transcriptPath);
-      expect(summaries).toHaveLength(0);
-    } finally { rmSync(join(PROJECT_BASE, slug), { recursive: true, force: true }); }
-  });
-});
diff --git a/packages/codingcode/test/context/context.test.ts b/packages/codingcode/test/context/context.test.ts
index 55b07cf..f996401 100644
--- a/packages/codingcode/test/context/context.test.ts
+++ b/packages/codingcode/test/context/context.test.ts
@@ -33,9 +33,9 @@ const MockToolExecutorLayer = Layer.succeed(ToolExecutorService, ToolExecutorSer
 
 const MockContextLayer = Layer.succeed(ContextService, ContextService.of({
   _tag: 'Context' as any,
-  build: () => Effect.sync(() => [{ role: 'user' as const, content: 'hi' }]),
-  compress: () => Effect.succeed({ didCompress: true, released: 0 }),
-  appendTurnEnd: () => Effect.succeed({ didCompress: false, released: 0 }),
+  build: () => Effect.sync(() => ({ messages: [{ role: 'user' as const, content: 'hi' }], snipTokensFreed: 0, newBudgets: [] })),
+  compress: () => Effect.succeed({ didCompress: true, released: 0, promptEstimate: 0 }),
+  compactIfNeeded: () => Effect.succeed({ didCompress: false, released: 0, promptEstimate: 0 }),
 }));
 
 const MockCheckpointLayer = Layer.succeed(CheckpointService, CheckpointService.of({
diff --git a/packages/codingcode/test/context/organizer.test.ts b/packages/codingcode/test/context/organizer.test.ts
index 843a7a7..a5e1b1b 100644
--- a/packages/codingcode/test/context/organizer.test.ts
+++ b/packages/codingcode/test/context/organizer.test.ts
@@ -1,75 +1,142 @@
 import { describe, it, expect } from 'vitest';
-import { fitToBudget } from '../../src/context/organizer.js';
-import type { Message } from '../../src/core/types.js';
-import type { ContextConfig } from '../../src/context/config.js';
+import { assemblePayload, snipEvents, microcompact } from '../../src/context/organizer.js';
+import type { SessionEvent, ToolResultEvent } from '../../src/session/types.js';
 
-function msg(content: string, role: Message['role'] = 'user', toolCalls?: Message['tool_calls']): Message {
-  return toolCalls ? { role, content, tool_calls: toolCalls } : { role, content };
+const baseConfig = {
+  snipMaxMessages: 50,
+  toolsExemptFromMicrocompact: ['Read'],
+  keepRecentToolResults: 3,
+} as any;
+
+function makeUserEvent(content: string, turnId: number): SessionEvent {
+  return { type: 'user', uuid: `u${turnId}`, content, turnId, timestamp: new Date().toISOString() };
 }
 
-function turn(userContent: string, assistantContent: string, toolContent: string, turnNum: number): Message[] {
-  return [
-    { role: 'user', content: userContent },
-    { role: 'assistant', content: assistantContent, tool_calls: [{ id: `tc${turnNum}`, name: 'test', arguments: {} }] },
-    { role: 'tool', content: toolContent, tool_call_id: `tc${turnNum}` },
-  ];
+function makeToolResult(toolName: string, output: string, turnId: number, uuid: string): ToolResultEvent {
+  return { type: 'tool_result', uuid, parentUuid: 'a1', toolName, toolCallId: `tc${uuid}`, output, turnId, timestamp: new Date().toISOString(), tokenCount: 0 };
 }
 
-const testConfig: ContextConfig = {
-  defaultMaxTokens: 1000,
-  reservedTokens: 100,
-  thresholds: { prune: 0.7, compaction: 0.9 },
-  pruneProtectedTokens: 40000,
-  pruneMinRelease: 20000,
-  toolsExemptFromPrune: ['Read'],
-  prefixTurnsProtected: 1,
-  minTurnsBetweenCompactions: 5,
-  keepRecentTurns: 10,
-  compactionModel: 'haiku',
-  archiveTtlDays: 30,
-  checkpointKeep: 50,
-  reactiveCompactMaxRetries: 1,
-  reactiveCompactKeepTurns: 3,
-  snipMaxMessages: 100,
-  snipKeepHead: 3,
-  microKeepRecentTools: 5,
-  persistPreviewChars: 2000,
-  thresholdTokens: 2000,
-};
+describe('snipEvents', () => {
+  it('returns all events when under threshold', () => {
+    const events: SessionEvent[] = Array.from({ length: 10 }, (_, i) => makeUserEvent(`msg${i}`, i)); // 10 events < snipMaxMessages (50)
+    const result = snipEvents(events, baseConfig);
+    expect(result.events).toHaveLength(10);
+    expect(result.tokensFreed).toBe(0);
+  });
+
+  it('truncates head, keeping only tail snipMaxMessages', () => {
+    const events: SessionEvent[] = Array.from({ length: 60 }, (_, i) => makeUserEvent(`msg${i}`, i)); // 60 events > snipMaxMessages (50)
+    const result = snipEvents(events, baseConfig);
+    expect(result.events).toHaveLength(51); // 1 summary + 50 events
+    const summary0 = result.events[0];
+    expect(summary0!.type).toBe('summary');
+    expect((summary0 as any).summaryText).toContain('messages snipped');
+    expect((result.events[1] as any).content).toBe('msg10'); // first kept event; msg0..msg9 are gone
+    expect(result.tokensFreed).toBeGreaterThan(0);
+  });
+
+  it('retreats to user boundary and inserts summary placeholder', () => {
+    const events: SessionEvent[] = [ // two turns, each: user, assistant, tool_result
+      makeUserEvent('q1', 1),
+      { type: 'assistant', uuid: 'a1', content: 'a1', turnId: 1, toolCalls: [], model: 'test', timestamp: new Date().toISOString() },
+      makeToolResult('bash', 'r1', 1, 't1'),
+      makeUserEvent('q2', 2),
+      { type: 'assistant', uuid: 'a2', content: 'a2', turnId: 2, toolCalls: [], model: 'test', timestamp: new Date().toISOString() },
+      makeToolResult('bash', 'r2', 2, 't2'),
+    ];
+    const result = snipEvents(events, { ...baseConfig, snipMaxMessages: 4 });
+    expect(result.events.length).toBe(4); // summary placeholder + 3 events starting at q2 (asserted below)
+    const summary1 = result.events[0];
+    expect(summary1!.type).toBe('summary');
+    expect((summary1 as any).summaryText).toContain('messages snipped');
+    expect((result.events[1] as any).content).toBe('q2'); // kept window starts on a user event, not mid-turn
+    expect(result.tokensFreed).toBeGreaterThan(0);
+  });
+
+  it('counts summary event tokens in tokensFreed', () => {
+    const events: SessionEvent[] = [
+      { type: 'summary', uuid: 's1', replaces: [], summaryText: 'A'.repeat(100), method: 'auto-compact', timestamp: new Date().toISOString() }, // pre-existing summary, 100 chars of text
+      makeUserEvent('q1', 1),
+      { type: 'assistant', uuid: 'a1', content: 'a1', turnId: 1, toolCalls: [], model: 'test', timestamp: new Date().toISOString() },
+    ];
+    const result = snipEvents(events, { ...baseConfig, snipMaxMessages: 2 });
+    expect(result.events.length).toBe(3); // presumably 1 snip placeholder + q1 + a1, with s1 dropped — TODO confirm against snipEvents
+    expect(result.tokensFreed).toBeGreaterThan(0);
+  });
+});
+
+describe('microcompact', () => {
+  it('keeps all when tool_result count <= keepRecentToolResults', () => {
+    const events: SessionEvent[] = [
+      makeToolResult('bash', 'a'.repeat(200), 1, 't1'),
+      makeToolResult('bash', 'b'.repeat(200), 2, 't2'),
+    ];
+    const result = microcompact(events, baseConfig);
+    expect((result[0] as ToolResultEvent).output).toBe('a'.repeat(200));
+    expect((result[1] as ToolResultEvent).output).toBe('b'.repeat(200));
+  });
 
-describe('fitToBudget', () => {
-  it('returns messages unchanged when under budget', () => {
-    const messages = [msg('short')];
-    const result = fitToBudget(messages, testConfig);
-    expect(result).toEqual(messages);
+  it('replaces old tool results with placeholder, keeps recent 3', () => {
+    const longContent = 'x'.repeat(500); // ~143 tokens > 120
+    const events: SessionEvent[] = [
+      makeToolResult('bash', longContent, 1, 't1'),
+      makeToolResult('bash', longContent, 2, 't2'),
+      makeToolResult('bash', longContent, 3, 't3'),
+      makeToolResult('bash', longContent, 4, 't4'),
+      makeToolResult('bash', longContent, 5, 't5'),
+    ];
+    const result = microcompact(events, { ...baseConfig, keepRecentToolResults: 3 });
+    expect((result[0] as ToolResultEvent).output).toBe('[Old tool result content cleared]');
+    expect((result[1] as ToolResultEvent).output).toBe('[Old tool result content cleared]');
+    expect((result[2] as ToolResultEvent).output).toBe(longContent);
+    expect((result[3] as ToolResultEvent).output).toBe(longContent);
+    expect((result[4] as ToolResultEvent).output).toBe(longContent);
   });
 
-  it('removes oldest non-pinned messages when over budget', () => {
-    // Each message ~1600 chars → ~457 tokens; budget = 900
-    // pinned(4) + 1600(457) + 1600(457) = 918 > 900 → remove one
-    // pinned(4) + 1600(457) = 461 ≤ 900 → stop, 2 remaining
-    const messages = [msg('pinned', 'system'), msg('x'.repeat(1600)), msg('y'.repeat(1600))];
-    const result = fitToBudget(messages, testConfig, 1); // 1 pinned
-    expect(result.length).toBe(2); // pinned + 1 remaining long msg
-    expect(result[0]!.content).toBe('pinned'); // pinned stays
+  it('skips exempt tools', () => {
+    const longContent = 'x'.repeat(500); // ~143 tokens > 120
+    const events: SessionEvent[] = [
+      makeToolResult('Read', longContent, 1, 't1'),
+      makeToolResult('bash', longContent, 1, 't2'),
+      makeToolResult('bash', longContent, 2, 't3'),
+    ];
+    // keepRecentToolResults=1: only the most recent non-exempt tool is kept
+    const result = microcompact(events, { ...baseConfig, keepRecentToolResults: 1 });
+    expect((result[0] as ToolResultEvent).output).toBe(longContent); // Read exempt
+    expect((result[1] as ToolResultEvent).output).toBe('[Old tool result content cleared]'); // t2 replaced
+    expect((result[2] as ToolResultEvent).output).toBe(longContent); // t3 recent, kept
   });
 
-  it('removes oldest non-pinned messages when over budget, falls back to fitToBudget', () => {
-    // No pinned, 3 huge messages → should trim to under budget
-    const messages = [msg('a'.repeat(4000)), msg('b'.repeat(4000)), msg('c'.repeat(4000))];
-    const result = fitToBudget(messages, testConfig, 0);
-    expect(result.length).toBeLessThan(3); // at least 1 removed
+  it('skips short content <= 120 tokens', () => {
+    const events: SessionEvent[] = [
+      makeToolResult('bash', 'short', 1, 't1'), // old result, ~2 tokens
+      makeToolResult('bash', 'x'.repeat(200), 2, 't2'), // most recent result
+    ];
+    const result = microcompact(events, { ...baseConfig, keepRecentToolResults: 1 });
+    expect((result[0] as ToolResultEvent).output).toBe('short'); // <= 120 tokens, not replaced
+    expect((result[1] as ToolResultEvent).output).toBe('x'.repeat(200)); // recent, kept
   });
 
-  it('removes entire user turn (user+assistant+tool) when trimming', () => {
-    // Two complete turns, very large content. Budget = 900.
-    // Turn 1: user(457t) + assistant(457t) + tool(457t) = 1371t
-    // Turn 2: user(457t) + assistant(457t) + tool(457t) = 1371t
-    // Total ≈ 2742t > 900 → should remove entire turn 1 (all 3 messages)
-    const messages = [...turn('a'.repeat(1600), 'b'.repeat(1600), 'c'.repeat(1600), 1),
-                      ...turn('d'.repeat(1600), 'e'.repeat(1600), 'f'.repeat(1600), 2)];
-    const result = fitToBudget(messages, testConfig, 0);
-    expect(result.length).toBeLessThanOrEqual(3); // either turn 2 alone or part of it
-    expect(result[0]?.role).not.toBe('user'); // turn 1's user should be gone
+  it('replaces tool results when token count exceeds 120 tokens', () => { // NOTE(review): same inputs and assertions as 'replaces old tool results with placeholder, keeps recent 3'
+    const longContent = 'x'.repeat(500); // ~143 tokens > 120 — NOTE(review): a case near the 120-token boundary would make this test distinct
+    const events: SessionEvent[] = [
+      makeToolResult('bash', longContent, 1, 't1'),
+      makeToolResult('bash', longContent, 2, 't2'),
+      makeToolResult('bash', longContent, 3, 't3'),
+      makeToolResult('bash', longContent, 4, 't4'),
+      makeToolResult('bash', longContent, 5, 't5'),
+    ];
+    const result = microcompact(events, { ...baseConfig, keepRecentToolResults: 3 });
+    expect((result[0] as ToolResultEvent).output).toBe('[Old tool result content cleared]'); // t1: older than the 3 most recent
+    expect((result[1] as ToolResultEvent).output).toBe('[Old tool result content cleared]'); // t2: older than the 3 most recent
+    expect((result[2] as ToolResultEvent).output).toBe(longContent); // t3..t5: the 3 most recent, untouched
+    expect((result[3] as ToolResultEvent).output).toBe(longContent);
+    expect((result[4] as ToolResultEvent).output).toBe(longContent);
+  });
+});
+
+describe('assemblePayload', () => {
+  it('is importable and exists as a function', () => {
+    expect(typeof assemblePayload).toBe('function');
   });
 });
diff --git a/packages/codingcode/test/context/todo-exempt.test.ts b/packages/codingcode/test/context/todo-exempt.test.ts
index ccdd1fc..07ea41d 100644
--- a/packages/codingcode/test/context/todo-exempt.test.ts
+++ b/packages/codingcode/test/context/todo-exempt.test.ts
@@ -1,20 +1,20 @@
 import { describe, it, expect } from 'vitest';
 import { getContextConfig } from '../../src/context/config.js';
 
-describe('Todo/ToolSearch tools exempt from prune', () => {
-  it('toolsExemptFromPrune includes todo_write', () => {
-    expect(getContextConfig().toolsExemptFromPrune).toContain('todo_write');
+describe('Todo/ToolSearch tools exempt from microcompact', () => {
+  it('toolsExemptFromMicrocompact includes todo_write', () => {
+    expect(getContextConfig().toolsExemptFromMicrocompact).toContain('todo_write');
   });
 
-  it('toolsExemptFromPrune includes todo_read', () => {
-    expect(getContextConfig().toolsExemptFromPrune).toContain('todo_read');
+  it('toolsExemptFromMicrocompact includes todo_read', () => {
+    expect(getContextConfig().toolsExemptFromMicrocompact).toContain('todo_read');
   });
 
-  it('toolsExemptFromPrune includes tool_search', () => {
-    expect(getContextConfig().toolsExemptFromPrune).toContain('tool_search');
+  it('toolsExemptFromMicrocompact includes tool_search', () => {
+    expect(getContextConfig().toolsExemptFromMicrocompact).toContain('tool_search');
   });
 
-  it('toolsExemptFromPrune still includes Read', () => {
-    expect(getContextConfig().toolsExemptFromPrune).toContain('Read');
+  it('toolsExemptFromMicrocompact still includes Read', () => {
+    expect(getContextConfig().toolsExemptFromMicrocompact).toContain('Read');
   });
 });
diff --git a/packages/codingcode/test/context/tokens.test.ts b/packages/codingcode/test/context/tokens.test.ts
new file mode 100644
index 0000000..394690c
--- /dev/null
+++ b/packages/codingcode/test/context/tokens.test.ts
@@ -0,0 +1,61 @@
+import { describe, it, expect } from 'vitest';
+import { estimateTokensForContent, estimateTokens, estimateMessageTokens } from '../../src/context/utils/tokens.js';
+
+describe('token estimation', () => {
+  it('empty content returns 0', () => {
+    expect(estimateTokensForContent('')).toBe(0);
+  });
+
+  it('ASCII text estimates ~1 token per 3.5 chars', () => {
+    expect(estimateTokensForContent('hello world')).toBe(4); // 11 chars: ceil(11/3.5)=4
+    expect(estimateTokensForContent('a'.repeat(35))).toBe(10); // 35/3.5=10 exactly
+  });
+
+  it('CJK text estimates ~1 token per char', () => {
+    expect(estimateTokensForContent('你好世界')).toBe(4); // 4 CJK chars
+    expect(estimateTokensForContent('这是一个测试字符串')).toBe(9); // 9 CJK chars
+    expect(estimateTokensForContent('这是一个测试字符串哈')).toBe(10); // 10 CJK chars
+  });
+
+  it('mixed CJK and ASCII sums separately', () => {
+    expect(estimateTokensForContent('hello世界')).toBe(4); // ASCII ceil(5/3.5)=2 + 2 CJK chars => 4
+  });
+});
+
+describe('estimateMessageTokens', () => {
+  it('counts content + role + fixed structure overhead', () => {
+    const msg = { role: 'user', content: 'hello' } as any;
+    // content: ceil(5/3.5)=2, role: ceil(4/3.5)=2, structure: 4 => 8
+    expect(estimateMessageTokens(msg)).toBe(8);
+  });
+
+  it('includes tool_call_id and tool_name for tool messages', () => {
+    const msg = {
+      role: 'tool',
+      content: 'result',
+      tool_call_id: 'tc123',
+      tool_name: 'bash',
+    } as any;
+    // content: ceil(6/3.5)=2, role: ceil(4/3.5)=2, tool_call_id: ceil(5/3.5)=2,
+    // tool_name: ceil(4/3.5)=2, structure: 4 => 12
+    expect(estimateMessageTokens(msg)).toBe(12);
+  });
+
+  it('includes name for system/assistant messages', () => {
+    const msg = { role: 'system', name: 'compacted_history', content: 'summary' } as any;
+    // content: ceil(7/3.5)=2, role: ceil(6/3.5)=2, name: ceil(17/3.5)=5, structure: 4 => 13
+    expect(estimateMessageTokens(msg)).toBe(13);
+  });
+});
+
+describe('estimateTokens', () => {
+  it('aggregates full message tokens across array', () => {
+    const messages = [
+      { role: 'user', content: 'hello' },
+      { role: 'assistant', content: '你好' },
+    ] as any;
+    // user: content(2) + role(2) + structure(4) = 8
+    // assistant: content(2) + role(3) + structure(4) = 9
+    expect(estimateTokens(messages)).toBe(17);
+  });
+});
diff --git a/packages/codingcode/test/llm/deepseek-provider.test.ts b/packages/codingcode/test/llm/deepseek-provider.test.ts
new file mode 100644
index 0000000..33e800b
--- /dev/null
+++ b/packages/codingcode/test/llm/deepseek-provider.test.ts
@@ -0,0 +1,73 @@
+import { describe, it, expect, vi, beforeEach } from 'vitest';
+
+const streamText = vi.fn();
+const stepCountIs = vi.fn((count: number) => ({ count }));
+const jsonSchema = vi.fn((schema: unknown) => schema);
+
+vi.mock('ai', () => ({
+  generateText: vi.fn(),
+  streamText,
+  stepCountIs,
+  jsonSchema,
+}));
+
+async function collect(stream: AsyncIterable<string>): Promise<string[]> {
+  const chunks: string[] = [];
+  for await (const chunk of stream) {
+    chunks.push(chunk);
+  }
+  return chunks;
+}
+
+function entry() {
+  return {
+    id: 'model@deepseek',
+    provider: 'deepseek',
+    driver: 'openai',
+    name: 'DeepSeek',
+    model: 'deepseek-chat',
+    base_url: 'https://api.deepseek.com/v1',
+    api_key_env: 'DEEPSEEK_API_KEY',
+    context_window: 64000,
+  };
+}
+
+function request() {
+  return {
+    system: 'system',
+    messages: [{ role: 'user', content: 'hello' }],
+    tools: undefined,
+    maxSteps: 1,
+  };
+}
+
+describe('DeepSeekProvider completeStream', () => {
+  beforeEach(() => {
+    vi.clearAllMocks(); // clear recorded calls so toHaveBeenCalledTimes(1) below counts this test only
+    streamText.mockReturnValue({ // fake streamText result: one text delta plus an already-resolved response
+      fullStream: (async function* () {
+        yield { type: 'text-delta', text: 'streamed' };
+      })(),
+      response: Promise.resolve({
+        messages: [{ role: 'assistant', content: 'streamed' }],
+        usage: { promptTokens: 200, completionTokens: 100, totalTokens: 300 },
+      }),
+    });
+  });
+
+  it('streams text and extracts usage from response', async () => {
+    const { DeepSeekProvider } = await import('../../src/llm/providers/deepseek.js'); // loaded inside the test; presumably resolves 'ai' to the vi.mock spies above
+    const provider = new DeepSeekProvider({} as any, entry());
+
+    const result = provider.completeStream(request() as any);
+    await expect(collect(result.stream)).resolves.toEqual(['streamed']); // the single text-delta yielded by the fake fullStream
+
+    const resp = await result.response;
+    expect(resp.ok).toBe(true);
+    if (resp.ok) { // narrow the result before reading .value
+      expect(resp.value.usage).toEqual({ prompt: 200, completion: 100, total: 300 }); // promptTokens/completionTokens/totalTokens surfaced as prompt/completion/total
+    }
+
+    expect(streamText).toHaveBeenCalledTimes(1);
+  });
+});
diff --git a/packages/codingcode/test/llm/openai-provider.test.ts b/packages/codingcode/test/llm/openai-provider.test.ts
index 65306e6..c5f0cbf 100644
--- a/packages/codingcode/test/llm/openai-provider.test.ts
+++ b/packages/codingcode/test/llm/openai-provider.test.ts
@@ -56,6 +56,7 @@ describe('OpenAIProvider completeStream', () => {
       })(),
       response: Promise.resolve({
         messages: [{ role: 'assistant', content: 'streamed' }],
+        usage: { promptTokens: 100, completionTokens: 50, totalTokens: 150 },
       }),
     });
   });
@@ -93,4 +94,16 @@ describe('OpenAIProvider completeStream', () => {
     expect(streamText).toHaveBeenCalledTimes(1);
     expect(generateText).not.toHaveBeenCalled();
   });
+
+  it('extracts usage from streamText response', async () => {
+    const { OpenAIProvider } = await import('../../src/llm/providers/openai.js');
+    const provider = new OpenAIProvider({} as any, entry('openai'));
+
+    const result = provider.completeStream(request(false) as any);
+    const resp = await result.response;
+    expect(resp.ok).toBe(true);
+    if (resp.ok) {
+      expect(resp.value.usage).toEqual({ prompt: 100, completion: 50, total: 150 });
+    }
+  });
 });
diff --git a/packages/codingcode/test/orchestrate.test.ts b/packages/codingcode/test/orchestrate.test.ts
index 6d60a55..361b4b4 100644
--- a/packages/codingcode/test/orchestrate.test.ts
+++ b/packages/codingcode/test/orchestrate.test.ts
@@ -14,7 +14,8 @@ const mockState = {
   sessionId: 'test-session', cwd: '/tmp/test', projectPath: 'test',
   transcriptPath: '/tmp/test.jsonl', indexPath: '/tmp/test.index.json',
   messageCount: 0, currentTurnId: 0, sessionMeta: null, title: 'test-sess',
-  tokenCountEstimate: 0,
+  usage: undefined,
+  promptEstimate: 0,
 };
 
 const mockLlm = {
@@ -48,7 +49,8 @@ const MockContextLayer = Layer.succeed(ContextService, ContextService.of({
   _tag: 'Context' as any,
-  build: () => Effect.sync(() => [{ role: 'user' as const, content: 'hi' }]),
-  compress: () => Effect.succeed({ didCompress: true, released: 0 }),
-  appendTurnEnd: () => Effect.succeed({ didCompress: false, released: 0 }),
+  // Same ContextService mock shape as test/context/context.test.ts: build() yields { messages, ... }.
+  build: () => Effect.sync(() => ({ messages: [{ role: 'user' as const, content: 'hi' }], snipTokensFreed: 0, newBudgets: [] })),
+  compress: () => Effect.succeed({ didCompress: true, released: 0, promptEstimate: 0 }),
+  compactIfNeeded: () => Effect.succeed({ didCompress: false, released: 0, promptEstimate: 0 }),
 }));
 
 const MockSkillLayer = Layer.succeed(SkillService, SkillService.of({
diff --git a/packages/codingcode/test/server/adapter.test.ts b/packages/codingcode/test/server/adapter.test.ts
index 0f720ee..06896f0 100644
--- a/packages/codingcode/test/server/adapter.test.ts
+++ b/packages/codingcode/test/server/adapter.test.ts
@@ -50,12 +50,34 @@ describe('agentEventToSseEvent', () => {
       .toEqual({ type: 'todo_update', items });
   });
 
+  it('maps Usage to usage event', () => {
+    expect(agentEventToSseEvent({ _tag: 'Usage', prompt: 1000, completion: 500, total: 1500 }))
+      .toEqual({ type: 'usage', prompt: 1000, completion: 500, total: 1500 });
+  });
+
   it('returns null for Assistant and ReactiveCompact', () => {
     expect(agentEventToSseEvent({ _tag: 'Assistant', content: 'ok' })).toBeNull();
     expect(agentEventToSseEvent({ _tag: 'ReactiveCompact', attempt: 1, released: 100 })).toBeNull();
   });
 });
 
+describe('toSseEvents with Usage', () => {
+  it('Usage events flow through toSseEvents', async () => {
+    async function* source(): AsyncGenerator<AgentEvent, void, unknown> {
+      yield { _tag: 'Step', step: 1, max: 10 };
+      yield { _tag: 'Assistant', content: 'ok' };
+      yield { _tag: 'Usage', prompt: 1000, completion: 500, total: 1500 };
+    }
+    const result: any[] = [];
+    for await (const s of toSseEvents(source())) result.push(s);
+    expect(result).toEqual([
+      { type: 'step', step: 1 },
+      { type: 'message', id: 1, content: 'ok', partial: false },
+      { type: 'usage', prompt: 1000, completion: 500, total: 1500 },
+    ]);
+  });
+});
+
 describe('toSseEvents', () => {
   it('text chunks carry messageId from preceding Step', async () => {
     async function* source(): AsyncGenerator<AgentEvent, void, unknown> {
diff --git a/packages/codingcode/test/server/handler.test.ts b/packages/codingcode/test/server/handler.test.ts
index dcf5c3d..79e1626 100644
--- a/packages/codingcode/test/server/handler.test.ts
+++ b/packages/codingcode/test/server/handler.test.ts
@@ -22,6 +22,8 @@ const mockState = {
   currentTurnId: 0,
   sessionMeta: null,
   title: 'test-sess',
+  usage: undefined,
+  promptEstimate: 0,
 };
 
 function createMockLlm(chunks?: string[], responseContent?: string) {
@@ -70,7 +72,8 @@ const MockContextLayer = Layer.succeed(ContextService, ContextService.of({
   _tag: 'Context' as any,
-  build: () => Effect.sync(() => [{ role: 'user' as const, content: 'hi' }]),
-  compress: () => Effect.succeed({ didCompress: true, released: 0 }),
-  appendTurnEnd: () => Effect.succeed({ didCompress: false, released: 0 }),
+  // Same ContextService mock shape as test/context/context.test.ts: build() yields { messages, ... }.
+  build: () => Effect.sync(() => ({ messages: [{ role: 'user' as const, content: 'hi' }], snipTokensFreed: 0, newBudgets: [] })),
+  compress: () => Effect.succeed({ didCompress: true, released: 0, promptEstimate: 0 }),
+  compactIfNeeded: () => Effect.succeed({ didCompress: false, released: 0, promptEstimate: 0 }),
 }));
 
 const MockSkillLayer = Layer.succeed(
diff --git a/packages/codingcode/test/session/delete-message.test.ts b/packages/codingcode/test/session/delete-message.test.ts
index a8692d9..cfe913f 100644
--- a/packages/codingcode/test/session/delete-message.test.ts
+++ b/packages/codingcode/test/session/delete-message.test.ts
@@ -30,7 +30,7 @@ function makeFixture(sessionId: string, slug: string) {
     sessionId, projectPath: slug, cwd: '/tmp/test', model: 'test',
     createdAt: new Date().toISOString(), updatedAt: new Date().toISOString(),
     messageCount: 6, title: 'fixture', currentTurnId: 3,
-    tokenCountEstimate: 50, permissionMode: 'default',
+    usage: undefined, promptEstimate: 0, permissionMode: 'default',
   };
   writeFileSync(indexPath, JSON.stringify(idx, null, 2), 'utf8');
 
diff --git a/packages/codingcode/test/session/fork.test.ts b/packages/codingcode/test/session/fork.test.ts
index d973d41..203f2b0 100644
--- a/packages/codingcode/test/session/fork.test.ts
+++ b/packages/codingcode/test/session/fork.test.ts
@@ -31,7 +31,7 @@ function makeFixture(sessionId: string, slug: string) {
     sessionId, projectPath: slug, cwd: '/tmp/test', model: 'test',
     createdAt: new Date().toISOString(), updatedAt: new Date().toISOString(),
     messageCount: 7, title: 'fixture', currentTurnId: 3,
-    tokenCountEstimate: 100, permissionMode: 'default',
+    usage: undefined, promptEstimate: 0, permissionMode: 'default',
   };
   writeFileSync(indexPath, JSON.stringify(idx, null, 2), 'utf8');
 
diff --git a/packages/codingcode/test/session/io-error.test.ts b/packages/codingcode/test/session/io-error.test.ts
index 295730c..9e74fad 100644
--- a/packages/codingcode/test/session/io-error.test.ts
+++ b/packages/codingcode/test/session/io-error.test.ts
@@ -28,7 +28,8 @@ describe('SessionService — SESSION_IO_ERROR', () => {
         currentTurnId: 1,
         sessionMeta: { model: 'test', version: '0.1.0', createdAt: new Date().toISOString() },
         title: 'io-err-sid'.slice(0, 8),
-        tokenCountEstimate: 0,
+        usage: undefined,
+        promptEstimate: 0,
       };
       return yield* session.recordUser(state, 'hello');
     });
@@ -51,7 +52,8 @@ describe('SessionService — SESSION_IO_ERROR', () => {
         currentTurnId: 1,
         sessionMeta: { model: 'test', version: '0.1.0', createdAt: new Date().toISOString() },
         title: 'io-err-asst'.slice(0, 8),
-        tokenCountEstimate: 0,
+        usage: undefined,
+        promptEstimate: 0,
       };
       return yield* session.recordAssistant(state, 'hi', [], 'model');
     });
diff --git a/packages/codingcode/test/session/prompt-estimate.test.ts b/packages/codingcode/test/session/prompt-estimate.test.ts
new file mode 100644
index 0000000..7c6a793
--- /dev/null
+++ b/packages/codingcode/test/session/prompt-estimate.test.ts
@@ -0,0 +1,243 @@
+import { describe, it, expect } from 'vitest';
+import { mkdirSync, writeFileSync, readFileSync, rmSync } from 'fs';
+import { join } from 'path';
+import { homedir } from 'os';
+import { randomUUID } from 'crypto';
+import { Effect } from 'effect';
+import { findLastVisibleAssistantUsage, forkSession, findSessionIndex, SessionService, buildMessages } from '../../src/session/store.js';
+import { estimateTokensForContent, estimateTokens } from '../../src/context/utils/tokens.js';
+import { encodeProjectPath } from '../../src/core/path.js';
+import type { SessionIndex, SessionEvent } from '../../src/session/types.js';
+
+const PROJECT_BASE = join(homedir(), '.codingcode', 'project');
+
+function makeFixture(sessionId: string, slug: string, usage?: { prompt: number; completion: number; total: number }) {
+  // Writes a two-turn transcript and its index.json under PROJECT_BASE/<slug>/sessions; turn 2 gets `usage` bumped by +100/+50/+150.
+  const stamp = () => new Date().toISOString();
+  const dir = join(PROJECT_BASE, slug, 'sessions');
+  const transcriptPath = join(dir, `${sessionId}.jsonl`);
+  const indexPath = join(dir, `${sessionId}.index.json`);
+  const laterUsage = usage ? { prompt: usage.prompt + 100, completion: usage.completion + 50, total: usage.total + 150 } : undefined;
+  mkdirSync(dir, { recursive: true });
+  const events: any[] = [
+    { type: 'session_meta', sessionId, projectPath: slug, cwd: '/tmp/test', model: 'test', createdAt: stamp(), version: '0.1.0' },
+    { type: 'user', turnId: 1, uuid: 'u1', content: 'hello world', timestamp: stamp() },
+    { type: 'assistant', turnId: 1, uuid: 'a1', content: 'hi there', toolCalls: [], model: 'test', timestamp: stamp(), usage },
+    { type: 'user', turnId: 2, uuid: 'u2', content: 'do stuff', timestamp: stamp() },
+    { type: 'assistant', turnId: 2, uuid: 'a2', content: 'ok done', toolCalls: [], model: 'test', timestamp: stamp(), usage: laterUsage },
+  ];
+  const serialized = events.map((event) => JSON.stringify(event)).join('\n') + '\n';
+  writeFileSync(transcriptPath, serialized, 'utf8');
+  // The index is built only after the transcript exists: the fallback estimate re-reads it via buildMessages.
+  const promptEstimate = usage ? usage.prompt : estimateTokens(buildMessages(transcriptPath));
+  const idx: SessionIndex = {
+    sessionId, projectPath: slug, cwd: '/tmp/test', model: 'test',
+    createdAt: stamp(), updatedAt: stamp(),
+    messageCount: 4, title: 'fixture', currentTurnId: 2,
+    usage: usage ?? undefined, promptEstimate, permissionMode: 'default',
+  };
+  writeFileSync(indexPath, JSON.stringify(idx, null, 2), 'utf8');
+  return { dir, transcriptPath, indexPath };
+}
+// Covers usage lookup in transcripts, reading promptEstimate back from index.json, and what forkSession writes to the new index.
+describe('promptEstimate', () => {
+  it('findLastVisibleAssistantUsage reads usage from visible assistant event', () => {
+    const sessionId = randomUUID();
+    const slug = randomUUID();
+    const usage = { prompt: 1200, completion: 300, total: 1500 };
+    const lastUsage = { prompt: 1300, completion: 350, total: 1650 }; // what makeFixture writes on a2: usage + 100/50/150
+    const fx = makeFixture(sessionId, slug, usage);
+    try {
+      const result = findLastVisibleAssistantUsage(fx.transcriptPath);
+      expect(result).toEqual(lastUsage);
+    } finally { rmSync(join(PROJECT_BASE, slug), { recursive: true, force: true }); }
+  });
+  // With no `usage` passed, neither fixture assistant event carries one, so the lookup has nothing to return.
+  it('findLastVisibleAssistantUsage returns undefined when no assistant usage', () => {
+    const sessionId = randomUUID();
+    const slug = randomUUID();
+    const fx = makeFixture(sessionId, slug, undefined);
+    try {
+      const result = findLastVisibleAssistantUsage(fx.transcriptPath);
+      expect(result).toBeUndefined();
+    } finally { rmSync(join(PROJECT_BASE, slug), { recursive: true, force: true }); }
+  });
+  // Hand-built transcript: a1 is the target of a later 'hide' event, a2 is not.
+  it('findLastVisibleAssistantUsage skips hidden assistant events', () => {
+    const sessionId = randomUUID();
+    const slug = randomUUID();
+    const dir = join(PROJECT_BASE, slug, 'sessions');
+    mkdirSync(dir, { recursive: true });
+    const transcriptPath = join(dir, `${sessionId}.jsonl`);
+    // usage1 is attached to the hidden event (a1), usage2 to the one that stays visible (a2).
+    const usage1 = { prompt: 100, completion: 50, total: 150 };
+    const usage2 = { prompt: 200, completion: 100, total: 300 };
+    const lines: any[] = [
+      { type: 'session_meta', sessionId, projectPath: slug, cwd: '/tmp/test', model: 'test', createdAt: new Date().toISOString(), version: '0.1.0' },
+      { type: 'assistant', turnId: 1, uuid: 'a1', content: 'first', toolCalls: [], model: 'test', timestamp: new Date().toISOString(), usage: usage1 },
+      { type: 'hide', uuid: 'h1', kind: 'message', targetUuid: 'a1', reason: 'test', timestamp: new Date().toISOString() },
+      { type: 'assistant', turnId: 2, uuid: 'a2', content: 'second', toolCalls: [], model: 'test', timestamp: new Date().toISOString(), usage: usage2 },
+    ];
+    writeFileSync(transcriptPath, lines.map((l) => JSON.stringify(l)).join('\n') + '\n', 'utf8');
+    // No index.json is written here; only the transcript path is passed to the function under test.
+    try {
+      const result = findLastVisibleAssistantUsage(transcriptPath);
+      expect(result).toEqual(usage2);
+    } finally { rmSync(join(PROJECT_BASE, slug), { recursive: true, force: true }); }
+  });
+  // makeFixture stores promptEstimate = usage.prompt in index.json when usage is given, hence the expected 500.
+  it('findSessionIndex reads promptEstimate from index.json', () => {
+    const sessionId = randomUUID();
+    const slug = randomUUID();
+    const fx = makeFixture(sessionId, slug, { prompt: 500, completion: 200, total: 700 });
+    try {
+      const idx = findSessionIndex(sessionId);
+      expect(idx).not.toBeNull();
+      expect(idx!.promptEstimate).toBe(500);
+    } finally { rmSync(join(PROJECT_BASE, slug), { recursive: true, force: true }); }
+  });
+  // Forks at 'a1', the event that carries the unmodified fixture usage (a2 holds the bumped copy).
+  it('forkSession restores usage and promptEstimate from last visible assistant', () => {
+    const sessionId = randomUUID();
+    const slug = randomUUID();
+    const usage = { prompt: 800, completion: 400, total: 1200 };
+    const fx = makeFixture(sessionId, slug, usage);
+    try {
+      const newSessionId = forkSession(sessionId, fx.transcriptPath, 'a1');
+      const newIndexPath = join(fx.dir, `${newSessionId}.index.json`);
+      const idx = JSON.parse(readFileSync(newIndexPath, 'utf8')) as SessionIndex;
+      expect(idx.usage).toEqual(usage);
+      expect(idx.promptEstimate).toBe(usage.prompt);
+    } finally { rmSync(join(PROJECT_BASE, slug), { recursive: true, force: true }); }
+  });
+  // Without any recorded usage the forked index is only required to hold a positive estimate.
+  it('forkSession falls back to estimateTokens when no assistant usage', () => {
+    const sessionId = randomUUID();
+    const slug = randomUUID();
+    const fx = makeFixture(sessionId, slug, undefined);
+    try {
+      const newSessionId = forkSession(sessionId, fx.transcriptPath, 'u2');
+      const newIndexPath = join(fx.dir, `${newSessionId}.index.json`);
+      const idx = JSON.parse(readFileSync(newIndexPath, 'utf8')) as SessionIndex;
+      expect(idx.promptEstimate).toBeGreaterThan(0);
+    } finally { rmSync(join(PROJECT_BASE, slug), { recursive: true, force: true }); }
+  });
+});
+describe('token estimation', () => {
+  it('estimateTokensForContent returns > 0 for non-empty strings', () => {
+    const estimate = estimateTokensForContent; // local alias: real text must cost something, the empty string nothing
+    expect(estimate('hello world')).toBeGreaterThan(0);
+    expect(estimate('')).toBe(0);
+  });
+});
+// Test helper: provides SessionService.Default to the effect and runs it to a Promise.
+function run<T>(eff: Effect.Effect<T, any, any>): Promise<T> {
+  return Effect.runPromise(Effect.provide(eff, SessionService.Default) as any);
+}
+// Each test creates a session via SessionService and re-reads the same `state` object after each call, so it relies on in-place mutation.
+describe('SessionService record methods update promptEstimate', () => {
+  it('recordUser increments promptEstimate', async () => {
+    const slug = randomUUID();
+    const dir = join(PROJECT_BASE, slug);
+    mkdirSync(dir, { recursive: true });
+    try {
+      const state = await run(
+        SessionService.pipe(Effect.flatMap((s) => s.create(dir, 'test-model', '0.1.0'))),
+      );
+      expect(state.promptEstimate).toBe(0);
+      // (the cleanup below waits 50 ms before removing directories — presumably to let pending writes settle; TODO confirm)
+      const before = state.promptEstimate;
+      await run(
+        SessionService.pipe(Effect.flatMap((s) => s.recordUser(state, 'hello world'))),
+      );
+      expect(state.promptEstimate).toBeGreaterThan(before);
+    } finally { await new Promise((r) => setTimeout(r, 50)); rmSync(join(PROJECT_BASE, encodeProjectPath(dir)), { recursive: true, force: true }); rmSync(dir, { recursive: true, force: true }); }
+  });
+  // No usage argument is passed to recordAssistant: the estimate is expected to grow and state.usage to stay unset.
+  it('recordAssistant without usage increments promptEstimate', async () => {
+    const slug = randomUUID();
+    const dir = join(PROJECT_BASE, slug);
+    mkdirSync(dir, { recursive: true });
+    try {
+      const state = await run(
+        SessionService.pipe(Effect.flatMap((s) => s.create(dir, 'test-model', '0.1.0'))),
+      );
+
+      await run(
+        SessionService.pipe(Effect.flatMap((s) => s.recordUser(state, 'hello'))),
+      );
+      const before = state.promptEstimate;
+
+      await run(
+        SessionService.pipe(Effect.flatMap((s) => s.recordAssistant(state, 'reply', [], 'test-model'))),
+      );
+      expect(state.promptEstimate).toBeGreaterThan(before);
+      expect(state.usage).toBeUndefined();
+    } finally { await new Promise((r) => setTimeout(r, 50)); rmSync(join(PROJECT_BASE, encodeProjectPath(dir)), { recursive: true, force: true }); rmSync(dir, { recursive: true, force: true }); }
+  });
+  // When usage is supplied, promptEstimate is expected to equal usage.prompt and the usage object to be stored on state.
+  it('recordAssistant with usage sets promptEstimate to usage.prompt', async () => {
+    const slug = randomUUID();
+    const dir = join(PROJECT_BASE, slug);
+    mkdirSync(dir, { recursive: true });
+    try {
+      const state = await run(
+        SessionService.pipe(Effect.flatMap((s) => s.create(dir, 'test-model', '0.1.0'))),
+      );
+
+      const usage = { prompt: 999, completion: 111, total: 1110 };
+      await run(
+        SessionService.pipe(Effect.flatMap((s) => s.recordAssistant(state, 'reply', [], 'test-model', usage))),
+      );
+      expect(state.promptEstimate).toBe(999);
+      expect(state.usage).toEqual(usage);
+    } finally { await new Promise((r) => setTimeout(r, 50)); rmSync(join(PROJECT_BASE, encodeProjectPath(dir)), { recursive: true, force: true }); rmSync(dir, { recursive: true, force: true }); }
+  });
+  // The tool result is recorded against the assistant event's uuid; the returned event must carry a positive tokenCount.
+  it('recordToolResult increments promptEstimate and stores tokenCount', async () => {
+    const slug = randomUUID();
+    const dir = join(PROJECT_BASE, slug);
+    mkdirSync(dir, { recursive: true });
+    try {
+      const state = await run(
+        SessionService.pipe(Effect.flatMap((s) => s.create(dir, 'test-model', '0.1.0'))),
+      );
+
+      const assistantEvent = await run(
+        SessionService.pipe(Effect.flatMap((s) => s.recordAssistant(state, 'use tool', [{ id: 'tc1', name: 'bash', arguments: {} }], 'test-model'))),
+      );
+      const before = state.promptEstimate;
+
+      const toolEvent = await run(
+        SessionService.pipe(Effect.flatMap((s) => s.recordToolResult(state, assistantEvent.uuid, 'bash', 'tc1', 'tool output here'))),
+      );
+      expect(state.promptEstimate).toBeGreaterThan(before);
+      expect(toolEvent.tokenCount).toBeGreaterThan(0);
+    } finally { await new Promise((r) => setTimeout(r, 50)); rmSync(join(PROJECT_BASE, encodeProjectPath(dir)), { recursive: true, force: true }); rmSync(dir, { recursive: true, force: true }); }
+  });
+  // NOTE(review): the final `>= 0` assertion holds for any non-negative number, so it barely constrains the recalculated estimate.
+  it('hideMessage resets usage and recalculates promptEstimate', async () => {
+    const slug = randomUUID();
+    const dir = join(PROJECT_BASE, slug);
+    mkdirSync(dir, { recursive: true });
+    try {
+      const state = await run(
+        SessionService.pipe(Effect.flatMap((s) => s.create(dir, 'test-model', '0.1.0'))),
+      );
+
+      const userEv = await run(
+        SessionService.pipe(Effect.flatMap((s) => s.recordUser(state, 'hello world'))),
+      );
+      await run(
+        SessionService.pipe(Effect.flatMap((s) => s.recordAssistant(state, 'reply', [], 'test-model', { prompt: 100, completion: 50, total: 150 }))),
+      );
+      expect(state.usage).toBeDefined();
+
+      await run(
+        SessionService.pipe(Effect.flatMap((s) => s.hideMessage(state, userEv.uuid, 'test'))),
+      );
+      expect(state.usage).toBeUndefined();
+      expect(state.promptEstimate).toBeGreaterThanOrEqual(0);
+    } finally { await new Promise((r) => setTimeout(r, 50)); rmSync(join(PROJECT_BASE, encodeProjectPath(dir)), { recursive: true, force: true }); rmSync(dir, { recursive: true, force: true }); }
+  });
+});
diff --git a/packages/codingcode/test/session/record-tool-result-persist.test.ts b/packages/codingcode/test/session/record-tool-result-persist.test.ts
new file mode 100644
index 0000000..7c53a75
--- /dev/null
+++ b/packages/codingcode/test/session/record-tool-result-persist.test.ts
@@ -0,0 +1,82 @@
+import { describe, it, expect, vi } from 'vitest';
+import { Effect } from 'effect';
+import { SessionService } from '../../src/session/store.js';
+// Replaces the context config module so the values these tests depend on are fixed; a fresh object is returned on every call.
+vi.mock('../../src/context/config.js', () => ({
+  getContextConfig: vi.fn(() => ({
+    thresholdTokens: 8000, // presumably the size above which a tool result is persisted (see the first test's title) — TODO confirm
+    persistPreviewChars: 2000, // the first test expects 2000 'x' characters to remain in the replaced output
+    compactionThreshold: 0.9,
+    keepRecentTurns: 3,
+    toolsExemptFromMicrocompact: ['Read', 'todo_write', 'todo_read', 'tool_search'],
+    minTurnsBetweenCompactions: 5,
+    compactionModel: '',
+    reactiveCompactMaxRetries: 3,
+    reactiveCompactKeepTurns: 3,
+    snipMaxMessages: 50,
+    toolResultBudgetThreshold: 50000,
+    keepRecentToolResults: 3,
+  })),
+}));
+// Test helper: provides SessionService.Default to the effect and runs it to a Promise.
+function run<T>(eff: Effect.Effect<T, any, any>): Promise<T> {
+  return Effect.runPromise(Effect.provide(eff, SessionService.Default) as any);
+}
+// NOTE(review): these tests create sessions for fixed /tmp paths and never remove whatever s.create writes — consider adding cleanup.
+describe('recordToolResult proactive persist', () => {
+  it('persists large tool results (> thresholdTokens) and replaces output', async () => {
+    // 30000 characters are recorded for a 'bash' call; the returned event should hold a path marker plus a 2000-char preview.
+    const state = await run(
+      SessionService.pipe(Effect.flatMap((s) => s.create('/tmp/persist-test', 'test-model', '0.1.0'))),
+    );
+
+    const longOutput = 'x'.repeat(30000);
+    const assistantEvent = await run(
+      SessionService.pipe(Effect.flatMap((s) => s.recordAssistant(state, 'use tool', [{ id: 'tc1', name: 'bash', arguments: { cmd: 'echo' } }], 'test-model'))),
+    );
+
+    const event = await run(
+      SessionService.pipe(Effect.flatMap((s) => s.recordToolResult(state, assistantEvent.uuid, 'bash', 'tc1', longOutput))),
+    );
+
+    expect(event.output).toContain('persisted at:');
+    expect(event.output).toContain('x'.repeat(2000));
+  });
+  // Same oversized payload, but recorded under the tool name 'read': the output must come back untouched.
+  it('does NOT persist read tool results even if large', async () => {
+
+    const state = await run(
+      SessionService.pipe(Effect.flatMap((s) => s.create('/tmp/persist-test-read', 'test-model', '0.1.0'))),
+    );
+
+    const longOutput = 'x'.repeat(30000);
+    const assistantEvent = await run(
+      SessionService.pipe(Effect.flatMap((s) => s.recordAssistant(state, 'use tool', [{ id: 'tc1', name: 'read', arguments: { path: '/tmp/file.txt' } }], 'test-model'))),
+    );
+
+    const event = await run(
+      SessionService.pipe(Effect.flatMap((s) => s.recordToolResult(state, assistantEvent.uuid, 'read', 'tc1', longOutput))),
+    );
+
+    expect(event.output).toBe(longOutput);
+  });
+  // A short 'bash' output must be returned verbatim.
+  it('does NOT persist small tool results', async () => {
+
+    const state = await run(
+      SessionService.pipe(Effect.flatMap((s) => s.create('/tmp/persist-test-small', 'test-model', '0.1.0'))),
+    );
+
+    const shortOutput = 'small result';
+    const assistantEvent = await run(
+      SessionService.pipe(Effect.flatMap((s) => s.recordAssistant(state, 'use tool', [{ id: 'tc1', name: 'bash', arguments: { cmd: 'echo' } }], 'test-model'))),
+    );
+
+    const event = await run(
+      SessionService.pipe(Effect.flatMap((s) => s.recordToolResult(state, assistantEvent.uuid, 'bash', 'tc1', shortOutput))),
+    );
+
+    expect(event.output).toBe(shortOutput);
+  });
+});
+
diff --git a/packages/codingcode/test/session/rollback.test.ts b/packages/codingcode/test/session/rollback.test.ts
index b5eccea..2ea90f8 100644
--- a/packages/codingcode/test/session/rollback.test.ts
+++ b/packages/codingcode/test/session/rollback.test.ts
@@ -31,7 +31,7 @@ function makeFixture(sessionId: string, slug: string) {
     sessionId, projectPath: slug, cwd: '/tmp/test', model: 'test',
     createdAt: new Date().toISOString(), updatedAt: new Date().toISOString(),
     messageCount: 7, title: 'fixture', currentTurnId: 3,
-    tokenCountEstimate: 100, permissionMode: 'default',
+    usage: undefined, promptEstimate: 0, permissionMode: 'default',
   };
   writeFileSync(indexPath, JSON.stringify(idx, null, 2), 'utf8');
 
diff --git a/packages/codingcode/test/session/ui-history-rollback.test.ts b/packages/codingcode/test/session/ui-history-rollback.test.ts
index 3644f87..4ca6eb6 100644
--- a/packages/codingcode/test/session/ui-history-rollback.test.ts
+++ b/packages/codingcode/test/session/ui-history-rollback.test.ts
@@ -32,7 +32,7 @@ function makeFixture(sessionId: string, slug: string, extraEvents?: object[]) {
     sessionId, projectPath: slug, cwd: '/tmp/test', model: 'test',
     createdAt: new Date().toISOString(), updatedAt: new Date().toISOString(),
     messageCount: lines.length, title: 'fixture', currentTurnId: 3,
-    tokenCountEstimate: 100, permissionMode: 'default',
+    usage: undefined, promptEstimate: 0, permissionMode: 'default',
   };
   writeFileSync(indexPath, JSON.stringify(idx, null, 2), 'utf8');
 
@@ -238,7 +238,7 @@ describe('readUIHistory with visibility filtering', () => {
       writeFileSync(join(dir, `${sessionId}.index.json`), JSON.stringify({
         sessionId, projectPath: slug, cwd: '/tmp', model: 't',
         createdAt: new Date().toISOString(), updatedAt: new Date().toISOString(),
-        messageCount: 7, title: 'test', currentTurnId: 2, tokenCountEstimate: 0, permissionMode: 'default',
+        messageCount: 7, title: 'test', currentTurnId: 2, usage: undefined, promptEstimate: 0, permissionMode: 'default',
       }));
 
       const turns = readUIHistory(sessionId);
diff --git a/packages/codingcode/test/session/usage-persist.test.ts b/packages/codingcode/test/session/usage-persist.test.ts
new file mode 100644
index 0000000..d334b56
--- /dev/null
+++ b/packages/codingcode/test/session/usage-persist.test.ts
@@ -0,0 +1,54 @@
+import { describe, it, expect } from 'vitest';
+import { mkdirSync, writeFileSync, readFileSync, rmSync } from 'fs';
+import { join } from 'path';
+import { homedir } from 'os';
+import { randomUUID } from 'crypto';
+import { findSessionIndex } from '../../src/session/store.js';
+import type { SessionIndex } from '../../src/session/types.js';
+
+const PROJECT_BASE = join(homedir(), '.codingcode', 'project');
+// Writes a meta-only transcript and an index.json carrying the given `usage` (the key is omitted from the JSON when undefined).
+function makeFixture(sessionId: string, slug: string, usage?: { prompt: number; completion: number; total: number }) {
+  const dir = join(PROJECT_BASE, slug, 'sessions');
+  mkdirSync(dir, { recursive: true });
+  const transcriptPath = join(dir, `${sessionId}.jsonl`);
+  const indexPath = join(dir, `${sessionId}.index.json`);
+
+  const meta = { type: 'session_meta', sessionId, projectPath: slug, cwd: '/tmp/test', model: 'test', createdAt: new Date().toISOString(), version: '0.1.0' };
+  writeFileSync(transcriptPath, JSON.stringify(meta) + '\n', 'utf8');
+  // `usage` is assigned without a cast so the compiler keeps checking it against SessionIndex.usage.
+  const idx: SessionIndex = {
+    sessionId, projectPath: slug, cwd: '/tmp/test', model: 'test',
+    createdAt: new Date().toISOString(), updatedAt: new Date().toISOString(),
+    messageCount: 0, title: 'test', currentTurnId: 0,
+    usage, promptEstimate: 0, permissionMode: 'default',
+  };
+  writeFileSync(indexPath, JSON.stringify(idx, null, 2), 'utf8');
+
+  return { dir, indexPath };
+}
+// Checks that the `usage` value makeFixture puts into index.json is what findSessionIndex reports back.
+describe('session usage persist', () => {
+  it('findSessionIndex reads usage from index.json', () => {
+    const id = randomUUID();
+    const projectSlug = randomUUID();
+    const expected = { prompt: 1000, completion: 500, total: 1500 };
+    makeFixture(id, projectSlug, expected);
+    try {
+      const found = findSessionIndex(id);
+      expect(found).not.toBeNull();
+      expect(found!.usage).toEqual(expected);
+    } finally { rmSync(join(PROJECT_BASE, projectSlug), { recursive: true, force: true }); }
+  });
+  // No usage is handed to the fixture, so the index is written without that key.
+  it('findSessionIndex returns undefined usage when not present', () => {
+    const id = randomUUID();
+    const projectSlug = randomUUID();
+    makeFixture(id, projectSlug);
+    try {
+      const found = findSessionIndex(id);
+      expect(found).not.toBeNull();
+      expect(found!.usage).toBeUndefined();
+    } finally { rmSync(join(PROJECT_BASE, projectSlug), { recursive: true, force: true }); }
+  });
+});
diff --git a/packages/desktop/src/agent/AgentWorkspace.tsx b/packages/desktop/src/agent/AgentWorkspace.tsx
index 976c7bf..e271762 100644
--- a/packages/desktop/src/agent/AgentWorkspace.tsx
+++ b/packages/desktop/src/agent/AgentWorkspace.tsx
@@ -10,16 +10,53 @@ import ApprovalPanel from './ApprovalPanel'
 
 function ContextIndicator({ threadId }: { threadId: string }) {
   const contextUsage = useGlobalStore((s) => s.agent.contextUsage)
+  const usage = useGlobalStore((s) => s.agent.usageByThreadId[threadId])
   const setContextUsage = useGlobalStore((s) => s.setContextUsage)
+  const isCompressing = useGlobalStore((s) => s.agent.isCompressing)
+  const startCompressing = useGlobalStore((s) => s.startCompressing)
+  const stopCompressing = useGlobalStore((s) => s.stopCompressing)
+
+  const r = 7
+  const circ = 2 * Math.PI * r
+
+  if (isCompressing) {
+    return (
+      <button type="button" disabled
+        className="w-5 h-5 flex items-center justify-center animate-pulse cursor-default">
+        <svg width="18" height="18" viewBox="0 0 18 18">
+          <circle cx="9" cy="9" r={r} fill="none" stroke="#2a2a2a" strokeWidth="2.5" />
+          <circle cx="9" cy="9" r={r} fill="none" stroke="#555" strokeWidth="2.5"
+            strokeDasharray={circ} strokeDashoffset={circ * 0.6}
+            strokeLinecap="round" transform="rotate(-90 9 9)" />
+        </svg>
+      </button>
+    )
+  }
+
   if (!contextUsage) return null
   const pct = Math.min(contextUsage.used / contextUsage.contextWindow, 1)
   const color = pct < 0.4 ? '#4ec9b0' : pct < 0.75 ? '#e5c07b' : '#f44747'
-  const r = 7
-  const circ = 2 * Math.PI * r
+  const detail = usage
+    ? usage.prompt === 0 && usage.completion === 0
+      ? `${usage.total.toLocaleString()} / ${contextUsage.contextWindow.toLocaleString()} tokens`
+      : `prompt: ${usage.prompt.toLocaleString()}, completion: ${usage.completion.toLocaleString()}, total: ${usage.total.toLocaleString()} / ${contextUsage.contextWindow.toLocaleString()} tokens`
+    : `${contextUsage.used.toLocaleString()} / ${contextUsage.contextWindow.toLocaleString()} tokens`
   return (
     <button type="button"
-      onClick={async () => { await api(`/api/sessions/${threadId}/compact`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ cwd: '' }) }).catch((e) => { console.error('Failed to compact session:', e) }); setContextUsage(null) }}
-      title={`上下文: ${Math.round(pct * 100)}% (${contextUsage.used.toLocaleString()} / ${contextUsage.contextWindow.toLocaleString()} tokens)\n点击压缩`}
+      onClick={async () => {
+        startCompressing()
+        try {
+          const res = await api<{ promptEstimate: number; didCompress: boolean; released: number }>(`/api/sessions/${threadId}/compact`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ cwd: '' }) })
+          if (res.promptEstimate != null && contextUsage) {
+            setContextUsage({ used: res.promptEstimate, contextWindow: contextUsage.contextWindow })
+          }
+        } catch (e) {
+          console.error('Failed to compact session:', e)
+        } finally {
+          stopCompressing()
+        }
+      }}
+      title={`上下文: ${Math.round(pct * 100)}% (${detail})\n点击压缩`}
       className="w-5 h-5 flex items-center justify-center hover:opacity-70 transition-opacity">
       <svg width="18" height="18" viewBox="0 0 18 18">
         <circle cx="9" cy="9" r={r} fill="none" stroke="#2a2a2a" strokeWidth="2.5" />
@@ -193,6 +230,7 @@ interface AgentWorkspaceProps {
 
 export default function AgentWorkspace({ sendMessage, abort }: AgentWorkspaceProps) {
   const currentThreadId = useGlobalStore((s) => s.agent.currentThreadId)
+  const isCompressing = useGlobalStore((s) => s.agent.isCompressing)
   const workspace = useGlobalStore((s) => s.workspace)
 
   if (!currentThreadId) {
@@ -211,6 +249,12 @@ export default function AgentWorkspace({ sendMessage, abort }: AgentWorkspacePro
       <MessageStream key={currentThreadId} threadId={currentThreadId} />
       <ApprovalPanel threadId={currentThreadId} />
       <TodoPanel threadId={currentThreadId} />
+      {isCompressing && (
+        <div className="shrink-0 px-5 py-1.5 bg-[#1a1a1a] border-t border-[#2d2d2d] flex items-center gap-2 text-[13px] text-[#888]">
+          <span className="w-3 h-3 border-2 border-[#555] border-t-transparent rounded-full animate-spin" />
+          <span>正在压缩上下文...</span>
+        </div>
+      )}
       <div className="shrink-0">
         <InputBox sendMessage={sendMessage} abort={abort} />
       </div>
diff --git a/packages/desktop/src/hooks/useAgent.ts b/packages/desktop/src/hooks/useAgent.ts
index 223ccb4..3ab432c 100644
--- a/packages/desktop/src/hooks/useAgent.ts
+++ b/packages/desktop/src/hooks/useAgent.ts
@@ -31,6 +31,7 @@ export function useAgent() {
   const setModels = useGlobalStore((s) => s.setModels)
   const setApprovalPolicy = useGlobalStore((s) => s.setApprovalPolicy)
   const setContextUsage = useGlobalStore((s) => s.setContextUsage)
+  const setThreadUsage = useGlobalStore((s) => s.setThreadUsage)
   const setProjects = useGlobalStore((s) => s.setProjects)
   const switchProject = useGlobalStore((s) => s.switchProject)
   const addProject = useGlobalStore((s) => s.addProject)
@@ -78,11 +79,16 @@ export function useAgent() {
           updatedAt: new Date(s.updatedAt).getTime(),
         }))
         loadThreads(threads)
+        for (const s of sessions) {
+          if (s.usage) {
+            setThreadUsage(s.sessionId, { prompt: s.usage.prompt, completion: s.usage.completion, total: s.usage.total })
+          }
+        }
       }).catch((e) => { console.error('Failed to load sessions:', e) })
     }
 
     // Restore persisted projects, approval policy, model - already done by persist middleware
-  }, [loadThreads, setModel, setModels, workspace.rootPath])
+  }, [loadThreads, setModel, setModels, setThreadUsage, workspace.rootPath])
 
   // Load history from HTTP when switching to a thread with no turns
   useEffect(() => {
@@ -118,13 +124,24 @@ export function useAgent() {
       case 'todo_update':
         applyTodoUpdate(threadId, event.items as any)
         return null
+      case 'usage':
+        setThreadUsage(threadId, { prompt: event.prompt, completion: event.completion, total: event.total })
+        return null
+      case 'reactive_compact':
+        {
+          const contextUsage = useGlobalStore.getState().agent.contextUsage
+          if (contextUsage) {
+            setContextUsage({ used: event.promptEstimate, contextWindow: contextUsage.contextWindow })
+          }
+        }
+        return null
       case 'done':
       case 'session_id':
         return null
       default:
         return null
     }
-  }, [applyTodoUpdate, updateTurnId])
+  }, [applyTodoUpdate, updateTurnId, setThreadUsage, setContextUsage])
 
   const sendMessage = useCallback(
     async (content: string, cwd?: string) => {
@@ -148,7 +165,6 @@ export function useAgent() {
       const turn: Turn = { id: turnId, items: [userItem], status: 'running' }
 
       startTurn(threadId, turn, { cwd: effectiveCwd, title: content.slice(0, 60) })
-      setContextUsage(null)
 
       const controller = new AbortController()
       abortControllers.current.set(threadId, controller)
@@ -186,7 +202,7 @@ export function useAgent() {
         abortControllers.current.delete(threadId)
       }
     },
-    [startTurn, setCurrentThread, setContextUsage, streamChunkToItem, applyChunk, completeTurn, workspace.rootPath, approvalPolicy, currentThreadId]
+    [startTurn, setCurrentThread, streamChunkToItem, applyChunk, completeTurn, workspace.rootPath, approvalPolicy, currentThreadId]
   )
 
   const abort = useCallback(() => {
@@ -238,10 +254,15 @@ export function useAgent() {
             updatedAt: new Date(s.updatedAt).getTime(),
           }))
           loadThreads(threads)
+          for (const s of sessions) {
+            if (s.usage) {
+              setThreadUsage(s.sessionId, { prompt: s.usage.prompt, completion: s.usage.completion, total: s.usage.total })
+            }
+          }
         }
       }
     },
-    [loadThreads]
+    [loadThreads, setThreadUsage]
   )
 
   // Rollback methods
diff --git a/packages/desktop/src/stores/global.store.ts b/packages/desktop/src/stores/global.store.ts
index 36b94c2..496db30 100644
--- a/packages/desktop/src/stores/global.store.ts
+++ b/packages/desktop/src/stores/global.store.ts
@@ -64,6 +64,8 @@ interface AgentState {
   contextUsage: { used: number; contextWindow: number } | null
   todoByThreadId: Record<string, TodoPanelState>
   pendingInput: string | null
+  usageByThreadId: Record<string, { prompt: number; completion: number; total: number }>
+  isCompressing: boolean
 }
 
 interface EditorState {
@@ -120,6 +122,7 @@ interface GlobalActions {
   setModel: (model: string) => void
   setModels: (models: ModelEntry[]) => void
   setContextUsage: (usage: { used: number; contextWindow: number } | null) => void
+  setThreadUsage: (threadId: string, usage: { prompt: number; completion: number; total: number }) => void
   setCursor: (line: number, col: number) => void
   loadThreads: (threads: Thread[]) => void
   updateToolCallStatus: (threadId: string, callId: string, status: 'pending' | 'approved' | 'rejected' | 'running') => void
@@ -142,6 +145,8 @@ interface GlobalActions {
   markScopeRestored: (threadId: string, turnId: string, scope: 'agent' | 'all') => void
   initRevertedFilesFromState: (threadId: string) => void
   setTurnCheckpointMapping: (threadId: string, checkpointId: number, uiTurnId: string) => void
+  startCompressing: () => void
+  stopCompressing: () => void
 }
 
 const initialGit: GitStatus = {
@@ -185,6 +190,8 @@ export const useGlobalStore = create<GlobalState & GlobalActions>()(
         contextUsage: null,
         todoByThreadId: {},
         pendingInput: null,
+        usageByThreadId: {},
+        isCompressing: false,
       },
       editor: {
         cursorLine: 1,
@@ -246,7 +253,20 @@ export const useGlobalStore = create<GlobalState & GlobalActions>()(
       setGit: (status) => set((s) => { s.git = status }),
       addTerminal: (session) => set((s) => { s.terminals.push(session) }),
       removeTerminal: (id) => set((s) => { s.terminals = s.terminals.filter((t) => t.id !== id) }),
-      setCurrentThread: (id) => set((s) => { s.agent.currentThreadId = id }),
+      setCurrentThread: (id) => set((s) => {
+        // Switching threads also re-derives the context indicator from that thread's
+        // stored usage; it is cleared when there is no thread, no stored usage,
+        // or the selected model is not in the loaded model list.
+        s.agent.currentThreadId = id
+        s.agent.contextUsage = null
+        if (!id) return
+        const stored = s.agent.usageByThreadId[id]
+        if (!stored) return
+        const activeModel = s.agent.models.find((m) => m.id === s.agent.model)
+        if (activeModel) {
+          s.agent.contextUsage = { used: stored.total, contextWindow: activeModel.context_window }
+        }
+      }),
       upsertThread: (thread) => set((s) => { s.agent.threads[thread.id] = thread }),
       setThreadTurns: (threadId, turns) => set((s) => {
         const thread = s.agent.threads[threadId]
@@ -263,6 +283,15 @@ export const useGlobalStore = create<GlobalState & GlobalActions>()(
       setModel: (model) => set((s) => { s.agent.model = model }),
       setModels: (models) => set((s) => { s.agent.models = models }),
       setContextUsage: (usage) => set((s) => { s.agent.contextUsage = usage }),
+      setThreadUsage: (threadId, usage) => set((s) => {
+        // Always remember the usage; mirror it into the indicator only for the active thread.
+        s.agent.usageByThreadId[threadId] = usage
+        if (s.agent.currentThreadId !== threadId) return
+        const activeModel = s.agent.models.find((m) => m.id === s.agent.model)
+        if (!activeModel) return
+        s.agent.contextUsage = { used: usage.total, contextWindow: activeModel.context_window }
+        // (when the selected model is not in the list, the previous contextUsage is left untouched)
+      }),
       setCursor: (line, col) => set((s) => { s.editor.cursorLine = line; s.editor.cursorCol = col }),
 
       loadThreads: (threads) => set((s) => {
@@ -280,6 +309,12 @@ export const useGlobalStore = create<GlobalState & GlobalActions>()(
           }
         }
         s.agent.threads = next
+        // Clean up usage entries for deleted threads
+        for (const id of Object.keys(s.agent.usageByThreadId)) {
+          if (!incomingIds.has(id)) {
+            delete s.agent.usageByThreadId[id]
+          }
+        }
       }),
 
       updateToolCallStatus: (threadId, callId, status) => set((s) => {
@@ -503,6 +538,8 @@ export const useGlobalStore = create<GlobalState & GlobalActions>()(
         }
         s.rollback.turnCheckpointMapping[threadId][checkpointId] = uiTurnId
       }),
+      startCompressing: () => set(({ agent }) => { agent.isCompressing = true }),
+      stopCompressing: () => set(({ agent }) => { agent.isCompressing = false }),
     })),
     {
       name: 'codingcode-desktop-store',
@@ -528,6 +565,7 @@ export const useGlobalStore = create<GlobalState & GlobalActions>()(
         agent: {
           approvalPolicy: state.agent.approvalPolicy,
           model: state.agent.model,
+          usageByThreadId: state.agent.usageByThreadId,
         },
         editor: {
           cursorLine: state.editor.cursorLine,
@@ -551,6 +589,7 @@ export const useGlobalStore = create<GlobalState & GlobalActions>()(
           threads: {},
           todoByThreadId: {},
           contextUsage: null,
+          usageByThreadId: (persisted as any)?.agent?.usageByThreadId ?? {}, // `?.` on persisted: fall back to {} instead of throwing when no snapshot exists (assumed possible on empty storage; confirm)
         },
       }),
     },
diff --git a/packages/desktop/src/styles/index.css b/packages/desktop/src/styles/index.css
index a69dc74..6ee9c88 100644
--- a/packages/desktop/src/styles/index.css
+++ b/packages/desktop/src/styles/index.css
@@ -32,19 +32,21 @@ body,
 
 /* Custom scrollbar */
 ::-webkit-scrollbar {
-  width: 6px;
-  height: 6px;
+  width: 14px;
+  height: 14px;
 }
 
 ::-webkit-scrollbar-track {
-  background: transparent;
+  background: #1e1e1e;
 }
 
 ::-webkit-scrollbar-thumb {
-  background: #424242;
-  border-radius: 3px;
+  background: #555;
+  border-radius: 7px;
+  border: 3px solid #1e1e1e;
+  background-clip: padding-box;
 }
 
 ::-webkit-scrollbar-thumb:hover {
-  background: #555555;
+  background: #666;
 }
diff --git a/packages/desktop/test/global-store.test.ts b/packages/desktop/test/global-store.test.ts
index 2884502..14732f4 100644
--- a/packages/desktop/test/global-store.test.ts
+++ b/packages/desktop/test/global-store.test.ts
@@ -18,6 +18,8 @@ beforeEach(() => {
       contextUsage: null,
       todoByThreadId: {},
       pendingInput: null,
+      usageByThreadId: {},
+      isCompressing: false,
     },
     workspace: {
       rootPath: '',
@@ -436,3 +438,56 @@ describe('global store - project management', () => {
     expect(useGlobalStore.getState().workspace.rootPath).toBe('/some/path')
   })
 })
+
+describe('global store - token usage', () => {
+  // Every call goes through store() so assertions read the state produced by the preceding action.
+  const store = () => useGlobalStore.getState()
+  const usage = { prompt: 1000, completion: 500, total: 1500 }
+
+  it('setThreadUsage stores usage by threadId', () => {
+    store().setThreadUsage('t1', usage)
+    expect(store().agent.usageByThreadId['t1']).toEqual({ prompt: 1000, completion: 500, total: 1500 })
+  })
+  it('setThreadUsage updates contextUsage when thread is active', () => {
+    store().setModels([{ id: 'm1', name: 'Model', provider: 'openai', context_window: 128000 }])
+    store().setModel('m1')
+    store().setCurrentThread('t1')
+    store().setThreadUsage('t1', usage)
+    expect(store().agent.contextUsage).toEqual({ used: 1500, contextWindow: 128000 })
+  })
+  it('setThreadUsage does not update contextUsage for inactive thread', () => {
+    store().setCurrentThread('t1')
+    store().setThreadUsage('t2', usage)
+    expect(store().agent.contextUsage).toBeNull()
+  })
+  it('setCurrentThread restores contextUsage from usageByThreadId', () => {
+    store().setModels([{ id: 'm1', name: 'Model', provider: 'openai', context_window: 128000 }])
+    store().setModel('m1')
+    store().setThreadUsage('t1', usage)
+    store().setCurrentThread('t1')
+    expect(store().agent.contextUsage).toEqual({ used: 1500, contextWindow: 128000 })
+  })
+  it('setCurrentThread clears contextUsage when no usage for thread', () => {
+    store().setContextUsage({ used: 100, contextWindow: 128000 })
+    store().setCurrentThread('t1')
+    expect(store().agent.contextUsage).toBeNull()
+  })
+})
+
+describe('global store - compressing state', () => {
+  // Re-read the flag from the store on every call so each assertion sees the latest state.
+  const isCompressing = () => useGlobalStore.getState().agent.isCompressing
+  it('initial isCompressing is false', () => {
+    expect(isCompressing()).toBe(false)
+  })
+  it('startCompressing sets isCompressing to true', () => {
+    useGlobalStore.getState().startCompressing()
+    expect(isCompressing()).toBe(true)
+  })
+  it('stopCompressing sets isCompressing to false', () => {
+    useGlobalStore.getState().startCompressing()
+    expect(isCompressing()).toBe(true)
+    useGlobalStore.getState().stopCompressing()
+    expect(isCompressing()).toBe(false)
+  })
+})
diff --git a/packages/desktop/test/useAgent-streamChunkToItem.test.ts b/packages/desktop/test/useAgent-streamChunkToItem.test.ts
index c157db0..d96d06a 100644
--- a/packages/desktop/test/useAgent-streamChunkToItem.test.ts
+++ b/packages/desktop/test/useAgent-streamChunkToItem.test.ts
@@ -26,6 +26,10 @@ function streamChunkToItem(
       return { id: 'rand', type: 'error', message: event.message }
     case 'todo_update':
       return null
+    case 'usage':
+      return null
+    case 'reactive_compact':
+      return null
     case 'done':
     case 'session_id':
       return null
@@ -118,4 +122,20 @@ describe('streamChunkToItem after StreamChunk refactor', () => {
     )
     expect(item).toBeNull()
   })
+
+  it('maps usage to null', () => {
+    // A usage chunk is expected to produce no item from streamChunkToItem.
+    const usageChunk = { type: 'usage', prompt: 1000, completion: 500, total: 1500 } as const
+    const mapped = streamChunkToItem(usageChunk, 't1', 'a1', 'turn1')
+
+    expect(mapped).toBeNull()
+  })
+
+  it('maps reactive_compact to null', () => {
+    // A reactive_compact chunk is expected to produce no item from streamChunkToItem.
+    const compactChunk = { type: 'reactive_compact', released: 500, promptEstimate: 800 } as const
+    const mapped = streamChunkToItem(compactChunk, 't1', 'a1', 'turn1')
+
+    expect(mapped).toBeNull()
+  })
 })
diff --git a/packages/infra/src/config.ts b/packages/infra/src/config.ts
index 0a5e829..87b45e4 100644
--- a/packages/infra/src/config.ts
+++ b/packages/infra/src/config.ts
@@ -3,34 +3,22 @@ import { resolve, dirname } from 'path';
 import { homedir } from 'os';
 import { parse as parseYaml, stringify as stringifyYaml } from 'yaml';
 
-export interface ContextThresholdsConfig {
-  prune: number;
-  compaction: number;
-}
-
 export interface ContextConfig {
-  defaultMaxTokens: number;
-  reservedTokens: number;
-  thresholds: ContextThresholdsConfig;
-  pruneProtectedTokens: number;
-  pruneMinRelease: number;
-  toolsExemptFromPrune: string[];
-  prefixTurnsProtected: number;
-  minTurnsBetweenCompactions: number;
+  compactionThreshold: number;
   keepRecentTurns: number;
+  toolsExemptFromMicrocompact: string[];
+  minTurnsBetweenCompactions: number;
   /** Model for context compaction. Empty string falls back to main session LLM.
    *  Use full id format "model@API_KEY_ENV" to avoid ambiguity (e.g. "deepseek-chat@DEEPSEEK_API_KEY").
    *  Can also use bare model id (e.g. "deepseek-chat") or display name, first match wins. */
   compactionModel: string;
-  archiveTtlDays: number;
-  checkpointKeep: number;
   thresholdTokens: number;
   reactiveCompactMaxRetries: number;
   reactiveCompactKeepTurns: number;
   snipMaxMessages: number;
-  snipKeepHead: number;
-  microKeepRecentTools: number;
   persistPreviewChars: number;
+  toolResultBudgetThreshold: number;
+  keepRecentToolResults: number;
 }
 
 export interface MemoryTypeConfig {
@@ -70,25 +58,18 @@ export interface AppConfig {
 }
 
 const DEFAULT_CONTEXT: ContextConfig = {
-  defaultMaxTokens: 200000,
-  reservedTokens: 20000,
-  thresholds: { prune: 0.7, compaction: 0.9 },
-  pruneProtectedTokens: 40000,
-  pruneMinRelease: 20000,
-  toolsExemptFromPrune: ['Read', 'todo_write', 'todo_read', 'tool_search'],
-  prefixTurnsProtected: 1,
+  compactionThreshold: 0.9,
+  keepRecentTurns: 3,
+  toolsExemptFromMicrocompact: ['Read', 'todo_write', 'todo_read', 'tool_search'],
   minTurnsBetweenCompactions: 5,
-  keepRecentTurns: 10,
   compactionModel: '',
-  archiveTtlDays: 30,
-  checkpointKeep: 50,
-  reactiveCompactMaxRetries: 1,
+  reactiveCompactMaxRetries: 3,
   reactiveCompactKeepTurns: 3,
-  snipMaxMessages: 100,
-  snipKeepHead: 3,
-  microKeepRecentTools: 5,
+  snipMaxMessages: 50,
   persistPreviewChars: 2000,
-  thresholdTokens: 2000,
+  thresholdTokens: 8000,
+  toolResultBudgetThreshold: 50000,
+  keepRecentToolResults: 3,
 };
 
 export const DEFAULT_MEMORY_TYPES: MemoryTypeConfig[] = [