From 94bd6ac0911888c242748feb58ff34650d7623be Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Sat, 2 May 2026 00:56:23 +0000 Subject: [PATCH] =?UTF-8?q?feat:=20context=20health=20diagnostics=20?= =?UTF-8?q?=E2=80=94=20C=5Fnorm,=20R=5Fcompression,=20time-gap=20segmentat?= =?UTF-8?q?ion,=20recall=20recency?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add two computable diagnostic signals for context management quality: - temporalCnorm(): normalized variance of relative-existence weights over message timestamps. Measures attention imbalance [0,1] — 0 is uniform distribution, 1 is dominated by distant past. Logged per-distillation under LORE_DEBUG=1. - compressionRatio(): k/√N ratio where k=distilled tokens, N=source tokens. Values < 1.0 signal aggressive/likely-lossy compression. Logged per-distillation under LORE_DEBUG=1. Enhance detectSegments() to prefer splitting at the largest inter-message time gap (≥3x median) when oversized, respecting natural conversation boundaries instead of arbitrary count-based chunking. Falls back to count-based splitting when timestamps are uniform. Add recency-biased RRF list for temporal recall results. Same candidates re-ranked by created_at (newest first), fused alongside BM25 via existing RRF — messages that are both semantically relevant AND recent get a natural score boost. Inspired by D7x7z49/llm-context-idea research notes on temporal clustering and compression boundaries. 582 tests pass, 0 fail. Build clean across all 3 packages. --- packages/core/src/distillation.ts | 144 ++++++++++-- packages/core/src/recall.ts | 18 ++ packages/core/src/temporal.ts | 39 ++++ packages/core/test/context-health.test.ts | 256 ++++++++++++++++++++++ packages/core/test/distillation.test.ts | 104 +++++++++ 5 files changed, 542 insertions(+), 19 deletions(-) create mode 100644 packages/core/test/context-health.test.ts diff --git a/packages/core/src/distillation.ts b/packages/core/src/distillation.ts index a07610d..8485397 100644 --- a/packages/core/src/distillation.ts +++ b/packages/core/src/distillation.ts @@ -19,32 +19,125 @@ export { workerSessionIDs }; type TemporalMessage = temporal.TemporalMessage; -// Segment detection: group related messages together -function detectSegments( +/** + * Compression health ratio: k / √N. + * + * k = distilled token count, N = source token count. + * Values < 1.0 signal likely lossy compression (below the square-root + * boundary). Values > 1.0 signal relatively faithful compression. + * + * Based on the "LLM Context Square Root Theory" heuristic from + * D7x7z49/llm-context-idea. The specific threshold is unvalidated — + * use as a diagnostic signal, not a hard gate. + */ +export function compressionRatio( + distilledTokens: number, + sourceTokens: number, +): number { + if (sourceTokens <= 0) return 0; + return distilledTokens / Math.sqrt(sourceTokens); +} + +/** + * Segment detection: group related messages into distillation-sized chunks. + * + * When the message count exceeds `maxSegment`, prefers splitting at the + * largest inter-message time gap (if it's ≥ 3× the median gap) to respect + * natural conversation boundaries. Falls back to count-based splitting at + * `maxSegment` when timestamps are uniform. + * + * Trailing segments with < 3 messages are merged into the previous segment + * to avoid tiny distillation inputs with too little context. + * + * Exported for testing; `run()` is the production caller. 
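+ *
+ * Worked example (mirrors the unit tests): given 20 messages, the first 10
+ * at 1s intervals, then a 1-hour gap before the last 10, with maxSegment = 15
+ * the count boundary would fall at 15, but the hour-long gap at index 10
+ * dwarfs the ~1s median gap, so the split lands at index 10 and yields two
+ * segments of 10.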
+ */ +export function detectSegments( messages: TemporalMessage[], maxSegment: number, ): TemporalMessage[][] { if (messages.length <= maxSegment) return [messages]; - const segments: TemporalMessage[][] = []; - let current: TemporalMessage[] = []; - - for (const msg of messages) { - current.push(msg); - // Split on segment size limit - if (current.length >= maxSegment) { - segments.push(current); - current = []; - } + return splitSegments(messages, maxSegment); +} + +/** Minimum segment size — segments smaller than this get merged. */ +const MIN_SEGMENT = 3; + +/** + * Multiplier for the median gap threshold: a time gap must be at least + * this many times the median gap to be used as a split point. + */ +const GAP_THRESHOLD_MULTIPLIER = 3; + +function splitSegments( + messages: TemporalMessage[], + maxSegment: number, +): TemporalMessage[][] { + if (messages.length <= maxSegment) return [messages]; + + // Find the split point: prefer the largest time gap if it's significant + const splitIdx = findSplitIndex(messages, maxSegment); + + const left = messages.slice(0, splitIdx); + const right = messages.slice(splitIdx); + + // Recurse on both halves + const result = splitSegments(left, maxSegment); + + if (right.length < MIN_SEGMENT) { + // Merge tiny trailing segment into the last segment + result[result.length - 1].push(...right); + } else { + result.push(...splitSegments(right, maxSegment)); + } + + return result; +} + +/** + * Choose where to split an oversized message array. + * + * If there's a time gap ≥ 3× the median gap AND it falls within a range + * that would produce segments of at least MIN_SEGMENT size, use it. + * Otherwise fall back to the count-based boundary at `maxSegment`. + */ +function findSplitIndex( + messages: TemporalMessage[], + maxSegment: number, +): number { + // Compute consecutive time gaps + const gaps: Array<{ index: number; gap: number }> = []; + for (let i = 1; i < messages.length; i++) { + gaps.push({ + index: i, + gap: messages[i].created_at - messages[i - 1].created_at, + }); } - if (current.length > 0) { - // Merge small trailing segment with previous if too small - if (current.length < 3 && segments.length > 0) { - segments[segments.length - 1].push(...current); - } else { - segments.push(current); + + if (gaps.length === 0) return maxSegment; + + // Find median gap + const sortedGaps = gaps.map((g) => g.gap).sort((a, b) => a - b); + const medianGap = sortedGaps[Math.floor(sortedGaps.length / 2)]; + + // Find the largest gap that would produce viable segments (≥ MIN_SEGMENT on each side) + let bestGap = { index: -1, gap: 0 }; + for (const g of gaps) { + if ( + g.gap > bestGap.gap && + g.index >= MIN_SEGMENT && + messages.length - g.index >= MIN_SEGMENT + ) { + bestGap = g; } } - return segments; + + // Use the time gap if it's significantly larger than median + if (bestGap.index > 0 && bestGap.gap >= medianGap * GAP_THRESHOLD_MULTIPLIER) { + return bestGap.index; + } + + // Fall back to count-based splitting + return maxSegment; } function formatTime(ms: number): string { @@ -527,6 +620,19 @@ async function distillSegment(input: { }); temporal.markDistilled(input.messages.map((m) => m.id)); + // Diagnostic: log compression health and temporal clustering metrics. + // R_compression (k/√N): < 1.0 signals likely lossy distillation. + // C_norm: 0 = uniform timestamps, 1 = dominated by distant past. 
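+ // Worked example (same arithmetic as the test suite): a 15_000-token
+ // segment distilled to 300 tokens gives R = 300 / √15000 ≈ 2.45, well
+ // above the 1.0 lossy boundary. Note that distilledTokens below is a
+ // rough chars-per-token estimate (observations.length / 3), not a real
+ // tokenizer count.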
+ const distilledTokens = Math.ceil(result.observations.length / 3); + const sourceTokens = input.messages.reduce((sum, m) => sum + m.tokens, 0); + const rComp = compressionRatio(distilledTokens, sourceTokens); + const cNorm = temporal.temporalCnorm(input.messages.map((m) => m.created_at)); + log.info( + `distill segment: ${input.messages.length} msgs, ` + + `${sourceTokens}→${distilledTokens} tokens, ` + + `R=${rComp.toFixed(2)}, C_norm=${cNorm.toFixed(3)}`, + ); + // Fire-and-forget: embed the distillation for vector search if (embedding.isAvailable()) { embedding.embedDistillation(distillId, result.observations); diff --git a/packages/core/src/recall.ts b/packages/core/src/recall.ts index ea0007b..887fb1b 100644 --- a/packages/core/src/recall.ts +++ b/packages/core/src/recall.ts @@ -322,6 +322,24 @@ export async function runRecall(input: RecallInput): Promise { key: (r) => `t:${r.item.id}`, }, ); + + // Recency-biased list for temporal results: same candidates re-ranked + // by created_at (newest first). RRF naturally boosts messages that + // appear in both the BM25 and recency lists — i.e. results that are + // both semantically relevant AND recent. Uses the same `t:` key prefix + // so RRF merges rather than duplicates. + if (temporalResults.length > 0) { + const recencySorted = [...temporalResults].sort( + (a, b) => b.created_at - a.created_at, + ); + allRrfLists.push({ + items: recencySorted.map((item) => ({ + source: "temporal" as const, + item, + })), + key: (r) => `t:${r.item.id}`, + }); + } } // Vector search on the original query (not expansions — avoid redundant embeds). diff --git a/packages/core/src/temporal.ts b/packages/core/src/temporal.ts index bc86c36..6d0c30d 100644 --- a/packages/core/src/temporal.ts +++ b/packages/core/src/temporal.ts @@ -280,6 +280,45 @@ export function searchScored(input: { } } +/** + * Normalized variance of relative-existence weights over message timestamps. + * + * Measures temporal attention imbalance: 0 means timestamps are evenly + * distributed (uniform attention), 1 means a single distant timestamp + * dominates (attention stuck in the past). Useful as a lightweight + * signal for distillation segmentation, recall time-biasing, and + * idle-resume awareness. + * + * Only meaningful for n ≥ 2. Returns 0 for 0 or 1 timestamps. + * + * Based on the "Temporal Clustering via Relative Existence" heuristic + * from D7x7z49/llm-context-idea. + */ +export function temporalCnorm( + timestamps: number[], + now: number = Date.now(), +): number { + const n = timestamps.length; + if (n < 2) return 0; + + // Existence durations: how long each piece has existed + const durations = timestamps.map((t) => now - t); + const totalDuration = durations.reduce((a, b) => a + b, 0); + if (totalDuration <= 0) return 0; + + // Relative existence weights (positive, sum to 1) + const weights = durations.map((d) => d / totalDuration); + + // Normalized variance: Var(w) / Var_max + // Var(w) = (1/n) * Σ(w_i - 1/n)² + // Var_max = (n-1) / n² (when one weight = 1, rest = 0) + const uniform = 1 / n; + const variance = + weights.reduce((sum, w) => sum + (w - uniform) ** 2, 0) / n; + const maxVariance = (n - 1) / (n * n); + return maxVariance === 0 ? 
0 : variance / maxVariance; +} + export function count(projectPath: string, sessionID?: string): number { const pid = ensureProject(projectPath); const query = sessionID diff --git a/packages/core/test/context-health.test.ts b/packages/core/test/context-health.test.ts new file mode 100644 index 0000000..e862da6 --- /dev/null +++ b/packages/core/test/context-health.test.ts @@ -0,0 +1,256 @@ +import { describe, test, expect } from "bun:test"; +import { temporalCnorm } from "../src/temporal"; +import { compressionRatio } from "../src/distillation"; +import { reciprocalRankFusion } from "../src/search"; + +// ─── temporalCnorm ────────────────────────────────────────────────────────── + +describe("temporalCnorm", () => { + test("returns 0 for empty array", () => { + expect(temporalCnorm([], 1000)).toBe(0); + }); + + test("returns 0 for single timestamp", () => { + expect(temporalCnorm([500], 1000)).toBe(0); + }); + + test("returns 0 for two equal timestamps", () => { + // Both have the same existence duration → uniform weights → variance = 0 + expect(temporalCnorm([500, 500], 1000)).toBe(0); + }); + + test("returns 0 when all timestamps equal now", () => { + // All durations = 0 → totalDuration = 0 → early return 0 + const now = 5000; + expect(temporalCnorm([now, now, now], now)).toBe(0); + }); + + test("returns ≈0 for equally-spaced timestamps", () => { + // 10 timestamps at regular 100ms intervals. With a small n like 10, + // equally-spaced doesn't yield exactly 0 variance (it's proportional + // to how far each point is from the mean), but the C_norm should be + // very low compared to pathological cases. + const base = 1000; + const timestamps = Array.from({ length: 10 }, (_, i) => base + i * 100); + const now = base + 10 * 100; + const result = temporalCnorm(timestamps, now); + // For equally-spaced, C_norm is deterministic and low but not zero. 
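+ // Hand-computed for this layout: durations are 1000 - 100*i, weights
+ // (1000 - 100*i)/5500, Var(w) ≈ 0.00273, Var_max = 9/100 = 0.09, so
+ // C_norm ≈ 0.03.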
+ expect(result).toBeGreaterThanOrEqual(0); + expect(result).toBeLessThan(0.15); + }); + + test("approaches 1 for one ancient + many recent timestamps", () => { + // One timestamp from long ago, rest very recent → old one dominates + const now = 1_000_000; + const timestamps = [ + 0, // ancient — duration = 1_000_000 + 999_990, + 999_991, + 999_992, + 999_993, + 999_994, + 999_995, + 999_996, + 999_997, + 999_998, + ]; + const result = temporalCnorm(timestamps, now); + expect(result).toBeGreaterThan(0.8); + expect(result).toBeLessThanOrEqual(1); + }); + + test("result is always in [0, 1]", () => { + // Property test: random timestamps should always produce [0, 1] + for (let trial = 0; trial < 100; trial++) { + const n = 2 + Math.floor(Math.random() * 20); + const now = Date.now(); + const timestamps = Array.from( + { length: n }, + () => now - Math.floor(Math.random() * 10_000_000), + ); + const result = temporalCnorm(timestamps, now); + expect(result).toBeGreaterThanOrEqual(0); + expect(result).toBeLessThanOrEqual(1.0001); // tiny float tolerance + } + }); + + test("two timestamps: one old, one recent → high C_norm", () => { + // With exactly 2 items, extreme asymmetry should produce high C_norm + const now = 10_000; + const result = temporalCnorm([0, 9_999], now); + // Old: duration = 10_000, Recent: duration = 1 + // Weights: [10000/10001, 1/10001] ≈ [0.9999, 0.0001] + // This is highly skewed + expect(result).toBeGreaterThan(0.9); + }); + + test("defaults to Date.now() when now is omitted", () => { + const past = Date.now() - 60_000; + const result = temporalCnorm([past, past + 100]); + // Should not throw and should return a valid number + expect(typeof result).toBe("number"); + expect(result).toBeGreaterThanOrEqual(0); + }); + + test("spike detection: C_norm is higher for clustered timestamps than uniform", () => { + // Two distinct clusters separated by a large gap produce higher + // C_norm than the same number of evenly-spaced timestamps. 
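+ // Hand-computed expectation: the clustered layout comes out near
+ // C_norm ≈ 0.09, the uniform layout near 0.03. The test asserts only
+ // the ordering, not these exact values.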
+ const now = 20_000; + + // Clustered: 5 messages at t≈0, 5 messages at t≈19_000 (big gap) + const clustered = [ + ...Array.from({ length: 5 }, (_, i) => i * 10), + ...Array.from({ length: 5 }, (_, i) => 19_000 + i * 10), + ]; + const cClustered = temporalCnorm(clustered, now); + + // Uniform: 10 messages evenly spaced + const uniform = Array.from({ length: 10 }, (_, i) => i * 2000); + const cUniform = temporalCnorm(uniform, now); + + // Clustered layout should produce higher C_norm (more imbalance) + expect(cClustered).toBeGreaterThan(cUniform); + }); +}); + +// ─── compressionRatio ─────────────────────────────────────────────────────── + +describe("compressionRatio", () => { + test("returns 0 for zero source tokens", () => { + expect(compressionRatio(100, 0)).toBe(0); + }); + + test("returns 0 for negative source tokens", () => { + expect(compressionRatio(100, -10)).toBe(0); + }); + + test("returns k/√N for valid inputs", () => { + // k=10, N=100 → 10/√100 = 10/10 = 1.0 + expect(compressionRatio(10, 100)).toBe(1.0); + }); + + test("returns < 1.0 for aggressively compressed output", () => { + // k=5, N=100 → 5/10 = 0.5 + expect(compressionRatio(5, 100)).toBe(0.5); + }); + + test("returns > 1.0 for faithful/verbose compression", () => { + // k=20, N=100 → 20/10 = 2.0 + expect(compressionRatio(20, 100)).toBe(2.0); + }); + + test("handles large token counts", () => { + // k=1000, N=1_000_000 → 1000/1000 = 1.0 + expect(compressionRatio(1000, 1_000_000)).toBe(1.0); + }); + + test("returns exactly 1.0 when k = √N", () => { + // N = 10000, k = 100 → 100/√10000 = 100/100 = 1.0 + expect(compressionRatio(100, 10_000)).toBe(1.0); + }); + + test("realistic distillation: 30 messages → summary", () => { + // 30 messages averaging 500 tokens each = 15000 source tokens + // Distilled to 300 tokens of observations + // R = 300 / √15000 = 300 / 122.47 ≈ 2.45 + const r = compressionRatio(300, 15_000); + expect(r).toBeGreaterThan(2); + expect(r).toBeLessThan(3); + }); +}); + +// ─── Recall recency biasing via RRF ───────────────────────────────────────── + +describe("recency-biased RRF fusion", () => { + // Simulate the pattern used in recall.ts: BM25-sorted list + recency-sorted + // list of the same items, fused via RRF. Items appearing in both lists get + // a higher score than items in just one. 
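+ // Assumed scoring, inferred from the rank arithmetic in the comments
+ // below: RRF score(d) = Σ over lists of 1/(60 + rank(d)), 0-based ranks.
+ // Worked: old-relevant = 1/60 + 1/62 ≈ 0.0327957 vs mid-mid = 2/61 ≈
+ // 0.0327869, so the one-axis extremes edge out the middle by ~9e-6.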
+ type Item = { id: string; bm25Rank: number; created_at: number }; + + function makeItems(): Item[] { + return [ + { id: "old-relevant", bm25Rank: 0, created_at: 1000 }, // best BM25, oldest + { id: "mid-mid", bm25Rank: 1, created_at: 5000 }, // mid BM25, mid recency + { id: "new-weak", bm25Rank: 2, created_at: 9000 }, // worst BM25, newest + ]; + } + + test("BM25-only: ordered purely by BM25 rank", () => { + const items = makeItems(); + const bm25List = items; // already sorted by bm25Rank + + const fused = reciprocalRankFusion([ + { items: bm25List, key: (i) => i.id }, + ]); + + expect(fused.map((r) => r.item.id)).toEqual([ + "old-relevant", + "mid-mid", + "new-weak", + ]); + }); + + test("BM25 + recency: items appearing in both lists get boosted", () => { + const items = makeItems(); + const bm25List = items; // [old-relevant, mid-mid, new-weak] + const recencyList = [...items].sort((a, b) => b.created_at - a.created_at); + // recencyList = [new-weak, mid-mid, old-relevant] + + const fused = reciprocalRankFusion([ + { items: bm25List, key: (i) => i.id }, + { items: recencyList, key: (i) => i.id }, + ]); + + // With k=60: + // old-relevant: rank 0 in BM25 (1/60) + rank 2 in recency (1/62) + // new-weak: rank 2 in BM25 (1/62) + rank 0 in recency (1/60) + // → old-relevant and new-weak have equal scores (symmetric) + // mid-mid: rank 1 in both (2 * 1/61) — very close but slightly less + // + // The key property: ALL items get boosted vs BM25-only, and items + // that are extreme on one axis but weak on the other (old-relevant, + // new-weak) score the same — the symmetry is correct. + expect(fused).toHaveLength(3); + + // old-relevant and new-weak should have equal scores (symmetric ranks) + const oldRelevant = fused.find((r) => r.item.id === "old-relevant")!; + const newWeak = fused.find((r) => r.item.id === "new-weak")!; + expect(oldRelevant.score).toBeCloseTo(newWeak.score, 10); + }); + + test("recency list does not duplicate items in output", () => { + const items = makeItems(); + const bm25List = items; + const recencyList = [...items].sort((a, b) => b.created_at - a.created_at); + + const fused = reciprocalRankFusion([ + { items: bm25List, key: (i) => i.id }, + { items: recencyList, key: (i) => i.id }, + ]); + + // Should have exactly 3 unique items, not 6 + expect(fused).toHaveLength(3); + const ids = fused.map((r) => r.item.id); + expect(new Set(ids).size).toBe(3); + }); + + test("scores are strictly higher with recency list than without", () => { + const items = makeItems(); + const bm25List = items; + const recencyList = [...items].sort((a, b) => b.created_at - a.created_at); + + const withoutRecency = reciprocalRankFusion([ + { items: bm25List, key: (i) => i.id }, + ]); + const withRecency = reciprocalRankFusion([ + { items: bm25List, key: (i) => i.id }, + { items: recencyList, key: (i) => i.id }, + ]); + + // Every item should have a higher score with the recency list + for (const item of withRecency) { + const without = withoutRecency.find((r) => r.item.id === item.item.id); + expect(item.score).toBeGreaterThan(without!.score); + } + }); +}); diff --git a/packages/core/test/distillation.test.ts b/packages/core/test/distillation.test.ts index 17fc166..c595630 100644 --- a/packages/core/test/distillation.test.ts +++ b/packages/core/test/distillation.test.ts @@ -5,6 +5,7 @@ import { loadForSession, latestMetaObservations, metaDistill, + detectSegments, } from "../src/distillation"; import * as temporal from "../src/temporal"; import { CHUNK_TERMINATOR, partsToText } from 
"../src/temporal"; @@ -823,3 +824,106 @@ describe("metaDistill — anchored second round", () => { expect(metaRows[1]!.observations).toBe("updated"); }); }); + +// ─── detectSegments (time-gap-aware splitting) ────────────────────────────── + +describe("detectSegments", () => { + function msgs(n: number, timestamps?: number[]): temporal.TemporalMessage[] { + return Array.from({ length: n }, (_, i) => + msg("user", `message ${i}`, { + id: `seg-msg-${i}`, + created_at: timestamps ? timestamps[i] : T + i * 1000, + }), + ); + } + + test("returns single segment when under maxSegment", () => { + const result = detectSegments(msgs(10), 30); + expect(result).toHaveLength(1); + expect(result[0]).toHaveLength(10); + }); + + test("count-based split with uniform timestamps", () => { + // 40 messages at 1-second intervals → no significant time gap + // Should split at maxSegment=30 boundary + const result = detectSegments(msgs(40), 30); + expect(result).toHaveLength(2); + expect(result[0]).toHaveLength(30); + expect(result[1]).toHaveLength(10); + }); + + test("time-gap split when a large gap exists", () => { + // 20 messages: first 10 at 1s intervals, then a 1-hour gap, then 10 more + const timestamps = [ + ...Array.from({ length: 10 }, (_, i) => T + i * 1000), + ...Array.from({ length: 10 }, (_, i) => T + 3_600_000 + i * 1000), + ]; + // maxSegment=15, so count-based would split at 15 + // but the time gap at index 10 is 3,599,000ms vs median ~1000ms → should split at 10 + const result = detectSegments(msgs(20, timestamps), 15); + expect(result).toHaveLength(2); + expect(result[0]).toHaveLength(10); + expect(result[1]).toHaveLength(10); + }); + + test("merges tiny trailing segment into previous", () => { + // 32 messages with uniform timestamps, maxSegment=30 + // First split at 30, leaving 2 → merged into first segment + const result = detectSegments(msgs(32), 30); + expect(result).toHaveLength(1); + expect(result[0]).toHaveLength(32); + }); + + test("does not merge trailing segment with ≥ 3 messages", () => { + // 33 messages with uniform timestamps, maxSegment=30 + // First split at 30, leaving 3 → NOT merged (≥ MIN_SEGMENT) + const result = detectSegments(msgs(33), 30); + expect(result).toHaveLength(2); + expect(result[0]).toHaveLength(30); + expect(result[1]).toHaveLength(3); + }); + + test("multiple time-gap splits in large message set", () => { + // 46 messages in 3 bursts of ~15, separated by 1-hour gaps + // With maxSegment=29, the right half (31 msgs) exceeds maxSegment + // and triggers a second split at the next time gap + const timestamps = [ + ...Array.from({ length: 15 }, (_, i) => T + i * 1000), + ...Array.from({ length: 15 }, (_, i) => T + 3_600_000 + i * 1000), + ...Array.from({ length: 16 }, (_, i) => T + 7_200_000 + i * 1000), + ]; + const result = detectSegments(msgs(46, timestamps), 29); + expect(result).toHaveLength(3); + expect(result[0]).toHaveLength(15); + expect(result[1]).toHaveLength(15); + expect(result[2]).toHaveLength(16); + }); + + test("preserves original message references", () => { + const messages = msgs(20); + const result = detectSegments(messages, 10); + const flat = result.flat(); + expect(flat).toHaveLength(20); + // Check all original messages are present + for (const m of messages) { + expect(flat.find((f) => f.id === m.id)).toBeDefined(); + } + }); + + test("ignores time gap if it would create segment < MIN_SEGMENT", () => { + // 20 messages: first 2 at t=0, then a huge gap, then 18 more + // The gap at index 2 creates a left segment of only 2 (< 
MIN_SEGMENT=3) + // so it should NOT split there → falls back to count-based + const timestamps = [ + T, + T + 1000, + T + 10_000_000, + ...Array.from({ length: 17 }, (_, i) => T + 10_000_000 + (i + 1) * 1000), + ]; + const result = detectSegments(msgs(20, timestamps), 15); + // Should use count-based split at 15, not time-gap at 2 + expect(result).toHaveLength(2); + expect(result[0]).toHaveLength(15); + expect(result[1]).toHaveLength(5); + }); +});