From 30e6f9eae3eea3d1762975a03b63c80b4fcfc526 Mon Sep 17 00:00:00 2001 From: Kix Panganiban Date: Fri, 8 May 2026 11:41:35 +0000 Subject: [PATCH] Index Cursor transcripts from agent-transcripts/, walk up for worker.log --- .../cursor-session-synchronizer.provider.ts | 71 ++++++++--- .../tests/cursor-session-synchronizer.test.ts | 120 ++++++++++++++++++ 2 files changed, 173 insertions(+), 18 deletions(-) create mode 100644 server/modules/providers/tests/cursor-session-synchronizer.test.ts diff --git a/server/modules/providers/list/cursor/cursor-session-synchronizer.provider.ts b/server/modules/providers/list/cursor/cursor-session-synchronizer.provider.ts index 4be02dee2e..2f960eae13 100644 --- a/server/modules/providers/list/cursor/cursor-session-synchronizer.provider.ts +++ b/server/modules/providers/list/cursor/cursor-session-synchronizer.provider.ts @@ -1,4 +1,3 @@ -import crypto from 'node:crypto'; import fs from 'node:fs'; import fsp from 'node:fs/promises'; import os from 'node:os'; @@ -35,13 +34,20 @@ async function listDirectoryEntriesSafe( /** * Session indexer for Cursor transcript artifacts. + * + * Recent cursor-agent versions write JSONL transcripts under + * ~/.cursor/projects/<project>/agent-transcripts/<chatId>/<chatId>.jsonl + * (sometimes nested one level deeper). The legacy + * ~/.cursor/chats/<projectHash>/ + * directory still exists but now holds SQLite `store.db` files used by the + * loader (cursor-sessions.provider.ts), not JSONL the indexer can parse. */ export class CursorSessionSynchronizer implements IProviderSessionSynchronizer { private readonly provider = 'cursor' as const; private readonly cursorHome = path.join(os.homedir(), '.cursor'); /** - * Scans Cursor chats and upserts discovered sessions into DB. + * Scans Cursor transcripts and upserts discovered sessions into DB.
*/ async synchronize(since?: Date): Promise { const projectsDir = path.join(this.cursorHome, 'projects'); @@ -54,19 +60,19 @@ export class CursorSessionSynchronizer implements IProviderSessionSynchronizer { continue; } - const workerLogPath = path.join(projectsDir, entry.name, 'worker.log'); + const projectDir = path.join(projectsDir, entry.name); + const workerLogPath = path.join(projectDir, 'worker.log'); const projectPath = await this.extractProjectPathFromWorkerLog(workerLogPath); if (!projectPath || seenProjectPaths.has(projectPath)) { continue; } - seenProjectPaths.add(projectPath); - const projectHash = this.md5(projectPath); - const chatsDir = path.join(this.cursorHome, 'chats', projectHash); - const files = await findFilesRecursivelyCreatedAfter(chatsDir, '.jsonl', since ?? null); + + const transcriptsDir = path.join(projectDir, 'agent-transcripts'); + const files = await findFilesRecursivelyCreatedAfter(transcriptsDir, '.jsonl', since ?? null); for (const filePath of files) { - const parsed = await this.processSessionFile(filePath); + const parsed = await this.processSessionFile(filePath, projectPath); if (!parsed) { continue; } @@ -89,7 +95,7 @@ export class CursorSessionSynchronizer implements IProviderSessionSynchronizer { } /** - * Parses and upserts one Cursor session JSONL file. + * Parses and upserts one Cursor session JSONL file (called by the file watcher). */ async synchronizeFile(filePath: string): Promise { if (!filePath.endsWith('.jsonl')) { @@ -114,10 +120,30 @@ export class CursorSessionSynchronizer implements IProviderSessionSynchronizer { } /** - * Produces the same project hash Cursor uses in chat directory names. + * Walks up from a transcript file looking for the project's worker.log. + * + * Cursor has nested transcripts at varying depths over time + * (`agent-transcripts/<chatId>/<chatId>.jsonl` and + * `agent-transcripts/<chatId>/<subdir>/<chatId>.jsonl` both occur in the wild), + * so a fixed `dirname()` count silently skipped the deeper variant.
*/ - private md5(input: string): string { - return crypto.createHash('md5').update(input).digest('hex'); + private async findProjectDirForTranscript(filePath: string): Promise { + const projectsRoot = path.join(this.cursorHome, 'projects'); + let current = path.dirname(filePath); + while (current.startsWith(projectsRoot + path.sep) && current !== projectsRoot) { + try { + await fsp.access(path.join(current, 'worker.log')); + return current; + } catch { + // keep walking up + } + const parent = path.dirname(current); + if (parent === current) { + break; + } + current = parent; + } + return null; } /** @@ -147,16 +173,25 @@ export class CursorSessionSynchronizer implements IProviderSessionSynchronizer { /** * Extracts session metadata from one Cursor JSONL session file. */ - private async processSessionFile(filePath: string): Promise { + private async processSessionFile( + filePath: string, + projectPathHint?: string + ): Promise { const sessionId = path.basename(filePath, '.jsonl'); - const grandparentDir = path.dirname(path.dirname(filePath)); - const workerLogPath = path.join(grandparentDir, 'worker.log'); - const projectPath = await this.extractProjectPathFromWorkerLog(workerLogPath); + let projectPath = projectPathHint ?? 
null; if (!projectPath) { - return null; + const projectDir = await this.findProjectDirForTranscript(filePath); + if (!projectDir) { + return null; + } + projectPath = await this.extractProjectPathFromWorkerLog(path.join(projectDir, 'worker.log')); + if (!projectPath) { + return null; + } } + const resolvedProjectPath = projectPath; return extractFirstValidJsonlData(filePath, (rawData) => { const data = rawData as Record; if (data.role !== 'user') { @@ -168,7 +203,7 @@ export class CursorSessionSynchronizer implements IProviderSessionSynchronizer { return { sessionId, - projectPath, + projectPath: resolvedProjectPath, sessionName: normalizeSessionName(firstLine, 'Untitled Cursor Session'), }; }); diff --git a/server/modules/providers/tests/cursor-session-synchronizer.test.ts b/server/modules/providers/tests/cursor-session-synchronizer.test.ts new file mode 100644 index 0000000000..4f70b6b0e4 --- /dev/null +++ b/server/modules/providers/tests/cursor-session-synchronizer.test.ts @@ -0,0 +1,120 @@ +import assert from 'node:assert/strict'; +import fs from 'node:fs/promises'; +import os from 'node:os'; +import path from 'node:path'; +import test from 'node:test'; + +const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), 'cursor-sync-')); +process.env.DATABASE_PATH = path.join(tempRoot, 'auth.db'); + +const { initializeDatabase, sessionsDb, scanStateDb } = await import( + '@/modules/database/index.js' +); +const { CursorSessionSynchronizer } = await import( + '@/modules/providers/list/cursor/cursor-session-synchronizer.provider.js' +); +const { closeConnection } = await import('@/modules/database/connection.js'); + +const patchHomeDir = (nextHomeDir: string) => { + const original = os.homedir; + (os as any).homedir = () => nextHomeDir; + return () => { + (os as any).homedir = original; + }; +}; + +const writeJsonl = async (filePath: string, rows: unknown[]) => { + await fs.mkdir(path.dirname(filePath), { recursive: true }); + await fs.writeFile(filePath, rows.map((r) 
=> JSON.stringify(r)).join('\n') + '\n'); +}; + +const userQueryRow = (text: string) => ({ + role: 'user', + message: { content: [{ type: 'text', text: `\n${text}\n` }] }, +}); + +/** + * Cursor's transcript layout has shifted over time: + * - jsonl at agent-transcripts/<chatId>/<chatId>.jsonl (current) + * - jsonl at agent-transcripts/<chatId>/<subdir>/<chatId>.jsonl (older) + * Both must be picked up. The legacy ~/.cursor/chats/<projectHash>/ + * directory now holds only SQLite store.db files used by the loader. + */ +test('CursorSessionSynchronizer indexes transcripts at both nested depths', { concurrency: false }, async () => { + const restoreHomeDir = patchHomeDir(tempRoot); + try { + await initializeDatabase(); + + const cursorHome = path.join(tempRoot, '.cursor'); + const projectsDir = path.join(cursorHome, 'projects'); + const projectDir = path.join(projectsDir, 'home-coder-cc-backend'); + const transcriptsDir = path.join(projectDir, 'agent-transcripts'); + + await fs.mkdir(projectDir, { recursive: true }); + await fs.writeFile( + path.join(projectDir, 'worker.log'), + [ + '[info] starting worker', + '[info] Getting tree structure for workspacePath=/home/coder/cc-backend', + ].join('\n') + ); + + const shallowChatId = '11111111-2222-3333-4444-555555555555'; + const shallowJsonl = path.join(transcriptsDir, shallowChatId, `${shallowChatId}.jsonl`); + await writeJsonl(shallowJsonl, [userQueryRow('refactor the watchtower analytics route')]); + + const deepChatId = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee'; + const deepJsonl = path.join(transcriptsDir, deepChatId, 'turn-001', `${deepChatId}.jsonl`); + await writeJsonl(deepJsonl, [userQueryRow('add a parity test for the bun build')]); + + // Project that lacks worker.log — must be ignored, not crash the scan.
+ await fs.mkdir(path.join(projectsDir, 'tmp-orphan'), { recursive: true }); + await writeJsonl( + path.join(projectsDir, 'tmp-orphan', 'agent-transcripts', 'orphan', 'orphan.jsonl'), + [userQueryRow('orphan')] + ); + + // Legacy ~/.cursor/chats SQLite presence must NOT cause indexer to claim sessions. + await fs.mkdir(path.join(cursorHome, 'chats', 'deadbeef'), { recursive: true }); + await fs.writeFile(path.join(cursorHome, 'chats', 'deadbeef', 'store.db'), ''); + + const sync = new CursorSessionSynchronizer(); + const processed = await sync.synchronize(); + assert.equal(processed, 2, 'should index both shallow and deep transcripts'); + + const shallow = sessionsDb.getSessionById(shallowChatId); + assert.ok(shallow, 'shallow session indexed'); + assert.equal(shallow!.provider, 'cursor'); + assert.equal(shallow!.project_path, '/home/coder/cc-backend'); + assert.match(shallow!.custom_name ?? '', /watchtower analytics/); + + const deep = sessionsDb.getSessionById(deepChatId); + assert.ok(deep, 'deep session indexed'); + assert.equal(deep!.project_path, '/home/coder/cc-backend'); + assert.match(deep!.custom_name ?? '', /parity test/); + + // Per-file path used by the watcher must also resolve project_path + // for transcripts at both depths without a hint. + const ad = path.join(transcriptsDir, 'cccccccc-cccc-cccc-cccc-cccccccccccc'); + const adHocJsonl = path.join(ad, 'cccccccc-cccc-cccc-cccc-cccccccccccc.jsonl'); + await writeJsonl(adHocJsonl, [userQueryRow('hot-added by watcher')]); + const indexedId = await sync.synchronizeFile(adHocJsonl); + assert.equal(indexedId, 'cccccccc-cccc-cccc-cccc-cccccccccccc'); + const adHoc = sessionsDb.getSessionById('cccccccc-cccc-cccc-cccc-cccccccccccc'); + assert.equal(adHoc!.project_path, '/home/coder/cc-backend'); + + // Transcripts outside ~/.cursor/projects must be rejected. 
+ const outsideJsonl = path.join(tempRoot, 'random', 'dddddddd.jsonl'); + await writeJsonl(outsideJsonl, [userQueryRow('outside cursor home')]); + assert.equal(await sync.synchronizeFile(outsideJsonl), null); + + // Incremental rescan with `since` set one minute in the future finds nothing new. + scanStateDb.updateLastScannedAt(new Date(Date.now() + 60_000)); + const reprocessed = await sync.synchronize(scanStateDb.getLastScannedAt() ?? undefined); + assert.equal(reprocessed, 0, 'incremental scan should skip files older than `since`'); + } finally { + closeConnection(); + restoreHomeDir(); + await fs.rm(tempRoot, { recursive: true, force: true }); + } +});