From 9d1b504e58813c95fc7d5fd4ff18b630f376eaf6 Mon Sep 17 00:00:00 2001 From: stainlu Date: Wed, 6 May 2026 12:19:01 +0800 Subject: [PATCH] perf: bound repair plan pr hydration --- CHANGELOG.md | 2 + src/repair/github-cli.ts | 56 +++++++++++++++++--- src/repair/lib.ts | 5 ++ src/repair/plan-cluster.ts | 33 ++++++++++-- test/repair/github-cli.test.ts | 21 +++++++- test/repair/plan-cluster.test.ts | 90 +++++++++++++++++++++++++++++++- 6 files changed, 194 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2c4e66549d..d77d2c1d77 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -105,6 +105,8 @@ checkpoint, and status-only commits are intentionally omitted. tokens exist, keeping untargeted fix prompts cheaper to build. - Requested 100-item REST pages for paginated GitHub list calls, reducing review and repair API page fan-out on large issues and pull requests. +- Bounded repair cluster PR file and commit hydration to the context carried + into generated plans, avoiding full pagination for very large pull requests. - Compacted review prompt context lazily so large comment, timeline, file, and commit lists no longer process entries that are omitted from Codex input. - Scoped every sweep workflow status write to the active target repository so diff --git a/src/repair/github-cli.ts b/src/repair/github-cli.ts index 3fa6370ea5..c7602ccf2b 100644 --- a/src/repair/github-cli.ts +++ b/src/repair/github-cli.ts @@ -48,12 +48,19 @@ export function ghJsonBestEffort( } export function githubPaginatedPath(apiPath: string): string { - const [basePart, query = ""] = apiPath.split("?", 2); - const base = basePart ?? apiPath; - const params = new URLSearchParams(query); - if (!params.has("per_page")) params.set("per_page", "100"); - const serialized = params.toString(); - return serialized ? `${base}?${serialized}` : base; + return githubPathWithQueryDefaults(apiPath, { per_page: "100" }); +} + +export function githubLimitedPagePath(apiPath: string, limit: number, page = 1): string { + const normalizedLimit = Number.isFinite(limit) ? Math.floor(limit) : 1; + const normalizedPage = Number.isFinite(page) ? Math.floor(page) : 1; + const pageSize = Math.max(1, Math.min(100, normalizedLimit)); + const pageNumber = Math.max(1, normalizedPage); + return githubPathWithQueryDefaults( + apiPath, + { per_page: String(pageSize), page: String(pageNumber) }, + { override: true }, + ); } export function ghPaged(apiPath: string, options: GhRunOptions = {}): T[] { @@ -89,6 +96,28 @@ export async function ghPagedWithRetryAsync( return pages.flatMap((page: JsonValue) => (Array.isArray(page) ? (page as T[]) : [])); } +export function ghPagedLimit( + apiPath: string, + limit: number, + options: GhRunOptions = {}, +): T[] { + const max = Number.isFinite(limit) ? Math.max(0, Math.floor(limit)) : 0; + if (max <= 0) return []; + + const perPage = Math.min(100, max); + const out: T[] = []; + for (let page = 1; out.length < max; page += 1) { + const entries = ghJson( + ["api", githubLimitedPagePath(apiPath, perPage, page)], + options, + ); + if (!Array.isArray(entries) || entries.length === 0) break; + out.push(...(entries as T[])); + if (entries.length < perPage) break; + } + return out.slice(0, max); +} + export function ghText(ghArgs: string[], options: GhRunOptions = {}): string { const text = execFileSync("gh", ghArgs, { cwd: options.cwd ?? repoRoot(), @@ -234,6 +263,21 @@ function resolveRetryOptions(options: GhRetryOptions | number): GhRetryOptions { return options; } +function githubPathWithQueryDefaults( + apiPath: string, + defaults: Record, + { override = false }: { override?: boolean } = {}, +): string { + const [basePart, query = ""] = apiPath.split("?", 2); + const base = basePart ?? apiPath; + const params = new URLSearchParams(query); + for (const [key, value] of Object.entries(defaults)) { + if (override || !params.has(key)) params.set(key, value); + } + const serialized = params.toString(); + return serialized ? `${base}?${serialized}` : base; +} + function bufferLikeToString(value: unknown): string { if (Buffer.isBuffer(value)) return value.toString("utf8"); return String(value ?? ""); diff --git a/src/repair/lib.ts b/src/repair/lib.ts index 4157d2ead4..06826c7423 100755 --- a/src/repair/lib.ts +++ b/src/repair/lib.ts @@ -377,9 +377,14 @@ function compactPlanItem(item: LooseRecord) { branch_writable: pull.branch_writable, branch_write_reason: pull.branch_write_reason, changed_files: pull.changed_files, + files_hydrated: pull.files_hydrated, + files_truncated: pull.files_truncated, additions: pull.additions, deletions: pull.deletions, files: (pull.files ?? []).slice(0, 40), + commits_count: pull.commits_count, + commits_hydrated: pull.commits_hydrated, + commits_truncated: pull.commits_truncated, commits: (pull.commits ?? []).slice(0, 10), reviews: (pull.reviews ?? []).slice(0, 12), review_comments_count: pull.review_comments_count, diff --git a/src/repair/plan-cluster.ts b/src/repair/plan-cluster.ts index bd770c1f30..ac832afa91 100644 --- a/src/repair/plan-cluster.ts +++ b/src/repair/plan-cluster.ts @@ -11,13 +11,15 @@ import { repoRoot, validateJob, } from "./lib.js"; -import { ghJson, ghPaged, ghText } from "./github-cli.js"; +import { ghJson, ghPaged, ghPagedLimit, ghText } from "./github-cli.js"; import { hasSecurityRepairOptInLabel } from "./security-boundary.js"; const MAX_LINKED_REFS = Number(process.env.CLAWSWEEPER_MAX_LINKED_REFS ?? 0); const HYDRATE_COMMENTS = process.env.CLAWSWEEPER_HYDRATE_COMMENTS === "1"; const MAX_COMMENTS_PER_ITEM = Number(process.env.CLAWSWEEPER_MAX_COMMENTS_PER_ITEM ?? 30); const MAX_REVIEW_COMMENTS_PER_PR = Number(process.env.CLAWSWEEPER_MAX_REVIEW_COMMENTS_PER_PR ?? 50); +const MAX_FILES_PER_PR = Number(process.env.CLAWSWEEPER_MAX_FILES_PER_PR ?? 80); +const MAX_COMMITS_PER_PR = Number(process.env.CLAWSWEEPER_MAX_COMMITS_PER_PR ?? 80); const MAINTAINER_AUTHOR_ASSOCIATIONS = new Set(["OWNER", "MEMBER", "COLLABORATOR"]); const REVIEW_BOT_PATTERN = /\b(greptile|codex|asile|coderabbit|code rabbit|copilot|reviewdog|sonar|deepsource|codecov|github-actions)\b/i; @@ -141,6 +143,8 @@ const plan = { hydrate_comments: HYDRATE_COMMENTS, max_comments_per_item: MAX_COMMENTS_PER_ITEM, max_review_comments_per_pr: MAX_REVIEW_COMMENTS_PER_PR, + max_files_per_pr: MAX_FILES_PER_PR, + max_commits_per_pr: MAX_COMMITS_PER_PR, }, items: itemList.map((item: JsonValue) => summarizeItem(item, job)), canonical_candidates: canonicalCandidates(itemList, job), @@ -184,10 +188,16 @@ function hydrateItem(repo: string, number: JsonValue) { } const comments = HYDRATE_COMMENTS ? ghPaged(`repos/${repo}/issues/${number}/comments`) : []; const pullRequest = issue.pull_request ? ghJson(["api", `repos/${repo}/pulls/${number}`]) : null; - const files = pullRequest ? ghPaged(`repos/${repo}/pulls/${number}/files`) : []; - const commits = pullRequest ? ghPaged(`repos/${repo}/pulls/${number}/commits`) : []; + const files = pullRequest + ? ghPagedLimit(`repos/${repo}/pulls/${number}/files`, MAX_FILES_PER_PR) + : []; + const commits = pullRequest + ? ghPagedLimit(`repos/${repo}/pulls/${number}/commits`, MAX_COMMITS_PER_PR) + : []; const reviews = pullRequest ? ghPaged(`repos/${repo}/pulls/${number}/reviews`) : []; const reviewComments = pullRequest ? ghPaged(`repos/${repo}/pulls/${number}/comments`) : []; + const changedFilesCount = countValue(pullRequest?.changed_files, files.length); + const commitsCount = countValue(pullRequest?.commits, commits.length); const checks = pullRequest ? ghPrChecks(repo, number) : []; return { @@ -240,13 +250,18 @@ function hydrateItem(repo: string, number: JsonValue) { .filter(Boolean), additions: pullRequest.additions, deletions: pullRequest.deletions, - changed_files: pullRequest.changed_files, + changed_files: changedFilesCount, + files_hydrated: files.length, + files_truncated: Math.max(0, changedFilesCount - files.length), files: files.map((file: JsonValue) => ({ filename: file.filename, status: file.status, additions: file.additions, deletions: file.deletions, })), + commits_count: commitsCount, + commits_hydrated: commits.length, + commits_truncated: Math.max(0, commitsCount - commits.length), commits: commits.map((commit: JsonValue) => ({ sha: commit.sha, message: firstLine(commit.commit?.message), @@ -302,6 +317,11 @@ function unavailableItem(repo: string, number: JsonValue, error: JsonValue) { }; } +function countValue(value: JsonValue, fallback = 0) { + const number = Number(value); + return Number.isFinite(number) && number >= 0 ? Math.floor(number) : fallback; +} + function summarizeItem(item: LooseRecord, job: LooseRecord) { return { repo: item.repo, @@ -356,9 +376,14 @@ function summarizeItem(item: LooseRecord, job: LooseRecord) { requested_reviewers: item.pull_request.requested_reviewers, requested_teams: item.pull_request.requested_teams, changed_files: item.pull_request.changed_files, + files_hydrated: item.pull_request.files_hydrated, + files_truncated: item.pull_request.files_truncated, additions: item.pull_request.additions, deletions: item.pull_request.deletions, files: item.pull_request.files, + commits_count: item.pull_request.commits_count, + commits_hydrated: item.pull_request.commits_hydrated, + commits_truncated: item.pull_request.commits_truncated, commits: item.pull_request.commits, reviews: item.pull_request.reviews, review_comments_count: item.pull_request.review_comments.length, diff --git a/test/repair/github-cli.test.ts b/test/repair/github-cli.test.ts index 7ec49469b9..e1f4c90b4e 100644 --- a/test/repair/github-cli.test.ts +++ b/test/repair/github-cli.test.ts @@ -1,7 +1,7 @@ import assert from "node:assert/strict"; import test from "node:test"; -import { githubPaginatedPath } from "../../dist/repair/github-cli.js"; +import { githubLimitedPagePath, githubPaginatedPath } from "../../dist/repair/github-cli.js"; test("githubPaginatedPath requests maximum REST page size by default", () => { assert.equal( @@ -17,3 +17,22 @@ test("githubPaginatedPath requests maximum REST page size by default", () => { "repos/openclaw/openclaw/issues?per_page=50&state=open", ); }); + +test("githubLimitedPagePath caps one REST page and preserves existing filters", () => { + assert.equal( + githubLimitedPagePath("repos/openclaw/openclaw/pulls/123/files", 80), + "repos/openclaw/openclaw/pulls/123/files?per_page=80&page=1", + ); + assert.equal( + githubLimitedPagePath( + "repos/openclaw/openclaw/pulls/123/files?state=open&per_page=100", + 250, + 3, + ), + "repos/openclaw/openclaw/pulls/123/files?state=open&per_page=100&page=3", + ); + assert.equal( + githubLimitedPagePath("repos/openclaw/openclaw/pulls/123/files", 0, 0), + "repos/openclaw/openclaw/pulls/123/files?per_page=1&page=1", + ); +}); diff --git a/test/repair/plan-cluster.test.ts b/test/repair/plan-cluster.test.ts index 06dc5748f2..feb9039e28 100644 --- a/test/repair/plan-cluster.test.ts +++ b/test/repair/plan-cluster.test.ts @@ -222,9 +222,71 @@ test("plan-cluster treats same-repo PR branches as writable despite raw maintain assert.match(pull.branch_write_reason, /same-repo head branch/); }); +test("plan-cluster bounds PR file and commit hydration", () => { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "clawsweeper-plan-bounded-pr-")); + const binDir = path.join(tmp, "bin"); + const jobPath = path.join(tmp, "job.md"); + const runDir = path.join(tmp, "run"); + fs.mkdirSync(binDir); + fs.writeFileSync(path.join(binDir, "gh"), fakeGhScript(), { mode: 0o755 }); + + fs.writeFileSync( + jobPath, + [ + "---", + "repo: openclaw/openclaw", + "cluster_id: automerge-openclaw-openclaw-74134", + "mode: autonomous", + "allowed_actions:", + " - comment", + " - fix", + " - raise_pr", + "blocked_actions:", + " - close", + " - merge", + "source: pr_automerge", + "canonical:", + " - #74134", + "candidates:", + " - #74134", + "allow_fix_pr: true", + "allow_merge: false", + "security_policy: central_security_only", + "security_sensitive: false", + "---", + "Maintainer opted #74134 into ClawSweeper automerge.", + "", + ].join("\n"), + ); + + execFileSync(process.execPath, ["dist/repair/plan-cluster.js", jobPath, "--run-dir", runDir], { + cwd: process.cwd(), + env: { + ...process.env, + PATH: `${binDir}${path.delimiter}${process.env.PATH}`, + FAKE_GH_LARGE_PR: "1", + }, + stdio: "pipe", + }); + + const clusterPlan = JSON.parse(fs.readFileSync(path.join(runDir, "cluster-plan.json"), "utf8")); + const pull = clusterPlan.items[0].pull_request; + + assert.equal(pull.changed_files, 120); + assert.equal(pull.files_hydrated, 80); + assert.equal(pull.files_truncated, 40); + assert.equal(pull.files.length, 80); + assert.equal(pull.commits_count, 120); + assert.equal(pull.commits_hydrated, 80); + assert.equal(pull.commits_truncated, 40); + assert.equal(pull.commits.length, 80); +}); + function fakeGhScript() { return `#!/usr/bin/env node +const fs = require("node:fs"); const args = process.argv.slice(2); +if (process.env.FAKE_GH_LOG) fs.appendFileSync(process.env.FAKE_GH_LOG, args.join(" ") + "\\n"); function write(value) { process.stdout.write(JSON.stringify(value)); } @@ -248,6 +310,10 @@ if (isPaged()) { write([pagedResponse(endpoint)]); process.exit(0); } +if (/\\?(?:.*&)?per_page=/.test(endpoint)) { + write(pagedResponse(endpoint)); + process.exit(0); +} if (endpoint === "repos/openclaw/openclaw/issues/74134") { write(issue(74134, [], "Replacement PR: https://github.com/openclaw/openclaw/pull/74742")); process.exit(0); @@ -282,6 +348,7 @@ function issue(number, labels, body) { }; } function pull(number, sha) { + const large = process.env.FAKE_GH_LARGE_PR === "1"; return { draft: false, merged: false, @@ -300,11 +367,30 @@ function pull(number, sha) { requested_teams: [], additions: 1, deletions: 0, - changed_files: 1, + changed_files: large ? 120 : 1, + commits: large ? 120 : 1, + review_comments: 0, }; } function pagedResponse(endpoint) { - if (endpoint.endsWith("/commits")) return [{ sha: "commit-sha", commit: { message: "test" }, author: { login: "contributor" } }]; + const [endpointPath, query = ""] = endpoint.split("?"); + const params = new URLSearchParams(query); + const limit = Math.max(1, Number(params.get("per_page") || 1)); + if (endpointPath.endsWith("/files")) { + return Array.from({ length: limit }, (_, index) => ({ + filename: "src/file-" + index + ".ts", + status: "modified", + additions: 1, + deletions: 0, + })); + } + if (endpointPath.endsWith("/commits")) { + return Array.from({ length: limit }, (_, index) => ({ + sha: "commit-sha-" + index, + commit: { message: "test " + index }, + author: { login: "contributor" }, + })); + } return []; } `;