From 4161851e94f44a5d4c96a93bbb4cdd240d4356d1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 4 May 2026 21:12:03 +0000 Subject: [PATCH 1/5] Initial plan From c24442e4b7ec49b6f9fbd411c107507095db3d07 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 4 May 2026 21:14:59 +0000 Subject: [PATCH 2/5] Reduce verbosity of benchmark PR comment: wrap details in <details>
element, show regression summary Agent-Logs-Url: https://github.com/Azure/typespec-azure/sessions/f466ca11-42d8-472b-9318-2756aad71ca6 Co-authored-by: timotheeguerin <1031227+timotheeguerin@users.noreply.github.com> --- packages/benchmark/src/cli.ts | 3 -- packages/benchmark/src/format-comment.ts | 40 ++++++++++++++---------- 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/packages/benchmark/src/cli.ts b/packages/benchmark/src/cli.ts index 48ba67537e..d26384eadd 100644 --- a/packages/benchmark/src/cli.ts +++ b/packages/benchmark/src/cli.ts @@ -43,7 +43,6 @@ Compare options: --output Output file (default: stdout) --format Output format: "console" or "markdown" (default: console) --detailed Show per-rule/per-emitter-step breakdown - --changes-only Only show metrics with notable changes Generate-history options: --dir Read results from a directory instead of the benchmark-data git branch @@ -122,7 +121,6 @@ async function compareCommand(args: Record): Promise { const threshold = args["threshold"] ? parseFloat(args["threshold"]) : undefined; const format = args["format"] ?? "console"; - const changesOnly = args["changes-only"] === "true"; const outputFile = args["output"]; const baseline = await loadJson(baselineFile); @@ -133,7 +131,6 @@ async function compareCommand(args: Record): Promise { if (format === "markdown") { output = formatPrComment(comparisons, baseline.commit, current.commit, { threshold, - changesOnly, }); } else { output = formatConsoleSummary(comparisons, threshold); diff --git a/packages/benchmark/src/format-comment.ts b/packages/benchmark/src/format-comment.ts index aa620aaecf..cff5d784ab 100644 --- a/packages/benchmark/src/format-comment.ts +++ b/packages/benchmark/src/format-comment.ts @@ -145,8 +145,6 @@ const LEGEND = export interface FormatOptions { /** Change threshold for highlighting (default: 5%). */ threshold?: number; - /** Only show metrics with notable changes. 
*/ - changesOnly?: boolean; } /** Format comparison results as a GitHub PR comment markdown. */ @@ -157,30 +155,23 @@ export function formatPrComment( options: FormatOptions = {}, ): string { const threshold = options.threshold ?? DEFAULT_THRESHOLD; - const changesOnly = options.changesOnly ?? false; const lines: string[] = []; lines.push("## ⚡ Benchmark Results\n"); - lines.push( - `Comparing [\`${currentCommit.slice(0, 7)}\`] against baseline [\`${baselineCommit.slice(0, 7)}\`]\n`, - ); // Average metrics across all specs const averaged = averageComparisonMetrics(comparisons); + const regressions = averaged.filter((m) => m.percentChange >= threshold); - let metrics = averaged; - if (changesOnly) { - metrics = metrics.filter((m) => Math.abs(m.percentChange) >= threshold); - } - - if (metrics.length === 0) { - lines.push("_No notable changes._\n"); + // Top-level summary: show regressions prominently, otherwise a simple ok message + if (regressions.length === 0) { + lines.push("✅ No performance regressions detected.\n"); } else { + lines.push(`⚠️ **${regressions.length} metric(s) regressed** above the ±${threshold}% threshold:\n`); lines.push("| Metric | Baseline | Current | Change |"); lines.push("|--------|----------|---------|--------|"); - for (const m of metrics) { - const indicator = changeIndicator(m.percentChange, threshold); - const changeStr = `${formatPercent(m.percentChange)} ${indicator}`.trim(); + for (const m of regressions) { + const changeStr = `${formatPercent(m.percentChange)} ${changeIndicator(m.percentChange, threshold)}`.trim(); const th = thresholdsFor(m.label); lines.push( `| ${displayLabel(m.label)} | ${formatMsColored(m.baseline, th)} | ${formatMsColored(m.current, th)} | ${changeStr} |`, @@ -189,9 +180,26 @@ export function formatPrComment( lines.push(""); } + // Full details collapsed const specNames = comparisons.map((c) => c.specName).join(", "); + lines.push("
"); + lines.push( + `Full details — comparing ${currentCommit.slice(0, 7)} vs baseline ${baselineCommit.slice(0, 7)}\n`, + ); + lines.push("| Metric | Baseline | Current | Change |"); + lines.push("|--------|----------|---------|--------|"); + for (const m of averaged) { + const changeStr = `${formatPercent(m.percentChange)} ${changeIndicator(m.percentChange, threshold)}`.trim(); + const th = thresholdsFor(m.label); + lines.push( + `| ${displayLabel(m.label)} | ${formatMsColored(m.baseline, th)} | ${formatMsColored(m.current, th)} | ${changeStr} |`, + ); + } + lines.push(""); lines.push(`> Averaged across ${comparisons.length} specs (${specNames}).`); lines.push(`> Threshold: changes > ±${threshold}% are highlighted.`); + lines.push(LEGEND); + lines.push("
"); return lines.join("\n"); } From 5bca0f243866f628f04c2b910b7309646f59e90e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 4 May 2026 21:15:37 +0000 Subject: [PATCH 3/5] Fix threshold message and em dash in benchmark PR comment Agent-Logs-Url: https://github.com/Azure/typespec-azure/sessions/f466ca11-42d8-472b-9318-2756aad71ca6 Co-authored-by: timotheeguerin <1031227+timotheeguerin@users.noreply.github.com> --- packages/benchmark/src/format-comment.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/benchmark/src/format-comment.ts b/packages/benchmark/src/format-comment.ts index cff5d784ab..8a27689f11 100644 --- a/packages/benchmark/src/format-comment.ts +++ b/packages/benchmark/src/format-comment.ts @@ -167,7 +167,7 @@ export function formatPrComment( if (regressions.length === 0) { lines.push("✅ No performance regressions detected.\n"); } else { - lines.push(`⚠️ **${regressions.length} metric(s) regressed** above the ±${threshold}% threshold:\n`); + lines.push(`⚠️ **${regressions.length} metric(s) regressed** above the +${threshold}% threshold:\n`); lines.push("| Metric | Baseline | Current | Change |"); lines.push("|--------|----------|---------|--------|"); for (const m of regressions) { @@ -184,7 +184,7 @@ export function formatPrComment( const specNames = comparisons.map((c) => c.specName).join(", "); lines.push("
"); lines.push( - `Full details — comparing ${currentCommit.slice(0, 7)} vs baseline ${baselineCommit.slice(0, 7)}\n`, + `Full details – comparing ${currentCommit.slice(0, 7)} vs baseline ${baselineCommit.slice(0, 7)}\n`, ); lines.push("| Metric | Baseline | Current | Change |"); lines.push("|--------|----------|---------|--------|"); From a99f13d752c251ea2141158f7cb677fc4fc79d56 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 5 May 2026 10:22:57 +0000 Subject: [PATCH 4/5] Run formatter on benchmark source files Agent-Logs-Url: https://github.com/Azure/typespec-azure/sessions/a233d106-06db-4121-be7c-b04165bf5bbd Co-authored-by: timotheeguerin <1031227+timotheeguerin@users.noreply.github.com> --- packages/benchmark/src/cli.ts | 19 +++++++--- packages/benchmark/src/format-comment.ts | 46 ++++++++++++++++++------ 2 files changed, 50 insertions(+), 15 deletions(-) diff --git a/packages/benchmark/src/cli.ts b/packages/benchmark/src/cli.ts index d26384eadd..03c376c390 100644 --- a/packages/benchmark/src/cli.ts +++ b/packages/benchmark/src/cli.ts @@ -93,7 +93,9 @@ function parseArgs(args: string[]): Record { async function runCommand(args: Record): Promise { const specsDir = args["specs-dir"] ?? defaultSpecsDir; - const iterations = args["iterations"] ? parseInt(args["iterations"], 10) : undefined; + const iterations = args["iterations"] + ? parseInt(args["iterations"], 10) + : undefined; const warmup = args["warmup"] ? 
parseInt(args["warmup"], 10) : undefined; const specs = args["specs"]?.split(","); const commit = args["commit"]; @@ -115,11 +117,15 @@ async function compareCommand(args: Record): Promise { const baselineFile = args["baseline"]; const currentFile = args["current"]; if (!baselineFile || !currentFile) { - console.error("Error: --baseline and --current are required for compare command"); + console.error( + "Error: --baseline and --current are required for compare command", + ); process.exit(1); } - const threshold = args["threshold"] ? parseFloat(args["threshold"]) : undefined; + const threshold = args["threshold"] + ? parseFloat(args["threshold"]) + : undefined; const format = args["format"] ?? "console"; const outputFile = args["output"]; @@ -142,7 +148,12 @@ async function compareCommand(args: Record): Promise { await outputResult(output, outputFile); await writeGitHubSummary( - formatComparisonSummary(comparisons, baseline.commit, current.commit, threshold), + formatComparisonSummary( + comparisons, + baseline.commit, + current.commit, + threshold, + ), ); } diff --git a/packages/benchmark/src/format-comment.ts b/packages/benchmark/src/format-comment.ts index 8a27689f11..cafbb2df5a 100644 --- a/packages/benchmark/src/format-comment.ts +++ b/packages/benchmark/src/format-comment.ts @@ -1,4 +1,9 @@ -import type { BenchmarkResult, ComparisonResult, MetricComparison, RuntimeStats } from "./types.js"; +import type { + BenchmarkResult, + ComparisonResult, + MetricComparison, + RuntimeStats, +} from "./types.js"; const DEFAULT_THRESHOLD = 5; @@ -8,7 +13,10 @@ function formatMs(ms: number): string { } /** Color-code a time value based on thresholds: 🔴 slow, 🟡 moderate, 🟢 fast. 
*/ -function timeIndicator(ms: number, thresholds: readonly [number, number]): string { +function timeIndicator( + ms: number, + thresholds: readonly [number, number], +): string { if (ms > thresholds[1]) return "🔴"; if (ms > thresholds[0]) return "🟡"; return "🟢"; @@ -28,7 +36,10 @@ function thresholdsFor(label: string): readonly [number, number] { return Thresholds.stage; } -function formatMsColored(ms: number, thresholds: readonly [number, number]): string { +function formatMsColored( + ms: number, + thresholds: readonly [number, number], +): string { return `${timeIndicator(ms, thresholds)} ${formatMs(ms)}`; } @@ -60,12 +71,16 @@ function flattenRuntime(rt: RuntimeStats): FlatMetric[] { metrics.push({ label: "checker", value: rt.checker }); metrics.push({ label: "validation", value: rt.validation.total }); - for (const [v, t] of Object.entries(rt.validation.validators).sort(([, a], [, b]) => b - a)) { + for (const [v, t] of Object.entries(rt.validation.validators).sort( + ([, a], [, b]) => b - a, + )) { metrics.push({ label: `validation/${v}`, value: t }); } metrics.push({ label: "linter", value: rt.linter.total }); - for (const [r, t] of Object.entries(rt.linter.rules).sort(([, a], [, b]) => b - a)) { + for (const [r, t] of Object.entries(rt.linter.rules).sort( + ([, a], [, b]) => b - a, + )) { metrics.push({ label: `linter/${r}`, value: t }); } @@ -167,11 +182,14 @@ export function formatPrComment( if (regressions.length === 0) { lines.push("✅ No performance regressions detected.\n"); } else { - lines.push(`⚠️ **${regressions.length} metric(s) regressed** above the +${threshold}% threshold:\n`); + lines.push( + `⚠️ **${regressions.length} metric(s) regressed** above the +${threshold}% threshold:\n`, + ); lines.push("| Metric | Baseline | Current | Change |"); lines.push("|--------|----------|---------|--------|"); for (const m of regressions) { - const changeStr = `${formatPercent(m.percentChange)} ${changeIndicator(m.percentChange, threshold)}`.trim(); + 
const changeStr = + `${formatPercent(m.percentChange)} ${changeIndicator(m.percentChange, threshold)}`.trim(); const th = thresholdsFor(m.label); lines.push( `| ${displayLabel(m.label)} | ${formatMsColored(m.baseline, th)} | ${formatMsColored(m.current, th)} | ${changeStr} |`, @@ -189,7 +207,8 @@ export function formatPrComment( lines.push("| Metric | Baseline | Current | Change |"); lines.push("|--------|----------|---------|--------|"); for (const m of averaged) { - const changeStr = `${formatPercent(m.percentChange)} ${changeIndicator(m.percentChange, threshold)}`.trim(); + const changeStr = + `${formatPercent(m.percentChange)} ${changeIndicator(m.percentChange, threshold)}`.trim(); const th = thresholdsFor(m.label); lines.push( `| ${displayLabel(m.label)} | ${formatMsColored(m.baseline, th)} | ${formatMsColored(m.current, th)} | ${changeStr} |`, @@ -205,7 +224,9 @@ export function formatPrComment( } /** Average MetricComparisons across all ComparisonResults by label. */ -function averageComparisonMetrics(comparisons: ComparisonResult[]): MetricComparison[] { +function averageComparisonMetrics( + comparisons: ComparisonResult[], +): MetricComparison[] { const sums = new Map< string, { baseline: number; current: number; change: number; count: number } @@ -236,7 +257,8 @@ function averageComparisonMetrics(comparisons: ComparisonResult[]): MetricCompar const baseline = e.baseline / e.count; const current = e.current / e.count; const change = e.change / e.count; - const percentChange = baseline === 0 ? (current === 0 ? 0 : 100) : (change / baseline) * 100; + const percentChange = + baseline === 0 ? (current === 0 ? 
0 : 100) : (change / baseline) * 100; return { label, baseline, current, change, percentChange }; }); } @@ -291,7 +313,9 @@ export function formatRunSummary(result: BenchmarkResult): string { } lines.push(""); - lines.push(`> Averaged across ${specs.length} specs (${specNames.join(", ")}).`); + lines.push( + `> Averaged across ${specs.length} specs (${specNames.join(", ")}).`, + ); lines.push(LEGEND); return lines.join("\n"); From b5a84fd6bb4cbee5885a1e2dd7f6efe55543d7f7 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 7 May 2026 14:22:50 +0000 Subject: [PATCH 5/5] Re-run formatter with correct printWidth=100 config Agent-Logs-Url: https://github.com/Azure/typespec-azure/sessions/ed48d0df-a0d3-45e1-a365-3dfa4056c372 Co-authored-by: timotheeguerin <1031227+timotheeguerin@users.noreply.github.com> --- packages/benchmark/src/cli.ts | 19 +++---------- packages/benchmark/src/format-comment.ts | 36 ++++++------------------ 2 files changed, 12 insertions(+), 43 deletions(-) diff --git a/packages/benchmark/src/cli.ts b/packages/benchmark/src/cli.ts index 03c376c390..d26384eadd 100644 --- a/packages/benchmark/src/cli.ts +++ b/packages/benchmark/src/cli.ts @@ -93,9 +93,7 @@ function parseArgs(args: string[]): Record { async function runCommand(args: Record): Promise { const specsDir = args["specs-dir"] ?? defaultSpecsDir; - const iterations = args["iterations"] - ? parseInt(args["iterations"], 10) - : undefined; + const iterations = args["iterations"] ? parseInt(args["iterations"], 10) : undefined; const warmup = args["warmup"] ? 
parseInt(args["warmup"], 10) : undefined; const specs = args["specs"]?.split(","); const commit = args["commit"]; @@ -117,15 +115,11 @@ async function compareCommand(args: Record): Promise { const baselineFile = args["baseline"]; const currentFile = args["current"]; if (!baselineFile || !currentFile) { - console.error( - "Error: --baseline and --current are required for compare command", - ); + console.error("Error: --baseline and --current are required for compare command"); process.exit(1); } - const threshold = args["threshold"] - ? parseFloat(args["threshold"]) - : undefined; + const threshold = args["threshold"] ? parseFloat(args["threshold"]) : undefined; const format = args["format"] ?? "console"; const outputFile = args["output"]; @@ -148,12 +142,7 @@ async function compareCommand(args: Record): Promise { await outputResult(output, outputFile); await writeGitHubSummary( - formatComparisonSummary( - comparisons, - baseline.commit, - current.commit, - threshold, - ), + formatComparisonSummary(comparisons, baseline.commit, current.commit, threshold), ); } diff --git a/packages/benchmark/src/format-comment.ts b/packages/benchmark/src/format-comment.ts index cafbb2df5a..a774c70e70 100644 --- a/packages/benchmark/src/format-comment.ts +++ b/packages/benchmark/src/format-comment.ts @@ -1,9 +1,4 @@ -import type { - BenchmarkResult, - ComparisonResult, - MetricComparison, - RuntimeStats, -} from "./types.js"; +import type { BenchmarkResult, ComparisonResult, MetricComparison, RuntimeStats } from "./types.js"; const DEFAULT_THRESHOLD = 5; @@ -13,10 +8,7 @@ function formatMs(ms: number): string { } /** Color-code a time value based on thresholds: 🔴 slow, 🟡 moderate, 🟢 fast. 
*/ -function timeIndicator( - ms: number, - thresholds: readonly [number, number], -): string { +function timeIndicator(ms: number, thresholds: readonly [number, number]): string { if (ms > thresholds[1]) return "🔴"; if (ms > thresholds[0]) return "🟡"; return "🟢"; @@ -36,10 +28,7 @@ function thresholdsFor(label: string): readonly [number, number] { return Thresholds.stage; } -function formatMsColored( - ms: number, - thresholds: readonly [number, number], -): string { +function formatMsColored(ms: number, thresholds: readonly [number, number]): string { return `${timeIndicator(ms, thresholds)} ${formatMs(ms)}`; } @@ -71,16 +60,12 @@ function flattenRuntime(rt: RuntimeStats): FlatMetric[] { metrics.push({ label: "checker", value: rt.checker }); metrics.push({ label: "validation", value: rt.validation.total }); - for (const [v, t] of Object.entries(rt.validation.validators).sort( - ([, a], [, b]) => b - a, - )) { + for (const [v, t] of Object.entries(rt.validation.validators).sort(([, a], [, b]) => b - a)) { metrics.push({ label: `validation/${v}`, value: t }); } metrics.push({ label: "linter", value: rt.linter.total }); - for (const [r, t] of Object.entries(rt.linter.rules).sort( - ([, a], [, b]) => b - a, - )) { + for (const [r, t] of Object.entries(rt.linter.rules).sort(([, a], [, b]) => b - a)) { metrics.push({ label: `linter/${r}`, value: t }); } @@ -224,9 +209,7 @@ export function formatPrComment( } /** Average MetricComparisons across all ComparisonResults by label. */ -function averageComparisonMetrics( - comparisons: ComparisonResult[], -): MetricComparison[] { +function averageComparisonMetrics(comparisons: ComparisonResult[]): MetricComparison[] { const sums = new Map< string, { baseline: number; current: number; change: number; count: number } @@ -257,8 +240,7 @@ function averageComparisonMetrics( const baseline = e.baseline / e.count; const current = e.current / e.count; const change = e.change / e.count; - const percentChange = - baseline === 0 ? 
(current === 0 ? 0 : 100) : (change / baseline) * 100; + const percentChange = baseline === 0 ? (current === 0 ? 0 : 100) : (change / baseline) * 100; return { label, baseline, current, change, percentChange }; }); } @@ -313,9 +295,7 @@ export function formatRunSummary(result: BenchmarkResult): string { } lines.push(""); - lines.push( - `> Averaged across ${specs.length} specs (${specNames.join(", ")}).`, - ); + lines.push(`> Averaged across ${specs.length} specs (${specNames.join(", ")}).`); lines.push(LEGEND); return lines.join("\n");