From 4161851e94f44a5d4c96a93bbb4cdd240d4356d1 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 4 May 2026 21:12:03 +0000
Subject: [PATCH 1/5] Initial plan


From c24442e4b7ec49b6f9fbd411c107507095db3d07 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 4 May 2026 21:14:59 +0000
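Subject: [PATCH 2/5] Reduce verbosity of benchmark PR comment: wrap details in
 <details> element, show regression summary

Also removes the `--changes-only` compare option and the `changesOnly`
field of `FormatOptions`; the comment now always lists every averaged
metric inside the collapsed details section.
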
Agent-Logs-Url: https://github.com/Azure/typespec-azure/sessions/f466ca11-42d8-472b-9318-2756aad71ca6

Co-authored-by: timotheeguerin <1031227+timotheeguerin@users.noreply.github.com>
---
 packages/benchmark/src/cli.ts            |  3 --
 packages/benchmark/src/format-comment.ts | 40 ++++++++++++++----------
 2 files changed, 24 insertions(+), 19 deletions(-)
diff --git a/packages/benchmark/src/cli.ts b/packages/benchmark/src/cli.ts
index 48ba67537e..d26384eadd 100644
--- a/packages/benchmark/src/cli.ts
+++ b/packages/benchmark/src/cli.ts
@@ -43,7 +43,6 @@ Compare options:
   --output <file>       Output file (default: stdout)
   --format <type>       Output format: "console" or "markdown" (default: console)
   --detailed            Show per-rule/per-emitter-step breakdown
-  --changes-only        Only show metrics with notable changes
 
 Generate-history options:
   --dir <dir>           Read results from a directory instead of the benchmark-data git branch
@@ -122,7 +121,6 @@ async function compareCommand(args: Record<string, string>): Promise<void> {
 
   const threshold = args["threshold"] ? parseFloat(args["threshold"]) : undefined;
   const format = args["format"] ?? "console";
-  const changesOnly = args["changes-only"] === "true";
   const outputFile = args["output"];
 
   const baseline = await loadJson<BenchmarkResult>(baselineFile);
@@ -133,7 +131,6 @@ async function compareCommand(args: Record<string, string>): Promise<void> {
   if (format === "markdown") {
     output = formatPrComment(comparisons, baseline.commit, current.commit, {
       threshold,
-      changesOnly,
     });
   } else {
     output = formatConsoleSummary(comparisons, threshold);
diff --git a/packages/benchmark/src/format-comment.ts b/packages/benchmark/src/format-comment.ts
index aa620aaecf..cff5d784ab 100644
--- a/packages/benchmark/src/format-comment.ts
+++ b/packages/benchmark/src/format-comment.ts
@@ -145,8 +145,6 @@ const LEGEND =
 export interface FormatOptions {
   /** Change threshold for highlighting (default: 5%). */
   threshold?: number;
-  /** Only show metrics with notable changes. */
-  changesOnly?: boolean;
 }
 
 /** Format comparison results as a GitHub PR comment markdown. */
@@ -157,30 +155,23 @@ export function formatPrComment(
   options: FormatOptions = {},
 ): string {
   const threshold = options.threshold ?? DEFAULT_THRESHOLD;
-  const changesOnly = options.changesOnly ?? false;
 
   const lines: string[] = [];
   lines.push("## ⚡ Benchmark Results\n");
-  lines.push(
-    `Comparing [\`${currentCommit.slice(0, 7)}\`] against baseline [\`${baselineCommit.slice(0, 7)}\`]\n`,
-  );
 
   // Average metrics across all specs
   const averaged = averageComparisonMetrics(comparisons);
+  const regressions = averaged.filter((m) => m.percentChange >= threshold);
 
-  let metrics = averaged;
-  if (changesOnly) {
-    metrics = metrics.filter((m) => Math.abs(m.percentChange) >= threshold);
-  }
-
-  if (metrics.length === 0) {
-    lines.push("_No notable changes._\n");
+  // Top-level summary: show regressions prominently, otherwise a simple ok message
+  if (regressions.length === 0) {
+    lines.push("✅ No performance regressions detected.\n");
   } else {
+    lines.push(`⚠️ **${regressions.length} metric(s) regressed** above the ±${threshold}% threshold:\n`);
     lines.push("| Metric | Baseline | Current | Change |");
     lines.push("|--------|----------|---------|--------|");
-    for (const m of metrics) {
-      const indicator = changeIndicator(m.percentChange, threshold);
-      const changeStr = `${formatPercent(m.percentChange)} ${indicator}`.trim();
+    for (const m of regressions) {
+      const changeStr = `${formatPercent(m.percentChange)} ${changeIndicator(m.percentChange, threshold)}`.trim();
       const th = thresholdsFor(m.label);
       lines.push(
         `| ${displayLabel(m.label)} | ${formatMsColored(m.baseline, th)} | ${formatMsColored(m.current, th)} | ${changeStr} |`,
@@ -189,9 +180,26 @@ export function formatPrComment(
     lines.push("");
   }
 
+  // Full details collapsed
   const specNames = comparisons.map((c) => c.specName).join(", ");
+  lines.push("<details>");
+  lines.push(
+    `<summary>Full details — comparing <code>${currentCommit.slice(0, 7)}</code> vs baseline <code>${baselineCommit.slice(0, 7)}</code></summary>\n`,
+  );
+  lines.push("| Metric | Baseline | Current | Change |");
+  lines.push("|--------|----------|---------|--------|");
+  for (const m of averaged) {
+    const changeStr = `${formatPercent(m.percentChange)} ${changeIndicator(m.percentChange, threshold)}`.trim();
+    const th = thresholdsFor(m.label);
+    lines.push(
+      `| ${displayLabel(m.label)} | ${formatMsColored(m.baseline, th)} | ${formatMsColored(m.current, th)} | ${changeStr} |`,
+    );
+  }
+  lines.push("");
   lines.push(`> Averaged across ${comparisons.length} specs (${specNames}).`);
   lines.push(`> Threshold: changes > ±${threshold}% are highlighted.`);
+  lines.push(LEGEND);
+  lines.push("</details>");
 
   return lines.join("\n");
 }

From 5bca0f243866f628f04c2b910b7309646f59e90e Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 4 May 2026 21:15:37 +0000
Subject: [PATCH 3/5] Fix threshold sign in regression message and use en dash
 in details summary of benchmark PR comment

Agent-Logs-Url: https://github.com/Azure/typespec-azure/sessions/f466ca11-42d8-472b-9318-2756aad71ca6

Co-authored-by: timotheeguerin <1031227+timotheeguerin@users.noreply.github.com>
---
 packages/benchmark/src/format-comment.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/packages/benchmark/src/format-comment.ts b/packages/benchmark/src/format-comment.ts
index cff5d784ab..8a27689f11 100644
--- a/packages/benchmark/src/format-comment.ts
+++ b/packages/benchmark/src/format-comment.ts
@@ -167,7 +167,7 @@ export function formatPrComment(
   if (regressions.length === 0) {
     lines.push("✅ No performance regressions detected.\n");
   } else {
-    lines.push(`⚠️ **${regressions.length} metric(s) regressed** above the ±${threshold}% threshold:\n`);
+    lines.push(`⚠️ **${regressions.length} metric(s) regressed** above the +${threshold}% threshold:\n`);
     lines.push("| Metric | Baseline | Current | Change |");
     lines.push("|--------|----------|---------|--------|");
     for (const m of regressions) {
@@ -184,7 +184,7 @@ export function formatPrComment(
   const specNames = comparisons.map((c) => c.specName).join(", ");
   lines.push("<details>");
   lines.push(
-    `<summary>Full details — comparing <code>${currentCommit.slice(0, 7)}</code> vs baseline <code>${baselineCommit.slice(0, 7)}</code></summary>\n`,
+    `<summary>Full details – comparing <code>${currentCommit.slice(0, 7)}</code> vs baseline <code>${baselineCommit.slice(0, 7)}</code></summary>\n`,
   );
   lines.push("| Metric | Baseline | Current | Change |");
   lines.push("|--------|----------|---------|--------|");

From a99f13d752c251ea2141158f7cb677fc4fc79d56 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 5 May 2026 10:22:57 +0000
Subject: [PATCH 4/5] Run formatter on benchmark source files

Agent-Logs-Url: https://github.com/Azure/typespec-azure/sessions/a233d106-06db-4121-be7c-b04165bf5bbd

Co-authored-by: timotheeguerin <1031227+timotheeguerin@users.noreply.github.com>
---
 packages/benchmark/src/cli.ts            | 19 +++++++---
 packages/benchmark/src/format-comment.ts | 46 ++++++++++++++++++------
 2 files changed, 50 insertions(+), 15 deletions(-)

diff --git a/packages/benchmark/src/cli.ts b/packages/benchmark/src/cli.ts
index d26384eadd..03c376c390 100644
--- a/packages/benchmark/src/cli.ts
+++ b/packages/benchmark/src/cli.ts
@@ -93,7 +93,9 @@ function parseArgs(args: string[]): Record<string, string> {
 
 async function runCommand(args: Record<string, string>): Promise<void> {
   const specsDir = args["specs-dir"] ?? defaultSpecsDir;
-  const iterations = args["iterations"] ? parseInt(args["iterations"], 10) : undefined;
+  const iterations = args["iterations"]
+    ? parseInt(args["iterations"], 10)
+    : undefined;
   const warmup = args["warmup"] ? parseInt(args["warmup"], 10) : undefined;
   const specs = args["specs"]?.split(",");
   const commit = args["commit"];
@@ -115,11 +117,15 @@ async function compareCommand(args: Record<string, string>): Promise<void> {
   const baselineFile = args["baseline"];
   const currentFile = args["current"];
   if (!baselineFile || !currentFile) {
-    console.error("Error: --baseline and --current are required for compare command");
+    console.error(
+      "Error: --baseline and --current are required for compare command",
+    );
     process.exit(1);
   }
 
-  const threshold = args["threshold"] ? parseFloat(args["threshold"]) : undefined;
+  const threshold = args["threshold"]
+    ? parseFloat(args["threshold"])
+    : undefined;
   const format = args["format"] ?? "console";
   const outputFile = args["output"];
 
@@ -142,7 +148,12 @@ async function compareCommand(args: Record<string, string>): Promise<void> {
 
   await outputResult(output, outputFile);
   await writeGitHubSummary(
-    formatComparisonSummary(comparisons, baseline.commit, current.commit, threshold),
+    formatComparisonSummary(
+      comparisons,
+      baseline.commit,
+      current.commit,
+      threshold,
+    ),
   );
 }
 
diff --git a/packages/benchmark/src/format-comment.ts b/packages/benchmark/src/format-comment.ts
index 8a27689f11..cafbb2df5a 100644
--- a/packages/benchmark/src/format-comment.ts
+++ b/packages/benchmark/src/format-comment.ts
@@ -1,4 +1,9 @@
-import type { BenchmarkResult, ComparisonResult, MetricComparison, RuntimeStats } from "./types.js";
+import type {
+  BenchmarkResult,
+  ComparisonResult,
+  MetricComparison,
+  RuntimeStats,
+} from "./types.js";
 
 const DEFAULT_THRESHOLD = 5;
 
@@ -8,7 +13,10 @@ function formatMs(ms: number): string {
 }
 
 /** Color-code a time value based on thresholds: 🔴 slow, 🟡 moderate, 🟢 fast. */
-function timeIndicator(ms: number, thresholds: readonly [number, number]): string {
+function timeIndicator(
+  ms: number,
+  thresholds: readonly [number, number],
+): string {
   if (ms > thresholds[1]) return "🔴";
   if (ms > thresholds[0]) return "🟡";
   return "🟢";
@@ -28,7 +36,10 @@ function thresholdsFor(label: string): readonly [number, number] {
   return Thresholds.stage;
 }
 
-function formatMsColored(ms: number, thresholds: readonly [number, number]): string {
+function formatMsColored(
+  ms: number,
+  thresholds: readonly [number, number],
+): string {
   return `${timeIndicator(ms, thresholds)} ${formatMs(ms)}`;
 }
 
@@ -60,12 +71,16 @@ function flattenRuntime(rt: RuntimeStats): FlatMetric[] {
   metrics.push({ label: "checker", value: rt.checker });
 
   metrics.push({ label: "validation", value: rt.validation.total });
-  for (const [v, t] of Object.entries(rt.validation.validators).sort(([, a], [, b]) => b - a)) {
+  for (const [v, t] of Object.entries(rt.validation.validators).sort(
+    ([, a], [, b]) => b - a,
+  )) {
     metrics.push({ label: `validation/${v}`, value: t });
   }
 
   metrics.push({ label: "linter", value: rt.linter.total });
-  for (const [r, t] of Object.entries(rt.linter.rules).sort(([, a], [, b]) => b - a)) {
+  for (const [r, t] of Object.entries(rt.linter.rules).sort(
+    ([, a], [, b]) => b - a,
+  )) {
     metrics.push({ label: `linter/${r}`, value: t });
   }
 
@@ -167,11 +182,14 @@ export function formatPrComment(
   if (regressions.length === 0) {
     lines.push("✅ No performance regressions detected.\n");
   } else {
-    lines.push(`⚠️ **${regressions.length} metric(s) regressed** above the +${threshold}% threshold:\n`);
+    lines.push(
+      `⚠️ **${regressions.length} metric(s) regressed** above the +${threshold}% threshold:\n`,
+    );
     lines.push("| Metric | Baseline | Current | Change |");
     lines.push("|--------|----------|---------|--------|");
     for (const m of regressions) {
-      const changeStr = `${formatPercent(m.percentChange)} ${changeIndicator(m.percentChange, threshold)}`.trim();
+      const changeStr =
+        `${formatPercent(m.percentChange)} ${changeIndicator(m.percentChange, threshold)}`.trim();
       const th = thresholdsFor(m.label);
       lines.push(
         `| ${displayLabel(m.label)} | ${formatMsColored(m.baseline, th)} | ${formatMsColored(m.current, th)} | ${changeStr} |`,
@@ -189,7 +207,8 @@ export function formatPrComment(
   lines.push("| Metric | Baseline | Current | Change |");
   lines.push("|--------|----------|---------|--------|");
   for (const m of averaged) {
-    const changeStr = `${formatPercent(m.percentChange)} ${changeIndicator(m.percentChange, threshold)}`.trim();
+    const changeStr =
+      `${formatPercent(m.percentChange)} ${changeIndicator(m.percentChange, threshold)}`.trim();
     const th = thresholdsFor(m.label);
     lines.push(
       `| ${displayLabel(m.label)} | ${formatMsColored(m.baseline, th)} | ${formatMsColored(m.current, th)} | ${changeStr} |`,
@@ -205,7 +224,9 @@ export function formatPrComment(
 }
 
 /** Average MetricComparisons across all ComparisonResults by label. */
-function averageComparisonMetrics(comparisons: ComparisonResult[]): MetricComparison[] {
+function averageComparisonMetrics(
+  comparisons: ComparisonResult[],
+): MetricComparison[] {
   const sums = new Map<
     string,
     { baseline: number; current: number; change: number; count: number }
@@ -236,7 +257,8 @@ function averageComparisonMetrics(comparisons: ComparisonResult[]): MetricCompar
       const baseline = e.baseline / e.count;
       const current = e.current / e.count;
       const change = e.change / e.count;
-      const percentChange = baseline === 0 ? (current === 0 ? 0 : 100) : (change / baseline) * 100;
+      const percentChange =
+        baseline === 0 ? (current === 0 ? 0 : 100) : (change / baseline) * 100;
       return { label, baseline, current, change, percentChange };
     });
 }
@@ -291,7 +313,9 @@ export function formatRunSummary(result: BenchmarkResult): string {
   }
 
   lines.push("");
-  lines.push(`> Averaged across ${specs.length} specs (${specNames.join(", ")}).`);
+  lines.push(
+    `> Averaged across ${specs.length} specs (${specNames.join(", ")}).`,
+  );
   lines.push(LEGEND);
 
   return lines.join("\n");

From b5a84fd6bb4cbee5885a1e2dd7f6efe55543d7f7 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 7 May 2026 14:22:50 +0000
Subject: [PATCH 5/5] Re-run formatter with correct printWidth=100 config

Agent-Logs-Url: https://github.com/Azure/typespec-azure/sessions/ed48d0df-a0d3-45e1-a365-3dfa4056c372

Co-authored-by: timotheeguerin <1031227+timotheeguerin@users.noreply.github.com>
---
 packages/benchmark/src/cli.ts            | 19 +++----------
 packages/benchmark/src/format-comment.ts | 36 ++++++------------------
 2 files changed, 12 insertions(+), 43 deletions(-)

diff --git a/packages/benchmark/src/cli.ts b/packages/benchmark/src/cli.ts
index 03c376c390..d26384eadd 100644
--- a/packages/benchmark/src/cli.ts
+++ b/packages/benchmark/src/cli.ts
@@ -93,9 +93,7 @@ function parseArgs(args: string[]): Record<string, string> {
 
 async function runCommand(args: Record<string, string>): Promise<void> {
   const specsDir = args["specs-dir"] ?? defaultSpecsDir;
-  const iterations = args["iterations"]
-    ? parseInt(args["iterations"], 10)
-    : undefined;
+  const iterations = args["iterations"] ? parseInt(args["iterations"], 10) : undefined;
   const warmup = args["warmup"] ? parseInt(args["warmup"], 10) : undefined;
   const specs = args["specs"]?.split(",");
   const commit = args["commit"];
@@ -117,15 +115,11 @@ async function compareCommand(args: Record<string, string>): Promise<void> {
   const baselineFile = args["baseline"];
   const currentFile = args["current"];
   if (!baselineFile || !currentFile) {
-    console.error(
-      "Error: --baseline and --current are required for compare command",
-    );
+    console.error("Error: --baseline and --current are required for compare command");
     process.exit(1);
   }
 
-  const threshold = args["threshold"]
-    ? parseFloat(args["threshold"])
-    : undefined;
+  const threshold = args["threshold"] ? parseFloat(args["threshold"]) : undefined;
   const format = args["format"] ?? "console";
   const outputFile = args["output"];
 
@@ -148,12 +142,7 @@ async function compareCommand(args: Record<string, string>): Promise<void> {
 
   await outputResult(output, outputFile);
   await writeGitHubSummary(
-    formatComparisonSummary(
-      comparisons,
-      baseline.commit,
-      current.commit,
-      threshold,
-    ),
+    formatComparisonSummary(comparisons, baseline.commit, current.commit, threshold),
   );
 }
 
diff --git a/packages/benchmark/src/format-comment.ts b/packages/benchmark/src/format-comment.ts
index cafbb2df5a..a774c70e70 100644
--- a/packages/benchmark/src/format-comment.ts
+++ b/packages/benchmark/src/format-comment.ts
@@ -1,9 +1,4 @@
-import type {
-  BenchmarkResult,
-  ComparisonResult,
-  MetricComparison,
-  RuntimeStats,
-} from "./types.js";
+import type { BenchmarkResult, ComparisonResult, MetricComparison, RuntimeStats } from "./types.js";
 
 const DEFAULT_THRESHOLD = 5;
 
@@ -13,10 +8,7 @@ function formatMs(ms: number): string {
 }
 
 /** Color-code a time value based on thresholds: 🔴 slow, 🟡 moderate, 🟢 fast. */
-function timeIndicator(
-  ms: number,
-  thresholds: readonly [number, number],
-): string {
+function timeIndicator(ms: number, thresholds: readonly [number, number]): string {
   if (ms > thresholds[1]) return "🔴";
   if (ms > thresholds[0]) return "🟡";
   return "🟢";
@@ -36,10 +28,7 @@ function thresholdsFor(label: string): readonly [number, number] {
   return Thresholds.stage;
 }
 
-function formatMsColored(
-  ms: number,
-  thresholds: readonly [number, number],
-): string {
+function formatMsColored(ms: number, thresholds: readonly [number, number]): string {
   return `${timeIndicator(ms, thresholds)} ${formatMs(ms)}`;
 }
 
@@ -71,16 +60,12 @@ function flattenRuntime(rt: RuntimeStats): FlatMetric[] {
   metrics.push({ label: "checker", value: rt.checker });
 
   metrics.push({ label: "validation", value: rt.validation.total });
-  for (const [v, t] of Object.entries(rt.validation.validators).sort(
-    ([, a], [, b]) => b - a,
-  )) {
+  for (const [v, t] of Object.entries(rt.validation.validators).sort(([, a], [, b]) => b - a)) {
     metrics.push({ label: `validation/${v}`, value: t });
   }
 
   metrics.push({ label: "linter", value: rt.linter.total });
-  for (const [r, t] of Object.entries(rt.linter.rules).sort(
-    ([, a], [, b]) => b - a,
-  )) {
+  for (const [r, t] of Object.entries(rt.linter.rules).sort(([, a], [, b]) => b - a)) {
     metrics.push({ label: `linter/${r}`, value: t });
   }
 
@@ -224,9 +209,7 @@ export function formatPrComment(
 }
 
 /** Average MetricComparisons across all ComparisonResults by label. */
-function averageComparisonMetrics(
-  comparisons: ComparisonResult[],
-): MetricComparison[] {
+function averageComparisonMetrics(comparisons: ComparisonResult[]): MetricComparison[] {
   const sums = new Map<
     string,
     { baseline: number; current: number; change: number; count: number }
@@ -257,8 +240,7 @@ function averageComparisonMetrics(
       const baseline = e.baseline / e.count;
       const current = e.current / e.count;
       const change = e.change / e.count;
-      const percentChange =
-        baseline === 0 ? (current === 0 ? 0 : 100) : (change / baseline) * 100;
+      const percentChange = baseline === 0 ? (current === 0 ? 0 : 100) : (change / baseline) * 100;
       return { label, baseline, current, change, percentChange };
     });
 }
@@ -313,9 +295,7 @@ export function formatRunSummary(result: BenchmarkResult): string {
   }
 
   lines.push("");
-  lines.push(
-    `> Averaged across ${specs.length} specs (${specNames.join(", ")}).`,
-  );
+  lines.push(`> Averaged across ${specs.length} specs (${specNames.join(", ")}).`);
   lines.push(LEGEND);
 
   return lines.join("\n");