diff --git a/.github/aw/compat.json b/.github/aw/compat.json index ebe455b8..c17e3e34 100644 --- a/.github/aw/compat.json +++ b/.github/aw/compat.json @@ -7,7 +7,7 @@ "min-gh-aw": "0.72.0", "max-gh-aw": "*", "min-agent": "1.0.21", - "max-agent": "1.0.48", + "max-agent": "1.0.51", "open": true }, { diff --git a/setup/js/awf_reflect.cjs b/setup/js/awf_reflect.cjs index c76e456c..ed0491dc 100644 --- a/setup/js/awf_reflect.cjs +++ b/setup/js/awf_reflect.cjs @@ -15,8 +15,11 @@ "use strict"; +require("./shim.cjs"); + const fs = require("fs"); const path = require("path"); +const { withRetry } = require("./error_recovery.cjs"); // AWF API proxy management endpoint for discovering configured LLM providers and available models. // The api-proxy sidecar exposes /reflect on its management port (port 10000) inside the AWF @@ -29,6 +32,12 @@ const AWF_REFLECT_OUTPUT_PATH = "/tmp/gh-aw/sandbox/firewall/awf-reflect.json"; const AWF_REFLECT_TIMEOUT_MS = 60000; // Milliseconds to wait for each models_url fallback fetch (shorter than the main reflect timeout). const AWF_MODELS_URL_TIMEOUT_MS = 3000; +// Maximum attempts for models_url fallback fetches when the proxy is not yet ready. +const AWF_MODELS_URL_MAX_ATTEMPTS = 5; +// Base delay between models_url fallback retries. Uses exponential backoff. +const AWF_MODELS_URL_RETRY_BASE_MS = 250; +// Cap for exponential backoff delay between retries. +const AWF_MODELS_URL_RETRY_MAX_MS = 2000; // Gemini model name prefix stripped from model IDs in the Gemini models API response. // Example: { name: "models/gemini-1.5-pro" } → "gemini-1.5-pro" const GEMINI_MODEL_NAME_PREFIX = "models/"; @@ -84,32 +93,79 @@ function extractModelIds(json) { * @returns {Promise} */ async function fetchModelsFromUrl(modelsUrl, timeoutMs, logger) { - const ac = new AbortController(); - const timer = setTimeout(() => { - logger(`awf-reflect: models fetch timed out for ${modelsUrl}`); - ac.abort(); - }, timeoutMs); + let attemptCounter = 0; + const retryConfig = { + maxRetries: AWF_MODELS_URL_MAX_ATTEMPTS - 1, + // withRetry multiplies delay before the next attempt, so divide by 2 here + // to preserve the intended first backoff of AWF_MODELS_URL_RETRY_BASE_MS. + initialDelayMs: Math.ceil(AWF_MODELS_URL_RETRY_BASE_MS / 2), + maxDelayMs: AWF_MODELS_URL_RETRY_MAX_MS, + backoffMultiplier: 2, + jitterMs: 0, + shouldRetry: error => { + const original = error?.originalError || error; + const status = original?.status ?? original?.response?.status ?? null; + const shouldRetry = status === 503; + if (shouldRetry && attemptCounter < AWF_MODELS_URL_MAX_ATTEMPTS) { + logger(`awf-reflect: models fetch returned 503 for ${modelsUrl}; retrying (attempt ${attemptCounter + 1}/${AWF_MODELS_URL_MAX_ATTEMPTS})`); + } + return shouldRetry; + }, + }; + try { - const res = await fetch(modelsUrl, { signal: ac.signal }); - if (!res.ok) { - logger(`awf-reflect: models fetch returned ${res.status} for ${modelsUrl}`); - return null; - } - const json = await res.json(); - const models = extractModelIds(json); - if (models) { - logger(`awf-reflect: fetched ${models.length} model(s) from ${modelsUrl}`); - } - return models; + return await withRetry( + async () => { + attemptCounter += 1; + const ac = new AbortController(); + const timer = setTimeout(() => { + logger(`awf-reflect: models fetch timed out for ${modelsUrl}`); + ac.abort(); + }, timeoutMs); + try { + const res = await fetch(modelsUrl, { signal: ac.signal }); + if (!res.ok) { + if (res.status === 503) { + const err = Object.assign(new Error(`models fetch returned 503 for ${modelsUrl}`), { status: 503 }); + throw err; + } + logger(`awf-reflect: models fetch returned ${res.status} for ${modelsUrl}`); + return null; + } + const json = await res.json(); + const models = extractModelIds(json); + if (models) { + logger(`awf-reflect: fetched ${models.length} model(s) from ${modelsUrl}`); + } + return models; + } catch (err) { + const e = /** @type {Error} */ err; + if (e.name === "AbortError") { + return null; // already logged above + } + const status = e?.status ?? e?.response?.status ?? null; + if (status === 503) { + throw e; + } + logger(`awf-reflect: models fetch error for ${modelsUrl}: ${e.message}`); + return null; + } finally { + clearTimeout(timer); + } + }, + retryConfig, + `awf-reflect models fetch for ${modelsUrl}` + ); } catch (err) { const e = /** @type {Error} */ err; - if (e.name === "AbortError") { - return null; // already logged above + const original = e?.originalError || e; + const status = original?.status ?? original?.response?.status ?? null; + if (status === 503) { + logger(`awf-reflect: models fetch returned 503 for ${modelsUrl}`); + return null; } logger(`awf-reflect: models fetch error for ${modelsUrl}: ${e.message}`); return null; - } finally { - clearTimeout(timer); } } @@ -250,6 +306,9 @@ if (typeof module !== "undefined" && module.exports) { AWF_REFLECT_OUTPUT_PATH, AWF_REFLECT_TIMEOUT_MS, AWF_MODELS_URL_TIMEOUT_MS, + AWF_MODELS_URL_MAX_ATTEMPTS, + AWF_MODELS_URL_RETRY_BASE_MS, + AWF_MODELS_URL_RETRY_MAX_MS, GEMINI_MODEL_NAME_PREFIX, enrichReflectModels, extractModelIds, diff --git a/setup/js/codex_harness.cjs b/setup/js/codex_harness.cjs index 9757f64f..71e7c086 100644 --- a/setup/js/codex_harness.cjs +++ b/setup/js/codex_harness.cjs @@ -60,6 +60,11 @@ const MAX_DELAY_MS = 60000; const RATE_LIMIT_ERROR_PATTERN = /rate_limit_exceeded|429 Too Many Requests|RateLimitError/i; const AUTHENTICATION_FAILED_PATTERN = /Authentication failed(?:\s*\(Request ID:[^)]+\))?/i; +// Pattern to detect a missing API key at startup — Codex emits this before making any API +// calls when neither CODEX_API_KEY nor OPENAI_API_KEY is available in the environment. +// Example: "ERROR: Missing environment variable: `OPENAI_API_KEY`" +const MISSING_API_KEY_PATTERN = /Missing environment variable:\s*`?(?:CODEX_API_KEY|OPENAI_API_KEY)\b`?/i; + // Pattern to detect OpenAI server-side errors (HTTP 500, 503). // These are transient infrastructure failures that may resolve on retry. const SERVER_ERROR_PATTERN = /InternalServerError|ServiceUnavailableError|500 Internal Server Error|503 Service Unavailable/i; @@ -95,6 +100,16 @@ function isAuthenticationFailedError(output) { return AUTHENTICATION_FAILED_PATTERN.test(output); } +/** + * Determines if the collected output indicates a missing API key at startup. + * Codex exits before producing any agent output in this case, so retrying is futile. + * @param {string} output - Collected stdout+stderr from the process + * @returns {boolean} + */ +function isMissingApiKeyError(output) { + return MISSING_API_KEY_PATTERN.test(output); +} + /** * Determines if the collected output contains an OpenAI server error. * @param {string} output - Collected stdout+stderr from the process @@ -245,6 +260,22 @@ function resolveCodexPromptFileArgs(args) { return filteredArgs; } +/** + * Inject `--json` after `exec` in the args list so that Codex streams structured + * JSON Lines (JSONL) to stdout. This enables machine-readable output for CI + * pipelines without changing how stderr progress output works. + * + * No-op when the subcommand is not `exec` or when `--json` is already present. + * + * @param {string[]} args + * @returns {string[]} + */ +function injectJsonFlag(args) { + if (args.length === 0 || args[0] !== "exec") return args; + if (args.includes("--json")) return args; + return ["exec", "--json", ...args.slice(1)]; +} + /** * Main entry point: run codex with retry logic for transient API failures. * Codex does not support --continue session resumption, so all retries are fresh runs. @@ -259,6 +290,20 @@ async function main() { log(`starting: command=${command} maxRetries=${MAX_RETRIES} initialDelayMs=${INITIAL_DELAY_MS}` + ` backoffMultiplier=${BACKOFF_MULTIPLIER} maxDelayMs=${MAX_DELAY_MS}` + ` nodeVersion=${process.version} platform=${process.platform}`); + // Diagnose API key presence so CI failures can be triaged without exposing secret values. + const codexApiKey = process.env.CODEX_API_KEY; + const openaiApiKey = process.env.OPENAI_API_KEY; + log(`secrets: CODEX_API_KEY=${codexApiKey ? `set (length=${codexApiKey.length})` : "not set"}` + ` OPENAI_API_KEY=${openaiApiKey ? `set (length=${openaiApiKey.length})` : "not set"}`); + + // Pre-flight: require at least one API key before spawning codex. + // Without a key, codex exits immediately with "Missing environment variable" and every + // retry attempt fails the same way. Failing here avoids burning the retry budget and + // surfaces a clear, actionable message in CI logs. + if (!codexApiKey && !openaiApiKey) { + log("fatal: no API key available - set CODEX_API_KEY or OPENAI_API_KEY and retry"); + process.exit(1); + } + // Resolve the prompt for the initial run (reads --prompt-file content). // A missing or unreadable prompt file is treated as a fatal startup error. let resolvedArgs; @@ -276,6 +321,10 @@ async function main() { const hadPromptFile = args.includes("--prompt-file"); const safeArgs = hadPromptFile && resolvedArgs.length > 0 ? [...resolvedArgs.slice(0, -1), ""] : resolvedArgs; + // Inject --json after `exec` to stream structured JSONL events to stdout, making + // Codex output machine-readable in CI without affecting the stderr progress stream. + resolvedArgs = injectJsonFlag(resolvedArgs); + // Fetch AWF API proxy reflection data before running the agent to capture initial proxy state. // This is best-effort: failures are logged but do not affect the agent run. await fetchAWFReflect({ logger: log }); @@ -308,6 +357,7 @@ async function main() { const isRateLimit = isRateLimitError(result.output); const isAuthenticationFailed = isAuthenticationFailedError(result.output); + const isMissingApiKey = isMissingApiKeyError(result.output); const isServer = isServerError(result.output); const permissionDeniedCount = countPermissionDeniedIssues(result.output); const hasNumerousPermissionDenied = hasNumerousPermissionDeniedIssues(result.output); @@ -316,6 +366,7 @@ async function main() { ` exitCode=${result.exitCode}` + ` isRateLimitError=${isRateLimit}` + ` isAuthenticationFailedError=${isAuthenticationFailed}` + + ` isMissingApiKeyError=${isMissingApiKey}` + ` isServerError=${isServer}` + ` permissionDeniedCount=${permissionDeniedCount}` + ` hasNumerousPermissionDenied=${hasNumerousPermissionDenied}` + @@ -328,6 +379,11 @@ async function main() { break; } + if (isMissingApiKey) { + log(`attempt ${attempt + 1}: missing API key — not retrying (configure CODEX_API_KEY or OPENAI_API_KEY)`); + break; + } + if (hasNumerousPermissionDenied) { const deniedCommands = extractDeniedCommands(result.output); emitMissingToolPermissionIssue({ deniedCommands }); @@ -363,8 +419,10 @@ async function main() { if (typeof module !== "undefined" && module.exports) { module.exports = { resolveCodexPromptFileArgs, + injectJsonFlag, isRateLimitError, isAuthenticationFailedError, + isMissingApiKeyError, isServerError, countPermissionDeniedIssues, hasNumerousPermissionDeniedIssues, diff --git a/setup/js/copilot_harness.cjs b/setup/js/copilot_harness.cjs index 9decbc7e..d515ef6a 100644 --- a/setup/js/copilot_harness.cjs +++ b/setup/js/copilot_harness.cjs @@ -90,6 +90,10 @@ const NO_AUTH_INFO_PATTERN = /No authentication information found/; // After a first-attempt auth failure, retrying is futile because the entrypoint unsets // COPILOT_GITHUB_TOKEN between attempts. const AUTHENTICATION_FAILED_PATTERN = /Authentication failed(?:\s*\(Request ID:[^)]+\))?/i; +// Pattern: Copilot CLI inference access denied +const INFERENCE_ACCESS_ERROR_PATTERN = /Access denied by policy settings|invalid access to inference/; +// Pattern: Agentic engine process killed by signal (timeout) +const AGENTIC_ENGINE_TIMEOUT_PATTERN = /signal=SIG(?:TERM|KILL|INT)/; // Pattern to detect null-type tool_call error that poisons conversation history. // Matches the Copilot API 400 error: @@ -166,6 +170,40 @@ function isAuthenticationFailedError(output) { return AUTHENTICATION_FAILED_PATTERN.test(output); } +/** + * Detect known Copilot error patterns for workflow outputs. + * @param {string} output + * @returns {{ inferenceAccessError: boolean, mcpPolicyError: boolean, agenticEngineTimeout: boolean, modelNotSupportedError: boolean }} + */ +function detectCopilotErrors(output) { + return { + inferenceAccessError: INFERENCE_ACCESS_ERROR_PATTERN.test(output), + mcpPolicyError: isMCPPolicyError(output), + agenticEngineTimeout: AGENTIC_ENGINE_TIMEOUT_PATTERN.test(output), + modelNotSupportedError: isModelNotSupportedError(output), + }; +} + +/** + * Write Copilot detection outputs to $GITHUB_OUTPUT. + * @param {{ inferenceAccessError: boolean, mcpPolicyError: boolean, agenticEngineTimeout: boolean, modelNotSupportedError: boolean }} results + */ +function writeCopilotOutputs(results) { + const outputFile = process.env.GITHUB_OUTPUT; + if (!outputFile) { + log("GITHUB_OUTPUT not set — skipping copilot error outputs"); + return; + } + + const lines = [ + `inference_access_error=${results.inferenceAccessError}`, + `mcp_policy_error=${results.mcpPolicyError}`, + `agentic_engine_timeout=${results.agenticEngineTimeout}`, + `model_not_supported_error=${results.modelNotSupportedError}`, + ]; + fs.appendFileSync(outputFile, lines.join("\n") + "\n"); +} + /** * Determines if the collected output contains a null-type tool_call error. * This error occurs when the model emits a malformed tool call with type: null. @@ -436,6 +474,12 @@ async function main() { // This prevents a broken --continue recovery from resurrecting --continue on the next attempt. let continueDisabledPermanently = false; const driverStartTime = Date.now(); + const detectedCopilotErrors = { + inferenceAccessError: false, + mcpPolicyError: false, + agenticEngineTimeout: false, + modelNotSupportedError: false, + }; for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { // Add --continue flag on retries so the copilot session continues from where it left off @@ -453,6 +497,11 @@ async function main() { const safeArgs = currentArgs.map((arg, i) => (currentArgs[i - 1] === "--prompt" || currentArgs[i - 1] === "-p" ? "" : arg)); const result = await runProcess({ command, args: currentArgs, attempt, log, logArgs: safeArgs }); lastExitCode = result.exitCode; + const attemptDetections = detectCopilotErrors(result.output); + detectedCopilotErrors.inferenceAccessError ||= attemptDetections.inferenceAccessError; + detectedCopilotErrors.mcpPolicyError ||= attemptDetections.mcpPolicyError; + detectedCopilotErrors.agenticEngineTimeout ||= attemptDetections.agenticEngineTimeout; + detectedCopilotErrors.modelNotSupportedError ||= attemptDetections.modelNotSupportedError; // Success — record exit code and stop retrying if (result.exitCode === 0) { @@ -614,9 +663,13 @@ if (typeof module !== "undefined" && module.exports) { fetchAWFReflect, fetchModelsFromUrl, countPermissionDeniedIssues, + detectCopilotErrors, hasNumerousPermissionDeniedIssues, + INFERENCE_ACCESS_ERROR_PATTERN, + AGENTIC_ENGINE_TIMEOUT_PATTERN, buildMissingToolPermissionIssuePayload, isAuthenticationFailedError, + writeCopilotOutputs, resolvePromptFileArgs, }; } diff --git a/setup/js/detect_copilot_errors.cjs b/setup/js/detect_agent_errors.cjs similarity index 86% rename from setup/js/detect_copilot_errors.cjs rename to setup/js/detect_agent_errors.cjs index 9d55e771..074a5817 100644 --- a/setup/js/detect_copilot_errors.cjs +++ b/setup/js/detect_agent_errors.cjs @@ -71,7 +71,7 @@ function detectErrors(logContent) { function writeOutputs(results) { const outputFile = process.env.GITHUB_OUTPUT; if (!outputFile) { - process.stderr.write("[detect-copilot-errors] GITHUB_OUTPUT not set — skipping output\n"); + process.stderr.write("[detect-agent-errors] GITHUB_OUTPUT not set — skipping output\n"); return; } @@ -90,22 +90,22 @@ function main() { if (fs.existsSync(LOG_FILE)) { logContent = fs.readFileSync(LOG_FILE, "utf8"); } else { - process.stderr.write(`[detect-copilot-errors] Log file not found: ${LOG_FILE}\n`); + process.stderr.write(`[detect-agent-errors] Log file not found: ${LOG_FILE}\n`); } const results = detectErrors(logContent); if (results.inferenceAccessError) { - process.stderr.write("[detect-copilot-errors] Detected inference access error in agent log\n"); + process.stderr.write("[detect-agent-errors] Detected inference access error in agent log\n"); } if (results.mcpPolicyError) { - process.stderr.write("[detect-copilot-errors] Detected MCP policy error in agent log\n"); + process.stderr.write("[detect-agent-errors] Detected MCP policy error in agent log\n"); } if (results.agenticEngineTimeout) { - process.stderr.write("[detect-copilot-errors] Detected timeout: engine process was killed by signal (step timeout-minutes likely exceeded)\n"); + process.stderr.write("[detect-agent-errors] Detected timeout: engine process was killed by signal (step timeout-minutes likely exceeded)\n"); } if (results.modelNotSupportedError) { - process.stderr.write("[detect-copilot-errors] Detected model-not-supported error: the requested model is unavailable for this subscription tier\n"); + process.stderr.write("[detect-agent-errors] Detected model-not-supported error: the requested model is unavailable for this subscription tier\n"); } writeOutputs(results); diff --git a/setup/js/effective_tokens.cjs b/setup/js/effective_tokens.cjs index 2f3f8cd6..cd465451 100644 --- a/setup/js/effective_tokens.cjs +++ b/setup/js/effective_tokens.cjs @@ -212,6 +212,115 @@ function formatET(n) { return `${(n / 1_000_000).toFixed(1).replace(/\.0$/, "")}M`; } +/** + * Build a deterministic compact model identifier for footer rendering. + * Uses well-known shortcuts for popular model families and a deterministic fallback. + * + * Examples: + * - claude-sonnet-4.6 -> sonnet46 + * - gpt-5.5 -> gpt55 + * - claude-opus-4-7 -> opus47 + * + * @param {string|undefined|null} modelName + * @returns {string} + */ +function reduceModelNameToIdentifier(modelName) { + const normalized = String(modelName || "") + .trim() + .toLowerCase(); + if (!normalized) return ""; + + if (normalized === "opus" || normalized === "sonnet" || normalized === "haiku") { + return normalized; + } + + const VERSION_SUFFIX_PATTERN = "[-_\\s]*([0-9]+)(?:[._-]+([0-9]+))?"; + const FALLBACK_LETTER_LENGTH = 3; + const FALLBACK_DIGIT_LENGTH = 2; + const FALLBACK_PADDING_CHAR = "x"; + + /** @type {Array<{ familyPattern: RegExp, versionPattern: RegExp, prefix: string }>} */ + const shortcuts = [ + { familyPattern: /sonnet/, versionPattern: new RegExp(`sonnet${VERSION_SUFFIX_PATTERN}`), prefix: "sonnet" }, + { familyPattern: /opus/, versionPattern: new RegExp(`opus${VERSION_SUFFIX_PATTERN}`), prefix: "opus" }, + { familyPattern: /haiku/, versionPattern: new RegExp(`haiku${VERSION_SUFFIX_PATTERN}`), prefix: "haiku" }, + { familyPattern: /gpt/, versionPattern: new RegExp(`gpt${VERSION_SUFFIX_PATTERN}`), prefix: "gpt" }, + { familyPattern: /gemini/, versionPattern: new RegExp(`gemini${VERSION_SUFFIX_PATTERN}`), prefix: "gem" }, + ]; + + for (const { familyPattern, versionPattern, prefix } of shortcuts) { + if (!familyPattern.test(normalized)) continue; + const version = extractModelVersionDigits(normalized, versionPattern); + return `${prefix}${version}`; + } + + return buildFallbackModelIdentifier(normalized, FALLBACK_LETTER_LENGTH, FALLBACK_DIGIT_LENGTH, FALLBACK_PADDING_CHAR); +} + +/** + * @param {string} normalizedModelName + * @param {RegExp} familyVersionPattern + * @returns {string} + */ +function extractModelVersionDigits(normalizedModelName, familyVersionPattern) { + const familyMatch = normalizedModelName.match(familyVersionPattern); + if (familyMatch) { + return normalizeVersionDigits(familyMatch[1], familyMatch[2]); + } + + const firstNumericMatch = normalizedModelName.match(/([0-9]+)(?:[._-]+([0-9]+))?/); + if (firstNumericMatch) { + return normalizeVersionDigits(firstNumericMatch[1], firstNumericMatch[2]); + } + + return "00"; +} + +/** + * @param {string|undefined} major + * @param {string|undefined} minor + * @returns {string} + */ +function normalizeVersionDigits(major, minor) { + const majorDigit = getFirstDigit(major); + // Treat any 3+ digit minor segment as a build/date-like stamp (e.g. 100, 20250514), + // not a semantic minor version, so identifiers stay stable (gpt-5-2025-08-07 -> gpt50). + const minorIsDateLike = minor && /^\d{3,}$/.test(minor); + const minorDigit = getFirstDigit(minor, Boolean(minorIsDateLike)); + return `${majorDigit}${minorDigit}`; +} + +/** + * @param {string|undefined} value + * @param {boolean} [treatAsMissing=false] + * @returns {string} + */ +function getFirstDigit(value, treatAsMissing = false) { + if (!value || treatAsMissing) return "0"; + const digitMatch = value.match(/\d/); + return digitMatch ? digitMatch[0] : "0"; +} + +/** + * @param {string} normalizedModelName + * @param {number} fallbackLetterLength + * @param {number} fallbackDigitLength + * @param {string} fallbackPaddingChar + * @returns {string} + */ +function buildFallbackModelIdentifier(normalizedModelName, fallbackLetterLength, fallbackDigitLength, fallbackPaddingChar) { + const compact = normalizedModelName.replace(/[^a-z0-9]+/g, ""); + if (!compact) return ""; + + // Pad with "x" to keep a fixed family slot for short/unknown model names. + const letterPart = compact.replace(/[0-9]/g, "").slice(0, fallbackLetterLength).padEnd(fallbackLetterLength, fallbackPaddingChar); + const digitPart = compact + .replace(/[^0-9]/g, "") + .slice(0, fallbackDigitLength) + .padEnd(fallbackDigitLength, "0"); + return `${letterPart}${digitPart}`.slice(0, 5); +} + /** * Resets the cached multipliers (for testing purposes). * @internal @@ -220,18 +329,41 @@ function _resetCache() { _parsedMultipliers = undefined; } +/** + * Resolve the actual model name to use in footer rendering. + * + * Prefers `primary_model` from agent_usage.json (the actual model name recorded + * by the firewall proxy during the run) over `GH_AW_ENGINE_MODEL` (which may be + * a user-supplied alias such as "agent" that hasn't been resolved to a real name). + * + * Falls back to `GH_AW_ENGINE_MODEL` when agent_usage.json is absent, unreadable, + * or does not contain a `primary_model` field (e.g. single-model runs before this + * field was introduced, or runs without token-usage.jsonl data). + * + * @returns {string} + */ +function resolveActualModelName() { + const usage = readAgentUsage(); + if (usage && typeof usage.primary_model === "string" && usage.primary_model) { + return usage.primary_model; + } + return process.env.GH_AW_ENGINE_MODEL || ""; +} + /** * Read effective tokens from the GH_AW_EFFECTIVE_TOKENS environment variable and return * a pre-formatted suffix string suitable for appending to footer text. * Returns "" when the variable is absent or the parsed value is not a positive integer. - * @returns {string} Suffix string, e.g. " · ● 12.5K" or "" + * @returns {string} Suffix string, e.g. " · 12.5K" or "" */ function getEffectiveTokensSuffix() { const raw = process.env.GH_AW_EFFECTIVE_TOKENS ?? ""; const parsed = parseInt(raw, 10); if (!isNaN(parsed) && parsed > 0) { - return ` · ● ${formatET(parsed)}`; + const reducedModel = reduceModelNameToIdentifier(resolveActualModelName()); + const modelPrefix = reducedModel ? `${reducedModel} ` : ""; + return ` · ${modelPrefix}${formatET(parsed)}`; } return ""; } @@ -241,7 +373,7 @@ const AGENT_USAGE_PATH = "/tmp/gh-aw/agent_usage.json"; /** * Read the aggregated token usage written by parse_token_usage.cjs. * Returns null when the file is absent or unparseable. - * @returns {{input_tokens: number, output_tokens: number, cache_read_tokens: number, cache_write_tokens: number, effective_tokens: number} | null} + * @returns {{input_tokens?: number, output_tokens?: number, cache_read_tokens?: number, cache_write_tokens?: number, effective_tokens?: number, primary_model?: string} | null} */ function readAgentUsage() { try { @@ -336,6 +468,8 @@ module.exports = { computeBaseWeightedTokens, computeEffectiveTokens, formatET, + reduceModelNameToIdentifier, + resolveActualModelName, getEffectiveTokensSuffix, AGENT_USAGE_PATH, readAgentUsage, diff --git a/setup/js/generate_git_patch.cjs b/setup/js/generate_git_patch.cjs index 01e30a4c..8d8723fe 100644 --- a/setup/js/generate_git_patch.cjs +++ b/setup/js/generate_git_patch.cjs @@ -372,24 +372,46 @@ async function generateGitPatch(branchName, baseBranch, options = {}) { debugLog(`Strategy 3: Found ${commitCount} commits not reachable from any remote ref`); if (commitCount > 0) { - // Get the merge-base with the first remote ref (typically origin/HEAD or origin/main) - // to determine the starting point for the patch - let baseCommit; + // Choose the closest merge-base across all remote refs. + // for-each-ref output is lexicographic, so "first ref" is arbitrary and can + // point to stale branches that produce oversized patches. + let bestBaseCommit = null; + let bestBaseRef = null; + let bestCommitCount = Number.POSITIVE_INFINITY; for (const ref of remoteRefs) { try { - baseCommit = execGitSync(["merge-base", ref, branchName], { cwd }).trim(); - if (baseCommit) { - debugLog(`Strategy 3: Found merge-base ${baseCommit} with ref ${ref}`); - break; + const candidateBase = execGitSync(["merge-base", ref, "--", branchName], { cwd }).trim(); + if (!candidateBase) { + continue; + } + + const candidateCommitCount = parseInt(execGitSync(["rev-list", "--count", `${candidateBase}..${branchName}`], { cwd }).trim(), 10); + if (Number.isNaN(candidateCommitCount)) { + debugLog(`Strategy 3: Ignoring merge-base ${candidateBase} from ref ${ref} due to invalid commit count`); + continue; + } + if (candidateCommitCount <= 0) { + debugLog(`Strategy 3: Skipping ref ${ref} — merge-base not behind branch (count=${candidateCommitCount})`); + continue; + } + + if (candidateCommitCount < bestCommitCount) { + bestBaseCommit = candidateBase; + bestBaseRef = ref; + bestCommitCount = candidateCommitCount; + if (bestCommitCount === 1) { + break; + } } } catch { // Try next ref } } - if (baseCommit) { - baseCommitSha = baseCommit; - const patchContent = execGitSync(["format-patch", `${baseCommit}..${branchName}`, "--stdout", ...excludeArgs()], { cwd }); + if (bestBaseCommit) { + baseCommitSha = bestBaseCommit; + debugLog(`Strategy 3: Selected merge-base ${bestBaseCommit} with ref ${bestBaseRef} (commitCount=${bestCommitCount})`); + const patchContent = execGitSync(["format-patch", `${bestBaseCommit}..${branchName}`, "--stdout", ...excludeArgs()], { cwd }); if (patchContent && patchContent.trim()) { fs.writeFileSync(patchPath, patchContent, "utf8"); diff --git a/setup/js/messages_core.cjs b/setup/js/messages_core.cjs index 120d217d..c37ede53 100644 --- a/setup/js/messages_core.cjs +++ b/setup/js/messages_core.cjs @@ -16,7 +16,7 @@ * - {triggering_number} - Issue/PR/Discussion number that triggered this workflow * - {effective_tokens} - Raw total effective token count for the run (e.g. 1200), only present when > 0 * - {effective_tokens_formatted} - Compact formatted effective tokens (e.g. "1.2K", "3M"), only present when > 0 - * - {effective_tokens_suffix} - Pre-formatted suffix including the ● symbol (e.g. " · ● 1.2K"), or "" when not available + * - {effective_tokens_suffix} - Pre-formatted suffix (e.g. " · 1.2K"), or "" when not available * - {operation} - Operation name (for staged mode titles/descriptions) * - {event_type} - Event type description (for run-started messages) * - {status} - Workflow status text (for run-failure messages) diff --git a/setup/js/messages_footer.cjs b/setup/js/messages_footer.cjs index 92a58e31..2e9497c2 100644 --- a/setup/js/messages_footer.cjs +++ b/setup/js/messages_footer.cjs @@ -12,7 +12,7 @@ const { getMessages, renderTemplate, renderTemplateFromFile, toSnakeCase, getPro const { getMissingInfoSections } = require("./missing_messages_helper.cjs"); const { getBlockedDomains, generateBlockedDomainsSection } = require("./firewall_blocked_domains.cjs"); const { getDifcFilteredEvents, generateDifcFilteredSection } = require("./gateway_difc_filtered.cjs"); -const { formatET } = require("./effective_tokens.cjs"); +const { formatET, reduceModelNameToIdentifier, resolveActualModelName } = require("./effective_tokens.cjs"); const { getDetectionWarningMessage } = require("./messages_run_status.cjs"); /** @@ -37,18 +37,29 @@ function getDetectionCautionAlert(workflowName, runUrl) { * both the raw count, compact formatted string, and a pre-formatted suffix. * Returns undefined/empty for all fields when the variable is absent or the parsed value * is not a positive integer. + * @param {string} modelName * @returns {{ effectiveTokens: number|undefined, effectiveTokensFormatted: string|undefined, effectiveTokensSuffix: string }} */ -function getEffectiveTokensFromEnv() { +function getEffectiveTokensFromEnv(modelName) { const raw = process.env.GH_AW_EFFECTIVE_TOKENS; const parsed = raw ? parseInt(raw, 10) : NaN; if (!isNaN(parsed) && parsed > 0) { + const modelPrefix = buildModelPrefix(modelName); const effectiveTokensFormatted = formatET(parsed); - return { effectiveTokens: parsed, effectiveTokensFormatted, effectiveTokensSuffix: ` · ● ${effectiveTokensFormatted}` }; + return { effectiveTokens: parsed, effectiveTokensFormatted, effectiveTokensSuffix: ` · ${modelPrefix}${effectiveTokensFormatted}` }; } return { effectiveTokens: undefined, effectiveTokensFormatted: undefined, effectiveTokensSuffix: "" }; } +/** + * @param {string} modelName + * @returns {string} + */ +function buildModelPrefix(modelName) { + const reducedModel = reduceModelNameToIdentifier(modelName); + return reducedModel ? `${reducedModel} ` : ""; +} + /** * @typedef {Object} FooterContext * @property {string} workflowName - Name of the workflow @@ -59,7 +70,8 @@ function getEffectiveTokensFromEnv() { * @property {number|string} [triggeringNumber] - Issue, PR, or discussion number that triggered this workflow * @property {string} [historyUrl] - GitHub search URL for items created by this workflow (for the history link) * @property {string} [historyLink] - Pre-formatted markdown history link (e.g. " · [◷](url)"), or "" if unavailable - * @property {number} [effectiveTokens] - Total effective token count for the run (shown as ● N when > 0, in compact format) + * @property {number} [effectiveTokens] - Total effective token count for the run (shown as N when > 0, in compact format) + * @property {string} [model] - Model name used for the run, used to build a compact model identifier in ET suffixes * @property {string} [emoji] - Optional emoji representing the workflow (from frontmatter) */ @@ -74,7 +86,10 @@ function getFooterMessage(ctx) { // Use effectiveTokens from context if provided, otherwise fall back to env var. // This ensures callers that don't pass effectiveTokens (e.g. update_activation_comment.cjs) // still get the effective token count in the footer when GH_AW_EFFECTIVE_TOKENS is set. - const { effectiveTokens: envEffectiveTokens } = getEffectiveTokensFromEnv(); + // Prefer the actual model name from token-usage data (primary_model in agent_usage.json) + // over GH_AW_ENGINE_MODEL, which may be a user-supplied alias (e.g. "agent"). + const resolvedModelName = ctx.model || resolveActualModelName(); + const { effectiveTokens: envEffectiveTokens, effectiveTokensFormatted: envEffectiveTokensFormatted, effectiveTokensSuffix: envEffectiveTokensSuffix } = getEffectiveTokensFromEnv(resolvedModelName); const effectiveTokens = ctx.effectiveTokens ?? envEffectiveTokens; // Pre-compute history_link as a ready-to-use markdown suffix (empty string when unavailable) @@ -84,9 +99,19 @@ function getFooterMessage(ctx) { const agenticWorkflowUrl = ctx.agenticWorkflowUrl || (ctx.runUrl ? `${ctx.runUrl}/agentic_workflow` : ""); // Pre-compute effective_tokens_formatted and effective_tokens_suffix for use in custom templates - const effectiveTokensFormatted = effectiveTokens ? formatET(effectiveTokens) : undefined; - // effective_tokens_suffix is always a string: either " · ● 1.2K" or "" (for safe use in templates) - const effectiveTokensSuffix = effectiveTokensFormatted ? ` · ● ${effectiveTokensFormatted}` : ""; + const hasExplicitContextEffectiveTokens = ctx.effectiveTokens !== undefined && ctx.effectiveTokens !== null; + let effectiveTokensFormatted = envEffectiveTokensFormatted; + // effective_tokens_suffix is always a string: either " · 1.2K" or "" (for safe use in templates) + let effectiveTokensSuffix = envEffectiveTokensSuffix; + if (hasExplicitContextEffectiveTokens) { + effectiveTokensFormatted = effectiveTokens ? formatET(effectiveTokens) : undefined; + if (effectiveTokensFormatted) { + const modelPrefix = buildModelPrefix(resolvedModelName); + effectiveTokensSuffix = ` · ${modelPrefix}${effectiveTokensFormatted}`; + } else { + effectiveTokensSuffix = ""; + } + } // Create context with both camelCase and snake_case keys, including computed history_link and agentic_workflow_url const templateContext = toSnakeCase({ ...ctx, effectiveTokens, historyLink, agenticWorkflowUrl, effectiveTokensFormatted, effectiveTokensSuffix }); @@ -102,9 +127,9 @@ function getFooterMessage(ctx) { if (ctx.triggeringNumber) { defaultFooter += " for issue #{triggering_number}"; } - // Append effective tokens with ● symbol when available (compact format, no "ET" label) + // Append effective tokens when available (compact format, no "ET" label) if (effectiveTokens) { - defaultFooter += ` · ● ${formatET(effectiveTokens)}`; + defaultFooter += effectiveTokensSuffix; } // Append history link when available if (ctx.historyUrl) { @@ -157,7 +182,8 @@ function getFooterWorkflowRecompileMessage(ctx) { const agenticWorkflowUrl = ctx.agenticWorkflowUrl || (ctx.runUrl ? `${ctx.runUrl}/agentic_workflow` : ""); // Read effective tokens from environment variable if available - const { effectiveTokens, effectiveTokensFormatted, effectiveTokensSuffix } = getEffectiveTokensFromEnv(); + const modelName = resolveActualModelName(); + const { effectiveTokens, effectiveTokensFormatted, effectiveTokensSuffix } = getEffectiveTokensFromEnv(modelName); // Create context with both camelCase and snake_case keys const templateContext = toSnakeCase({ ...ctx, agenticWorkflowUrl, effectiveTokens, effectiveTokensFormatted, effectiveTokensSuffix }); @@ -183,7 +209,8 @@ function getFooterWorkflowRecompileCommentMessage(ctx) { const agenticWorkflowUrl = ctx.agenticWorkflowUrl || (ctx.runUrl ? `${ctx.runUrl}/agentic_workflow` : ""); // Read effective tokens from environment variable if available - const { effectiveTokens, effectiveTokensFormatted, effectiveTokensSuffix } = getEffectiveTokensFromEnv(); + const modelName = resolveActualModelName(); + const { effectiveTokens, effectiveTokensFormatted, effectiveTokensSuffix } = getEffectiveTokensFromEnv(modelName); // Create context with both camelCase and snake_case keys const templateContext = toSnakeCase({ ...ctx, agenticWorkflowUrl, effectiveTokens, effectiveTokensFormatted, effectiveTokensSuffix }); @@ -222,7 +249,8 @@ function getFooterAgentFailureIssueMessage(ctx) { const agenticWorkflowUrl = ctx.agenticWorkflowUrl || (ctx.runUrl ? `${ctx.runUrl}/agentic_workflow` : ""); // Read effective tokens from environment variable if available - const { effectiveTokens, effectiveTokensFormatted, effectiveTokensSuffix } = getEffectiveTokensFromEnv(); + const modelName = resolveActualModelName(); + const { effectiveTokens, effectiveTokensFormatted, effectiveTokensSuffix } = getEffectiveTokensFromEnv(modelName); // Create context with both camelCase and snake_case keys, including computed history_link and agentic_workflow_url const templateContext = toSnakeCase({ ...ctx, historyLink, agenticWorkflowUrl, effectiveTokens, effectiveTokensFormatted, effectiveTokensSuffix }); @@ -234,7 +262,7 @@ function getFooterAgentFailureIssueMessage(ctx) { } else { // Default footer template with link to workflow run let defaultFooter = "> Generated from [{workflow_name}]({run_url})"; - // Append effective tokens with ● symbol when available (compact format, no "ET" label) + // Append effective tokens when available (compact format, no "ET" label) if (effectiveTokens) { defaultFooter += `{effective_tokens_suffix}`; } @@ -263,7 +291,8 @@ function getFooterAgentFailureCommentMessage(ctx) { const agenticWorkflowUrl = ctx.agenticWorkflowUrl || (ctx.runUrl ? `${ctx.runUrl}/agentic_workflow` : ""); // Read effective tokens from environment variable if available - const { effectiveTokens, effectiveTokensFormatted, effectiveTokensSuffix } = getEffectiveTokensFromEnv(); + const modelName = resolveActualModelName(); + const { effectiveTokens, effectiveTokensFormatted, effectiveTokensSuffix } = getEffectiveTokensFromEnv(modelName); // Create context with both camelCase and snake_case keys, including computed history_link and agentic_workflow_url const templateContext = toSnakeCase({ ...ctx, historyLink, agenticWorkflowUrl, effectiveTokens, effectiveTokensFormatted, effectiveTokensSuffix }); @@ -275,7 +304,7 @@ function getFooterAgentFailureCommentMessage(ctx) { } else { // Default footer template with link to workflow run let defaultFooter = "> Generated from [{workflow_name}]({run_url})"; - // Append effective tokens with ● symbol when available (compact format, no "ET" label) + // Append effective tokens when available (compact format, no "ET" label) if (effectiveTokens) { defaultFooter += `{effective_tokens_suffix}`; } @@ -386,7 +415,9 @@ function generateFooterWithMessages(workflowName, runUrl, workflowSource, workfl // Read effective tokens from environment variable if available. // GH_AW_EFFECTIVE_TOKENS is set by parse_mcp_gateway_log.cjs after computing ET // from the token-usage.jsonl produced by the firewall proxy. - const { effectiveTokens } = getEffectiveTokensFromEnv(); + // Prefer the actual model name from token-usage data over GH_AW_ENGINE_MODEL alias. + const modelName = resolveActualModelName(); + const { effectiveTokens } = getEffectiveTokensFromEnv(modelName); // Read workflow emoji from environment variable if available. const emoji = process.env.GH_AW_WORKFLOW_EMOJI || undefined; diff --git a/setup/js/parse_token_usage.cjs b/setup/js/parse_token_usage.cjs index 7aa337e7..ec37f4ac 100644 --- a/setup/js/parse_token_usage.cjs +++ b/setup/js/parse_token_usage.cjs @@ -113,12 +113,26 @@ async function main() { // Write agent_usage.json so the aggregated totals are bundled in the agent // artifact and accessible to third-party tools without parsing the step summary. const effectiveTokens = Math.round(summary.totalEffectiveTokens || 0); + + // Determine the primary model: the one with the highest effective tokens. + // This is the actual model name from the API call logs, which may differ from + // GH_AW_ENGINE_MODEL when the user specified a model alias (e.g. "agent"). + let primaryModel = ""; + let primaryModelET = -1; + for (const [model, usage] of Object.entries(summary.byModel || {})) { + if (model !== "unknown" && usage && typeof usage.effectiveTokens === "number" && usage.effectiveTokens > primaryModelET) { + primaryModelET = usage.effectiveTokens; + primaryModel = model; + } + } + const agentUsage = { input_tokens: summary.totalInputTokens, output_tokens: summary.totalOutputTokens, cache_read_tokens: summary.totalCacheReadTokens, cache_write_tokens: summary.totalCacheWriteTokens, effective_tokens: effectiveTokens, + ...(primaryModel ? { primary_model: primaryModel } : {}), }; fs.writeFileSync(AGENT_USAGE_PATH, JSON.stringify(agentUsage) + "\n");