Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/aw/compat.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"min-gh-aw": "0.72.0",
"max-gh-aw": "*",
"min-agent": "1.0.21",
"max-agent": "1.0.48",
"max-agent": "1.0.51",
"open": true
},
{
Expand Down
99 changes: 79 additions & 20 deletions setup/js/awf_reflect.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,11 @@

"use strict";

require("./shim.cjs");

const fs = require("fs");
const path = require("path");
const { withRetry } = require("./error_recovery.cjs");

// AWF API proxy management endpoint for discovering configured LLM providers and available models.
// The api-proxy sidecar exposes /reflect on its management port (port 10000) inside the AWF
Expand All @@ -29,6 +32,12 @@ const AWF_REFLECT_OUTPUT_PATH = "/tmp/gh-aw/sandbox/firewall/awf-reflect.json";
const AWF_REFLECT_TIMEOUT_MS = 60000;
// Milliseconds to wait for each models_url fallback fetch (shorter than the main reflect timeout).
const AWF_MODELS_URL_TIMEOUT_MS = 3000;
// Maximum attempts for models_url fallback fetches when the proxy is not yet ready.
// This counts the initial request: fetchModelsFromUrl passes
// AWF_MODELS_URL_MAX_ATTEMPTS - 1 to withRetry as maxRetries.
const AWF_MODELS_URL_MAX_ATTEMPTS = 5;
// Base delay (milliseconds) between models_url fallback retries. Uses exponential backoff;
// fetchModelsFromUrl configures a backoff multiplier of 2.
const AWF_MODELS_URL_RETRY_BASE_MS = 250;
// Cap (milliseconds) for the exponential backoff delay between retries.
const AWF_MODELS_URL_RETRY_MAX_MS = 2000;
// Gemini model name prefix stripped from model IDs in the Gemini models API response.
// Example: { name: "models/gemini-1.5-pro" } → "gemini-1.5-pro"
const GEMINI_MODEL_NAME_PREFIX = "models/";
Expand Down Expand Up @@ -84,32 +93,79 @@ function extractModelIds(json) {
* @returns {Promise<string[]|null>}
*/
async function fetchModelsFromUrl(modelsUrl, timeoutMs, logger) {
const ac = new AbortController();
const timer = setTimeout(() => {
logger(`awf-reflect: models fetch timed out for ${modelsUrl}`);
ac.abort();
}, timeoutMs);
let attemptCounter = 0;
const retryConfig = {
maxRetries: AWF_MODELS_URL_MAX_ATTEMPTS - 1,
// withRetry multiplies delay before the next attempt, so divide by 2 here
// to preserve the intended first backoff of AWF_MODELS_URL_RETRY_BASE_MS.
initialDelayMs: Math.ceil(AWF_MODELS_URL_RETRY_BASE_MS / 2),
maxDelayMs: AWF_MODELS_URL_RETRY_MAX_MS,
backoffMultiplier: 2,
jitterMs: 0,
shouldRetry: error => {
const original = error?.originalError || error;
const status = original?.status ?? original?.response?.status ?? null;
const shouldRetry = status === 503;
if (shouldRetry && attemptCounter < AWF_MODELS_URL_MAX_ATTEMPTS) {
logger(`awf-reflect: models fetch returned 503 for ${modelsUrl}; retrying (attempt ${attemptCounter + 1}/${AWF_MODELS_URL_MAX_ATTEMPTS})`);
}
return shouldRetry;
},
};

try {
const res = await fetch(modelsUrl, { signal: ac.signal });
if (!res.ok) {
logger(`awf-reflect: models fetch returned ${res.status} for ${modelsUrl}`);
return null;
}
const json = await res.json();
const models = extractModelIds(json);
if (models) {
logger(`awf-reflect: fetched ${models.length} model(s) from ${modelsUrl}`);
}
return models;
return await withRetry(
async () => {
attemptCounter += 1;
const ac = new AbortController();
const timer = setTimeout(() => {
logger(`awf-reflect: models fetch timed out for ${modelsUrl}`);
ac.abort();
}, timeoutMs);
try {
const res = await fetch(modelsUrl, { signal: ac.signal });
if (!res.ok) {
if (res.status === 503) {
const err = Object.assign(new Error(`models fetch returned 503 for ${modelsUrl}`), { status: 503 });
throw err;
}
logger(`awf-reflect: models fetch returned ${res.status} for ${modelsUrl}`);
return null;
}
const json = await res.json();
const models = extractModelIds(json);
if (models) {
logger(`awf-reflect: fetched ${models.length} model(s) from ${modelsUrl}`);
}
return models;
} catch (err) {
const e = /** @type {Error} */ err;
if (e.name === "AbortError") {
return null; // already logged above
}
const status = e?.status ?? e?.response?.status ?? null;
if (status === 503) {
throw e;
}
logger(`awf-reflect: models fetch error for ${modelsUrl}: ${e.message}`);
return null;
} finally {
clearTimeout(timer);
}
},
retryConfig,
`awf-reflect models fetch for ${modelsUrl}`
);
} catch (err) {
const e = /** @type {Error} */ err;
if (e.name === "AbortError") {
return null; // already logged above
const original = e?.originalError || e;
const status = original?.status ?? original?.response?.status ?? null;
if (status === 503) {
logger(`awf-reflect: models fetch returned 503 for ${modelsUrl}`);
return null;
}
logger(`awf-reflect: models fetch error for ${modelsUrl}: ${e.message}`);
return null;
} finally {
clearTimeout(timer);
}
}

Expand Down Expand Up @@ -250,6 +306,9 @@ if (typeof module !== "undefined" && module.exports) {
AWF_REFLECT_OUTPUT_PATH,
AWF_REFLECT_TIMEOUT_MS,
AWF_MODELS_URL_TIMEOUT_MS,
AWF_MODELS_URL_MAX_ATTEMPTS,
AWF_MODELS_URL_RETRY_BASE_MS,
AWF_MODELS_URL_RETRY_MAX_MS,
GEMINI_MODEL_NAME_PREFIX,
enrichReflectModels,
extractModelIds,
Expand Down
58 changes: 58 additions & 0 deletions setup/js/codex_harness.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,11 @@ const MAX_DELAY_MS = 60000;
const RATE_LIMIT_ERROR_PATTERN = /rate_limit_exceeded|429 Too Many Requests|RateLimitError/i;
const AUTHENTICATION_FAILED_PATTERN = /Authentication failed(?:\s*\(Request ID:[^)]+\))?/i;

// Pattern to detect a missing API key at startup — Codex emits this before making any API
// calls when neither CODEX_API_KEY nor OPENAI_API_KEY is available in the environment.
// Example: "ERROR: Missing environment variable: `OPENAI_API_KEY`"
// The match is case-insensitive, the backticks around the variable name are optional,
// and only the two variable names listed in the alternation are recognised.
const MISSING_API_KEY_PATTERN = /Missing environment variable:\s*`?(?:CODEX_API_KEY|OPENAI_API_KEY)\b`?/i;

// Pattern to detect OpenAI server-side errors (HTTP 500, 503).
// These are transient infrastructure failures that may resolve on retry.
const SERVER_ERROR_PATTERN = /InternalServerError|ServiceUnavailableError|500 Internal Server Error|503 Service Unavailable/i;
Expand Down Expand Up @@ -95,6 +100,16 @@ function isAuthenticationFailedError(output) {
return AUTHENTICATION_FAILED_PATTERN.test(output);
}

/**
 * Reports whether the captured process output contains the "missing API key"
 * startup message described by MISSING_API_KEY_PATTERN.
 * Codex exits before producing any agent output in this case, so retrying is futile.
 * @param {string} output - Collected stdout+stderr from the process
 * @returns {boolean} true when the pattern is found anywhere in the output
 */
function isMissingApiKeyError(output) {
  const keyIsMissing = MISSING_API_KEY_PATTERN.test(output);
  return keyIsMissing;
}

/**
* Determines if the collected output contains an OpenAI server error.
* @param {string} output - Collected stdout+stderr from the process
Expand Down Expand Up @@ -245,6 +260,22 @@ function resolveCodexPromptFileArgs(args) {
return filteredArgs;
}

/**
 * Ensure `--json` directly follows the `exec` subcommand so that Codex streams
 * structured JSON Lines (JSONL) to stdout, giving CI pipelines machine-readable
 * output without changing how stderr progress output works.
 *
 * The input array is returned untouched (same reference) when the first argument
 * is not `exec`, when the list is empty, or when `--json` already appears anywhere
 * in the list. Otherwise a new array is returned; the input is never mutated.
 *
 * @param {string[]} args - Codex CLI arguments, subcommand first
 * @returns {string[]} the original array, or a copy with `--json` inserted at index 1
 */
function injectJsonFlag(args) {
  const needsFlag = args.length > 0 && args[0] === "exec" && !args.includes("--json");
  if (!needsFlag) {
    return args;
  }

  const afterSubcommand = args.slice(1);
  return ["exec", "--json", ...afterSubcommand];
}

/**
* Main entry point: run codex with retry logic for transient API failures.
* Codex does not support --continue session resumption, so all retries are fresh runs.
Expand All @@ -259,6 +290,20 @@ async function main() {

log(`starting: command=${command} maxRetries=${MAX_RETRIES} initialDelayMs=${INITIAL_DELAY_MS}` + ` backoffMultiplier=${BACKOFF_MULTIPLIER} maxDelayMs=${MAX_DELAY_MS}` + ` nodeVersion=${process.version} platform=${process.platform}`);

// Diagnose API key presence so CI failures can be triaged without exposing secret values.
const codexApiKey = process.env.CODEX_API_KEY;
const openaiApiKey = process.env.OPENAI_API_KEY;
log(`secrets: CODEX_API_KEY=${codexApiKey ? `set (length=${codexApiKey.length})` : "not set"}` + ` OPENAI_API_KEY=${openaiApiKey ? `set (length=${openaiApiKey.length})` : "not set"}`);

// Pre-flight: require at least one API key before spawning codex.
// Without a key, codex exits immediately with "Missing environment variable" and every
// retry attempt fails the same way. Failing here avoids burning the retry budget and
// surfaces a clear, actionable message in CI logs.
if (!codexApiKey && !openaiApiKey) {
log("fatal: no API key available - set CODEX_API_KEY or OPENAI_API_KEY and retry");
process.exit(1);
}

// Resolve the prompt for the initial run (reads --prompt-file content).
// A missing or unreadable prompt file is treated as a fatal startup error.
let resolvedArgs;
Expand All @@ -276,6 +321,10 @@ async function main() {
const hadPromptFile = args.includes("--prompt-file");
const safeArgs = hadPromptFile && resolvedArgs.length > 0 ? [...resolvedArgs.slice(0, -1), "<prompt omitted>"] : resolvedArgs;

// Inject --json after `exec` to stream structured JSONL events to stdout, making
// Codex output machine-readable in CI without affecting the stderr progress stream.
resolvedArgs = injectJsonFlag(resolvedArgs);

// Fetch AWF API proxy reflection data before running the agent to capture initial proxy state.
// This is best-effort: failures are logged but do not affect the agent run.
await fetchAWFReflect({ logger: log });
Expand Down Expand Up @@ -308,6 +357,7 @@ async function main() {

const isRateLimit = isRateLimitError(result.output);
const isAuthenticationFailed = isAuthenticationFailedError(result.output);
const isMissingApiKey = isMissingApiKeyError(result.output);
const isServer = isServerError(result.output);
const permissionDeniedCount = countPermissionDeniedIssues(result.output);
const hasNumerousPermissionDenied = hasNumerousPermissionDeniedIssues(result.output);
Expand All @@ -316,6 +366,7 @@ async function main() {
` exitCode=${result.exitCode}` +
` isRateLimitError=${isRateLimit}` +
` isAuthenticationFailedError=${isAuthenticationFailed}` +
` isMissingApiKeyError=${isMissingApiKey}` +
` isServerError=${isServer}` +
` permissionDeniedCount=${permissionDeniedCount}` +
` hasNumerousPermissionDenied=${hasNumerousPermissionDenied}` +
Expand All @@ -328,6 +379,11 @@ async function main() {
break;
}

if (isMissingApiKey) {
log(`attempt ${attempt + 1}: missing API key — not retrying (configure CODEX_API_KEY or OPENAI_API_KEY)`);
break;
}

if (hasNumerousPermissionDenied) {
const deniedCommands = extractDeniedCommands(result.output);
emitMissingToolPermissionIssue({ deniedCommands });
Expand Down Expand Up @@ -363,8 +419,10 @@ async function main() {
if (typeof module !== "undefined" && module.exports) {
module.exports = {
resolveCodexPromptFileArgs,
injectJsonFlag,
isRateLimitError,
isAuthenticationFailedError,
isMissingApiKeyError,
isServerError,
countPermissionDeniedIssues,
hasNumerousPermissionDeniedIssues,
Expand Down
53 changes: 53 additions & 0 deletions setup/js/copilot_harness.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,10 @@ const NO_AUTH_INFO_PATTERN = /No authentication information found/;
// After a first-attempt auth failure, retrying is futile because the entrypoint unsets
// COPILOT_GITHUB_TOKEN between attempts.
const AUTHENTICATION_FAILED_PATTERN = /Authentication failed(?:\s*\(Request ID:[^)]+\))?/i;
// Pattern: Copilot CLI inference access denied.
// Matches either literal phrase "Access denied by policy settings" or
// "invalid access to inference" (case-sensitive — no `i` flag).
const INFERENCE_ACCESS_ERROR_PATTERN = /Access denied by policy settings|invalid access to inference/;
// Pattern: Agentic engine process killed by signal (timeout).
// Matches "signal=SIGTERM", "signal=SIGKILL" or "signal=SIGINT" anywhere in the output.
const AGENTIC_ENGINE_TIMEOUT_PATTERN = /signal=SIG(?:TERM|KILL|INT)/;

// Pattern to detect null-type tool_call error that poisons conversation history.
// Matches the Copilot API 400 error:
Expand Down Expand Up @@ -166,6 +170,40 @@ function isAuthenticationFailedError(output) {
return AUTHENTICATION_FAILED_PATTERN.test(output);
}

/**
 * Scan collected process output for the known Copilot failure signatures that are
 * surfaced as workflow outputs.
 *
 * Each flag is computed independently from the same output text:
 * - inferenceAccessError: INFERENCE_ACCESS_ERROR_PATTERN matches
 * - mcpPolicyError: isMCPPolicyError(output) is true
 * - agenticEngineTimeout: AGENTIC_ENGINE_TIMEOUT_PATTERN matches
 * - modelNotSupportedError: isModelNotSupportedError(output) is true
 *
 * @param {string} output - Collected output from a single process run
 * @returns {{ inferenceAccessError: boolean, mcpPolicyError: boolean, agenticEngineTimeout: boolean, modelNotSupportedError: boolean }}
 */
function detectCopilotErrors(output) {
  const inferenceAccessError = INFERENCE_ACCESS_ERROR_PATTERN.test(output);
  const mcpPolicyError = isMCPPolicyError(output);
  const agenticEngineTimeout = AGENTIC_ENGINE_TIMEOUT_PATTERN.test(output);
  const modelNotSupportedError = isModelNotSupportedError(output);

  return { inferenceAccessError, mcpPolicyError, agenticEngineTimeout, modelNotSupportedError };
}

/**
 * Append the Copilot error-detection flags to the file named by $GITHUB_OUTPUT.
 *
 * Four `key=value` lines are appended, each terminated by a newline:
 * inference_access_error, mcp_policy_error, agentic_engine_timeout and
 * model_not_supported_error. When GITHUB_OUTPUT is unset or empty, nothing is
 * written and a note is logged instead.
 *
 * @param {{ inferenceAccessError: boolean, mcpPolicyError: boolean, agenticEngineTimeout: boolean, modelNotSupportedError: boolean }} results
 */
function writeCopilotOutputs(results) {
  const { GITHUB_OUTPUT: outputPath } = process.env;

  if (outputPath) {
    // Output key paired with the detection flag it reports, in emission order.
    const entries = [
      ["inference_access_error", results.inferenceAccessError],
      ["mcp_policy_error", results.mcpPolicyError],
      ["agentic_engine_timeout", results.agenticEngineTimeout],
      ["model_not_supported_error", results.modelNotSupportedError],
    ];
    const payload = entries.map(([key, value]) => `${key}=${value}\n`).join("");
    fs.appendFileSync(outputPath, payload);
    return;
  }

  log("GITHUB_OUTPUT not set — skipping copilot error outputs");
}

/**
* Determines if the collected output contains a null-type tool_call error.
* This error occurs when the model emits a malformed tool call with type: null.
Expand Down Expand Up @@ -436,6 +474,12 @@ async function main() {
// This prevents a broken --continue recovery from resurrecting --continue on the next attempt.
let continueDisabledPermanently = false;
const driverStartTime = Date.now();
const detectedCopilotErrors = {
inferenceAccessError: false,
mcpPolicyError: false,
agenticEngineTimeout: false,
modelNotSupportedError: false,
};

for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
// Add --continue flag on retries so the copilot session continues from where it left off
Expand All @@ -453,6 +497,11 @@ async function main() {
const safeArgs = currentArgs.map((arg, i) => (currentArgs[i - 1] === "--prompt" || currentArgs[i - 1] === "-p" ? "<redacted>" : arg));
const result = await runProcess({ command, args: currentArgs, attempt, log, logArgs: safeArgs });
lastExitCode = result.exitCode;
const attemptDetections = detectCopilotErrors(result.output);
detectedCopilotErrors.inferenceAccessError ||= attemptDetections.inferenceAccessError;
detectedCopilotErrors.mcpPolicyError ||= attemptDetections.mcpPolicyError;
detectedCopilotErrors.agenticEngineTimeout ||= attemptDetections.agenticEngineTimeout;
detectedCopilotErrors.modelNotSupportedError ||= attemptDetections.modelNotSupportedError;
Comment on lines 477 to +504

// Success — record exit code and stop retrying
if (result.exitCode === 0) {
Expand Down Expand Up @@ -614,9 +663,13 @@ if (typeof module !== "undefined" && module.exports) {
fetchAWFReflect,
fetchModelsFromUrl,
countPermissionDeniedIssues,
detectCopilotErrors,
hasNumerousPermissionDeniedIssues,
INFERENCE_ACCESS_ERROR_PATTERN,
AGENTIC_ENGINE_TIMEOUT_PATTERN,
buildMissingToolPermissionIssuePayload,
isAuthenticationFailedError,
writeCopilotOutputs,
resolvePromptFileArgs,
};
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ function detectErrors(logContent) {
function writeOutputs(results) {
const outputFile = process.env.GITHUB_OUTPUT;
if (!outputFile) {
process.stderr.write("[detect-copilot-errors] GITHUB_OUTPUT not set — skipping output\n");
process.stderr.write("[detect-agent-errors] GITHUB_OUTPUT not set — skipping output\n");
return;
}

Expand All @@ -90,22 +90,22 @@ function main() {
if (fs.existsSync(LOG_FILE)) {
logContent = fs.readFileSync(LOG_FILE, "utf8");
} else {
process.stderr.write(`[detect-copilot-errors] Log file not found: ${LOG_FILE}\n`);
process.stderr.write(`[detect-agent-errors] Log file not found: ${LOG_FILE}\n`);
}

const results = detectErrors(logContent);

if (results.inferenceAccessError) {
process.stderr.write("[detect-copilot-errors] Detected inference access error in agent log\n");
process.stderr.write("[detect-agent-errors] Detected inference access error in agent log\n");
}
if (results.mcpPolicyError) {
process.stderr.write("[detect-copilot-errors] Detected MCP policy error in agent log\n");
process.stderr.write("[detect-agent-errors] Detected MCP policy error in agent log\n");
}
if (results.agenticEngineTimeout) {
process.stderr.write("[detect-copilot-errors] Detected timeout: engine process was killed by signal (step timeout-minutes likely exceeded)\n");
process.stderr.write("[detect-agent-errors] Detected timeout: engine process was killed by signal (step timeout-minutes likely exceeded)\n");
}
if (results.modelNotSupportedError) {
process.stderr.write("[detect-copilot-errors] Detected model-not-supported error: the requested model is unavailable for this subscription tier\n");
process.stderr.write("[detect-agent-errors] Detected model-not-supported error: the requested model is unavailable for this subscription tier\n");
}

writeOutputs(results);
Expand Down
Loading
Loading