github · pelikhan · May 24, 2026 · May 24, 2026
diff --git a/.github/aw/compat.json b/.github/aw/compat.json
@@ -7,7 +7,7 @@
         "min-gh-aw": "0.72.0",
         "max-gh-aw": "*",
         "min-agent": "1.0.21",
-        "max-agent": "1.0.48",
+        "max-agent": "1.0.51",
         "open": true
       },
       {

diff --git a/setup/js/awf_reflect.cjs b/setup/js/awf_reflect.cjs
@@ -15,8 +15,11 @@
 
 "use strict";
 
+require("./shim.cjs");
+
 const fs = require("fs");
 const path = require("path");
+const { withRetry } = require("./error_recovery.cjs");
 
 // AWF API proxy management endpoint for discovering configured LLM providers and available models.
 // The api-proxy sidecar exposes /reflect on its management port (port 10000) inside the AWF
@@ -29,6 +32,12 @@ const AWF_REFLECT_OUTPUT_PATH = "/tmp/gh-aw/sandbox/firewall/awf-reflect.json";
 const AWF_REFLECT_TIMEOUT_MS = 60000;
 // Milliseconds to wait for each models_url fallback fetch (shorter than the main reflect timeout).
 const AWF_MODELS_URL_TIMEOUT_MS = 3000;
+// Total attempts (first try plus retries) for a models_url fallback fetch that returns HTTP 503, e.g. while the proxy is not yet ready.
+const AWF_MODELS_URL_MAX_ATTEMPTS = 5;
+// Intended delay in milliseconds before the first models_url retry; later delays grow by the backoff multiplier.
+const AWF_MODELS_URL_RETRY_BASE_MS = 250;
+// Upper bound in milliseconds for the backoff delay between models_url retries.
+const AWF_MODELS_URL_RETRY_MAX_MS = 2000;
 // Gemini model name prefix stripped from model IDs in the Gemini models API response.
 // Example: { name: "models/gemini-1.5-pro" } → "gemini-1.5-pro"
 const GEMINI_MODEL_NAME_PREFIX = "models/";
@@ -84,32 +93,79 @@ function extractModelIds(json) {
  * @returns {Promise<string[]|null>}
  */
 async function fetchModelsFromUrl(modelsUrl, timeoutMs, logger) {
-  const ac = new AbortController();
-  const timer = setTimeout(() => {
-    logger(`awf-reflect: models fetch timed out for ${modelsUrl}`);
-    ac.abort();
-  }, timeoutMs);
+  let attemptCounter = 0;
+  const retryConfig = {
+    maxRetries: AWF_MODELS_URL_MAX_ATTEMPTS - 1, // the first attempt is not a retry
+    // Halve the base delay: withRetry is expected to apply backoffMultiplier before the first
+    // retry sleep, so that wait comes out at AWF_MODELS_URL_RETRY_BASE_MS (confirm in error_recovery.cjs).
+    initialDelayMs: Math.ceil(AWF_MODELS_URL_RETRY_BASE_MS / 2),
+    maxDelayMs: AWF_MODELS_URL_RETRY_MAX_MS,
+    backoffMultiplier: 2,
+    jitterMs: 0,
+    shouldRetry: error => {
+      const original = error?.originalError || error;
+      const status = original?.status ?? original?.response?.status ?? null;
+      const shouldRetry = status === 503; // only HTTP 503 is treated as retryable
+      if (shouldRetry && attemptCounter < AWF_MODELS_URL_MAX_ATTEMPTS) {
+        logger(`awf-reflect: models fetch returned 503 for ${modelsUrl}; retrying (attempt ${attemptCounter + 1}/${AWF_MODELS_URL_MAX_ATTEMPTS})`);
+      }
+      return shouldRetry;
+    },
+  };
+
   try {
-    const res = await fetch(modelsUrl, { signal: ac.signal });
-    if (!res.ok) {
-      logger(`awf-reflect: models fetch returned ${res.status} for ${modelsUrl}`);
-      return null;
-    }
-    const json = await res.json();
-    const models = extractModelIds(json);
-    if (models) {
-      logger(`awf-reflect: fetched ${models.length} model(s) from ${modelsUrl}`);
-    }
-    return models;
+    return await withRetry(
+      async () => {
+        attemptCounter += 1; // read by shouldRetry when it builds the retry log message
+        const ac = new AbortController();
+        const timer = setTimeout(() => {
+          logger(`awf-reflect: models fetch timed out for ${modelsUrl}`);
+          ac.abort();
+        }, timeoutMs);
+        try {
+          const res = await fetch(modelsUrl, { signal: ac.signal });
+          if (!res.ok) {
+            if (res.status === 503) {
+              const err = Object.assign(new Error(`models fetch returned 503 for ${modelsUrl}`), { status: 503 });
+              throw err; // carries status=503 so the inner catch and shouldRetry can recognise it
+            }
+            logger(`awf-reflect: models fetch returned ${res.status} for ${modelsUrl}`);
+            return null;
+          }
+          const json = await res.json();
+          const models = extractModelIds(json);
+          if (models) {
+            logger(`awf-reflect: fetched ${models.length} model(s) from ${modelsUrl}`);
+          }
+          return models;
+        } catch (err) {
+          const e = /** @type {Error} */ err;
+          if (e.name === "AbortError") {
+            return null; // timeout already logged by the abort timer callback
+          }
+          const status = e?.status ?? e?.response?.status ?? null;
+          if (status === 503) {
+            throw e; // rethrow so the 503 reaches withRetry; other errors are logged and swallowed below
+          }
+          logger(`awf-reflect: models fetch error for ${modelsUrl}: ${e.message}`);
+          return null;
+        } finally {
+          clearTimeout(timer);
+        }
+      },
+      retryConfig,
+      `awf-reflect models fetch for ${modelsUrl}`
+    );
   } catch (err) {
     const e = /** @type {Error} */ err;
-    if (e.name === "AbortError") {
-      return null; // already logged above
+    const original = e?.originalError || e;
+    const status = original?.status ?? original?.response?.status ?? null;
+    if (status === 503) { // withRetry rejected with a 503-status error
+      logger(`awf-reflect: models fetch returned 503 for ${modelsUrl}`);
+      return null;
     }
     logger(`awf-reflect: models fetch error for ${modelsUrl}: ${e.message}`);
     return null;
-  } finally {
-    clearTimeout(timer);
   }
 }
 
@@ -250,6 +306,9 @@ if (typeof module !== "undefined" && module.exports) {
     AWF_REFLECT_OUTPUT_PATH,
     AWF_REFLECT_TIMEOUT_MS,
     AWF_MODELS_URL_TIMEOUT_MS,
+    AWF_MODELS_URL_MAX_ATTEMPTS,
+    AWF_MODELS_URL_RETRY_BASE_MS,
+    AWF_MODELS_URL_RETRY_MAX_MS,
     GEMINI_MODEL_NAME_PREFIX,
     enrichReflectModels,
     extractModelIds,

diff --git a/setup/js/codex_harness.cjs b/setup/js/codex_harness.cjs
@@ -60,6 +60,11 @@ const MAX_DELAY_MS = 60000;
 const RATE_LIMIT_ERROR_PATTERN = /rate_limit_exceeded|429 Too Many Requests|RateLimitError/i;
 const AUTHENTICATION_FAILED_PATTERN = /Authentication failed(?:\s*\(Request ID:[^)]+\))?/i;
 
+// Pattern to detect a missing API key at startup — Codex emits this before making any API
+// calls when neither CODEX_API_KEY nor OPENAI_API_KEY is available in the environment.
+// Example: "ERROR: Missing environment variable: `OPENAI_API_KEY`" (backticks optional; match is case-insensitive)
+const MISSING_API_KEY_PATTERN = /Missing environment variable:\s*`?(?:CODEX_API_KEY|OPENAI_API_KEY)\b`?/i;
+
 // Pattern to detect OpenAI server-side errors (HTTP 500, 503).
 // These are transient infrastructure failures that may resolve on retry.
 const SERVER_ERROR_PATTERN = /InternalServerError|ServiceUnavailableError|500 Internal Server Error|503 Service Unavailable/i;
@@ -95,6 +100,16 @@ function isAuthenticationFailedError(output) {
   return AUTHENTICATION_FAILED_PATTERN.test(output);
 }
 
+/**
+ * Reports whether the collected output contains the missing-API-key startup message.
+ * Codex exits before producing any agent output in this case, so retrying is futile.
+ * @param {string} output - Collected stdout+stderr from the process
+ * @returns {boolean} true when output matches MISSING_API_KEY_PATTERN
+ */
+function isMissingApiKeyError(output) {
+  return MISSING_API_KEY_PATTERN.test(output);
+}
+
 /**
  * Determines if the collected output contains an OpenAI server error.
  * @param {string} output - Collected stdout+stderr from the process
@@ -245,6 +260,22 @@ function resolveCodexPromptFileArgs(args) {
   return filteredArgs;
 }
 
+/**
+ * Inject `--json` after `exec` in the args list so that Codex streams structured
+ * JSON Lines (JSONL) to stdout. This enables machine-readable output for CI
+ * pipelines without changing how stderr progress output works.
+ *
+ * No-op when args is empty, args[0] is not `exec`, or `--json` appears anywhere in args.
+ *
+ * @param {string[]} args - Codex CLI arguments with the subcommand first; never mutated
+ * @returns {string[]} args itself when unchanged, otherwise a new array with `--json` at index 1
+ */
+function injectJsonFlag(args) {
+  if (args.length === 0 || args[0] !== "exec") return args;
+  if (args.includes("--json")) return args;
+  return ["exec", "--json", ...args.slice(1)];
+}
+
 /**
  * Main entry point: run codex with retry logic for transient API failures.
  * Codex does not support --continue session resumption, so all retries are fresh runs.
@@ -259,6 +290,20 @@ async function main() {
 
   log(`starting: command=${command} maxRetries=${MAX_RETRIES} initialDelayMs=${INITIAL_DELAY_MS}` + ` backoffMultiplier=${BACKOFF_MULTIPLIER} maxDelayMs=${MAX_DELAY_MS}` + ` nodeVersion=${process.version} platform=${process.platform}`);
 
+  // Diagnose API key presence so CI failures can be triaged without exposing secret values.
+  const codexApiKey = process.env.CODEX_API_KEY;
+  const openaiApiKey = process.env.OPENAI_API_KEY;
+  log(`secrets: CODEX_API_KEY=${codexApiKey ? `set (length=${codexApiKey.length})` : "not set"}` + ` OPENAI_API_KEY=${openaiApiKey ? `set (length=${openaiApiKey.length})` : "not set"}`);
+
+  // Pre-flight: require at least one API key before spawning codex.
+  // Without a key, codex exits immediately with "Missing environment variable" and every
+  // retry attempt fails the same way. Failing here avoids burning the retry budget and
+  // surfaces a clear, actionable message in CI logs.
+  if (!codexApiKey && !openaiApiKey) {
+    log("fatal: no API key available - set CODEX_API_KEY or OPENAI_API_KEY and retry");
+    process.exit(1);
+  }
+
   // Resolve the prompt for the initial run (reads --prompt-file content).
   // A missing or unreadable prompt file is treated as a fatal startup error.
   let resolvedArgs;
@@ -276,6 +321,10 @@ async function main() {
   const hadPromptFile = args.includes("--prompt-file");
   const safeArgs = hadPromptFile && resolvedArgs.length > 0 ? [...resolvedArgs.slice(0, -1), "<prompt omitted>"] : resolvedArgs;
 
+  // Inject --json after `exec` to stream structured JSONL events to stdout, making
+  // Codex output machine-readable in CI without affecting the stderr progress stream.
+  resolvedArgs = injectJsonFlag(resolvedArgs);
+
   // Fetch AWF API proxy reflection data before running the agent to capture initial proxy state.
   // This is best-effort: failures are logged but do not affect the agent run.
   await fetchAWFReflect({ logger: log });
@@ -308,6 +357,7 @@ async function main() {
 
     const isRateLimit = isRateLimitError(result.output);
     const isAuthenticationFailed = isAuthenticationFailedError(result.output);
+    const isMissingApiKey = isMissingApiKeyError(result.output);
     const isServer = isServerError(result.output);
     const permissionDeniedCount = countPermissionDeniedIssues(result.output);
     const hasNumerousPermissionDenied = hasNumerousPermissionDeniedIssues(result.output);
@@ -316,6 +366,7 @@ async function main() {
         ` exitCode=${result.exitCode}` +
         ` isRateLimitError=${isRateLimit}` +
         ` isAuthenticationFailedError=${isAuthenticationFailed}` +
+        ` isMissingApiKeyError=${isMissingApiKey}` +
         ` isServerError=${isServer}` +
         ` permissionDeniedCount=${permissionDeniedCount}` +
         ` hasNumerousPermissionDenied=${hasNumerousPermissionDenied}` +
@@ -328,6 +379,11 @@ async function main() {
       break;
     }
 
+    if (isMissingApiKey) {
+      log(`attempt ${attempt + 1}: missing API key — not retrying (configure CODEX_API_KEY or OPENAI_API_KEY)`);
+      break;
+    }
+
     if (hasNumerousPermissionDenied) {
       const deniedCommands = extractDeniedCommands(result.output);
       emitMissingToolPermissionIssue({ deniedCommands });
@@ -363,8 +419,10 @@ async function main() {
 if (typeof module !== "undefined" && module.exports) {
   module.exports = {
     resolveCodexPromptFileArgs,
+    injectJsonFlag,
     isRateLimitError,
     isAuthenticationFailedError,
+    isMissingApiKeyError,
     isServerError,
     countPermissionDeniedIssues,
     hasNumerousPermissionDeniedIssues,

diff --git a/setup/js/copilot_harness.cjs b/setup/js/copilot_harness.cjs
@@ -90,6 +90,10 @@ const NO_AUTH_INFO_PATTERN = /No authentication information found/;
 // After a first-attempt auth failure, retrying is futile because the entrypoint unsets
 // COPILOT_GITHUB_TOKEN between attempts.
 const AUTHENTICATION_FAILED_PATTERN = /Authentication failed(?:\s*\(Request ID:[^)]+\))?/i;
+// Pattern: Copilot CLI inference access denied (matches either message variant; case-sensitive)
+const INFERENCE_ACCESS_ERROR_PATTERN = /Access denied by policy settings|invalid access to inference/;
+// Pattern: Agentic engine process killed by SIGTERM, SIGKILL or SIGINT (reported as a timeout)
+const AGENTIC_ENGINE_TIMEOUT_PATTERN = /signal=SIG(?:TERM|KILL|INT)/;
 
 // Pattern to detect null-type tool_call error that poisons conversation history.
 // Matches the Copilot API 400 error:
@@ -166,6 +170,40 @@ function isAuthenticationFailedError(output) {
   return AUTHENTICATION_FAILED_PATTERN.test(output);
 }
 
+/**
+ * Detect known Copilot error patterns for workflow outputs; each flag is computed independently.
+ * @param {string} output - Text to scan (main passes each attempt's result.output)
+ * @returns {{ inferenceAccessError: boolean, mcpPolicyError: boolean, agenticEngineTimeout: boolean, modelNotSupportedError: boolean }} One boolean per error category
+ */
+function detectCopilotErrors(output) {
+  return {
+    inferenceAccessError: INFERENCE_ACCESS_ERROR_PATTERN.test(output),
+    mcpPolicyError: isMCPPolicyError(output),
+    agenticEngineTimeout: AGENTIC_ENGINE_TIMEOUT_PATTERN.test(output),
+    modelNotSupportedError: isModelNotSupportedError(output),
+  };
+}
+
+/**
+ * Append the four Copilot detection flags as `key=value` lines to the file named by $GITHUB_OUTPUT; logs and returns when it is unset.
+ * @param {{ inferenceAccessError: boolean, mcpPolicyError: boolean, agenticEngineTimeout: boolean, modelNotSupportedError: boolean }} results - Flags to write
+ */
+function writeCopilotOutputs(results) {
+  const outputFile = process.env.GITHUB_OUTPUT;
+  if (!outputFile) {
+    log("GITHUB_OUTPUT not set — skipping copilot error outputs");
+    return;
+  }
+
+  const lines = [
+    `inference_access_error=${results.inferenceAccessError}`,
+    `mcp_policy_error=${results.mcpPolicyError}`,
+    `agentic_engine_timeout=${results.agenticEngineTimeout}`,
+    `model_not_supported_error=${results.modelNotSupportedError}`,
+  ];
+  fs.appendFileSync(outputFile, lines.join("\n") + "\n"); // trailing newline terminates the last entry
+}
+
 /**
  * Determines if the collected output contains a null-type tool_call error.
  * This error occurs when the model emits a malformed tool call with type: null.
@@ -436,6 +474,12 @@ async function main() {
   // This prevents a broken --continue recovery from resurrecting --continue on the next attempt.
   let continueDisabledPermanently = false;
   const driverStartTime = Date.now();
+  const detectedCopilotErrors = {
+    inferenceAccessError: false,
+    mcpPolicyError: false,
+    agenticEngineTimeout: false,
+    modelNotSupportedError: false,
+  };
 
   for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
     // Add --continue flag on retries so the copilot session continues from where it left off
@@ -453,6 +497,11 @@ async function main() {
     const safeArgs = currentArgs.map((arg, i) => (currentArgs[i - 1] === "--prompt" || currentArgs[i - 1] === "-p" ? "<redacted>" : arg));
     const result = await runProcess({ command, args: currentArgs, attempt, log, logArgs: safeArgs });
     lastExitCode = result.exitCode;
+    const attemptDetections = detectCopilotErrors(result.output);
+    detectedCopilotErrors.inferenceAccessError ||= attemptDetections.inferenceAccessError;
+    detectedCopilotErrors.mcpPolicyError ||= attemptDetections.mcpPolicyError;
+    detectedCopilotErrors.agenticEngineTimeout ||= attemptDetections.agenticEngineTimeout;
+    detectedCopilotErrors.modelNotSupportedError ||= attemptDetections.modelNotSupportedError;
 
     // Success — record exit code and stop retrying
     if (result.exitCode === 0) {
@@ -614,9 +663,13 @@ if (typeof module !== "undefined" && module.exports) {
     fetchAWFReflect,
     fetchModelsFromUrl,
     countPermissionDeniedIssues,
+    detectCopilotErrors,
     hasNumerousPermissionDeniedIssues,
+    INFERENCE_ACCESS_ERROR_PATTERN,
+    AGENTIC_ENGINE_TIMEOUT_PATTERN,
     buildMissingToolPermissionIssuePayload,
     isAuthenticationFailedError,
+    writeCopilotOutputs,
     resolvePromptFileArgs,
   };
 }

diff --git a/setup/js/detect_copilot_errors.cjs → setup/js/detect_agent_errors.cjs b/setup/js/detect_copilot_errors.cjs → setup/js/detect_agent_errors.cjs
@@ -71,7 +71,7 @@ function detectErrors(logContent) {
 function writeOutputs(results) {
   const outputFile = process.env.GITHUB_OUTPUT;
   if (!outputFile) {
-    process.stderr.write("[detect-copilot-errors] GITHUB_OUTPUT not set — skipping output\n");
+    process.stderr.write("[detect-agent-errors] GITHUB_OUTPUT not set — skipping output\n");
     return;
   }
 
@@ -90,22 +90,22 @@ function main() {
   if (fs.existsSync(LOG_FILE)) {
     logContent = fs.readFileSync(LOG_FILE, "utf8");
   } else {
-    process.stderr.write(`[detect-copilot-errors] Log file not found: ${LOG_FILE}\n`);
+    process.stderr.write(`[detect-agent-errors] Log file not found: ${LOG_FILE}\n`);
   }
 
   const results = detectErrors(logContent);
 
   if (results.inferenceAccessError) {
-    process.stderr.write("[detect-copilot-errors] Detected inference access error in agent log\n");
+    process.stderr.write("[detect-agent-errors] Detected inference access error in agent log\n");
   }
   if (results.mcpPolicyError) {
-    process.stderr.write("[detect-copilot-errors] Detected MCP policy error in agent log\n");
+    process.stderr.write("[detect-agent-errors] Detected MCP policy error in agent log\n");
   }
   if (results.agenticEngineTimeout) {
-    process.stderr.write("[detect-copilot-errors] Detected timeout: engine process was killed by signal (step timeout-minutes likely exceeded)\n");
+    process.stderr.write("[detect-agent-errors] Detected timeout: engine process was killed by signal (step timeout-minutes likely exceeded)\n");
   }
   if (results.modelNotSupportedError) {
-    process.stderr.write("[detect-copilot-errors] Detected model-not-supported error: the requested model is unavailable for this subscription tier\n");
+    process.stderr.write("[detect-agent-errors] Detected model-not-supported error: the requested model is unavailable for this subscription tier\n");
   }
 
   writeOutputs(results);