aws · jariy17 · May 6, 2026 · May 5, 2026
diff --git a/e2e-tests/ab-test-config-bundle.test.ts b/e2e-tests/ab-test-config-bundle.test.ts
@@ -105,10 +105,6 @@ describe.sequential('e2e: config-bundle AB test lifecycle', () => {
       await retry(
         async () => {
           const result = await run(['deploy', '--yes', '--json']);
-          if (result.exitCode !== 0) {
-            console.log('Initial deploy stdout:', result.stdout);
-            console.log('Initial deploy stderr:', result.stderr);
-          }
           expect(result.exitCode, `Initial deploy failed`).toBe(0);
           const json = parseJsonOutput(result.stdout) as { success: boolean };
           expect(json.success).toBe(true);
@@ -123,10 +119,12 @@ describe.sequential('e2e: config-bundle AB test lifecycle', () => {
   it.skipIf(!canRun)(
     'adds config-bundle AB test with 90/10 split',
     async () => {
-      // Config bundles reference ARNs from deployed resources.
-      // Use placeholder bundle ARNs — the deploy step will validate or create them.
-      const controlBundle = `arn:aws:bedrock-agentcore:ap-southeast-2:998846730471:config-bundle/control-v1`;
-      const treatmentBundle = `arn:aws:bedrock-agentcore:ap-southeast-2:998846730471:config-bundle/treatment-v1`;
+      // Use placeholder bundle ARNs that satisfy the service format constraints.
+      // Real config bundles would be created separately; these test the AB test wiring.
+      const region = process.env.AWS_REGION ?? 'us-east-1';
+      const account = process.env.AWS_ACCOUNT_ID ?? '000000000000';
+      const controlBundle = `arn:aws:bedrock-agentcore:${region}:${account}:configuration-bundle/control-bundle-AbCdEfGhIj`;
+      const treatmentBundle = `arn:aws:bedrock-agentcore:${region}:${account}:configuration-bundle/treatment-bundle-AbCdEfGhIj`;
 
       const result = await run([
         'add',
@@ -140,11 +138,11 @@ describe.sequential('e2e: config-bundle AB test lifecycle', () => {
         '--control-bundle',
         controlBundle,
         '--control-version',
-        'v1',
+        '00000000-0000-0000-0000-000000000001',
         '--treatment-bundle',
         treatmentBundle,
         '--treatment-version',
-        'v1',
+        '00000000-0000-0000-0000-000000000002',
         '--control-weight',
         '90',
         '--treatment-weight',

diff --git a/e2e-tests/ab-test-target-based.test.ts b/e2e-tests/ab-test-target-based.test.ts
@@ -195,10 +195,6 @@ describe.sequential('e2e: target-based AB test lifecycle', () => {
       await retry(
         async () => {
           const result = await run(['deploy', '--yes', '--json']);
-          if (result.exitCode !== 0) {
-            console.log('Deploy stdout:', result.stdout);
-            console.log('Deploy stderr:', result.stderr);
-          }
           expect(result.exitCode, `Deploy failed (stderr: ${result.stderr})`).toBe(0);
           const json = parseJsonOutput(result.stdout) as { success: boolean };
           expect(json.success).toBe(true);
@@ -210,6 +206,23 @@ describe.sequential('e2e: target-based AB test lifecycle', () => {
     600000
   );
 
+  it.skipIf(!canRun)(
+    'AB test reaches RUNNING status after deploy',
+    async () => {
+      await retry(
+        async () => {
+          const result = await run(['ab-test', abTestName, '--json']);
+          expect(result.exitCode, `ab-test lookup failed: ${result.stdout} ${result.stderr}`).toBe(0);
+          const json = parseJsonOutput(result.stdout) as { executionStatus: string };
+          expect(json.executionStatus, 'AB test should be RUNNING after deploy').toBe('RUNNING');
+        },
+        12,
+        15000
+      );
+    },
+    300000
+  );
+
   it.skipIf(!canRun)(
     'status shows all resources deployed',
     async () => {
@@ -220,7 +233,7 @@ describe.sequential('e2e: target-based AB test lifecycle', () => {
 
           const json = parseJsonOutput(result.stdout) as {
             success: boolean;
-            resources: { resourceType: string; name: string; deploymentState: string }[];
+            resources: { resourceType: string; name: string; deploymentState: string; invocationUrl?: string }[];
           };
           expect(json.success).toBe(true);
 
@@ -229,9 +242,12 @@ describe.sequential('e2e: target-based AB test lifecycle', () => {
           expect(agent, `Agent "${agentName}" should appear in status`).toBeDefined();
           expect(agent!.deploymentState).toBe('deployed');
 
-          // Gateway should be deployed
-          const gateway = json.resources.find(r => r.resourceType === 'http-gateway' && r.name === `${abTestName}-gw`);
-          expect(gateway, 'HTTP gateway should appear in status').toBeDefined();
+          // AB test should be deployed (HTTP gateways are not surfaced as top-level status resources)
+          const abTest = json.resources.find(r => r.resourceType === 'ab-test' && r.name === abTestName);
+          expect(abTest, `AB test "${abTestName}" should appear in status`).toBeDefined();
+          expect(abTest!.deploymentState).toBe('deployed');
+          // invocationUrl proves the HTTP gateway was deployed and wired up correctly
+          expect(abTest!.invocationUrl, 'AB test should have a gateway invocation URL').toBeTruthy();
         },
         3,
         15000
@@ -280,7 +296,7 @@ describe.sequential('e2e: target-based AB test lifecycle', () => {
     'promotes AB test (updates agentcore.json)',
     async () => {
       const result = await run(['promote', 'ab-test', abTestName, '--json']);
-      expect(result.exitCode, `Promote failed: ${result.stderr}`).toBe(0);
+      expect(result.exitCode, `Promote failed: ${result.stdout} ${result.stderr}`).toBe(0);
       const json = parseJsonOutput(result.stdout) as Record<string, unknown>;
       expect(json).toHaveProperty('success', true);
       expect(json).toHaveProperty('promoted', true);

diff --git a/scripts/run-e2e-local.sh b/scripts/run-e2e-local.sh
@@ -0,0 +1,122 @@
+#!/usr/bin/env bash
+# Run E2E tests locally, replicating the GitHub Actions e2e-tests.yml workflow.
+#
+# Required env vars:
+#   E2E_ROLE_ARN    — IAM role ARN to assume (grants access to the test account)
+#   E2E_SECRET_ARN  — Secrets Manager ARN containing ANTHROPIC_API_KEY, OPENAI_API_KEY, GEMINI_API_KEY
+#
+# Optional env vars:
+#   AWS_REGION      — defaults to us-east-1
+#
+# Usage:
+#   export E2E_ROLE_ARN=arn:aws:iam::<account>:role/<role>
+#   export E2E_SECRET_ARN=arn:aws:secretsmanager:<region>:<account>:secret:<name>
+#   ./scripts/run-e2e-local.sh                          # runs strands-bedrock.test.ts (CI default)
+#   ./scripts/run-e2e-local.sh --all                    # runs the full e2e suite
+#   ./scripts/run-e2e-local.sh e2e-tests/foo.test.ts    # runs a specific test file
+#
+# Side effects: installs the freshly built CLI globally (npm install -g) and sets a
+# placeholder global git identity, but only when none is configured yet.
+#
+# Prerequisites: aws CLI, node >=20.19, npm, git, uv, jq
+
+set -euo pipefail
+
+ROLE_ARN="${E2E_ROLE_ARN:-}"
+SECRET_ARN="${E2E_SECRET_ARN:-}"
+AWS_REGION="${AWS_REGION:-us-east-1}"
+
+if [[ -z "$ROLE_ARN" ]]; then
+  echo "❌ E2E_ROLE_ARN is not set. Export it before running this script:"
+  echo "   export E2E_ROLE_ARN=arn:aws:iam::<account>:role/<role-name>"
+  exit 1
+fi
+
+if [[ -z "$SECRET_ARN" ]]; then
+  echo "❌ E2E_SECRET_ARN is not set. Export it before running this script:"
+  echo "   export E2E_SECRET_ARN=arn:aws:secretsmanager:<region>:<account>:secret:<name>"
+  exit 1
+fi
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
+
+# ── Parse arguments ────────────────────────────────────────────────────────────
+# `--all` selects the whole suite; every other argument is handed to vitest as a test file.
+RUN_ALL=false
+TEST_FILES=()
+for arg in "$@"; do
+  if [[ "$arg" == "--all" ]]; then
+    RUN_ALL=true
+  else
+    TEST_FILES+=("$arg")
+  fi
+done
+
+echo "=== Assuming IAM role ==="
+CREDS=$(aws sts assume-role \
+  --role-arn "$ROLE_ARN" \
+  --role-session-name "local-e2e-$(date +%s)" \
+  --duration-seconds 3600 \
+  --query 'Credentials.[AccessKeyId,SecretAccessKey,SessionToken]' \
+  --output text)
+
+# Assign first, export afterwards: `export VAR=$(cmd)` reports export's own exit
+# status, so a failing cmd would slip past `set -e`.
+read -r AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY AWS_SESSION_TOKEN <<<"$CREDS"
+export AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY AWS_SESSION_TOKEN AWS_REGION
+
+echo "✅ Assumed role successfully"
+
+echo "=== Fetching API keys from Secrets Manager ==="
+SECRET_JSON=$(aws secretsmanager get-secret-value \
+  --secret-id "$SECRET_ARN" \
+  --region "$AWS_REGION" \
+  --query SecretString \
+  --output text)
+
+# Mirror the GitHub workflow: parse-json-secrets maps keys to E2E_<KEY> then
+# the workflow maps them to the bare names the tests expect.
+ANTHROPIC_API_KEY=$(jq -r '.ANTHROPIC_API_KEY // empty' <<<"$SECRET_JSON")
+OPENAI_API_KEY=$(jq -r '.OPENAI_API_KEY // empty' <<<"$SECRET_JSON")
+GEMINI_API_KEY=$(jq -r '.GEMINI_API_KEY // empty' <<<"$SECRET_JSON")
+export ANTHROPIC_API_KEY OPENAI_API_KEY GEMINI_API_KEY
+
+SECRET_KEYS=$(jq -r 'keys | join(", ")' <<<"$SECRET_JSON")
+echo "✅ Secrets loaded (keys present: $SECRET_KEYS)"
+
+echo "=== Setting AWS account env var ==="
+AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
+export AWS_ACCOUNT_ID
+echo "✅ AWS_ACCOUNT_ID=$AWS_ACCOUNT_ID  AWS_REGION=$AWS_REGION"
+
+echo "=== Configuring git (required for agentcore create) ==="
+# Fill in a placeholder identity only when none exists, so a developer's real
+# global git config is never overwritten. Failures are ignored (best-effort).
+git config --global --get user.email >/dev/null 2>&1 || git config --global user.email "ci@local" 2>/dev/null || true
+git config --global --get user.name  >/dev/null 2>&1 || git config --global user.name "Local E2E"  2>/dev/null || true
+
+cd "$REPO_ROOT"
+
+echo "=== Installing dependencies ==="
+npm ci
+
+echo "=== Building CLI ==="
+npm run build
+
+echo "=== Installing CLI globally ==="
+TARBALL=$(npm pack | tail -1)
+npm install -g "$TARBALL"
+echo "✅ Installed: $(agentcore --version)"
+
+echo "=== Running E2E tests ==="
+if [[ "$RUN_ALL" == "true" ]]; then
+  echo "Running full e2e suite"
+  npx vitest run --project e2e
+elif [[ ${#TEST_FILES[@]} -gt 0 ]]; then
+  echo "Running: ${TEST_FILES[*]}"
+  npx vitest run --project e2e "${TEST_FILES[@]}"
+else
+  echo "Running default: e2e-tests/strands-bedrock.test.ts"
+  npx vitest run --project e2e e2e-tests/strands-bedrock.test.ts
+fi
diff --git a/src/cli/commands/pause/__tests__/promote.test.ts b/src/cli/commands/pause/__tests__/promote.test.ts
@@ -0,0 +1,69 @@
+import { waitForRunningThenStop } from '../promote-utils.js';
+import { beforeEach, describe, expect, it, vi } from 'vitest';
+
+const mockGetABTest = vi.fn();
+const mockUpdateABTest = vi.fn();
+
+// The factory returns forwarding wrappers, so the vi.fn() spies above are only looked up
+// at call time rather than when the (hoisted) vi.mock factory itself is evaluated.
+vi.mock('../../../aws/agentcore-ab-tests', () => ({
+  getABTest: (...args: unknown[]) => mockGetABTest(...args),
+  updateABTest: (...args: unknown[]) => mockUpdateABTest(...args),
+}));
+
+// Fixed call arguments shared by every case.
+const REGION = 'us-east-1';
+const AB_TEST_ID = 'abt-123';
+const TEST_NAME = 'MyTest';
+
+/** Runs the helper against the shared fixture with no delay between polls. */
+const pollAndStop = (maxAttempts: number) => waitForRunningThenStop(REGION, AB_TEST_ID, TEST_NAME, maxAttempts, 0);
+
+/** Builds a mocked response that carries only the execution status. */
+const withStatus = (executionStatus: string) => ({ executionStatus });
+
+describe('waitForRunningThenStop', () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+    mockUpdateABTest.mockResolvedValue(withStatus('STOPPED'));
+  });
+
+  it('stops immediately when already RUNNING', async () => {
+    mockGetABTest.mockResolvedValue(withStatus('RUNNING'));
+
+    await pollAndStop(3);
+
+    expect(mockGetABTest).toHaveBeenCalledTimes(1);
+    expect(mockUpdateABTest).toHaveBeenCalledWith({
+      region: REGION,
+      abTestId: AB_TEST_ID,
+      executionStatus: 'STOPPED',
+    });
+  });
+
+  it('polls until RUNNING then stops', async () => {
+    for (const status of ['UPDATING', 'UPDATING', 'RUNNING']) {
+      mockGetABTest.mockResolvedValueOnce(withStatus(status));
+    }
+
+    await pollAndStop(5);
+
+    expect(mockGetABTest).toHaveBeenCalledTimes(3);
+    expect(mockUpdateABTest).toHaveBeenCalledTimes(1);
+  });
+
+  it('throws if AB test never reaches RUNNING', async () => {
+    mockGetABTest.mockResolvedValue(withStatus('UPDATING'));
+
+    await expect(pollAndStop(3)).rejects.toThrow('did not reach RUNNING state');
+
+    expect(mockGetABTest).toHaveBeenCalledTimes(3);
+    expect(mockUpdateABTest).not.toHaveBeenCalled();
+  });
+
+  it('includes current status in the error message', async () => {
+    mockGetABTest.mockResolvedValue(withStatus('STOPPED'));
+
+    await expect(pollAndStop(2)).rejects.toThrow('current: STOPPED');
+  });
+});
diff --git a/src/cli/commands/pause/command.tsx b/src/cli/commands/pause/command.tsx
@@ -7,6 +7,7 @@ import type { OnlineEvalActionOptions } from '../../operations/eval';
 import { COMMAND_DESCRIPTIONS } from '../../tui/copy';
 import { requireProject } from '../../tui/guards';
 import { getRegion } from '../shared/region-utils';
+import { waitForRunningThenStop } from './promote-utils';
 import type { Command } from '@commander-js/extra-typings';
 import { Text, render } from 'ink';
 import React from 'react';
@@ -274,12 +275,7 @@ export const registerPromote = (program: Command) => {
           process.exit(1);
         }
 
-        // Stop the AB test
-        const result = await updateABTest({
-          region,
-          abTestId,
-          executionStatus: 'STOPPED',
-        });
+        const result = await waitForRunningThenStop(region, abTestId, name);
 
         // Apply promotion to agentcore.json
         const { promoteABTestConfig } = await import('../../operations/ab-test/promote');

diff --git a/src/cli/commands/pause/promote-utils.ts b/src/cli/commands/pause/promote-utils.ts
@@ -0,0 +1,42 @@
+import { getABTest, updateABTest } from '../../aws/agentcore-ab-tests';
+import type { UpdateABTestResult } from '../../aws/agentcore-ab-tests';
+
+/**
+ * Poll until the AB test reaches RUNNING status, then stop it.
+ *
+ * The status is fetched up to `maxAttempts` times, pausing `delayMs` between
+ * consecutive fetches. No pause follows the final fetch, so a test that never
+ * reaches RUNNING is reported without an extra, pointless wait.
+ *
+ * @param region - Region passed through to the getABTest/updateABTest calls.
+ * @param abTestId - Identifier of the AB test to poll and stop.
+ * @param name - Display name, used only in the error message.
+ * @param maxAttempts - Maximum number of status fetches (default 12).
+ * @param delayMs - Pause between fetches in milliseconds (default 10 000).
+ * @returns The result of the updateABTest call that sets the status to STOPPED.
+ * @throws Error if the last observed status is not RUNNING.
+ */
+export async function waitForRunningThenStop(
+  region: string,
+  abTestId: string,
+  name: string,
+  maxAttempts = 12,
+  delayMs = 10_000
+): Promise<UpdateABTestResult> {
+  let currentStatus: string | undefined;
+  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
+    const current = await getABTest({ region, abTestId });
+    currentStatus = current.executionStatus;
+    if (currentStatus === 'RUNNING') break;
+    // Sleep only when another fetch will follow.
+    if (attempt < maxAttempts) {
+      await new Promise(resolve => setTimeout(resolve, delayMs));
+    }
+  }
+  if (currentStatus !== 'RUNNING') {
+    throw new Error(
+      `AB test "${name}" did not reach RUNNING state after waiting (current: ${currentStatus}). Cannot promote.`
+    );
+  }
+  return updateABTest({ region, abTestId, executionStatus: 'STOPPED' });
+}
diff --git a/src/cli/operations/deploy/__tests__/post-deploy-ab-tests.test.ts b/src/cli/operations/deploy/__tests__/post-deploy-ab-tests.test.ts
@@ -517,22 +517,22 @@ describe('setupABTests', () => {
       const trustPolicy = JSON.parse(createRoleCall.input.AssumeRolePolicyDocument);
       expect(trustPolicy.Statement).toHaveLength(1);
       expect(trustPolicy.Statement[0].Principal.Service).toBe('bedrock-agentcore.amazonaws.com');
+      expect(trustPolicy.Statement[0].Condition.StringEquals['aws:SourceAccount']).toBeDefined();
+      expect(trustPolicy.Statement[0].Condition.ArnLike['aws:SourceArn']).toContain('ab-test/*');
 
       // Second call: PutRolePolicyCommand with inline policy
       const putPolicyCall = mockIAMSend.mock.calls[1]![0];
       const policy = JSON.parse(putPolicyCall.input.PolicyDocument);
       const sids = policy.Statement.map((s: { Sid: string }) => s.Sid);
-      expect(sids).toContain('GatewayRuleStatement');
-      expect(sids).toContain('GatewayReadStatement');
-      expect(sids).toContain('GatewayListStatement');
-      expect(sids).toContain('OnlineEvaluationConfigStatement');
-      expect(sids).toContain('ConfigurationBundleReadStatement');
-      expect(sids).toContain('CloudWatchLogReadStatement');
-      expect(sids).toContain('CloudWatchIndexPolicyStatement');
-
-      // ListGateways must use wildcard resource (can't be scoped)
-      const listGatewayStmt = policy.Statement.find((s: { Sid: string }) => s.Sid === 'GatewayListStatement');
-      expect(listGatewayStmt.Resource).toEqual(['*']);
+      expect(sids).toContain('AgentCoreResources');
+      expect(sids).toContain('CloudWatchLogs');
+
+      // AgentCoreResources must include all required actions
+      const agentCoreStmt = policy.Statement.find((s: { Sid: string }) => s.Sid === 'AgentCoreResources');
+      expect(agentCoreStmt.Action).toContain('bedrock-agentcore:GetEvaluator');
+      expect(agentCoreStmt.Action).toContain('bedrock-agentcore:GetGateway');
+      expect(agentCoreStmt.Action).toContain('bedrock-agentcore:GetOnlineEvaluationConfig');
+      expect(agentCoreStmt.Condition.StringEquals['aws:ResourceAccount']).toBeDefined();
     });
   });