From 8ad6ab81f8c50ae8fc07a3b4fa21df6701bb1e1d Mon Sep 17 00:00:00 2001 From: Aidan Daly Date: Tue, 5 May 2026 15:47:30 -0400 Subject: [PATCH 01/12] feat: add agents/ directory with orchestration framework MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a self-contained Python project for autonomous agents powered by Bedrock AgentCore Harness. Includes: - core/ — shared harness client (raw HTTP + SigV4), response parsing, config - orchestrations/fix_and_review/ — multi-phase pipeline: plan → execute → verify → multi-round review → fix → PR - bug_fixer/ — workflow entry point for fixing issues labeled 'bug' - feature_builder/ — workflow entry point for building features from devex + impl docs - pr_reviewer/ — migrated from .github/harness/ to share core infrastructure - GitHub Actions workflows for both triggers - 19 unit tests Tested end-to-end: successfully planned, implemented, and reviewed fixes for issues #761 and #924 with Opus 4.7, creating PRs with proper templates. 
--- .github/workflows/bug-fixer.yml | 60 +++++ .github/workflows/feature-builder.yml | 58 ++++ agents/bug_fixer/main.py | 40 +++ agents/bug_fixer/prompts/executor.md | 17 ++ agents/bug_fixer/prompts/fixer.md | 16 ++ agents/bug_fixer/prompts/planner.md | 20 ++ agents/bug_fixer/prompts/reviewer.md | 36 +++ agents/bug_fixer/prompts/setup.md | 19 ++ agents/config.yaml | 10 + agents/core/__init__.py | 0 agents/core/config.py | 30 +++ agents/core/harness_client.py | 160 +++++++++++ agents/core/parsing.py | 56 ++++ agents/core/phases/__init__.py | 0 agents/feature_builder/__init__.py | 0 agents/feature_builder/main.py | 64 +++++ agents/feature_builder/prompts/executor.md | 17 ++ agents/feature_builder/prompts/fixer.md | 16 ++ agents/feature_builder/prompts/planner.md | 27 ++ agents/feature_builder/prompts/reviewer.md | 36 +++ agents/feature_builder/prompts/setup.md | 18 ++ agents/orchestrations/__init__.py | 0 .../orchestrations/fix_and_review/__init__.py | 0 .../fix_and_review/orchestrator.py | 252 ++++++++++++++++++ .../fix_and_review/partitioning.py | 121 +++++++++ .../fix_and_review/phases/__init__.py | 0 .../fix_and_review/phases/aggregate.py | 57 ++++ .../fix_and_review/phases/complete.py | 111 ++++++++ .../fix_and_review/phases/execute.py | 20 ++ .../fix_and_review/phases/extract.py | 55 ++++ .../fix_and_review/phases/fix.py | 30 +++ .../fix_and_review/phases/plan.py | 21 ++ .../fix_and_review/phases/review.py | 49 ++++ .../fix_and_review/phases/setup.py | 40 +++ .../fix_and_review/phases/validate.py | 33 +++ .../fix_and_review/phases/verify.py | 91 +++++++ agents/orchestrations/one_shot/__init__.py | 0 .../one_shot/phases/__init__.py | 0 agents/orchestrations/review_only/__init__.py | 0 .../review_only/phases/__init__.py | 0 agents/pr_reviewer/main.py | 200 ++++++++++++++ agents/pr_reviewer/prompts/review.md | 18 ++ agents/pr_reviewer/prompts/system.md | 25 ++ agents/pyproject.toml | 18 ++ agents/tests/__init__.py | 0 agents/tests/test_config.py | 65 +++++ 
agents/tests/test_harness_client.py | 21 ++ agents/tests/test_parsing.py | 81 ++++++ agents/tests/test_partitioning.py | 74 +++++ 49 files changed, 2082 insertions(+) create mode 100644 .github/workflows/bug-fixer.yml create mode 100644 .github/workflows/feature-builder.yml create mode 100644 agents/bug_fixer/main.py create mode 100644 agents/bug_fixer/prompts/executor.md create mode 100644 agents/bug_fixer/prompts/fixer.md create mode 100644 agents/bug_fixer/prompts/planner.md create mode 100644 agents/bug_fixer/prompts/reviewer.md create mode 100644 agents/bug_fixer/prompts/setup.md create mode 100644 agents/config.yaml create mode 100644 agents/core/__init__.py create mode 100644 agents/core/config.py create mode 100644 agents/core/harness_client.py create mode 100644 agents/core/parsing.py create mode 100644 agents/core/phases/__init__.py create mode 100644 agents/feature_builder/__init__.py create mode 100644 agents/feature_builder/main.py create mode 100644 agents/feature_builder/prompts/executor.md create mode 100644 agents/feature_builder/prompts/fixer.md create mode 100644 agents/feature_builder/prompts/planner.md create mode 100644 agents/feature_builder/prompts/reviewer.md create mode 100644 agents/feature_builder/prompts/setup.md create mode 100644 agents/orchestrations/__init__.py create mode 100644 agents/orchestrations/fix_and_review/__init__.py create mode 100644 agents/orchestrations/fix_and_review/orchestrator.py create mode 100644 agents/orchestrations/fix_and_review/partitioning.py create mode 100644 agents/orchestrations/fix_and_review/phases/__init__.py create mode 100644 agents/orchestrations/fix_and_review/phases/aggregate.py create mode 100644 agents/orchestrations/fix_and_review/phases/complete.py create mode 100644 agents/orchestrations/fix_and_review/phases/execute.py create mode 100644 agents/orchestrations/fix_and_review/phases/extract.py create mode 100644 agents/orchestrations/fix_and_review/phases/fix.py create mode 100644 
agents/orchestrations/fix_and_review/phases/plan.py create mode 100644 agents/orchestrations/fix_and_review/phases/review.py create mode 100644 agents/orchestrations/fix_and_review/phases/setup.py create mode 100644 agents/orchestrations/fix_and_review/phases/validate.py create mode 100644 agents/orchestrations/fix_and_review/phases/verify.py create mode 100644 agents/orchestrations/one_shot/__init__.py create mode 100644 agents/orchestrations/one_shot/phases/__init__.py create mode 100644 agents/orchestrations/review_only/__init__.py create mode 100644 agents/orchestrations/review_only/phases/__init__.py create mode 100644 agents/pr_reviewer/main.py create mode 100644 agents/pr_reviewer/prompts/review.md create mode 100644 agents/pr_reviewer/prompts/system.md create mode 100644 agents/pyproject.toml create mode 100644 agents/tests/__init__.py create mode 100644 agents/tests/test_config.py create mode 100644 agents/tests/test_harness_client.py create mode 100644 agents/tests/test_parsing.py create mode 100644 agents/tests/test_partitioning.py diff --git a/.github/workflows/bug-fixer.yml b/.github/workflows/bug-fixer.yml new file mode 100644 index 000000000..a7de5d8c1 --- /dev/null +++ b/.github/workflows/bug-fixer.yml @@ -0,0 +1,60 @@ +name: Bug Fixer Agent + +on: + issues: + types: [labeled] + workflow_dispatch: + inputs: + issue_url: + description: 'GitHub issue URL' + required: true + type: string + +permissions: + id-token: write + contents: write + pull-requests: write + issues: write + +jobs: + fix-bug: + if: github.event.label.name == 'bug' || github.event_name == 'workflow_dispatch' + runs-on: ubuntu-latest + steps: + - name: Determine issue URL + id: issue + env: + INPUT_URL: ${{ inputs.issue_url }} + EVENT_URL: ${{ github.event.issue.html_url }} + EVENT_NAME: ${{ github.event_name }} + run: | + if [ "$EVENT_NAME" = "workflow_dispatch" ]; then + echo "url=$INPUT_URL" >> "$GITHUB_OUTPUT" + else + echo "url=$EVENT_URL" >> "$GITHUB_OUTPUT" + fi + + - name: 
Checkout + uses: actions/checkout@v6 + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v6 + with: + role-to-assume: ${{ secrets.HARNESS_AWS_ROLE_ARN }} + aws-region: us-west-2 + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.12' + + - name: Setup uv + uses: astral-sh/setup-uv@v7 + + - name: Run Bug Fixer Agent + working-directory: agents + env: + HARNESS_ARN: ${{ secrets.HARNESS_ARN }} + GH_TOKEN: ${{ secrets.PAT_TOKEN }} + ISSUE_URL: ${{ steps.issue.outputs.url }} + run: uv sync && uv run python -m bug_fixer.main --issue "$ISSUE_URL" diff --git a/.github/workflows/feature-builder.yml b/.github/workflows/feature-builder.yml new file mode 100644 index 000000000..13f8f7107 --- /dev/null +++ b/.github/workflows/feature-builder.yml @@ -0,0 +1,58 @@ +name: Feature Builder Agent + +on: + workflow_dispatch: + inputs: + devex_doc: + description: 'Path to devex doc (relative to repo root)' + required: true + type: string + impl_doc: + description: 'Path to implementation plan (relative to repo root)' + required: true + type: string + feature_name: + description: 'Feature name (used for branch naming)' + required: true + type: string + +permissions: + id-token: write + contents: write + pull-requests: write + +jobs: + build-feature: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v6 + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v6 + with: + role-to-assume: ${{ secrets.HARNESS_AWS_ROLE_ARN }} + aws-region: us-west-2 + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.12' + + - name: Setup uv + uses: astral-sh/setup-uv@v7 + + - name: Run Feature Builder Agent + working-directory: agents + env: + HARNESS_ARN: ${{ secrets.HARNESS_ARN }} + GH_TOKEN: ${{ secrets.PAT_TOKEN }} + DEVEX_DOC: ${{ inputs.devex_doc }} + IMPL_DOC: ${{ inputs.impl_doc }} + FEATURE_NAME: ${{ inputs.feature_name }} + run: | + uv sync + 
uv run python -m feature_builder.main \ + --devex "../$DEVEX_DOC" \ + --impl "../$IMPL_DOC" \ + --name "$FEATURE_NAME" diff --git a/agents/bug_fixer/main.py b/agents/bug_fixer/main.py new file mode 100644 index 000000000..df1ba5b78 --- /dev/null +++ b/agents/bug_fixer/main.py @@ -0,0 +1,40 @@ +"""Bug Fixer Agent — resolves GitHub issues labeled 'bug'. + +Usage: + uv run bug_fixer/main.py --issue https://github.com/aws/agentcore-cli/issues/123 + uv run bug_fixer/main.py --issue https://github.com/aws/agentcore-cli/issues/123 --config config.yaml +""" + +import argparse +import sys +from pathlib import Path + +from orchestrations.fix_and_review.orchestrator import run_pipeline + +PROMPTS_DIR = Path(__file__).resolve().parent / "prompts" + + +def main(): + parser = argparse.ArgumentParser(description="Bug Fixer Agent") + parser.add_argument("--issue", required=True, help="GitHub issue URL") + parser.add_argument("--config", default="config.yaml", help="Config YAML path") + parser.add_argument("--aws-profile", help="Override AWS profile") + parser.add_argument("--harness-arn", help="Override harness ARN") + args = parser.parse_args() + + overrides = {} + if args.aws_profile: + overrides["aws_profile"] = args.aws_profile + if args.harness_arn: + overrides["harness_arn"] = args.harness_arn + + return run_pipeline( + issue_url=args.issue, + config_path=args.config, + prompts_dir=PROMPTS_DIR, + **overrides, + ) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/agents/bug_fixer/prompts/executor.md b/agents/bug_fixer/prompts/executor.md new file mode 100644 index 000000000..8f1f89273 --- /dev/null +++ b/agents/bug_fixer/prompts/executor.md @@ -0,0 +1,17 @@ +You are a senior software engineer implementing a planned change across one or two TypeScript repos. + +The plan: +{plan} + +Instructions: +1. Follow the plan exactly. Make the code changes described. +2. Run `npm run typecheck 2>&1 | tail -20` in each affected repo. Fix any type errors. +3. 
Run tests with summary output only: `npm run test:unit 2>&1 | grep -E "(FAIL|PASS|Tests:|Test Suites:)" | tail -20` +4. If tests fail, debug the specific failing file: `npm run test:unit -- path/to/failing.test.ts 2>&1 | tail -50` +5. Commit your changes: `git add -A && git commit -m "feat: {commit_message}"` +6. Push to fork remote: `git push origin {branch_name}` +7. If you need to deviate from the plan, document why in your commit message. + +IMPORTANT: Never run `npm run test:unit` without piping through grep or tail. The full output is too large and will overflow context. Use the grep pattern in step 3 for the summary, then target specific files in step 4 if something fails. + +Do not stop until typecheck and tests pass. If tests fail, analyze the failure, fix the code, and try again. diff --git a/agents/bug_fixer/prompts/fixer.md b/agents/bug_fixer/prompts/fixer.md new file mode 100644 index 000000000..a99a6f0d1 --- /dev/null +++ b/agents/bug_fixer/prompts/fixer.md @@ -0,0 +1,16 @@ +You are a senior software engineer fixing issues found during code review. + +The following findings were reported by reviewers. Address each one: + +{findings_text} + +Instructions: +1. Fix each finding, starting with Critical severity first, then High, Medium, Low. +2. If a finding is not applicable or is a false positive, explain why in a commit message. +3. Run `npm run typecheck 2>&1 | tail -20` in each affected repo after fixes. +4. Run tests with summary: `npm run test:unit 2>&1 | grep -E "(FAIL|PASS|Tests:|Test Suites:)" | tail -20` +5. If tests fail, debug the specific file: `npm run test:unit -- path/to/failing.test.ts 2>&1 | tail -50` +6. Commit: `git add -A && git commit -m "fix: address review findings round {round_number}"` +7. Push: `git push origin {branch_name}` + +IMPORTANT: Never run `npm run test:unit` without piping through grep or tail. The full output is too large and will overflow context. 
diff --git a/agents/bug_fixer/prompts/planner.md b/agents/bug_fixer/prompts/planner.md new file mode 100644 index 000000000..a651788b2 --- /dev/null +++ b/agents/bug_fixer/prompts/planner.md @@ -0,0 +1,20 @@ +You are a senior software architect planning a fix for a GitHub issue. + +You have access to two TypeScript repositories: +- agentcore-cli: AWS AgentCore CLI tool (Commander.js + Ink TUI, ~550 source files) +- agentcore-l3-cdk-constructs: AWS CDK L3 constructs for AgentCore (~17 test files, shares schemas with CLI) + +The issue details are: +{issue_details} + +Analyze the issue, explore the relevant code in both repos, and produce a structured implementation plan. + +Your plan MUST include: +1. **Affected repos**: Which repos need changes (cli, cdk, or both) +2. **Files to change**: Exact file paths to modify, create, or delete +3. **Approach**: Step-by-step description of the changes +4. **Risks**: What could go wrong, edge cases to watch for +5. **Testing strategy**: What tests to add or modify, how to verify the fix +6. **Scope estimate**: small (1-3 files), medium (4-7 files), or large (8+ files) + +Output the plan as structured markdown. diff --git a/agents/bug_fixer/prompts/reviewer.md b/agents/bug_fixer/prompts/reviewer.md new file mode 100644 index 000000000..9cd4cc7a2 --- /dev/null +++ b/agents/bug_fixer/prompts/reviewer.md @@ -0,0 +1,36 @@ +You are a senior code reviewer. You have been assigned a region of a code change to review. + +Issue being solved: {issue_summary} +Branch: {branch_name} +Repos: {cli_repo}, {cdk_repo} + +Your assigned focus: {focus} +Files to focus on: {assigned_files} + +Instructions: +1. 
Clone repos with the feature branch: + - git clone --depth 10 --branch {branch_name} https://github.com/{cli_repo}.git agentcore-cli + - git clone --depth 10 --branch {branch_name} https://github.com/{cdk_repo}.git agentcore-l3-cdk-constructs + (If the branch doesn't exist in a repo, clone main instead: git clone --depth 10 https://github.com/{cli_repo}.git agentcore-cli) +2. Run: git diff main (or git log if on the feature branch already) +3. Review your assigned files for ALL concerns: correctness, architecture, security, testing adequacy, cross-repo consistency, and breaking changes +4. Trace callers of changed functions. Check types. Verify test coverage. +5. You do NOT need to run npm install — you are reviewing code, not building it. + +{previous_findings_context} + +Output your review as a JSON object wrapped in ```json fences. +The JSON must have this exact schema: +{{ + "approved": boolean, + "findings": [ + {{ + "severity": "critical" | "high" | "medium" | "low", + "file": "path/to/file", + "line": number, + "description": "what's wrong", + "suggestion": "how to fix" + }} + ] +}} +Output ONLY the JSON object in code fences. No other text before or after. diff --git a/agents/bug_fixer/prompts/setup.md b/agents/bug_fixer/prompts/setup.md new file mode 100644 index 000000000..11c3e50df --- /dev/null +++ b/agents/bug_fixer/prompts/setup.md @@ -0,0 +1,19 @@ +You are setting up a development environment to solve a GitHub issue. + +Steps: +1. Install tools (Amazon Linux 2023): dnf install -y -q git nodejs20 npm > /dev/null 2>&1 +2. Authenticate GitHub: echo $GH_TOKEN | gh auth login --with-token +3. Configure git to use gh for auth: gh auth setup-git +4. Clone both repos: + - git clone https://github.com/{cli_repo}.git {cli_repo_name} + - git clone https://github.com/{cdk_repo}.git {cdk_repo_name} +5. Install dependencies: cd {cli_repo_name} && npm install 2>&1 | tail -3 && cd .. +6. 
Fetch the issue details: gh issue view {issue_url} --json title,body,labels,comments,assignees +7. Create a feature branch in both repos: + - cd {cli_repo_name} && git checkout -b fix/{issue_number} && cd .. + - cd {cdk_repo_name} && git checkout -b fix/{issue_number} && cd .. +8. Report back: the issue title, body, and which repos likely need changes based on the issue content. + +IMPORTANT: Run each step as a separate shell command. Do not combine them. If tools are already installed, skip step 1. + +Output the issue details as structured markdown with sections: Title, Body, Labels, and Initial Assessment. diff --git a/agents/config.yaml b/agents/config.yaml new file mode 100644 index 000000000..6ebd0a945 --- /dev/null +++ b/agents/config.yaml @@ -0,0 +1,10 @@ +harness_arn: "arn:aws:bedrock-agentcore:us-west-2:603141041947:harness/IssueSolver_aidandal-8SL97TEXjS" +region: "us-west-2" +aws_profile: "deploy" +model_id: "global.anthropic.claude-opus-4-7" +min_reviewers: 3 +max_reviewers: 5 +max_review_rounds: 5 +repos: + cli: "aws/agentcore-cli" + cdk: "aws/agentcore-l3-cdk-constructs" diff --git a/agents/core/__init__.py b/agents/core/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/agents/core/config.py b/agents/core/config.py new file mode 100644 index 000000000..fb81a3a97 --- /dev/null +++ b/agents/core/config.py @@ -0,0 +1,30 @@ +from dataclasses import dataclass + +import yaml + + +@dataclass +class PipelineConfig: + harness_arn: str + region: str = "us-west-2" + data_plane_endpoint: str | None = None + aws_profile: str = "deploy" + model_id: str = "global.anthropic.claude-opus-4-7" + min_reviewers: int = 3 + max_reviewers: int = 5 + max_review_rounds: int = 5 + cli_repo: str = "aws/agentcore-cli" + cdk_repo: str = "aws/agentcore-l3-cdk-constructs" + + @classmethod + def from_yaml(cls, path: str) -> "PipelineConfig": + with open(path) as f: + data = yaml.safe_load(f) + + repos = data.pop("repos", {}) + if "cli" in repos: + 
data["cli_repo"] = repos["cli"] + if "cdk" in repos: + data["cdk_repo"] = repos["cdk"] + + return cls(**{k: v for k, v in data.items() if k in cls.__dataclass_fields__}) diff --git a/agents/core/harness_client.py b/agents/core/harness_client.py new file mode 100644 index 000000000..33d296f32 --- /dev/null +++ b/agents/core/harness_client.py @@ -0,0 +1,160 @@ +import json +import sys +import uuid +from urllib.parse import quote + +import boto3 +import urllib3 +from botocore.auth import SigV4Auth +from botocore.awsrequest import AWSRequest +from botocore.config import Config as BotoConfig +from botocore.eventstream import EventStreamBuffer + +from core.config import PipelineConfig + + +class HarnessClient: + def __init__(self, config: PipelineConfig): + self.config = config + self.session = boto3.Session( + region_name=config.region, + profile_name=config.aws_profile, + ) + self.credentials = self.session.get_credentials().get_frozen_credentials() + self.http = urllib3.PoolManager() + self.client = self.session.client( + "bedrock-agentcore", + config=BotoConfig(read_timeout=600, connect_timeout=30, retries={"max_attempts": 2}), + ) + + def invoke( + self, + session_id: str, + message: str, + system_prompt: str | None = None, + verbose: bool = True, + ) -> str: + body: dict = { + "runtimeSessionId": session_id, + "messages": [{"role": "user", "content": [{"text": message}]}], + "model": {"bedrockModelConfig": {"modelId": self.config.model_id}}, + } + if system_prompt: + body["systemPrompt"] = [{"text": system_prompt}] + + region = self.config.region + arn = self.config.harness_arn + url = f"https://bedrock-agentcore.{region}.amazonaws.com/harnesses/invoke?harnessArn={quote(arn, safe='')}" + + request = AWSRequest(method="POST", url=url, data=json.dumps(body), headers={ + "Content-Type": "application/json", + "Accept": "application/vnd.amazon.eventstream", + }) + SigV4Auth(self.credentials, "bedrock-agentcore", region).add_auth(request) + + response = 
self.http.urlopen( + "POST", url, body=json.dumps(body).encode(), + headers=dict(request.headers), + preload_content=False, + timeout=urllib3.Timeout(connect=30, read=900), + ) + + if response.status != 200: + error = response.read().decode("utf-8") + if verbose: + print(f"\n ⚠️ HTTP {response.status}: {error}", flush=True) + raise RuntimeError(f"InvokeHarness failed: HTTP {response.status}: {error}") + + request_id = response.headers.get("x-amzn-RequestId", "unknown") + if verbose: + print(f" [request: {request_id}]", flush=True) + self.last_request_id = request_id + + return self._accumulate_text_from_http(response, verbose=verbose) + + def run_command(self, session_id: str, command: str, verbose: bool = False) -> tuple[str, str, int]: + if verbose: + print(f" $ {command}", flush=True) + response = self.client.invoke_agent_runtime_command( + agentRuntimeArn=self.config.harness_arn, + runtimeSessionId=session_id, + body={"command": command}, + ) + request_id = response.get("ResponseMetadata", {}).get("RequestId", "unknown") + self.last_request_id = request_id + return self._accumulate_command(response["stream"], verbose=verbose) + + def _accumulate_text_from_http(self, http_response, verbose: bool = False) -> str: + text_parts: list[str] = [] + tool_input_parts: list[str] = [] + current_tool: str | None = None + event_buffer = EventStreamBuffer() + + for chunk in http_response.stream(4096): + event_buffer.add_data(chunk) + for event in event_buffer: + if event.headers.get(":message-type") == "exception": + payload = json.loads(event.payload.decode("utf-8")) + if verbose: + print(f"\n ⚠️ Stream error: {payload}", flush=True) + if text_parts: + return "".join(text_parts) + raise RuntimeError(f"Stream error: {payload}") + + event_type = event.headers.get(":event-type", "") + if not event.payload: + continue + payload = json.loads(event.payload.decode("utf-8")) + + if event_type == "contentBlockStart": + start = payload.get("start", {}) + if "toolUse" in start: + 
current_tool = start["toolUse"].get("name", "unknown") + tool_input_parts = [] + else: + current_tool = None + elif event_type == "contentBlockDelta": + delta = payload.get("delta", {}) + if "text" in delta: + text_parts.append(delta["text"]) + if verbose: + print(delta["text"], end="", flush=True) + elif "toolUse" in delta and current_tool: + tool_input_parts.append(delta["toolUse"].get("input", "")) + elif event_type == "contentBlockStop": + if current_tool and verbose: + tool_input = "".join(tool_input_parts) + print(f"\n 🔧 {current_tool}: {tool_input[:200]}", flush=True) + current_tool = None + tool_input_parts = [] + elif event_type == "messageStop" and verbose: + print(flush=True) + + return "".join(text_parts) + + def _accumulate_command(self, stream, verbose: bool = False) -> tuple[str, str, int]: + stdout_parts: list[str] = [] + stderr_parts: list[str] = [] + exit_code = -1 + for event in stream: + if "chunk" in event: + chunk = event["chunk"] + if "contentDelta" in chunk: + delta = chunk["contentDelta"] + if "stdout" in delta: + stdout_parts.append(delta["stdout"]) + if verbose: + print(delta["stdout"], end="", flush=True) + if "stderr" in delta: + stderr_parts.append(delta["stderr"]) + if verbose: + print(delta["stderr"], end="", file=sys.stderr, flush=True) + elif "contentStop" in chunk: + exit_code = chunk["contentStop"].get("exitCode", -1) + if verbose: + print(f" [exit: {exit_code}]", flush=True) + return "".join(stdout_parts), "".join(stderr_parts), exit_code + + @staticmethod + def new_session_id() -> str: + return str(uuid.uuid4()).upper() diff --git a/agents/core/parsing.py b/agents/core/parsing.py new file mode 100644 index 000000000..1f4aab67d --- /dev/null +++ b/agents/core/parsing.py @@ -0,0 +1,56 @@ +import json +import re +from typing import Literal + +from pydantic import BaseModel, ValidationError + + +class Finding(BaseModel): + severity: Literal["critical", "high", "medium", "low"] + file: str + line: int + description: str + 
suggestion: str + + +class ReviewResult(BaseModel): + approved: bool + findings: list[Finding] + + +def parse_reviewer_output(raw_text: str) -> ReviewResult | None: + json_str = _extract_json(raw_text) + if json_str is None: + return None + return _validate(json_str) + + +def _extract_json(raw_text: str) -> str | None: + match = re.search(r"```json?\s*\n(.*?)\n\s*```", raw_text, re.DOTALL) + if match: + return match.group(1).strip() + + start = raw_text.find("{") + if start == -1: + return None + + depth = 0 + for i in range(start, len(raw_text)): + if raw_text[i] == "{": + depth += 1 + elif raw_text[i] == "}": + depth -= 1 + if depth == 0: + candidate = raw_text[start : i + 1] + if "approved" in candidate: + return candidate + return None + return None + + +def _validate(json_str: str) -> ReviewResult | None: + try: + data = json.loads(json_str) + return ReviewResult(**data) + except (json.JSONDecodeError, ValidationError): + return None diff --git a/agents/core/phases/__init__.py b/agents/core/phases/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/agents/feature_builder/__init__.py b/agents/feature_builder/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/agents/feature_builder/main.py b/agents/feature_builder/main.py new file mode 100644 index 000000000..bbd6a53f2 --- /dev/null +++ b/agents/feature_builder/main.py @@ -0,0 +1,64 @@ +"""Feature Builder Agent — builds features from devex + implementation docs. 
+ +Usage: + uv run python -m feature_builder.main --devex docs/devex.md --impl docs/impl.md + uv run python -m feature_builder.main --devex docs/devex.md --impl docs/impl.md --config config.yaml +""" + +import argparse +import sys +from pathlib import Path + +from orchestrations.fix_and_review.orchestrator import run_pipeline + +PROMPTS_DIR = Path(__file__).resolve().parent / "prompts" + + +def main(): + parser = argparse.ArgumentParser(description="Feature Builder Agent") + parser.add_argument("--devex", required=True, help="Path to devex doc (markdown)") + parser.add_argument("--impl", required=True, help="Path to implementation plan (markdown)") + parser.add_argument("--name", help="Feature name (used for branch naming)") + parser.add_argument("--config", default="config.yaml", help="Config YAML path") + parser.add_argument("--aws-profile", help="Override AWS profile") + parser.add_argument("--harness-arn", help="Override harness ARN") + args = parser.parse_args() + + devex_path = Path(args.devex) + impl_path = Path(args.impl) + + if not devex_path.exists(): + print(f"Error: devex doc not found: {devex_path}", file=sys.stderr) + return 1 + if not impl_path.exists(): + print(f"Error: impl doc not found: {impl_path}", file=sys.stderr) + return 1 + + devex_content = devex_path.read_text() + impl_content = impl_path.read_text() + + feature_name = args.name or devex_path.stem.replace(" ", "-").lower() + + # Construct a synthetic "issue" that the orchestrator can consume + # The orchestrator expects an issue_url — we pass a placeholder and override the setup phase + issue_url = f"feature/{feature_name}" + + overrides = {} + if args.aws_profile: + overrides["aws_profile"] = args.aws_profile + if args.harness_arn: + overrides["harness_arn"] = args.harness_arn + + return run_pipeline( + issue_url=issue_url, + config_path=args.config, + prompts_dir=PROMPTS_DIR, + devex_content=devex_content, + impl_content=impl_content, + feature_name=feature_name, + **overrides, + ) + + 
+if __name__ == "__main__": + sys.exit(main()) diff --git a/agents/feature_builder/prompts/executor.md b/agents/feature_builder/prompts/executor.md new file mode 100644 index 000000000..40601221e --- /dev/null +++ b/agents/feature_builder/prompts/executor.md @@ -0,0 +1,17 @@ +You are a senior software engineer implementing a new feature across one or two TypeScript repos. + +The plan: +{plan} + +Instructions: +1. Follow the plan exactly. Make the code changes described. +2. Run `npm run typecheck 2>&1 | tail -20` in each affected repo. Fix any type errors. +3. Run tests with summary: `npm run test:unit 2>&1 | grep -E "(FAIL|PASS|Tests:|Test Suites:)" | tail -20` +4. If tests fail, debug the specific file: `npm run test:unit -- path/to/failing.test.ts 2>&1 | tail -50` +5. Commit your changes: `git add -A && git commit -m "feat: {commit_message}"` +6. Push to remote: `git push origin feature/{feature_name}` +7. If you need to deviate from the plan, document why in your commit message. + +IMPORTANT: Never run `npm run test:unit` without piping through grep or tail. The full output is too large and will overflow context. Use the grep pattern in step 3 for the summary, then target specific files in step 4 if something fails. + +Do not stop until typecheck and tests pass. If tests fail, analyze the failure, fix the code, and try again. diff --git a/agents/feature_builder/prompts/fixer.md b/agents/feature_builder/prompts/fixer.md new file mode 100644 index 000000000..67e04ebfb --- /dev/null +++ b/agents/feature_builder/prompts/fixer.md @@ -0,0 +1,16 @@ +You are a senior software engineer fixing issues found during code review. + +The following findings were reported by reviewers. Address each one: + +{findings_text} + +Instructions: +1. Fix each finding, starting with Critical severity first, then High, Medium, Low. +2. If a finding is not applicable or is a false positive, explain why in a commit message. +3. 
Run `npm run typecheck 2>&1 | tail -20` in each affected repo after fixes. +4. Run tests with summary: `npm run test:unit 2>&1 | grep -E "(FAIL|PASS|Tests:|Test Suites:)" | tail -20` +5. If tests fail, debug the specific file: `npm run test:unit -- path/to/failing.test.ts 2>&1 | tail -50` +6. Commit: `git add -A && git commit -m "fix: address review findings round {round_number}"` +7. Push: `git push origin feature/{feature_name}` + +IMPORTANT: Never run `npm run test:unit` without piping through grep or tail. The full output is too large and will overflow context. diff --git a/agents/feature_builder/prompts/planner.md b/agents/feature_builder/prompts/planner.md new file mode 100644 index 000000000..ee22e2956 --- /dev/null +++ b/agents/feature_builder/prompts/planner.md @@ -0,0 +1,27 @@ +You are a senior software architect planning the implementation of a new feature. + +You have access to two TypeScript repositories: +- agentcore-cli: AWS AgentCore CLI tool (Commander.js + Ink TUI, ~550 source files) +- agentcore-l3-cdk-constructs: AWS CDK L3 constructs for AgentCore (~17 test files, shares schemas with CLI) + +## DevEx Document (what the user experience should be) + +{devex_content} + +## Implementation Plan (technical approach) + +{impl_content} + +## Your Task + +Based on the devex doc and implementation plan above, explore the relevant code in both repos and produce a detailed, actionable implementation plan. + +Your plan MUST include: +1. **Affected repos**: Which repos need changes (cli, cdk, or both) +2. **Files to change**: Exact file paths to modify, create, or delete +3. **Approach**: Step-by-step description of the changes, referencing specific functions and types in the codebase +4. **Risks**: What could go wrong, edge cases to watch for +5. **Testing strategy**: What tests to add or modify, how to verify the feature works +6. **Scope estimate**: small (1-3 files), medium (4-7 files), or large (8+ files) + +Output the plan as structured markdown. 
diff --git a/agents/feature_builder/prompts/reviewer.md b/agents/feature_builder/prompts/reviewer.md new file mode 100644 index 000000000..d576c249f --- /dev/null +++ b/agents/feature_builder/prompts/reviewer.md @@ -0,0 +1,36 @@ +You are a senior code reviewer. You have been assigned a region of a code change to review. + +Feature being built: {issue_summary} +Branch: {branch_name} +Repos: {cli_repo}, {cdk_repo} + +Your assigned focus: {focus} +Files to focus on: {assigned_files} + +Instructions: +1. Clone repos with the feature branch: + - git clone --depth 10 --branch {branch_name} https://github.com/{cli_repo}.git agentcore-cli + - git clone --depth 10 --branch {branch_name} https://github.com/{cdk_repo}.git agentcore-l3-cdk-constructs + (If the branch doesn't exist in a repo, clone main instead: git clone --depth 10 https://github.com/{cli_repo}.git agentcore-cli) +2. Run: git diff main (or git log if on the feature branch already) +3. Review your assigned files for ALL concerns: correctness, architecture, security, testing adequacy, cross-repo consistency, and breaking changes +4. Trace callers of changed functions. Check types. Verify test coverage. +5. You do NOT need to run npm install — you are reviewing code, not building it. + +{previous_findings_context} + +Output your review as a JSON object wrapped in ```json fences. +The JSON must have this exact schema: +{{ + "approved": boolean, + "findings": [ + {{ + "severity": "critical" | "high" | "medium" | "low", + "file": "path/to/file", + "line": number, + "description": "what's wrong", + "suggestion": "how to fix" + }} + ] +}} +Output ONLY the JSON object in code fences. No other text before or after. diff --git a/agents/feature_builder/prompts/setup.md b/agents/feature_builder/prompts/setup.md new file mode 100644 index 000000000..f06610da8 --- /dev/null +++ b/agents/feature_builder/prompts/setup.md @@ -0,0 +1,18 @@ +You are setting up a development environment to build a new feature. + +Steps: +1. 
Install tools (Amazon Linux 2023): dnf install -y -q git nodejs20 npm > /dev/null 2>&1 +2. Authenticate GitHub: echo $GH_TOKEN | gh auth login --with-token +3. Configure git to use gh for auth: gh auth setup-git +4. Clone both repos: + - git clone https://github.com/{cli_repo}.git {cli_repo_name} + - git clone https://github.com/{cdk_repo}.git {cdk_repo_name} +5. Install dependencies: cd {cli_repo_name} && npm install 2>&1 | tail -3 && cd .. +6. Create a feature branch in both repos: + - cd {cli_repo_name} && git checkout -b feature/{feature_name} && cd .. + - cd {cdk_repo_name} && git checkout -b feature/{feature_name} && cd .. +7. Report back confirmation that the environment is ready. + +IMPORTANT: Run each step as a separate shell command. Do not combine them. If tools are already installed, skip step 1. + +Output: Confirm environment is ready and which repos are cloned. diff --git a/agents/orchestrations/__init__.py b/agents/orchestrations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/agents/orchestrations/fix_and_review/__init__.py b/agents/orchestrations/fix_and_review/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/agents/orchestrations/fix_and_review/orchestrator.py b/agents/orchestrations/fix_and_review/orchestrator.py new file mode 100644 index 000000000..eee22af25 --- /dev/null +++ b/agents/orchestrations/fix_and_review/orchestrator.py @@ -0,0 +1,252 @@ +import time +from pathlib import Path + +from core.config import PipelineConfig +from core.harness_client import HarnessClient +from core.parsing import Finding +from orchestrations.fix_and_review.partitioning import ( + ReviewerAssignment, + calculate_reviewer_count, + partition_round1_by_directory, + partition_round2_focus_prompts, + partition_round3_risk_areas, +) +from orchestrations.fix_and_review.phases.aggregate import run_aggregate +from orchestrations.fix_and_review.phases.complete import run_complete +from 
orchestrations.fix_and_review.phases.execute import run_execute +from orchestrations.fix_and_review.phases.extract import ExtractResult, run_extract +from orchestrations.fix_and_review.phases.fix import run_fix +from orchestrations.fix_and_review.phases.plan import run_plan +from orchestrations.fix_and_review.phases.review import run_review +from orchestrations.fix_and_review.phases.setup import run_setup, set_prompts_dir +from orchestrations.fix_and_review.phases.validate import run_validate +from orchestrations.fix_and_review.phases.verify import run_verify + + +def run_pipeline( + issue_url: str, + config_path: str = "config.yaml", + prompts_dir: str | Path | None = None, + devex_content: str | None = None, + impl_content: str | None = None, + feature_name: str | None = None, + **overrides: str, +) -> int: + config = PipelineConfig.from_yaml(config_path) + for key, value in overrides.items(): + if hasattr(config, key): + setattr(config, key, value) + + if prompts_dir: + set_prompts_dir(Path(prompts_dir)) + + is_feature = devex_content is not None + if is_feature: + issue_number = feature_name or "feature" + branch_name = f"feature/{feature_name or 'unnamed'}" + else: + issue_number = issue_url.rstrip("/").split("/")[-1] + branch_name = f"fix/{issue_number}" + + client = HarnessClient(config) + session_id = HarnessClient.new_session_id() + + pipeline_start = time.time() + print(f"=== Pipeline Start ===") + print(f"{'Feature' if is_feature else 'Issue'}: {feature_name or issue_url}") + print(f"Session: {session_id}") + print(f"Harness: {config.harness_arn}") + print() + + def elapsed() -> str: + m, s = divmod(int(time.time() - pipeline_start), 60) + return f"{m}m{s:02d}s" + + # Phase 0: Setup + t0 = time.time() + print("--- Phase 0: Setup ---") + issue_details = run_setup(client, config, session_id, issue_url, + feature_name=feature_name) + if is_feature: + issue_title = feature_name or "unnamed feature" + else: + issue_title_raw, _, _ = client.run_command( + 
session_id, f"gh issue view {issue_url} --json title --jq .title 2>/dev/null" + ) + issue_title = issue_title_raw.strip() or f"resolve #{issue_number}" + print(f"Setup complete. {'Feature' if is_feature else 'Issue'}: {issue_title} [{int(time.time()-t0)}s | total {elapsed()}]") + print() + + # Phase 1: Plan + t0 = time.time() + print("--- Phase 1: Plan ---") + if is_feature: + plan = run_plan(client, config, session_id, issue_details, + devex_content=devex_content, impl_content=impl_content) + else: + plan = run_plan(client, config, session_id, issue_details) + print(f"Plan generated ({len(plan)} chars). [{int(time.time()-t0)}s | total {elapsed()}]") + print() + + # Phase 1.5: Validate Plan + t0 = time.time() + print("--- Phase 1.5: Validate Plan ---") + for attempt in range(3): + validation = run_validate(client, session_id, plan) + if validation.valid: + print(f"Plan validated. [{int(time.time()-t0)}s | total {elapsed()}]") + break + print(f"Validation errors: {validation.errors}") + if attempt < 2: + print("Re-planning...") + plan = run_plan( + client, config, session_id, + f"Previous plan had issues: {validation.errors}\n\n{issue_details}", + ) + else: + print("WARNING: Plan validation failed after 3 attempts. Proceeding anyway.") + print() + + # Phase 2: Execute + t0 = time.time() + print("--- Phase 2: Execute ---") + affected_repos: list[str] = [] + if "agentcore-cli" in plan.lower() or "cli" in plan.lower(): + affected_repos.append("agentcore-cli") + if "agentcore-l3-cdk" in plan.lower() or "cdk" in plan.lower(): + affected_repos.append("agentcore-l3-cdk-constructs") + if not affected_repos: + affected_repos = ["agentcore-cli"] + + for attempt in range(3): + run_execute(client, config, session_id, plan, branch_name, issue_number) + print(f"Execution complete. 
[{int(time.time()-t0)}s | total {elapsed()}]") + + # Phase 2.5: Verify + print("--- Phase 2.5: Verify ---") + verification = run_verify(client, session_id, branch_name, affected_repos) + if verification.all_passed: + print(f"Verification passed. [{int(time.time()-t0)}s | total {elapsed()}]") + break + print(f"Verification failed: {verification.errors}") + if attempt < 2: + print("Re-executing with error context...") + else: + print("WARNING: Verification failed after 3 attempts. Proceeding to review anyway.") + print() + + # Phase 3: Extract + t0 = time.time() + print("--- Phase 3: Extract ---") + extract = run_extract(client, session_id, config.cli_repo, config.cdk_repo) + print( + f"Extracted diff: {len(extract.stats.changed_files)} files, " + f"{extract.stats.total_lines} lines changed [{int(time.time()-t0)}s | total {elapsed()}]" + ) + print() + + # Review Loop + all_previous_findings_files: list[str] = [] + review_summary_parts: list[str] = [] + + for round_num in range(1, config.max_review_rounds + 1): + t0 = time.time() + # Phase 4: Review + print(f"--- Phase 4: Review (Round {round_num}) ---") + num_reviewers = calculate_reviewer_count( + extract.stats, config.min_reviewers, config.max_reviewers + ) + + if round_num == 1: + assignments = partition_round1_by_directory( + extract.stats.changed_files, num_reviewers + ) + elif round_num == 2: + focus_prompts = partition_round2_focus_prompts(num_reviewers) + assignments = [ + ReviewerAssignment(files=extract.stats.changed_files, focus=fp) + for fp in focus_prompts + ] + else: + assignments = partition_round3_risk_areas( + all_previous_findings_files, extract.stats.changed_files, num_reviewers + ) + + previous_context = "" + if round_num > 1: + previous_context = ( + f"These findings were identified and fixed in previous rounds: " + f"{', '.join(all_previous_findings_files)}. " + f"Do not re-raise issues that have already been addressed." 
+ ) + + issue_summary = issue_details[:500] if issue_details else "See branch for details" + review_results = run_review( + client, config, assignments, branch_name, issue_summary, previous_context + ) + print(f"Reviews collected from {len(review_results)} reviewers. [{int(time.time()-t0)}s | total {elapsed()}]") + + # Phase 5: Aggregate + print(f"--- Phase 5: Aggregate (Round {round_num}) ---") + aggregate = run_aggregate(review_results) + print( + f"Approved: {aggregate.all_approved}, " + f"Findings: {len(aggregate.unique_findings)}, " + f"Parse failures: {aggregate.parse_failures}" + ) + + if aggregate.all_approved: + medium_plus = [ + f for f in aggregate.unique_findings + if f.severity in ("critical", "high", "medium") + ] + if not medium_plus: + print(f"All reviewers approved. Moving to Complete. [total {elapsed()}]") + review_summary_parts.append( + f"Round {round_num}: {len(aggregate.unique_findings)} findings, all approved" + ) + break + + review_summary_parts.append( + f"Round {round_num}: {len(aggregate.unique_findings)} findings" + ) + + for f in aggregate.unique_findings: + if f.file and f.file not in all_previous_findings_files: + all_previous_findings_files.append(f.file) + + # Phase 6: Fix + t_fix = time.time() + print(f"--- Phase 6: Fix (Round {round_num}) ---") + run_fix(client, config, session_id, aggregate.unique_findings, branch_name, round_num) + print(f"Fixes applied. [{int(time.time()-t_fix)}s | total {elapsed()}]") + + # Re-extract for next round + extract = run_extract(client, session_id, config.cli_repo, config.cdk_repo) + print() + else: + print( + f"WARNING: Max review rounds ({config.max_review_rounds}) reached " + f"without full approval." 
"""Reviewer-count heuristics and file partitioning for multi-round reviews."""

from collections import defaultdict
from dataclasses import dataclass
from pathlib import PurePosixPath


@dataclass
class DiffStats:
    """Summary of a diff: changed files, size, and whether it spans both repos."""

    changed_files: list[str]
    total_lines: int
    cross_repo: bool


@dataclass
class ReviewerAssignment:
    """One reviewer's slice of the change: the files to read and a focus prompt."""

    files: list[str]
    focus: str = ""


def calculate_reviewer_count(stats: DiffStats, min_r: int = 3, max_r: int = 5) -> int:
    """Scale the number of reviewers with diff size, capped at ``max_r``."""
    n_files = len(stats.changed_files)
    n_lines = stats.total_lines

    if n_files <= 2 and n_lines < 100:
        count = min_r
    elif n_files <= 7 and n_lines < 500:
        count = min_r + 1
    else:
        count = max_r

    # A change touching both repos earns one extra reviewer, still capped.
    if stats.cross_repo:
        count += 1

    return min(count, max_r)


def partition_round1_by_directory(
    changed_files: list[str], num_reviewers: int
) -> list[ReviewerAssignment]:
    """Round 1: one reviewer per top-level directory group, largest groups first."""
    # Bucket files under their first two path components ("root" for bare names).
    by_dir: dict[str, list[str]] = defaultdict(list)
    for path in changed_files:
        segments = PurePosixPath(path).parts
        if not segments:
            bucket = "root"
        elif len(segments) > 1:
            bucket = "/".join(segments[:2])
        else:
            bucket = segments[0]
        by_dir[bucket].append(path)

    # Busiest directories first so they get dedicated reviewers.
    ordered = sorted(by_dir.items(), key=lambda kv: -len(kv[1]))

    if len(ordered) >= num_reviewers:
        # One reviewer per big group; the final reviewer sweeps up the rest.
        assignments = [
            ReviewerAssignment(files=group, focus=f"Review changes in {bucket}/")
            for bucket, group in ordered[: num_reviewers - 1]
        ]
        leftovers: list[str] = []
        for _, group in ordered[num_reviewers - 1 :]:
            leftovers.extend(group)
        assignments.append(
            ReviewerAssignment(files=leftovers, focus="Review remaining changes")
        )
        return assignments

    # Fewer directory groups than reviewers: cover every group, then add extra
    # passes over the largest group with rotating specialty focuses.
    assignments = [
        ReviewerAssignment(files=group, focus=f"Review changes in {bucket}/")
        for bucket, group in ordered
    ]
    specialty_focuses = [
        "Focus on error handling and edge cases",
        "Focus on type safety and interface consistency",
        "Focus on backwards compatibility and breaking changes",
    ]
    biggest_group = ordered[0][1] if ordered else changed_files
    for extra in range(num_reviewers - len(assignments)):
        assignments.append(
            ReviewerAssignment(
                files=biggest_group,
                focus=specialty_focuses[extra % len(specialty_focuses)],
            )
        )
    return assignments


def partition_round2_focus_prompts(num_reviewers: int) -> list[str]:
    """Round 2: every reviewer sees all files; each gets a distinct review lens."""
    prompts = [
        "Focus on correctness — does the logic match the issue requirements? Are there edge cases or off-by-one errors?",
        "Focus on safety — breaking changes, backwards compatibility, error handling, and graceful degradation",
        "Focus on testing — are the tests adequate? Do they cover acceptance criteria, edge cases, and error paths?",
        "Focus on cross-repo consistency — do type definitions, schemas, and interfaces stay in sync across both repos?",
        "Focus on performance and resource usage — are there unnecessary allocations, N+1 patterns, or missing caching?",
    ]
    return prompts[:num_reviewers]


def partition_round3_risk_areas(
    previous_findings_files: list[str],
    all_changed_files: list[str],
    num_reviewers: int,
) -> list[ReviewerAssignment]:
    """Round 3: concentrate roughly half the reviewers on previously-flagged files."""
    flagged = list(set(previous_findings_files))
    unflagged = [f for f in all_changed_files if f not in flagged]

    deep_count = max(1, num_reviewers // 2)
    assignments: list[ReviewerAssignment] = [
        ReviewerAssignment(
            files=flagged,
            focus=f"Deep review of previously-flagged files — verify fixes are correct and complete (pass {i + 1})",
        )
        for i in range(deep_count)
    ]

    # Remaining reviewers scan everything, flagged files listed first.
    everything = flagged + unflagged
    for i in range(num_reviewers - deep_count):
        assignments.append(
            ReviewerAssignment(
                files=everything,
                focus=f"Broad review with emphasis on how fixes interact with surrounding code (pass {i + 1})",
            )
        )

    return assignments
"""Phase 8: rebase, push, and open pull requests for every repo with changes."""

import re
from dataclasses import dataclass
from typing import TYPE_CHECKING

if TYPE_CHECKING:  # annotation-only imports; no runtime dependency needed
    from core.config import PipelineConfig
    from core.harness_client import HarnessClient


@dataclass
class CompleteResult:
    """Outcome of the completion phase."""

    # URLs of the PRs that were created (or found already open via `gh pr list`).
    pr_urls: list[str]
    # True when every repo rebased cleanly onto origin/main.
    rebase_succeeded: bool
    # Human-readable descriptions of anything that went wrong.
    errors: list[str]


def run_complete(
    client: "HarnessClient",
    config: "PipelineConfig",
    session_id: str,
    branch_name: str,
    issue_url: str,
    issue_number: str,
    issue_title: str,
    review_summary: str,
    affected_repos: list[str],
) -> CompleteResult:
    """Rebase the feature branch, push it, and create PRs (CDK repo first).

    Repos in ``affected_repos`` may be short names or ``org/repo``; only repos
    that actually have commits on ``branch_name`` relative to main are touched.
    Returns a CompleteResult whose ``pr_urls`` is non-empty on success.
    """
    errors: list[str] = []
    pr_urls: list[str] = []

    # Normalize affected_repos to full org/repo format for comparison with config.
    full_repo_map = {
        "agentcore-cli": config.cli_repo,
        "agentcore-l3-cdk-constructs": config.cdk_repo,
        config.cli_repo: config.cli_repo,
        config.cdk_repo: config.cdk_repo,
    }

    # Filter to repos that actually have changes on the feature branch.
    repos_with_changes: list[str] = []
    for repo in affected_repos:
        repo_name = repo.split("/")[-1] if "/" in repo else repo
        stdout, _, exit_code = client.run_command(
            session_id, f"cd {repo_name} && git log main..{branch_name} --oneline 2>/dev/null"
        )
        if exit_code == 0 and stdout.strip():
            full_repo = full_repo_map.get(repo, repo)
            repos_with_changes.append(full_repo)

    if not repos_with_changes:
        return CompleteResult(pr_urls=[], rebase_succeeded=False, errors=["No repos have changes on the feature branch"])

    # Rebase and push each repo that has changes. A failed rebase is aborted
    # (leaving the branch as-is) and the push proceeds best-effort with
    # --force-with-lease, which refuses to clobber unseen remote commits.
    rebase_succeeded = True
    for repo in repos_with_changes:
        repo_name = repo.split("/")[-1] if "/" in repo else repo

        _, stderr, exit_code = client.run_command(
            session_id, f"cd {repo_name} && git fetch origin main && git rebase origin/main"
        )
        if exit_code != 0:
            rebase_succeeded = False
            client.run_command(session_id, f"cd {repo_name} && git rebase --abort")
            errors.append(f"Rebase failed in {repo_name}: {stderr[:500]}")

        _, stderr, exit_code = client.run_command(
            session_id, f"cd {repo_name} && git push origin {branch_name} --force-with-lease"
        )
        if exit_code != 0:
            errors.append(f"Push failed in {repo_name}: {stderr[:500]}")

    # Create PRs — CDK first if both repos have changes.
    repo_order = []
    if config.cdk_repo in repos_with_changes:
        repo_order.append(config.cdk_repo)
    if config.cli_repo in repos_with_changes:
        repo_order.append(config.cli_repo)

    for repo in repo_order:
        repo_name = repo.split("/")[-1]

        # Let the agent create the PR — it can read the repo's PR template and fill it in properly
        pr_message = (
            f"Create a pull request in this repo for branch {branch_name}.\n"
            f"Issue: {issue_url} (#{issue_number})\n"
            f"Issue title: {issue_title}\n"
            f"Review summary: {review_summary}\n"
            f"Read the PR template at .github/pull_request_template.md and fill it in properly.\n"
            f"IMPORTANT: The Related Issue section MUST say 'Closes #{issue_number}' to auto-close the issue when merged.\n"
            f"Use a descriptive title based on the issue title. Do NOT use a generic title like 'fix: resolve #N'.\n"
            f"Use: gh pr create --title '' --body-file /tmp/pr_body.md --head {branch_name}\n"
            f"Write the filled-in template to /tmp/pr_body.md first."
        )
        pr_output = client.invoke(session_id=session_id, message=pr_message)

        # Extract the PR URL from the agent's transcript; if absent, ask gh
        # whether a PR for this branch exists anyway.
        # BUG FIX: removed a stale trailing `if exit_code == 0 and stdout.strip()`
        # block that re-read variables left over from the rebase/push commands
        # and appended the `git log` output as a bogus PR URL on every run.
        url_match = re.search(r"https://github\.com/[^\s]+/pull/\d+", pr_output)
        if url_match:
            pr_urls.append(url_match.group(0))
        else:
            stdout, _, _ = client.run_command(
                session_id, f"cd {repo_name} && gh pr list --head {branch_name} --json url --jq '.[0].url'"
            )
            if stdout.strip():
                pr_urls.append(stdout.strip())
            else:
                errors.append(f"PR may have been created in {repo} but could not extract URL")

    return CompleteResult(pr_urls=pr_urls, rebase_succeeded=rebase_succeeded, errors=errors)
b/agents/orchestrations/fix_and_review/phases/extract.py new file mode 100644 index 000000000..fbe8e713c --- /dev/null +++ b/agents/orchestrations/fix_and_review/phases/extract.py @@ -0,0 +1,55 @@ +from dataclasses import dataclass + +from core.harness_client import HarnessClient +from orchestrations.fix_and_review.partitioning import DiffStats + + +@dataclass +class ExtractResult: + diff_stat: str + full_diff: str + commit_log: str + stats: DiffStats + + +def run_extract( + client: HarnessClient, + session_id: str, + cli_repo: str, + cdk_repo: str, +) -> ExtractResult: + diff_stat_stdout, _, _ = client.run_command(session_id, "git diff main --stat") + full_diff_stdout, _, _ = client.run_command(session_id, "git diff main") + commit_log_stdout, _, _ = client.run_command(session_id, "git log main..HEAD --oneline") + + changed_files: list[str] = [] + for line in diff_stat_stdout.strip().split("\n"): + line = line.strip() + if "|" in line: + file_path = line.split("|")[0].strip() + if file_path: + changed_files.append(file_path) + + total_lines = 0 + for line in full_diff_stdout.split("\n"): + if line.startswith("+") and not line.startswith("+++"): + total_lines += 1 + elif line.startswith("-") and not line.startswith("---"): + total_lines += 1 + + has_cli = any(f.startswith(cli_repo) or f.startswith("src/cli") for f in changed_files) + has_cdk = any(f.startswith(cdk_repo) or f.startswith("src/cdk") for f in changed_files) + cross_repo = has_cli and has_cdk + + stats = DiffStats( + changed_files=changed_files, + total_lines=total_lines, + cross_repo=cross_repo, + ) + + return ExtractResult( + diff_stat=diff_stat_stdout, + full_diff=full_diff_stdout, + commit_log=commit_log_stdout, + stats=stats, + ) diff --git a/agents/orchestrations/fix_and_review/phases/fix.py b/agents/orchestrations/fix_and_review/phases/fix.py new file mode 100644 index 000000000..ce11e110c --- /dev/null +++ b/agents/orchestrations/fix_and_review/phases/fix.py @@ -0,0 +1,30 @@ +from core.config 
import PipelineConfig +from core.harness_client import HarnessClient +from core.parsing import Finding +from orchestrations.fix_and_review.phases.setup import load_prompt + + +def run_fix( + client: HarnessClient, + config: PipelineConfig, + session_id: str, + findings: list[Finding], + branch_name: str, + round_number: int, +) -> str: + severity_order = {"critical": 0, "high": 1, "medium": 2, "low": 3} + sorted_findings = sorted(findings, key=lambda f: severity_order.get(f.severity, 4)) + + findings_text = "" + for f in sorted_findings: + findings_text += f"### [{f.severity.upper()}] {f.file}:{f.line}\n" + findings_text += f"**Issue:** {f.description}\n" + findings_text += f"**Suggestion:** {f.suggestion}\n\n" + + prompt = load_prompt( + "fixer.md", + findings_text=findings_text, + round_number=str(round_number), + branch_name=branch_name, + ) + return client.invoke(session_id=session_id, message=prompt) diff --git a/agents/orchestrations/fix_and_review/phases/plan.py b/agents/orchestrations/fix_and_review/phases/plan.py new file mode 100644 index 000000000..20eee440f --- /dev/null +++ b/agents/orchestrations/fix_and_review/phases/plan.py @@ -0,0 +1,21 @@ +from core.config import PipelineConfig +from core.harness_client import HarnessClient +from orchestrations.fix_and_review.phases.setup import load_prompt + + +def run_plan( + client: HarnessClient, + config: PipelineConfig, + session_id: str, + issue_details: str, + devex_content: str | None = None, + impl_content: str | None = None, +) -> str: + if devex_content and impl_content: + prompt = load_prompt("planner.md", + issue_details=issue_details, + devex_content=devex_content, + impl_content=impl_content) + else: + prompt = load_prompt("planner.md", issue_details=issue_details) + return client.invoke(session_id=session_id, message=prompt) diff --git a/agents/orchestrations/fix_and_review/phases/review.py b/agents/orchestrations/fix_and_review/phases/review.py new file mode 100644 index 000000000..be1fabbce --- 
/dev/null +++ b/agents/orchestrations/fix_and_review/phases/review.py @@ -0,0 +1,49 @@ +from core.config import PipelineConfig +from core.harness_client import HarnessClient +from core.parsing import ReviewResult, parse_reviewer_output +from orchestrations.fix_and_review.partitioning import ReviewerAssignment +from orchestrations.fix_and_review.phases.setup import load_prompt + + +def run_review( + client: HarnessClient, + config: PipelineConfig, + assignments: list[ReviewerAssignment], + branch_name: str, + issue_summary: str, + previous_findings_context: str = "", +) -> list[tuple[ReviewResult | None, str]]: + results: list[tuple[ReviewResult | None, str]] = [] + + for assignment in assignments: + session_id = HarnessClient.new_session_id() + prompt = load_prompt( + "reviewer.md", + issue_summary=issue_summary, + branch_name=branch_name, + cli_repo=config.cli_repo, + cdk_repo=config.cdk_repo, + focus=assignment.focus, + assigned_files=", ".join(assignment.files), + previous_findings_context=previous_findings_context, + ) + + raw_output = client.invoke(session_id=session_id, message=prompt) + parsed = parse_reviewer_output(raw_output) + + if parsed is None: + retry_msg = ( + "Your previous output was not valid JSON. 
Please output ONLY a JSON object " + "wrapped in ```json fences with this schema: " + '{"approved": boolean, "findings": [{"severity": "critical"|"high"|"medium"|"low", ' + '"file": "path", "line": number, "description": "...", "suggestion": "..."}]}' + ) + for _ in range(2): + raw_output = client.invoke(session_id=session_id, message=retry_msg) + parsed = parse_reviewer_output(raw_output) + if parsed is not None: + break + + results.append((parsed, raw_output)) + + return results diff --git a/agents/orchestrations/fix_and_review/phases/setup.py b/agents/orchestrations/fix_and_review/phases/setup.py new file mode 100644 index 000000000..32608a182 --- /dev/null +++ b/agents/orchestrations/fix_and_review/phases/setup.py @@ -0,0 +1,40 @@ +from pathlib import Path + +from core.config import PipelineConfig +from core.harness_client import HarnessClient + + +_prompts_dir: Path | None = None + + +def set_prompts_dir(path: Path) -> None: + global _prompts_dir + _prompts_dir = path + + +def load_prompt(name: str, **kwargs: str) -> str: + if _prompts_dir is None: + raise RuntimeError("Prompts directory not set. 
"""Phase 1.5: sanity-check that files referenced by the plan actually exist."""

import re
from dataclasses import dataclass
from typing import TYPE_CHECKING

if TYPE_CHECKING:  # annotation-only import; no runtime dependency needed
    from core.harness_client import HarnessClient

# Words that suggest the plan intends to CREATE a file rather than modify one.
# "add"/"new" are substring heuristics and may over-match (e.g. "address");
# that errs on the permissive side, which matches the original intent.
_CREATION_MARKERS = ("create", "new", "add")


@dataclass
class ValidationResult:
    """Result of checking the plan's file references against the working tree."""

    # True when every referenced file exists or is plausibly marked for creation.
    valid: bool
    # One message per file reference that could not be resolved.
    errors: list[str]


def run_validate(
    client: "HarnessClient",
    session_id: str,
    plan_text: str,
) -> ValidationResult:
    """Verify each src/test file path mentioned in the plan exists in one repo.

    A path missing from both repos is only an error when no plan line that
    mentions that path contains a creation marker — otherwise we assume the
    plan intends to create the file.
    """
    file_refs = re.findall(r"(?:src|test|tests)/[\w/.-]+\.(?:ts|tsx|js|json)", plan_text)
    file_refs = list(set(file_refs))

    # Lowercased plan lines for per-file creation-marker checks.
    plan_lines = plan_text.lower().splitlines()

    errors: list[str] = []
    for file_ref in file_refs:
        for repo_dir in ["agentcore-cli", "agentcore-l3-cdk-constructs"]:
            stdout, stderr, exit_code = client.run_command(
                session_id, f"test -f {repo_dir}/{file_ref} && echo EXISTS || echo MISSING"
            )
            if "EXISTS" in stdout:
                break
        else:
            # BUG FIX: the old condition `"create" not in plan_text.lower() or
            # file_ref not in plan_text` was always driven by the first clause
            # (file_ref was extracted FROM plan_text, so the second clause is
            # always False), which suppressed every missing-file error whenever
            # the word "create" appeared anywhere in the plan. Now only lines
            # that mention THIS file are checked for a creation marker.
            mentions = [line for line in plan_lines if file_ref.lower() in line]
            if not any(marker in line for line in mentions for marker in _CREATION_MARKERS):
                errors.append(f"File not found in either repo: {file_ref}")

    return ValidationResult(valid=len(errors) == 0, errors=errors)
"""Phase 2.5: verify commits, typecheck, tests, and push for each changed repo."""

from dataclasses import dataclass
from typing import TYPE_CHECKING

if TYPE_CHECKING:  # annotation-only import; no runtime dependency needed
    from core.harness_client import HarnessClient

# Per-repo unit-test commands; anything not listed falls back to `npm test`.
TEST_COMMANDS = {
    "agentcore-cli": "npm run test:unit",
    "agentcore-l3-cdk-constructs": "npm test",
}


@dataclass
class VerificationResult:
    """Pass/fail status of each verification step plus collected error messages."""

    commits_exist: bool
    typecheck_passes: bool
    tests_pass: bool
    branch_pushed: bool
    errors: list[str]

    @property
    def all_passed(self) -> bool:
        """True only when every verification step succeeded."""
        return (
            self.commits_exist
            and self.typecheck_passes
            and self.tests_pass
            and self.branch_pushed
        )


def run_verify(
    client: "HarnessClient",
    session_id: str,
    branch_name: str,
    affected_repos: list[str],
) -> VerificationResult:
    """Check that the branch has commits, typechecks, passes tests, and is pushed.

    Only repos with a non-empty `git diff main --stat` are typechecked, tested,
    and pushed. All failures are collected into ``errors`` rather than raising.
    """
    errors: list[str] = []

    # Check commits exist — cd into first affected repo
    first_repo = affected_repos[0] if affected_repos else "agentcore-cli"
    stdout, _, exit_code = client.run_command(
        session_id, f"cd {first_repo} && git log main..HEAD --oneline"
    )
    commits_exist = exit_code == 0 and len(stdout.strip()) > 0
    if not commits_exist:
        errors.append(f"No commits found on feature branch in {first_repo}")

    # Only typecheck/test repos that were actually changed
    typecheck_passes = True
    for repo in affected_repos:
        stdout, _, exit_code = client.run_command(
            session_id, f"cd {repo} && git diff main --stat 2>/dev/null"
        )
        if not stdout.strip():
            continue
        print(f" Running typecheck in {repo}...", flush=True)
        # BUG FIX: `set -o pipefail` so the exit code reflects npm, not `tail`
        # (a bare pipeline reports the LAST stage's status, and tail always
        # exits 0). Assumes the harness shell is bash-compatible — TODO confirm.
        out, _, exit_code = client.run_command(
            session_id,
            f"set -o pipefail; cd {repo} && npm run typecheck 2>&1 | tail -5",
        )
        if exit_code != 0:
            typecheck_passes = False
            # BUG FIX: with 2>&1 the diagnostics land on stdout; the old code
            # reported the (always empty) stderr channel.
            errors.append(f"Typecheck failed in {repo}: {out[:500]}")

    tests_pass = True
    for repo in affected_repos:
        stdout, _, exit_code = client.run_command(
            session_id, f"cd {repo} && git diff main --stat 2>/dev/null"
        )
        if not stdout.strip():
            continue
        test_cmd = TEST_COMMANDS.get(repo, "npm test")
        print(f" Running tests in {repo} (may take a few minutes)...", flush=True)
        # NOTE(review): with pipefail, a failing test command now surfaces here.
        # grep exits 1 when nothing matches; jest summaries always contain
        # "Tests:"/"Test Suites:" so a clean run still matches — confirm for
        # any non-jest runner added to TEST_COMMANDS.
        out, _, exit_code = client.run_command(
            session_id,
            f'set -o pipefail; cd {repo} && {test_cmd} 2>&1 | grep -E "(FAIL|PASS|Tests:|Test Suites:)" | tail -20',
        )
        if exit_code != 0:
            tests_pass = False
            errors.append(f"Tests failed in {repo}: {out[:500]}")

    # Push from each repo that has changes
    branch_pushed = True
    for repo in affected_repos:
        stdout, _, _ = client.run_command(
            session_id, f"cd {repo} && git diff main --stat 2>/dev/null"
        )
        if not stdout.strip():
            continue
        print(f" Pushing {branch_name} in {repo}...", flush=True)
        _, stderr, exit_code = client.run_command(
            session_id, f"cd {repo} && git push origin {branch_name}"
        )
        if exit_code != 0:
            branch_pushed = False
            errors.append(f"Push failed in {repo}: {stderr[:500]}")

    return VerificationResult(
        commits_exist=commits_exist,
        typecheck_passes=typecheck_passes,
        tests_pass=tests_pass,
        branch_pushed=branch_pushed,
        errors=errors,
    )
+""" + +import json +import os +import sys +import time +import uuid + +import boto3 + +# ANSI color codes +CYAN = "\033[36m" +YELLOW = "\033[33m" +GREEN = "\033[32m" +RED = "\033[31m" +DIM = "\033[2m" +RESET = "\033[0m" + +SCRIPTS_DIR = os.path.dirname(__file__) + + +def read_prompt(filename): + """Read a prompt template from the prompts directory.""" + path = os.path.join(SCRIPTS_DIR, "prompts", filename) + with open(path) as f: + return f.read() + + +def invoke_harness_streaming(harness_arn, session_id, system_prompt, messages, model_id, region): + """Call invoke_harness via boto3 and return the event stream.""" + client = boto3.client("bedrock-agentcore", region_name=region) + response = client.invoke_harness( + harnessArn=harness_arn, + runtimeSessionId=session_id, + systemPrompt=[{"text": system_prompt}], + messages=messages, + model={"bedrockModelConfig": {"modelId": model_id}}, + ) + return response["stream"] + + +def parse_events(event_stream): + """Yield (event_type, payload) tuples from the boto3 event stream.""" + for event in event_stream: + if "contentBlockStart" in event: + yield "contentBlockStart", event["contentBlockStart"] + elif "contentBlockDelta" in event: + yield "contentBlockDelta", event["contentBlockDelta"] + elif "contentBlockStop" in event: + yield "contentBlockStop", event["contentBlockStop"] + elif "messageStop" in event: + yield "messageStop", event["messageStop"] + elif "internalServerException" in event: + yield "internalServerException", event["internalServerException"] + elif "runtimeClientError" in event: + yield "runtimeClientError", event["runtimeClientError"] + + +def print_stream(event_stream): + """Display harness events with GitHub Actions log groups. 
def print_stream(event_stream):
    """Render harness events as GitHub Actions log output.

    Event flow:
      contentBlockStart — a new block begins (text or tool call)
      contentBlockDelta — incremental chunks of text or tool-input JSON
      contentBlockStop  — block complete; full tool input can be shown
      messageStop       — the agent finished its turn
      internalServerException / runtimeClientError — fatal errors

    Tool calls become collapsible ::group::/::endgroup:: sections in the
    Actions log UI; the agent's reasoning text is printed inline, dimmed.
    """
    started_at = time.time()
    call_count = 0
    active_tool = None
    pending_input = ""
    tool_started_at = 0.0
    group_open = False
    pending_text = ""

    def end_group():
        # Close the current ::group:: section, if one is open.
        nonlocal group_open
        if group_open:
            print("::endgroup::", flush=True)
            group_open = False

    def emit_text():
        # Print any buffered agent reasoning text, dimmed, line by line.
        nonlocal pending_text
        if pending_text:
            for line in pending_text.splitlines():
                print(f"{DIM}{line}{RESET}", flush=True)
            pending_text = ""

    for event_type, payload in parse_events(event_stream):

        if event_type == "contentBlockStart":
            block_start = payload.get("start", {})
            if "toolUse" in block_start:
                active_tool = block_start["toolUse"].get("name", "unknown")
                pending_input = ""
                tool_started_at = time.time()
                call_count += 1

        elif event_type == "contentBlockDelta":
            delta = payload.get("delta", {})
            if "text" in delta:
                end_group()
                pending_text += delta["text"]
            if "toolUse" in delta:
                pending_input += delta["toolUse"].get("input", "")

        elif event_type == "contentBlockStop":
            emit_text()
            if active_tool:
                duration = time.time() - tool_started_at
                try:
                    details = json.loads(pending_input)
                except (json.JSONDecodeError, TypeError):
                    details = pending_input

                end_group()

                command = details.get("command") if isinstance(details, dict) else None
                title = f"{CYAN}[{call_count}]{RESET} {YELLOW}{active_tool}{RESET} {DIM}({duration:.1f}s){RESET}"
                if command:
                    title += f": $ {command}"

                print(f"::group::{title}", flush=True)
                group_open = True

                if isinstance(details, dict):
                    for key, value in details.items():
                        if key != "command":
                            print(f" {DIM}{key}:{RESET} {str(value)[:300]}", flush=True)

                active_tool = None
                pending_input = ""

        elif event_type == "messageStop":
            emit_text()
            end_group()
            if payload.get("stopReason") == "end_turn":
                total = time.time() - started_at
                print(f"\n\n{GREEN}{'=' * 50}", flush=True)
                print(f" Done ({int(total // 60)}m {int(total % 60)}s)", flush=True)
                print(f"{'=' * 50}{RESET}", flush=True)

        elif event_type == "internalServerException":
            end_group()
            print(f"\n{RED}ERROR: {payload}{RESET}", file=sys.stderr)
            sys.exit(1)

        elif event_type == "runtimeClientError":
            end_group()
            print(f"\n{RED}ERROR: {payload.get('message', payload)}{RESET}", file=sys.stderr)
            sys.exit(1)

    end_group()
    total = time.time() - started_at
    print(f"\n{GREEN}Review complete.{RESET} {DIM}({call_count} tool calls, {int(total)}s total){RESET}")


# --- Main ---

# All configuration comes from environment variables (GitHub secrets/workflow).
MODEL_ID = os.environ.get("HARNESS_MODEL_ID", "us.anthropic.claude-opus-4-7")
HARNESS_ARN = os.environ.get("HARNESS_ARN", "")
PR_URL = os.environ.get("PR_URL", "")

for var_name, var_value in (("HARNESS_ARN", HARNESS_ARN), ("PR_URL", PR_URL)):
    if not var_value:
        print(f"{RED}ERROR: {var_name} environment variable is required{RESET}", file=sys.stderr)
        sys.exit(1)

# The region is embedded in the ARN: arn:aws:bedrock-agentcore:{region}:{account}:harness/{id}
REGION = HARNESS_ARN.split(":")[3]
SESSION_ID = str(uuid.uuid4()).upper()

print(f"{CYAN}Session:{RESET} {SESSION_ID}")
print(f"{CYAN}PR:{RESET} {PR_URL}")
print(f"{CYAN}Harness:{RESET} {HARNESS_ARN}")
print()

SYSTEM_PROMPT = read_prompt("system.md")
REVIEW_PROMPT = read_prompt("review.md").format(pr_url=PR_URL)

messages = [{"role": "user", "content": [{"text": REVIEW_PROMPT}]}]

try:
    event_stream = invoke_harness_streaming(
        HARNESS_ARN, SESSION_ID, SYSTEM_PROMPT, messages, MODEL_ID, REGION
    )
except Exception as e:
    print(f"{RED}ERROR: Failed to invoke harness: {e}{RESET}", file=sys.stderr)
    sys.exit(1)

print_stream(event_stream)
harness: {e}{RESET}", file=sys.stderr) + sys.exit(1) + +print_stream(event_stream) diff --git a/agents/pr_reviewer/prompts/review.md b/agents/pr_reviewer/prompts/review.md new file mode 100644 index 000000000..d34c67b95 --- /dev/null +++ b/agents/pr_reviewer/prompts/review.md @@ -0,0 +1,18 @@ +Review this GitHub PR: {pr_url} + +You have tools to fetch the PR diff, read files, search the web, and post comments on the PR. + +You have these repos cloned locally for context: + +- /opt/workspace/agentcore-cli — aws/agentcore-cli +- /opt/workspace/agentcore-l3-cdk-constructs — aws/agentcore-l3-cdk-constructs + +Before reviewing, read all existing comments on the PR to understand what has already been discussed. Do not repeat or +re-post issues that have already been raised in existing comments. + +Review the PR. If there are any serious issues that require code changes before merging, post a comment on the PR for +each issue explaining the problem. If there are multiple ways to fix an issue, list the options so the author can +choose. Skip style nits and minor suggestions — only flag things that actually need to change. + +If all serious issues have already been raised in existing comments, or if you found no new issues, post a single +comment on the PR saying it looks good to merge (or that all issues have already been flagged). diff --git a/agents/pr_reviewer/prompts/system.md b/agents/pr_reviewer/prompts/system.md new file mode 100644 index 000000000..52a3d2260 --- /dev/null +++ b/agents/pr_reviewer/prompts/system.md @@ -0,0 +1,25 @@ +# AgentCore CLI Development Workspace + +This workspace contains two repos for developing and testing the AgentCore CLI. + +## Repositories + +### agentcore-cli/ (`aws/agentcore-cli`) + +The terminal experience for creating, developing, and deploying AI agents to AgentCore. Node.js/TypeScript CLI built +with Ink (React-based TUI). 
+ +### agentcore-l3-cdk-constructs/ (`aws/agentcore-l3-cdk-constructs`) + +AWS CDK L3 constructs for declaring and deploying AgentCore infrastructure. Used by agentcore-cli to vend CDK projects +when users run `agentcore create`. + +## How they relate + +`agentcore-cli` is the main product. It vends CDK projects using constructs from `agentcore-l3-cdk-constructs`. + +## Testing with a bundled distribution + +Run `npm run bundle` in `agentcore-cli/` to create a tar distribution that includes the packaged +`agentcore-l3-cdk-constructs`. You can then install it globally with `npm install -g <path-to-tar>` to test the CLI +end-to-end. diff --git a/agents/pyproject.toml b/agents/pyproject.toml new file mode 100644 index 000000000..c1ea76be7 --- /dev/null +++ b/agents/pyproject.toml @@ -0,0 +1,18 @@ +[project] +name = "harness-invoke" +version = "0.1.0" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.14" +dependencies = [ + "boto3>=1.42.94,<1.43.2", + "botocore>=1.42.94,<1.43.2", + "pydantic>=2.13.1", + "pyyaml>=6.0.3", + "urllib3>=2.6.3", +] + +[dependency-groups] +dev = [ + "pytest>=9.0.3", +] diff --git a/agents/tests/__init__.py b/agents/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/agents/tests/test_config.py b/agents/tests/test_config.py new file mode 100644 index 000000000..3bea71950 --- /dev/null +++ b/agents/tests/test_config.py @@ -0,0 +1,65 @@ +import os +import tempfile + +import pytest +import yaml + +from core.config import PipelineConfig + + +def test_from_yaml_loads_all_fields(): + data = { + "harness_arn": "arn:aws:bedrock-agentcore:us-west-2:123456789:harness/Test-abc", + "region": "us-east-1", + "data_plane_endpoint": "https://dp.example.com", + "aws_profile": "test-profile", + "model_id": "global.anthropic.claude-opus-4-7", + "min_reviewers": 2, + "max_reviewers": 4, + "max_review_rounds": 3, + "repos": { + "cli": "aws/agentcore-cli", + "cdk": "aws/agentcore-l3-cdk-constructs", 
+ }, + } + with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: + yaml.dump(data, f) + path = f.name + + try: + config = PipelineConfig.from_yaml(path) + assert config.harness_arn == data["harness_arn"] + assert config.region == "us-east-1" + assert config.aws_profile == "test-profile" + assert config.min_reviewers == 2 + assert config.max_reviewers == 4 + assert config.cli_repo == "aws/agentcore-cli" + assert config.cdk_repo == "aws/agentcore-l3-cdk-constructs" + finally: + os.unlink(path) + + +def test_from_yaml_uses_defaults_for_missing_fields(): + data = {"harness_arn": "arn:aws:bedrock-agentcore:us-west-2:123:harness/X-abc"} + with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: + yaml.dump(data, f) + path = f.name + + try: + config = PipelineConfig.from_yaml(path) + assert config.harness_arn == data["harness_arn"] + assert config.region == "us-west-2" + assert config.aws_profile == "deploy" + assert config.min_reviewers == 3 + assert config.max_reviewers == 5 + finally: + os.unlink(path) + + +def test_defaults(): + config = PipelineConfig(harness_arn="arn:aws:bedrock-agentcore:us-west-2:123:harness/X-abc") + assert config.region == "us-west-2" + assert config.model_id == "global.anthropic.claude-opus-4-7" + assert config.min_reviewers == 3 + assert config.max_reviewers == 5 + assert config.max_review_rounds == 5 diff --git a/agents/tests/test_harness_client.py b/agents/tests/test_harness_client.py new file mode 100644 index 000000000..59e4fd27d --- /dev/null +++ b/agents/tests/test_harness_client.py @@ -0,0 +1,21 @@ +from core.harness_client import HarnessClient + + +def test_accumulate_command_output(): + client = HarnessClient.__new__(HarnessClient) + events = [ + {"chunk": {"contentDelta": {"stdout": "file1.ts\n"}}}, + {"chunk": {"contentDelta": {"stdout": "file2.ts\n"}}}, + {"chunk": {"contentDelta": {"stderr": "warning: something\n"}}}, + {"chunk": {"contentStop": {"exitCode": 0, "status": "SUCCESS"}}}, 
+ ] + stdout, stderr, exit_code = client._accumulate_command(events) + assert stdout == "file1.ts\nfile2.ts\n" + assert stderr == "warning: something\n" + assert exit_code == 0 + + +def test_new_session_id_format(): + sid = HarnessClient.new_session_id() + assert len(sid) == 36 + assert sid == sid.upper() diff --git a/agents/tests/test_parsing.py b/agents/tests/test_parsing.py new file mode 100644 index 000000000..835bcab05 --- /dev/null +++ b/agents/tests/test_parsing.py @@ -0,0 +1,81 @@ +import pytest + +from core.parsing import Finding, ReviewResult, parse_reviewer_output + + +def test_parse_json_in_markdown_fences(): + raw = '''Here is my review: + +```json +{ + "approved": false, + "findings": [ + { + "severity": "high", + "file": "src/cli/commands/deploy/index.ts", + "line": 42, + "description": "Missing null check", + "suggestion": "Add null check before accessing property" + } + ] +} +``` + +That's my review.''' + result = parse_reviewer_output(raw) + assert result is not None + assert result.approved is False + assert len(result.findings) == 1 + assert result.findings[0].severity == "high" + assert result.findings[0].file == "src/cli/commands/deploy/index.ts" + + +def test_parse_bare_json(): + raw = '{"approved": true, "findings": []}' + result = parse_reviewer_output(raw) + assert result is not None + assert result.approved is True + assert result.findings == [] + + +def test_parse_returns_none_for_garbage(): + result = parse_reviewer_output("This is just text with no JSON at all.") + assert result is None + + +def test_parse_returns_none_for_invalid_schema(): + raw = '{"approved": "yes", "findings": "none"}' + result = parse_reviewer_output(raw) + assert result is None + + +def test_parse_json_with_nested_braces(): + raw = '''```json +{ + "approved": false, + "findings": [ + { + "severity": "medium", + "file": "src/schema/types.ts", + "line": 10, + "description": "Type should use Record<string, unknown> instead of object", + "suggestion": "Replace object 
with Record<string, unknown>" + } + ] +} +```''' + result = parse_reviewer_output(raw) + assert result is not None + assert len(result.findings) == 1 + assert result.findings[0].severity == "medium" + + +def test_finding_model_validates_severity(): + with pytest.raises(Exception): + Finding( + severity="urgent", + file="test.ts", + line=1, + description="bad", + suggestion="fix", + ) diff --git a/agents/tests/test_partitioning.py b/agents/tests/test_partitioning.py new file mode 100644 index 000000000..b834a0126 --- /dev/null +++ b/agents/tests/test_partitioning.py @@ -0,0 +1,74 @@ +import pytest + +from orchestrations.fix_and_review.partitioning import ( + DiffStats, + ReviewerAssignment, + calculate_reviewer_count, + partition_round1_by_directory, + partition_round2_focus_prompts, + partition_round3_risk_areas, +) + + +def test_reviewer_count_small_diff(): + stats = DiffStats(changed_files=["src/cli/commands/deploy/index.ts"], total_lines=30, cross_repo=False) + assert calculate_reviewer_count(stats, min_r=3, max_r=5) == 3 + + +def test_reviewer_count_medium_diff(): + files = [f"src/file{i}.ts" for i in range(5)] + stats = DiffStats(changed_files=files, total_lines=300, cross_repo=False) + count = calculate_reviewer_count(stats, min_r=3, max_r=5) + assert 3 <= count <= 4 + + +def test_reviewer_count_large_diff(): + files = [f"src/file{i}.ts" for i in range(10)] + stats = DiffStats(changed_files=files, total_lines=600, cross_repo=False) + assert calculate_reviewer_count(stats, min_r=3, max_r=5) == 5 + + +def test_reviewer_count_cross_repo_adds_one(): + stats = DiffStats(changed_files=["src/a.ts", "src/b.ts"], total_lines=50, cross_repo=True) + count = calculate_reviewer_count(stats, min_r=3, max_r=5) + assert count == 4 + + +def test_round1_groups_by_directory(): + files = [ + "src/cli/commands/deploy/index.ts", + "src/cli/commands/deploy/utils.ts", + "src/cdk/constructs/l3/agent.ts", + "test/deploy.test.ts", + ] + assignments = 
partition_round1_by_directory(files, num_reviewers=3) + assert len(assignments) == 3 + all_files = [] + for a in assignments: + all_files.extend(a.files) + assert set(all_files) == set(files) + + +def test_round1_fewer_groups_than_reviewers(): + files = ["src/cli/commands/deploy/index.ts", "src/cli/commands/deploy/utils.ts"] + assignments = partition_round1_by_directory(files, num_reviewers=3) + assert len(assignments) == 3 + assert all(len(a.files) > 0 for a in assignments) + + +def test_round2_returns_focus_prompts(): + prompts = partition_round2_focus_prompts(num_reviewers=4) + assert len(prompts) == 4 + assert all(isinstance(p, str) for p in prompts) + assert len(set(prompts)) == 4 + + +def test_round3_focuses_on_previous_findings(): + previous_findings_files = ["src/cli/commands/deploy/index.ts", "src/cdk/constructs/l3/agent.ts"] + all_changed_files = previous_findings_files + ["test/deploy.test.ts", "src/schema/types.ts"] + assignments = partition_round3_risk_areas(previous_findings_files, all_changed_files, num_reviewers=3) + assert len(assignments) == 3 + risk_files_covered = set() + for a in assignments: + risk_files_covered.update(a.files) + assert set(previous_findings_files).issubset(risk_files_covered) From 0b9208f869a122081da16611b07b28ceae6539a8 Mon Sep 17 00:00:00 2001 From: Aidan Daly <aidandal@amazon.com> Date: Tue, 5 May 2026 15:52:23 -0400 Subject: [PATCH 02/12] =?UTF-8?q?chore:=20remove=20pr=5Freviewer=20from=20?= =?UTF-8?q?this=20PR=20=E2=80=94=20migration=20is=20a=20separate=20task?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- agents/pr_reviewer/main.py | 200 --------------------------- agents/pr_reviewer/prompts/review.md | 18 --- agents/pr_reviewer/prompts/system.md | 25 ---- 3 files changed, 243 deletions(-) delete mode 100644 agents/pr_reviewer/main.py delete mode 100644 agents/pr_reviewer/prompts/review.md delete mode 100644 agents/pr_reviewer/prompts/system.md diff --git 
a/agents/pr_reviewer/main.py b/agents/pr_reviewer/main.py deleted file mode 100644 index 2ee174266..000000000 --- a/agents/pr_reviewer/main.py +++ /dev/null @@ -1,200 +0,0 @@ -"""Invoke Bedrock AgentCore Harness to review a GitHub PR. - -Reads PR_URL from the environment. Streams harness output to stdout. -Uses the boto3 bedrock-agentcore client's invoke_harness API. -""" - -import json -import os -import sys -import time -import uuid - -import boto3 - -# ANSI color codes -CYAN = "\033[36m" -YELLOW = "\033[33m" -GREEN = "\033[32m" -RED = "\033[31m" -DIM = "\033[2m" -RESET = "\033[0m" - -SCRIPTS_DIR = os.path.dirname(__file__) - - -def read_prompt(filename): - """Read a prompt template from the prompts directory.""" - path = os.path.join(SCRIPTS_DIR, "prompts", filename) - with open(path) as f: - return f.read() - - -def invoke_harness_streaming(harness_arn, session_id, system_prompt, messages, model_id, region): - """Call invoke_harness via boto3 and return the event stream.""" - client = boto3.client("bedrock-agentcore", region_name=region) - response = client.invoke_harness( - harnessArn=harness_arn, - runtimeSessionId=session_id, - systemPrompt=[{"text": system_prompt}], - messages=messages, - model={"bedrockModelConfig": {"modelId": model_id}}, - ) - return response["stream"] - - -def parse_events(event_stream): - """Yield (event_type, payload) tuples from the boto3 event stream.""" - for event in event_stream: - if "contentBlockStart" in event: - yield "contentBlockStart", event["contentBlockStart"] - elif "contentBlockDelta" in event: - yield "contentBlockDelta", event["contentBlockDelta"] - elif "contentBlockStop" in event: - yield "contentBlockStop", event["contentBlockStop"] - elif "messageStop" in event: - yield "messageStop", event["messageStop"] - elif "internalServerException" in event: - yield "internalServerException", event["internalServerException"] - elif "runtimeClientError" in event: - yield "runtimeClientError", event["runtimeClientError"] - - 
-def print_stream(event_stream): - """Display harness events with GitHub Actions log groups. - - The harness streams events as the agent works: - contentBlockStart — a new block begins (text or tool call) - contentBlockDelta — incremental chunks of text or tool input JSON - contentBlockStop — block complete, we now have full tool input to display - messageStop — agent finished - internalServerException — server error - - Tool calls are wrapped in ::group::/::endgroup:: for collapsible sections - in the GitHub Actions log UI. Agent reasoning text is printed inline in dim. - """ - start_time = time.time() - iteration = 0 - tool_name = None - tool_input = "" - tool_start = 0.0 - in_group = False - text_buffer = "" - - def close_group(): - nonlocal in_group - if in_group: - print("::endgroup::", flush=True) - in_group = False - - def flush_text(): - nonlocal text_buffer - if text_buffer: - for line in text_buffer.splitlines(): - print(f"{DIM}{line}{RESET}", flush=True) - text_buffer = "" - - for event_type, payload in parse_events(event_stream): - - if event_type == "contentBlockStart": - start = payload.get("start", {}) - if "toolUse" in start: - tool_name = start["toolUse"].get("name", "unknown") - tool_input = "" - tool_start = time.time() - iteration += 1 - - elif event_type == "contentBlockDelta": - delta = payload.get("delta", {}) - if "text" in delta: - close_group() - text_buffer += delta["text"] - if "toolUse" in delta: - tool_input += delta["toolUse"].get("input", "") - - elif event_type == "contentBlockStop": - flush_text() - if tool_name: - elapsed = time.time() - tool_start - try: - parsed = json.loads(tool_input) - except (json.JSONDecodeError, TypeError): - parsed = tool_input - - close_group() - - cmd = parsed.get("command") if isinstance(parsed, dict) else None - header = f"{CYAN}[{iteration}]{RESET} {YELLOW}{tool_name}{RESET} {DIM}({elapsed:.1f}s){RESET}" - if cmd: - header += f": $ {cmd}" - - print(f"::group::{header}", flush=True) - in_group = True 
- - if isinstance(parsed, dict): - for k, v in parsed.items(): - if k != "command": - print(f" {DIM}{k}:{RESET} {str(v)[:300]}", flush=True) - - tool_name = None - tool_input = "" - - elif event_type == "messageStop": - flush_text() - close_group() - if payload.get("stopReason") == "end_turn": - total = time.time() - start_time - print(f"\n\n{GREEN}{'=' * 50}", flush=True) - print(f" Done ({int(total // 60)}m {int(total % 60)}s)", flush=True) - print(f"{'=' * 50}{RESET}", flush=True) - - elif event_type == "internalServerException": - close_group() - print(f"\n{RED}ERROR: {payload}{RESET}", file=sys.stderr) - sys.exit(1) - - elif event_type == "runtimeClientError": - close_group() - print(f"\n{RED}ERROR: {payload.get('message', payload)}{RESET}", file=sys.stderr) - sys.exit(1) - - close_group() - total = time.time() - start_time - print(f"\n{GREEN}Review complete.{RESET} {DIM}({iteration} tool calls, {int(total)}s total){RESET}") - - -# --- Main --- - -# All config comes from environment variables (set via GitHub secrets/workflow) -MODEL_ID = os.environ.get("HARNESS_MODEL_ID", "us.anthropic.claude-opus-4-7") -HARNESS_ARN = os.environ.get("HARNESS_ARN", "") -PR_URL = os.environ.get("PR_URL", "") - -for name, val in [("HARNESS_ARN", HARNESS_ARN), ("PR_URL", PR_URL)]: - if not val: - print(f"{RED}ERROR: {name} environment variable is required{RESET}", file=sys.stderr) - sys.exit(1) - -# Extract region from the ARN (arn:aws:bedrock-agentcore:{region}:{account}:harness/{id}) -REGION = HARNESS_ARN.split(":")[3] -SESSION_ID = str(uuid.uuid4()).upper() - -print(f"{CYAN}Session:{RESET} {SESSION_ID}") -print(f"{CYAN}PR:{RESET} {PR_URL}") -print(f"{CYAN}Harness:{RESET} {HARNESS_ARN}") -print() - -SYSTEM_PROMPT = read_prompt("system.md") -REVIEW_PROMPT = read_prompt("review.md").format(pr_url=PR_URL) - -messages = [{"role": "user", "content": [{"text": REVIEW_PROMPT}]}] - -try: - event_stream = invoke_harness_streaming( - HARNESS_ARN, SESSION_ID, SYSTEM_PROMPT, messages, 
MODEL_ID, REGION - ) -except Exception as e: - print(f"{RED}ERROR: Failed to invoke harness: {e}{RESET}", file=sys.stderr) - sys.exit(1) - -print_stream(event_stream) diff --git a/agents/pr_reviewer/prompts/review.md b/agents/pr_reviewer/prompts/review.md deleted file mode 100644 index d34c67b95..000000000 --- a/agents/pr_reviewer/prompts/review.md +++ /dev/null @@ -1,18 +0,0 @@ -Review this GitHub PR: {pr_url} - -You have tools to fetch the PR diff, read files, search the web, and post comments on the PR. - -You have these repos cloned locally for context: - -- /opt/workspace/agentcore-cli — aws/agentcore-cli -- /opt/workspace/agentcore-l3-cdk-constructs — aws/agentcore-l3-cdk-constructs - -Before reviewing, read all existing comments on the PR to understand what has already been discussed. Do not repeat or -re-post issues that have already been raised in existing comments. - -Review the PR. If there are any serious issues that require code changes before merging, post a comment on the PR for -each issue explaining the problem. If there are multiple ways to fix an issue, list the options so the author can -choose. Skip style nits and minor suggestions — only flag things that actually need to change. - -If all serious issues have already been raised in existing comments, or if you found no new issues, post a single -comment on the PR saying it looks good to merge (or that all issues have already been flagged). diff --git a/agents/pr_reviewer/prompts/system.md b/agents/pr_reviewer/prompts/system.md deleted file mode 100644 index 52a3d2260..000000000 --- a/agents/pr_reviewer/prompts/system.md +++ /dev/null @@ -1,25 +0,0 @@ -# AgentCore CLI Development Workspace - -This workspace contains two repos for developing and testing the AgentCore CLI. - -## Repositories - -### agentcore-cli/ (`aws/agentcore-cli`) - -The terminal experience for creating, developing, and deploying AI agents to AgentCore. Node.js/TypeScript CLI built -with Ink (React-based TUI). 
- -### agentcore-l3-cdk-constructs/ (`aws/agentcore-l3-cdk-constructs`) - -AWS CDK L3 constructs for declaring and deploying AgentCore infrastructure. Used by agentcore-cli to vend CDK projects -when users run `agentcore create`. - -## How they relate - -`agentcore-cli` is the main product. It vends CDK projects using constructs from `agentcore-l3-cdk-constructs`. - -## Testing with a bundled distribution - -Run `npm run bundle` in `agentcore-cli/` to create a tar distribution that includes the packaged -`agentcore-l3-cdk-constructs`. You can then install it globally with `npm install -g <path-to-tar>` to test the CLI -end-to-end. From 13964f991e87779a332575a2a334c5338dcbf850 Mon Sep 17 00:00:00 2001 From: Aidan Daly <aidandal@amazon.com> Date: Tue, 5 May 2026 16:57:22 -0400 Subject: [PATCH 03/12] fix: address 14 review findings from multi-agent code review Critical: - Remove stale variables in complete.py causing duplicate PR URLs High: - Add input validation in feature-builder.yml (path traversal, command injection) - Resolve AWS credentials per-request instead of freezing at construction - Use format_map with defaults to prevent KeyError on missing template vars - Capture test exit code separately from grep display in verify.py - Make JSON brace-depth counter string-aware in parsing.py - Gitignore config.yaml (contains account-specific ARN), add config.yaml.example - Guard against empty changed_files in partition_round1_by_directory Medium: - Add type coercion for numeric overrides in orchestrator - Only push after all local checks pass in verify.py - Skip push when rebase fails in complete.py - Lower Python requirement to >=3.12 - Widen boto3/botocore version constraints --- .github/workflows/feature-builder.yml | 23 ++++++++++ agents/.gitignore | 3 ++ agents/config.yaml.example | 10 ++++ agents/core/harness_client.py | 4 +- agents/core/parsing.py | 38 ++++++++++----- .../fix_and_review/orchestrator.py | 3 ++ .../fix_and_review/partitioning.py | 3 ++ 
.../fix_and_review/phases/complete.py | 5 +- .../fix_and_review/phases/setup.py | 3 +- .../fix_and_review/phases/verify.py | 46 ++++++++++++------- agents/pyproject.toml | 16 +++---- 11 files changed, 111 insertions(+), 43 deletions(-) create mode 100644 agents/.gitignore create mode 100644 agents/config.yaml.example diff --git a/.github/workflows/feature-builder.yml b/.github/workflows/feature-builder.yml index 13f8f7107..8cd67b04e 100644 --- a/.github/workflows/feature-builder.yml +++ b/.github/workflows/feature-builder.yml @@ -25,6 +25,29 @@ jobs: build-feature: runs-on: ubuntu-latest steps: + - name: Validate inputs + env: + FEATURE_NAME: ${{ inputs.feature_name }} + DEVEX_DOC: ${{ inputs.devex_doc }} + IMPL_DOC: ${{ inputs.impl_doc }} + run: | + if [[ ! "$FEATURE_NAME" =~ ^[a-zA-Z0-9_-]+$ ]]; then + echo "Error: feature_name must be alphanumeric with hyphens/underscores only" + exit 1 + fi + if [[ "$DEVEX_DOC" == *".."* ]] || [[ "$IMPL_DOC" == *".."* ]]; then + echo "Error: doc paths must not contain '..'" + exit 1 + fi + if [[ ! "$DEVEX_DOC" == *.md ]]; then + echo "Error: devex_doc must be a .md file" + exit 1 + fi + if [[ ! 
"$IMPL_DOC" == *.md ]]; then + echo "Error: impl_doc must be a .md file" + exit 1 + fi + - name: Checkout uses: actions/checkout@v6 diff --git a/agents/.gitignore b/agents/.gitignore new file mode 100644 index 000000000..77a810105 --- /dev/null +++ b/agents/.gitignore @@ -0,0 +1,3 @@ +config.yaml +__pycache__/ +.venv/ diff --git a/agents/config.yaml.example b/agents/config.yaml.example new file mode 100644 index 000000000..0f233d50e --- /dev/null +++ b/agents/config.yaml.example @@ -0,0 +1,10 @@ +harness_arn: "arn:aws:bedrock-agentcore:us-west-2:YOUR_ACCOUNT_ID:harness/YOUR_HARNESS_ID" +region: "us-west-2" +aws_profile: "deploy" +model_id: "global.anthropic.claude-opus-4-7" +min_reviewers: 3 +max_reviewers: 5 +max_review_rounds: 5 +repos: + cli: "aws/agentcore-cli" + cdk: "aws/agentcore-l3-cdk-constructs" diff --git a/agents/core/harness_client.py b/agents/core/harness_client.py index 33d296f32..0a811a31a 100644 --- a/agents/core/harness_client.py +++ b/agents/core/harness_client.py @@ -20,7 +20,6 @@ def __init__(self, config: PipelineConfig): region_name=config.region, profile_name=config.aws_profile, ) - self.credentials = self.session.get_credentials().get_frozen_credentials() self.http = urllib3.PoolManager() self.client = self.session.client( "bedrock-agentcore", @@ -50,7 +49,8 @@ def invoke( "Content-Type": "application/json", "Accept": "application/vnd.amazon.eventstream", }) - SigV4Auth(self.credentials, "bedrock-agentcore", region).add_auth(request) + credentials = self.session.get_credentials().get_frozen_credentials() + SigV4Auth(credentials, "bedrock-agentcore", region).add_auth(request) response = self.http.urlopen( "POST", url, body=json.dumps(body).encode(), diff --git a/agents/core/parsing.py b/agents/core/parsing.py index 1f4aab67d..228bc23f3 100644 --- a/agents/core/parsing.py +++ b/agents/core/parsing.py @@ -34,17 +34,33 @@ def _extract_json(raw_text: str) -> str | None: if start == -1: return None - depth = 0 - for i in range(start, 
len(raw_text)): - if raw_text[i] == "{": - depth += 1 - elif raw_text[i] == "}": - depth -= 1 - if depth == 0: - candidate = raw_text[start : i + 1] - if "approved" in candidate: - return candidate - return None + while start != -1: + depth = 0 + in_string = False + escape_next = False + for i in range(start, len(raw_text)): + c = raw_text[i] + if escape_next: + escape_next = False + continue + if c == "\\": + escape_next = True + continue + if c == '"': + in_string = not in_string + continue + if in_string: + continue + if c == "{": + depth += 1 + elif c == "}": + depth -= 1 + if depth == 0: + candidate = raw_text[start : i + 1] + if "approved" in candidate: + return candidate + break + start = raw_text.find("{", start + 1) return None diff --git a/agents/orchestrations/fix_and_review/orchestrator.py b/agents/orchestrations/fix_and_review/orchestrator.py index eee22af25..d11324703 100644 --- a/agents/orchestrations/fix_and_review/orchestrator.py +++ b/agents/orchestrations/fix_and_review/orchestrator.py @@ -35,6 +35,9 @@ def run_pipeline( config = PipelineConfig.from_yaml(config_path) for key, value in overrides.items(): if hasattr(config, key): + field_type = type(getattr(config, key)) + if field_type == int: + value = int(value) setattr(config, key, value) if prompts_dir: diff --git a/agents/orchestrations/fix_and_review/partitioning.py b/agents/orchestrations/fix_and_review/partitioning.py index 5f0a42866..bbfed1993 100644 --- a/agents/orchestrations/fix_and_review/partitioning.py +++ b/agents/orchestrations/fix_and_review/partitioning.py @@ -36,6 +36,9 @@ def calculate_reviewer_count(stats: DiffStats, min_r: int = 3, max_r: int = 5) - def partition_round1_by_directory( changed_files: list[str], num_reviewers: int ) -> list[ReviewerAssignment]: + if not changed_files: + return [ReviewerAssignment(files=[], focus="No files to review")] + groups: dict[str, list[str]] = defaultdict(list) for f in changed_files: parts = PurePosixPath(f).parts diff --git 
a/agents/orchestrations/fix_and_review/phases/complete.py b/agents/orchestrations/fix_and_review/phases/complete.py index f34742ed1..b5225af27 100644 --- a/agents/orchestrations/fix_and_review/phases/complete.py +++ b/agents/orchestrations/fix_and_review/phases/complete.py @@ -60,6 +60,7 @@ def run_complete( rebase_succeeded = False client.run_command(session_id, f"cd {repo_name} && git rebase --abort") errors.append(f"Rebase failed in {repo_name}: {stderr[:500]}") + continue _, stderr, exit_code = client.run_command( session_id, f"cd {repo_name} && git push origin {branch_name} --force-with-lease" @@ -103,9 +104,5 @@ def run_complete( pr_urls.append(stdout.strip()) else: errors.append(f"PR may have been created in {repo} but could not extract URL") - if exit_code == 0 and stdout.strip(): - pr_urls.append(stdout.strip()) - else: - errors.append(f"Failed to create PR in {repo}: {stderr[:500]}") return CompleteResult(pr_urls=pr_urls, rebase_succeeded=rebase_succeeded, errors=errors) diff --git a/agents/orchestrations/fix_and_review/phases/setup.py b/agents/orchestrations/fix_and_review/phases/setup.py index 32608a182..6b8fe3592 100644 --- a/agents/orchestrations/fix_and_review/phases/setup.py +++ b/agents/orchestrations/fix_and_review/phases/setup.py @@ -1,3 +1,4 @@ +from collections import defaultdict from pathlib import Path from core.config import PipelineConfig @@ -16,7 +17,7 @@ def load_prompt(name: str, **kwargs: str) -> str: if _prompts_dir is None: raise RuntimeError("Prompts directory not set. 
Call set_prompts_dir() before running phases.") template = (_prompts_dir / name).read_text() - return template.format(**kwargs) + return template.format_map(defaultdict(str, **kwargs)) def run_setup( diff --git a/agents/orchestrations/fix_and_review/phases/verify.py b/agents/orchestrations/fix_and_review/phases/verify.py index 72064b3dd..c8070092f 100644 --- a/agents/orchestrations/fix_and_review/phases/verify.py +++ b/agents/orchestrations/fix_and_review/phases/verify.py @@ -61,26 +61,38 @@ def run_verify( continue test_cmd = TEST_COMMANDS.get(repo, "npm test") print(f" Running tests in {repo} (may take a few minutes)...", flush=True) - _, stderr, exit_code = client.run_command(session_id, f'cd {repo} && {test_cmd} 2>&1 | grep -E "(FAIL|PASS|Tests:|Test Suites:)" | tail -20') - if exit_code != 0: + stdout, stderr, exit_code = client.run_command( + session_id, f'cd {repo} && {test_cmd} > /tmp/test_output.txt 2>&1; echo "EXIT:$?"' + ) + test_exit = 1 + for line in stdout.strip().split("\n"): + if line.startswith("EXIT:"): + test_exit = int(line.split(":")[1]) + if test_exit != 0: tests_pass = False - errors.append(f"Tests failed in {repo}: {stderr[:500]}") + summary, _, _ = client.run_command( + session_id, 'grep -E "(FAIL|PASS|Tests:|Test Suites:)" /tmp/test_output.txt | tail -20' + ) + errors.append(f"Tests failed in {repo}: {summary[:500]}") - # Push from each repo that has changes + # Only push if all local checks passed branch_pushed = True - for repo in affected_repos: - stdout, _, _ = client.run_command( - session_id, f"cd {repo} && git diff main --stat 2>/dev/null" - ) - if not stdout.strip(): - continue - print(f" Pushing {branch_name} in {repo}...", flush=True) - _, stderr, exit_code = client.run_command( - session_id, f"cd {repo} && git push origin {branch_name}" - ) - if exit_code != 0: - branch_pushed = False - errors.append(f"Push failed in {repo}: {stderr[:500]}") + if not (typecheck_passes and tests_pass): + branch_pushed = False + else: + for repo 
in affected_repos: + stdout, _, _ = client.run_command( + session_id, f"cd {repo} && git diff main --stat 2>/dev/null" + ) + if not stdout.strip(): + continue + print(f" Pushing {branch_name} in {repo}...", flush=True) + _, stderr, exit_code = client.run_command( + session_id, f"cd {repo} && git push origin {branch_name}" + ) + if exit_code != 0: + branch_pushed = False + errors.append(f"Push failed in {repo}: {stderr[:500]}") return VerificationResult( commits_exist=commits_exist, diff --git a/agents/pyproject.toml b/agents/pyproject.toml index c1ea76be7..9f8611d66 100644 --- a/agents/pyproject.toml +++ b/agents/pyproject.toml @@ -1,15 +1,15 @@ [project] -name = "harness-invoke" +name = "agentcore-agents" version = "0.1.0" -description = "Add your description here" +description = "Multi-agent orchestration for automated issue resolution and feature building" readme = "README.md" -requires-python = ">=3.14" +requires-python = ">=3.12" dependencies = [ - "boto3>=1.42.94,<1.43.2", - "botocore>=1.42.94,<1.43.2", - "pydantic>=2.13.1", - "pyyaml>=6.0.3", - "urllib3>=2.6.3", + "boto3>=1.42.94", + "botocore>=1.42.94", + "pydantic>=2.10.0", + "pyyaml>=6.0", + "urllib3>=2.0", ] [dependency-groups] From ff86d2d0b9c70817ba7eed29b478461f48a91e31 Mon Sep 17 00:00:00 2001 From: Aidan Daly <aidandal@amazon.com> Date: Tue, 5 May 2026 23:14:27 -0400 Subject: [PATCH 04/12] =?UTF-8?q?perf:=20switch=20to=20targeted=20testing?= =?UTF-8?q?=20=E2=80=94=20only=20run=20tests=20for=20changed=20files,=20le?= =?UTF-8?q?t=20CI=20handle=20full=20suite?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- agents/bug_fixer/prompts/executor.md | 16 +++++++--- agents/bug_fixer/prompts/fixer.md | 11 +++++-- agents/feature_builder/prompts/executor.md | 16 +++++++--- agents/feature_builder/prompts/fixer.md | 13 +++++--- .../fix_and_review/phases/verify.py | 31 ++++++++++++++++--- 5 files changed, 66 insertions(+), 21 deletions(-) diff --git 
a/agents/bug_fixer/prompts/executor.md b/agents/bug_fixer/prompts/executor.md index 8f1f89273..19ab91026 100644 --- a/agents/bug_fixer/prompts/executor.md +++ b/agents/bug_fixer/prompts/executor.md @@ -6,12 +6,18 @@ The plan: Instructions: 1. Follow the plan exactly. Make the code changes described. 2. Run `npm run typecheck 2>&1 | tail -20` in each affected repo. Fix any type errors. -3. Run tests with summary output only: `npm run test:unit 2>&1 | grep -E "(FAIL|PASS|Tests:|Test Suites:)" | tail -20` -4. If tests fail, debug the specific failing file: `npm run test:unit -- path/to/failing.test.ts 2>&1 | tail -50` +3. Run ONLY the tests related to the files you changed. Use targeted test commands: + - `npx vitest run --project unit path/to/relevant.test.ts 2>&1 | tail -30` + - If you changed `src/cli/aws/account.ts`, run `npx vitest run --project unit src/cli/aws/__tests__/account.test.ts` + - Run 1-5 targeted test files, NOT the full suite. +4. If targeted tests fail, fix the code and re-run only those tests. 5. Commit your changes: `git add -A && git commit -m "feat: {commit_message}"` -6. Push to fork remote: `git push origin {branch_name}` +6. Push to remote: `git push origin {branch_name}` 7. If you need to deviate from the plan, document why in your commit message. -IMPORTANT: Never run `npm run test:unit` without piping through grep or tail. The full output is too large and will overflow context. Use the grep pattern in step 3 for the summary, then target specific files in step 4 if something fails. +IMPORTANT: +- Do NOT run `npm run test:unit` (full suite). It takes too long. Only run targeted tests for files you changed. +- CI will run the full test suite after the PR is created. +- Always pipe test output through `| tail -30` to avoid context overflow. -Do not stop until typecheck and tests pass. If tests fail, analyze the failure, fix the code, and try again. +Do not stop until typecheck and targeted tests pass. 
If tests fail, analyze the failure, fix the code, and try again. diff --git a/agents/bug_fixer/prompts/fixer.md b/agents/bug_fixer/prompts/fixer.md index a99a6f0d1..b108e594e 100644 --- a/agents/bug_fixer/prompts/fixer.md +++ b/agents/bug_fixer/prompts/fixer.md @@ -8,9 +8,14 @@ Instructions: 1. Fix each finding, starting with Critical severity first, then High, Medium, Low. 2. If a finding is not applicable or is a false positive, explain why in a commit message. 3. Run `npm run typecheck 2>&1 | tail -20` in each affected repo after fixes. -4. Run tests with summary: `npm run test:unit 2>&1 | grep -E "(FAIL|PASS|Tests:|Test Suites:)" | tail -20` -5. If tests fail, debug the specific file: `npm run test:unit -- path/to/failing.test.ts 2>&1 | tail -50` +4. Run ONLY targeted tests for files you changed: + - `npx vitest run --project unit path/to/relevant.test.ts 2>&1 | tail -30` + - Run 1-5 targeted test files, NOT the full suite. +5. If targeted tests fail, fix and re-run only those tests. 6. Commit: `git add -A && git commit -m "fix: address review findings round {round_number}"` 7. Push: `git push origin {branch_name}` -IMPORTANT: Never run `npm run test:unit` without piping through grep or tail. The full output is too large and will overflow context. +IMPORTANT: +- Do NOT run `npm run test:unit` (full suite). It takes too long. Only run targeted tests. +- CI will validate the full suite after PR creation. +- Always pipe test output through `| tail -30`. diff --git a/agents/feature_builder/prompts/executor.md b/agents/feature_builder/prompts/executor.md index 40601221e..57b641f7c 100644 --- a/agents/feature_builder/prompts/executor.md +++ b/agents/feature_builder/prompts/executor.md @@ -6,12 +6,18 @@ The plan: Instructions: 1. Follow the plan exactly. Make the code changes described. 2. Run `npm run typecheck 2>&1 | tail -20` in each affected repo. Fix any type errors. -3. 
Run tests with summary: `npm run test:unit 2>&1 | grep -E "(FAIL|PASS|Tests:|Test Suites:)" | tail -20` -4. If tests fail, debug the specific file: `npm run test:unit -- path/to/failing.test.ts 2>&1 | tail -50` +3. Run ONLY the tests related to the files you changed. Use targeted test commands: + - `npx vitest run --project unit path/to/relevant.test.ts 2>&1 | tail -30` + - If you changed `src/cli/commands/deploy/actions.ts`, run its test: `npx vitest run --project unit src/cli/commands/deploy/__tests__/actions.test.ts` + - Run 1-5 targeted test files, NOT the full suite. +4. If targeted tests fail, fix the code and re-run only those tests. 5. Commit your changes: `git add -A && git commit -m "feat: {commit_message}"` -6. Push to remote: `git push origin feature/{feature_name}` +6. Push to remote: `git push origin {branch_name}` 7. If you need to deviate from the plan, document why in your commit message. -IMPORTANT: Never run `npm run test:unit` without piping through grep or tail. The full output is too large and will overflow context. Use the grep pattern in step 3 for the summary, then target specific files in step 4 if something fails. +IMPORTANT: +- Do NOT run `npm run test:unit` (full suite). It takes too long. Only run targeted tests for files you changed. +- CI will run the full test suite after the PR is created. +- Always pipe test output through `| tail -30` to avoid context overflow. -Do not stop until typecheck and tests pass. If tests fail, analyze the failure, fix the code, and try again. +Do not stop until typecheck and targeted tests pass. If tests fail, analyze the failure, fix the code, and try again. diff --git a/agents/feature_builder/prompts/fixer.md b/agents/feature_builder/prompts/fixer.md index 67e04ebfb..b108e594e 100644 --- a/agents/feature_builder/prompts/fixer.md +++ b/agents/feature_builder/prompts/fixer.md @@ -8,9 +8,14 @@ Instructions: 1. Fix each finding, starting with Critical severity first, then High, Medium, Low. 2. 
If a finding is not applicable or is a false positive, explain why in a commit message. 3. Run `npm run typecheck 2>&1 | tail -20` in each affected repo after fixes. -4. Run tests with summary: `npm run test:unit 2>&1 | grep -E "(FAIL|PASS|Tests:|Test Suites:)" | tail -20` -5. If tests fail, debug the specific file: `npm run test:unit -- path/to/failing.test.ts 2>&1 | tail -50` +4. Run ONLY targeted tests for files you changed: + - `npx vitest run --project unit path/to/relevant.test.ts 2>&1 | tail -30` + - Run 1-5 targeted test files, NOT the full suite. +5. If targeted tests fail, fix and re-run only those tests. 6. Commit: `git add -A && git commit -m "fix: address review findings round {round_number}"` -7. Push: `git push origin feature/{feature_name}` +7. Push: `git push origin {branch_name}` -IMPORTANT: Never run `npm run test:unit` without piping through grep or tail. The full output is too large and will overflow context. +IMPORTANT: +- Do NOT run `npm run test:unit` (full suite). It takes too long. Only run targeted tests. +- CI will validate the full suite after PR creation. +- Always pipe test output through `| tail -30`. 
diff --git a/agents/orchestrations/fix_and_review/phases/verify.py b/agents/orchestrations/fix_and_review/phases/verify.py index c8070092f..b72158235 100644 --- a/agents/orchestrations/fix_and_review/phases/verify.py +++ b/agents/orchestrations/fix_and_review/phases/verify.py @@ -59,10 +59,33 @@ def run_verify( ) if not stdout.strip(): continue - test_cmd = TEST_COMMANDS.get(repo, "npm test") - print(f" Running tests in {repo} (may take a few minutes)...", flush=True) + # Find test files related to changed source files + print(f" Running targeted tests in {repo}...", flush=True) + changed_files_out, _, _ = client.run_command( + session_id, f"cd {repo} && git diff main --name-only | head -20" + ) + test_files: list[str] = [] + for changed in changed_files_out.strip().split("\n"): + changed = changed.strip() + if not changed: + continue + if "__tests__" in changed or ".test." in changed: + test_files.append(changed) + else: + # Look for adjacent test file + test_candidate = changed.replace("/src/", "/src/").replace(".ts", ".test.ts") + dir_parts = changed.rsplit("/", 1) + if len(dir_parts) == 2: + test_dir = f"{dir_parts[0]}/__tests__/{dir_parts[1].replace('.ts', '.test.ts')}" + test_files.append(test_dir) + + if not test_files: + continue + + # Run only the targeted tests (max 5) + test_paths = " ".join(test_files[:5]) stdout, stderr, exit_code = client.run_command( - session_id, f'cd {repo} && {test_cmd} > /tmp/test_output.txt 2>&1; echo "EXIT:$?"' + session_id, f'cd {repo} && npx vitest run --project unit {test_paths} > /tmp/test_output.txt 2>&1; echo "EXIT:$?"' ) test_exit = 1 for line in stdout.strip().split("\n"): @@ -71,7 +94,7 @@ def run_verify( if test_exit != 0: tests_pass = False summary, _, _ = client.run_command( - session_id, 'grep -E "(FAIL|PASS|Tests:|Test Suites:)" /tmp/test_output.txt | tail -20' + session_id, 'tail -20 /tmp/test_output.txt' ) errors.append(f"Tests failed in {repo}: {summary[:500]}") From 6912258eaf6d6822ca0824fb024dac71a8219db1 Mon 
Sep 17 00:00:00 2001 From: Aidan Daly <aidandal@amazon.com> Date: Wed, 6 May 2026 00:00:51 -0400 Subject: [PATCH 05/12] =?UTF-8?q?fix:=20extract=20phase=20cds=20into=20eac?= =?UTF-8?q?h=20repo=20before=20git=20diff=20=E2=80=94=20fixes=200-file=20e?= =?UTF-8?q?xtraction=20bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../fix_and_review/phases/extract.py | 64 +++++++++++++------ 1 file changed, 43 insertions(+), 21 deletions(-) diff --git a/agents/orchestrations/fix_and_review/phases/extract.py b/agents/orchestrations/fix_and_review/phases/extract.py index fbe8e713c..b9f8f7724 100644 --- a/agents/orchestrations/fix_and_review/phases/extract.py +++ b/agents/orchestrations/fix_and_review/phases/extract.py @@ -18,27 +18,49 @@ def run_extract( cli_repo: str, cdk_repo: str, ) -> ExtractResult: - diff_stat_stdout, _, _ = client.run_command(session_id, "git diff main --stat") - full_diff_stdout, _, _ = client.run_command(session_id, "git diff main") - commit_log_stdout, _, _ = client.run_command(session_id, "git log main..HEAD --oneline") + cli_name = cli_repo.split("/")[-1] + cdk_name = cdk_repo.split("/")[-1] + all_diff_stat = "" + all_full_diff = "" + all_commit_log = "" changed_files: list[str] = [] - for line in diff_stat_stdout.strip().split("\n"): - line = line.strip() - if "|" in line: - file_path = line.split("|")[0].strip() - if file_path: - changed_files.append(file_path) - total_lines = 0 - for line in full_diff_stdout.split("\n"): - if line.startswith("+") and not line.startswith("+++"): - total_lines += 1 - elif line.startswith("-") and not line.startswith("---"): - total_lines += 1 - - has_cli = any(f.startswith(cli_repo) or f.startswith("src/cli") for f in changed_files) - has_cdk = any(f.startswith(cdk_repo) or f.startswith("src/cdk") for f in changed_files) + has_cli = False + has_cdk = False + + for repo_name in [cli_name, cdk_name]: + # Check if this repo has changes on the branch + commit_log, _, 
exit_code = client.run_command( + session_id, f"cd {repo_name} && git log main..HEAD --oneline 2>/dev/null" + ) + if exit_code != 0 or not commit_log.strip(): + continue + + diff_stat, _, _ = client.run_command(session_id, f"cd {repo_name} && git diff main --stat") + full_diff, _, _ = client.run_command(session_id, f"cd {repo_name} && git diff main") + + all_diff_stat += diff_stat + all_full_diff += full_diff + all_commit_log += commit_log + + for line in diff_stat.strip().split("\n"): + line = line.strip() + if "|" in line: + file_path = line.split("|")[0].strip() + if file_path: + changed_files.append(file_path) + if repo_name == cli_name: + has_cli = True + else: + has_cdk = True + + for line in full_diff.split("\n"): + if line.startswith("+") and not line.startswith("+++"): + total_lines += 1 + elif line.startswith("-") and not line.startswith("---"): + total_lines += 1 + cross_repo = has_cli and has_cdk stats = DiffStats( @@ -48,8 +70,8 @@ def run_extract( ) return ExtractResult( - diff_stat=diff_stat_stdout, - full_diff=full_diff_stdout, - commit_log=commit_log_stdout, + diff_stat=all_diff_stat, + full_diff=all_full_diff, + commit_log=all_commit_log, stats=stats, ) From 41ec7e99bce4065e4b419ccb097290a76288d9c5 Mon Sep 17 00:00:00 2001 From: Aidan Daly <aidandal@amazon.com> Date: Wed, 6 May 2026 00:20:50 -0400 Subject: [PATCH 06/12] fix: add maxIterations per phase to prevent context overflow, use force-with-lease for push --- agents/core/harness_client.py | 3 +++ agents/orchestrations/fix_and_review/phases/execute.py | 2 +- agents/orchestrations/fix_and_review/phases/fix.py | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/agents/core/harness_client.py b/agents/core/harness_client.py index 0a811a31a..87b82656a 100644 --- a/agents/core/harness_client.py +++ b/agents/core/harness_client.py @@ -31,6 +31,7 @@ def invoke( session_id: str, message: str, system_prompt: str | None = None, + max_iterations: int | None = None, verbose: bool = True, ) 
-> str: body: dict = { @@ -40,6 +41,8 @@ def invoke( } if system_prompt: body["systemPrompt"] = [{"text": system_prompt}] + if max_iterations: + body["maxIterations"] = max_iterations region = self.config.region arn = self.config.harness_arn diff --git a/agents/orchestrations/fix_and_review/phases/execute.py b/agents/orchestrations/fix_and_review/phases/execute.py index 6d3835b3f..9587484c6 100644 --- a/agents/orchestrations/fix_and_review/phases/execute.py +++ b/agents/orchestrations/fix_and_review/phases/execute.py @@ -17,4 +17,4 @@ def run_execute( commit_message=f"fix issue #{issue_number}", branch_name=branch_name, ) - return client.invoke(session_id=session_id, message=prompt) + return client.invoke(session_id=session_id, message=prompt, max_iterations=40) diff --git a/agents/orchestrations/fix_and_review/phases/fix.py b/agents/orchestrations/fix_and_review/phases/fix.py index ce11e110c..8631d498f 100644 --- a/agents/orchestrations/fix_and_review/phases/fix.py +++ b/agents/orchestrations/fix_and_review/phases/fix.py @@ -27,4 +27,4 @@ def run_fix( round_number=str(round_number), branch_name=branch_name, ) - return client.invoke(session_id=session_id, message=prompt) + return client.invoke(session_id=session_id, message=prompt, max_iterations=30) From 362766c23a9bf1dedcc1d1ce6b2b1a4ba8c27f49 Mon Sep 17 00:00:00 2001 From: Aidan Daly <aidandal@amazon.com> Date: Wed, 6 May 2026 00:48:30 -0400 Subject: [PATCH 07/12] fix: commit-early strategy, unique branch per run, abort on 0-file extract --- agents/bug_fixer/prompts/executor.md | 20 +++++++++---------- agents/feature_builder/prompts/executor.md | 20 +++++++++---------- .../fix_and_review/orchestrator.py | 6 +++++- .../fix_and_review/phases/execute.py | 2 +- .../fix_and_review/phases/fix.py | 2 +- 5 files changed, 25 insertions(+), 25 deletions(-) diff --git a/agents/bug_fixer/prompts/executor.md b/agents/bug_fixer/prompts/executor.md index 19ab91026..722ea6f2f 100644 --- a/agents/bug_fixer/prompts/executor.md 
+++ b/agents/bug_fixer/prompts/executor.md @@ -5,19 +5,17 @@ The plan: Instructions: 1. Follow the plan exactly. Make the code changes described. -2. Run `npm run typecheck 2>&1 | tail -20` in each affected repo. Fix any type errors. -3. Run ONLY the tests related to the files you changed. Use targeted test commands: +2. COMMIT IMMEDIATELY after writing your changes: `git add -A && git commit -m "feat: {commit_message}"` +3. Run `npm run typecheck 2>&1 | tail -20`. If there are type errors, fix them and commit again. +4. Run ONLY targeted tests for files you changed: - `npx vitest run --project unit path/to/relevant.test.ts 2>&1 | tail -30` - - If you changed `src/cli/aws/account.ts`, run `npx vitest run --project unit src/cli/aws/__tests__/account.test.ts` - Run 1-5 targeted test files, NOT the full suite. -4. If targeted tests fail, fix the code and re-run only those tests. -5. Commit your changes: `git add -A && git commit -m "feat: {commit_message}"` +5. If targeted tests fail, fix and commit again. 6. Push to remote: `git push origin {branch_name}` -7. If you need to deviate from the plan, document why in your commit message. -IMPORTANT: -- Do NOT run `npm run test:unit` (full suite). It takes too long. Only run targeted tests for files you changed. +CRITICAL RULES: +- COMMIT EARLY AND OFTEN. Your first commit should happen BEFORE running typecheck. Commit after every fix. A commit with typecheck errors is better than no commit at all. +- If typecheck has more than 5 errors, fix the most critical ones, commit what you have, and move on. Do NOT spend more than 3 attempts fixing typecheck. +- Do NOT run `npm run test:unit` (full suite). Only run targeted tests. - CI will run the full test suite after the PR is created. -- Always pipe test output through `| tail -30` to avoid context overflow. - -Do not stop until typecheck and targeted tests pass. If tests fail, analyze the failure, fix the code, and try again. +- Always pipe output through `| tail -30`. 
diff --git a/agents/feature_builder/prompts/executor.md b/agents/feature_builder/prompts/executor.md index 57b641f7c..77d974e28 100644 --- a/agents/feature_builder/prompts/executor.md +++ b/agents/feature_builder/prompts/executor.md @@ -5,19 +5,17 @@ The plan: Instructions: 1. Follow the plan exactly. Make the code changes described. -2. Run `npm run typecheck 2>&1 | tail -20` in each affected repo. Fix any type errors. -3. Run ONLY the tests related to the files you changed. Use targeted test commands: +2. COMMIT IMMEDIATELY after writing your changes: `git add -A && git commit -m "feat: {commit_message}"` +3. Run `npm run typecheck 2>&1 | tail -20`. If there are type errors, fix them and commit again. +4. Run ONLY targeted tests for files you changed: - `npx vitest run --project unit path/to/relevant.test.ts 2>&1 | tail -30` - - If you changed `src/cli/commands/deploy/actions.ts`, run its test: `npx vitest run --project unit src/cli/commands/deploy/__tests__/actions.test.ts` - Run 1-5 targeted test files, NOT the full suite. -4. If targeted tests fail, fix the code and re-run only those tests. -5. Commit your changes: `git add -A && git commit -m "feat: {commit_message}"` +5. If targeted tests fail, fix and commit again. 6. Push to remote: `git push origin {branch_name}` -7. If you need to deviate from the plan, document why in your commit message. -IMPORTANT: -- Do NOT run `npm run test:unit` (full suite). It takes too long. Only run targeted tests for files you changed. +CRITICAL RULES: +- COMMIT EARLY AND OFTEN. Your first commit should happen BEFORE running typecheck. Commit after every fix. A commit with typecheck errors is better than no commit at all. +- If typecheck has more than 5 errors, fix the most critical ones, commit what you have, and move on. Do NOT spend more than 3 attempts fixing typecheck. +- Do NOT run `npm run test:unit` (full suite). Only run targeted tests. - CI will run the full test suite after the PR is created. 
-- Always pipe test output through `| tail -30` to avoid context overflow. - -Do not stop until typecheck and targeted tests pass. If tests fail, analyze the failure, fix the code, and try again. +- Always pipe output through `| tail -30`. diff --git a/agents/orchestrations/fix_and_review/orchestrator.py b/agents/orchestrations/fix_and_review/orchestrator.py index d11324703..ba2c1089e 100644 --- a/agents/orchestrations/fix_and_review/orchestrator.py +++ b/agents/orchestrations/fix_and_review/orchestrator.py @@ -49,7 +49,8 @@ def run_pipeline( branch_name = f"feature/{feature_name or 'unnamed'}" else: issue_number = issue_url.rstrip("/").split("/")[-1] - branch_name = f"fix/{issue_number}" + short_id = HarnessClient.new_session_id()[:8].lower() + branch_name = f"fix/{issue_number}-{short_id}" client = HarnessClient(config) session_id = HarnessClient.new_session_id() @@ -146,6 +147,9 @@ def elapsed() -> str: f"Extracted diff: {len(extract.stats.changed_files)} files, " f"{extract.stats.total_lines} lines changed [{int(time.time()-t0)}s | total {elapsed()}]" ) + if not extract.stats.changed_files: + print("\n=== Pipeline Failed — no changes were produced. Agent may have failed to commit. 
===") + return 1 print() # Review Loop diff --git a/agents/orchestrations/fix_and_review/phases/execute.py b/agents/orchestrations/fix_and_review/phases/execute.py index 9587484c6..6d3835b3f 100644 --- a/agents/orchestrations/fix_and_review/phases/execute.py +++ b/agents/orchestrations/fix_and_review/phases/execute.py @@ -17,4 +17,4 @@ def run_execute( commit_message=f"fix issue #{issue_number}", branch_name=branch_name, ) - return client.invoke(session_id=session_id, message=prompt, max_iterations=40) + return client.invoke(session_id=session_id, message=prompt) diff --git a/agents/orchestrations/fix_and_review/phases/fix.py b/agents/orchestrations/fix_and_review/phases/fix.py index 8631d498f..ce11e110c 100644 --- a/agents/orchestrations/fix_and_review/phases/fix.py +++ b/agents/orchestrations/fix_and_review/phases/fix.py @@ -27,4 +27,4 @@ def run_fix( round_number=str(round_number), branch_name=branch_name, ) - return client.invoke(session_id=session_id, message=prompt, max_iterations=30) + return client.invoke(session_id=session_id, message=prompt) From 35a3e348253f01fe65d0f5ed7fe00b73906c4187 Mon Sep 17 00:00:00 2001 From: Aidan Daly <aidandal@amazon.com> Date: Wed, 6 May 2026 10:42:47 -0400 Subject: [PATCH 08/12] fix: add retry on connection errors, fix node 20 symlinks in setup, fix double serialization --- agents/bug_fixer/prompts/setup.md | 6 +++++- agents/core/harness_client.py | 28 +++++++++++++++++++++++-- agents/feature_builder/prompts/setup.md | 6 +++++- 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/agents/bug_fixer/prompts/setup.md b/agents/bug_fixer/prompts/setup.md index 11c3e50df..04fea6c9d 100644 --- a/agents/bug_fixer/prompts/setup.md +++ b/agents/bug_fixer/prompts/setup.md @@ -1,7 +1,11 @@ You are setting up a development environment to solve a GitHub issue. Steps: -1. Install tools (Amazon Linux 2023): dnf install -y -q git nodejs20 npm > /dev/null 2>&1 +1. 
Install tools and configure node 20 as default: + dnf install -y -q git nodejs20 > /dev/null 2>&1 + ln -sf /usr/bin/node-20 /usr/local/bin/node + ln -sf /usr/lib/nodejs20/lib/node_modules/npm/bin/npm-cli.js /usr/local/bin/npm + export PATH=/usr/local/bin:$PATH 2. Authenticate GitHub: echo $GH_TOKEN | gh auth login --with-token 3. Configure git to use gh for auth: gh auth setup-git 4. Clone both repos: diff --git a/agents/core/harness_client.py b/agents/core/harness_client.py index 87b82656a..2bd438638 100644 --- a/agents/core/harness_client.py +++ b/agents/core/harness_client.py @@ -1,5 +1,6 @@ import json import sys +import time import uuid from urllib.parse import quote @@ -33,6 +34,28 @@ def invoke( system_prompt: str | None = None, max_iterations: int | None = None, verbose: bool = True, + retries: int = 2, + ) -> str: + for attempt in range(retries + 1): + try: + return self._invoke_once(session_id, message, system_prompt, max_iterations, verbose) + except (urllib3.exceptions.ProtocolError, urllib3.exceptions.ReadTimeoutError, ConnectionResetError) as e: + if attempt < retries: + if verbose: + print(f"\n ⚠️ Connection error (attempt {attempt + 1}/{retries + 1}): {e}. 
Retrying...", flush=True) + time.sleep(5) + else: + if verbose: + print(f"\n ⚠️ Connection error after {retries + 1} attempts: {e}", flush=True) + raise + + def _invoke_once( + self, + session_id: str, + message: str, + system_prompt: str | None = None, + max_iterations: int | None = None, + verbose: bool = True, ) -> str: body: dict = { "runtimeSessionId": session_id, @@ -48,7 +71,8 @@ def invoke( arn = self.config.harness_arn url = f"https://bedrock-agentcore.{region}.amazonaws.com/harnesses/invoke?harnessArn={quote(arn, safe='')}" - request = AWSRequest(method="POST", url=url, data=json.dumps(body), headers={ + body_bytes = json.dumps(body).encode() + request = AWSRequest(method="POST", url=url, data=body_bytes, headers={ "Content-Type": "application/json", "Accept": "application/vnd.amazon.eventstream", }) @@ -56,7 +80,7 @@ def invoke( SigV4Auth(credentials, "bedrock-agentcore", region).add_auth(request) response = self.http.urlopen( - "POST", url, body=json.dumps(body).encode(), + "POST", url, body=body_bytes, headers=dict(request.headers), preload_content=False, timeout=urllib3.Timeout(connect=30, read=900), diff --git a/agents/feature_builder/prompts/setup.md b/agents/feature_builder/prompts/setup.md index f06610da8..e9d67786d 100644 --- a/agents/feature_builder/prompts/setup.md +++ b/agents/feature_builder/prompts/setup.md @@ -1,7 +1,11 @@ You are setting up a development environment to build a new feature. Steps: -1. Install tools (Amazon Linux 2023): dnf install -y -q git nodejs20 npm > /dev/null 2>&1 +1. Install tools and configure node 20 as default: + dnf install -y -q git nodejs20 > /dev/null 2>&1 + ln -sf /usr/bin/node-20 /usr/local/bin/node + ln -sf /usr/lib/nodejs20/lib/node_modules/npm/bin/npm-cli.js /usr/local/bin/npm + export PATH=/usr/local/bin:$PATH 2. Authenticate GitHub: echo $GH_TOKEN | gh auth login --with-token 3. Configure git to use gh for auth: gh auth setup-git 4. 
Clone both repos: From 3bad3e266b56d28e911f6de8f51330c617b1b737 Mon Sep 17 00:00:00 2001 From: Aidan Daly <aidandal@amazon.com> Date: Wed, 6 May 2026 12:02:31 -0400 Subject: [PATCH 09/12] =?UTF-8?q?fix:=20constrain=20reviewer=20to=2010=20t?= =?UTF-8?q?ool=20calls=20max=20=E2=80=94=20prevent=20codebase=20exploratio?= =?UTF-8?q?n=20rabbit=20holes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- agents/bug_fixer/prompts/reviewer.md | 25 ++++++++++++---------- agents/feature_builder/prompts/reviewer.md | 25 ++++++++++++---------- 2 files changed, 28 insertions(+), 22 deletions(-) diff --git a/agents/bug_fixer/prompts/reviewer.md b/agents/bug_fixer/prompts/reviewer.md index 9cd4cc7a2..a7bbef4f1 100644 --- a/agents/bug_fixer/prompts/reviewer.md +++ b/agents/bug_fixer/prompts/reviewer.md @@ -1,4 +1,4 @@ -You are a senior code reviewer. You have been assigned a region of a code change to review. +You are a senior code reviewer. Review ONLY the diff on the feature branch. Issue being solved: {issue_summary} Branch: {branch_name} @@ -8,19 +8,22 @@ Your assigned focus: {focus} Files to focus on: {assigned_files} Instructions: -1. Clone repos with the feature branch: - - git clone --depth 10 --branch {branch_name} https://github.com/{cli_repo}.git agentcore-cli - - git clone --depth 10 --branch {branch_name} https://github.com/{cdk_repo}.git agentcore-l3-cdk-constructs - (If the branch doesn't exist in a repo, clone main instead: git clone --depth 10 https://github.com/{cli_repo}.git agentcore-cli) -2. Run: git diff main (or git log if on the feature branch already) -3. Review your assigned files for ALL concerns: correctness, architecture, security, testing adequacy, cross-repo consistency, and breaking changes -4. Trace callers of changed functions. Check types. Verify test coverage. -5. You do NOT need to run npm install — you are reviewing code, not building it. +1. 
Clone the repo: git clone --depth 10 --branch {branch_name} https://github.com/{cli_repo}.git agentcore-cli 2>&1 | tail -3 + (If branch doesn't exist, clone main instead) +2. Run: cd agentcore-cli && git diff main +3. Read ONLY the changed files and their immediate context (the functions/classes that were modified). +4. If you need to check a caller or type, read at most 1-2 additional files. No more. +5. Produce your verdict. {previous_findings_context} -Output your review as a JSON object wrapped in ```json fences. -The JSON must have this exact schema: +CRITICAL CONSTRAINTS: +- Use at most 10 shell commands total. Do NOT explore the whole codebase. +- Focus on: correctness, breaking changes, obvious bugs. Skip style nits. +- If the code looks correct and doesn't break anything, approve it. +- Do NOT run npm install, npm test, or any build commands. + +Output your review as a JSON object wrapped in ```json fences: {{ "approved": boolean, "findings": [ diff --git a/agents/feature_builder/prompts/reviewer.md b/agents/feature_builder/prompts/reviewer.md index d576c249f..f2f71391d 100644 --- a/agents/feature_builder/prompts/reviewer.md +++ b/agents/feature_builder/prompts/reviewer.md @@ -1,4 +1,4 @@ -You are a senior code reviewer. You have been assigned a region of a code change to review. +You are a senior code reviewer. Review ONLY the diff on the feature branch. Feature being built: {issue_summary} Branch: {branch_name} @@ -8,19 +8,22 @@ Your assigned focus: {focus} Files to focus on: {assigned_files} Instructions: -1. Clone repos with the feature branch: - - git clone --depth 10 --branch {branch_name} https://github.com/{cli_repo}.git agentcore-cli - - git clone --depth 10 --branch {branch_name} https://github.com/{cdk_repo}.git agentcore-l3-cdk-constructs - (If the branch doesn't exist in a repo, clone main instead: git clone --depth 10 https://github.com/{cli_repo}.git agentcore-cli) -2. Run: git diff main (or git log if on the feature branch already) -3. 
Review your assigned files for ALL concerns: correctness, architecture, security, testing adequacy, cross-repo consistency, and breaking changes -4. Trace callers of changed functions. Check types. Verify test coverage. -5. You do NOT need to run npm install — you are reviewing code, not building it. +1. Clone the repo: git clone --depth 10 --branch {branch_name} https://github.com/{cli_repo}.git agentcore-cli 2>&1 | tail -3 + (If branch doesn't exist, clone main instead) +2. Run: cd agentcore-cli && git diff main +3. Read ONLY the changed files and their immediate context (the functions/classes that were modified). +4. If you need to check a caller or type, read at most 1-2 additional files. No more. +5. Produce your verdict. {previous_findings_context} -Output your review as a JSON object wrapped in ```json fences. -The JSON must have this exact schema: +CRITICAL CONSTRAINTS: +- Use at most 10 shell commands total. Do NOT explore the whole codebase. +- Focus on: correctness, breaking changes, obvious bugs. Skip style nits. +- If the code looks correct and doesn't break anything, approve it. +- Do NOT run npm install, npm test, or any build commands. 
+ +Output your review as a JSON object wrapped in ```json fences: {{ "approved": boolean, "findings": [ From 6ea694821894ae54c62adbddd0210d48b8c8651e Mon Sep 17 00:00:00 2001 From: Aidan Daly <aidandal@amazon.com> Date: Wed, 6 May 2026 13:54:16 -0400 Subject: [PATCH 10/12] =?UTF-8?q?fix:=20pass=20branch=5Fname=20to=20setup?= =?UTF-8?q?=20prompt=20=E2=80=94=20sync=20branch=20naming=20between=20orch?= =?UTF-8?q?estrator=20and=20agent?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- agents/bug_fixer/prompts/setup.md | 4 ++-- agents/feature_builder/prompts/setup.md | 4 ++-- agents/orchestrations/fix_and_review/orchestrator.py | 2 +- agents/orchestrations/fix_and_review/phases/setup.py | 2 ++ 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/agents/bug_fixer/prompts/setup.md b/agents/bug_fixer/prompts/setup.md index 04fea6c9d..75066446d 100644 --- a/agents/bug_fixer/prompts/setup.md +++ b/agents/bug_fixer/prompts/setup.md @@ -14,8 +14,8 @@ Steps: 5. Install dependencies: cd {cli_repo_name} && npm install 2>&1 | tail -3 && cd .. 6. Fetch the issue details: gh issue view {issue_url} --json title,body,labels,comments,assignees 7. Create a feature branch in both repos: - - cd {cli_repo_name} && git checkout -b fix/{issue_number} && cd .. - - cd {cdk_repo_name} && git checkout -b fix/{issue_number} && cd .. + - cd {cli_repo_name} && git checkout -b {branch_name} && cd .. + - cd {cdk_repo_name} && git checkout -b {branch_name} && cd .. 8. Report back: the issue title, body, and which repos likely need changes based on the issue content. IMPORTANT: Run each step as a separate shell command. Do not combine them. If tools are already installed, skip step 1. 
diff --git a/agents/feature_builder/prompts/setup.md b/agents/feature_builder/prompts/setup.md index e9d67786d..3757a4f55 100644 --- a/agents/feature_builder/prompts/setup.md +++ b/agents/feature_builder/prompts/setup.md @@ -13,8 +13,8 @@ Steps: - git clone https://github.com/{cdk_repo}.git {cdk_repo_name} 5. Install dependencies: cd {cli_repo_name} && npm install 2>&1 | tail -3 && cd .. 6. Create a feature branch in both repos: - - cd {cli_repo_name} && git checkout -b feature/{feature_name} && cd .. - - cd {cdk_repo_name} && git checkout -b feature/{feature_name} && cd .. + - cd {cli_repo_name} && git checkout -b {branch_name} && cd .. + - cd {cdk_repo_name} && git checkout -b {branch_name} && cd .. 7. Report back confirmation that the environment is ready. IMPORTANT: Run each step as a separate shell command. Do not combine them. If tools are already installed, skip step 1. diff --git a/agents/orchestrations/fix_and_review/orchestrator.py b/agents/orchestrations/fix_and_review/orchestrator.py index ba2c1089e..2eb1ab194 100644 --- a/agents/orchestrations/fix_and_review/orchestrator.py +++ b/agents/orchestrations/fix_and_review/orchestrator.py @@ -70,7 +70,7 @@ def elapsed() -> str: t0 = time.time() print("--- Phase 0: Setup ---") issue_details = run_setup(client, config, session_id, issue_url, - feature_name=feature_name) + feature_name=feature_name, branch_name=branch_name) if is_feature: issue_title = feature_name or "unnamed feature" else: diff --git a/agents/orchestrations/fix_and_review/phases/setup.py b/agents/orchestrations/fix_and_review/phases/setup.py index 6b8fe3592..16982b445 100644 --- a/agents/orchestrations/fix_and_review/phases/setup.py +++ b/agents/orchestrations/fix_and_review/phases/setup.py @@ -26,6 +26,7 @@ def run_setup( session_id: str, issue_url: str, feature_name: str | None = None, + branch_name: str | None = None, ) -> str: issue_number = issue_url.rstrip("/").split("/")[-1] prompt = load_prompt( @@ -37,5 +38,6 @@ def run_setup( 
issue_url=issue_url, issue_number=issue_number, feature_name=feature_name or issue_number, + branch_name=branch_name or f"fix/{issue_number}", ) return client.invoke(session_id=session_id, message=prompt) From 139a6141ac7e0fc618a66d22ca92be97a34c61a0 Mon Sep 17 00:00:00 2001 From: Aidan Daly <aidandal@amazon.com> Date: Wed, 6 May 2026 14:10:14 -0400 Subject: [PATCH 11/12] feat: parallelize reviewers with ThreadPoolExecutor, relax hard tool limit --- agents/bug_fixer/prompts/reviewer.md | 6 +- agents/feature_builder/prompts/reviewer.md | 6 +- .../fix_and_review/phases/review.py | 83 ++++++++++++------- 3 files changed, 59 insertions(+), 36 deletions(-) diff --git a/agents/bug_fixer/prompts/reviewer.md b/agents/bug_fixer/prompts/reviewer.md index a7bbef4f1..3fd90784b 100644 --- a/agents/bug_fixer/prompts/reviewer.md +++ b/agents/bug_fixer/prompts/reviewer.md @@ -17,9 +17,9 @@ Instructions: {previous_findings_context} -CRITICAL CONSTRAINTS: -- Use at most 10 shell commands total. Do NOT explore the whole codebase. -- Focus on: correctness, breaking changes, obvious bugs. Skip style nits. +CONSTRAINTS: +- Stay focused on the diff and immediately related code. Do not explore unrelated parts of the codebase. +- Focus on: correctness, breaking changes, obvious bugs, missing error handling. Skip style nits. - If the code looks correct and doesn't break anything, approve it. - Do NOT run npm install, npm test, or any build commands. diff --git a/agents/feature_builder/prompts/reviewer.md b/agents/feature_builder/prompts/reviewer.md index f2f71391d..70fe76285 100644 --- a/agents/feature_builder/prompts/reviewer.md +++ b/agents/feature_builder/prompts/reviewer.md @@ -17,9 +17,9 @@ Instructions: {previous_findings_context} -CRITICAL CONSTRAINTS: -- Use at most 10 shell commands total. Do NOT explore the whole codebase. -- Focus on: correctness, breaking changes, obvious bugs. Skip style nits. +CONSTRAINTS: +- Stay focused on the diff and immediately related code. 
Do not explore unrelated parts of the codebase. +- Focus on: correctness, breaking changes, obvious bugs, missing error handling. Skip style nits. - If the code looks correct and doesn't break anything, approve it. - Do NOT run npm install, npm test, or any build commands. diff --git a/agents/orchestrations/fix_and_review/phases/review.py b/agents/orchestrations/fix_and_review/phases/review.py index be1fabbce..1e5196353 100644 --- a/agents/orchestrations/fix_and_review/phases/review.py +++ b/agents/orchestrations/fix_and_review/phases/review.py @@ -1,3 +1,5 @@ +from concurrent.futures import ThreadPoolExecutor, as_completed + from core.config import PipelineConfig from core.harness_client import HarnessClient from core.parsing import ReviewResult, parse_reviewer_output @@ -5,6 +7,45 @@ from orchestrations.fix_and_review.phases.setup import load_prompt +def _run_single_reviewer( + client: HarnessClient, + config: PipelineConfig, + assignment: ReviewerAssignment, + branch_name: str, + issue_summary: str, + previous_findings_context: str, +) -> tuple[ReviewResult | None, str]: + session_id = HarnessClient.new_session_id() + prompt = load_prompt( + "reviewer.md", + issue_summary=issue_summary, + branch_name=branch_name, + cli_repo=config.cli_repo, + cdk_repo=config.cdk_repo, + focus=assignment.focus, + assigned_files=", ".join(assignment.files), + previous_findings_context=previous_findings_context, + ) + + raw_output = client.invoke(session_id=session_id, message=prompt) + parsed = parse_reviewer_output(raw_output) + + if parsed is None: + retry_msg = ( + "Your previous output was not valid JSON. 
Please output ONLY a JSON object " + "wrapped in ```json fences with this schema: " + '{"approved": boolean, "findings": [{"severity": "critical"|"high"|"medium"|"low", ' + '"file": "path", "line": number, "description": "...", "suggestion": "..."}]}' + ) + for _ in range(2): + raw_output = client.invoke(session_id=session_id, message=retry_msg) + parsed = parse_reviewer_output(raw_output) + if parsed is not None: + break + + return (parsed, raw_output) + + def run_review( client: HarnessClient, config: PipelineConfig, @@ -13,37 +54,19 @@ def run_review( issue_summary: str, previous_findings_context: str = "", ) -> list[tuple[ReviewResult | None, str]]: - results: list[tuple[ReviewResult | None, str]] = [] - - for assignment in assignments: - session_id = HarnessClient.new_session_id() - prompt = load_prompt( - "reviewer.md", - issue_summary=issue_summary, - branch_name=branch_name, - cli_repo=config.cli_repo, - cdk_repo=config.cdk_repo, - focus=assignment.focus, - assigned_files=", ".join(assignment.files), - previous_findings_context=previous_findings_context, - ) - - raw_output = client.invoke(session_id=session_id, message=prompt) - parsed = parse_reviewer_output(raw_output) + print(f" Spawning {len(assignments)} reviewers in parallel...", flush=True) - if parsed is None: - retry_msg = ( - "Your previous output was not valid JSON. 
Please output ONLY a JSON object " - "wrapped in ```json fences with this schema: " - '{"approved": boolean, "findings": [{"severity": "critical"|"high"|"medium"|"low", ' - '"file": "path", "line": number, "description": "...", "suggestion": "..."}]}' + with ThreadPoolExecutor(max_workers=len(assignments)) as executor: + futures = [ + executor.submit( + _run_single_reviewer, + client, config, assignment, branch_name, + issue_summary, previous_findings_context, ) - for _ in range(2): - raw_output = client.invoke(session_id=session_id, message=retry_msg) - parsed = parse_reviewer_output(raw_output) - if parsed is not None: - break - - results.append((parsed, raw_output)) + for assignment in assignments + ] + results: list[tuple[ReviewResult | None, str]] = [] + for future in as_completed(futures): + results.append(future.result()) return results From 7026fd9ed6be9676b1eede610653cb5953a9c786 Mon Sep 17 00:00:00 2001 From: Aidan Daly <aidandal@amazon.com> Date: Wed, 6 May 2026 16:27:39 -0400 Subject: [PATCH 12/12] =?UTF-8?q?fix:=20use=20origin/main=20in=20extract?= =?UTF-8?q?=20and=20complete=20phases=20=E2=80=94=20local=20main=20ref=20m?= =?UTF-8?q?ay=20be=20stale?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- agents/orchestrations/fix_and_review/phases/complete.py | 2 +- agents/orchestrations/fix_and_review/phases/extract.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/agents/orchestrations/fix_and_review/phases/complete.py b/agents/orchestrations/fix_and_review/phases/complete.py index b5225af27..3fb4f4e5a 100644 --- a/agents/orchestrations/fix_and_review/phases/complete.py +++ b/agents/orchestrations/fix_and_review/phases/complete.py @@ -39,7 +39,7 @@ def run_complete( for repo in affected_repos: repo_name = repo.split("/")[-1] if "/" in repo else repo stdout, _, exit_code = client.run_command( - session_id, f"cd {repo_name} && git log main..{branch_name} --oneline 2>/dev/null" + 
session_id, f"cd {repo_name} && git log origin/main..{branch_name} --oneline 2>/dev/null" ) if exit_code == 0 and stdout.strip(): full_repo = full_repo_map.get(repo, repo) diff --git a/agents/orchestrations/fix_and_review/phases/extract.py b/agents/orchestrations/fix_and_review/phases/extract.py index b9f8f7724..daed91e41 100644 --- a/agents/orchestrations/fix_and_review/phases/extract.py +++ b/agents/orchestrations/fix_and_review/phases/extract.py @@ -32,13 +32,13 @@ def run_extract( for repo_name in [cli_name, cdk_name]: # Check if this repo has changes on the branch commit_log, _, exit_code = client.run_command( - session_id, f"cd {repo_name} && git log main..HEAD --oneline 2>/dev/null" + session_id, f"cd {repo_name} && git log origin/main..HEAD --oneline 2>/dev/null" ) if exit_code != 0 or not commit_log.strip(): continue - diff_stat, _, _ = client.run_command(session_id, f"cd {repo_name} && git diff main --stat") - full_diff, _, _ = client.run_command(session_id, f"cd {repo_name} && git diff main") + diff_stat, _, _ = client.run_command(session_id, f"cd {repo_name} && git diff origin/main --stat") + full_diff, _, _ = client.run_command(session_id, f"cd {repo_name} && git diff origin/main") all_diff_stat += diff_stat all_full_diff += full_diff