From 8ad6ab81f8c50ae8fc07a3b4fa21df6701bb1e1d Mon Sep 17 00:00:00 2001 From: Aidan Daly Date: Tue, 5 May 2026 15:47:30 -0400 Subject: [PATCH 01/12] feat: add agents/ directory with orchestration framework MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a self-contained Python project for autonomous agents powered by Bedrock AgentCore Harness. Includes: - core/ — shared harness client (raw HTTP + SigV4), response parsing, config - orchestrations/fix_and_review/ — multi-phase pipeline: plan → execute → verify → multi-round review → fix → PR - bug_fixer/ — workflow entry point for fixing issues labeled 'bug' - feature_builder/ — workflow entry point for building features from devex + impl docs - pr_reviewer/ — migrated from .github/harness/ to share core infrastructure - GitHub Actions workflows for both triggers - 19 unit tests Tested end-to-end: successfully planned, implemented, and reviewed fixes for issues #761 and #924 with Opus 4.7, creating PRs with proper templates. 
--- .github/workflows/bug-fixer.yml | 60 +++++ .github/workflows/feature-builder.yml | 58 ++++ agents/bug_fixer/main.py | 40 +++ agents/bug_fixer/prompts/executor.md | 17 ++ agents/bug_fixer/prompts/fixer.md | 16 ++ agents/bug_fixer/prompts/planner.md | 20 ++ agents/bug_fixer/prompts/reviewer.md | 36 +++ agents/bug_fixer/prompts/setup.md | 19 ++ agents/config.yaml | 10 + agents/core/__init__.py | 0 agents/core/config.py | 30 +++ agents/core/harness_client.py | 160 +++++++++++ agents/core/parsing.py | 56 ++++ agents/core/phases/__init__.py | 0 agents/feature_builder/__init__.py | 0 agents/feature_builder/main.py | 64 +++++ agents/feature_builder/prompts/executor.md | 17 ++ agents/feature_builder/prompts/fixer.md | 16 ++ agents/feature_builder/prompts/planner.md | 27 ++ agents/feature_builder/prompts/reviewer.md | 36 +++ agents/feature_builder/prompts/setup.md | 18 ++ agents/orchestrations/__init__.py | 0 .../orchestrations/fix_and_review/__init__.py | 0 .../fix_and_review/orchestrator.py | 252 ++++++++++++++++++ .../fix_and_review/partitioning.py | 121 +++++++++ .../fix_and_review/phases/__init__.py | 0 .../fix_and_review/phases/aggregate.py | 57 ++++ .../fix_and_review/phases/complete.py | 111 ++++++++ .../fix_and_review/phases/execute.py | 20 ++ .../fix_and_review/phases/extract.py | 55 ++++ .../fix_and_review/phases/fix.py | 30 +++ .../fix_and_review/phases/plan.py | 21 ++ .../fix_and_review/phases/review.py | 49 ++++ .../fix_and_review/phases/setup.py | 40 +++ .../fix_and_review/phases/validate.py | 33 +++ .../fix_and_review/phases/verify.py | 91 +++++++ agents/orchestrations/one_shot/__init__.py | 0 .../one_shot/phases/__init__.py | 0 agents/orchestrations/review_only/__init__.py | 0 .../review_only/phases/__init__.py | 0 agents/pr_reviewer/main.py | 200 ++++++++++++++ agents/pr_reviewer/prompts/review.md | 18 ++ agents/pr_reviewer/prompts/system.md | 25 ++ agents/pyproject.toml | 18 ++ agents/tests/__init__.py | 0 agents/tests/test_config.py | 65 +++++ 
agents/tests/test_harness_client.py | 21 ++ agents/tests/test_parsing.py | 81 ++++++ agents/tests/test_partitioning.py | 74 +++++ 49 files changed, 2082 insertions(+) create mode 100644 .github/workflows/bug-fixer.yml create mode 100644 .github/workflows/feature-builder.yml create mode 100644 agents/bug_fixer/main.py create mode 100644 agents/bug_fixer/prompts/executor.md create mode 100644 agents/bug_fixer/prompts/fixer.md create mode 100644 agents/bug_fixer/prompts/planner.md create mode 100644 agents/bug_fixer/prompts/reviewer.md create mode 100644 agents/bug_fixer/prompts/setup.md create mode 100644 agents/config.yaml create mode 100644 agents/core/__init__.py create mode 100644 agents/core/config.py create mode 100644 agents/core/harness_client.py create mode 100644 agents/core/parsing.py create mode 100644 agents/core/phases/__init__.py create mode 100644 agents/feature_builder/__init__.py create mode 100644 agents/feature_builder/main.py create mode 100644 agents/feature_builder/prompts/executor.md create mode 100644 agents/feature_builder/prompts/fixer.md create mode 100644 agents/feature_builder/prompts/planner.md create mode 100644 agents/feature_builder/prompts/reviewer.md create mode 100644 agents/feature_builder/prompts/setup.md create mode 100644 agents/orchestrations/__init__.py create mode 100644 agents/orchestrations/fix_and_review/__init__.py create mode 100644 agents/orchestrations/fix_and_review/orchestrator.py create mode 100644 agents/orchestrations/fix_and_review/partitioning.py create mode 100644 agents/orchestrations/fix_and_review/phases/__init__.py create mode 100644 agents/orchestrations/fix_and_review/phases/aggregate.py create mode 100644 agents/orchestrations/fix_and_review/phases/complete.py create mode 100644 agents/orchestrations/fix_and_review/phases/execute.py create mode 100644 agents/orchestrations/fix_and_review/phases/extract.py create mode 100644 agents/orchestrations/fix_and_review/phases/fix.py create mode 100644 
agents/orchestrations/fix_and_review/phases/plan.py create mode 100644 agents/orchestrations/fix_and_review/phases/review.py create mode 100644 agents/orchestrations/fix_and_review/phases/setup.py create mode 100644 agents/orchestrations/fix_and_review/phases/validate.py create mode 100644 agents/orchestrations/fix_and_review/phases/verify.py create mode 100644 agents/orchestrations/one_shot/__init__.py create mode 100644 agents/orchestrations/one_shot/phases/__init__.py create mode 100644 agents/orchestrations/review_only/__init__.py create mode 100644 agents/orchestrations/review_only/phases/__init__.py create mode 100644 agents/pr_reviewer/main.py create mode 100644 agents/pr_reviewer/prompts/review.md create mode 100644 agents/pr_reviewer/prompts/system.md create mode 100644 agents/pyproject.toml create mode 100644 agents/tests/__init__.py create mode 100644 agents/tests/test_config.py create mode 100644 agents/tests/test_harness_client.py create mode 100644 agents/tests/test_parsing.py create mode 100644 agents/tests/test_partitioning.py diff --git a/.github/workflows/bug-fixer.yml b/.github/workflows/bug-fixer.yml new file mode 100644 index 000000000..a7de5d8c1 --- /dev/null +++ b/.github/workflows/bug-fixer.yml @@ -0,0 +1,60 @@ +name: Bug Fixer Agent + +on: + issues: + types: [labeled] + workflow_dispatch: + inputs: + issue_url: + description: 'GitHub issue URL' + required: true + type: string + +permissions: + id-token: write + contents: write + pull-requests: write + issues: write + +jobs: + fix-bug: + if: github.event.label.name == 'bug' || github.event_name == 'workflow_dispatch' + runs-on: ubuntu-latest + steps: + - name: Determine issue URL + id: issue + env: + INPUT_URL: ${{ inputs.issue_url }} + EVENT_URL: ${{ github.event.issue.html_url }} + EVENT_NAME: ${{ github.event_name }} + run: | + if [ "$EVENT_NAME" = "workflow_dispatch" ]; then + echo "url=$INPUT_URL" >> "$GITHUB_OUTPUT" + else + echo "url=$EVENT_URL" >> "$GITHUB_OUTPUT" + fi + + - name: 
Checkout + uses: actions/checkout@v6 + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v6 + with: + role-to-assume: ${{ secrets.HARNESS_AWS_ROLE_ARN }} + aws-region: us-west-2 + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.12' + + - name: Setup uv + uses: astral-sh/setup-uv@v7 + + - name: Run Bug Fixer Agent + working-directory: agents + env: + HARNESS_ARN: ${{ secrets.HARNESS_ARN }} + GH_TOKEN: ${{ secrets.PAT_TOKEN }} + ISSUE_URL: ${{ steps.issue.outputs.url }} + run: uv sync && uv run python -m bug_fixer.main --issue "$ISSUE_URL" diff --git a/.github/workflows/feature-builder.yml b/.github/workflows/feature-builder.yml new file mode 100644 index 000000000..13f8f7107 --- /dev/null +++ b/.github/workflows/feature-builder.yml @@ -0,0 +1,58 @@ +name: Feature Builder Agent + +on: + workflow_dispatch: + inputs: + devex_doc: + description: 'Path to devex doc (relative to repo root)' + required: true + type: string + impl_doc: + description: 'Path to implementation plan (relative to repo root)' + required: true + type: string + feature_name: + description: 'Feature name (used for branch naming)' + required: true + type: string + +permissions: + id-token: write + contents: write + pull-requests: write + +jobs: + build-feature: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v6 + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v6 + with: + role-to-assume: ${{ secrets.HARNESS_AWS_ROLE_ARN }} + aws-region: us-west-2 + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.12' + + - name: Setup uv + uses: astral-sh/setup-uv@v7 + + - name: Run Feature Builder Agent + working-directory: agents + env: + HARNESS_ARN: ${{ secrets.HARNESS_ARN }} + GH_TOKEN: ${{ secrets.PAT_TOKEN }} + DEVEX_DOC: ${{ inputs.devex_doc }} + IMPL_DOC: ${{ inputs.impl_doc }} + FEATURE_NAME: ${{ inputs.feature_name }} + run: | + uv sync + 
uv run python -m feature_builder.main \ + --devex "../$DEVEX_DOC" \ + --impl "../$IMPL_DOC" \ + --name "$FEATURE_NAME" diff --git a/agents/bug_fixer/main.py b/agents/bug_fixer/main.py new file mode 100644 index 000000000..df1ba5b78 --- /dev/null +++ b/agents/bug_fixer/main.py @@ -0,0 +1,40 @@ +"""Bug Fixer Agent — resolves GitHub issues labeled 'bug'. + +Usage: + uv run bug_fixer/main.py --issue https://github.com/aws/agentcore-cli/issues/123 + uv run bug_fixer/main.py --issue https://github.com/aws/agentcore-cli/issues/123 --config config.yaml +""" + +import argparse +import sys +from pathlib import Path + +from orchestrations.fix_and_review.orchestrator import run_pipeline + +PROMPTS_DIR = Path(__file__).resolve().parent / "prompts" + + +def main(): + parser = argparse.ArgumentParser(description="Bug Fixer Agent") + parser.add_argument("--issue", required=True, help="GitHub issue URL") + parser.add_argument("--config", default="config.yaml", help="Config YAML path") + parser.add_argument("--aws-profile", help="Override AWS profile") + parser.add_argument("--harness-arn", help="Override harness ARN") + args = parser.parse_args() + + overrides = {} + if args.aws_profile: + overrides["aws_profile"] = args.aws_profile + if args.harness_arn: + overrides["harness_arn"] = args.harness_arn + + return run_pipeline( + issue_url=args.issue, + config_path=args.config, + prompts_dir=PROMPTS_DIR, + **overrides, + ) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/agents/bug_fixer/prompts/executor.md b/agents/bug_fixer/prompts/executor.md new file mode 100644 index 000000000..8f1f89273 --- /dev/null +++ b/agents/bug_fixer/prompts/executor.md @@ -0,0 +1,17 @@ +You are a senior software engineer implementing a planned change across one or two TypeScript repos. + +The plan: +{plan} + +Instructions: +1. Follow the plan exactly. Make the code changes described. +2. Run `npm run typecheck 2>&1 | tail -20` in each affected repo. Fix any type errors. +3. 
Run tests with summary output only: `npm run test:unit 2>&1 | grep -E "(FAIL|PASS|Tests:|Test Suites:)" | tail -20` +4. If tests fail, debug the specific failing file: `npm run test:unit -- path/to/failing.test.ts 2>&1 | tail -50` +5. Commit your changes: `git add -A && git commit -m "feat: {commit_message}"` +6. Push to fork remote: `git push origin {branch_name}` +7. If you need to deviate from the plan, document why in your commit message. + +IMPORTANT: Never run `npm run test:unit` without piping through grep or tail. The full output is too large and will overflow context. Use the grep pattern in step 3 for the summary, then target specific files in step 4 if something fails. + +Do not stop until typecheck and tests pass. If tests fail, analyze the failure, fix the code, and try again. diff --git a/agents/bug_fixer/prompts/fixer.md b/agents/bug_fixer/prompts/fixer.md new file mode 100644 index 000000000..a99a6f0d1 --- /dev/null +++ b/agents/bug_fixer/prompts/fixer.md @@ -0,0 +1,16 @@ +You are a senior software engineer fixing issues found during code review. + +The following findings were reported by reviewers. Address each one: + +{findings_text} + +Instructions: +1. Fix each finding, starting with Critical severity first, then High, Medium, Low. +2. If a finding is not applicable or is a false positive, explain why in a commit message. +3. Run `npm run typecheck 2>&1 | tail -20` in each affected repo after fixes. +4. Run tests with summary: `npm run test:unit 2>&1 | grep -E "(FAIL|PASS|Tests:|Test Suites:)" | tail -20` +5. If tests fail, debug the specific file: `npm run test:unit -- path/to/failing.test.ts 2>&1 | tail -50` +6. Commit: `git add -A && git commit -m "fix: address review findings round {round_number}"` +7. Push: `git push origin {branch_name}` + +IMPORTANT: Never run `npm run test:unit` without piping through grep or tail. The full output is too large and will overflow context. 
diff --git a/agents/bug_fixer/prompts/planner.md b/agents/bug_fixer/prompts/planner.md new file mode 100644 index 000000000..a651788b2 --- /dev/null +++ b/agents/bug_fixer/prompts/planner.md @@ -0,0 +1,20 @@ +You are a senior software architect planning a fix for a GitHub issue. + +You have access to two TypeScript repositories: +- agentcore-cli: AWS AgentCore CLI tool (Commander.js + Ink TUI, ~550 source files) +- agentcore-l3-cdk-constructs: AWS CDK L3 constructs for AgentCore (~17 test files, shares schemas with CLI) + +The issue details are: +{issue_details} + +Analyze the issue, explore the relevant code in both repos, and produce a structured implementation plan. + +Your plan MUST include: +1. **Affected repos**: Which repos need changes (cli, cdk, or both) +2. **Files to change**: Exact file paths to modify, create, or delete +3. **Approach**: Step-by-step description of the changes +4. **Risks**: What could go wrong, edge cases to watch for +5. **Testing strategy**: What tests to add or modify, how to verify the fix +6. **Scope estimate**: small (1-3 files), medium (4-7 files), or large (8+ files) + +Output the plan as structured markdown. diff --git a/agents/bug_fixer/prompts/reviewer.md b/agents/bug_fixer/prompts/reviewer.md new file mode 100644 index 000000000..9cd4cc7a2 --- /dev/null +++ b/agents/bug_fixer/prompts/reviewer.md @@ -0,0 +1,36 @@ +You are a senior code reviewer. You have been assigned a region of a code change to review. + +Issue being solved: {issue_summary} +Branch: {branch_name} +Repos: {cli_repo}, {cdk_repo} + +Your assigned focus: {focus} +Files to focus on: {assigned_files} + +Instructions: +1. 
Clone repos with the feature branch: + - git clone --depth 10 --branch {branch_name} https://github.com/{cli_repo}.git agentcore-cli + - git clone --depth 10 --branch {branch_name} https://github.com/{cdk_repo}.git agentcore-l3-cdk-constructs + (If the branch doesn't exist in a repo, clone main instead: git clone --depth 10 https://github.com/{cli_repo}.git agentcore-cli) +2. Run: git diff main (or git log if on the feature branch already) +3. Review your assigned files for ALL concerns: correctness, architecture, security, testing adequacy, cross-repo consistency, and breaking changes +4. Trace callers of changed functions. Check types. Verify test coverage. +5. You do NOT need to run npm install — you are reviewing code, not building it. + +{previous_findings_context} + +Output your review as a JSON object wrapped in ```json fences. +The JSON must have this exact schema: +{{ + "approved": boolean, + "findings": [ + {{ + "severity": "critical" | "high" | "medium" | "low", + "file": "path/to/file", + "line": number, + "description": "what's wrong", + "suggestion": "how to fix" + }} + ] +}} +Output ONLY the JSON object in code fences. No other text before or after. diff --git a/agents/bug_fixer/prompts/setup.md b/agents/bug_fixer/prompts/setup.md new file mode 100644 index 000000000..11c3e50df --- /dev/null +++ b/agents/bug_fixer/prompts/setup.md @@ -0,0 +1,19 @@ +You are setting up a development environment to solve a GitHub issue. + +Steps: +1. Install tools (Amazon Linux 2023): dnf install -y -q git nodejs20 npm > /dev/null 2>&1 +2. Authenticate GitHub: echo $GH_TOKEN | gh auth login --with-token +3. Configure git to use gh for auth: gh auth setup-git +4. Clone both repos: + - git clone https://github.com/{cli_repo}.git {cli_repo_name} + - git clone https://github.com/{cdk_repo}.git {cdk_repo_name} +5. Install dependencies: cd {cli_repo_name} && npm install 2>&1 | tail -3 && cd .. +6. 
Fetch the issue details: gh issue view {issue_url} --json title,body,labels,comments,assignees +7. Create a feature branch in both repos: + - cd {cli_repo_name} && git checkout -b fix/{issue_number} && cd .. + - cd {cdk_repo_name} && git checkout -b fix/{issue_number} && cd .. +8. Report back: the issue title, body, and which repos likely need changes based on the issue content. + +IMPORTANT: Run each step as a separate shell command. Do not combine them. If tools are already installed, skip step 1. + +Output the issue details as structured markdown with sections: Title, Body, Labels, and Initial Assessment. diff --git a/agents/config.yaml b/agents/config.yaml new file mode 100644 index 000000000..6ebd0a945 --- /dev/null +++ b/agents/config.yaml @@ -0,0 +1,10 @@ +harness_arn: "arn:aws:bedrock-agentcore:us-west-2:603141041947:harness/IssueSolver_aidandal-8SL97TEXjS" +region: "us-west-2" +aws_profile: "deploy" +model_id: "global.anthropic.claude-opus-4-7" +min_reviewers: 3 +max_reviewers: 5 +max_review_rounds: 5 +repos: + cli: "aws/agentcore-cli" + cdk: "aws/agentcore-l3-cdk-constructs" diff --git a/agents/core/__init__.py b/agents/core/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/agents/core/config.py b/agents/core/config.py new file mode 100644 index 000000000..fb81a3a97 --- /dev/null +++ b/agents/core/config.py @@ -0,0 +1,30 @@ +from dataclasses import dataclass + +import yaml + + +@dataclass +class PipelineConfig: + harness_arn: str + region: str = "us-west-2" + data_plane_endpoint: str | None = None + aws_profile: str = "deploy" + model_id: str = "global.anthropic.claude-opus-4-7" + min_reviewers: int = 3 + max_reviewers: int = 5 + max_review_rounds: int = 5 + cli_repo: str = "aws/agentcore-cli" + cdk_repo: str = "aws/agentcore-l3-cdk-constructs" + + @classmethod + def from_yaml(cls, path: str) -> "PipelineConfig": + with open(path) as f: + data = yaml.safe_load(f) + + repos = data.pop("repos", {}) + if "cli" in repos: + 
data["cli_repo"] = repos["cli"] + if "cdk" in repos: + data["cdk_repo"] = repos["cdk"] + + return cls(**{k: v for k, v in data.items() if k in cls.__dataclass_fields__}) diff --git a/agents/core/harness_client.py b/agents/core/harness_client.py new file mode 100644 index 000000000..33d296f32 --- /dev/null +++ b/agents/core/harness_client.py @@ -0,0 +1,160 @@ +import json +import sys +import uuid +from urllib.parse import quote + +import boto3 +import urllib3 +from botocore.auth import SigV4Auth +from botocore.awsrequest import AWSRequest +from botocore.config import Config as BotoConfig +from botocore.eventstream import EventStreamBuffer + +from core.config import PipelineConfig + + +class HarnessClient: + def __init__(self, config: PipelineConfig): + self.config = config + self.session = boto3.Session( + region_name=config.region, + profile_name=config.aws_profile, + ) + self.credentials = self.session.get_credentials().get_frozen_credentials() + self.http = urllib3.PoolManager() + self.client = self.session.client( + "bedrock-agentcore", + config=BotoConfig(read_timeout=600, connect_timeout=30, retries={"max_attempts": 2}), + ) + + def invoke( + self, + session_id: str, + message: str, + system_prompt: str | None = None, + verbose: bool = True, + ) -> str: + body: dict = { + "runtimeSessionId": session_id, + "messages": [{"role": "user", "content": [{"text": message}]}], + "model": {"bedrockModelConfig": {"modelId": self.config.model_id}}, + } + if system_prompt: + body["systemPrompt"] = [{"text": system_prompt}] + + region = self.config.region + arn = self.config.harness_arn + url = f"https://bedrock-agentcore.{region}.amazonaws.com/harnesses/invoke?harnessArn={quote(arn, safe='')}" + + request = AWSRequest(method="POST", url=url, data=json.dumps(body), headers={ + "Content-Type": "application/json", + "Accept": "application/vnd.amazon.eventstream", + }) + SigV4Auth(self.credentials, "bedrock-agentcore", region).add_auth(request) + + response = 
self.http.urlopen( + "POST", url, body=json.dumps(body).encode(), + headers=dict(request.headers), + preload_content=False, + timeout=urllib3.Timeout(connect=30, read=900), + ) + + if response.status != 200: + error = response.read().decode("utf-8") + if verbose: + print(f"\n ⚠️ HTTP {response.status}: {error}", flush=True) + raise RuntimeError(f"InvokeHarness failed: HTTP {response.status}: {error}") + + request_id = response.headers.get("x-amzn-RequestId", "unknown") + if verbose: + print(f" [request: {request_id}]", flush=True) + self.last_request_id = request_id + + return self._accumulate_text_from_http(response, verbose=verbose) + + def run_command(self, session_id: str, command: str, verbose: bool = False) -> tuple[str, str, int]: + if verbose: + print(f" $ {command}", flush=True) + response = self.client.invoke_agent_runtime_command( + agentRuntimeArn=self.config.harness_arn, + runtimeSessionId=session_id, + body={"command": command}, + ) + request_id = response.get("ResponseMetadata", {}).get("RequestId", "unknown") + self.last_request_id = request_id + return self._accumulate_command(response["stream"], verbose=verbose) + + def _accumulate_text_from_http(self, http_response, verbose: bool = False) -> str: + text_parts: list[str] = [] + tool_input_parts: list[str] = [] + current_tool: str | None = None + event_buffer = EventStreamBuffer() + + for chunk in http_response.stream(4096): + event_buffer.add_data(chunk) + for event in event_buffer: + if event.headers.get(":message-type") == "exception": + payload = json.loads(event.payload.decode("utf-8")) + if verbose: + print(f"\n ⚠️ Stream error: {payload}", flush=True) + if text_parts: + return "".join(text_parts) + raise RuntimeError(f"Stream error: {payload}") + + event_type = event.headers.get(":event-type", "") + if not event.payload: + continue + payload = json.loads(event.payload.decode("utf-8")) + + if event_type == "contentBlockStart": + start = payload.get("start", {}) + if "toolUse" in start: + 
current_tool = start["toolUse"].get("name", "unknown") + tool_input_parts = [] + else: + current_tool = None + elif event_type == "contentBlockDelta": + delta = payload.get("delta", {}) + if "text" in delta: + text_parts.append(delta["text"]) + if verbose: + print(delta["text"], end="", flush=True) + elif "toolUse" in delta and current_tool: + tool_input_parts.append(delta["toolUse"].get("input", "")) + elif event_type == "contentBlockStop": + if current_tool and verbose: + tool_input = "".join(tool_input_parts) + print(f"\n 🔧 {current_tool}: {tool_input[:200]}", flush=True) + current_tool = None + tool_input_parts = [] + elif event_type == "messageStop" and verbose: + print(flush=True) + + return "".join(text_parts) + + def _accumulate_command(self, stream, verbose: bool = False) -> tuple[str, str, int]: + stdout_parts: list[str] = [] + stderr_parts: list[str] = [] + exit_code = -1 + for event in stream: + if "chunk" in event: + chunk = event["chunk"] + if "contentDelta" in chunk: + delta = chunk["contentDelta"] + if "stdout" in delta: + stdout_parts.append(delta["stdout"]) + if verbose: + print(delta["stdout"], end="", flush=True) + if "stderr" in delta: + stderr_parts.append(delta["stderr"]) + if verbose: + print(delta["stderr"], end="", file=sys.stderr, flush=True) + elif "contentStop" in chunk: + exit_code = chunk["contentStop"].get("exitCode", -1) + if verbose: + print(f" [exit: {exit_code}]", flush=True) + return "".join(stdout_parts), "".join(stderr_parts), exit_code + + @staticmethod + def new_session_id() -> str: + return str(uuid.uuid4()).upper() diff --git a/agents/core/parsing.py b/agents/core/parsing.py new file mode 100644 index 000000000..1f4aab67d --- /dev/null +++ b/agents/core/parsing.py @@ -0,0 +1,56 @@ +import json +import re +from typing import Literal + +from pydantic import BaseModel, ValidationError + + +class Finding(BaseModel): + severity: Literal["critical", "high", "medium", "low"] + file: str + line: int + description: str + 
suggestion: str + + +class ReviewResult(BaseModel): + approved: bool + findings: list[Finding] + + +def parse_reviewer_output(raw_text: str) -> ReviewResult | None: + json_str = _extract_json(raw_text) + if json_str is None: + return None + return _validate(json_str) + + +def _extract_json(raw_text: str) -> str | None: + match = re.search(r"```json?\s*\n(.*?)\n\s*```", raw_text, re.DOTALL) + if match: + return match.group(1).strip() + + start = raw_text.find("{") + if start == -1: + return None + + depth = 0 + for i in range(start, len(raw_text)): + if raw_text[i] == "{": + depth += 1 + elif raw_text[i] == "}": + depth -= 1 + if depth == 0: + candidate = raw_text[start : i + 1] + if "approved" in candidate: + return candidate + return None + return None + + +def _validate(json_str: str) -> ReviewResult | None: + try: + data = json.loads(json_str) + return ReviewResult(**data) + except (json.JSONDecodeError, ValidationError): + return None diff --git a/agents/core/phases/__init__.py b/agents/core/phases/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/agents/feature_builder/__init__.py b/agents/feature_builder/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/agents/feature_builder/main.py b/agents/feature_builder/main.py new file mode 100644 index 000000000..bbd6a53f2 --- /dev/null +++ b/agents/feature_builder/main.py @@ -0,0 +1,64 @@ +"""Feature Builder Agent — builds features from devex + implementation docs. 
+ +Usage: + uv run python -m feature_builder.main --devex docs/devex.md --impl docs/impl.md + uv run python -m feature_builder.main --devex docs/devex.md --impl docs/impl.md --config config.yaml +""" + +import argparse +import sys +from pathlib import Path + +from orchestrations.fix_and_review.orchestrator import run_pipeline + +PROMPTS_DIR = Path(__file__).resolve().parent / "prompts" + + +def main(): + parser = argparse.ArgumentParser(description="Feature Builder Agent") + parser.add_argument("--devex", required=True, help="Path to devex doc (markdown)") + parser.add_argument("--impl", required=True, help="Path to implementation plan (markdown)") + parser.add_argument("--name", help="Feature name (used for branch naming)") + parser.add_argument("--config", default="config.yaml", help="Config YAML path") + parser.add_argument("--aws-profile", help="Override AWS profile") + parser.add_argument("--harness-arn", help="Override harness ARN") + args = parser.parse_args() + + devex_path = Path(args.devex) + impl_path = Path(args.impl) + + if not devex_path.exists(): + print(f"Error: devex doc not found: {devex_path}", file=sys.stderr) + return 1 + if not impl_path.exists(): + print(f"Error: impl doc not found: {impl_path}", file=sys.stderr) + return 1 + + devex_content = devex_path.read_text() + impl_content = impl_path.read_text() + + feature_name = args.name or devex_path.stem.replace(" ", "-").lower() + + # Construct a synthetic "issue" that the orchestrator can consume + # The orchestrator expects an issue_url — we pass a placeholder and override the setup phase + issue_url = f"feature/{feature_name}" + + overrides = {} + if args.aws_profile: + overrides["aws_profile"] = args.aws_profile + if args.harness_arn: + overrides["harness_arn"] = args.harness_arn + + return run_pipeline( + issue_url=issue_url, + config_path=args.config, + prompts_dir=PROMPTS_DIR, + devex_content=devex_content, + impl_content=impl_content, + feature_name=feature_name, + **overrides, + ) + + 
+if __name__ == "__main__": + sys.exit(main()) diff --git a/agents/feature_builder/prompts/executor.md b/agents/feature_builder/prompts/executor.md new file mode 100644 index 000000000..40601221e --- /dev/null +++ b/agents/feature_builder/prompts/executor.md @@ -0,0 +1,17 @@ +You are a senior software engineer implementing a new feature across one or two TypeScript repos. + +The plan: +{plan} + +Instructions: +1. Follow the plan exactly. Make the code changes described. +2. Run `npm run typecheck 2>&1 | tail -20` in each affected repo. Fix any type errors. +3. Run tests with summary: `npm run test:unit 2>&1 | grep -E "(FAIL|PASS|Tests:|Test Suites:)" | tail -20` +4. If tests fail, debug the specific file: `npm run test:unit -- path/to/failing.test.ts 2>&1 | tail -50` +5. Commit your changes: `git add -A && git commit -m "feat: {commit_message}"` +6. Push to remote: `git push origin feature/{feature_name}` +7. If you need to deviate from the plan, document why in your commit message. + +IMPORTANT: Never run `npm run test:unit` without piping through grep or tail. The full output is too large and will overflow context. Use the grep pattern in step 3 for the summary, then target specific files in step 4 if something fails. + +Do not stop until typecheck and tests pass. If tests fail, analyze the failure, fix the code, and try again. diff --git a/agents/feature_builder/prompts/fixer.md b/agents/feature_builder/prompts/fixer.md new file mode 100644 index 000000000..67e04ebfb --- /dev/null +++ b/agents/feature_builder/prompts/fixer.md @@ -0,0 +1,16 @@ +You are a senior software engineer fixing issues found during code review. + +The following findings were reported by reviewers. Address each one: + +{findings_text} + +Instructions: +1. Fix each finding, starting with Critical severity first, then High, Medium, Low. +2. If a finding is not applicable or is a false positive, explain why in a commit message. +3. 
Run `npm run typecheck 2>&1 | tail -20` in each affected repo after fixes. +4. Run tests with summary: `npm run test:unit 2>&1 | grep -E "(FAIL|PASS|Tests:|Test Suites:)" | tail -20` +5. If tests fail, debug the specific file: `npm run test:unit -- path/to/failing.test.ts 2>&1 | tail -50` +6. Commit: `git add -A && git commit -m "fix: address review findings round {round_number}"` +7. Push: `git push origin feature/{feature_name}` + +IMPORTANT: Never run `npm run test:unit` without piping through grep or tail. The full output is too large and will overflow context. diff --git a/agents/feature_builder/prompts/planner.md b/agents/feature_builder/prompts/planner.md new file mode 100644 index 000000000..ee22e2956 --- /dev/null +++ b/agents/feature_builder/prompts/planner.md @@ -0,0 +1,27 @@ +You are a senior software architect planning the implementation of a new feature. + +You have access to two TypeScript repositories: +- agentcore-cli: AWS AgentCore CLI tool (Commander.js + Ink TUI, ~550 source files) +- agentcore-l3-cdk-constructs: AWS CDK L3 constructs for AgentCore (~17 test files, shares schemas with CLI) + +## DevEx Document (what the user experience should be) + +{devex_content} + +## Implementation Plan (technical approach) + +{impl_content} + +## Your Task + +Based on the devex doc and implementation plan above, explore the relevant code in both repos and produce a detailed, actionable implementation plan. + +Your plan MUST include: +1. **Affected repos**: Which repos need changes (cli, cdk, or both) +2. **Files to change**: Exact file paths to modify, create, or delete +3. **Approach**: Step-by-step description of the changes, referencing specific functions and types in the codebase +4. **Risks**: What could go wrong, edge cases to watch for +5. **Testing strategy**: What tests to add or modify, how to verify the feature works +6. **Scope estimate**: small (1-3 files), medium (4-7 files), or large (8+ files) + +Output the plan as structured markdown. 
diff --git a/agents/feature_builder/prompts/reviewer.md b/agents/feature_builder/prompts/reviewer.md new file mode 100644 index 000000000..d576c249f --- /dev/null +++ b/agents/feature_builder/prompts/reviewer.md @@ -0,0 +1,36 @@ +You are a senior code reviewer. You have been assigned a region of a code change to review. + +Feature being built: {issue_summary} +Branch: {branch_name} +Repos: {cli_repo}, {cdk_repo} + +Your assigned focus: {focus} +Files to focus on: {assigned_files} + +Instructions: +1. Clone repos with the feature branch: + - git clone --depth 10 --branch {branch_name} https://github.com/{cli_repo}.git agentcore-cli + - git clone --depth 10 --branch {branch_name} https://github.com/{cdk_repo}.git agentcore-l3-cdk-constructs + (If the branch doesn't exist in a repo, clone main instead: git clone --depth 10 https://github.com/{cli_repo}.git agentcore-cli) +2. Run: git diff main (or git log if on the feature branch already) +3. Review your assigned files for ALL concerns: correctness, architecture, security, testing adequacy, cross-repo consistency, and breaking changes +4. Trace callers of changed functions. Check types. Verify test coverage. +5. You do NOT need to run npm install — you are reviewing code, not building it. + +{previous_findings_context} + +Output your review as a JSON object wrapped in ```json fences. +The JSON must have this exact schema: +{{ + "approved": boolean, + "findings": [ + {{ + "severity": "critical" | "high" | "medium" | "low", + "file": "path/to/file", + "line": number, + "description": "what's wrong", + "suggestion": "how to fix" + }} + ] +}} +Output ONLY the JSON object in code fences. No other text before or after. diff --git a/agents/feature_builder/prompts/setup.md b/agents/feature_builder/prompts/setup.md new file mode 100644 index 000000000..f06610da8 --- /dev/null +++ b/agents/feature_builder/prompts/setup.md @@ -0,0 +1,18 @@ +You are setting up a development environment to build a new feature. + +Steps: +1. 
Install tools (Amazon Linux 2023): dnf install -y -q git nodejs20 npm > /dev/null 2>&1 +2. Authenticate GitHub: echo $GH_TOKEN | gh auth login --with-token +3. Configure git to use gh for auth: gh auth setup-git +4. Clone both repos: + - git clone https://github.com/{cli_repo}.git {cli_repo_name} + - git clone https://github.com/{cdk_repo}.git {cdk_repo_name} +5. Install dependencies: cd {cli_repo_name} && npm install 2>&1 | tail -3 && cd .. +6. Create a feature branch in both repos: + - cd {cli_repo_name} && git checkout -b feature/{feature_name} && cd .. + - cd {cdk_repo_name} && git checkout -b feature/{feature_name} && cd .. +7. Report back confirmation that the environment is ready. + +IMPORTANT: Run each step as a separate shell command. Do not combine them. If tools are already installed, skip step 1. + +Output: Confirm environment is ready and which repos are cloned. diff --git a/agents/orchestrations/__init__.py b/agents/orchestrations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/agents/orchestrations/fix_and_review/__init__.py b/agents/orchestrations/fix_and_review/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/agents/orchestrations/fix_and_review/orchestrator.py b/agents/orchestrations/fix_and_review/orchestrator.py new file mode 100644 index 000000000..eee22af25 --- /dev/null +++ b/agents/orchestrations/fix_and_review/orchestrator.py @@ -0,0 +1,252 @@ +import time +from pathlib import Path + +from core.config import PipelineConfig +from core.harness_client import HarnessClient +from core.parsing import Finding +from orchestrations.fix_and_review.partitioning import ( + ReviewerAssignment, + calculate_reviewer_count, + partition_round1_by_directory, + partition_round2_focus_prompts, + partition_round3_risk_areas, +) +from orchestrations.fix_and_review.phases.aggregate import run_aggregate +from orchestrations.fix_and_review.phases.complete import run_complete +from 
orchestrations.fix_and_review.phases.execute import run_execute +from orchestrations.fix_and_review.phases.extract import ExtractResult, run_extract +from orchestrations.fix_and_review.phases.fix import run_fix +from orchestrations.fix_and_review.phases.plan import run_plan +from orchestrations.fix_and_review.phases.review import run_review +from orchestrations.fix_and_review.phases.setup import run_setup, set_prompts_dir +from orchestrations.fix_and_review.phases.validate import run_validate +from orchestrations.fix_and_review.phases.verify import run_verify + + +def run_pipeline( + issue_url: str, + config_path: str = "config.yaml", + prompts_dir: str | Path | None = None, + devex_content: str | None = None, + impl_content: str | None = None, + feature_name: str | None = None, + **overrides: str, +) -> int: + config = PipelineConfig.from_yaml(config_path) + for key, value in overrides.items(): + if hasattr(config, key): + setattr(config, key, value) + + if prompts_dir: + set_prompts_dir(Path(prompts_dir)) + + is_feature = devex_content is not None + if is_feature: + issue_number = feature_name or "feature" + branch_name = f"feature/{feature_name or 'unnamed'}" + else: + issue_number = issue_url.rstrip("/").split("/")[-1] + branch_name = f"fix/{issue_number}" + + client = HarnessClient(config) + session_id = HarnessClient.new_session_id() + + pipeline_start = time.time() + print(f"=== Pipeline Start ===") + print(f"{'Feature' if is_feature else 'Issue'}: {feature_name or issue_url}") + print(f"Session: {session_id}") + print(f"Harness: {config.harness_arn}") + print() + + def elapsed() -> str: + m, s = divmod(int(time.time() - pipeline_start), 60) + return f"{m}m{s:02d}s" + + # Phase 0: Setup + t0 = time.time() + print("--- Phase 0: Setup ---") + issue_details = run_setup(client, config, session_id, issue_url, + feature_name=feature_name) + if is_feature: + issue_title = feature_name or "unnamed feature" + else: + issue_title_raw, _, _ = client.run_command( + 
session_id, f"gh issue view {issue_url} --json title --jq .title 2>/dev/null" + ) + issue_title = issue_title_raw.strip() or f"resolve #{issue_number}" + print(f"Setup complete. {'Feature' if is_feature else 'Issue'}: {issue_title} [{int(time.time()-t0)}s | total {elapsed()}]") + print() + + # Phase 1: Plan + t0 = time.time() + print("--- Phase 1: Plan ---") + if is_feature: + plan = run_plan(client, config, session_id, issue_details, + devex_content=devex_content, impl_content=impl_content) + else: + plan = run_plan(client, config, session_id, issue_details) + print(f"Plan generated ({len(plan)} chars). [{int(time.time()-t0)}s | total {elapsed()}]") + print() + + # Phase 1.5: Validate Plan + t0 = time.time() + print("--- Phase 1.5: Validate Plan ---") + for attempt in range(3): + validation = run_validate(client, session_id, plan) + if validation.valid: + print(f"Plan validated. [{int(time.time()-t0)}s | total {elapsed()}]") + break + print(f"Validation errors: {validation.errors}") + if attempt < 2: + print("Re-planning...") + plan = run_plan( + client, config, session_id, + f"Previous plan had issues: {validation.errors}\n\n{issue_details}", + ) + else: + print("WARNING: Plan validation failed after 3 attempts. Proceeding anyway.") + print() + + # Phase 2: Execute + t0 = time.time() + print("--- Phase 2: Execute ---") + affected_repos: list[str] = [] + if "agentcore-cli" in plan.lower() or "cli" in plan.lower(): + affected_repos.append("agentcore-cli") + if "agentcore-l3-cdk" in plan.lower() or "cdk" in plan.lower(): + affected_repos.append("agentcore-l3-cdk-constructs") + if not affected_repos: + affected_repos = ["agentcore-cli"] + + for attempt in range(3): + run_execute(client, config, session_id, plan, branch_name, issue_number) + print(f"Execution complete. 
[{int(time.time()-t0)}s | total {elapsed()}]") + + # Phase 2.5: Verify + print("--- Phase 2.5: Verify ---") + verification = run_verify(client, session_id, branch_name, affected_repos) + if verification.all_passed: + print(f"Verification passed. [{int(time.time()-t0)}s | total {elapsed()}]") + break + print(f"Verification failed: {verification.errors}") + if attempt < 2: + print("Re-executing with error context...") + else: + print("WARNING: Verification failed after 3 attempts. Proceeding to review anyway.") + print() + + # Phase 3: Extract + t0 = time.time() + print("--- Phase 3: Extract ---") + extract = run_extract(client, session_id, config.cli_repo, config.cdk_repo) + print( + f"Extracted diff: {len(extract.stats.changed_files)} files, " + f"{extract.stats.total_lines} lines changed [{int(time.time()-t0)}s | total {elapsed()}]" + ) + print() + + # Review Loop + all_previous_findings_files: list[str] = [] + review_summary_parts: list[str] = [] + + for round_num in range(1, config.max_review_rounds + 1): + t0 = time.time() + # Phase 4: Review + print(f"--- Phase 4: Review (Round {round_num}) ---") + num_reviewers = calculate_reviewer_count( + extract.stats, config.min_reviewers, config.max_reviewers + ) + + if round_num == 1: + assignments = partition_round1_by_directory( + extract.stats.changed_files, num_reviewers + ) + elif round_num == 2: + focus_prompts = partition_round2_focus_prompts(num_reviewers) + assignments = [ + ReviewerAssignment(files=extract.stats.changed_files, focus=fp) + for fp in focus_prompts + ] + else: + assignments = partition_round3_risk_areas( + all_previous_findings_files, extract.stats.changed_files, num_reviewers + ) + + previous_context = "" + if round_num > 1: + previous_context = ( + f"These findings were identified and fixed in previous rounds: " + f"{', '.join(all_previous_findings_files)}. " + f"Do not re-raise issues that have already been addressed." 
+ ) + + issue_summary = issue_details[:500] if issue_details else "See branch for details" + review_results = run_review( + client, config, assignments, branch_name, issue_summary, previous_context + ) + print(f"Reviews collected from {len(review_results)} reviewers. [{int(time.time()-t0)}s | total {elapsed()}]") + + # Phase 5: Aggregate + print(f"--- Phase 5: Aggregate (Round {round_num}) ---") + aggregate = run_aggregate(review_results) + print( + f"Approved: {aggregate.all_approved}, " + f"Findings: {len(aggregate.unique_findings)}, " + f"Parse failures: {aggregate.parse_failures}" + ) + + if aggregate.all_approved: + medium_plus = [ + f for f in aggregate.unique_findings + if f.severity in ("critical", "high", "medium") + ] + if not medium_plus: + print(f"All reviewers approved. Moving to Complete. [total {elapsed()}]") + review_summary_parts.append( + f"Round {round_num}: {len(aggregate.unique_findings)} findings, all approved" + ) + break + + review_summary_parts.append( + f"Round {round_num}: {len(aggregate.unique_findings)} findings" + ) + + for f in aggregate.unique_findings: + if f.file and f.file not in all_previous_findings_files: + all_previous_findings_files.append(f.file) + + # Phase 6: Fix + t_fix = time.time() + print(f"--- Phase 6: Fix (Round {round_num}) ---") + run_fix(client, config, session_id, aggregate.unique_findings, branch_name, round_num) + print(f"Fixes applied. [{int(time.time()-t_fix)}s | total {elapsed()}]") + + # Re-extract for next round + extract = run_extract(client, session_id, config.cli_repo, config.cdk_repo) + print() + else: + print( + f"WARNING: Max review rounds ({config.max_review_rounds}) reached " + f"without full approval." 
"""Reviewer-count heuristics and file partitioning for multi-round reviews."""

from collections import defaultdict
from dataclasses import dataclass
from pathlib import PurePosixPath


@dataclass
class DiffStats:
    """Summary of a diff: changed files, size, and whether it spans both repos."""

    changed_files: list[str]
    total_lines: int
    cross_repo: bool


@dataclass
class ReviewerAssignment:
    """One reviewer's slice of the change: the files to read and a focus prompt."""

    files: list[str]
    focus: str = ""


def calculate_reviewer_count(stats: DiffStats, min_r: int = 3, max_r: int = 5) -> int:
    """Scale the number of reviewers with diff size, capped at ``max_r``."""
    n_files = len(stats.changed_files)
    n_lines = stats.total_lines

    if n_files <= 2 and n_lines < 100:
        count = min_r
    elif n_files <= 7 and n_lines < 500:
        count = min_r + 1
    else:
        count = max_r

    # A change touching both repos earns one extra reviewer, still capped.
    if stats.cross_repo:
        count += 1

    return min(count, max_r)


def partition_round1_by_directory(
    changed_files: list[str], num_reviewers: int
) -> list[ReviewerAssignment]:
    """Round 1: one reviewer per top-level directory group, largest groups first."""
    # Bucket files under their first two path components ("root" for bare names).
    by_dir: dict[str, list[str]] = defaultdict(list)
    for path in changed_files:
        segments = PurePosixPath(path).parts
        if not segments:
            bucket = "root"
        elif len(segments) > 1:
            bucket = "/".join(segments[:2])
        else:
            bucket = segments[0]
        by_dir[bucket].append(path)

    # Busiest directories first so they get dedicated reviewers.
    ordered = sorted(by_dir.items(), key=lambda kv: -len(kv[1]))

    if len(ordered) >= num_reviewers:
        # One reviewer per big group; the final reviewer sweeps up the rest.
        assignments = [
            ReviewerAssignment(files=group, focus=f"Review changes in {bucket}/")
            for bucket, group in ordered[: num_reviewers - 1]
        ]
        leftovers: list[str] = []
        for _, group in ordered[num_reviewers - 1 :]:
            leftovers.extend(group)
        assignments.append(
            ReviewerAssignment(files=leftovers, focus="Review remaining changes")
        )
        return assignments

    # Fewer directory groups than reviewers: cover every group, then add extra
    # passes over the largest group with rotating specialty focuses.
    assignments = [
        ReviewerAssignment(files=group, focus=f"Review changes in {bucket}/")
        for bucket, group in ordered
    ]
    specialty_focuses = [
        "Focus on error handling and edge cases",
        "Focus on type safety and interface consistency",
        "Focus on backwards compatibility and breaking changes",
    ]
    biggest_group = ordered[0][1] if ordered else changed_files
    for extra in range(num_reviewers - len(assignments)):
        assignments.append(
            ReviewerAssignment(
                files=biggest_group,
                focus=specialty_focuses[extra % len(specialty_focuses)],
            )
        )
    return assignments


def partition_round2_focus_prompts(num_reviewers: int) -> list[str]:
    """Round 2: every reviewer sees all files; each gets a distinct review lens."""
    prompts = [
        "Focus on correctness — does the logic match the issue requirements? Are there edge cases or off-by-one errors?",
        "Focus on safety — breaking changes, backwards compatibility, error handling, and graceful degradation",
        "Focus on testing — are the tests adequate? Do they cover acceptance criteria, edge cases, and error paths?",
        "Focus on cross-repo consistency — do type definitions, schemas, and interfaces stay in sync across both repos?",
        "Focus on performance and resource usage — are there unnecessary allocations, N+1 patterns, or missing caching?",
    ]
    return prompts[:num_reviewers]


def partition_round3_risk_areas(
    previous_findings_files: list[str],
    all_changed_files: list[str],
    num_reviewers: int,
) -> list[ReviewerAssignment]:
    """Round 3: concentrate roughly half the reviewers on previously-flagged files."""
    flagged = list(set(previous_findings_files))
    unflagged = [f for f in all_changed_files if f not in flagged]

    deep_count = max(1, num_reviewers // 2)
    assignments: list[ReviewerAssignment] = [
        ReviewerAssignment(
            files=flagged,
            focus=f"Deep review of previously-flagged files — verify fixes are correct and complete (pass {i + 1})",
        )
        for i in range(deep_count)
    ]

    # Remaining reviewers scan everything, flagged files listed first.
    everything = flagged + unflagged
    for i in range(num_reviewers - deep_count):
        assignments.append(
            ReviewerAssignment(
                files=everything,
                focus=f"Broad review with emphasis on how fixes interact with surrounding code (pass {i + 1})",
            )
        )

    return assignments
"""Phase 8: rebase, push, and open pull requests for every repo with changes."""

import re
from dataclasses import dataclass
from typing import TYPE_CHECKING

if TYPE_CHECKING:  # annotation-only imports; no runtime dependency needed
    from core.config import PipelineConfig
    from core.harness_client import HarnessClient


@dataclass
class CompleteResult:
    """Outcome of the completion phase."""

    # URLs of the PRs that were created (or found already open via `gh pr list`).
    pr_urls: list[str]
    # True when every repo rebased cleanly onto origin/main.
    rebase_succeeded: bool
    # Human-readable descriptions of anything that went wrong.
    errors: list[str]


def run_complete(
    client: "HarnessClient",
    config: "PipelineConfig",
    session_id: str,
    branch_name: str,
    issue_url: str,
    issue_number: str,
    issue_title: str,
    review_summary: str,
    affected_repos: list[str],
) -> CompleteResult:
    """Rebase the feature branch, push it, and create PRs (CDK repo first).

    Repos in ``affected_repos`` may be short names or ``org/repo``; only repos
    that actually have commits on ``branch_name`` relative to main are touched.
    Returns a CompleteResult whose ``pr_urls`` is non-empty on success.
    """
    errors: list[str] = []
    pr_urls: list[str] = []

    # Normalize affected_repos to full org/repo format for comparison with config.
    full_repo_map = {
        "agentcore-cli": config.cli_repo,
        "agentcore-l3-cdk-constructs": config.cdk_repo,
        config.cli_repo: config.cli_repo,
        config.cdk_repo: config.cdk_repo,
    }

    # Filter to repos that actually have changes on the feature branch.
    repos_with_changes: list[str] = []
    for repo in affected_repos:
        repo_name = repo.split("/")[-1] if "/" in repo else repo
        stdout, _, exit_code = client.run_command(
            session_id, f"cd {repo_name} && git log main..{branch_name} --oneline 2>/dev/null"
        )
        if exit_code == 0 and stdout.strip():
            full_repo = full_repo_map.get(repo, repo)
            repos_with_changes.append(full_repo)

    if not repos_with_changes:
        return CompleteResult(pr_urls=[], rebase_succeeded=False, errors=["No repos have changes on the feature branch"])

    # Rebase and push each repo that has changes. A failed rebase is aborted
    # (leaving the branch as-is) and the push proceeds best-effort with
    # --force-with-lease, which refuses to clobber unseen remote commits.
    rebase_succeeded = True
    for repo in repos_with_changes:
        repo_name = repo.split("/")[-1] if "/" in repo else repo

        _, stderr, exit_code = client.run_command(
            session_id, f"cd {repo_name} && git fetch origin main && git rebase origin/main"
        )
        if exit_code != 0:
            rebase_succeeded = False
            client.run_command(session_id, f"cd {repo_name} && git rebase --abort")
            errors.append(f"Rebase failed in {repo_name}: {stderr[:500]}")

        _, stderr, exit_code = client.run_command(
            session_id, f"cd {repo_name} && git push origin {branch_name} --force-with-lease"
        )
        if exit_code != 0:
            errors.append(f"Push failed in {repo_name}: {stderr[:500]}")

    # Create PRs — CDK first if both repos have changes.
    repo_order = []
    if config.cdk_repo in repos_with_changes:
        repo_order.append(config.cdk_repo)
    if config.cli_repo in repos_with_changes:
        repo_order.append(config.cli_repo)

    for repo in repo_order:
        repo_name = repo.split("/")[-1]

        # Let the agent create the PR — it can read the repo's PR template and fill it in properly
        pr_message = (
            f"Create a pull request in this repo for branch {branch_name}.\n"
            f"Issue: {issue_url} (#{issue_number})\n"
            f"Issue title: {issue_title}\n"
            f"Review summary: {review_summary}\n"
            f"Read the PR template at .github/pull_request_template.md and fill it in properly.\n"
            f"IMPORTANT: The Related Issue section MUST say 'Closes #{issue_number}' to auto-close the issue when merged.\n"
            f"Use a descriptive title based on the issue title. Do NOT use a generic title like 'fix: resolve #N'.\n"
            f"Use: gh pr create --title '' --body-file /tmp/pr_body.md --head {branch_name}\n"
            f"Write the filled-in template to /tmp/pr_body.md first."
        )
        pr_output = client.invoke(session_id=session_id, message=pr_message)

        # Extract the PR URL from the agent's transcript; if absent, ask gh
        # whether a PR for this branch exists anyway.
        # BUG FIX: removed a stale trailing `if exit_code == 0 and stdout.strip()`
        # block that re-read variables left over from the rebase/push commands
        # and appended the `git log` output as a bogus PR URL on every run.
        url_match = re.search(r"https://github\.com/[^\s]+/pull/\d+", pr_output)
        if url_match:
            pr_urls.append(url_match.group(0))
        else:
            stdout, _, _ = client.run_command(
                session_id, f"cd {repo_name} && gh pr list --head {branch_name} --json url --jq '.[0].url'"
            )
            if stdout.strip():
                pr_urls.append(stdout.strip())
            else:
                errors.append(f"PR may have been created in {repo} but could not extract URL")

    return CompleteResult(pr_urls=pr_urls, rebase_succeeded=rebase_succeeded, errors=errors)
b/agents/orchestrations/fix_and_review/phases/extract.py new file mode 100644 index 000000000..fbe8e713c --- /dev/null +++ b/agents/orchestrations/fix_and_review/phases/extract.py @@ -0,0 +1,55 @@ +from dataclasses import dataclass + +from core.harness_client import HarnessClient +from orchestrations.fix_and_review.partitioning import DiffStats + + +@dataclass +class ExtractResult: + diff_stat: str + full_diff: str + commit_log: str + stats: DiffStats + + +def run_extract( + client: HarnessClient, + session_id: str, + cli_repo: str, + cdk_repo: str, +) -> ExtractResult: + diff_stat_stdout, _, _ = client.run_command(session_id, "git diff main --stat") + full_diff_stdout, _, _ = client.run_command(session_id, "git diff main") + commit_log_stdout, _, _ = client.run_command(session_id, "git log main..HEAD --oneline") + + changed_files: list[str] = [] + for line in diff_stat_stdout.strip().split("\n"): + line = line.strip() + if "|" in line: + file_path = line.split("|")[0].strip() + if file_path: + changed_files.append(file_path) + + total_lines = 0 + for line in full_diff_stdout.split("\n"): + if line.startswith("+") and not line.startswith("+++"): + total_lines += 1 + elif line.startswith("-") and not line.startswith("---"): + total_lines += 1 + + has_cli = any(f.startswith(cli_repo) or f.startswith("src/cli") for f in changed_files) + has_cdk = any(f.startswith(cdk_repo) or f.startswith("src/cdk") for f in changed_files) + cross_repo = has_cli and has_cdk + + stats = DiffStats( + changed_files=changed_files, + total_lines=total_lines, + cross_repo=cross_repo, + ) + + return ExtractResult( + diff_stat=diff_stat_stdout, + full_diff=full_diff_stdout, + commit_log=commit_log_stdout, + stats=stats, + ) diff --git a/agents/orchestrations/fix_and_review/phases/fix.py b/agents/orchestrations/fix_and_review/phases/fix.py new file mode 100644 index 000000000..ce11e110c --- /dev/null +++ b/agents/orchestrations/fix_and_review/phases/fix.py @@ -0,0 +1,30 @@ +from core.config 
import PipelineConfig +from core.harness_client import HarnessClient +from core.parsing import Finding +from orchestrations.fix_and_review.phases.setup import load_prompt + + +def run_fix( + client: HarnessClient, + config: PipelineConfig, + session_id: str, + findings: list[Finding], + branch_name: str, + round_number: int, +) -> str: + severity_order = {"critical": 0, "high": 1, "medium": 2, "low": 3} + sorted_findings = sorted(findings, key=lambda f: severity_order.get(f.severity, 4)) + + findings_text = "" + for f in sorted_findings: + findings_text += f"### [{f.severity.upper()}] {f.file}:{f.line}\n" + findings_text += f"**Issue:** {f.description}\n" + findings_text += f"**Suggestion:** {f.suggestion}\n\n" + + prompt = load_prompt( + "fixer.md", + findings_text=findings_text, + round_number=str(round_number), + branch_name=branch_name, + ) + return client.invoke(session_id=session_id, message=prompt) diff --git a/agents/orchestrations/fix_and_review/phases/plan.py b/agents/orchestrations/fix_and_review/phases/plan.py new file mode 100644 index 000000000..20eee440f --- /dev/null +++ b/agents/orchestrations/fix_and_review/phases/plan.py @@ -0,0 +1,21 @@ +from core.config import PipelineConfig +from core.harness_client import HarnessClient +from orchestrations.fix_and_review.phases.setup import load_prompt + + +def run_plan( + client: HarnessClient, + config: PipelineConfig, + session_id: str, + issue_details: str, + devex_content: str | None = None, + impl_content: str | None = None, +) -> str: + if devex_content and impl_content: + prompt = load_prompt("planner.md", + issue_details=issue_details, + devex_content=devex_content, + impl_content=impl_content) + else: + prompt = load_prompt("planner.md", issue_details=issue_details) + return client.invoke(session_id=session_id, message=prompt) diff --git a/agents/orchestrations/fix_and_review/phases/review.py b/agents/orchestrations/fix_and_review/phases/review.py new file mode 100644 index 000000000..be1fabbce --- 
/dev/null +++ b/agents/orchestrations/fix_and_review/phases/review.py @@ -0,0 +1,49 @@ +from core.config import PipelineConfig +from core.harness_client import HarnessClient +from core.parsing import ReviewResult, parse_reviewer_output +from orchestrations.fix_and_review.partitioning import ReviewerAssignment +from orchestrations.fix_and_review.phases.setup import load_prompt + + +def run_review( + client: HarnessClient, + config: PipelineConfig, + assignments: list[ReviewerAssignment], + branch_name: str, + issue_summary: str, + previous_findings_context: str = "", +) -> list[tuple[ReviewResult | None, str]]: + results: list[tuple[ReviewResult | None, str]] = [] + + for assignment in assignments: + session_id = HarnessClient.new_session_id() + prompt = load_prompt( + "reviewer.md", + issue_summary=issue_summary, + branch_name=branch_name, + cli_repo=config.cli_repo, + cdk_repo=config.cdk_repo, + focus=assignment.focus, + assigned_files=", ".join(assignment.files), + previous_findings_context=previous_findings_context, + ) + + raw_output = client.invoke(session_id=session_id, message=prompt) + parsed = parse_reviewer_output(raw_output) + + if parsed is None: + retry_msg = ( + "Your previous output was not valid JSON. 
Please output ONLY a JSON object " + "wrapped in ```json fences with this schema: " + '{"approved": boolean, "findings": [{"severity": "critical"|"high"|"medium"|"low", ' + '"file": "path", "line": number, "description": "...", "suggestion": "..."}]}' + ) + for _ in range(2): + raw_output = client.invoke(session_id=session_id, message=retry_msg) + parsed = parse_reviewer_output(raw_output) + if parsed is not None: + break + + results.append((parsed, raw_output)) + + return results diff --git a/agents/orchestrations/fix_and_review/phases/setup.py b/agents/orchestrations/fix_and_review/phases/setup.py new file mode 100644 index 000000000..32608a182 --- /dev/null +++ b/agents/orchestrations/fix_and_review/phases/setup.py @@ -0,0 +1,40 @@ +from pathlib import Path + +from core.config import PipelineConfig +from core.harness_client import HarnessClient + + +_prompts_dir: Path | None = None + + +def set_prompts_dir(path: Path) -> None: + global _prompts_dir + _prompts_dir = path + + +def load_prompt(name: str, **kwargs: str) -> str: + if _prompts_dir is None: + raise RuntimeError("Prompts directory not set. 
"""Phase 1.5: sanity-check that files referenced by the plan actually exist."""

import re
from dataclasses import dataclass
from typing import TYPE_CHECKING

if TYPE_CHECKING:  # annotation-only import; no runtime dependency needed
    from core.harness_client import HarnessClient

# Words that suggest the plan intends to CREATE a file rather than modify one.
# "add"/"new" are substring heuristics and may over-match (e.g. "address");
# that errs on the permissive side, which matches the original intent.
_CREATION_MARKERS = ("create", "new", "add")


@dataclass
class ValidationResult:
    """Result of checking the plan's file references against the working tree."""

    # True when every referenced file exists or is plausibly marked for creation.
    valid: bool
    # One message per file reference that could not be resolved.
    errors: list[str]


def run_validate(
    client: "HarnessClient",
    session_id: str,
    plan_text: str,
) -> ValidationResult:
    """Verify each src/test file path mentioned in the plan exists in one repo.

    A path missing from both repos is only an error when no plan line that
    mentions that path contains a creation marker — otherwise we assume the
    plan intends to create the file.
    """
    file_refs = re.findall(r"(?:src|test|tests)/[\w/.-]+\.(?:ts|tsx|js|json)", plan_text)
    file_refs = list(set(file_refs))

    # Lowercased plan lines for per-file creation-marker checks.
    plan_lines = plan_text.lower().splitlines()

    errors: list[str] = []
    for file_ref in file_refs:
        for repo_dir in ["agentcore-cli", "agentcore-l3-cdk-constructs"]:
            stdout, stderr, exit_code = client.run_command(
                session_id, f"test -f {repo_dir}/{file_ref} && echo EXISTS || echo MISSING"
            )
            if "EXISTS" in stdout:
                break
        else:
            # BUG FIX: the old condition `"create" not in plan_text.lower() or
            # file_ref not in plan_text` was always driven by the first clause
            # (file_ref was extracted FROM plan_text, so the second clause is
            # always False), which suppressed every missing-file error whenever
            # the word "create" appeared anywhere in the plan. Now only lines
            # that mention THIS file are checked for a creation marker.
            mentions = [line for line in plan_lines if file_ref.lower() in line]
            if not any(marker in line for line in mentions for marker in _CREATION_MARKERS):
                errors.append(f"File not found in either repo: {file_ref}")

    return ValidationResult(valid=len(errors) == 0, errors=errors)
"""Phase 2.5: verify commits, typecheck, tests, and push for each changed repo."""

from dataclasses import dataclass
from typing import TYPE_CHECKING

if TYPE_CHECKING:  # annotation-only import; no runtime dependency needed
    from core.harness_client import HarnessClient

# Per-repo unit-test commands; anything not listed falls back to `npm test`.
TEST_COMMANDS = {
    "agentcore-cli": "npm run test:unit",
    "agentcore-l3-cdk-constructs": "npm test",
}


@dataclass
class VerificationResult:
    """Pass/fail status of each verification step plus collected error messages."""

    commits_exist: bool
    typecheck_passes: bool
    tests_pass: bool
    branch_pushed: bool
    errors: list[str]

    @property
    def all_passed(self) -> bool:
        """True only when every verification step succeeded."""
        return (
            self.commits_exist
            and self.typecheck_passes
            and self.tests_pass
            and self.branch_pushed
        )


def run_verify(
    client: "HarnessClient",
    session_id: str,
    branch_name: str,
    affected_repos: list[str],
) -> VerificationResult:
    """Check that the branch has commits, typechecks, passes tests, and is pushed.

    Only repos with a non-empty `git diff main --stat` are typechecked, tested,
    and pushed. All failures are collected into ``errors`` rather than raising.
    """
    errors: list[str] = []

    # Check commits exist — cd into first affected repo
    first_repo = affected_repos[0] if affected_repos else "agentcore-cli"
    stdout, _, exit_code = client.run_command(
        session_id, f"cd {first_repo} && git log main..HEAD --oneline"
    )
    commits_exist = exit_code == 0 and len(stdout.strip()) > 0
    if not commits_exist:
        errors.append(f"No commits found on feature branch in {first_repo}")

    # Only typecheck/test repos that were actually changed
    typecheck_passes = True
    for repo in affected_repos:
        stdout, _, exit_code = client.run_command(
            session_id, f"cd {repo} && git diff main --stat 2>/dev/null"
        )
        if not stdout.strip():
            continue
        print(f" Running typecheck in {repo}...", flush=True)
        # BUG FIX: `set -o pipefail` so the exit code reflects npm, not `tail`
        # (a bare pipeline reports the LAST stage's status, and tail always
        # exits 0). Assumes the harness shell is bash-compatible — TODO confirm.
        out, _, exit_code = client.run_command(
            session_id,
            f"set -o pipefail; cd {repo} && npm run typecheck 2>&1 | tail -5",
        )
        if exit_code != 0:
            typecheck_passes = False
            # BUG FIX: with 2>&1 the diagnostics land on stdout; the old code
            # reported the (always empty) stderr channel.
            errors.append(f"Typecheck failed in {repo}: {out[:500]}")

    tests_pass = True
    for repo in affected_repos:
        stdout, _, exit_code = client.run_command(
            session_id, f"cd {repo} && git diff main --stat 2>/dev/null"
        )
        if not stdout.strip():
            continue
        test_cmd = TEST_COMMANDS.get(repo, "npm test")
        print(f" Running tests in {repo} (may take a few minutes)...", flush=True)
        # NOTE(review): with pipefail, a failing test command now surfaces here.
        # grep exits 1 when nothing matches; jest summaries always contain
        # "Tests:"/"Test Suites:" so a clean run still matches — confirm for
        # any non-jest runner added to TEST_COMMANDS.
        out, _, exit_code = client.run_command(
            session_id,
            f'set -o pipefail; cd {repo} && {test_cmd} 2>&1 | grep -E "(FAIL|PASS|Tests:|Test Suites:)" | tail -20',
        )
        if exit_code != 0:
            tests_pass = False
            errors.append(f"Tests failed in {repo}: {out[:500]}")

    # Push from each repo that has changes
    branch_pushed = True
    for repo in affected_repos:
        stdout, _, _ = client.run_command(
            session_id, f"cd {repo} && git diff main --stat 2>/dev/null"
        )
        if not stdout.strip():
            continue
        print(f" Pushing {branch_name} in {repo}...", flush=True)
        _, stderr, exit_code = client.run_command(
            session_id, f"cd {repo} && git push origin {branch_name}"
        )
        if exit_code != 0:
            branch_pushed = False
            errors.append(f"Push failed in {repo}: {stderr[:500]}")

    return VerificationResult(
        commits_exist=commits_exist,
        typecheck_passes=typecheck_passes,
        tests_pass=tests_pass,
        branch_pushed=branch_pushed,
        errors=errors,
    )
+""" + +import json +import os +import sys +import time +import uuid + +import boto3 + +# ANSI color codes +CYAN = "\033[36m" +YELLOW = "\033[33m" +GREEN = "\033[32m" +RED = "\033[31m" +DIM = "\033[2m" +RESET = "\033[0m" + +SCRIPTS_DIR = os.path.dirname(__file__) + + +def read_prompt(filename): + """Read a prompt template from the prompts directory.""" + path = os.path.join(SCRIPTS_DIR, "prompts", filename) + with open(path) as f: + return f.read() + + +def invoke_harness_streaming(harness_arn, session_id, system_prompt, messages, model_id, region): + """Call invoke_harness via boto3 and return the event stream.""" + client = boto3.client("bedrock-agentcore", region_name=region) + response = client.invoke_harness( + harnessArn=harness_arn, + runtimeSessionId=session_id, + systemPrompt=[{"text": system_prompt}], + messages=messages, + model={"bedrockModelConfig": {"modelId": model_id}}, + ) + return response["stream"] + + +def parse_events(event_stream): + """Yield (event_type, payload) tuples from the boto3 event stream.""" + for event in event_stream: + if "contentBlockStart" in event: + yield "contentBlockStart", event["contentBlockStart"] + elif "contentBlockDelta" in event: + yield "contentBlockDelta", event["contentBlockDelta"] + elif "contentBlockStop" in event: + yield "contentBlockStop", event["contentBlockStop"] + elif "messageStop" in event: + yield "messageStop", event["messageStop"] + elif "internalServerException" in event: + yield "internalServerException", event["internalServerException"] + elif "runtimeClientError" in event: + yield "runtimeClientError", event["runtimeClientError"] + + +def print_stream(event_stream): + """Display harness events with GitHub Actions log groups. 
def print_stream(event_stream):
    """Render harness events as GitHub Actions log output.

    Event flow:
      contentBlockStart — a new block begins (text or tool call)
      contentBlockDelta — incremental chunks of text or tool-input JSON
      contentBlockStop  — block complete; full tool input can be shown
      messageStop       — the agent finished its turn
      internalServerException / runtimeClientError — fatal errors

    Tool calls become collapsible ::group::/::endgroup:: sections in the
    Actions log UI; the agent's reasoning text is printed inline, dimmed.
    """
    started_at = time.time()
    call_count = 0
    active_tool = None
    pending_input = ""
    tool_started_at = 0.0
    group_open = False
    pending_text = ""

    def end_group():
        # Close the current ::group:: section, if one is open.
        nonlocal group_open
        if group_open:
            print("::endgroup::", flush=True)
            group_open = False

    def emit_text():
        # Print any buffered agent reasoning text, dimmed, line by line.
        nonlocal pending_text
        if pending_text:
            for line in pending_text.splitlines():
                print(f"{DIM}{line}{RESET}", flush=True)
            pending_text = ""

    for event_type, payload in parse_events(event_stream):

        if event_type == "contentBlockStart":
            block_start = payload.get("start", {})
            if "toolUse" in block_start:
                active_tool = block_start["toolUse"].get("name", "unknown")
                pending_input = ""
                tool_started_at = time.time()
                call_count += 1

        elif event_type == "contentBlockDelta":
            delta = payload.get("delta", {})
            if "text" in delta:
                end_group()
                pending_text += delta["text"]
            if "toolUse" in delta:
                pending_input += delta["toolUse"].get("input", "")

        elif event_type == "contentBlockStop":
            emit_text()
            if active_tool:
                duration = time.time() - tool_started_at
                try:
                    details = json.loads(pending_input)
                except (json.JSONDecodeError, TypeError):
                    details = pending_input

                end_group()

                command = details.get("command") if isinstance(details, dict) else None
                title = f"{CYAN}[{call_count}]{RESET} {YELLOW}{active_tool}{RESET} {DIM}({duration:.1f}s){RESET}"
                if command:
                    title += f": $ {command}"

                print(f"::group::{title}", flush=True)
                group_open = True

                if isinstance(details, dict):
                    for key, value in details.items():
                        if key != "command":
                            print(f" {DIM}{key}:{RESET} {str(value)[:300]}", flush=True)

                active_tool = None
                pending_input = ""

        elif event_type == "messageStop":
            emit_text()
            end_group()
            if payload.get("stopReason") == "end_turn":
                total = time.time() - started_at
                print(f"\n\n{GREEN}{'=' * 50}", flush=True)
                print(f" Done ({int(total // 60)}m {int(total % 60)}s)", flush=True)
                print(f"{'=' * 50}{RESET}", flush=True)

        elif event_type == "internalServerException":
            end_group()
            print(f"\n{RED}ERROR: {payload}{RESET}", file=sys.stderr)
            sys.exit(1)

        elif event_type == "runtimeClientError":
            end_group()
            print(f"\n{RED}ERROR: {payload.get('message', payload)}{RESET}", file=sys.stderr)
            sys.exit(1)

    end_group()
    total = time.time() - started_at
    print(f"\n{GREEN}Review complete.{RESET} {DIM}({call_count} tool calls, {int(total)}s total){RESET}")


# --- Main ---

# All configuration comes from environment variables (GitHub secrets/workflow).
MODEL_ID = os.environ.get("HARNESS_MODEL_ID", "us.anthropic.claude-opus-4-7")
HARNESS_ARN = os.environ.get("HARNESS_ARN", "")
PR_URL = os.environ.get("PR_URL", "")

for var_name, var_value in (("HARNESS_ARN", HARNESS_ARN), ("PR_URL", PR_URL)):
    if not var_value:
        print(f"{RED}ERROR: {var_name} environment variable is required{RESET}", file=sys.stderr)
        sys.exit(1)

# The region is embedded in the ARN: arn:aws:bedrock-agentcore:{region}:{account}:harness/{id}
REGION = HARNESS_ARN.split(":")[3]
SESSION_ID = str(uuid.uuid4()).upper()

print(f"{CYAN}Session:{RESET} {SESSION_ID}")
print(f"{CYAN}PR:{RESET} {PR_URL}")
print(f"{CYAN}Harness:{RESET} {HARNESS_ARN}")
print()

SYSTEM_PROMPT = read_prompt("system.md")
REVIEW_PROMPT = read_prompt("review.md").format(pr_url=PR_URL)

messages = [{"role": "user", "content": [{"text": REVIEW_PROMPT}]}]

try:
    event_stream = invoke_harness_streaming(
        HARNESS_ARN, SESSION_ID, SYSTEM_PROMPT, messages, MODEL_ID, REGION
    )
except Exception as e:
    print(f"{RED}ERROR: Failed to invoke harness: {e}{RESET}", file=sys.stderr)
    sys.exit(1)

print_stream(event_stream)
harness: {e}{RESET}", file=sys.stderr) + sys.exit(1) + +print_stream(event_stream) diff --git a/agents/pr_reviewer/prompts/review.md b/agents/pr_reviewer/prompts/review.md new file mode 100644 index 000000000..d34c67b95 --- /dev/null +++ b/agents/pr_reviewer/prompts/review.md @@ -0,0 +1,18 @@ +Review this GitHub PR: {pr_url} + +You have tools to fetch the PR diff, read files, search the web, and post comments on the PR. + +You have these repos cloned locally for context: + +- /opt/workspace/agentcore-cli — aws/agentcore-cli +- /opt/workspace/agentcore-l3-cdk-constructs — aws/agentcore-l3-cdk-constructs + +Before reviewing, read all existing comments on the PR to understand what has already been discussed. Do not repeat or +re-post issues that have already been raised in existing comments. + +Review the PR. If there are any serious issues that require code changes before merging, post a comment on the PR for +each issue explaining the problem. If there are multiple ways to fix an issue, list the options so the author can +choose. Skip style nits and minor suggestions — only flag things that actually need to change. + +If all serious issues have already been raised in existing comments, or if you found no new issues, post a single +comment on the PR saying it looks good to merge (or that all issues have already been flagged). diff --git a/agents/pr_reviewer/prompts/system.md b/agents/pr_reviewer/prompts/system.md new file mode 100644 index 000000000..52a3d2260 --- /dev/null +++ b/agents/pr_reviewer/prompts/system.md @@ -0,0 +1,25 @@ +# AgentCore CLI Development Workspace + +This workspace contains two repos for developing and testing the AgentCore CLI. + +## Repositories + +### agentcore-cli/ (`aws/agentcore-cli`) + +The terminal experience for creating, developing, and deploying AI agents to AgentCore. Node.js/TypeScript CLI built +with Ink (React-based TUI). 
+ +### agentcore-l3-cdk-constructs/ (`aws/agentcore-l3-cdk-constructs`) + +AWS CDK L3 constructs for declaring and deploying AgentCore infrastructure. Used by agentcore-cli to vend CDK projects +when users run `agentcore create`. + +## How they relate + +`agentcore-cli` is the main product. It vends CDK projects using constructs from `agentcore-l3-cdk-constructs`. + +## Testing with a bundled distribution + +Run `npm run bundle` in `agentcore-cli/` to create a tar distribution that includes the packaged +`agentcore-l3-cdk-constructs`. You can then install it globally with `npm install -g <path-to-tar>` to test the CLI +end-to-end. diff --git a/agents/pyproject.toml b/agents/pyproject.toml new file mode 100644 index 000000000..c1ea76be7 --- /dev/null +++ b/agents/pyproject.toml @@ -0,0 +1,18 @@ +[project] +name = "harness-invoke" +version = "0.1.0" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.14" +dependencies = [ + "boto3>=1.42.94,<1.43.2", + "botocore>=1.42.94,<1.43.2", + "pydantic>=2.13.1", + "pyyaml>=6.0.3", + "urllib3>=2.6.3", +] + +[dependency-groups] +dev = [ + "pytest>=9.0.3", +] diff --git a/agents/tests/__init__.py b/agents/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/agents/tests/test_config.py b/agents/tests/test_config.py new file mode 100644 index 000000000..3bea71950 --- /dev/null +++ b/agents/tests/test_config.py @@ -0,0 +1,65 @@ +import os +import tempfile + +import pytest +import yaml + +from core.config import PipelineConfig + + +def test_from_yaml_loads_all_fields(): + data = { + "harness_arn": "arn:aws:bedrock-agentcore:us-west-2:123456789:harness/Test-abc", + "region": "us-east-1", + "data_plane_endpoint": "https://dp.example.com", + "aws_profile": "test-profile", + "model_id": "global.anthropic.claude-opus-4-7", + "min_reviewers": 2, + "max_reviewers": 4, + "max_review_rounds": 3, + "repos": { + "cli": "aws/agentcore-cli", + "cdk": "aws/agentcore-l3-cdk-constructs", 
+ }, + } + with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: + yaml.dump(data, f) + path = f.name + + try: + config = PipelineConfig.from_yaml(path) + assert config.harness_arn == data["harness_arn"] + assert config.region == "us-east-1" + assert config.aws_profile == "test-profile" + assert config.min_reviewers == 2 + assert config.max_reviewers == 4 + assert config.cli_repo == "aws/agentcore-cli" + assert config.cdk_repo == "aws/agentcore-l3-cdk-constructs" + finally: + os.unlink(path) + + +def test_from_yaml_uses_defaults_for_missing_fields(): + data = {"harness_arn": "arn:aws:bedrock-agentcore:us-west-2:123:harness/X-abc"} + with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: + yaml.dump(data, f) + path = f.name + + try: + config = PipelineConfig.from_yaml(path) + assert config.harness_arn == data["harness_arn"] + assert config.region == "us-west-2" + assert config.aws_profile == "deploy" + assert config.min_reviewers == 3 + assert config.max_reviewers == 5 + finally: + os.unlink(path) + + +def test_defaults(): + config = PipelineConfig(harness_arn="arn:aws:bedrock-agentcore:us-west-2:123:harness/X-abc") + assert config.region == "us-west-2" + assert config.model_id == "global.anthropic.claude-opus-4-7" + assert config.min_reviewers == 3 + assert config.max_reviewers == 5 + assert config.max_review_rounds == 5 diff --git a/agents/tests/test_harness_client.py b/agents/tests/test_harness_client.py new file mode 100644 index 000000000..59e4fd27d --- /dev/null +++ b/agents/tests/test_harness_client.py @@ -0,0 +1,21 @@ +from core.harness_client import HarnessClient + + +def test_accumulate_command_output(): + client = HarnessClient.__new__(HarnessClient) + events = [ + {"chunk": {"contentDelta": {"stdout": "file1.ts\n"}}}, + {"chunk": {"contentDelta": {"stdout": "file2.ts\n"}}}, + {"chunk": {"contentDelta": {"stderr": "warning: something\n"}}}, + {"chunk": {"contentStop": {"exitCode": 0, "status": "SUCCESS"}}}, 
+ ] + stdout, stderr, exit_code = client._accumulate_command(events) + assert stdout == "file1.ts\nfile2.ts\n" + assert stderr == "warning: something\n" + assert exit_code == 0 + + +def test_new_session_id_format(): + sid = HarnessClient.new_session_id() + assert len(sid) == 36 + assert sid == sid.upper() diff --git a/agents/tests/test_parsing.py b/agents/tests/test_parsing.py new file mode 100644 index 000000000..835bcab05 --- /dev/null +++ b/agents/tests/test_parsing.py @@ -0,0 +1,81 @@ +import pytest + +from core.parsing import Finding, ReviewResult, parse_reviewer_output + + +def test_parse_json_in_markdown_fences(): + raw = '''Here is my review: + +```json +{ + "approved": false, + "findings": [ + { + "severity": "high", + "file": "src/cli/commands/deploy/index.ts", + "line": 42, + "description": "Missing null check", + "suggestion": "Add null check before accessing property" + } + ] +} +``` + +That's my review.''' + result = parse_reviewer_output(raw) + assert result is not None + assert result.approved is False + assert len(result.findings) == 1 + assert result.findings[0].severity == "high" + assert result.findings[0].file == "src/cli/commands/deploy/index.ts" + + +def test_parse_bare_json(): + raw = '{"approved": true, "findings": []}' + result = parse_reviewer_output(raw) + assert result is not None + assert result.approved is True + assert result.findings == [] + + +def test_parse_returns_none_for_garbage(): + result = parse_reviewer_output("This is just text with no JSON at all.") + assert result is None + + +def test_parse_returns_none_for_invalid_schema(): + raw = '{"approved": "yes", "findings": "none"}' + result = parse_reviewer_output(raw) + assert result is None + + +def test_parse_json_with_nested_braces(): + raw = '''```json +{ + "approved": false, + "findings": [ + { + "severity": "medium", + "file": "src/schema/types.ts", + "line": 10, + "description": "Type should use Record<string, unknown> instead of object", + "suggestion": "Replace object 
with Record<string, unknown>" + } + ] +} +```''' + result = parse_reviewer_output(raw) + assert result is not None + assert len(result.findings) == 1 + assert result.findings[0].severity == "medium" + + +def test_finding_model_validates_severity(): + with pytest.raises(Exception): + Finding( + severity="urgent", + file="test.ts", + line=1, + description="bad", + suggestion="fix", + ) diff --git a/agents/tests/test_partitioning.py b/agents/tests/test_partitioning.py new file mode 100644 index 000000000..b834a0126 --- /dev/null +++ b/agents/tests/test_partitioning.py @@ -0,0 +1,74 @@ +import pytest + +from orchestrations.fix_and_review.partitioning import ( + DiffStats, + ReviewerAssignment, + calculate_reviewer_count, + partition_round1_by_directory, + partition_round2_focus_prompts, + partition_round3_risk_areas, +) + + +def test_reviewer_count_small_diff(): + stats = DiffStats(changed_files=["src/cli/commands/deploy/index.ts"], total_lines=30, cross_repo=False) + assert calculate_reviewer_count(stats, min_r=3, max_r=5) == 3 + + +def test_reviewer_count_medium_diff(): + files = [f"src/file{i}.ts" for i in range(5)] + stats = DiffStats(changed_files=files, total_lines=300, cross_repo=False) + count = calculate_reviewer_count(stats, min_r=3, max_r=5) + assert 3 <= count <= 4 + + +def test_reviewer_count_large_diff(): + files = [f"src/file{i}.ts" for i in range(10)] + stats = DiffStats(changed_files=files, total_lines=600, cross_repo=False) + assert calculate_reviewer_count(stats, min_r=3, max_r=5) == 5 + + +def test_reviewer_count_cross_repo_adds_one(): + stats = DiffStats(changed_files=["src/a.ts", "src/b.ts"], total_lines=50, cross_repo=True) + count = calculate_reviewer_count(stats, min_r=3, max_r=5) + assert count == 4 + + +def test_round1_groups_by_directory(): + files = [ + "src/cli/commands/deploy/index.ts", + "src/cli/commands/deploy/utils.ts", + "src/cdk/constructs/l3/agent.ts", + "test/deploy.test.ts", + ] + assignments = 
partition_round1_by_directory(files, num_reviewers=3) + assert len(assignments) == 3 + all_files = [] + for a in assignments: + all_files.extend(a.files) + assert set(all_files) == set(files) + + +def test_round1_fewer_groups_than_reviewers(): + files = ["src/cli/commands/deploy/index.ts", "src/cli/commands/deploy/utils.ts"] + assignments = partition_round1_by_directory(files, num_reviewers=3) + assert len(assignments) == 3 + assert all(len(a.files) > 0 for a in assignments) + + +def test_round2_returns_focus_prompts(): + prompts = partition_round2_focus_prompts(num_reviewers=4) + assert len(prompts) == 4 + assert all(isinstance(p, str) for p in prompts) + assert len(set(prompts)) == 4 + + +def test_round3_focuses_on_previous_findings(): + previous_findings_files = ["src/cli/commands/deploy/index.ts", "src/cdk/constructs/l3/agent.ts"] + all_changed_files = previous_findings_files + ["test/deploy.test.ts", "src/schema/types.ts"] + assignments = partition_round3_risk_areas(previous_findings_files, all_changed_files, num_reviewers=3) + assert len(assignments) == 3 + risk_files_covered = set() + for a in assignments: + risk_files_covered.update(a.files) + assert set(previous_findings_files).issubset(risk_files_covered) From 0b9208f869a122081da16611b07b28ceae6539a8 Mon Sep 17 00:00:00 2001 From: Aidan Daly <aidandal@amazon.com> Date: Tue, 5 May 2026 15:52:23 -0400 Subject: [PATCH 02/12] =?UTF-8?q?chore:=20remove=20pr=5Freviewer=20from=20?= =?UTF-8?q?this=20PR=20=E2=80=94=20migration=20is=20a=20separate=20task?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- agents/pr_reviewer/main.py | 200 --------------------------- agents/pr_reviewer/prompts/review.md | 18 --- agents/pr_reviewer/prompts/system.md | 25 ---- 3 files changed, 243 deletions(-) delete mode 100644 agents/pr_reviewer/main.py delete mode 100644 agents/pr_reviewer/prompts/review.md delete mode 100644 agents/pr_reviewer/prompts/system.md diff --git 
a/agents/pr_reviewer/main.py b/agents/pr_reviewer/main.py deleted file mode 100644 index 2ee174266..000000000 --- a/agents/pr_reviewer/main.py +++ /dev/null @@ -1,200 +0,0 @@ -"""Invoke Bedrock AgentCore Harness to review a GitHub PR. - -Reads PR_URL from the environment. Streams harness output to stdout. -Uses the boto3 bedrock-agentcore client's invoke_harness API. -""" - -import json -import os -import sys -import time -import uuid - -import boto3 - -# ANSI color codes -CYAN = "\033[36m" -YELLOW = "\033[33m" -GREEN = "\033[32m" -RED = "\033[31m" -DIM = "\033[2m" -RESET = "\033[0m" - -SCRIPTS_DIR = os.path.dirname(__file__) - - -def read_prompt(filename): - """Read a prompt template from the prompts directory.""" - path = os.path.join(SCRIPTS_DIR, "prompts", filename) - with open(path) as f: - return f.read() - - -def invoke_harness_streaming(harness_arn, session_id, system_prompt, messages, model_id, region): - """Call invoke_harness via boto3 and return the event stream.""" - client = boto3.client("bedrock-agentcore", region_name=region) - response = client.invoke_harness( - harnessArn=harness_arn, - runtimeSessionId=session_id, - systemPrompt=[{"text": system_prompt}], - messages=messages, - model={"bedrockModelConfig": {"modelId": model_id}}, - ) - return response["stream"] - - -def parse_events(event_stream): - """Yield (event_type, payload) tuples from the boto3 event stream.""" - for event in event_stream: - if "contentBlockStart" in event: - yield "contentBlockStart", event["contentBlockStart"] - elif "contentBlockDelta" in event: - yield "contentBlockDelta", event["contentBlockDelta"] - elif "contentBlockStop" in event: - yield "contentBlockStop", event["contentBlockStop"] - elif "messageStop" in event: - yield "messageStop", event["messageStop"] - elif "internalServerException" in event: - yield "internalServerException", event["internalServerException"] - elif "runtimeClientError" in event: - yield "runtimeClientError", event["runtimeClientError"] - - 
-def print_stream(event_stream): - """Display harness events with GitHub Actions log groups. - - The harness streams events as the agent works: - contentBlockStart — a new block begins (text or tool call) - contentBlockDelta — incremental chunks of text or tool input JSON - contentBlockStop — block complete, we now have full tool input to display - messageStop — agent finished - internalServerException — server error - - Tool calls are wrapped in ::group::/::endgroup:: for collapsible sections - in the GitHub Actions log UI. Agent reasoning text is printed inline in dim. - """ - start_time = time.time() - iteration = 0 - tool_name = None - tool_input = "" - tool_start = 0.0 - in_group = False - text_buffer = "" - - def close_group(): - nonlocal in_group - if in_group: - print("::endgroup::", flush=True) - in_group = False - - def flush_text(): - nonlocal text_buffer - if text_buffer: - for line in text_buffer.splitlines(): - print(f"{DIM}{line}{RESET}", flush=True) - text_buffer = "" - - for event_type, payload in parse_events(event_stream): - - if event_type == "contentBlockStart": - start = payload.get("start", {}) - if "toolUse" in start: - tool_name = start["toolUse"].get("name", "unknown") - tool_input = "" - tool_start = time.time() - iteration += 1 - - elif event_type == "contentBlockDelta": - delta = payload.get("delta", {}) - if "text" in delta: - close_group() - text_buffer += delta["text"] - if "toolUse" in delta: - tool_input += delta["toolUse"].get("input", "") - - elif event_type == "contentBlockStop": - flush_text() - if tool_name: - elapsed = time.time() - tool_start - try: - parsed = json.loads(tool_input) - except (json.JSONDecodeError, TypeError): - parsed = tool_input - - close_group() - - cmd = parsed.get("command") if isinstance(parsed, dict) else None - header = f"{CYAN}[{iteration}]{RESET} {YELLOW}{tool_name}{RESET} {DIM}({elapsed:.1f}s){RESET}" - if cmd: - header += f": $ {cmd}" - - print(f"::group::{header}", flush=True) - in_group = True 
- - if isinstance(parsed, dict): - for k, v in parsed.items(): - if k != "command": - print(f" {DIM}{k}:{RESET} {str(v)[:300]}", flush=True) - - tool_name = None - tool_input = "" - - elif event_type == "messageStop": - flush_text() - close_group() - if payload.get("stopReason") == "end_turn": - total = time.time() - start_time - print(f"\n\n{GREEN}{'=' * 50}", flush=True) - print(f" Done ({int(total // 60)}m {int(total % 60)}s)", flush=True) - print(f"{'=' * 50}{RESET}", flush=True) - - elif event_type == "internalServerException": - close_group() - print(f"\n{RED}ERROR: {payload}{RESET}", file=sys.stderr) - sys.exit(1) - - elif event_type == "runtimeClientError": - close_group() - print(f"\n{RED}ERROR: {payload.get('message', payload)}{RESET}", file=sys.stderr) - sys.exit(1) - - close_group() - total = time.time() - start_time - print(f"\n{GREEN}Review complete.{RESET} {DIM}({iteration} tool calls, {int(total)}s total){RESET}") - - -# --- Main --- - -# All config comes from environment variables (set via GitHub secrets/workflow) -MODEL_ID = os.environ.get("HARNESS_MODEL_ID", "us.anthropic.claude-opus-4-7") -HARNESS_ARN = os.environ.get("HARNESS_ARN", "") -PR_URL = os.environ.get("PR_URL", "") - -for name, val in [("HARNESS_ARN", HARNESS_ARN), ("PR_URL", PR_URL)]: - if not val: - print(f"{RED}ERROR: {name} environment variable is required{RESET}", file=sys.stderr) - sys.exit(1) - -# Extract region from the ARN (arn:aws:bedrock-agentcore:{region}:{account}:harness/{id}) -REGION = HARNESS_ARN.split(":")[3] -SESSION_ID = str(uuid.uuid4()).upper() - -print(f"{CYAN}Session:{RESET} {SESSION_ID}") -print(f"{CYAN}PR:{RESET} {PR_URL}") -print(f"{CYAN}Harness:{RESET} {HARNESS_ARN}") -print() - -SYSTEM_PROMPT = read_prompt("system.md") -REVIEW_PROMPT = read_prompt("review.md").format(pr_url=PR_URL) - -messages = [{"role": "user", "content": [{"text": REVIEW_PROMPT}]}] - -try: - event_stream = invoke_harness_streaming( - HARNESS_ARN, SESSION_ID, SYSTEM_PROMPT, messages, 
MODEL_ID, REGION - ) -except Exception as e: - print(f"{RED}ERROR: Failed to invoke harness: {e}{RESET}", file=sys.stderr) - sys.exit(1) - -print_stream(event_stream) diff --git a/agents/pr_reviewer/prompts/review.md b/agents/pr_reviewer/prompts/review.md deleted file mode 100644 index d34c67b95..000000000 --- a/agents/pr_reviewer/prompts/review.md +++ /dev/null @@ -1,18 +0,0 @@ -Review this GitHub PR: {pr_url} - -You have tools to fetch the PR diff, read files, search the web, and post comments on the PR. - -You have these repos cloned locally for context: - -- /opt/workspace/agentcore-cli — aws/agentcore-cli -- /opt/workspace/agentcore-l3-cdk-constructs — aws/agentcore-l3-cdk-constructs - -Before reviewing, read all existing comments on the PR to understand what has already been discussed. Do not repeat or -re-post issues that have already been raised in existing comments. - -Review the PR. If there are any serious issues that require code changes before merging, post a comment on the PR for -each issue explaining the problem. If there are multiple ways to fix an issue, list the options so the author can -choose. Skip style nits and minor suggestions — only flag things that actually need to change. - -If all serious issues have already been raised in existing comments, or if you found no new issues, post a single -comment on the PR saying it looks good to merge (or that all issues have already been flagged). diff --git a/agents/pr_reviewer/prompts/system.md b/agents/pr_reviewer/prompts/system.md deleted file mode 100644 index 52a3d2260..000000000 --- a/agents/pr_reviewer/prompts/system.md +++ /dev/null @@ -1,25 +0,0 @@ -# AgentCore CLI Development Workspace - -This workspace contains two repos for developing and testing the AgentCore CLI. - -## Repositories - -### agentcore-cli/ (`aws/agentcore-cli`) - -The terminal experience for creating, developing, and deploying AI agents to AgentCore. Node.js/TypeScript CLI built -with Ink (React-based TUI). 
- -### agentcore-l3-cdk-constructs/ (`aws/agentcore-l3-cdk-constructs`) - -AWS CDK L3 constructs for declaring and deploying AgentCore infrastructure. Used by agentcore-cli to vend CDK projects -when users run `agentcore create`. - -## How they relate - -`agentcore-cli` is the main product. It vends CDK projects using constructs from `agentcore-l3-cdk-constructs`. - -## Testing with a bundled distribution - -Run `npm run bundle` in `agentcore-cli/` to create a tar distribution that includes the packaged -`agentcore-l3-cdk-constructs`. You can then install it globally with `npm install -g <path-to-tar>` to test the CLI -end-to-end. From 13964f991e87779a332575a2a334c5338dcbf850 Mon Sep 17 00:00:00 2001 From: Aidan Daly <aidandal@amazon.com> Date: Tue, 5 May 2026 16:57:22 -0400 Subject: [PATCH 03/12] fix: address 14 review findings from multi-agent code review Critical: - Remove stale variables in complete.py causing duplicate PR URLs High: - Add input validation in feature-builder.yml (path traversal, command injection) - Resolve AWS credentials per-request instead of freezing at construction - Use format_map with defaults to prevent KeyError on missing template vars - Capture test exit code separately from grep display in verify.py - Make JSON brace-depth counter string-aware in parsing.py - Gitignore config.yaml (contains account-specific ARN), add config.yaml.example - Guard against empty changed_files in partition_round1_by_directory Medium: - Add type coercion for numeric overrides in orchestrator - Only push after all local checks pass in verify.py - Skip push when rebase fails in complete.py - Lower Python requirement to >=3.12 - Widen boto3/botocore version constraints --- .github/workflows/feature-builder.yml | 23 ++++++++++ agents/.gitignore | 3 ++ agents/config.yaml.example | 10 ++++ agents/core/harness_client.py | 4 +- agents/core/parsing.py | 38 ++++++++++----- .../fix_and_review/orchestrator.py | 3 ++ .../fix_and_review/partitioning.py | 3 ++ 
.../fix_and_review/phases/complete.py | 5 +- .../fix_and_review/phases/setup.py | 3 +- .../fix_and_review/phases/verify.py | 46 ++++++++++++------- agents/pyproject.toml | 16 +++---- 11 files changed, 111 insertions(+), 43 deletions(-) create mode 100644 agents/.gitignore create mode 100644 agents/config.yaml.example diff --git a/.github/workflows/feature-builder.yml b/.github/workflows/feature-builder.yml index 13f8f7107..8cd67b04e 100644 --- a/.github/workflows/feature-builder.yml +++ b/.github/workflows/feature-builder.yml @@ -25,6 +25,29 @@ jobs: build-feature: runs-on: ubuntu-latest steps: + - name: Validate inputs + env: + FEATURE_NAME: ${{ inputs.feature_name }} + DEVEX_DOC: ${{ inputs.devex_doc }} + IMPL_DOC: ${{ inputs.impl_doc }} + run: | + if [[ ! "$FEATURE_NAME" =~ ^[a-zA-Z0-9_-]+$ ]]; then + echo "Error: feature_name must be alphanumeric with hyphens/underscores only" + exit 1 + fi + if [[ "$DEVEX_DOC" == *".."* ]] || [[ "$IMPL_DOC" == *".."* ]]; then + echo "Error: doc paths must not contain '..'" + exit 1 + fi + if [[ ! "$DEVEX_DOC" == *.md ]]; then + echo "Error: devex_doc must be a .md file" + exit 1 + fi + if [[ ! 
"$IMPL_DOC" == *.md ]]; then + echo "Error: impl_doc must be a .md file" + exit 1 + fi + - name: Checkout uses: actions/checkout@v6 diff --git a/agents/.gitignore b/agents/.gitignore new file mode 100644 index 000000000..77a810105 --- /dev/null +++ b/agents/.gitignore @@ -0,0 +1,3 @@ +config.yaml +__pycache__/ +.venv/ diff --git a/agents/config.yaml.example b/agents/config.yaml.example new file mode 100644 index 000000000..0f233d50e --- /dev/null +++ b/agents/config.yaml.example @@ -0,0 +1,10 @@ +harness_arn: "arn:aws:bedrock-agentcore:us-west-2:YOUR_ACCOUNT_ID:harness/YOUR_HARNESS_ID" +region: "us-west-2" +aws_profile: "deploy" +model_id: "global.anthropic.claude-opus-4-7" +min_reviewers: 3 +max_reviewers: 5 +max_review_rounds: 5 +repos: + cli: "aws/agentcore-cli" + cdk: "aws/agentcore-l3-cdk-constructs" diff --git a/agents/core/harness_client.py b/agents/core/harness_client.py index 33d296f32..0a811a31a 100644 --- a/agents/core/harness_client.py +++ b/agents/core/harness_client.py @@ -20,7 +20,6 @@ def __init__(self, config: PipelineConfig): region_name=config.region, profile_name=config.aws_profile, ) - self.credentials = self.session.get_credentials().get_frozen_credentials() self.http = urllib3.PoolManager() self.client = self.session.client( "bedrock-agentcore", @@ -50,7 +49,8 @@ def invoke( "Content-Type": "application/json", "Accept": "application/vnd.amazon.eventstream", }) - SigV4Auth(self.credentials, "bedrock-agentcore", region).add_auth(request) + credentials = self.session.get_credentials().get_frozen_credentials() + SigV4Auth(credentials, "bedrock-agentcore", region).add_auth(request) response = self.http.urlopen( "POST", url, body=json.dumps(body).encode(), diff --git a/agents/core/parsing.py b/agents/core/parsing.py index 1f4aab67d..228bc23f3 100644 --- a/agents/core/parsing.py +++ b/agents/core/parsing.py @@ -34,17 +34,33 @@ def _extract_json(raw_text: str) -> str | None: if start == -1: return None - depth = 0 - for i in range(start, 
len(raw_text)): - if raw_text[i] == "{": - depth += 1 - elif raw_text[i] == "}": - depth -= 1 - if depth == 0: - candidate = raw_text[start : i + 1] - if "approved" in candidate: - return candidate - return None + while start != -1: + depth = 0 + in_string = False + escape_next = False + for i in range(start, len(raw_text)): + c = raw_text[i] + if escape_next: + escape_next = False + continue + if c == "\\": + escape_next = True + continue + if c == '"': + in_string = not in_string + continue + if in_string: + continue + if c == "{": + depth += 1 + elif c == "}": + depth -= 1 + if depth == 0: + candidate = raw_text[start : i + 1] + if "approved" in candidate: + return candidate + break + start = raw_text.find("{", start + 1) return None diff --git a/agents/orchestrations/fix_and_review/orchestrator.py b/agents/orchestrations/fix_and_review/orchestrator.py index eee22af25..d11324703 100644 --- a/agents/orchestrations/fix_and_review/orchestrator.py +++ b/agents/orchestrations/fix_and_review/orchestrator.py @@ -35,6 +35,9 @@ def run_pipeline( config = PipelineConfig.from_yaml(config_path) for key, value in overrides.items(): if hasattr(config, key): + field_type = type(getattr(config, key)) + if field_type == int: + value = int(value) setattr(config, key, value) if prompts_dir: diff --git a/agents/orchestrations/fix_and_review/partitioning.py b/agents/orchestrations/fix_and_review/partitioning.py index 5f0a42866..bbfed1993 100644 --- a/agents/orchestrations/fix_and_review/partitioning.py +++ b/agents/orchestrations/fix_and_review/partitioning.py @@ -36,6 +36,9 @@ def calculate_reviewer_count(stats: DiffStats, min_r: int = 3, max_r: int = 5) - def partition_round1_by_directory( changed_files: list[str], num_reviewers: int ) -> list[ReviewerAssignment]: + if not changed_files: + return [ReviewerAssignment(files=[], focus="No files to review")] + groups: dict[str, list[str]] = defaultdict(list) for f in changed_files: parts = PurePosixPath(f).parts diff --git 
a/agents/orchestrations/fix_and_review/phases/complete.py b/agents/orchestrations/fix_and_review/phases/complete.py index f34742ed1..b5225af27 100644 --- a/agents/orchestrations/fix_and_review/phases/complete.py +++ b/agents/orchestrations/fix_and_review/phases/complete.py @@ -60,6 +60,7 @@ def run_complete( rebase_succeeded = False client.run_command(session_id, f"cd {repo_name} && git rebase --abort") errors.append(f"Rebase failed in {repo_name}: {stderr[:500]}") + continue _, stderr, exit_code = client.run_command( session_id, f"cd {repo_name} && git push origin {branch_name} --force-with-lease" @@ -103,9 +104,5 @@ def run_complete( pr_urls.append(stdout.strip()) else: errors.append(f"PR may have been created in {repo} but could not extract URL") - if exit_code == 0 and stdout.strip(): - pr_urls.append(stdout.strip()) - else: - errors.append(f"Failed to create PR in {repo}: {stderr[:500]}") return CompleteResult(pr_urls=pr_urls, rebase_succeeded=rebase_succeeded, errors=errors) diff --git a/agents/orchestrations/fix_and_review/phases/setup.py b/agents/orchestrations/fix_and_review/phases/setup.py index 32608a182..6b8fe3592 100644 --- a/agents/orchestrations/fix_and_review/phases/setup.py +++ b/agents/orchestrations/fix_and_review/phases/setup.py @@ -1,3 +1,4 @@ +from collections import defaultdict from pathlib import Path from core.config import PipelineConfig @@ -16,7 +17,7 @@ def load_prompt(name: str, **kwargs: str) -> str: if _prompts_dir is None: raise RuntimeError("Prompts directory not set. 
Call set_prompts_dir() before running phases.") template = (_prompts_dir / name).read_text() - return template.format(**kwargs) + return template.format_map(defaultdict(str, **kwargs)) def run_setup( diff --git a/agents/orchestrations/fix_and_review/phases/verify.py b/agents/orchestrations/fix_and_review/phases/verify.py index 72064b3dd..c8070092f 100644 --- a/agents/orchestrations/fix_and_review/phases/verify.py +++ b/agents/orchestrations/fix_and_review/phases/verify.py @@ -61,26 +61,38 @@ def run_verify( continue test_cmd = TEST_COMMANDS.get(repo, "npm test") print(f" Running tests in {repo} (may take a few minutes)...", flush=True) - _, stderr, exit_code = client.run_command(session_id, f'cd {repo} && {test_cmd} 2>&1 | grep -E "(FAIL|PASS|Tests:|Test Suites:)" | tail -20') - if exit_code != 0: + stdout, stderr, exit_code = client.run_command( + session_id, f'cd {repo} && {test_cmd} > /tmp/test_output.txt 2>&1; echo "EXIT:$?"' + ) + test_exit = 1 + for line in stdout.strip().split("\n"): + if line.startswith("EXIT:"): + test_exit = int(line.split(":")[1]) + if test_exit != 0: tests_pass = False - errors.append(f"Tests failed in {repo}: {stderr[:500]}") + summary, _, _ = client.run_command( + session_id, 'grep -E "(FAIL|PASS|Tests:|Test Suites:)" /tmp/test_output.txt | tail -20' + ) + errors.append(f"Tests failed in {repo}: {summary[:500]}") - # Push from each repo that has changes + # Only push if all local checks passed branch_pushed = True - for repo in affected_repos: - stdout, _, _ = client.run_command( - session_id, f"cd {repo} && git diff main --stat 2>/dev/null" - ) - if not stdout.strip(): - continue - print(f" Pushing {branch_name} in {repo}...", flush=True) - _, stderr, exit_code = client.run_command( - session_id, f"cd {repo} && git push origin {branch_name}" - ) - if exit_code != 0: - branch_pushed = False - errors.append(f"Push failed in {repo}: {stderr[:500]}") + if not (typecheck_passes and tests_pass): + branch_pushed = False + else: + for repo 
in affected_repos: + stdout, _, _ = client.run_command( + session_id, f"cd {repo} && git diff main --stat 2>/dev/null" + ) + if not stdout.strip(): + continue + print(f" Pushing {branch_name} in {repo}...", flush=True) + _, stderr, exit_code = client.run_command( + session_id, f"cd {repo} && git push origin {branch_name}" + ) + if exit_code != 0: + branch_pushed = False + errors.append(f"Push failed in {repo}: {stderr[:500]}") return VerificationResult( commits_exist=commits_exist, diff --git a/agents/pyproject.toml b/agents/pyproject.toml index c1ea76be7..9f8611d66 100644 --- a/agents/pyproject.toml +++ b/agents/pyproject.toml @@ -1,15 +1,15 @@ [project] -name = "harness-invoke" +name = "agentcore-agents" version = "0.1.0" -description = "Add your description here" +description = "Multi-agent orchestration for automated issue resolution and feature building" readme = "README.md" -requires-python = ">=3.14" +requires-python = ">=3.12" dependencies = [ - "boto3>=1.42.94,<1.43.2", - "botocore>=1.42.94,<1.43.2", - "pydantic>=2.13.1", - "pyyaml>=6.0.3", - "urllib3>=2.6.3", + "boto3>=1.42.94", + "botocore>=1.42.94", + "pydantic>=2.10.0", + "pyyaml>=6.0", + "urllib3>=2.0", ] [dependency-groups] From ff86d2d0b9c70817ba7eed29b478461f48a91e31 Mon Sep 17 00:00:00 2001 From: Aidan Daly <aidandal@amazon.com> Date: Tue, 5 May 2026 23:14:27 -0400 Subject: [PATCH 04/12] =?UTF-8?q?perf:=20switch=20to=20targeted=20testing?= =?UTF-8?q?=20=E2=80=94=20only=20run=20tests=20for=20changed=20files,=20le?= =?UTF-8?q?t=20CI=20handle=20full=20suite?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- agents/bug_fixer/prompts/executor.md | 16 +++++++--- agents/bug_fixer/prompts/fixer.md | 11 +++++-- agents/feature_builder/prompts/executor.md | 16 +++++++--- agents/feature_builder/prompts/fixer.md | 13 +++++--- .../fix_and_review/phases/verify.py | 31 ++++++++++++++++--- 5 files changed, 66 insertions(+), 21 deletions(-) diff --git 
a/agents/bug_fixer/prompts/executor.md b/agents/bug_fixer/prompts/executor.md index 8f1f89273..19ab91026 100644 --- a/agents/bug_fixer/prompts/executor.md +++ b/agents/bug_fixer/prompts/executor.md @@ -6,12 +6,18 @@ The plan: Instructions: 1. Follow the plan exactly. Make the code changes described. 2. Run `npm run typecheck 2>&1 | tail -20` in each affected repo. Fix any type errors. -3. Run tests with summary output only: `npm run test:unit 2>&1 | grep -E "(FAIL|PASS|Tests:|Test Suites:)" | tail -20` -4. If tests fail, debug the specific failing file: `npm run test:unit -- path/to/failing.test.ts 2>&1 | tail -50` +3. Run ONLY the tests related to the files you changed. Use targeted test commands: + - `npx vitest run --project unit path/to/relevant.test.ts 2>&1 | tail -30` + - If you changed `src/cli/aws/account.ts`, run `npx vitest run --project unit src/cli/aws/__tests__/account.test.ts` + - Run 1-5 targeted test files, NOT the full suite. +4. If targeted tests fail, fix the code and re-run only those tests. 5. Commit your changes: `git add -A && git commit -m "feat: {commit_message}"` -6. Push to fork remote: `git push origin {branch_name}` +6. Push to remote: `git push origin {branch_name}` 7. If you need to deviate from the plan, document why in your commit message. -IMPORTANT: Never run `npm run test:unit` without piping through grep or tail. The full output is too large and will overflow context. Use the grep pattern in step 3 for the summary, then target specific files in step 4 if something fails. +IMPORTANT: +- Do NOT run `npm run test:unit` (full suite). It takes too long. Only run targeted tests for files you changed. +- CI will run the full test suite after the PR is created. +- Always pipe test output through `| tail -30` to avoid context overflow. -Do not stop until typecheck and tests pass. If tests fail, analyze the failure, fix the code, and try again. +Do not stop until typecheck and targeted tests pass. 
If tests fail, analyze the failure, fix the code, and try again. diff --git a/agents/bug_fixer/prompts/fixer.md b/agents/bug_fixer/prompts/fixer.md index a99a6f0d1..b108e594e 100644 --- a/agents/bug_fixer/prompts/fixer.md +++ b/agents/bug_fixer/prompts/fixer.md @@ -8,9 +8,14 @@ Instructions: 1. Fix each finding, starting with Critical severity first, then High, Medium, Low. 2. If a finding is not applicable or is a false positive, explain why in a commit message. 3. Run `npm run typecheck 2>&1 | tail -20` in each affected repo after fixes. -4. Run tests with summary: `npm run test:unit 2>&1 | grep -E "(FAIL|PASS|Tests:|Test Suites:)" | tail -20` -5. If tests fail, debug the specific file: `npm run test:unit -- path/to/failing.test.ts 2>&1 | tail -50` +4. Run ONLY targeted tests for files you changed: + - `npx vitest run --project unit path/to/relevant.test.ts 2>&1 | tail -30` + - Run 1-5 targeted test files, NOT the full suite. +5. If targeted tests fail, fix and re-run only those tests. 6. Commit: `git add -A && git commit -m "fix: address review findings round {round_number}"` 7. Push: `git push origin {branch_name}` -IMPORTANT: Never run `npm run test:unit` without piping through grep or tail. The full output is too large and will overflow context. +IMPORTANT: +- Do NOT run `npm run test:unit` (full suite). It takes too long. Only run targeted tests. +- CI will validate the full suite after PR creation. +- Always pipe test output through `| tail -30`. diff --git a/agents/feature_builder/prompts/executor.md b/agents/feature_builder/prompts/executor.md index 40601221e..57b641f7c 100644 --- a/agents/feature_builder/prompts/executor.md +++ b/agents/feature_builder/prompts/executor.md @@ -6,12 +6,18 @@ The plan: Instructions: 1. Follow the plan exactly. Make the code changes described. 2. Run `npm run typecheck 2>&1 | tail -20` in each affected repo. Fix any type errors. -3. 
Run tests with summary: `npm run test:unit 2>&1 | grep -E "(FAIL|PASS|Tests:|Test Suites:)" | tail -20` -4. If tests fail, debug the specific file: `npm run test:unit -- path/to/failing.test.ts 2>&1 | tail -50` +3. Run ONLY the tests related to the files you changed. Use targeted test commands: + - `npx vitest run --project unit path/to/relevant.test.ts 2>&1 | tail -30` + - If you changed `src/cli/commands/deploy/actions.ts`, run its test: `npx vitest run --project unit src/cli/commands/deploy/__tests__/actions.test.ts` + - Run 1-5 targeted test files, NOT the full suite. +4. If targeted tests fail, fix the code and re-run only those tests. 5. Commit your changes: `git add -A && git commit -m "feat: {commit_message}"` -6. Push to remote: `git push origin feature/{feature_name}` +6. Push to remote: `git push origin {branch_name}` 7. If you need to deviate from the plan, document why in your commit message. -IMPORTANT: Never run `npm run test:unit` without piping through grep or tail. The full output is too large and will overflow context. Use the grep pattern in step 3 for the summary, then target specific files in step 4 if something fails. +IMPORTANT: +- Do NOT run `npm run test:unit` (full suite). It takes too long. Only run targeted tests for files you changed. +- CI will run the full test suite after the PR is created. +- Always pipe test output through `| tail -30` to avoid context overflow. -Do not stop until typecheck and tests pass. If tests fail, analyze the failure, fix the code, and try again. +Do not stop until typecheck and targeted tests pass. If tests fail, analyze the failure, fix the code, and try again. diff --git a/agents/feature_builder/prompts/fixer.md b/agents/feature_builder/prompts/fixer.md index 67e04ebfb..b108e594e 100644 --- a/agents/feature_builder/prompts/fixer.md +++ b/agents/feature_builder/prompts/fixer.md @@ -8,9 +8,14 @@ Instructions: 1. Fix each finding, starting with Critical severity first, then High, Medium, Low. 2. 
If a finding is not applicable or is a false positive, explain why in a commit message. 3. Run `npm run typecheck 2>&1 | tail -20` in each affected repo after fixes. -4. Run tests with summary: `npm run test:unit 2>&1 | grep -E "(FAIL|PASS|Tests:|Test Suites:)" | tail -20` -5. If tests fail, debug the specific file: `npm run test:unit -- path/to/failing.test.ts 2>&1 | tail -50` +4. Run ONLY targeted tests for files you changed: + - `npx vitest run --project unit path/to/relevant.test.ts 2>&1 | tail -30` + - Run 1-5 targeted test files, NOT the full suite. +5. If targeted tests fail, fix and re-run only those tests. 6. Commit: `git add -A && git commit -m "fix: address review findings round {round_number}"` -7. Push: `git push origin feature/{feature_name}` +7. Push: `git push origin {branch_name}` -IMPORTANT: Never run `npm run test:unit` without piping through grep or tail. The full output is too large and will overflow context. +IMPORTANT: +- Do NOT run `npm run test:unit` (full suite). It takes too long. Only run targeted tests. +- CI will validate the full suite after PR creation. +- Always pipe test output through `| tail -30`. 
diff --git a/agents/orchestrations/fix_and_review/phases/verify.py b/agents/orchestrations/fix_and_review/phases/verify.py index c8070092f..b72158235 100644 --- a/agents/orchestrations/fix_and_review/phases/verify.py +++ b/agents/orchestrations/fix_and_review/phases/verify.py @@ -59,10 +59,33 @@ def run_verify( ) if not stdout.strip(): continue - test_cmd = TEST_COMMANDS.get(repo, "npm test") - print(f" Running tests in {repo} (may take a few minutes)...", flush=True) + # Find test files related to changed source files + print(f" Running targeted tests in {repo}...", flush=True) + changed_files_out, _, _ = client.run_command( + session_id, f"cd {repo} && git diff main --name-only | head -20" + ) + test_files: list[str] = [] + for changed in changed_files_out.strip().split("\n"): + changed = changed.strip() + if not changed: + continue + if "__tests__" in changed or ".test." in changed: + test_files.append(changed) + else: + # Look for adjacent test file + test_candidate = changed.replace("/src/", "/src/").replace(".ts", ".test.ts") + dir_parts = changed.rsplit("/", 1) + if len(dir_parts) == 2: + test_dir = f"{dir_parts[0]}/__tests__/{dir_parts[1].replace('.ts', '.test.ts')}" + test_files.append(test_dir) + + if not test_files: + continue + + # Run only the targeted tests (max 5) + test_paths = " ".join(test_files[:5]) stdout, stderr, exit_code = client.run_command( - session_id, f'cd {repo} && {test_cmd} > /tmp/test_output.txt 2>&1; echo "EXIT:$?"' + session_id, f'cd {repo} && npx vitest run --project unit {test_paths} > /tmp/test_output.txt 2>&1; echo "EXIT:$?"' ) test_exit = 1 for line in stdout.strip().split("\n"): @@ -71,7 +94,7 @@ def run_verify( if test_exit != 0: tests_pass = False summary, _, _ = client.run_command( - session_id, 'grep -E "(FAIL|PASS|Tests:|Test Suites:)" /tmp/test_output.txt | tail -20' + session_id, 'tail -20 /tmp/test_output.txt' ) errors.append(f"Tests failed in {repo}: {summary[:500]}") From 6912258eaf6d6822ca0824fb024dac71a8219db1 Mon 
Sep 17 00:00:00 2001 From: Aidan Daly <aidandal@amazon.com> Date: Wed, 6 May 2026 00:00:51 -0400 Subject: [PATCH 05/12] =?UTF-8?q?fix:=20extract=20phase=20cds=20into=20eac?= =?UTF-8?q?h=20repo=20before=20git=20diff=20=E2=80=94=20fixes=200-file=20e?= =?UTF-8?q?xtraction=20bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../fix_and_review/phases/extract.py | 64 +++++++++++++------ 1 file changed, 43 insertions(+), 21 deletions(-) diff --git a/agents/orchestrations/fix_and_review/phases/extract.py b/agents/orchestrations/fix_and_review/phases/extract.py index fbe8e713c..b9f8f7724 100644 --- a/agents/orchestrations/fix_and_review/phases/extract.py +++ b/agents/orchestrations/fix_and_review/phases/extract.py @@ -18,27 +18,49 @@ def run_extract( cli_repo: str, cdk_repo: str, ) -> ExtractResult: - diff_stat_stdout, _, _ = client.run_command(session_id, "git diff main --stat") - full_diff_stdout, _, _ = client.run_command(session_id, "git diff main") - commit_log_stdout, _, _ = client.run_command(session_id, "git log main..HEAD --oneline") + cli_name = cli_repo.split("/")[-1] + cdk_name = cdk_repo.split("/")[-1] + all_diff_stat = "" + all_full_diff = "" + all_commit_log = "" changed_files: list[str] = [] - for line in diff_stat_stdout.strip().split("\n"): - line = line.strip() - if "|" in line: - file_path = line.split("|")[0].strip() - if file_path: - changed_files.append(file_path) - total_lines = 0 - for line in full_diff_stdout.split("\n"): - if line.startswith("+") and not line.startswith("+++"): - total_lines += 1 - elif line.startswith("-") and not line.startswith("---"): - total_lines += 1 - - has_cli = any(f.startswith(cli_repo) or f.startswith("src/cli") for f in changed_files) - has_cdk = any(f.startswith(cdk_repo) or f.startswith("src/cdk") for f in changed_files) + has_cli = False + has_cdk = False + + for repo_name in [cli_name, cdk_name]: + # Check if this repo has changes on the branch + commit_log, _, 
exit_code = client.run_command( + session_id, f"cd {repo_name} && git log main..HEAD --oneline 2>/dev/null" + ) + if exit_code != 0 or not commit_log.strip(): + continue + + diff_stat, _, _ = client.run_command(session_id, f"cd {repo_name} && git diff main --stat") + full_diff, _, _ = client.run_command(session_id, f"cd {repo_name} && git diff main") + + all_diff_stat += diff_stat + all_full_diff += full_diff + all_commit_log += commit_log + + for line in diff_stat.strip().split("\n"): + line = line.strip() + if "|" in line: + file_path = line.split("|")[0].strip() + if file_path: + changed_files.append(file_path) + if repo_name == cli_name: + has_cli = True + else: + has_cdk = True + + for line in full_diff.split("\n"): + if line.startswith("+") and not line.startswith("+++"): + total_lines += 1 + elif line.startswith("-") and not line.startswith("---"): + total_lines += 1 + cross_repo = has_cli and has_cdk stats = DiffStats( @@ -48,8 +70,8 @@ def run_extract( ) return ExtractResult( - diff_stat=diff_stat_stdout, - full_diff=full_diff_stdout, - commit_log=commit_log_stdout, + diff_stat=all_diff_stat, + full_diff=all_full_diff, + commit_log=all_commit_log, stats=stats, ) From 41ec7e99bce4065e4b419ccb097290a76288d9c5 Mon Sep 17 00:00:00 2001 From: Aidan Daly <aidandal@amazon.com> Date: Wed, 6 May 2026 00:20:50 -0400 Subject: [PATCH 06/12] fix: add maxIterations per phase to prevent context overflow, use force-with-lease for push --- agents/core/harness_client.py | 3 +++ agents/orchestrations/fix_and_review/phases/execute.py | 2 +- agents/orchestrations/fix_and_review/phases/fix.py | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/agents/core/harness_client.py b/agents/core/harness_client.py index 0a811a31a..87b82656a 100644 --- a/agents/core/harness_client.py +++ b/agents/core/harness_client.py @@ -31,6 +31,7 @@ def invoke( session_id: str, message: str, system_prompt: str | None = None, + max_iterations: int | None = None, verbose: bool = True, ) 
-> str: body: dict = { @@ -40,6 +41,8 @@ def invoke( } if system_prompt: body["systemPrompt"] = [{"text": system_prompt}] + if max_iterations: + body["maxIterations"] = max_iterations region = self.config.region arn = self.config.harness_arn diff --git a/agents/orchestrations/fix_and_review/phases/execute.py b/agents/orchestrations/fix_and_review/phases/execute.py index 6d3835b3f..9587484c6 100644 --- a/agents/orchestrations/fix_and_review/phases/execute.py +++ b/agents/orchestrations/fix_and_review/phases/execute.py @@ -17,4 +17,4 @@ def run_execute( commit_message=f"fix issue #{issue_number}", branch_name=branch_name, ) - return client.invoke(session_id=session_id, message=prompt) + return client.invoke(session_id=session_id, message=prompt, max_iterations=40) diff --git a/agents/orchestrations/fix_and_review/phases/fix.py b/agents/orchestrations/fix_and_review/phases/fix.py index ce11e110c..8631d498f 100644 --- a/agents/orchestrations/fix_and_review/phases/fix.py +++ b/agents/orchestrations/fix_and_review/phases/fix.py @@ -27,4 +27,4 @@ def run_fix( round_number=str(round_number), branch_name=branch_name, ) - return client.invoke(session_id=session_id, message=prompt) + return client.invoke(session_id=session_id, message=prompt, max_iterations=30) From 362766c23a9bf1dedcc1d1ce6b2b1a4ba8c27f49 Mon Sep 17 00:00:00 2001 From: Aidan Daly <aidandal@amazon.com> Date: Wed, 6 May 2026 00:48:30 -0400 Subject: [PATCH 07/12] fix: commit-early strategy, unique branch per run, abort on 0-file extract --- agents/bug_fixer/prompts/executor.md | 20 +++++++++---------- agents/feature_builder/prompts/executor.md | 20 +++++++++---------- .../fix_and_review/orchestrator.py | 6 +++++- .../fix_and_review/phases/execute.py | 2 +- .../fix_and_review/phases/fix.py | 2 +- 5 files changed, 25 insertions(+), 25 deletions(-) diff --git a/agents/bug_fixer/prompts/executor.md b/agents/bug_fixer/prompts/executor.md index 19ab91026..722ea6f2f 100644 --- a/agents/bug_fixer/prompts/executor.md 
+++ b/agents/bug_fixer/prompts/executor.md @@ -5,19 +5,17 @@ The plan: Instructions: 1. Follow the plan exactly. Make the code changes described. -2. Run `npm run typecheck 2>&1 | tail -20` in each affected repo. Fix any type errors. -3. Run ONLY the tests related to the files you changed. Use targeted test commands: +2. COMMIT IMMEDIATELY after writing your changes: `git add -A && git commit -m "feat: {commit_message}"` +3. Run `npm run typecheck 2>&1 | tail -20`. If there are type errors, fix them and commit again. +4. Run ONLY targeted tests for files you changed: - `npx vitest run --project unit path/to/relevant.test.ts 2>&1 | tail -30` - - If you changed `src/cli/aws/account.ts`, run `npx vitest run --project unit src/cli/aws/__tests__/account.test.ts` - Run 1-5 targeted test files, NOT the full suite. -4. If targeted tests fail, fix the code and re-run only those tests. -5. Commit your changes: `git add -A && git commit -m "feat: {commit_message}"` +5. If targeted tests fail, fix and commit again. 6. Push to remote: `git push origin {branch_name}` -7. If you need to deviate from the plan, document why in your commit message. -IMPORTANT: -- Do NOT run `npm run test:unit` (full suite). It takes too long. Only run targeted tests for files you changed. +CRITICAL RULES: +- COMMIT EARLY AND OFTEN. Your first commit should happen BEFORE running typecheck. Commit after every fix. A commit with typecheck errors is better than no commit at all. +- If typecheck has more than 5 errors, fix the most critical ones, commit what you have, and move on. Do NOT spend more than 3 attempts fixing typecheck. +- Do NOT run `npm run test:unit` (full suite). Only run targeted tests. - CI will run the full test suite after the PR is created. -- Always pipe test output through `| tail -30` to avoid context overflow. - -Do not stop until typecheck and targeted tests pass. If tests fail, analyze the failure, fix the code, and try again. +- Always pipe output through `| tail -30`. 
diff --git a/agents/feature_builder/prompts/executor.md b/agents/feature_builder/prompts/executor.md index 57b641f7c..77d974e28 100644 --- a/agents/feature_builder/prompts/executor.md +++ b/agents/feature_builder/prompts/executor.md @@ -5,19 +5,17 @@ The plan: Instructions: 1. Follow the plan exactly. Make the code changes described. -2. Run `npm run typecheck 2>&1 | tail -20` in each affected repo. Fix any type errors. -3. Run ONLY the tests related to the files you changed. Use targeted test commands: +2. COMMIT IMMEDIATELY after writing your changes: `git add -A && git commit -m "feat: {commit_message}"` +3. Run `npm run typecheck 2>&1 | tail -20`. If there are type errors, fix them and commit again. +4. Run ONLY targeted tests for files you changed: - `npx vitest run --project unit path/to/relevant.test.ts 2>&1 | tail -30` - - If you changed `src/cli/commands/deploy/actions.ts`, run its test: `npx vitest run --project unit src/cli/commands/deploy/__tests__/actions.test.ts` - Run 1-5 targeted test files, NOT the full suite. -4. If targeted tests fail, fix the code and re-run only those tests. -5. Commit your changes: `git add -A && git commit -m "feat: {commit_message}"` +5. If targeted tests fail, fix and commit again. 6. Push to remote: `git push origin {branch_name}` -7. If you need to deviate from the plan, document why in your commit message. -IMPORTANT: -- Do NOT run `npm run test:unit` (full suite). It takes too long. Only run targeted tests for files you changed. +CRITICAL RULES: +- COMMIT EARLY AND OFTEN. Your first commit should happen BEFORE running typecheck. Commit after every fix. A commit with typecheck errors is better than no commit at all. +- If typecheck has more than 5 errors, fix the most critical ones, commit what you have, and move on. Do NOT spend more than 3 attempts fixing typecheck. +- Do NOT run `npm run test:unit` (full suite). Only run targeted tests. - CI will run the full test suite after the PR is created. 
-- Always pipe test output through `| tail -30` to avoid context overflow. - -Do not stop until typecheck and targeted tests pass. If tests fail, analyze the failure, fix the code, and try again. +- Always pipe output through `| tail -30`. diff --git a/agents/orchestrations/fix_and_review/orchestrator.py b/agents/orchestrations/fix_and_review/orchestrator.py index d11324703..ba2c1089e 100644 --- a/agents/orchestrations/fix_and_review/orchestrator.py +++ b/agents/orchestrations/fix_and_review/orchestrator.py @@ -49,7 +49,8 @@ def run_pipeline( branch_name = f"feature/{feature_name or 'unnamed'}" else: issue_number = issue_url.rstrip("/").split("/")[-1] - branch_name = f"fix/{issue_number}" + short_id = HarnessClient.new_session_id()[:8].lower() + branch_name = f"fix/{issue_number}-{short_id}" client = HarnessClient(config) session_id = HarnessClient.new_session_id() @@ -146,6 +147,9 @@ def elapsed() -> str: f"Extracted diff: {len(extract.stats.changed_files)} files, " f"{extract.stats.total_lines} lines changed [{int(time.time()-t0)}s | total {elapsed()}]" ) + if not extract.stats.changed_files: + print("\n=== Pipeline Failed — no changes were produced. Agent may have failed to commit. 
===") + return 1 print() # Review Loop diff --git a/agents/orchestrations/fix_and_review/phases/execute.py b/agents/orchestrations/fix_and_review/phases/execute.py index 9587484c6..6d3835b3f 100644 --- a/agents/orchestrations/fix_and_review/phases/execute.py +++ b/agents/orchestrations/fix_and_review/phases/execute.py @@ -17,4 +17,4 @@ def run_execute( commit_message=f"fix issue #{issue_number}", branch_name=branch_name, ) - return client.invoke(session_id=session_id, message=prompt, max_iterations=40) + return client.invoke(session_id=session_id, message=prompt) diff --git a/agents/orchestrations/fix_and_review/phases/fix.py b/agents/orchestrations/fix_and_review/phases/fix.py index 8631d498f..ce11e110c 100644 --- a/agents/orchestrations/fix_and_review/phases/fix.py +++ b/agents/orchestrations/fix_and_review/phases/fix.py @@ -27,4 +27,4 @@ def run_fix( round_number=str(round_number), branch_name=branch_name, ) - return client.invoke(session_id=session_id, message=prompt, max_iterations=30) + return client.invoke(session_id=session_id, message=prompt) From 35a3e348253f01fe65d0f5ed7fe00b73906c4187 Mon Sep 17 00:00:00 2001 From: Aidan Daly <aidandal@amazon.com> Date: Wed, 6 May 2026 10:42:47 -0400 Subject: [PATCH 08/12] fix: add retry on connection errors, fix node 20 symlinks in setup, fix double serialization --- agents/bug_fixer/prompts/setup.md | 6 +++++- agents/core/harness_client.py | 28 +++++++++++++++++++++++-- agents/feature_builder/prompts/setup.md | 6 +++++- 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/agents/bug_fixer/prompts/setup.md b/agents/bug_fixer/prompts/setup.md index 11c3e50df..04fea6c9d 100644 --- a/agents/bug_fixer/prompts/setup.md +++ b/agents/bug_fixer/prompts/setup.md @@ -1,7 +1,11 @@ You are setting up a development environment to solve a GitHub issue. Steps: -1. Install tools (Amazon Linux 2023): dnf install -y -q git nodejs20 npm > /dev/null 2>&1 +1. 
Install tools and configure node 20 as default: + dnf install -y -q git nodejs20 > /dev/null 2>&1 + ln -sf /usr/bin/node-20 /usr/local/bin/node + ln -sf /usr/lib/nodejs20/lib/node_modules/npm/bin/npm-cli.js /usr/local/bin/npm + export PATH=/usr/local/bin:$PATH 2. Authenticate GitHub: echo $GH_TOKEN | gh auth login --with-token 3. Configure git to use gh for auth: gh auth setup-git 4. Clone both repos: diff --git a/agents/core/harness_client.py b/agents/core/harness_client.py index 87b82656a..2bd438638 100644 --- a/agents/core/harness_client.py +++ b/agents/core/harness_client.py @@ -1,5 +1,6 @@ import json import sys +import time import uuid from urllib.parse import quote @@ -33,6 +34,28 @@ def invoke( system_prompt: str | None = None, max_iterations: int | None = None, verbose: bool = True, + retries: int = 2, + ) -> str: + for attempt in range(retries + 1): + try: + return self._invoke_once(session_id, message, system_prompt, max_iterations, verbose) + except (urllib3.exceptions.ProtocolError, urllib3.exceptions.ReadTimeoutError, ConnectionResetError) as e: + if attempt < retries: + if verbose: + print(f"\n ⚠️ Connection error (attempt {attempt + 1}/{retries + 1}): {e}. 
Retrying...", flush=True) + time.sleep(5) + else: + if verbose: + print(f"\n ⚠️ Connection error after {retries + 1} attempts: {e}", flush=True) + raise + + def _invoke_once( + self, + session_id: str, + message: str, + system_prompt: str | None = None, + max_iterations: int | None = None, + verbose: bool = True, ) -> str: body: dict = { "runtimeSessionId": session_id, @@ -48,7 +71,8 @@ def invoke( arn = self.config.harness_arn url = f"https://bedrock-agentcore.{region}.amazonaws.com/harnesses/invoke?harnessArn={quote(arn, safe='')}" - request = AWSRequest(method="POST", url=url, data=json.dumps(body), headers={ + body_bytes = json.dumps(body).encode() + request = AWSRequest(method="POST", url=url, data=body_bytes, headers={ "Content-Type": "application/json", "Accept": "application/vnd.amazon.eventstream", }) @@ -56,7 +80,7 @@ def invoke( SigV4Auth(credentials, "bedrock-agentcore", region).add_auth(request) response = self.http.urlopen( - "POST", url, body=json.dumps(body).encode(), + "POST", url, body=body_bytes, headers=dict(request.headers), preload_content=False, timeout=urllib3.Timeout(connect=30, read=900), diff --git a/agents/feature_builder/prompts/setup.md b/agents/feature_builder/prompts/setup.md index f06610da8..e9d67786d 100644 --- a/agents/feature_builder/prompts/setup.md +++ b/agents/feature_builder/prompts/setup.md @@ -1,7 +1,11 @@ You are setting up a development environment to build a new feature. Steps: -1. Install tools (Amazon Linux 2023): dnf install -y -q git nodejs20 npm > /dev/null 2>&1 +1. Install tools and configure node 20 as default: + dnf install -y -q git nodejs20 > /dev/null 2>&1 + ln -sf /usr/bin/node-20 /usr/local/bin/node + ln -sf /usr/lib/nodejs20/lib/node_modules/npm/bin/npm-cli.js /usr/local/bin/npm + export PATH=/usr/local/bin:$PATH 2. Authenticate GitHub: echo $GH_TOKEN | gh auth login --with-token 3. Configure git to use gh for auth: gh auth setup-git 4. 
Clone both repos: From 3bad3e266b56d28e911f6de8f51330c617b1b737 Mon Sep 17 00:00:00 2001 From: Aidan Daly <aidandal@amazon.com> Date: Wed, 6 May 2026 12:02:31 -0400 Subject: [PATCH 09/12] =?UTF-8?q?fix:=20constrain=20reviewer=20to=2010=20t?= =?UTF-8?q?ool=20calls=20max=20=E2=80=94=20prevent=20codebase=20exploratio?= =?UTF-8?q?n=20rabbit=20holes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- agents/bug_fixer/prompts/reviewer.md | 25 ++++++++++++---------- agents/feature_builder/prompts/reviewer.md | 25 ++++++++++++---------- 2 files changed, 28 insertions(+), 22 deletions(-) diff --git a/agents/bug_fixer/prompts/reviewer.md b/agents/bug_fixer/prompts/reviewer.md index 9cd4cc7a2..a7bbef4f1 100644 --- a/agents/bug_fixer/prompts/reviewer.md +++ b/agents/bug_fixer/prompts/reviewer.md @@ -1,4 +1,4 @@ -You are a senior code reviewer. You have been assigned a region of a code change to review. +You are a senior code reviewer. Review ONLY the diff on the feature branch. Issue being solved: {issue_summary} Branch: {branch_name} @@ -8,19 +8,22 @@ Your assigned focus: {focus} Files to focus on: {assigned_files} Instructions: -1. Clone repos with the feature branch: - - git clone --depth 10 --branch {branch_name} https://github.com/{cli_repo}.git agentcore-cli - - git clone --depth 10 --branch {branch_name} https://github.com/{cdk_repo}.git agentcore-l3-cdk-constructs - (If the branch doesn't exist in a repo, clone main instead: git clone --depth 10 https://github.com/{cli_repo}.git agentcore-cli) -2. Run: git diff main (or git log if on the feature branch already) -3. Review your assigned files for ALL concerns: correctness, architecture, security, testing adequacy, cross-repo consistency, and breaking changes -4. Trace callers of changed functions. Check types. Verify test coverage. -5. You do NOT need to run npm install — you are reviewing code, not building it. +1. 
Clone the repo: git clone --depth 10 --branch {branch_name} https://github.com/{cli_repo}.git agentcore-cli 2>&1 | tail -3 + (If branch doesn't exist, clone main instead) +2. Run: cd agentcore-cli && git diff main +3. Read ONLY the changed files and their immediate context (the functions/classes that were modified). +4. If you need to check a caller or type, read at most 1-2 additional files. No more. +5. Produce your verdict. {previous_findings_context} -Output your review as a JSON object wrapped in ```json fences. -The JSON must have this exact schema: +CRITICAL CONSTRAINTS: +- Use at most 10 shell commands total. Do NOT explore the whole codebase. +- Focus on: correctness, breaking changes, obvious bugs. Skip style nits. +- If the code looks correct and doesn't break anything, approve it. +- Do NOT run npm install, npm test, or any build commands. + +Output your review as a JSON object wrapped in ```json fences: {{ "approved": boolean, "findings": [ diff --git a/agents/feature_builder/prompts/reviewer.md b/agents/feature_builder/prompts/reviewer.md index d576c249f..f2f71391d 100644 --- a/agents/feature_builder/prompts/reviewer.md +++ b/agents/feature_builder/prompts/reviewer.md @@ -1,4 +1,4 @@ -You are a senior code reviewer. You have been assigned a region of a code change to review. +You are a senior code reviewer. Review ONLY the diff on the feature branch. Feature being built: {issue_summary} Branch: {branch_name} @@ -8,19 +8,22 @@ Your assigned focus: {focus} Files to focus on: {assigned_files} Instructions: -1. Clone repos with the feature branch: - - git clone --depth 10 --branch {branch_name} https://github.com/{cli_repo}.git agentcore-cli - - git clone --depth 10 --branch {branch_name} https://github.com/{cdk_repo}.git agentcore-l3-cdk-constructs - (If the branch doesn't exist in a repo, clone main instead: git clone --depth 10 https://github.com/{cli_repo}.git agentcore-cli) -2. Run: git diff main (or git log if on the feature branch already) -3. 
Review your assigned files for ALL concerns: correctness, architecture, security, testing adequacy, cross-repo consistency, and breaking changes -4. Trace callers of changed functions. Check types. Verify test coverage. -5. You do NOT need to run npm install — you are reviewing code, not building it. +1. Clone the repo: git clone --depth 10 --branch {branch_name} https://github.com/{cli_repo}.git agentcore-cli 2>&1 | tail -3 + (If branch doesn't exist, clone main instead) +2. Run: cd agentcore-cli && git diff main +3. Read ONLY the changed files and their immediate context (the functions/classes that were modified). +4. If you need to check a caller or type, read at most 1-2 additional files. No more. +5. Produce your verdict. {previous_findings_context} -Output your review as a JSON object wrapped in ```json fences. -The JSON must have this exact schema: +CRITICAL CONSTRAINTS: +- Use at most 10 shell commands total. Do NOT explore the whole codebase. +- Focus on: correctness, breaking changes, obvious bugs. Skip style nits. +- If the code looks correct and doesn't break anything, approve it. +- Do NOT run npm install, npm test, or any build commands. 
+ +Output your review as a JSON object wrapped in ```json fences: {{ "approved": boolean, "findings": [ From 6ea694821894ae54c62adbddd0210d48b8c8651e Mon Sep 17 00:00:00 2001 From: Aidan Daly <aidandal@amazon.com> Date: Wed, 6 May 2026 13:54:16 -0400 Subject: [PATCH 10/12] =?UTF-8?q?fix:=20pass=20branch=5Fname=20to=20setup?= =?UTF-8?q?=20prompt=20=E2=80=94=20sync=20branch=20naming=20between=20orch?= =?UTF-8?q?estrator=20and=20agent?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- agents/bug_fixer/prompts/setup.md | 4 ++-- agents/feature_builder/prompts/setup.md | 4 ++-- agents/orchestrations/fix_and_review/orchestrator.py | 2 +- agents/orchestrations/fix_and_review/phases/setup.py | 2 ++ 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/agents/bug_fixer/prompts/setup.md b/agents/bug_fixer/prompts/setup.md index 04fea6c9d..75066446d 100644 --- a/agents/bug_fixer/prompts/setup.md +++ b/agents/bug_fixer/prompts/setup.md @@ -14,8 +14,8 @@ Steps: 5. Install dependencies: cd {cli_repo_name} && npm install 2>&1 | tail -3 && cd .. 6. Fetch the issue details: gh issue view {issue_url} --json title,body,labels,comments,assignees 7. Create a feature branch in both repos: - - cd {cli_repo_name} && git checkout -b fix/{issue_number} && cd .. - - cd {cdk_repo_name} && git checkout -b fix/{issue_number} && cd .. + - cd {cli_repo_name} && git checkout -b {branch_name} && cd .. + - cd {cdk_repo_name} && git checkout -b {branch_name} && cd .. 8. Report back: the issue title, body, and which repos likely need changes based on the issue content. IMPORTANT: Run each step as a separate shell command. Do not combine them. If tools are already installed, skip step 1. 
diff --git a/agents/feature_builder/prompts/setup.md b/agents/feature_builder/prompts/setup.md index e9d67786d..3757a4f55 100644 --- a/agents/feature_builder/prompts/setup.md +++ b/agents/feature_builder/prompts/setup.md @@ -13,8 +13,8 @@ Steps: - git clone https://github.com/{cdk_repo}.git {cdk_repo_name} 5. Install dependencies: cd {cli_repo_name} && npm install 2>&1 | tail -3 && cd .. 6. Create a feature branch in both repos: - - cd {cli_repo_name} && git checkout -b feature/{feature_name} && cd .. - - cd {cdk_repo_name} && git checkout -b feature/{feature_name} && cd .. + - cd {cli_repo_name} && git checkout -b {branch_name} && cd .. + - cd {cdk_repo_name} && git checkout -b {branch_name} && cd .. 7. Report back confirmation that the environment is ready. IMPORTANT: Run each step as a separate shell command. Do not combine them. If tools are already installed, skip step 1. diff --git a/agents/orchestrations/fix_and_review/orchestrator.py b/agents/orchestrations/fix_and_review/orchestrator.py index ba2c1089e..2eb1ab194 100644 --- a/agents/orchestrations/fix_and_review/orchestrator.py +++ b/agents/orchestrations/fix_and_review/orchestrator.py @@ -70,7 +70,7 @@ def elapsed() -> str: t0 = time.time() print("--- Phase 0: Setup ---") issue_details = run_setup(client, config, session_id, issue_url, - feature_name=feature_name) + feature_name=feature_name, branch_name=branch_name) if is_feature: issue_title = feature_name or "unnamed feature" else: diff --git a/agents/orchestrations/fix_and_review/phases/setup.py b/agents/orchestrations/fix_and_review/phases/setup.py index 6b8fe3592..16982b445 100644 --- a/agents/orchestrations/fix_and_review/phases/setup.py +++ b/agents/orchestrations/fix_and_review/phases/setup.py @@ -26,6 +26,7 @@ def run_setup( session_id: str, issue_url: str, feature_name: str | None = None, + branch_name: str | None = None, ) -> str: issue_number = issue_url.rstrip("/").split("/")[-1] prompt = load_prompt( @@ -37,5 +38,6 @@ def run_setup( 
issue_url=issue_url, issue_number=issue_number, feature_name=feature_name or issue_number, + branch_name=branch_name or f"fix/{issue_number}", ) return client.invoke(session_id=session_id, message=prompt) From 139a6141ac7e0fc618a66d22ca92be97a34c61a0 Mon Sep 17 00:00:00 2001 From: Aidan Daly <aidandal@amazon.com> Date: Wed, 6 May 2026 14:10:14 -0400 Subject: [PATCH 11/12] feat: parallelize reviewers with ThreadPoolExecutor, relax hard tool limit --- agents/bug_fixer/prompts/reviewer.md | 6 +- agents/feature_builder/prompts/reviewer.md | 6 +- .../fix_and_review/phases/review.py | 83 ++++++++++++------- 3 files changed, 59 insertions(+), 36 deletions(-) diff --git a/agents/bug_fixer/prompts/reviewer.md b/agents/bug_fixer/prompts/reviewer.md index a7bbef4f1..3fd90784b 100644 --- a/agents/bug_fixer/prompts/reviewer.md +++ b/agents/bug_fixer/prompts/reviewer.md @@ -17,9 +17,9 @@ Instructions: {previous_findings_context} -CRITICAL CONSTRAINTS: -- Use at most 10 shell commands total. Do NOT explore the whole codebase. -- Focus on: correctness, breaking changes, obvious bugs. Skip style nits. +CONSTRAINTS: +- Stay focused on the diff and immediately related code. Do not explore unrelated parts of the codebase. +- Focus on: correctness, breaking changes, obvious bugs, missing error handling. Skip style nits. - If the code looks correct and doesn't break anything, approve it. - Do NOT run npm install, npm test, or any build commands. diff --git a/agents/feature_builder/prompts/reviewer.md b/agents/feature_builder/prompts/reviewer.md index f2f71391d..70fe76285 100644 --- a/agents/feature_builder/prompts/reviewer.md +++ b/agents/feature_builder/prompts/reviewer.md @@ -17,9 +17,9 @@ Instructions: {previous_findings_context} -CRITICAL CONSTRAINTS: -- Use at most 10 shell commands total. Do NOT explore the whole codebase. -- Focus on: correctness, breaking changes, obvious bugs. Skip style nits. +CONSTRAINTS: +- Stay focused on the diff and immediately related code. 
Do not explore unrelated parts of the codebase. +- Focus on: correctness, breaking changes, obvious bugs, missing error handling. Skip style nits. - If the code looks correct and doesn't break anything, approve it. - Do NOT run npm install, npm test, or any build commands. diff --git a/agents/orchestrations/fix_and_review/phases/review.py b/agents/orchestrations/fix_and_review/phases/review.py index be1fabbce..1e5196353 100644 --- a/agents/orchestrations/fix_and_review/phases/review.py +++ b/agents/orchestrations/fix_and_review/phases/review.py @@ -1,3 +1,5 @@ +from concurrent.futures import ThreadPoolExecutor, as_completed + from core.config import PipelineConfig from core.harness_client import HarnessClient from core.parsing import ReviewResult, parse_reviewer_output @@ -5,6 +7,45 @@ from orchestrations.fix_and_review.phases.setup import load_prompt +def _run_single_reviewer( + client: HarnessClient, + config: PipelineConfig, + assignment: ReviewerAssignment, + branch_name: str, + issue_summary: str, + previous_findings_context: str, +) -> tuple[ReviewResult | None, str]: + session_id = HarnessClient.new_session_id() + prompt = load_prompt( + "reviewer.md", + issue_summary=issue_summary, + branch_name=branch_name, + cli_repo=config.cli_repo, + cdk_repo=config.cdk_repo, + focus=assignment.focus, + assigned_files=", ".join(assignment.files), + previous_findings_context=previous_findings_context, + ) + + raw_output = client.invoke(session_id=session_id, message=prompt) + parsed = parse_reviewer_output(raw_output) + + if parsed is None: + retry_msg = ( + "Your previous output was not valid JSON. 
Please output ONLY a JSON object " + "wrapped in ```json fences with this schema: " + '{"approved": boolean, "findings": [{"severity": "critical"|"high"|"medium"|"low", ' + '"file": "path", "line": number, "description": "...", "suggestion": "..."}]}' + ) + for _ in range(2): + raw_output = client.invoke(session_id=session_id, message=retry_msg) + parsed = parse_reviewer_output(raw_output) + if parsed is not None: + break + + return (parsed, raw_output) + + def run_review( client: HarnessClient, config: PipelineConfig, @@ -13,37 +54,19 @@ def run_review( issue_summary: str, previous_findings_context: str = "", ) -> list[tuple[ReviewResult | None, str]]: - results: list[tuple[ReviewResult | None, str]] = [] - - for assignment in assignments: - session_id = HarnessClient.new_session_id() - prompt = load_prompt( - "reviewer.md", - issue_summary=issue_summary, - branch_name=branch_name, - cli_repo=config.cli_repo, - cdk_repo=config.cdk_repo, - focus=assignment.focus, - assigned_files=", ".join(assignment.files), - previous_findings_context=previous_findings_context, - ) - - raw_output = client.invoke(session_id=session_id, message=prompt) - parsed = parse_reviewer_output(raw_output) + print(f" Spawning {len(assignments)} reviewers in parallel...", flush=True) - if parsed is None: - retry_msg = ( - "Your previous output was not valid JSON. 
Please output ONLY a JSON object " - "wrapped in ```json fences with this schema: " - '{"approved": boolean, "findings": [{"severity": "critical"|"high"|"medium"|"low", ' - '"file": "path", "line": number, "description": "...", "suggestion": "..."}]}' + with ThreadPoolExecutor(max_workers=len(assignments)) as executor: + futures = [ + executor.submit( + _run_single_reviewer, + client, config, assignment, branch_name, + issue_summary, previous_findings_context, ) - for _ in range(2): - raw_output = client.invoke(session_id=session_id, message=retry_msg) - parsed = parse_reviewer_output(raw_output) - if parsed is not None: - break - - results.append((parsed, raw_output)) + for assignment in assignments + ] + results: list[tuple[ReviewResult | None, str]] = [] + for future in as_completed(futures): + results.append(future.result()) return results From 7026fd9ed6be9676b1eede610653cb5953a9c786 Mon Sep 17 00:00:00 2001 From: Aidan Daly <aidandal@amazon.com> Date: Wed, 6 May 2026 16:27:39 -0400 Subject: [PATCH 12/12] =?UTF-8?q?fix:=20use=20origin/main=20in=20extract?= =?UTF-8?q?=20and=20complete=20phases=20=E2=80=94=20local=20main=20ref=20m?= =?UTF-8?q?ay=20be=20stale?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- agents/orchestrations/fix_and_review/phases/complete.py | 2 +- agents/orchestrations/fix_and_review/phases/extract.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/agents/orchestrations/fix_and_review/phases/complete.py b/agents/orchestrations/fix_and_review/phases/complete.py index b5225af27..3fb4f4e5a 100644 --- a/agents/orchestrations/fix_and_review/phases/complete.py +++ b/agents/orchestrations/fix_and_review/phases/complete.py @@ -39,7 +39,7 @@ def run_complete( for repo in affected_repos: repo_name = repo.split("/")[-1] if "/" in repo else repo stdout, _, exit_code = client.run_command( - session_id, f"cd {repo_name} && git log main..{branch_name} --oneline 2>/dev/null" + 
session_id, f"cd {repo_name} && git log origin/main..{branch_name} --oneline 2>/dev/null" ) if exit_code == 0 and stdout.strip(): full_repo = full_repo_map.get(repo, repo) diff --git a/agents/orchestrations/fix_and_review/phases/extract.py b/agents/orchestrations/fix_and_review/phases/extract.py index b9f8f7724..daed91e41 100644 --- a/agents/orchestrations/fix_and_review/phases/extract.py +++ b/agents/orchestrations/fix_and_review/phases/extract.py @@ -32,13 +32,13 @@ def run_extract( for repo_name in [cli_name, cdk_name]: # Check if this repo has changes on the branch commit_log, _, exit_code = client.run_command( - session_id, f"cd {repo_name} && git log main..HEAD --oneline 2>/dev/null" + session_id, f"cd {repo_name} && git log origin/main..HEAD --oneline 2>/dev/null" ) if exit_code != 0 or not commit_log.strip(): continue - diff_stat, _, _ = client.run_command(session_id, f"cd {repo_name} && git diff main --stat") - full_diff, _, _ = client.run_command(session_id, f"cd {repo_name} && git diff main") + diff_stat, _, _ = client.run_command(session_id, f"cd {repo_name} && git diff origin/main --stat") + full_diff, _, _ = client.run_command(session_id, f"cd {repo_name} && git diff origin/main") all_diff_stat += diff_stat all_full_diff += full_diff