diff --git a/.github/workflows/bug-fixer.yml b/.github/workflows/bug-fixer.yml new file mode 100644 index 000000000..a7de5d8c1 --- /dev/null +++ b/.github/workflows/bug-fixer.yml @@ -0,0 +1,60 @@ +name: Bug Fixer Agent + +on: + issues: + types: [labeled] + workflow_dispatch: + inputs: + issue_url: + description: 'GitHub issue URL' + required: true + type: string + +permissions: + id-token: write + contents: write + pull-requests: write + issues: write + +jobs: + fix-bug: + if: github.event.label.name == 'bug' || github.event_name == 'workflow_dispatch' + runs-on: ubuntu-latest + steps: + - name: Determine issue URL + id: issue + env: + INPUT_URL: ${{ inputs.issue_url }} + EVENT_URL: ${{ github.event.issue.html_url }} + EVENT_NAME: ${{ github.event_name }} + run: | + if [ "$EVENT_NAME" = "workflow_dispatch" ]; then + echo "url=$INPUT_URL" >> "$GITHUB_OUTPUT" + else + echo "url=$EVENT_URL" >> "$GITHUB_OUTPUT" + fi + + - name: Checkout + uses: actions/checkout@v6 + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v6 + with: + role-to-assume: ${{ secrets.HARNESS_AWS_ROLE_ARN }} + aws-region: us-west-2 + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.12' + + - name: Setup uv + uses: astral-sh/setup-uv@v7 + + - name: Run Bug Fixer Agent + working-directory: agents + env: + HARNESS_ARN: ${{ secrets.HARNESS_ARN }} + GH_TOKEN: ${{ secrets.PAT_TOKEN }} + ISSUE_URL: ${{ steps.issue.outputs.url }} + run: uv sync && uv run python -m bug_fixer.main --issue "$ISSUE_URL" diff --git a/.github/workflows/feature-builder.yml b/.github/workflows/feature-builder.yml new file mode 100644 index 000000000..8cd67b04e --- /dev/null +++ b/.github/workflows/feature-builder.yml @@ -0,0 +1,81 @@ +name: Feature Builder Agent + +on: + workflow_dispatch: + inputs: + devex_doc: + description: 'Path to devex doc (relative to repo root)' + required: true + type: string + impl_doc: + description: 'Path to implementation plan 
(relative to repo root)' + required: true + type: string + feature_name: + description: 'Feature name (used for branch naming)' + required: true + type: string + +permissions: + id-token: write + contents: write + pull-requests: write + +jobs: + build-feature: + runs-on: ubuntu-latest + steps: + - name: Validate inputs + env: + FEATURE_NAME: ${{ inputs.feature_name }} + DEVEX_DOC: ${{ inputs.devex_doc }} + IMPL_DOC: ${{ inputs.impl_doc }} + run: | + if [[ ! "$FEATURE_NAME" =~ ^[a-zA-Z0-9_-]+$ ]]; then + echo "Error: feature_name must be alphanumeric with hyphens/underscores only" + exit 1 + fi + if [[ "$DEVEX_DOC" == *".."* ]] || [[ "$IMPL_DOC" == *".."* ]]; then + echo "Error: doc paths must not contain '..'" + exit 1 + fi + if [[ ! "$DEVEX_DOC" == *.md ]]; then + echo "Error: devex_doc must be a .md file" + exit 1 + fi + if [[ ! "$IMPL_DOC" == *.md ]]; then + echo "Error: impl_doc must be a .md file" + exit 1 + fi + + - name: Checkout + uses: actions/checkout@v6 + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v6 + with: + role-to-assume: ${{ secrets.HARNESS_AWS_ROLE_ARN }} + aws-region: us-west-2 + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.12' + + - name: Setup uv + uses: astral-sh/setup-uv@v7 + + - name: Run Feature Builder Agent + working-directory: agents + env: + HARNESS_ARN: ${{ secrets.HARNESS_ARN }} + GH_TOKEN: ${{ secrets.PAT_TOKEN }} + DEVEX_DOC: ${{ inputs.devex_doc }} + IMPL_DOC: ${{ inputs.impl_doc }} + FEATURE_NAME: ${{ inputs.feature_name }} + run: | + uv sync + uv run python -m feature_builder.main \ + --devex "../$DEVEX_DOC" \ + --impl "../$IMPL_DOC" \ + --name "$FEATURE_NAME" diff --git a/agents/.gitignore b/agents/.gitignore new file mode 100644 index 000000000..77a810105 --- /dev/null +++ b/agents/.gitignore @@ -0,0 +1,3 @@ +config.yaml +__pycache__/ +.venv/ diff --git a/agents/bug_fixer/main.py b/agents/bug_fixer/main.py new file mode 100644 index 
000000000..df1ba5b78 --- /dev/null +++ b/agents/bug_fixer/main.py @@ -0,0 +1,40 @@ +"""Bug Fixer Agent — resolves GitHub issues labeled 'bug'. + +Usage: + uv run bug_fixer/main.py --issue https://github.com/aws/agentcore-cli/issues/123 + uv run bug_fixer/main.py --issue https://github.com/aws/agentcore-cli/issues/123 --config config.yaml +""" + +import argparse +import sys +from pathlib import Path + +from orchestrations.fix_and_review.orchestrator import run_pipeline + +PROMPTS_DIR = Path(__file__).resolve().parent / "prompts" + + +def main(): + parser = argparse.ArgumentParser(description="Bug Fixer Agent") + parser.add_argument("--issue", required=True, help="GitHub issue URL") + parser.add_argument("--config", default="config.yaml", help="Config YAML path") + parser.add_argument("--aws-profile", help="Override AWS profile") + parser.add_argument("--harness-arn", help="Override harness ARN") + args = parser.parse_args() + + overrides = {} + if args.aws_profile: + overrides["aws_profile"] = args.aws_profile + if args.harness_arn: + overrides["harness_arn"] = args.harness_arn + + return run_pipeline( + issue_url=args.issue, + config_path=args.config, + prompts_dir=PROMPTS_DIR, + **overrides, + ) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/agents/bug_fixer/prompts/executor.md b/agents/bug_fixer/prompts/executor.md new file mode 100644 index 000000000..722ea6f2f --- /dev/null +++ b/agents/bug_fixer/prompts/executor.md @@ -0,0 +1,21 @@ +You are a senior software engineer implementing a planned change across one or two TypeScript repos. + +The plan: +{plan} + +Instructions: +1. Follow the plan exactly. Make the code changes described. +2. COMMIT IMMEDIATELY after writing your changes: `git add -A && git commit -m "feat: {commit_message}"` +3. Run `npm run typecheck 2>&1 | tail -20`. If there are type errors, fix them and commit again. +4. 
Run ONLY targeted tests for files you changed: + - `npx vitest run --project unit path/to/relevant.test.ts 2>&1 | tail -30` + - Run 1-5 targeted test files, NOT the full suite. +5. If targeted tests fail, fix and commit again. +6. Push to remote: `git push origin {branch_name}` + +CRITICAL RULES: +- COMMIT EARLY AND OFTEN. Your first commit should happen BEFORE running typecheck. Commit after every fix. A commit with typecheck errors is better than no commit at all. +- If typecheck has more than 5 errors, fix the most critical ones, commit what you have, and move on. Do NOT spend more than 3 attempts fixing typecheck. +- Do NOT run `npm run test:unit` (full suite). Only run targeted tests. +- CI will run the full test suite after the PR is created. +- Always pipe output through `| tail -30`. diff --git a/agents/bug_fixer/prompts/fixer.md b/agents/bug_fixer/prompts/fixer.md new file mode 100644 index 000000000..b108e594e --- /dev/null +++ b/agents/bug_fixer/prompts/fixer.md @@ -0,0 +1,21 @@ +You are a senior software engineer fixing issues found during code review. + +The following findings were reported by reviewers. Address each one: + +{findings_text} + +Instructions: +1. Fix each finding, starting with Critical severity first, then High, Medium, Low. +2. If a finding is not applicable or is a false positive, explain why in a commit message. +3. Run `npm run typecheck 2>&1 | tail -20` in each affected repo after fixes. +4. Run ONLY targeted tests for files you changed: + - `npx vitest run --project unit path/to/relevant.test.ts 2>&1 | tail -30` + - Run 1-5 targeted test files, NOT the full suite. +5. If targeted tests fail, fix and re-run only those tests. +6. Commit: `git add -A && git commit -m "fix: address review findings round {round_number}"` +7. Push: `git push origin {branch_name}` + +IMPORTANT: +- Do NOT run `npm run test:unit` (full suite). It takes too long. Only run targeted tests. +- CI will validate the full suite after PR creation. 
+- Always pipe test output through `| tail -30`. diff --git a/agents/bug_fixer/prompts/planner.md b/agents/bug_fixer/prompts/planner.md new file mode 100644 index 000000000..a651788b2 --- /dev/null +++ b/agents/bug_fixer/prompts/planner.md @@ -0,0 +1,20 @@ +You are a senior software architect planning a fix for a GitHub issue. + +You have access to two TypeScript repositories: +- agentcore-cli: AWS AgentCore CLI tool (Commander.js + Ink TUI, ~550 source files) +- agentcore-l3-cdk-constructs: AWS CDK L3 constructs for AgentCore (~17 test files, shares schemas with CLI) + +The issue details are: +{issue_details} + +Analyze the issue, explore the relevant code in both repos, and produce a structured implementation plan. + +Your plan MUST include: +1. **Affected repos**: Which repos need changes (cli, cdk, or both) +2. **Files to change**: Exact file paths to modify, create, or delete +3. **Approach**: Step-by-step description of the changes +4. **Risks**: What could go wrong, edge cases to watch for +5. **Testing strategy**: What tests to add or modify, how to verify the fix +6. **Scope estimate**: small (1-3 files), medium (4-7 files), or large (8+ files) + +Output the plan as structured markdown. diff --git a/agents/bug_fixer/prompts/reviewer.md b/agents/bug_fixer/prompts/reviewer.md new file mode 100644 index 000000000..3fd90784b --- /dev/null +++ b/agents/bug_fixer/prompts/reviewer.md @@ -0,0 +1,39 @@ +You are a senior code reviewer. Review ONLY the diff on the feature branch. + +Issue being solved: {issue_summary} +Branch: {branch_name} +Repos: {cli_repo}, {cdk_repo} + +Your assigned focus: {focus} +Files to focus on: {assigned_files} + +Instructions: +1. Clone the repo: git clone --depth 10 --branch {branch_name} https://github.com/{cli_repo}.git agentcore-cli 2>&1 | tail -3 + (If branch doesn't exist, clone main instead) +2. Run: cd agentcore-cli && git diff main +3. 
Read ONLY the changed files and their immediate context (the functions/classes that were modified). +4. If you need to check a caller or type, read at most 1-2 additional files. No more. +5. Produce your verdict. + +{previous_findings_context} + +CONSTRAINTS: +- Stay focused on the diff and immediately related code. Do not explore unrelated parts of the codebase. +- Focus on: correctness, breaking changes, obvious bugs, missing error handling. Skip style nits. +- If the code looks correct and doesn't break anything, approve it. +- Do NOT run npm install, npm test, or any build commands. + +Output your review as a JSON object wrapped in ```json fences: +{{ + "approved": boolean, + "findings": [ + {{ + "severity": "critical" | "high" | "medium" | "low", + "file": "path/to/file", + "line": number, + "description": "what's wrong", + "suggestion": "how to fix" + }} + ] +}} +Output ONLY the JSON object in code fences. No other text before or after. diff --git a/agents/bug_fixer/prompts/setup.md b/agents/bug_fixer/prompts/setup.md new file mode 100644 index 000000000..75066446d --- /dev/null +++ b/agents/bug_fixer/prompts/setup.md @@ -0,0 +1,23 @@ +You are setting up a development environment to solve a GitHub issue. + +Steps: +1. Install tools and configure node 20 as default: + dnf install -y -q git nodejs20 > /dev/null 2>&1 + ln -sf /usr/bin/node-20 /usr/local/bin/node + ln -sf /usr/lib/nodejs20/lib/node_modules/npm/bin/npm-cli.js /usr/local/bin/npm + export PATH=/usr/local/bin:$PATH +2. Authenticate GitHub: echo $GH_TOKEN | gh auth login --with-token +3. Configure git to use gh for auth: gh auth setup-git +4. Clone both repos: + - git clone https://github.com/{cli_repo}.git {cli_repo_name} + - git clone https://github.com/{cdk_repo}.git {cdk_repo_name} +5. Install dependencies: cd {cli_repo_name} && npm install 2>&1 | tail -3 && cd .. +6. Fetch the issue details: gh issue view {issue_url} --json title,body,labels,comments,assignees +7. 
Create a feature branch in both repos: + - cd {cli_repo_name} && git checkout -b {branch_name} && cd .. + - cd {cdk_repo_name} && git checkout -b {branch_name} && cd .. +8. Report back: the issue title, body, and which repos likely need changes based on the issue content. + +IMPORTANT: Run each step as a separate shell command. Do not combine them. If tools are already installed, skip step 1. + +Output the issue details as structured markdown with sections: Title, Body, Labels, and Initial Assessment. diff --git a/agents/config.yaml b/agents/config.yaml new file mode 100644 index 000000000..6ebd0a945 --- /dev/null +++ b/agents/config.yaml @@ -0,0 +1,10 @@ +harness_arn: "arn:aws:bedrock-agentcore:us-west-2:603141041947:harness/IssueSolver_aidandal-8SL97TEXjS" +region: "us-west-2" +aws_profile: "deploy" +model_id: "global.anthropic.claude-opus-4-7" +min_reviewers: 3 +max_reviewers: 5 +max_review_rounds: 5 +repos: + cli: "aws/agentcore-cli" + cdk: "aws/agentcore-l3-cdk-constructs" diff --git a/agents/config.yaml.example b/agents/config.yaml.example new file mode 100644 index 000000000..0f233d50e --- /dev/null +++ b/agents/config.yaml.example @@ -0,0 +1,10 @@ +harness_arn: "arn:aws:bedrock-agentcore:us-west-2:YOUR_ACCOUNT_ID:harness/YOUR_HARNESS_ID" +region: "us-west-2" +aws_profile: "deploy" +model_id: "global.anthropic.claude-opus-4-7" +min_reviewers: 3 +max_reviewers: 5 +max_review_rounds: 5 +repos: + cli: "aws/agentcore-cli" + cdk: "aws/agentcore-l3-cdk-constructs" diff --git a/agents/core/__init__.py b/agents/core/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/agents/core/config.py b/agents/core/config.py new file mode 100644 index 000000000..fb81a3a97 --- /dev/null +++ b/agents/core/config.py @@ -0,0 +1,30 @@ +from dataclasses import dataclass + +import yaml + + +@dataclass +class PipelineConfig: + harness_arn: str + region: str = "us-west-2" + data_plane_endpoint: str | None = None + aws_profile: str = "deploy" + model_id: str = 
"global.anthropic.claude-opus-4-7" + min_reviewers: int = 3 + max_reviewers: int = 5 + max_review_rounds: int = 5 + cli_repo: str = "aws/agentcore-cli" + cdk_repo: str = "aws/agentcore-l3-cdk-constructs" + + @classmethod + def from_yaml(cls, path: str) -> "PipelineConfig": + with open(path) as f: + data = yaml.safe_load(f) + + repos = data.pop("repos", {}) + if "cli" in repos: + data["cli_repo"] = repos["cli"] + if "cdk" in repos: + data["cdk_repo"] = repos["cdk"] + + return cls(**{k: v for k, v in data.items() if k in cls.__dataclass_fields__}) diff --git a/agents/core/harness_client.py b/agents/core/harness_client.py new file mode 100644 index 000000000..2bd438638 --- /dev/null +++ b/agents/core/harness_client.py @@ -0,0 +1,187 @@ +import json +import sys +import time +import uuid +from urllib.parse import quote + +import boto3 +import urllib3 +from botocore.auth import SigV4Auth +from botocore.awsrequest import AWSRequest +from botocore.config import Config as BotoConfig +from botocore.eventstream import EventStreamBuffer + +from core.config import PipelineConfig + + +class HarnessClient: + def __init__(self, config: PipelineConfig): + self.config = config + self.session = boto3.Session( + region_name=config.region, + profile_name=config.aws_profile, + ) + self.http = urllib3.PoolManager() + self.client = self.session.client( + "bedrock-agentcore", + config=BotoConfig(read_timeout=600, connect_timeout=30, retries={"max_attempts": 2}), + ) + + def invoke( + self, + session_id: str, + message: str, + system_prompt: str | None = None, + max_iterations: int | None = None, + verbose: bool = True, + retries: int = 2, + ) -> str: + for attempt in range(retries + 1): + try: + return self._invoke_once(session_id, message, system_prompt, max_iterations, verbose) + except (urllib3.exceptions.ProtocolError, urllib3.exceptions.ReadTimeoutError, ConnectionResetError) as e: + if attempt < retries: + if verbose: + print(f"\n ⚠️ Connection error (attempt {attempt + 1}/{retries + 
1}): {e}. Retrying...", flush=True) + time.sleep(5) + else: + if verbose: + print(f"\n ⚠️ Connection error after {retries + 1} attempts: {e}", flush=True) + raise + + def _invoke_once( + self, + session_id: str, + message: str, + system_prompt: str | None = None, + max_iterations: int | None = None, + verbose: bool = True, + ) -> str: + body: dict = { + "runtimeSessionId": session_id, + "messages": [{"role": "user", "content": [{"text": message}]}], + "model": {"bedrockModelConfig": {"modelId": self.config.model_id}}, + } + if system_prompt: + body["systemPrompt"] = [{"text": system_prompt}] + if max_iterations: + body["maxIterations"] = max_iterations + + region = self.config.region + arn = self.config.harness_arn + url = f"https://bedrock-agentcore.{region}.amazonaws.com/harnesses/invoke?harnessArn={quote(arn, safe='')}" + + body_bytes = json.dumps(body).encode() + request = AWSRequest(method="POST", url=url, data=body_bytes, headers={ + "Content-Type": "application/json", + "Accept": "application/vnd.amazon.eventstream", + }) + credentials = self.session.get_credentials().get_frozen_credentials() + SigV4Auth(credentials, "bedrock-agentcore", region).add_auth(request) + + response = self.http.urlopen( + "POST", url, body=body_bytes, + headers=dict(request.headers), + preload_content=False, + timeout=urllib3.Timeout(connect=30, read=900), + ) + + if response.status != 200: + error = response.read().decode("utf-8") + if verbose: + print(f"\n ⚠️ HTTP {response.status}: {error}", flush=True) + raise RuntimeError(f"InvokeHarness failed: HTTP {response.status}: {error}") + + request_id = response.headers.get("x-amzn-RequestId", "unknown") + if verbose: + print(f" [request: {request_id}]", flush=True) + self.last_request_id = request_id + + return self._accumulate_text_from_http(response, verbose=verbose) + + def run_command(self, session_id: str, command: str, verbose: bool = False) -> tuple[str, str, int]: + if verbose: + print(f" $ {command}", flush=True) + response 
= self.client.invoke_agent_runtime_command( + agentRuntimeArn=self.config.harness_arn, + runtimeSessionId=session_id, + body={"command": command}, + ) + request_id = response.get("ResponseMetadata", {}).get("RequestId", "unknown") + self.last_request_id = request_id + return self._accumulate_command(response["stream"], verbose=verbose) + + def _accumulate_text_from_http(self, http_response, verbose: bool = False) -> str: + text_parts: list[str] = [] + tool_input_parts: list[str] = [] + current_tool: str | None = None + event_buffer = EventStreamBuffer() + + for chunk in http_response.stream(4096): + event_buffer.add_data(chunk) + for event in event_buffer: + if event.headers.get(":message-type") == "exception": + payload = json.loads(event.payload.decode("utf-8")) + if verbose: + print(f"\n ⚠️ Stream error: {payload}", flush=True) + if text_parts: + return "".join(text_parts) + raise RuntimeError(f"Stream error: {payload}") + + event_type = event.headers.get(":event-type", "") + if not event.payload: + continue + payload = json.loads(event.payload.decode("utf-8")) + + if event_type == "contentBlockStart": + start = payload.get("start", {}) + if "toolUse" in start: + current_tool = start["toolUse"].get("name", "unknown") + tool_input_parts = [] + else: + current_tool = None + elif event_type == "contentBlockDelta": + delta = payload.get("delta", {}) + if "text" in delta: + text_parts.append(delta["text"]) + if verbose: + print(delta["text"], end="", flush=True) + elif "toolUse" in delta and current_tool: + tool_input_parts.append(delta["toolUse"].get("input", "")) + elif event_type == "contentBlockStop": + if current_tool and verbose: + tool_input = "".join(tool_input_parts) + print(f"\n 🔧 {current_tool}: {tool_input[:200]}", flush=True) + current_tool = None + tool_input_parts = [] + elif event_type == "messageStop" and verbose: + print(flush=True) + + return "".join(text_parts) + + def _accumulate_command(self, stream, verbose: bool = False) -> tuple[str, str, 
int]: + stdout_parts: list[str] = [] + stderr_parts: list[str] = [] + exit_code = -1 + for event in stream: + if "chunk" in event: + chunk = event["chunk"] + if "contentDelta" in chunk: + delta = chunk["contentDelta"] + if "stdout" in delta: + stdout_parts.append(delta["stdout"]) + if verbose: + print(delta["stdout"], end="", flush=True) + if "stderr" in delta: + stderr_parts.append(delta["stderr"]) + if verbose: + print(delta["stderr"], end="", file=sys.stderr, flush=True) + elif "contentStop" in chunk: + exit_code = chunk["contentStop"].get("exitCode", -1) + if verbose: + print(f" [exit: {exit_code}]", flush=True) + return "".join(stdout_parts), "".join(stderr_parts), exit_code + + @staticmethod + def new_session_id() -> str: + return str(uuid.uuid4()).upper() diff --git a/agents/core/parsing.py b/agents/core/parsing.py new file mode 100644 index 000000000..228bc23f3 --- /dev/null +++ b/agents/core/parsing.py @@ -0,0 +1,72 @@ +import json +import re +from typing import Literal + +from pydantic import BaseModel, ValidationError + + +class Finding(BaseModel): + severity: Literal["critical", "high", "medium", "low"] + file: str + line: int + description: str + suggestion: str + + +class ReviewResult(BaseModel): + approved: bool + findings: list[Finding] + + +def parse_reviewer_output(raw_text: str) -> ReviewResult | None: + json_str = _extract_json(raw_text) + if json_str is None: + return None + return _validate(json_str) + + +def _extract_json(raw_text: str) -> str | None: + match = re.search(r"```json?\s*\n(.*?)\n\s*```", raw_text, re.DOTALL) + if match: + return match.group(1).strip() + + start = raw_text.find("{") + if start == -1: + return None + + while start != -1: + depth = 0 + in_string = False + escape_next = False + for i in range(start, len(raw_text)): + c = raw_text[i] + if escape_next: + escape_next = False + continue + if c == "\\": + escape_next = True + continue + if c == '"': + in_string = not in_string + continue + if in_string: + continue + if c 
== "{": + depth += 1 + elif c == "}": + depth -= 1 + if depth == 0: + candidate = raw_text[start : i + 1] + if "approved" in candidate: + return candidate + break + start = raw_text.find("{", start + 1) + return None + + +def _validate(json_str: str) -> ReviewResult | None: + try: + data = json.loads(json_str) + return ReviewResult(**data) + except (json.JSONDecodeError, ValidationError): + return None diff --git a/agents/core/phases/__init__.py b/agents/core/phases/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/agents/feature_builder/__init__.py b/agents/feature_builder/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/agents/feature_builder/main.py b/agents/feature_builder/main.py new file mode 100644 index 000000000..bbd6a53f2 --- /dev/null +++ b/agents/feature_builder/main.py @@ -0,0 +1,64 @@ +"""Feature Builder Agent — builds features from devex + implementation docs. + +Usage: + uv run python -m feature_builder.main --devex docs/devex.md --impl docs/impl.md + uv run python -m feature_builder.main --devex docs/devex.md --impl docs/impl.md --config config.yaml +""" + +import argparse +import sys +from pathlib import Path + +from orchestrations.fix_and_review.orchestrator import run_pipeline + +PROMPTS_DIR = Path(__file__).resolve().parent / "prompts" + + +def main(): + parser = argparse.ArgumentParser(description="Feature Builder Agent") + parser.add_argument("--devex", required=True, help="Path to devex doc (markdown)") + parser.add_argument("--impl", required=True, help="Path to implementation plan (markdown)") + parser.add_argument("--name", help="Feature name (used for branch naming)") + parser.add_argument("--config", default="config.yaml", help="Config YAML path") + parser.add_argument("--aws-profile", help="Override AWS profile") + parser.add_argument("--harness-arn", help="Override harness ARN") + args = parser.parse_args() + + devex_path = Path(args.devex) + impl_path = Path(args.impl) + + if not 
devex_path.exists(): + print(f"Error: devex doc not found: {devex_path}", file=sys.stderr) + return 1 + if not impl_path.exists(): + print(f"Error: impl doc not found: {impl_path}", file=sys.stderr) + return 1 + + devex_content = devex_path.read_text() + impl_content = impl_path.read_text() + + feature_name = args.name or devex_path.stem.replace(" ", "-").lower() + + # Construct a synthetic "issue" that the orchestrator can consume + # The orchestrator expects an issue_url — we pass a placeholder and override the setup phase + issue_url = f"feature/{feature_name}" + + overrides = {} + if args.aws_profile: + overrides["aws_profile"] = args.aws_profile + if args.harness_arn: + overrides["harness_arn"] = args.harness_arn + + return run_pipeline( + issue_url=issue_url, + config_path=args.config, + prompts_dir=PROMPTS_DIR, + devex_content=devex_content, + impl_content=impl_content, + feature_name=feature_name, + **overrides, + ) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/agents/feature_builder/prompts/executor.md b/agents/feature_builder/prompts/executor.md new file mode 100644 index 000000000..77d974e28 --- /dev/null +++ b/agents/feature_builder/prompts/executor.md @@ -0,0 +1,21 @@ +You are a senior software engineer implementing a new feature across one or two TypeScript repos. + +The plan: +{plan} + +Instructions: +1. Follow the plan exactly. Make the code changes described. +2. COMMIT IMMEDIATELY after writing your changes: `git add -A && git commit -m "feat: {commit_message}"` +3. Run `npm run typecheck 2>&1 | tail -20`. If there are type errors, fix them and commit again. +4. Run ONLY targeted tests for files you changed: + - `npx vitest run --project unit path/to/relevant.test.ts 2>&1 | tail -30` + - Run 1-5 targeted test files, NOT the full suite. +5. If targeted tests fail, fix and commit again. +6. Push to remote: `git push origin {branch_name}` + +CRITICAL RULES: +- COMMIT EARLY AND OFTEN. 
Your first commit should happen BEFORE running typecheck. Commit after every fix. A commit with typecheck errors is better than no commit at all. +- If typecheck has more than 5 errors, fix the most critical ones, commit what you have, and move on. Do NOT spend more than 3 attempts fixing typecheck. +- Do NOT run `npm run test:unit` (full suite). Only run targeted tests. +- CI will run the full test suite after the PR is created. +- Always pipe output through `| tail -30`. diff --git a/agents/feature_builder/prompts/fixer.md b/agents/feature_builder/prompts/fixer.md new file mode 100644 index 000000000..b108e594e --- /dev/null +++ b/agents/feature_builder/prompts/fixer.md @@ -0,0 +1,21 @@ +You are a senior software engineer fixing issues found during code review. + +The following findings were reported by reviewers. Address each one: + +{findings_text} + +Instructions: +1. Fix each finding, starting with Critical severity first, then High, Medium, Low. +2. If a finding is not applicable or is a false positive, explain why in a commit message. +3. Run `npm run typecheck 2>&1 | tail -20` in each affected repo after fixes. +4. Run ONLY targeted tests for files you changed: + - `npx vitest run --project unit path/to/relevant.test.ts 2>&1 | tail -30` + - Run 1-5 targeted test files, NOT the full suite. +5. If targeted tests fail, fix and re-run only those tests. +6. Commit: `git add -A && git commit -m "fix: address review findings round {round_number}"` +7. Push: `git push origin {branch_name}` + +IMPORTANT: +- Do NOT run `npm run test:unit` (full suite). It takes too long. Only run targeted tests. +- CI will validate the full suite after PR creation. +- Always pipe test output through `| tail -30`. 
diff --git a/agents/feature_builder/prompts/planner.md b/agents/feature_builder/prompts/planner.md new file mode 100644 index 000000000..ee22e2956 --- /dev/null +++ b/agents/feature_builder/prompts/planner.md @@ -0,0 +1,27 @@ +You are a senior software architect planning the implementation of a new feature. + +You have access to two TypeScript repositories: +- agentcore-cli: AWS AgentCore CLI tool (Commander.js + Ink TUI, ~550 source files) +- agentcore-l3-cdk-constructs: AWS CDK L3 constructs for AgentCore (~17 test files, shares schemas with CLI) + +## DevEx Document (what the user experience should be) + +{devex_content} + +## Implementation Plan (technical approach) + +{impl_content} + +## Your Task + +Based on the devex doc and implementation plan above, explore the relevant code in both repos and produce a detailed, actionable implementation plan. + +Your plan MUST include: +1. **Affected repos**: Which repos need changes (cli, cdk, or both) +2. **Files to change**: Exact file paths to modify, create, or delete +3. **Approach**: Step-by-step description of the changes, referencing specific functions and types in the codebase +4. **Risks**: What could go wrong, edge cases to watch for +5. **Testing strategy**: What tests to add or modify, how to verify the feature works +6. **Scope estimate**: small (1-3 files), medium (4-7 files), or large (8+ files) + +Output the plan as structured markdown. diff --git a/agents/feature_builder/prompts/reviewer.md b/agents/feature_builder/prompts/reviewer.md new file mode 100644 index 000000000..70fe76285 --- /dev/null +++ b/agents/feature_builder/prompts/reviewer.md @@ -0,0 +1,39 @@ +You are a senior code reviewer. Review ONLY the diff on the feature branch. + +Feature being built: {issue_summary} +Branch: {branch_name} +Repos: {cli_repo}, {cdk_repo} + +Your assigned focus: {focus} +Files to focus on: {assigned_files} + +Instructions: +1. 
Clone the repo: git clone --depth 10 --branch {branch_name} https://github.com/{cli_repo}.git agentcore-cli 2>&1 | tail -3 + (If branch doesn't exist, clone main instead) +2. Run: cd agentcore-cli && git diff main +3. Read ONLY the changed files and their immediate context (the functions/classes that were modified). +4. If you need to check a caller or type, read at most 1-2 additional files. No more. +5. Produce your verdict. + +{previous_findings_context} + +CONSTRAINTS: +- Stay focused on the diff and immediately related code. Do not explore unrelated parts of the codebase. +- Focus on: correctness, breaking changes, obvious bugs, missing error handling. Skip style nits. +- If the code looks correct and doesn't break anything, approve it. +- Do NOT run npm install, npm test, or any build commands. + +Output your review as a JSON object wrapped in ```json fences: +{{ + "approved": boolean, + "findings": [ + {{ + "severity": "critical" | "high" | "medium" | "low", + "file": "path/to/file", + "line": number, + "description": "what's wrong", + "suggestion": "how to fix" + }} + ] +}} +Output ONLY the JSON object in code fences. No other text before or after. diff --git a/agents/feature_builder/prompts/setup.md b/agents/feature_builder/prompts/setup.md new file mode 100644 index 000000000..3757a4f55 --- /dev/null +++ b/agents/feature_builder/prompts/setup.md @@ -0,0 +1,22 @@ +You are setting up a development environment to build a new feature. + +Steps: +1. Install tools and configure node 20 as default: + dnf install -y -q git nodejs20 > /dev/null 2>&1 + ln -sf /usr/bin/node-20 /usr/local/bin/node + ln -sf /usr/lib/nodejs20/lib/node_modules/npm/bin/npm-cli.js /usr/local/bin/npm + export PATH=/usr/local/bin:$PATH +2. Authenticate GitHub: echo $GH_TOKEN | gh auth login --with-token +3. Configure git to use gh for auth: gh auth setup-git +4. 
Clone both repos: + - git clone https://github.com/{cli_repo}.git {cli_repo_name} + - git clone https://github.com/{cdk_repo}.git {cdk_repo_name} +5. Install dependencies: cd {cli_repo_name} && npm install 2>&1 | tail -3 && cd .. +6. Create a feature branch in both repos: + - cd {cli_repo_name} && git checkout -b {branch_name} && cd .. + - cd {cdk_repo_name} && git checkout -b {branch_name} && cd .. +7. Report back confirmation that the environment is ready. + +IMPORTANT: Run each step as a separate shell command. Do not combine them. If tools are already installed, skip step 1. + +Output: Confirm environment is ready and which repos are cloned. diff --git a/agents/orchestrations/__init__.py b/agents/orchestrations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/agents/orchestrations/fix_and_review/__init__.py b/agents/orchestrations/fix_and_review/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/agents/orchestrations/fix_and_review/orchestrator.py b/agents/orchestrations/fix_and_review/orchestrator.py new file mode 100644 index 000000000..2eb1ab194 --- /dev/null +++ b/agents/orchestrations/fix_and_review/orchestrator.py @@ -0,0 +1,259 @@ +import time +from pathlib import Path + +from core.config import PipelineConfig +from core.harness_client import HarnessClient +from core.parsing import Finding +from orchestrations.fix_and_review.partitioning import ( + ReviewerAssignment, + calculate_reviewer_count, + partition_round1_by_directory, + partition_round2_focus_prompts, + partition_round3_risk_areas, +) +from orchestrations.fix_and_review.phases.aggregate import run_aggregate +from orchestrations.fix_and_review.phases.complete import run_complete +from orchestrations.fix_and_review.phases.execute import run_execute +from orchestrations.fix_and_review.phases.extract import ExtractResult, run_extract +from orchestrations.fix_and_review.phases.fix import run_fix +from orchestrations.fix_and_review.phases.plan import 
def run_pipeline(
    issue_url: str,
    config_path: str = "config.yaml",
    prompts_dir: str | Path | None = None,
    devex_content: str | None = None,
    impl_content: str | None = None,
    feature_name: str | None = None,
    **overrides: str,
) -> int:
    """Run the end-to-end fix/feature pipeline against a Harness session.

    Phases: setup -> plan -> validate -> execute/verify (retried) -> extract
    -> review/aggregate/fix loop -> complete (rebase, push, PR creation).

    Args:
        issue_url: GitHub issue URL (bug-fix mode).
        config_path: Path to the pipeline YAML config.
        prompts_dir: Optional override for the prompt-template directory.
        devex_content: Devex doc text; its presence switches to feature mode.
        impl_content: Implementation-plan text (feature mode only).
        feature_name: Identifier used for the feature branch name.
        **overrides: String overrides for PipelineConfig fields; values for
            int-typed fields are coerced to int.

    Returns:
        0 when at least one PR was created, 1 otherwise.
    """
    config = PipelineConfig.from_yaml(config_path)
    for key, value in overrides.items():
        if hasattr(config, key):
            field_type = type(getattr(config, key))
            if field_type == int:
                value = int(value)
            setattr(config, key, value)

    if prompts_dir:
        set_prompts_dir(Path(prompts_dir))

    # Feature mode is signalled by the presence of a devex doc.
    is_feature = devex_content is not None
    if is_feature:
        issue_number = feature_name or "feature"
        branch_name = f"feature/{feature_name or 'unnamed'}"
    else:
        issue_number = issue_url.rstrip("/").split("/")[-1]
        # Short random suffix keeps retried runs from colliding on a branch.
        short_id = HarnessClient.new_session_id()[:8].lower()
        branch_name = f"fix/{issue_number}-{short_id}"

    client = HarnessClient(config)
    session_id = HarnessClient.new_session_id()

    pipeline_start = time.time()
    print("=== Pipeline Start ===")
    print(f"{'Feature' if is_feature else 'Issue'}: {feature_name or issue_url}")
    print(f"Session: {session_id}")
    print(f"Harness: {config.harness_arn}")
    print()

    def elapsed() -> str:
        # Wall-clock since pipeline start, formatted as "MmSSs".
        m, s = divmod(int(time.time() - pipeline_start), 60)
        return f"{m}m{s:02d}s"

    # Phase 0: Setup
    t0 = time.time()
    print("--- Phase 0: Setup ---")
    issue_details = run_setup(client, config, session_id, issue_url,
                              feature_name=feature_name, branch_name=branch_name)
    if is_feature:
        issue_title = feature_name or "unnamed feature"
    else:
        issue_title_raw, _, _ = client.run_command(
            session_id, f"gh issue view {issue_url} --json title --jq .title 2>/dev/null"
        )
        issue_title = issue_title_raw.strip() or f"resolve #{issue_number}"
    print(f"Setup complete. {'Feature' if is_feature else 'Issue'}: {issue_title} [{int(time.time()-t0)}s | total {elapsed()}]")
    print()

    # Phase 1: Plan
    t0 = time.time()
    print("--- Phase 1: Plan ---")
    if is_feature:
        plan = run_plan(client, config, session_id, issue_details,
                        devex_content=devex_content, impl_content=impl_content)
    else:
        plan = run_plan(client, config, session_id, issue_details)
    print(f"Plan generated ({len(plan)} chars). [{int(time.time()-t0)}s | total {elapsed()}]")
    print()

    # Phase 1.5: Validate Plan — re-plan up to twice on validation errors.
    t0 = time.time()
    print("--- Phase 1.5: Validate Plan ---")
    for attempt in range(3):
        validation = run_validate(client, session_id, plan)
        if validation.valid:
            print(f"Plan validated. [{int(time.time()-t0)}s | total {elapsed()}]")
            break
        print(f"Validation errors: {validation.errors}")
        if attempt < 2:
            print("Re-planning...")
            plan = run_plan(
                client, config, session_id,
                f"Previous plan had issues: {validation.errors}\n\n{issue_details}",
            )
        else:
            print("WARNING: Plan validation failed after 3 attempts. Proceeding anyway.")
    print()

    # Phase 2: Execute
    t0 = time.time()
    print("--- Phase 2: Execute ---")
    # NOTE(review): substring match — any plan mentioning "cli"/"cdk" flags
    # the corresponding repo; falls back to the CLI repo when neither hits.
    affected_repos: list[str] = []
    if "agentcore-cli" in plan.lower() or "cli" in plan.lower():
        affected_repos.append("agentcore-cli")
    if "agentcore-l3-cdk" in plan.lower() or "cdk" in plan.lower():
        affected_repos.append("agentcore-l3-cdk-constructs")
    if not affected_repos:
        affected_repos = ["agentcore-cli"]

    for attempt in range(3):
        run_execute(client, config, session_id, plan, branch_name, issue_number)
        print(f"Execution complete. [{int(time.time()-t0)}s | total {elapsed()}]")

        # Phase 2.5: Verify
        print("--- Phase 2.5: Verify ---")
        verification = run_verify(client, session_id, branch_name, affected_repos)
        if verification.all_passed:
            print(f"Verification passed. [{int(time.time()-t0)}s | total {elapsed()}]")
            break
        print(f"Verification failed: {verification.errors}")
        if attempt < 2:
            print("Re-executing with error context...")
            # BUGFIX: previously the plan was re-run unchanged despite the
            # message above, so the retry never saw what went wrong. Append
            # the verification errors so the next attempt can address them.
            plan = (
                f"{plan}\n\nThe previous execution attempt failed verification "
                f"with these errors:\n{verification.errors}\n"
                f"Fix these problems in this attempt."
            )
        else:
            print("WARNING: Verification failed after 3 attempts. Proceeding to review anyway.")
    print()

    # Phase 3: Extract
    t0 = time.time()
    print("--- Phase 3: Extract ---")
    extract = run_extract(client, session_id, config.cli_repo, config.cdk_repo)
    print(
        f"Extracted diff: {len(extract.stats.changed_files)} files, "
        f"{extract.stats.total_lines} lines changed [{int(time.time()-t0)}s | total {elapsed()}]"
    )
    if not extract.stats.changed_files:
        print("\n=== Pipeline Failed — no changes were produced. Agent may have failed to commit. ===")
        return 1
    print()

    # Review Loop — up to config.max_review_rounds of review/aggregate/fix.
    all_previous_findings_files: list[str] = []
    review_summary_parts: list[str] = []

    for round_num in range(1, config.max_review_rounds + 1):
        t0 = time.time()
        # Phase 4: Review — partitioning strategy differs per round.
        print(f"--- Phase 4: Review (Round {round_num}) ---")
        num_reviewers = calculate_reviewer_count(
            extract.stats, config.min_reviewers, config.max_reviewers
        )

        if round_num == 1:
            assignments = partition_round1_by_directory(
                extract.stats.changed_files, num_reviewers
            )
        elif round_num == 2:
            focus_prompts = partition_round2_focus_prompts(num_reviewers)
            assignments = [
                ReviewerAssignment(files=extract.stats.changed_files, focus=fp)
                for fp in focus_prompts
            ]
        else:
            assignments = partition_round3_risk_areas(
                all_previous_findings_files, extract.stats.changed_files, num_reviewers
            )

        previous_context = ""
        if round_num > 1:
            previous_context = (
                f"These findings were identified and fixed in previous rounds: "
                f"{', '.join(all_previous_findings_files)}. "
                f"Do not re-raise issues that have already been addressed."
            )

        issue_summary = issue_details[:500] if issue_details else "See branch for details"
        review_results = run_review(
            client, config, assignments, branch_name, issue_summary, previous_context
        )
        print(f"Reviews collected from {len(review_results)} reviewers. [{int(time.time()-t0)}s | total {elapsed()}]")

        # Phase 5: Aggregate
        print(f"--- Phase 5: Aggregate (Round {round_num}) ---")
        aggregate = run_aggregate(review_results)
        print(
            f"Approved: {aggregate.all_approved}, "
            f"Findings: {len(aggregate.unique_findings)}, "
            f"Parse failures: {aggregate.parse_failures}"
        )

        if aggregate.all_approved:
            # Approval only ends the loop if no medium-or-worse findings remain.
            medium_plus = [
                f for f in aggregate.unique_findings
                if f.severity in ("critical", "high", "medium")
            ]
            if not medium_plus:
                print(f"All reviewers approved. Moving to Complete. [total {elapsed()}]")
                review_summary_parts.append(
                    f"Round {round_num}: {len(aggregate.unique_findings)} findings, all approved"
                )
                break

        review_summary_parts.append(
            f"Round {round_num}: {len(aggregate.unique_findings)} findings"
        )

        for f in aggregate.unique_findings:
            if f.file and f.file not in all_previous_findings_files:
                all_previous_findings_files.append(f.file)

        # Phase 6: Fix
        t_fix = time.time()
        print(f"--- Phase 6: Fix (Round {round_num}) ---")
        run_fix(client, config, session_id, aggregate.unique_findings, branch_name, round_num)
        print(f"Fixes applied. [{int(time.time()-t_fix)}s | total {elapsed()}]")

        # Re-extract for next round
        extract = run_extract(client, session_id, config.cli_repo, config.cdk_repo)
        print()
    else:
        print(
            f"WARNING: Max review rounds ({config.max_review_rounds}) reached "
            f"without full approval."
        )

    # Phase 8: Complete
    t0 = time.time()
    print("--- Phase 8: Complete ---")
    review_summary = "\n".join(review_summary_parts)
    result = run_complete(
        client, config, session_id, branch_name, issue_url, issue_number,
        issue_title, review_summary, affected_repos,
    )

    if result.pr_urls:
        print(f"\n=== Pipeline Complete [{elapsed()}] ===")
        for url in result.pr_urls:
            print(f"PR: {url}")
    else:
        print(f"\n=== Pipeline Failed [{elapsed()}] ===")
        print(f"Errors: {result.errors}")

    return 0 if result.pr_urls else 1
@dataclass
class DiffStats:
    """Summary statistics for an extracted diff."""

    changed_files: list[str]  # paths touched by the diff
    total_lines: int  # total changed-line count across all files
    cross_repo: bool  # True when the diff spans both repos


@dataclass
class ReviewerAssignment:
    """One unit of review work: the files to read and the angle to take."""

    files: list[str]
    focus: str = ""


def calculate_reviewer_count(stats: DiffStats, min_r: int = 3, max_r: int = 5) -> int:
    """Pick how many reviewers a diff warrants, clamped to at most ``max_r``.

    Small diffs (<= 2 files and < 100 lines) get ``min_r``; mid-sized diffs
    (<= 7 files and < 500 lines) get one extra; anything larger gets
    ``max_r``. A cross-repo diff earns one additional reviewer, but the
    result never exceeds ``max_r``.
    """
    n_files = len(stats.changed_files)
    n_lines = stats.total_lines

    if n_files <= 2 and n_lines < 100:
        count = min_r
    elif n_files <= 7 and n_lines < 500:
        count = min_r + 1
    else:
        count = max_r

    if stats.cross_repo:
        count = count + 1

    return min(count, max_r)
def partition_round1_by_directory(
    changed_files: list[str], num_reviewers: int
) -> list[ReviewerAssignment]:
    """Split changed files into per-directory reviewer assignments.

    Files are bucketed by their first two path components. When there are at
    least as many buckets as reviewers, the largest buckets each get a
    dedicated reviewer and the leftovers are pooled into the final slot.
    Otherwise every bucket gets a reviewer and any spare slots re-examine
    the largest bucket under rotating focus prompts.
    """
    if not changed_files:
        return [ReviewerAssignment(files=[], focus="No files to review")]

    buckets: dict[str, list[str]] = defaultdict(list)
    for path in changed_files:
        segments = PurePosixPath(path).parts
        if not segments:
            bucket_key = "root"
        elif len(segments) == 1:
            bucket_key = segments[0]
        else:
            bucket_key = "/".join(segments[:2])
        buckets[bucket_key].append(path)

    # Largest buckets first, so dedicated reviewers cover the biggest areas.
    ordered = sorted(buckets.items(), key=lambda kv: -len(kv[1]))

    if len(ordered) >= num_reviewers:
        assignments = [
            ReviewerAssignment(files=group, focus=f"Review changes in {key}/")
            for key, group in ordered[: num_reviewers - 1]
        ]
        leftovers: list[str] = []
        for _, group in ordered[num_reviewers - 1:]:
            leftovers.extend(group)
        assignments.append(
            ReviewerAssignment(files=leftovers, focus="Review remaining changes")
        )
        return assignments

    # Fewer buckets than reviewers: one reviewer per bucket, then fill the
    # remaining slots with themed passes over the largest bucket.
    assignments = [
        ReviewerAssignment(files=group, focus=f"Review changes in {key}/")
        for key, group in ordered
    ]
    themes = [
        "Focus on error handling and edge cases",
        "Focus on type safety and interface consistency",
        "Focus on backwards compatibility and breaking changes",
    ]
    biggest = ordered[0][1] if ordered else changed_files
    slot = 0
    while len(assignments) < num_reviewers:
        assignments.append(
            ReviewerAssignment(files=biggest, focus=themes[slot % len(themes)])
        )
        slot += 1
    return assignments
def partition_round3_risk_areas(
    previous_findings_files: list[str],
    all_changed_files: list[str],
    num_reviewers: int,
) -> list[ReviewerAssignment]:
    """Build round-3 assignments that concentrate on previously-flagged files.

    Roughly half the reviewers (at least one) deep-dive the files flagged in
    earlier rounds; the remaining reviewers sweep every changed file, with
    the flagged files listed first.

    Args:
        previous_findings_files: Files flagged by findings in earlier rounds
            (may contain duplicates).
        all_changed_files: Every file changed on the branch.
        num_reviewers: Total reviewer slots to fill.

    Returns:
        ``max(1, num_reviewers // 2)`` risk-focused assignments followed by
        broad-review assignments for the remaining slots.
    """
    # BUGFIX: dict.fromkeys dedupes while preserving input order; the old
    # list(set(...)) produced a nondeterministic file order, making reviewer
    # assignments unstable from run to run.
    risk_files = list(dict.fromkeys(previous_findings_files))
    risk_set = set(risk_files)  # O(1) membership instead of O(n) list scans
    non_risk_files = [f for f in all_changed_files if f not in risk_set]

    assignments: list[ReviewerAssignment] = []
    risk_reviewer_count = max(1, num_reviewers // 2)
    for i in range(risk_reviewer_count):
        assignments.append(
            ReviewerAssignment(
                files=risk_files,
                focus=f"Deep review of previously-flagged files — verify fixes are correct and complete (pass {i + 1})",
            )
        )

    remaining = num_reviewers - risk_reviewer_count
    broader_files = risk_files + non_risk_files
    for i in range(remaining):
        assignments.append(
            ReviewerAssignment(
                files=broader_files,
                focus=f"Broad review with emphasis on how fixes interact with surrounding code (pass {i + 1})",
            )
        )

    return assignments
def run_aggregate(
    review_results: list[tuple[ReviewResult | None, str]],
) -> AggregateResult:
    """Merge per-reviewer results into a single aggregate verdict.

    A reviewer whose output could not be parsed counts as a rejection and
    contributes a synthetic high-severity finding so the failure stays
    visible downstream. Duplicate findings (same file, line, and description
    prefix) are collapsed to one.
    """
    approved = True
    collected: list[Finding] = []
    unparseable = 0

    for parsed, _raw in review_results:
        if parsed is None:
            # Unparseable review: treat as a rejection with a placeholder
            # finding rather than silently dropping the reviewer.
            approved = False
            unparseable += 1
            collected.append(
                Finding(
                    severity="high",
                    file="",
                    line=0,
                    description="Reviewer output failed to parse after retries",
                    suggestion="Manual review needed",
                )
            )
            continue
        approved = approved and parsed.approved
        collected.extend(parsed.findings)

    return AggregateResult(
        all_approved=approved,
        unique_findings=_deduplicate(collected),
        parse_failures=unparseable,
        total_reviewers=len(review_results),
    )


def _deduplicate(findings: list[Finding]) -> list[Finding]:
    """Drop findings repeating an earlier (file, line, description-prefix) key."""
    kept: list[Finding] = []
    seen_keys: set[str] = set()
    for finding in findings:
        dedup_key = f"{finding.file}:{finding.line}:{finding.description[:50]}"
        if dedup_key in seen_keys:
            continue
        seen_keys.add(dedup_key)
        kept.append(finding)
    return kept
config.cdk_repo, + } + + # Filter to repos that actually have changes on the feature branch + repos_with_changes: list[str] = [] + for repo in affected_repos: + repo_name = repo.split("/")[-1] if "/" in repo else repo + stdout, _, exit_code = client.run_command( + session_id, f"cd {repo_name} && git log origin/main..{branch_name} --oneline 2>/dev/null" + ) + if exit_code == 0 and stdout.strip(): + full_repo = full_repo_map.get(repo, repo) + repos_with_changes.append(full_repo) + + if not repos_with_changes: + return CompleteResult(pr_urls=[], rebase_succeeded=False, errors=["No repos have changes on the feature branch"]) + + # Rebase and push each repo that has changes + rebase_succeeded = True + for repo in repos_with_changes: + repo_name = repo.split("/")[-1] if "/" in repo else repo + + _, stderr, exit_code = client.run_command( + session_id, f"cd {repo_name} && git fetch origin main && git rebase origin/main" + ) + if exit_code != 0: + rebase_succeeded = False + client.run_command(session_id, f"cd {repo_name} && git rebase --abort") + errors.append(f"Rebase failed in {repo_name}: {stderr[:500]}") + continue + + _, stderr, exit_code = client.run_command( + session_id, f"cd {repo_name} && git push origin {branch_name} --force-with-lease" + ) + if exit_code != 0: + errors.append(f"Push failed in {repo_name}: {stderr[:500]}") + + # Create PRs — CDK first if both repos have changes + repo_order = [] + if config.cdk_repo in repos_with_changes: + repo_order.append(config.cdk_repo) + if config.cli_repo in repos_with_changes: + repo_order.append(config.cli_repo) + + for repo in repo_order: + repo_name = repo.split("/")[-1] + + # Let the agent create the PR — it can read the repo's PR template and fill it in properly + pr_message = ( + f"Create a pull request in this repo for branch {branch_name}.\n" + f"Issue: {issue_url} (#{issue_number})\n" + f"Issue title: {issue_title}\n" + f"Review summary: {review_summary}\n" + f"Read the PR template at 
.github/pull_request_template.md and fill it in properly.\n" + f"IMPORTANT: The Related Issue section MUST say 'Closes #{issue_number}' to auto-close the issue when merged.\n" + f"Use a descriptive title based on the issue title. Do NOT use a generic title like 'fix: resolve #N'.\n" + f"Use: gh pr create --title '