diff --git a/CLAUDE.md b/CLAUDE.md index dedf3df..b8c8076 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -49,7 +49,8 @@ src/clayde/ # find_open_pr(), create_pull_request(), is_blocked(), # add_pr_reviewer(), get_pr_reviews(), # get_pr_review_comments(), parse_pr_url(), - # get_issue_author() + # get_issue_author(), get_check_runs(), + # get_required_check_names() git.py # ensure_repo() — clone or update repos under REPOS_DIR safety.py # Content filtering & plan approval: is_comment_visible(), # filter_comments(), is_issue_visible(), @@ -63,10 +64,13 @@ src/clayde/ orchestrator.py # main() — single cycle, run_loop() — container entry point prompts/ work.j2 # Jinja2 template for the unified work prompt + fix_ci.j2 # prompt for diagnosing/fixing a failing PR pipeline tasks/ __init__.py work.py # run(issue_url) — unified: Claude decides next action # (ask, plan, implement, open PR, or address review) + fix_ci.py # run(issue_url, pr_url, branch_name, failed_checks) — + # self-fix a failing CI pipeline on a clayde PR webhook/ __init__.py app.py # FastAPI app, /webhook/pebble, /health, OTel enqueue span @@ -108,6 +112,7 @@ Plain `KEY=VALUE` file (no shell quoting). 
All keys use `CLAYDE_` prefix and are | `CLAYDE_CLAUDE_API_KEY` | Anthropic API key for Claude SDK calls (required when backend=`api`) | | `CLAYDE_CLAUDE_MODEL` | Model to use (default: `claude-opus-4-6`) | | `CLAYDE_CLAUDE_BACKEND` | `api` (default) or `cli` — selects Anthropic SDK or Claude Code CLI | +| `CLAYDE_CI_FIX_MAX_ATTEMPTS` | Max autonomous CI-fix attempts per PR before giving up and notifying (default 3) | | `CLAYDE_PEBBLE_ENABLED` | Set to `true` to enable the Pebble webhook | | `CLAYDE_PEBBLE_TOKEN` | Bearer token the Pebble app sends | | `CLAYDE_PEBBLE_HOST` | Public hostname for Traefik routing | @@ -142,6 +147,9 @@ Per-issue state is stored in `state.json` under | `pr_url` | PR opened for this issue, once detected via `find_open_pr()` | | `in_progress` | `True` while the work task runs; a crash leaves it set so the next cycle retries | | `last_seen_at` | ISO-UTC timestamp of the last completed cycle; used to detect new activity | +| `ci_fix_attempts` | Number of autonomous CI-fix attempts made for this PR (capped at `ci_fix_max_attempts`) | +| `last_ci_fix_attempt_sha` | PR head SHA of the last CI-fix attempt; prevents re-attempting the same commit | +| `ci_fix_exhausted_notified` | `True` once the operator has been alerted that the attempt budget is spent (avoids re-notifying) | **Activity detection** (`_handle_issue`): the work task is invoked when any of — `in_progress` is set (retry), `last_seen_at` is `None` (never processed), @@ -149,6 +157,16 @@ there are new whitelist-visible comments, or there is new PR review activity (inline comments or a review body). A pure PR approval with no comments does **not** invoke Claude — it just advances `last_seen_at`. +**CI self-fix**: when there is *no* new human activity but an open PR exists, +`_handle_ci_fix()` checks the PR head commit's check runs (`get_check_runs()`, +filtered to branch-protection-required checks when defined). 
If a required +check has failed and a fix has not yet been attempted for that head SHA, the +`fix_ci` task is invoked: Claude inspects the failing job logs, pushes a fix to +the PR branch, and a summary is posted as an issue comment. Attempts are capped +per PR by `ci_fix_max_attempts` (default 3); once exhausted, the operator is +notified once via ntfy and Clayde stops attempting. Green/pending CI falls +through to normal review monitoring unchanged. + **Limits & retries**: `UsageLimitError` / `InvocationTimeoutError` from Claude leave `in_progress=True` so the next cycle retries automatically. Other exceptions clear `in_progress` and log the error. Closed issues are pruned @@ -221,6 +239,8 @@ Key functions: - `get_pr_reviews()` / `get_pr_review_comments()` — fetch PR review data - `edit_comment()` — edit an existing issue comment - `parse_pr_url()` — parse PR URL into (owner, repo, pr_number) +- `get_check_runs(g, owner, repo, ref)` — failed check runs for a commit SHA (name, conclusion, details_url) +- `get_required_check_names(g, owner, repo, branch)` — required status-check names from branch protection (empty set when unprotected) --- diff --git a/README.md b/README.md index 34b34e5..ad74248 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ Clayde is assigned GitHub issues in software repositories. For each issue it: 3. Posts a summary comment after each work cycle 4. Opens a pull request (Claude creates the PR directly with a description and, for diffs spanning more than 3 files, a recommended reading order) and assigns the issue author as reviewer 5. Monitors the PR and addresses review comments when they appear +6. Monitors the PR's CI pipeline and, if a required check fails, autonomously diagnoses the failing job and pushes a fix Clayde runs as a Docker container in a continuous loop (default: every 5 minutes). 
Rather than a rigid state machine, it uses **timestamp-based activity detection**: each issue records the last time it was processed, and only new visible activity since that timestamp triggers a new Claude invocation. @@ -34,6 +35,7 @@ Clayde's loop is event-driven and stateless by design: 6. **Crash recovery**: `in_progress` is set before invoking Claude and cleared after. If the process crashes mid-run, the next cycle retries automatically. 7. **Pure PR approvals** (no comments) update `last_seen_at` without invoking Claude. 8. **Closed issues** are pruned from state automatically. +9. **CI self-fix**: when an issue's PR is open and there is no new human activity, Clayde checks the PR head commit's CI status. If a required check has failed (and a fix has not already been attempted for that commit), Claude inspects the failing job logs and pushes a fix to the branch — up to `CLAYDE_CI_FIX_MAX_ATTEMPTS` times per PR, after which the operator is notified via ntfy. Green CI falls through to normal review monitoring. --- @@ -59,6 +61,7 @@ Whitelisted users are configured via `CLAYDE_WHITELISTED_USERS` in `data/config. - **Full issue lifecycle**: Engage → implement → PR → review, all driven by new activity - **PR creation by Claude**: Claude writes the PR description and a recommended reading order for larger diffs - **PR review handling**: Reads and addresses reviewer feedback automatically +- **CI self-healing**: Detects failing required checks on its own PRs and pushes fixes autonomously, with a per-PR attempt cap and operator notification - **Rate-limit resilience**: Detects Claude usage limits and automatically retries - **Crash recovery**: `in_progress` flag ensures interrupted runs are retried next cycle - **Safety filtering**: Whitelist-based content filtering prevents acting on unauthorized content @@ -173,6 +176,7 @@ In any repository the bot has access to, assign issues to the bot account. 
Clayd | `CLAYDE_CLAUDE_BACKEND` | `api` (default) or `cli` | | `CLAYDE_CLAUDE_API_KEY` | Anthropic API key (required when backend=`api`) | | `CLAYDE_CLAUDE_MODEL` | Model to use (default: `claude-opus-4-6`) | +| `CLAYDE_CI_FIX_MAX_ATTEMPTS` | Max autonomous CI-fix attempts per PR before notifying (default: `3`) | | `CLAYDE_PEBBLE_ENABLED` | Set to `true` to enable the Pebble webhook | | `CLAYDE_PEBBLE_TOKEN` | Bearer token the Pebble app sends | | `CLAYDE_PEBBLE_HOST` | Public hostname for Traefik routing | diff --git a/config.env.template b/config.env.template index 42f6209..67b9b71 100644 --- a/config.env.template +++ b/config.env.template @@ -19,6 +19,9 @@ CLAYDE_WHITELISTED_USERS=your-username,your-bot-username CLAYDE_CLAUDE_BACKEND=api CLAYDE_CLAUDE_API_KEY= +# Max autonomous CI-fix attempts per PR before giving up and notifying (default 3). +CLAYDE_CI_FIX_MAX_ATTEMPTS=3 + # --- Pebble webhook --- # Set to true to enable the FastAPI webhook on port 8080 (routed via Traefik). CLAYDE_PEBBLE_ENABLED=false diff --git a/src/clayde/config.py b/src/clayde/config.py index d3eef8c..5cd53cf 100644 --- a/src/clayde/config.py +++ b/src/clayde/config.py @@ -41,6 +41,8 @@ def effective_git_name(self) -> str: # Orchestrator behaviour loop_interval_s: int = 300 implement_max_retries: int = 3 + # Max autonomous CI-fix attempts per PR before giving up and notifying. 
+ ci_fix_max_attempts: int = 3 # Pebble webhook pebble_enabled: bool = False diff --git a/src/clayde/github.py b/src/clayde/github.py index 3301a98..d338dca 100644 --- a/src/clayde/github.py +++ b/src/clayde/github.py @@ -193,3 +193,52 @@ def get_pr_title(g: Github, owner: str, repo: str, pr_number: int) -> str: def get_pull(g: Github, owner: str, repo: str, pr_number: int): """Return the PullRequest object for the given PR number.""" return _get_repo(g, owner, repo).get_pull(pr_number) + + +# --------------------------------------------------------------------------- +# CI / check-run helpers +# --------------------------------------------------------------------------- + +# Check-run conclusions that represent a failed / blocking pipeline. "neutral", +# "skipped", "success" and "stale" are not treated as failures; queued and +# in-progress runs are ignored until they complete. +_FAILED_CONCLUSIONS = frozenset( + {"failure", "timed_out", "action_required", "startup_failure"} +) + + +def get_check_runs(g: Github, owner: str, repo: str, ref: str) -> list[dict]: + """Return the *failed* check runs for a commit SHA via the Checks API. + + Only completed runs whose conclusion is in ``_FAILED_CONCLUSIONS`` are + returned; queued, in-progress, successful, skipped and neutral runs are + omitted. Each item is a dict with ``name``, ``conclusion`` and + ``details_url`` (the URL of the failing job's logs). + """ + commit = _get_repo(g, owner, repo).get_commit(ref) + failed: list[dict] = [] + for run in commit.get_check_runs(): + if run.status == "completed" and run.conclusion in _FAILED_CONCLUSIONS: + failed.append({ + "name": run.name, + "conclusion": run.conclusion, + "details_url": run.details_url or run.html_url or "", + }) + return failed + + +def get_required_check_names(g: Github, owner: str, repo: str, branch: str) -> set[str]: + """Return the set of required status-check names from branch protection. 
+ + Returns an empty set when the branch is unprotected or the required checks + cannot be read (e.g. insufficient token permissions). Callers treat an + empty set as "no required-check filter" — every failed check is then + considered blocking. + """ + try: + b = _get_repo(g, owner, repo).get_branch(branch) + required = b.get_required_status_checks() + return set(required.contexts or []) + except Exception as e: + log.info("No required status checks for %s/%s@%s: %s", owner, repo, branch, e) + return set() diff --git a/src/clayde/orchestrator.py b/src/clayde/orchestrator.py index 74ef059..12e50f5 100644 --- a/src/clayde/orchestrator.py +++ b/src/clayde/orchestrator.py @@ -34,9 +34,11 @@ fetch_issue, fetch_issue_comments, get_assigned_issues, + get_check_runs, get_pr_review_comments, get_pr_reviews, get_pull, + get_required_check_names, is_blocked, is_pull_request_item, issue_ref, @@ -45,8 +47,9 @@ ) from clayde.safety import filter_pr_reviews, get_new_visible_comments, has_visible_content from clayde.state import get_issue_state, load_state, save_state, update_issue_state -from clayde.tasks import work, wrap_up, pr_work +from clayde.tasks import fix_ci, work, wrap_up, pr_work from clayde.telemetry import get_tracer, init_tracer +from clayde.webhook.notify import send_ntfy_sync log = logging.getLogger("clayde.orchestrator") @@ -162,34 +165,145 @@ def _handle_issue(g: Github, issue: Issue, url: str) -> None: should_invoke = in_progress or (last_seen_at is None) or bool(new_comments) or has_new_review_activity - if not should_invoke: - log.info("[%s] No new activity — skipping", label) - span.set_attribute("issue.skip_reason", "no_new_activity") - return + if should_invoke: + # Mark in_progress before invoking Claude so a crash leaves a retry marker + update_issue_state(url, {"in_progress": True}) - # Mark in_progress before invoking Claude so a crash leaves a retry marker - update_issue_state(url, {"in_progress": True}) + log.info("[%s] New activity — invoking work 
task", label) + try: + work.run(url) + except (UsageLimitError, InvocationTimeoutError) as e: + log.warning("[%s] Usage/timeout limit — will retry next cycle: %s", label, e) + span.set_attribute("issue.status", "retry") + # in_progress stays True so the next cycle retries automatically + return + except Exception as e: + log.error("[%s] ERROR in work task: %s", label, e) + span.set_status(StatusCode.ERROR, str(e)) + span.record_exception(e) + update_issue_state(url, {"in_progress": False}) + return - log.info("[%s] New activity — invoking work task", label) - try: - work.run(url) - except (UsageLimitError, InvocationTimeoutError) as e: - log.warning("[%s] Usage/timeout limit — will retry next cycle: %s", label, e) - span.set_attribute("issue.status", "retry") - # in_progress stays True so the next cycle retries automatically + # Successful completion — update last_seen_at to prevent re-triggering on + # Clayde's own comments posted during this run + update_issue_state(url, {"in_progress": False, "last_seen_at": _now_utc()}) + span.set_attribute("issue.status", "completed") + log.info("[%s] Cycle complete", label) return - except Exception as e: - log.error("[%s] ERROR in work task: %s", label, e) - span.set_status(StatusCode.ERROR, str(e)) - span.record_exception(e) - update_issue_state(url, {"in_progress": False}) + + # No new human activity. If a PR is open, monitor its CI and self-fix a + # failing pipeline before falling back to "nothing to do". 
+ if pr_url and _handle_ci_fix(g, owner, repo, pr_url, url, label, span): return - # Successful completion — update last_seen_at to prevent re-triggering on - # Clayde's own comments posted during this run - update_issue_state(url, {"in_progress": False, "last_seen_at": _now_utc()}) - span.set_attribute("issue.status", "completed") - log.info("[%s] Cycle complete", label) + log.info("[%s] No new activity — skipping", label) + span.set_attribute("issue.skip_reason", "no_new_activity") + + +def _handle_ci_fix(g: Github, owner: str, repo: str, pr_url: str, url: str, + label: str, span) -> bool: + """Monitor CI on the open PR and self-fix a failing required pipeline. + + Returns True when CI handling has consumed this cycle (a fix was attempted, + the PR is waiting on a previous fix for the same commit, or the attempt + budget is exhausted) so the caller should stop. Returns False when CI is + green, still pending, or has no failing required checks — the caller then + falls through to its normal "no new activity" handling. + + Loop-safety: the attempt counter and the attempted head SHA are recorded + *before* invoking Claude, so a crash or usage limit can never cause an + endless retry on the same commit. + """ + settings = get_settings() + try: + _, _, pr_number = parse_pr_url(pr_url) + pr = get_pull(g, owner, repo, pr_number) + head_sha = pr.head.sha + base_branch = pr.base.ref + except Exception as e: + log.warning("[%s] Failed to fetch PR for CI check: %s", label, e) + return False + + try: + failed = get_check_runs(g, owner, repo, head_sha) + required = get_required_check_names(g, owner, repo, base_branch) + if required: + # Branch protection defines required checks — only act on those. + failed = [f for f in failed if f["name"] in required] + # When no required checks are configured, every failed check is treated + # as blocking (fallback for unprotected branches). 
+ except Exception as e: + log.warning("[%s] Failed to fetch CI status: %s", label, e) + return False + + if not failed: + return False # CI green / pending — proceed with review monitoring + + issue_state = get_issue_state(url) + attempts = issue_state.get("ci_fix_attempts", 0) + max_attempts = settings.ci_fix_max_attempts + + if attempts >= max_attempts: + if not issue_state.get("ci_fix_exhausted_notified"): + log.warning("[%s] CI still failing after %d attempts — notifying operator", + label, attempts) + _notify_ci_exhausted(settings, owner, repo, pr_number, attempts) + update_issue_state(url, {"ci_fix_exhausted_notified": True}) + span.set_attribute("issue.skip_reason", "ci_fix_exhausted") + return True + + if issue_state.get("last_ci_fix_attempt_sha") == head_sha: + # Already attempted a fix for this exact commit — wait for new activity + # (a new push, review, or comment) rather than looping on the same SHA. + log.info("[%s] CI failing but fix already attempted for %s — waiting", + label, head_sha[:7]) + span.set_attribute("issue.skip_reason", "ci_fix_already_attempted") + return True + + branch_name = issue_state.get("branch_name", pr.head.ref) + check_names = ", ".join(f["name"] for f in failed) + log.info("[%s] CI failing (%s) — invoking fix task (attempt %d/%d)", + label, check_names, attempts + 1, max_attempts) + + # Record the attempt *before* invoking so a crash/limit cannot loop on this + # SHA, and so the attempt counts toward the max-attempts budget. 
+ update_issue_state(url, { + "ci_fix_attempts": attempts + 1, + "last_ci_fix_attempt_sha": head_sha, + }) + + try: + fix_ci.run(url, pr_url, branch_name, failed) + except (UsageLimitError, InvocationTimeoutError) as e: + log.warning("[%s] Usage/timeout limit during CI fix — will retry on a new commit: %s", + label, e) + span.set_attribute("issue.status", "ci_fix_retry") + return True + except Exception as e: + log.error("[%s] ERROR in CI fix task: %s", label, e) + span.set_status(StatusCode.ERROR, str(e)) + span.record_exception(e) + return True + + # Advance last_seen_at so Clayde's own summary comment does not re-trigger. + update_issue_state(url, {"last_seen_at": _now_utc()}) + span.set_attribute("issue.status", "ci_fix_attempted") + log.info("[%s] CI fix attempt complete", label) + return True + + +def _notify_ci_exhausted(settings, owner: str, repo: str, pr_number: int, attempts: int) -> None: + """Send an ntfy alert that CI is still failing after the attempt budget.""" + if not settings.ntfy_topic: + return + send_ntfy_sync( + title="Clayde: CI still failing", + body=f"CI still failing after {attempts} attempts on {owner}/{repo}#{pr_number}", + success=False, + base_url=settings.ntfy_base_url, + topic=settings.ntfy_topic, + timeout_s=settings.ntfy_timeout_s, + ) def _handle_standalone_pr(g: Github, url: str) -> None: diff --git a/src/clayde/prompts/fix_ci.j2 b/src/clayde/prompts/fix_ci.j2 new file mode 100644 index 0000000..ba2e852 --- /dev/null +++ b/src/clayde/prompts/fix_ci.j2 @@ -0,0 +1,50 @@ +You are Clayde, an autonomous software agent. The GitHub Actions pipeline on a +pull request you opened is failing, and you need to diagnose and fix it. 
+ +ISSUE #{{ number }}: {{ title }} +REPO: {{ owner }}/{{ repo }} +LABELS: {{ labels }} + +ISSUE BODY: +{{ body }} + +OPEN PULL REQUEST: {{ pr_url }} +PR BRANCH: {{ branch_name }} + +FAILED CHECKS (required checks blocking this PR): +{{ failed_checks }} + +REPOSITORY ON DISK: {{ repo_path }} + +--- + +The CI pipeline for this PR is red. Diagnose the failure and push a fix. + +Steps: +1. Check out the PR branch: `git checkout {{ branch_name }} && git pull origin {{ branch_name }}`. +2. Inspect the failing job logs. Use the GitHub CLI, e.g.: + - `gh run list --branch {{ branch_name }} --limit 5` to find the failing run. + - `gh run view RUN_ID --log-failed` (using the run ID from the list) to read only the failed steps' logs. + The failed checks above include the details URL of each failing job. +3. Reproduce the failure locally where possible (run the same test/lint/build + command the pipeline ran) so you can confirm your fix. +4. Implement the smallest correct fix. Do NOT disable, skip, or weaken checks + to make the pipeline pass — fix the underlying cause. +5. Re-run the relevant command locally to confirm it now passes. +6. Commit with a clear message describing what failed and what you changed. +7. Push: `git push origin {{ branch_name }}`. + +Do NOT open a new pull request — the existing PR {{ pr_url }} will update +automatically when you push to `{{ branch_name }}`. If after investigating you +determine the failure is not something you can fix from the code (e.g. it is a +flaky infrastructure error or requires a secret/permission you do not have), +explain that in your summary instead of forcing a change. + +After completing your work, provide a short summary of what failed and what you +changed. + +IMPORTANT: Your final response must be ONLY a raw JSON object — nothing else. +Do not include any text before or after the JSON. Do not wrap it in markdown code fences. +Your entire response must be parseable by json.loads(). 
+ +{"summary": ""} diff --git a/src/clayde/tasks/fix_ci.py b/src/clayde/tasks/fix_ci.py new file mode 100644 index 0000000..f93b7a7 --- /dev/null +++ b/src/clayde/tasks/fix_ci.py @@ -0,0 +1,106 @@ +"""CI-fix task — diagnose a failing pipeline on a clayde-opened PR and push a fix. + +Invoked by the orchestrator when a required check on an open PR has failed and a +fix has not yet been attempted for that head commit. Claude inspects the +failing job logs, pushes a fix commit to the PR branch, and a summary is posted +as an issue comment. Mirrors the structure of ``tasks/work.py``. +""" + +import logging + +from clayde.claude import format_cost_line, invoke_claude +from clayde.config import get_github_client +from clayde.git import ensure_repo +from clayde.github import ( + fetch_issue, + get_default_branch, + issue_ref, + parse_issue_url, + post_comment, +) +from clayde.prompts import render_template +from clayde.responses import WorkResponse, parse_response +from clayde.safety import is_issue_visible +from clayde.telemetry import get_tracer + +log = logging.getLogger("clayde.tasks.fix_ci") + + +def _format_failed_checks(failed_checks: list[dict]) -> str: + """Render failed check runs into a readable list for the prompt.""" + lines = [] + for check in failed_checks: + name = check.get("name", "(unknown)") + conclusion = check.get("conclusion", "failure") + url = check.get("details_url", "") + line = f"- {name} ({conclusion})" + if url: + line += f" — {url}" + lines.append(line) + return "\n".join(lines) or "(none)" + + +def run(issue_url: str, pr_url: str, branch_name: str, failed_checks: list[dict]) -> None: + """Invoke Claude to fix a failing CI pipeline on the PR branch. + + Raises UsageLimitError or InvocationTimeoutError on rate/timeout limits so + the orchestrator can react (the attempt is already recorded, so the same head SHA is not retried). 
+ """ + tracer = get_tracer() + with tracer.start_as_current_span("clayde.task.fix_ci") as span: + g = get_github_client() + owner, repo, number = parse_issue_url(issue_url) + ref = issue_ref(owner, repo, number) + span.set_attribute("issue.number", number) + span.set_attribute("issue.owner", owner) + span.set_attribute("issue.repo", repo) + span.set_attribute("ci_fix.failed_count", len(failed_checks)) + + issue = fetch_issue(g, owner, repo, number) + default_branch = get_default_branch(g, owner, repo) + repo_path = ensure_repo(owner, repo, default_branch) + + body_text = issue.body or "(empty)" + if not is_issue_visible(issue): + body_text = "(filtered)" + labels = ", ".join(lb.name for lb in issue.labels) or "none" + + prompt = render_template( + "fix_ci.j2", + number=number, + title=issue.title, + owner=owner, + repo=repo, + labels=labels, + body=body_text, + branch_name=branch_name, + pr_url=pr_url, + failed_checks=_format_failed_checks(failed_checks), + repo_path=repo_path, + default_branch=default_branch, + ) + + log.info("[%s: %s] Invoking Claude to fix failing CI", ref, issue.title) + + # UsageLimitError/InvocationTimeoutError propagate to the orchestrator + result = invoke_claude(prompt, repo_path) + + span.set_attribute("fix_ci.output_length", len(result.output or "")) + + # Parse summary (best-effort; fall back to raw output snippet) + summary = None + try: + parsed = parse_response(result.output, WorkResponse) + summary = parsed.summary + except ValueError: + log.warning("[%s: %s] Failed to parse CI-fix response JSON — using raw output", + ref, issue.title) + summary = (result.output or "").strip()[:500] or None + + if summary: + check_names = ", ".join(c.get("name", "?") for c in failed_checks) + header = f"🔧 CI was failing ({check_names}). 
" + post_comment(g, owner, repo, number, + f"{header}{summary}{format_cost_line(result.cost_eur)}") + + log.info("[%s: %s] CI-fix complete", ref, issue.title) diff --git a/tests/test_github.py b/tests/test_github.py index 651c2db..179e6b2 100644 --- a/tests/test_github.py +++ b/tests/test_github.py @@ -15,10 +15,12 @@ fetch_issue_comments, find_open_pr, get_assigned_issues, + get_check_runs, get_default_branch, get_issue_author, get_pr_review_comments, get_pr_reviews, + get_required_check_names, is_blocked, is_pull_request_item, parse_issue_url, @@ -28,6 +30,16 @@ ) +def _make_check_run(name, status, conclusion, details_url="https://ci/details", html_url="https://ci/html"): + run = MagicMock() + run.name = name + run.status = status + run.conclusion = conclusion + run.details_url = details_url + run.html_url = html_url + return run + + class TestParseIssueUrl: def test_valid_url(self): owner, repo, number = parse_issue_url("https://github.com/alice/myrepo/issues/42") @@ -290,3 +302,77 @@ def test_returns_pull_request_object(self): g.get_repo.assert_called_once_with("owner/repo") mock_repo.get_pull.assert_called_once_with(42) assert result is mock_pr + + +class TestGetCheckRuns: + def _setup(self, g, runs): + commit = MagicMock() + commit.get_check_runs.return_value = runs + g.get_repo.return_value.get_commit.return_value = commit + return commit + + def test_returns_only_completed_failures(self): + g = MagicMock() + runs = [ + _make_check_run("test", "completed", "failure"), + _make_check_run("lint", "completed", "success"), + _make_check_run("build", "completed", "timed_out"), + _make_check_run("deploy", "in_progress", None), + _make_check_run("docs", "completed", "skipped"), + _make_check_run("flaky", "completed", "neutral"), + ] + self._setup(g, runs) + result = get_check_runs(g, "o", "r", "abc123") + names = {r["name"] for r in result} + assert names == {"test", "build"} + + def test_includes_conclusion_and_details_url(self): + g = MagicMock() + self._setup(g, 
[_make_check_run("test", "completed", "failure", + details_url="https://ci/log")]) + result = get_check_runs(g, "o", "r", "abc123") + assert result == [{ + "name": "test", + "conclusion": "failure", + "details_url": "https://ci/log", + }] + + def test_falls_back_to_html_url(self): + g = MagicMock() + self._setup(g, [_make_check_run("test", "completed", "failure", + details_url=None, html_url="https://ci/html")]) + result = get_check_runs(g, "o", "r", "abc123") + assert result[0]["details_url"] == "https://ci/html" + + def test_returns_empty_when_all_green(self): + g = MagicMock() + self._setup(g, [_make_check_run("test", "completed", "success")]) + assert get_check_runs(g, "o", "r", "abc123") == [] + + def test_uses_commit_ref(self): + g = MagicMock() + self._setup(g, []) + get_check_runs(g, "o", "r", "deadbeef") + g.get_repo.return_value.get_commit.assert_called_once_with("deadbeef") + + +class TestGetRequiredCheckNames: + def test_returns_contexts(self): + g = MagicMock() + rsc = MagicMock() + rsc.contexts = ["test", "lint"] + g.get_repo.return_value.get_branch.return_value.get_required_status_checks.return_value = rsc + assert get_required_check_names(g, "o", "r", "main") == {"test", "lint"} + + def test_returns_empty_set_when_no_contexts(self): + g = MagicMock() + rsc = MagicMock() + rsc.contexts = None + g.get_repo.return_value.get_branch.return_value.get_required_status_checks.return_value = rsc + assert get_required_check_names(g, "o", "r", "main") == set() + + def test_returns_empty_set_on_unprotected_branch(self): + g = MagicMock() + g.get_repo.return_value.get_branch.return_value.get_required_status_checks.side_effect = \ + GithubException(404, "Branch not protected", None) + assert get_required_check_names(g, "o", "r", "main") == set() diff --git a/tests/test_orchestrator.py b/tests/test_orchestrator.py index a29057c..2b1c3da 100644 --- a/tests/test_orchestrator.py +++ b/tests/test_orchestrator.py @@ -7,6 +7,7 @@ import pytest from clayde.orchestrator 
import ( + _handle_ci_fix, _handle_issue, _handle_standalone_pr, _is_pr_tracked_as_issue, @@ -753,3 +754,217 @@ def test_get_pull_failure_falls_through(self): mock_wrap_up.run.assert_not_called() mock_work.run.assert_called_once_with(issue.html_url) + + +def _ci_settings(max_attempts=3, ntfy_topic="topic"): + s = MagicMock() + s.ci_fix_max_attempts = max_attempts + s.ntfy_topic = ntfy_topic + s.ntfy_base_url = "https://ntfy.sh" + s.ntfy_timeout_s = 10 + return s + + +def _ci_pr(head_sha="sha-new", base_ref="main", head_ref="clayde/issue-86"): + pr = MagicMock() + pr.head.sha = head_sha + pr.base.ref = base_ref + pr.head.ref = head_ref + return pr + + +class TestHandleCiFix: + """Unit tests for _handle_ci_fix — CI monitoring and self-fix.""" + + PR_URL = "https://github.com/o/r/pull/5" + URL = "https://github.com/o/r/issues/86" + + def _run(self, *, failed, required=set(), issue_state, settings=None, + fix_side_effect=None): + settings = settings or _ci_settings() + span = MagicMock() + with patch("clayde.orchestrator.parse_pr_url", return_value=("o", "r", 5)), \ + patch("clayde.orchestrator.get_pull", return_value=_ci_pr()), \ + patch("clayde.orchestrator.get_check_runs", return_value=failed), \ + patch("clayde.orchestrator.get_required_check_names", return_value=required), \ + patch("clayde.orchestrator.get_issue_state", return_value=issue_state), \ + patch("clayde.orchestrator.get_settings", return_value=settings), \ + patch("clayde.orchestrator.update_issue_state") as mock_update, \ + patch("clayde.orchestrator.send_ntfy_sync") as mock_ntfy, \ + patch("clayde.orchestrator.fix_ci") as mock_fix: + if fix_side_effect: + mock_fix.run.side_effect = fix_side_effect + result = _handle_ci_fix(MagicMock(), "o", "r", self.PR_URL, self.URL, + "label", span) + return result, mock_update, mock_ntfy, mock_fix + + def test_green_ci_returns_false(self): + result, _, _, mock_fix = self._run(failed=[], issue_state={}) + assert result is False + mock_fix.run.assert_not_called() + 
+ def test_invokes_fix_when_failing_and_no_prior_attempt(self): + failed = [{"name": "test", "conclusion": "failure", "details_url": "u"}] + result, mock_update, _, mock_fix = self._run(failed=failed, issue_state={}) + assert result is True + mock_fix.run.assert_called_once_with( + self.URL, self.PR_URL, "clayde/issue-86", failed) + + def test_records_attempt_before_invoking(self): + failed = [{"name": "test", "conclusion": "failure", "details_url": "u"}] + _, mock_update, _, _ = self._run(failed=failed, issue_state={}) + recorded = [c[0][1] for c in mock_update.call_args_list] + assert any( + u.get("ci_fix_attempts") == 1 and u.get("last_ci_fix_attempt_sha") == "sha-new" + for u in recorded + ) + + def test_advances_last_seen_at_on_success(self): + failed = [{"name": "test", "conclusion": "failure", "details_url": "u"}] + _, mock_update, _, _ = self._run(failed=failed, issue_state={}) + assert any("last_seen_at" in c[0][1] for c in mock_update.call_args_list) + + def test_skips_when_same_sha_already_attempted(self): + failed = [{"name": "test", "conclusion": "failure", "details_url": "u"}] + result, _, _, mock_fix = self._run( + failed=failed, + issue_state={"last_ci_fix_attempt_sha": "sha-new", "ci_fix_attempts": 1}, + ) + assert result is True + mock_fix.run.assert_not_called() + + def test_notifies_once_when_attempts_exhausted(self): + failed = [{"name": "test", "conclusion": "failure", "details_url": "u"}] + result, mock_update, mock_ntfy, mock_fix = self._run( + failed=failed, + issue_state={"ci_fix_attempts": 3}, + ) + assert result is True + mock_fix.run.assert_not_called() + mock_ntfy.assert_called_once() + assert any(c[0][1].get("ci_fix_exhausted_notified") for c in mock_update.call_args_list) + + def test_does_not_renotify_when_already_notified(self): + failed = [{"name": "test", "conclusion": "failure", "details_url": "u"}] + _, _, mock_ntfy, _ = self._run( + failed=failed, + issue_state={"ci_fix_attempts": 3, "ci_fix_exhausted_notified": True}, + ) + 
mock_ntfy.assert_not_called() + + def test_required_filter_excludes_informational_checks(self): + # Failing check "lint" is not in the required set → treated as green. + failed = [{"name": "lint", "conclusion": "failure", "details_url": "u"}] + result, _, _, mock_fix = self._run( + failed=failed, required={"test"}, issue_state={}) + assert result is False + mock_fix.run.assert_not_called() + + def test_required_filter_keeps_required_failure(self): + failed = [{"name": "test", "conclusion": "failure", "details_url": "u"}] + result, _, _, mock_fix = self._run( + failed=failed, required={"test"}, issue_state={}) + assert result is True + mock_fix.run.assert_called_once() + + def test_usage_limit_during_fix_returns_true(self): + from clayde.claude import UsageLimitError + failed = [{"name": "test", "conclusion": "failure", "details_url": "u"}] + result, mock_update, _, _ = self._run( + failed=failed, issue_state={}, + fix_side_effect=UsageLimitError("limit", cost_eur=0.0)) + assert result is True + # last_seen_at must NOT be advanced on a limited run + assert not any("last_seen_at" in c[0][1] for c in mock_update.call_args_list) + + def test_unexpected_error_during_fix_returns_true(self): + failed = [{"name": "test", "conclusion": "failure", "details_url": "u"}] + result, _, _, _ = self._run( + failed=failed, issue_state={}, + fix_side_effect=RuntimeError("boom")) + assert result is True + + def test_pr_fetch_failure_returns_false(self): + span = MagicMock() + with patch("clayde.orchestrator.parse_pr_url", return_value=("o", "r", 5)), \ + patch("clayde.orchestrator.get_pull", side_effect=Exception("API error")), \ + patch("clayde.orchestrator.get_settings", return_value=_ci_settings()), \ + patch("clayde.orchestrator.fix_ci") as mock_fix: + result = _handle_ci_fix(MagicMock(), "o", "r", self.PR_URL, self.URL, + "label", span) + assert result is False + mock_fix.run.assert_not_called() + + +class TestHandleIssueCiIntegration: + """_handle_issue should route to CI fix 
when there is no human activity.""" + + def _patches(self, pr_url="https://github.com/o/r/pull/5"): + return [ + patch("clayde.orchestrator.is_blocked", return_value=False), + patch("clayde.orchestrator.parse_issue_url", return_value=("o", "r", 86)), + patch("clayde.orchestrator.fetch_issue_comments", return_value=[]), + patch("clayde.orchestrator.has_visible_content", return_value=True), + patch("clayde.orchestrator.get_issue_state", return_value={ + "pr_url": pr_url, + "last_seen_at": "2024-01-01T12:00:00+00:00", + }), + patch("clayde.orchestrator.parse_pr_url", return_value=("o", "r", 5)), + patch("clayde.orchestrator.get_pull", return_value=MagicMock(merged=False)), + patch("clayde.orchestrator.get_pr_reviews", return_value=[]), + patch("clayde.orchestrator.get_pr_review_comments", return_value=[]), + patch("clayde.orchestrator.get_new_visible_comments", return_value=[]), + patch("clayde.orchestrator.update_issue_state"), + patch("clayde.orchestrator.get_settings", return_value=_mock_settings()), + ] + + def test_ci_fix_invoked_when_no_activity_and_pr_open(self): + g = MagicMock() + issue = MagicMock() + issue.html_url = "https://github.com/o/r/issues/86" + issue.title = "Feature" + with ExitStack() as stack: + for p in self._patches(): + stack.enter_context(p) + mock_ci = stack.enter_context( + patch("clayde.orchestrator._handle_ci_fix", return_value=True)) + mock_work = stack.enter_context(patch("clayde.orchestrator.work")) + _handle_issue(g, issue, issue.html_url) + mock_ci.assert_called_once() + mock_work.run.assert_not_called() + + def test_ci_fix_not_invoked_when_human_activity(self): + g = MagicMock() + issue = MagicMock() + issue.html_url = "https://github.com/o/r/issues/86" + issue.title = "Feature" + with ExitStack() as stack: + for p in self._patches(): + stack.enter_context(p) + # New comment present → work task handles it, CI fix skipped this cycle. 
+ stack.enter_context( + patch("clayde.orchestrator.get_new_visible_comments", + return_value=[MagicMock()])) + mock_ci = stack.enter_context(patch("clayde.orchestrator._handle_ci_fix")) + mock_work = stack.enter_context(patch("clayde.orchestrator.work")) + _handle_issue(g, issue, issue.html_url) + mock_ci.assert_not_called() + mock_work.run.assert_called_once_with(issue.html_url) + + def test_no_ci_check_when_no_pr(self): + g = MagicMock() + issue = MagicMock() + issue.html_url = "https://github.com/o/r/issues/86" + issue.title = "Feature" + with patch("clayde.orchestrator.is_blocked", return_value=False), \ + patch("clayde.orchestrator.parse_issue_url", return_value=("o", "r", 86)), \ + patch("clayde.orchestrator.fetch_issue_comments", return_value=[]), \ + patch("clayde.orchestrator.has_visible_content", return_value=True), \ + patch("clayde.orchestrator.get_issue_state", + return_value={"last_seen_at": "2024-01-01T12:00:00+00:00"}), \ + patch("clayde.orchestrator.get_new_visible_comments", return_value=[]), \ + patch("clayde.orchestrator._handle_ci_fix") as mock_ci, \ + patch("clayde.orchestrator.work") as mock_work: + _handle_issue(g, issue, issue.html_url) + mock_ci.assert_not_called() + mock_work.run.assert_not_called() diff --git a/tests/test_tasks_fix_ci.py b/tests/test_tasks_fix_ci.py new file mode 100644 index 0000000..b3f9562 --- /dev/null +++ b/tests/test_tasks_fix_ci.py @@ -0,0 +1,112 @@ +"""Tests for clayde.tasks.fix_ci — autonomous CI-fix task.""" + +from unittest.mock import MagicMock, patch + +import pytest + +from clayde.claude import InvocationResult, UsageLimitError +from clayde.tasks.fix_ci import _format_failed_checks, run + + +def _make_result(output: str, cost_eur: float = 0.50) -> InvocationResult: + return InvocationResult(output=output, cost_eur=cost_eur, input_tokens=100, output_tokens=50) + + +def _mock_issue(): + issue = MagicMock() + issue.title = "Add a feature" + issue.body = "Please add it" + issue.labels = [] + return issue + + 
+FAILED = [{"name": "test", "conclusion": "failure", "details_url": "https://ci/log"}] + + +class TestFormatFailedChecks: + def test_includes_name_conclusion_and_url(self): + out = _format_failed_checks(FAILED) + assert "test" in out + assert "failure" in out + assert "https://ci/log" in out + + def test_handles_missing_url(self): + out = _format_failed_checks([{"name": "lint", "conclusion": "failure"}]) + assert "lint" in out + assert "—" not in out # no trailing URL separator + + def test_empty_list(self): + assert _format_failed_checks([]) == "(none)" + + +class TestRun: + def _patches(self, invoke): + return [ + patch("clayde.tasks.fix_ci.get_github_client"), + patch("clayde.tasks.fix_ci.parse_issue_url", return_value=("o", "r", 86)), + patch("clayde.tasks.fix_ci.fetch_issue", return_value=_mock_issue()), + patch("clayde.tasks.fix_ci.get_default_branch", return_value="main"), + patch("clayde.tasks.fix_ci.ensure_repo", return_value="/tmp/repo"), + patch("clayde.tasks.fix_ci.is_issue_visible", return_value=True), + patch("clayde.tasks.fix_ci.render_template", return_value="prompt"), + patch("clayde.tasks.fix_ci.invoke_claude", invoke), + ] + + def test_posts_summary_with_ci_header(self): + invoke = MagicMock(return_value=_make_result('{"summary": "Fixed the import"}')) + with patch("clayde.tasks.fix_ci.post_comment") as mock_post: + for p in self._patches(invoke): + p.start() + try: + run("https://github.com/o/r/issues/86", + "https://github.com/o/r/pull/5", "clayde/issue-86", FAILED) + finally: + patch.stopall() + mock_post.assert_called_once() + body = mock_post.call_args[0][4] + assert "Fixed the import" in body + assert "CI was failing" in body + assert "test" in body + + def test_renders_prompt_with_failed_checks(self): + invoke = MagicMock(return_value=_make_result('{"summary": "ok"}')) + with patch("clayde.tasks.fix_ci.render_template", return_value="prompt") as mock_render, \ + patch("clayde.tasks.fix_ci.get_github_client"), \ + 
patch("clayde.tasks.fix_ci.parse_issue_url", return_value=("o", "r", 86)), \ + patch("clayde.tasks.fix_ci.fetch_issue", return_value=_mock_issue()), \ + patch("clayde.tasks.fix_ci.get_default_branch", return_value="main"), \ + patch("clayde.tasks.fix_ci.ensure_repo", return_value="/tmp/repo"), \ + patch("clayde.tasks.fix_ci.is_issue_visible", return_value=True), \ + patch("clayde.tasks.fix_ci.invoke_claude", invoke), \ + patch("clayde.tasks.fix_ci.post_comment"): + run("https://github.com/o/r/issues/86", + "https://github.com/o/r/pull/5", "clayde/issue-86", FAILED) + kwargs = mock_render.call_args.kwargs + assert kwargs["branch_name"] == "clayde/issue-86" + assert kwargs["pr_url"] == "https://github.com/o/r/pull/5" + assert "test" in kwargs["failed_checks"] + + def test_propagates_usage_limit_error(self): + invoke = MagicMock(side_effect=UsageLimitError("limit", cost_eur=0.0)) + with patch("clayde.tasks.fix_ci.post_comment"): + for p in self._patches(invoke): + p.start() + try: + with pytest.raises(UsageLimitError): + run("https://github.com/o/r/issues/86", + "https://github.com/o/r/pull/5", "clayde/issue-86", FAILED) + finally: + patch.stopall() + + def test_falls_back_to_raw_output_on_bad_json(self): + invoke = MagicMock(return_value=_make_result("not json at all")) + with patch("clayde.tasks.fix_ci.post_comment") as mock_post: + for p in self._patches(invoke): + p.start() + try: + run("https://github.com/o/r/issues/86", + "https://github.com/o/r/pull/5", "clayde/issue-86", FAILED) + finally: + patch.stopall() + mock_post.assert_called_once() + assert "not json at all" in mock_post.call_args[0][4]