Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 19 additions & 2 deletions .claude/commands/ai-review-local.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
---
description: Run AI code review locally using OpenAI API before opening a PR
argument-hint: "[--context minimal|standard|deep] [--include-files <files>] [--token-budget <n>] [--force-fresh] [--full-registry] [--model <model>] [--dry-run]"
argument-hint: "[--context minimal|standard|deep] [--include-files <files>] [--token-budget <n>] [--force-fresh] [--full-registry] [--model <model>] [--timeout <seconds>] [--dry-run]"
---

# Local AI Code Review

Run a structured code review using the OpenAI Chat Completions API. Reviews changes
Run a structured code review using the OpenAI Responses API. Reviews changes
against the same methodology criteria used by the CI reviewer, but adapted for local
pre-PR use. Designed for iterative review/revision cycles before submitting a PR.

Expand All @@ -23,8 +23,15 @@ pre-PR use. Designed for iterative review/revision cycles before submitting a PR
- `--force-fresh`: Skip delta-diff mode, run a full fresh review even if previous state exists
- `--full-registry`: Include the entire REGISTRY.md instead of selective sections
- `--model <name>`: Override the OpenAI model (default: `gpt-5.4`)
- `--timeout <seconds>`: HTTP request timeout (default: 300). Use 900 for reasoning models.
- `--dry-run`: Print the compiled prompt without calling the API

**Reasoning models** (`gpt-5.4-pro`, `o3`, `o4-mini`, etc.): Reviews may take 10-15
minutes. For deep reviews with reasoning models, combine `--token-budget` with `--model`:
```
/ai-review-local --model gpt-5.4-pro --token-budget 500000 --context deep
```

## Constraints

This skill does not modify source code files. It may:
Expand Down Expand Up @@ -320,12 +327,19 @@ python3 .claude/scripts/openai_review.py \
[--token-budget "$token_budget"] \
[--full-registry] \
[--model <model>] \
[--timeout <seconds>] \
[--dry-run]
```

Note: `--force-fresh` is a skill-only flag — it controls whether delta diffs are
generated in Step 4 and is NOT passed to the script.

**Reasoning model handling:** If the model contains `-pro` or starts with `o1`/`o3`/`o4`
(e.g., `gpt-5.4-pro`, `o3`, `o4-mini`):
- Pass `--timeout 900` to the script (unless the user explicitly specified `--timeout`)
- Run the Bash command with `run_in_background: true` (bypasses the 600s Bash tool timeout cap)
- After the background command completes, continue to Step 6

If `--dry-run`: display the prompt output and stop. Report the estimated token count,
cost estimate, and model that would be used.

Expand Down Expand Up @@ -451,6 +465,9 @@ runs `--force-fresh` or when a rebase invalidates the tracked commit.
# Use a different model with full registry
/ai-review-local --model gpt-4.1 --full-registry

# Deep review with reasoning model (may take 10-15 minutes)
/ai-review-local --model gpt-5.4-pro --token-budget 500000 --context deep

# Limit token budget for faster/cheaper reviews
/ai-review-local --token-budget 100000
```
Expand Down
123 changes: 102 additions & 21 deletions .claude/scripts/openai_review.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python3
"""Local AI code review using OpenAI Chat Completions API.
"""Local AI code review using OpenAI Responses API.

Compiles a review prompt from the project's review criteria, methodology registry,
and code diffs, then sends it to the OpenAI API for structured feedback.
Expand Down Expand Up @@ -854,6 +854,7 @@ def apply_token_budget(
# MAINTENANCE: Update when OpenAI changes pricing.
PRICING = {
"gpt-5.4": (2.50, 15.00),
"gpt-5.4-pro": (30.00, 180.00),
"gpt-4.1": (2.00, 8.00),
"gpt-4.1-mini": (0.40, 1.60),
"o3": (2.00, 8.00),
Expand Down Expand Up @@ -1093,31 +1094,63 @@ def compile_prompt(
# OpenAI API call
# ---------------------------------------------------------------------------

ENDPOINT = "https://api.openai.com/v1/chat/completions"
ENDPOINT = "https://api.openai.com/v1/responses"
DEFAULT_MODEL = "gpt-5.4"
DEFAULT_TIMEOUT = 300 # seconds
DEFAULT_MAX_TOKENS = 16384
REASONING_MAX_TOKENS = 32768


def _is_reasoning_model(model: str) -> bool:
"""Return True for models that use internal chain-of-thought reasoning."""
return model.startswith(("o1", "o3", "o4")) or "-pro" in model


def estimate_tokens(text: str) -> int:
    """Rough token estimate (~4 chars per token). May vary +/- 50% for code."""
    chars_per_token = 4
    return len(text) // chars_per_token


def _extract_response_text(result: dict) -> str:
"""Extract review text from a Responses API JSON payload.

Tries the top-level ``output_text`` convenience field first (populated by
the Python SDK but typically null in raw HTTP responses), then walks
``output[].content[]`` items. Returns an empty string when no text is
found so the caller can decide how to handle it.
"""
text = result.get("output_text") or ""
if text:
return text
for item in result.get("output", []):
if item.get("type") == "message":
for block in item.get("content", []):
if block.get("type") == "output_text":
text += block.get("text", "")
return text


def call_openai(
prompt: str, model: str, api_key: str
prompt: str,
model: str,
api_key: str,
timeout: int = DEFAULT_TIMEOUT,
) -> "tuple[str, dict]":
"""Call the OpenAI Chat Completions API.
"""Call the OpenAI Responses API.

Returns (content, usage) where usage is the API response's usage dict
containing prompt_tokens and completion_tokens.
containing input_tokens and output_tokens.
"""
payload = {
reasoning = _is_reasoning_model(model)
max_tokens = REASONING_MAX_TOKENS if reasoning else DEFAULT_MAX_TOKENS

payload: dict = {
"model": model,
"messages": [{"role": "user", "content": prompt}],
"temperature": 0,
"max_completion_tokens": DEFAULT_MAX_TOKENS,
"input": prompt,
"max_output_tokens": max_tokens,
}
if not reasoning:
payload["temperature"] = 0

data = json.dumps(payload).encode("utf-8")
req = urllib.request.Request(
Expand All @@ -1131,7 +1164,7 @@ def call_openai(
)

try:
with urllib.request.urlopen(req, timeout=DEFAULT_TIMEOUT) as resp:
with urllib.request.urlopen(req, timeout=timeout) as resp:
result = json.loads(resp.read().decode("utf-8"))
except urllib.error.HTTPError as e:
body = ""
Expand Down Expand Up @@ -1165,7 +1198,7 @@ def call_openai(
sys.exit(1)
except TimeoutError:
print(
f"Error: Request timed out (>{DEFAULT_TIMEOUT}s). "
f"Error: Request timed out (>{timeout}s). "
"Try a smaller diff or disable --full-registry.",
file=sys.stderr,
)
Expand All @@ -1174,14 +1207,39 @@ def call_openai(
print(f"Error: Network error — {e.reason}", file=sys.stderr)
sys.exit(1)

choices = result.get("choices", [])
if not choices:
print("Error: Empty response from OpenAI API.", file=sys.stderr)
content = _extract_response_text(result)

# Treat truncated responses as errors — partial reviews may suppress findings.
status = result.get("status")
if content.strip() and status == "incomplete":
detail = result.get("incomplete_details") or ""
print(
"Error: Review was truncated (status='incomplete'). "
"Output may be missing findings.",
file=sys.stderr,
)
if detail:
print(f"Detail: {detail}", file=sys.stderr)
print(
"Try reducing diff size, disabling --full-registry, or "
"lowering --context to 'minimal'.",
file=sys.stderr,
)
sys.exit(1)

content = choices[0].get("message", {}).get("content", "")
if not content.strip():
print("Error: Empty review content from OpenAI API.", file=sys.stderr)
# No usable content — report the best diagnostic we have.
status = result.get("status", "<missing>")
detail = result.get("incomplete_details") or result.get("error") or ""
if status not in ("completed", "<missing>"):
print(
f"Error: OpenAI response status is '{status}' with no review content.",
file=sys.stderr,
)
else:
print("Error: Empty review content from OpenAI API.", file=sys.stderr)
if detail:
print(f"Detail: {detail}", file=sys.stderr)
sys.exit(1)

usage = result.get("usage", {})
Expand All @@ -1204,7 +1262,7 @@ def _read_file(path: str, label: str) -> str:

def main() -> None:
parser = argparse.ArgumentParser(
description="Run local AI code review via OpenAI Chat Completions API."
description="Run local AI code review via OpenAI Responses API."
)
parser.add_argument(
"--review-criteria",
Expand Down Expand Up @@ -1282,6 +1340,12 @@ def main() -> None:
help=f"Max estimated input tokens before dropping context "
f"(default: {DEFAULT_TOKEN_BUDGET:,})",
)
parser.add_argument(
"--timeout",
type=int,
default=DEFAULT_TIMEOUT,
help=f"HTTP request timeout in seconds (default: {DEFAULT_TIMEOUT})",
)
parser.add_argument(
"--delta-diff",
default=None,
Expand Down Expand Up @@ -1531,7 +1595,8 @@ def main() -> None:
)

# Cost estimate
cost_str = estimate_cost(est_tokens, DEFAULT_MAX_TOKENS, args.model)
max_out = REASONING_MAX_TOKENS if _is_reasoning_model(args.model) else DEFAULT_MAX_TOKENS
cost_str = estimate_cost(est_tokens, max_out, args.model)

# Dry-run: print prompt and exit
if args.dry_run:
Expand All @@ -1549,6 +1614,12 @@ def main() -> None:
sys.exit(0)

# Call OpenAI API
if _is_reasoning_model(args.model) and args.timeout == DEFAULT_TIMEOUT:
print(
f"Note: {args.model} is a reasoning model. Consider --timeout 900 "
"for large reviews.",
file=sys.stderr,
)
print(f"Sending review to {args.model}...", file=sys.stderr)
print(f"Estimated input tokens: ~{est_tokens:,}", file=sys.stderr)
if cost_str:
Expand All @@ -1559,7 +1630,9 @@ def main() -> None:
if delta_diff_text:
print("Mode: Delta-diff (changes since last review)", file=sys.stderr)

review_content, usage = call_openai(prompt, args.model, api_key)
review_content, usage = call_openai(
prompt, args.model, api_key, timeout=args.timeout
)

# Write review output
os.makedirs(os.path.dirname(args.output), exist_ok=True)
Expand Down Expand Up @@ -1603,8 +1676,8 @@ def main() -> None:
)

# Print completion summary with actual usage
actual_input = usage.get("prompt_tokens", 0)
actual_output = usage.get("completion_tokens", 0)
actual_input = usage.get("input_tokens", 0)
actual_output = usage.get("output_tokens", 0)
actual_cost = estimate_cost(actual_input, actual_output, args.model)

print(f"\nAI Review complete.", file=sys.stderr)
Expand All @@ -1615,6 +1688,14 @@ def main() -> None:
f"{actual_output:,} output",
file=sys.stderr,
)
reasoning_tokens = usage.get("output_tokens_details", {}).get(
"reasoning_tokens", 0
)
if reasoning_tokens:
print(
f" (includes {reasoning_tokens:,} reasoning tokens)",
file=sys.stderr,
)
if actual_cost:
print(f"Actual cost: {actual_cost}", file=sys.stderr)
else:
Expand Down
Loading
Loading