diff --git a/bugbug/tools/code_review/agent.py b/bugbug/tools/code_review/agent.py
index df65409275..48611a1517 100644
--- a/bugbug/tools/code_review/agent.py
+++ b/bugbug/tools/code_review/agent.py
@@ -12,6 +12,7 @@
 from typing import Optional
 
 from langchain.agents import create_agent
+from langchain.agents.middleware import TodoListMiddleware
 from langchain.agents.structured_output import ProviderStrategy
 from langchain.chat_models import BaseChatModel, init_chat_model
 from langchain.messages import HumanMessage
@@ -32,6 +33,8 @@
     expand_context,
 )
 from bugbug.tools.code_review.prompts import (
+    CODE_REVIEW_TODO_PROMPT,
+    CODE_REVIEW_TODO_TOOL_DESCRIPTION,
     FIRST_MESSAGE_TEMPLATE,
     STATIC_COMMENT_EXAMPLES,
     SYSTEM_PROMPT_TEMPLATE,
@@ -66,6 +69,7 @@ def __init__(
         show_patch_example: bool = False,
         verbose: bool = True,
         target_software: str = "Mozilla Firefox",
+        todo_enabled: bool = True,
     ) -> None:
         super().__init__()
 
@@ -96,6 +100,15 @@ def __init__(
 
         self._agent_model = llm
 
+        middleware = []
+        if todo_enabled:
+            middleware.append(
+                TodoListMiddleware(
+                    system_prompt=CODE_REVIEW_TODO_PROMPT,
+                    tool_description=CODE_REVIEW_TODO_TOOL_DESCRIPTION,
+                )
+            )
+
         self.agent = create_agent(
             llm,
             tools,
@@ -103,6 +116,7 @@ def __init__(
                 target_software=self.target_software,
             ),
             response_format=ProviderStrategy(AgentResponse),
+            middleware=middleware,
         )
 
         self.review_comments_db = review_comments_db
diff --git a/bugbug/tools/code_review/prompts.py b/bugbug/tools/code_review/prompts.py
index 8ee20da13a..639b36abc9 100644
--- a/bugbug/tools/code_review/prompts.py
+++ b/bugbug/tools/code_review/prompts.py
@@ -187,6 +187,27 @@
     },
 ]
 
+CODE_REVIEW_TODO_PROMPT = """
+## Review Planning with `write_todos`
+
+Use the `write_todos` tool to track investigation tasks as you review.
+
+- After your initial scan, create todos for any concerns that need deeper investigation
+  (e.g., "Verify that removed error handler is covered elsewhere", "Check callers of
+  renamed function for breakage")
+- As the review progresses, add new todos when you discover additional concerns
+- Remove or complete todos that turn out to be non-issues after verification
+- For small or straightforward patches, skip todos entirely — just review directly
+"""
+
+CODE_REVIEW_TODO_TOOL_DESCRIPTION = (
+    "Track investigation tasks during code review. Add items for concerns that need "
+    "tool-based verification (expand_context, find_function_definition). Evolve the "
+    "list as you go — add new items when you discover concerns, remove irrelevant ones. "
+    "Do not use this as a file checklist."
+)
+
+
 TEMPLATE_PATCH_FROM_HUNK = """diff --git a/{filename} b/{filename}
 --- a/{filename}
 +++ b/{filename}
diff --git a/notebooks/code_review_evaluation.ipynb b/notebooks/code_review_evaluation.ipynb
index 52214b687f..45b6343c23 100644
--- a/notebooks/code_review_evaluation.ipynb
+++ b/notebooks/code_review_evaluation.ipynb
@@ -69,9 +69,11 @@
     "class CodeReviewModel(weave.Model):\n",
     "    \"\"\"Weave Model wrapper for CodeReviewTool.\"\"\"\n",
     "\n",
+    "    todo_enabled: bool\n",
+    "\n",
     "    @cached_property\n",
     "    def tool(self):\n",
-    "        return CodeReviewTool.create()\n",
+    "        return CodeReviewTool.create(todo_enabled=self.todo_enabled)\n",
     "\n",
     "    @weave.op()\n",
     "    async def invoke(self, diff_id: int, patch_summary: str) -> dict:\n",
@@ -82,7 +84,7 @@
     "        }\n",
     "\n",
     "\n",
-    "model = CodeReviewModel()"
+    "model = CodeReviewModel(todo_enabled=True)"
    ]
   },
   {
diff --git a/services/reviewhelper-api/app/config.py b/services/reviewhelper-api/app/config.py
index 6077b68e43..a0dd25afd1 100644
--- a/services/reviewhelper-api/app/config.py
+++ b/services/reviewhelper-api/app/config.py
@@ -35,6 +35,9 @@ class Settings(BaseSettings):
     # Cloud Run
     port: int = 8080
 
+    # Agent settings
+    todo_enabled: bool = True
+
     model_config = {
         "env_file": ".env",
         "env_file_encoding": "utf-8",
diff --git a/services/reviewhelper-api/app/review_processor.py b/services/reviewhelper-api/app/review_processor.py
index 6c5530c141..dcfedda07c 100644
--- a/services/reviewhelper-api/app/review_processor.py
+++ b/services/reviewhelper-api/app/review_processor.py
@@ -4,6 +4,7 @@
 from functools import cache
 from typing import Collection, Iterable
 
+from app.config import settings
 from app.database.models import GeneratedComment, ReviewRequest
 from app.enums import Platform
 from bugbug.tools.core.exceptions import LargeDiffError
@@ -32,7 +33,7 @@ class RevisionNotYetPublicError(Exception):
 def get_code_review_tool():
     from bugbug.tools.code_review import CodeReviewTool
 
-    return CodeReviewTool.create()
+    return CodeReviewTool.create(todo_enabled=settings.todo_enabled)
 
 
 async def process_review(