diff --git a/.eslintrc.js b/.eslintrc.js new file mode 100644 index 000000000..96d9178bc --- /dev/null +++ b/.eslintrc.js @@ -0,0 +1,17 @@ +module.exports = { + env: { + browser: true, + es2021: true, + }, + extends: ['eslint:recommended'], + parserOptions: { + ecmaVersion: 2021, + }, + globals: { + marked: 'readonly', + }, + rules: { + 'no-unused-vars': 'warn', + 'no-console': 'warn', + }, +}; diff --git a/.github/workflows/claude-code-review.yml b/.github/workflows/claude-code-review.yml new file mode 100644 index 000000000..b5e8cfd4d --- /dev/null +++ b/.github/workflows/claude-code-review.yml @@ -0,0 +1,44 @@ +name: Claude Code Review + +on: + pull_request: + types: [opened, synchronize, ready_for_review, reopened] + # Optional: Only run on specific file changes + # paths: + # - "src/**/*.ts" + # - "src/**/*.tsx" + # - "src/**/*.js" + # - "src/**/*.jsx" + +jobs: + claude-review: + # Optional: Filter by PR author + # if: | + # github.event.pull_request.user.login == 'external-contributor' || + # github.event.pull_request.user.login == 'new-developer' || + # github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR' + + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: read + issues: read + id-token: write + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Run Claude Code Review + id: claude-review + uses: anthropics/claude-code-action@v1 + with: + claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} + plugin_marketplaces: 'https://github.com/anthropics/claude-code.git' + plugins: 'code-review@claude-code-plugins' + prompt: '/code-review:code-review ${{ github.repository }}/pull/${{ github.event.pull_request.number }}' + # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md + # or https://code.claude.com/docs/en/cli-reference for available options + diff --git a/.github/workflows/claude.yml b/.github/workflows/claude.yml new file mode 
100644 index 000000000..d300267f1 --- /dev/null +++ b/.github/workflows/claude.yml @@ -0,0 +1,50 @@ +name: Claude Code + +on: + issue_comment: + types: [created] + pull_request_review_comment: + types: [created] + issues: + types: [opened, assigned] + pull_request_review: + types: [submitted] + +jobs: + claude: + if: | + (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) || + (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) || + (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) || + (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude'))) + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: read + issues: read + id-token: write + actions: read # Required for Claude to read CI results on PRs + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Run Claude Code + id: claude + uses: anthropics/claude-code-action@v1 + with: + claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} + + # This is an optional setting that allows Claude to read CI results on PRs + additional_permissions: | + actions: read + + # Optional: Give a custom prompt to Claude. If this is not specified, Claude will perform the instructions specified in the comment that tagged it. + # prompt: 'Update the pull request description to include a summary of changes.' 
+ + # Optional: Add claude_args to customize behavior and configuration + # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md + # or https://code.claude.com/docs/en/cli-reference for available options + # claude_args: '--allowed-tools Bash(gh pr:*)' + diff --git a/.prettierrc b/.prettierrc new file mode 100644 index 000000000..80668882f --- /dev/null +++ b/.prettierrc @@ -0,0 +1,11 @@ +{ + "printWidth": 88, + "tabWidth": 4, + "useTabs": false, + "semi": true, + "singleQuote": true, + "trailingComma": "es5", + "bracketSpacing": true, + "arrowParens": "always", + "endOfLine": "lf" +} diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 000000000..35d101ea9 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,97 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Development Commands + +```bash +# Run the application +./run.sh +# or manually: +cd backend && uv run uvicorn app:app --reload --port 8000 + +# Install dependencies +uv sync + +# Install dev dependencies (required for linting/formatting) +uv sync --group dev + + +# Add a new dependency +uv add package_name + +# Run all tests +uv run pytest + +# Run a single test file +uv run pytest backend/tests/test_rag_system.py + +# Run a single test +uv run pytest backend/tests/test_rag_system.py::TestClassName::test_method_name + +# Format code (modifies files: isort → black → flake8 → mypy) +./scripts/format.sh + +# Lint only, no modifications +./scripts/lint.sh +``` + +- Web UI: http://localhost:8000 +- API docs: http://localhost:8000/docs +- Requires `ANTHROPIC_API_KEY` in a `.env` file at the project root + +## Architecture + +This is a RAG (Retrieval-Augmented Generation) chatbot for course materials. FastAPI serves both the API and the vanilla JS frontend as static files. 
+ +### API endpoints + +- `POST /api/query` — main query endpoint, returns `{ answer, sources, source_links, session_id }` +- `GET /api/courses` — returns course catalog stats `{ total_courses, course_titles }` +- `POST /api/clear-session` — clears a session by `{ session_id }` + +### Query flow + +1. Frontend (`frontend/script.js`) sends `POST /api/query` with `{ query, session_id }` +2. `app.py` creates a session if none exists, delegates to `RAGSystem.query()` +3. `RAGSystem` fetches conversation history from `SessionManager`, then calls `AIGenerator.generate_response()` +4. `AIGenerator` runs a **tool-calling loop** (max 2 rounds) with the Claude API: + - Claude may call `search_course_content` (semantic chunk search) or `get_course_outline` (lesson list) + - Tool results are appended to the message list and sent back to Claude + - After max rounds, a final API call without tools forces a text response +5. `ToolManager` collects `last_sources` and `last_source_links` from whichever tool ran last +6. Response, sources, and lesson links are returned to the frontend +7. Frontend renders the answer as Markdown (`marked.js`) with a collapsible sources block + +### Key design decisions + +- **Course name resolution**: Partial/fuzzy course names are resolved via a semantic search against the `course_catalog` ChromaDB collection before filtering `course_content`. This lets Claude pass "MCP" and still find "Introduction to MCP Servers". +- **Dual ChromaDB collections**: `course_catalog` stores one document per course (title + metadata including `lessons_json`). `course_content` stores all text chunks with `course_title`/`lesson_number` metadata for filtered search. +- **Session storage**: Sessions are in-memory only — they are lost on server restart. `SessionManager` keeps the last 2 exchange pairs (4 messages) per session. Conversation history is injected into the system prompt, not the message list. 
+- **AI generation config**: `AIGenerator` uses `temperature=0` and `max_tokens=800`. Model is set in `config.py` (`ANTHROPIC_MODEL`). These are not exposed via env vars — change them in code. +- **Deduplication on startup**: `add_course_folder()` checks existing titles in `course_catalog` and skips already-ingested courses. + +### Document format + +Course files (`.txt`, `.pdf`, `.docx`) in `docs/` must follow this structure for `.txt` — `.pdf`/`.docx` support is parsed but the required header fields are the same: + +``` +Course Title: +Course Link: <url> +Course Instructor: <name> + +Lesson 0: <title> +Lesson Link: <url> +<lesson content> + +Lesson 1: <title> +... +``` + +`DocumentProcessor` splits content into sentence-aware chunks (800 chars, 100 char overlap). The first chunk of each lesson is prefixed with `"Lesson N content: ..."` for retrieval context. + +### Adding a new search tool + +1. Create a class extending `Tool` (ABC in `search_tools.py`) implementing `get_tool_definition()` and `execute()` +2. Register it: `self.tool_manager.register_tool(your_tool)` in `RAGSystem.__init__()` +3. If it should surface sources in the UI, add `last_sources` and `last_source_links` instance attributes — `ToolManager.get_last_sources()` checks all registered tools for these diff --git a/backend-tool-refactor.md b/backend-tool-refactor.md new file mode 100644 index 000000000..de23ae5c7 --- /dev/null +++ b/backend-tool-refactor.md @@ -0,0 +1,28 @@ +Refactor @backend/ai_generator.py to support sequential tool calling where Claude can make up to 2 tool calls in separate API rounds. 
+ +Current behavior: +- Claude makes 1 tool call → tools are removed from API params → final response +- If Claude wants another tool call after seeing results, it can't (gets empty response) + +Desired behavior: +- Each tool call should be a separate API request where Claude can reason about previous results +- Support for complex queries requiring multiple searches for comparisons, multi-part questions, or when information from different courses/lessons is needed + +Example flow: +1. User: "Search for a course that discusses the same topic as lesson 4 of course X" +2. Claude: get course outline for course X → gets title of lesson 4 +3. Claude: uses the title to search for a course that discusses the same topic → returns course information +4. Claude: provides complete answer + +Requirements: +- Maximum 2 sequential rounds per user query +- Terminate when: (a) 2 rounds completed, (b) Claude's response has no tool_use blocks, or (c) tool call fails +- Preserve conversation context between rounds +- Handle tool execution errors gracefully + +Notes: +- Update the system prompt in @backend/ai_generator.py +- Update the test @backend/tests/test_ai_generator.py +- Write tests that verify the external behavior (API calls made, tools executed, results returned) rather than internal state details. + +Use two parallel subagents to brainstorm possible plans. Do not implement any code. diff --git a/backend/ai_generator.py b/backend/ai_generator.py index 0363ca90c..b154b1001 100644 --- a/backend/ai_generator.py +++ b/backend/ai_generator.py @@ -1,135 +1,127 @@ -import anthropic -from typing import List, Optional, Dict, Any - -class AIGenerator: - """Handles interactions with Anthropic's Claude API for generating responses""" - - # Static system prompt to avoid rebuilding on each call - SYSTEM_PROMPT = """ You are an AI assistant specialized in course materials and educational content with access to a comprehensive search tool for course information. 
- -Search Tool Usage: -- Use the search tool **only** for questions about specific course content or detailed educational materials -- **One search per query maximum** -- Synthesize search results into accurate, fact-based responses -- If search yields no results, state this clearly without offering alternatives - -Response Protocol: -- **General knowledge questions**: Answer using existing knowledge without searching -- **Course-specific questions**: Search first, then answer -- **No meta-commentary**: - - Provide direct answers only — no reasoning process, search explanations, or question-type analysis - - Do not mention "based on the search results" - - -All responses must be: -1. **Brief, Concise and focused** - Get to the point quickly -2. **Educational** - Maintain instructional value -3. **Clear** - Use accessible language -4. **Example-supported** - Include relevant examples when they aid understanding -Provide only the direct answer to what was asked. -""" - - def __init__(self, api_key: str, model: str): - self.client = anthropic.Anthropic(api_key=api_key) - self.model = model - - # Pre-build base API parameters - self.base_params = { - "model": self.model, - "temperature": 0, - "max_tokens": 800 - } - - def generate_response(self, query: str, - conversation_history: Optional[str] = None, - tools: Optional[List] = None, - tool_manager=None) -> str: - """ - Generate AI response with optional tool usage and conversation context. 
- - Args: - query: The user's question or request - conversation_history: Previous messages for context - tools: Available tools the AI can use - tool_manager: Manager to execute tools - - Returns: - Generated response as string - """ - - # Build system content efficiently - avoid string ops when possible - system_content = ( - f"{self.SYSTEM_PROMPT}\n\nPrevious conversation:\n{conversation_history}" - if conversation_history - else self.SYSTEM_PROMPT - ) - - # Prepare API call parameters efficiently - api_params = { - **self.base_params, - "messages": [{"role": "user", "content": query}], - "system": system_content - } - - # Add tools if available - if tools: - api_params["tools"] = tools - api_params["tool_choice"] = {"type": "auto"} - - # Get response from Claude - response = self.client.messages.create(**api_params) - - # Handle tool execution if needed - if response.stop_reason == "tool_use" and tool_manager: - return self._handle_tool_execution(response, api_params, tool_manager) - - # Return direct response - return response.content[0].text - - def _handle_tool_execution(self, initial_response, base_params: Dict[str, Any], tool_manager): - """ - Handle execution of tool calls and get follow-up response. 
- - Args: - initial_response: The response containing tool use requests - base_params: Base API parameters - tool_manager: Manager to execute tools - - Returns: - Final response text after tool execution - """ - # Start with existing messages - messages = base_params["messages"].copy() - - # Add AI's tool use response - messages.append({"role": "assistant", "content": initial_response.content}) - - # Execute all tool calls and collect results - tool_results = [] - for content_block in initial_response.content: - if content_block.type == "tool_use": - tool_result = tool_manager.execute_tool( - content_block.name, - **content_block.input - ) - - tool_results.append({ - "type": "tool_result", - "tool_use_id": content_block.id, - "content": tool_result - }) - - # Add tool results as single message - if tool_results: - messages.append({"role": "user", "content": tool_results}) - - # Prepare final API call without tools - final_params = { - **self.base_params, - "messages": messages, - "system": base_params["system"] - } - - # Get final response - final_response = self.client.messages.create(**final_params) - return final_response.content[0].text \ No newline at end of file +import anthropic +from typing import List, Optional + +class AIGenerator: + """Handles interactions with Anthropic's Claude API for generating responses""" + + # Static system prompt to avoid rebuilding on each call + SYSTEM_PROMPT = """ You are an AI assistant specialized in course materials and educational content with access to a comprehensive search tool for course information. 
+ +Search Tool Usage: +- Use `search_course_content` for questions about specific course content or detailed educational materials +- Use `get_course_outline` for questions about course structure, lesson list, or what topics a course covers — it returns the course title, course link, and all lessons with their numbers and titles +- **Up to 2 sequential tool calls per query** +- Synthesize results into accurate, fact-based responses +- If a tool yields no results, state this clearly without offering alternatives + +Response Protocol: +- **General knowledge questions**: Answer using existing knowledge without searching +- **Course-specific questions**: Search first, then answer +- **No meta-commentary**: + - Provide direct answers only — no reasoning process, search explanations, or question-type analysis + - Do not mention "based on the search results" + + +All responses must be: +1. **Brief, Concise and focused** - Get to the point quickly +2. **Educational** - Maintain instructional value +3. **Clear** - Use accessible language +4. **Example-supported** - Include relevant examples when they aid understanding +Provide only the direct answer to what was asked. +""" + + MAX_ROUNDS = 2 + + def __init__(self, api_key: str, model: str): + self.client = anthropic.Anthropic(api_key=api_key) + self.model = model + + # Pre-build base API parameters + self.base_params = { + "model": self.model, + "temperature": 0, + "max_tokens": 800 + } + + def generate_response(self, query: str, + conversation_history: Optional[str] = None, + tools: Optional[List] = None, + tool_manager=None) -> str: + """ + Generate AI response with optional tool usage and conversation context. + Supports up to MAX_ROUNDS sequential tool-call rounds before forcing a + final text response. 
+ + Args: + query: The user's question or request + conversation_history: Previous messages for context + tools: Available tools the AI can use + tool_manager: Manager to execute tools + + Returns: + Generated response as string + """ + + # Build system content efficiently - avoid string ops when possible + system_content = ( + f"{self.SYSTEM_PROMPT}\n\nPrevious conversation:\n{conversation_history}" + if conversation_history + else self.SYSTEM_PROMPT + ) + + messages = [{"role": "user", "content": query}] + + # Prepare initial API call parameters + api_params = { + **self.base_params, + "messages": messages, + "system": system_content + } + + if tools: + api_params["tools"] = tools + api_params["tool_choice"] = {"type": "auto"} + + # Tool-calling loop: up to MAX_ROUNDS sequential rounds + round_count = 0 + while round_count < self.MAX_ROUNDS: + response = self.client.messages.create(**api_params) + + # Early exit: no tool use requested or no manager to execute them + if response.stop_reason != "tool_use" or not tool_manager: + return response.content[0].text + + # Append assistant turn (contains tool-use blocks) + messages.append({"role": "assistant", "content": response.content}) + + # Execute all tool calls in this round + tool_results = [] + for block in response.content: + if block.type == "tool_use": + result = tool_manager.execute_tool(block.name, **block.input) + tool_results.append({ + "type": "tool_result", + "tool_use_id": block.id, + "content": result + }) + + messages.append({"role": "user", "content": tool_results}) + + # Rebuild api_params with updated messages; keep tools for next round + api_params = { + **self.base_params, + "messages": messages, + "system": system_content, + "tools": tools, + "tool_choice": {"type": "auto"} + } + round_count += 1 + + # Max rounds reached — force a text response by calling without tools + final_params = { + **self.base_params, + "messages": messages, + "system": system_content + } + final_response = 
self.client.messages.create(**final_params) + return final_response.content[0].text diff --git a/backend/app.py b/backend/app.py index 5a69d741d..601dd93b5 100644 --- a/backend/app.py +++ b/backend/app.py @@ -51,6 +51,10 @@ class CourseStats(BaseModel): total_courses: int course_titles: List[str] +class ClearSessionRequest(BaseModel): + """Request model for clearing a session""" + session_id: str + # API Endpoints @app.post("/api/query", response_model=QueryResponse) @@ -73,6 +77,12 @@ async def query_documents(request: QueryRequest): except Exception as e: raise HTTPException(status_code=500, detail=str(e)) +@app.post("/api/clear-session") +async def clear_session(request: ClearSessionRequest): + """Clear a conversation session""" + rag_system.session_manager.clear_session(request.session_id) + return {"status": "ok"} + @app.get("/api/courses", response_model=CourseStats) async def get_course_stats(): """Get course analytics and statistics""" diff --git a/backend/config.py b/backend/config.py index d9f6392ef..fb9b6ee69 100644 --- a/backend/config.py +++ b/backend/config.py @@ -18,7 +18,7 @@ class Config: # Document processing settings CHUNK_SIZE: int = 800 # Size of text chunks for vector storage CHUNK_OVERLAP: int = 100 # Characters to overlap between chunks - MAX_RESULTS: int = 5 # Maximum search results to return + MAX_RESULTS: int = 5 # Maximum search results to return MAX_HISTORY: int = 2 # Number of conversation messages to remember # Database paths diff --git a/backend/rag_system.py b/backend/rag_system.py index 50d848c8e..443649f0e 100644 --- a/backend/rag_system.py +++ b/backend/rag_system.py @@ -4,7 +4,7 @@ from vector_store import VectorStore from ai_generator import AIGenerator from session_manager import SessionManager -from search_tools import ToolManager, CourseSearchTool +from search_tools import ToolManager, CourseSearchTool, CourseOutlineTool from models import Course, Lesson, CourseChunk class RAGSystem: @@ -23,6 +23,8 @@ def __init__(self, 
config): self.tool_manager = ToolManager() self.search_tool = CourseSearchTool(self.vector_store) self.tool_manager.register_tool(self.search_tool) + self.outline_tool = CourseOutlineTool(self.vector_store) + self.tool_manager.register_tool(self.outline_tool) def add_course_document(self, file_path: str) -> Tuple[Course, int]: """ diff --git a/backend/search_tools.py b/backend/search_tools.py index adfe82352..73e44cfce 100644 --- a/backend/search_tools.py +++ b/backend/search_tools.py @@ -113,6 +113,49 @@ def _format_results(self, results: SearchResults) -> str: return "\n\n".join(formatted) +class CourseOutlineTool(Tool): + """Tool for retrieving a course's full lesson outline from course metadata""" + + def __init__(self, vector_store: VectorStore): + self.store = vector_store + self.last_sources = [] + + def get_tool_definition(self) -> Dict[str, Any]: + return { + "name": "get_course_outline", + "description": "Get the complete outline of a course: its title, course link, and all lessons (number and title)", + "input_schema": { + "type": "object", + "properties": { + "course_name": { + "type": "string", + "description": "Course title (partial matches work, e.g. 'MCP', 'Introduction')" + } + }, + "required": ["course_name"] + } + } + + def execute(self, course_name: str) -> str: + outline = self.store.get_course_outline(course_name) + if not outline: + return f"No course found matching '{course_name}'." 
+ + title = outline['title'] + course_link = outline['course_link'] + lessons = outline['lessons'] + + self.last_sources = [title] + + lines = [f"Course: {title}", f"Link: {course_link}", "", "Lessons:"] + for lesson in lessons: + num = lesson.get('lesson_number', '') + lesson_title = lesson.get('lesson_title', '') + lines.append(f" Lesson {num}: {lesson_title}") + + return "\n".join(lines) + + class ToolManager: """Manages available tools for the AI""" diff --git a/backend/tests/__init__.py b/backend/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py new file mode 100644 index 000000000..5605b419f --- /dev/null +++ b/backend/tests/conftest.py @@ -0,0 +1,68 @@ +""" +Shared pytest fixtures for the RAG chatbot test suite. + +The FastAPI app in app.py has two side effects on import that must be suppressed +in tests: + 1. RAGSystem(config) — connects to ChromaDB and loads embeddings + 2. app.mount("/", StaticFiles(directory="../frontend")) — the frontend directory + does not exist in the test environment + +Both are patched before the module is imported so the app loads cleanly. +""" +import sys +import os + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import pytest +from unittest.mock import MagicMock, patch + + +def _import_app_with_patches(): + """ + Import app.py while RAGSystem and StaticFiles are mocked out. + + The patches must be active during the import because app.py calls + RAGSystem(config) and StaticFiles(...) at module level. Once the module + is loaded those names are bound locally, so stopping the patches + afterwards is safe. 
+ """ + mock_rag = MagicMock() + with patch("rag_system.RAGSystem", return_value=mock_rag), \ + patch("fastapi.staticfiles.StaticFiles"): + import app as app_module + # Ensure the module-level rag_system variable points to our mock + app_module.rag_system = mock_rag + return app_module, mock_rag + + +_app_module, _mock_rag_instance = _import_app_with_patches() + + +@pytest.fixture +def mock_rag(): + """ + The mock RAGSystem instance wired into the FastAPI app. + + Call tracking and side effects are cleared between tests so that + assertions in one test cannot bleed into the next. + """ + _mock_rag_instance.reset_mock(side_effect=True) + return _mock_rag_instance + + +@pytest.fixture +def client(): + """FastAPI TestClient backed by the patched app.""" + from fastapi.testclient import TestClient + return TestClient(_app_module.app) + + +@pytest.fixture +def sample_query_payload(): + return {"query": "What is machine learning?", "session_id": "test-session-123"} + + +@pytest.fixture +def sample_rag_response(): + return ("Machine learning is a subset of AI.", ["Course A - Lesson 1"]) diff --git a/backend/tests/test_ai_generator.py b/backend/tests/test_ai_generator.py new file mode 100644 index 000000000..823240184 --- /dev/null +++ b/backend/tests/test_ai_generator.py @@ -0,0 +1,511 @@ +""" +Tests for AIGenerator in ai_generator.py. + +All tests mock the Anthropic client to avoid real API calls. 
+They verify that AIGenerator correctly: + - Calls the search_course_content tool for content queries + - Processes tool_use responses by executing tools and feeding results back + - Returns plain text when no tool use is needed +""" +import sys +import os +import pytest +from unittest.mock import MagicMock, patch, call + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from ai_generator import AIGenerator + + +# --------------------------------------------------------------------------- +# Helpers to build mock Anthropic response objects +# --------------------------------------------------------------------------- + +def make_text_content(text): + block = MagicMock() + block.type = "text" + block.text = text + return block + + +def make_tool_use_content(tool_name, tool_input, tool_id="tool_call_1"): + block = MagicMock() + block.type = "tool_use" + block.name = tool_name + block.input = tool_input + block.id = tool_id + return block + + +def make_response(stop_reason, content_blocks): + response = MagicMock() + response.stop_reason = stop_reason + response.content = content_blocks + return response + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + +class TestAIGeneratorDirectResponse: + + @patch("ai_generator.anthropic.Anthropic") + def test_generate_response_returns_text_for_general_query(self, mock_anthropic_cls): + """When Claude responds with end_turn, the text is returned directly.""" + mock_client = MagicMock() + mock_anthropic_cls.return_value = mock_client + mock_client.messages.create.return_value = make_response( + stop_reason="end_turn", + content_blocks=[make_text_content("Paris is the capital of France.")], + ) + + gen = AIGenerator(api_key="test-key", model="claude-test") + result = gen.generate_response(query="What is the capital of France?") + + assert result == "Paris is the capital of 
France." + + @patch("ai_generator.anthropic.Anthropic") + def test_generate_response_includes_tools_in_api_call(self, mock_anthropic_cls): + """When tools are provided, they are included in the API call parameters.""" + mock_client = MagicMock() + mock_anthropic_cls.return_value = mock_client + mock_client.messages.create.return_value = make_response( + stop_reason="end_turn", + content_blocks=[make_text_content("Some answer.")], + ) + + tool_def = {"name": "search_course_content", "description": "...", "input_schema": {}} + gen = AIGenerator(api_key="test-key", model="claude-test") + gen.generate_response(query="What is in lesson 1?", tools=[tool_def]) + + call_kwargs = mock_client.messages.create.call_args[1] + assert "tools" in call_kwargs + assert call_kwargs["tools"] == [tool_def] + assert call_kwargs["tool_choice"] == {"type": "auto"} + + @patch("ai_generator.anthropic.Anthropic") + def test_generate_response_no_tools_if_not_provided(self, mock_anthropic_cls): + """When no tools are passed, the API call has no tools key.""" + mock_client = MagicMock() + mock_anthropic_cls.return_value = mock_client + mock_client.messages.create.return_value = make_response( + stop_reason="end_turn", + content_blocks=[make_text_content("Answer.")], + ) + + gen = AIGenerator(api_key="test-key", model="claude-test") + gen.generate_response(query="Hello") + + call_kwargs = mock_client.messages.create.call_args[1] + assert "tools" not in call_kwargs + + +class TestAIGeneratorToolExecution: + + @patch("ai_generator.anthropic.Anthropic") + def test_tool_is_executed_on_tool_use_stop_reason(self, mock_anthropic_cls): + """When stop_reason is tool_use, the tool is executed and the final text is returned.""" + mock_client = MagicMock() + mock_anthropic_cls.return_value = mock_client + + tool_block = make_tool_use_content( + "search_course_content", {"query": "transformers"}, "id_1" + ) + first_response = make_response(stop_reason="tool_use", content_blocks=[tool_block]) + final_response 
= make_response( + stop_reason="end_turn", + content_blocks=[make_text_content("Transformers use attention.")], + ) + mock_client.messages.create.side_effect = [first_response, final_response] + + tool_manager = MagicMock() + tool_manager.execute_tool.return_value = "Transformers are attention models." + + gen = AIGenerator(api_key="test-key", model="claude-test") + result = gen.generate_response( + query="What are transformers?", + tools=[{"name": "search_course_content"}], + tool_manager=tool_manager, + ) + + assert result == "Transformers use attention." + assert mock_client.messages.create.call_count == 2 + tool_manager.execute_tool.assert_called_once_with( + "search_course_content", query="transformers" + ) + + @patch("ai_generator.anthropic.Anthropic") + def test_tool_result_sent_back_as_user_message(self, mock_anthropic_cls): + """Tool result is appended to the conversation as a user message with tool_result type.""" + mock_client = MagicMock() + mock_anthropic_cls.return_value = mock_client + + tool_block = make_tool_use_content( + "search_course_content", {"query": "MCP"}, "call_abc" + ) + first_response = make_response(stop_reason="tool_use", content_blocks=[tool_block]) + final_response = make_response( + stop_reason="end_turn", + content_blocks=[make_text_content("MCP stands for Model Context Protocol.")], + ) + mock_client.messages.create.side_effect = [first_response, final_response] + + tool_manager = MagicMock() + tool_manager.execute_tool.return_value = "MCP lesson content here." 
+ + gen = AIGenerator(api_key="test-key", model="claude-test") + gen.generate_response( + query="What is MCP?", + tools=[{"name": "search_course_content"}], + tool_manager=tool_manager, + ) + + # The second API call should include a user message with tool_result + second_call_kwargs = mock_client.messages.create.call_args_list[1][1] + messages = second_call_kwargs["messages"] + + # Find the user message containing tool results + tool_result_message = next( + (m for m in messages if m["role"] == "user" and isinstance(m["content"], list)), + None, + ) + assert tool_result_message is not None + tool_result_block = tool_result_message["content"][0] + assert tool_result_block["type"] == "tool_result" + assert tool_result_block["tool_use_id"] == "call_abc" + assert tool_result_block["content"] == "MCP lesson content here." + + @patch("ai_generator.anthropic.Anthropic") + def test_final_api_call_has_no_tools(self, mock_anthropic_cls): + """After MAX_ROUNDS of tool use, the post-loop final API call has no tools.""" + mock_client = MagicMock() + mock_anthropic_cls.return_value = mock_client + + tool_block1 = make_tool_use_content("search_course_content", {"query": "AI"}, "id_1") + tool_block2 = make_tool_use_content("get_course_outline", {"course_name": "AI"}, "id_2") + round1_response = make_response(stop_reason="tool_use", content_blocks=[tool_block1]) + round2_response = make_response(stop_reason="tool_use", content_blocks=[tool_block2]) + final_response = make_response( + stop_reason="end_turn", + content_blocks=[make_text_content("AI answer.")], + ) + mock_client.messages.create.side_effect = [round1_response, round2_response, final_response] + + tool_manager = MagicMock() + tool_manager.execute_tool.return_value = "Some search result." 
+ + gen = AIGenerator(api_key="test-key", model="claude-test") + gen.generate_response( + query="Tell me about AI", + tools=[{"name": "search_course_content"}], + tool_manager=tool_manager, + ) + + # Third call (post-loop) should NOT include tools + assert mock_client.messages.create.call_count == 3 + third_call_kwargs = mock_client.messages.create.call_args_list[2][1] + assert "tools" not in third_call_kwargs + + @patch("ai_generator.anthropic.Anthropic") + def test_generate_response_with_conversation_history(self, mock_anthropic_cls): + """Conversation history is injected into the system prompt, not messages.""" + mock_client = MagicMock() + mock_anthropic_cls.return_value = mock_client + mock_client.messages.create.return_value = make_response( + stop_reason="end_turn", + content_blocks=[make_text_content("Follow-up answer.")], + ) + + gen = AIGenerator(api_key="test-key", model="claude-test") + gen.generate_response( + query="What about lesson 2?", + conversation_history="User: What is lesson 1?\nAssistant: Lesson 1 covers X.", + ) + + call_kwargs = mock_client.messages.create.call_args[1] + system_content = call_kwargs["system"] + assert "Previous conversation" in system_content + assert "What is lesson 1?" in system_content + # History should be in system, not in messages + assert len(call_kwargs["messages"]) == 1 + + +class TestAIGeneratorSearchToolCalling: + + @patch("ai_generator.anthropic.Anthropic") + def test_content_query_triggers_search_tool_call(self, mock_anthropic_cls): + """ + Verifies the full loop: content query → Claude requests search_course_content + tool → tool is executed → final answer is returned. 
+ """ + mock_client = MagicMock() + mock_anthropic_cls.return_value = mock_client + + tool_block = make_tool_use_content( + "search_course_content", + {"query": "what does lesson 1 cover", "course_name": "AI Course"}, + "search_id_1", + ) + first_response = make_response(stop_reason="tool_use", content_blocks=[tool_block]) + final_response = make_response( + stop_reason="end_turn", + content_blocks=[make_text_content("Lesson 1 covers neural networks.")], + ) + mock_client.messages.create.side_effect = [first_response, final_response] + + tool_manager = MagicMock() + tool_manager.execute_tool.return_value = ( + "[AI Course - Lesson 1]\nThis lesson introduces neural networks." + ) + + gen = AIGenerator(api_key="test-key", model="claude-test") + search_tool_def = { + "name": "search_course_content", + "description": "Search course materials", + "input_schema": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}, + } + result = gen.generate_response( + query="Answer this question about course materials: What does lesson 1 cover?", + tools=[search_tool_def], + tool_manager=tool_manager, + ) + + assert result == "Lesson 1 covers neural networks." 
+ tool_manager.execute_tool.assert_called_once_with( + "search_course_content", + query="what does lesson 1 cover", + course_name="AI Course", + ) + + +class TestAIGeneratorTwoRoundToolCalling: + """Tests for sequential 2-round tool-calling behavior.""" + + @patch("ai_generator.anthropic.Anthropic") + def test_two_sequential_tool_calls_makes_three_api_calls(self, mock_anthropic_cls): + """Two rounds of tool use followed by end_turn results in exactly 3 API calls.""" + mock_client = MagicMock() + mock_anthropic_cls.return_value = mock_client + + tb1 = make_tool_use_content("get_course_outline", {"course_name": "X"}, "id_1") + tb2 = make_tool_use_content("search_course_content", {"query": "topic"}, "id_2") + mock_client.messages.create.side_effect = [ + make_response("tool_use", [tb1]), + make_response("tool_use", [tb2]), + make_response("end_turn", [make_text_content("Final answer.")]), + ] + + tool_manager = MagicMock() + tool_manager.execute_tool.return_value = "some result" + + gen = AIGenerator(api_key="test-key", model="claude-test") + result = gen.generate_response( + query="Search for a course on the same topic as lesson 4 of course X", + tools=[{"name": "get_course_outline"}, {"name": "search_course_content"}], + tool_manager=tool_manager, + ) + + assert mock_client.messages.create.call_count == 3 + assert result == "Final answer." 
+ assert tool_manager.execute_tool.call_count == 2 + + @patch("ai_generator.anthropic.Anthropic") + def test_tools_included_in_second_round_api_call(self, mock_anthropic_cls): + """The second API call (round 2) still includes tools so Claude can use them again.""" + mock_client = MagicMock() + mock_anthropic_cls.return_value = mock_client + + tb1 = make_tool_use_content("get_course_outline", {"course_name": "X"}, "id_1") + tb2 = make_tool_use_content("search_course_content", {"query": "topic"}, "id_2") + mock_client.messages.create.side_effect = [ + make_response("tool_use", [tb1]), + make_response("tool_use", [tb2]), + make_response("end_turn", [make_text_content("Done.")]), + ] + + tool_manager = MagicMock() + tool_manager.execute_tool.return_value = "result" + + gen = AIGenerator(api_key="test-key", model="claude-test") + gen.generate_response( + query="Multi-step query", + tools=[{"name": "search_course_content"}], + tool_manager=tool_manager, + ) + + second_call_kwargs = mock_client.messages.create.call_args_list[1][1] + assert "tools" in second_call_kwargs + + @patch("ai_generator.anthropic.Anthropic") + def test_final_call_after_max_rounds_has_no_tools(self, mock_anthropic_cls): + """The post-loop final call (3rd) after hitting MAX_ROUNDS has no tools.""" + mock_client = MagicMock() + mock_anthropic_cls.return_value = mock_client + + tb1 = make_tool_use_content("search_course_content", {"query": "a"}, "id_1") + tb2 = make_tool_use_content("search_course_content", {"query": "b"}, "id_2") + mock_client.messages.create.side_effect = [ + make_response("tool_use", [tb1]), + make_response("tool_use", [tb2]), + make_response("end_turn", [make_text_content("Answer.")]), + ] + + tool_manager = MagicMock() + tool_manager.execute_tool.return_value = "result" + + gen = AIGenerator(api_key="test-key", model="claude-test") + gen.generate_response( + query="Query", + tools=[{"name": "search_course_content"}], + tool_manager=tool_manager, + ) + + third_call_kwargs = 
mock_client.messages.create.call_args_list[2][1] + assert "tools" not in third_call_kwargs + + @patch("ai_generator.anthropic.Anthropic") + def test_message_list_grows_with_each_round(self, mock_anthropic_cls): + """After 2 rounds, the final API call receives 5 messages in the correct order.""" + mock_client = MagicMock() + mock_anthropic_cls.return_value = mock_client + + tb1 = make_tool_use_content("get_course_outline", {"course_name": "X"}, "id_1") + tb2 = make_tool_use_content("search_course_content", {"query": "topic"}, "id_2") + r1 = make_response("tool_use", [tb1]) + r2 = make_response("tool_use", [tb2]) + mock_client.messages.create.side_effect = [ + r1, r2, + make_response("end_turn", [make_text_content("Done.")]), + ] + + tool_manager = MagicMock() + tool_manager.execute_tool.return_value = "result" + + gen = AIGenerator(api_key="test-key", model="claude-test") + gen.generate_response( + query="Complex query", + tools=[{"name": "search_course_content"}], + tool_manager=tool_manager, + ) + + final_call_messages = mock_client.messages.create.call_args_list[2][1]["messages"] + assert len(final_call_messages) == 5 + assert final_call_messages[0]["role"] == "user" # original query + assert final_call_messages[1]["role"] == "assistant" # round 1 tool-use + assert final_call_messages[2]["role"] == "user" # round 1 tool-results + assert final_call_messages[3]["role"] == "assistant" # round 2 tool-use + assert final_call_messages[4]["role"] == "user" # round 2 tool-results + + @patch("ai_generator.anthropic.Anthropic") + def test_tool_error_does_not_terminate_loop(self, mock_anthropic_cls): + """A tool returning an error string is passed as context; the loop continues.""" + mock_client = MagicMock() + mock_anthropic_cls.return_value = mock_client + + tb = make_tool_use_content("search_course_content", {"query": "x"}, "id_err") + mock_client.messages.create.side_effect = [ + make_response("tool_use", [tb]), + make_response("end_turn", [make_text_content("Sorry, 
search failed.")]), + ] + + tool_manager = MagicMock() + tool_manager.execute_tool.return_value = "Search error: n_results must be positive" + + gen = AIGenerator(api_key="test-key", model="claude-test") + result = gen.generate_response( + query="Find something", + tools=[{"name": "search_course_content"}], + tool_manager=tool_manager, + ) + + assert mock_client.messages.create.call_count == 2 + assert result == "Sorry, search failed." + + # Error string passed as tool_result content in second call + second_call_messages = mock_client.messages.create.call_args_list[1][1]["messages"] + tool_result_msg = next( + m for m in second_call_messages + if m["role"] == "user" and isinstance(m["content"], list) + ) + assert tool_result_msg["content"][0]["content"] == "Search error: n_results must be positive" + + @patch("ai_generator.anthropic.Anthropic") + def test_early_exit_when_no_tool_use_in_first_response(self, mock_anthropic_cls): + """When the first response has end_turn, only 1 API call is made.""" + mock_client = MagicMock() + mock_anthropic_cls.return_value = mock_client + mock_client.messages.create.return_value = make_response( + "end_turn", [make_text_content("Direct answer.")] + ) + + gen = AIGenerator(api_key="test-key", model="claude-test") + result = gen.generate_response( + query="What is 2+2?", + tools=[{"name": "search_course_content"}], + tool_manager=MagicMock(), + ) + + assert mock_client.messages.create.call_count == 1 + assert result == "Direct answer." 
+ + @patch("ai_generator.anthropic.Anthropic") + def test_early_exit_after_first_round_no_second_tool_use(self, mock_anthropic_cls): + """When round 2 returns end_turn, exactly 2 API calls are made (no third call).""" + mock_client = MagicMock() + mock_anthropic_cls.return_value = mock_client + + tb = make_tool_use_content("search_course_content", {"query": "topic"}, "id_1") + mock_client.messages.create.side_effect = [ + make_response("tool_use", [tb]), + make_response("end_turn", [make_text_content("Answer after one search.")]), + ] + + tool_manager = MagicMock() + tool_manager.execute_tool.return_value = "search result" + + gen = AIGenerator(api_key="test-key", model="claude-test") + result = gen.generate_response( + query="Find info on topic", + tools=[{"name": "search_course_content"}], + tool_manager=tool_manager, + ) + + assert mock_client.messages.create.call_count == 2 + assert result == "Answer after one search." + + @patch("ai_generator.anthropic.Anthropic") + def test_two_tool_results_in_single_round_appended_correctly(self, mock_anthropic_cls): + """When a single round contains 2 tool-use blocks, both are executed and appended.""" + mock_client = MagicMock() + mock_anthropic_cls.return_value = mock_client + + tb1 = make_tool_use_content("search_course_content", {"query": "a"}, "id_a") + tb2 = make_tool_use_content("get_course_outline", {"course_name": "X"}, "id_b") + mock_client.messages.create.side_effect = [ + make_response("tool_use", [tb1, tb2]), + make_response("end_turn", [make_text_content("Both results used.")]), + ] + + tool_manager = MagicMock() + tool_manager.execute_tool.return_value = "result" + + gen = AIGenerator(api_key="test-key", model="claude-test") + result = gen.generate_response( + query="Compare two things", + tools=[{"name": "search_course_content"}], + tool_manager=tool_manager, + ) + + assert tool_manager.execute_tool.call_count == 2 + assert result == "Both results used." 
+ + # Second API call should have a user message with 2 tool_result dicts + second_call_messages = mock_client.messages.create.call_args_list[1][1]["messages"] + tool_result_msg = next( + m for m in second_call_messages + if m["role"] == "user" and isinstance(m["content"], list) + ) + assert len(tool_result_msg["content"]) == 2 + assert tool_result_msg["content"][0]["tool_use_id"] == "id_a" + assert tool_result_msg["content"][1]["tool_use_id"] == "id_b" diff --git a/backend/tests/test_api_endpoints.py b/backend/tests/test_api_endpoints.py new file mode 100644 index 000000000..cb222225d --- /dev/null +++ b/backend/tests/test_api_endpoints.py @@ -0,0 +1,171 @@ +""" +Tests for the FastAPI endpoints in app.py. + +Fixtures (client, mock_rag) are provided by conftest.py. +Each test configures mock_rag return values/side effects for the scenario +under test, then inspects the HTTP response. + +Endpoints covered: + POST /api/query + GET /api/courses + POST /api/clear-session +""" + + +class TestQueryEndpoint: + + def test_returns_200_for_valid_request(self, client, mock_rag): + mock_rag.query.return_value = ("ML is a subset of AI.", ["Course A - Lesson 1"]) + mock_rag.session_manager.create_session.return_value = "new-session" + + response = client.post("/api/query", json={"query": "What is ML?"}) + + assert response.status_code == 200 + + def test_response_body_has_answer_sources_session_id(self, client, mock_rag): + mock_rag.query.return_value = ("ML is a subset of AI.", ["Course A - Lesson 1"]) + mock_rag.session_manager.create_session.return_value = "new-session" + + data = client.post("/api/query", json={"query": "What is ML?"}).json() + + assert data["answer"] == "ML is a subset of AI." 
+ assert data["sources"] == ["Course A - Lesson 1"] + assert data["session_id"] == "new-session" + + def test_creates_session_when_none_provided(self, client, mock_rag): + mock_rag.query.return_value = ("Answer.", []) + mock_rag.session_manager.create_session.return_value = "generated-id" + + data = client.post("/api/query", json={"query": "Hello"}).json() + + mock_rag.session_manager.create_session.assert_called_once() + assert data["session_id"] == "generated-id" + + def test_uses_provided_session_id_without_creating_new_one(self, client, mock_rag): + mock_rag.query.return_value = ("Answer.", []) + + data = client.post( + "/api/query", json={"query": "Hello", "session_id": "existing-sess"} + ).json() + + mock_rag.session_manager.create_session.assert_not_called() + assert data["session_id"] == "existing-sess" + + def test_passes_query_and_session_id_to_rag_system(self, client, mock_rag): + mock_rag.query.return_value = ("Answer.", []) + + client.post("/api/query", json={"query": "Test query", "session_id": "sess-1"}) + + mock_rag.query.assert_called_once_with("Test query", "sess-1") + + def test_returns_500_when_rag_raises(self, client, mock_rag): + mock_rag.session_manager.create_session.return_value = "sess" + mock_rag.query.side_effect = RuntimeError("DB unavailable") + + response = client.post("/api/query", json={"query": "Fail"}) + + assert response.status_code == 500 + + def test_returns_422_when_query_field_missing(self, client, mock_rag): + response = client.post("/api/query", json={"session_id": "sess"}) + + assert response.status_code == 422 + + def test_empty_sources_list_is_valid(self, client, mock_rag): + mock_rag.query.return_value = ("General answer.", []) + mock_rag.session_manager.create_session.return_value = "s" + + data = client.post("/api/query", json={"query": "Hello"}).json() + + assert data["sources"] == [] + + def test_multiple_sources_are_returned(self, client, mock_rag): + mock_rag.query.return_value = ( + "Answer citing two lessons.", 
+ ["Course A - Lesson 1", "Course B - Lesson 3"], + ) + mock_rag.session_manager.create_session.return_value = "s" + + data = client.post("/api/query", json={"query": "Multi-source question"}).json() + + assert len(data["sources"]) == 2 + assert "Course A - Lesson 1" in data["sources"] + assert "Course B - Lesson 3" in data["sources"] + + +class TestCoursesEndpoint: + + def test_returns_200(self, client, mock_rag): + mock_rag.get_course_analytics.return_value = { + "total_courses": 1, + "course_titles": ["Intro to AI"], + } + + response = client.get("/api/courses") + + assert response.status_code == 200 + + def test_response_contains_total_courses_and_titles(self, client, mock_rag): + mock_rag.get_course_analytics.return_value = { + "total_courses": 2, + "course_titles": ["AI Basics", "ML Fundamentals"], + } + + data = client.get("/api/courses").json() + + assert data["total_courses"] == 2 + assert data["course_titles"] == ["AI Basics", "ML Fundamentals"] + + def test_empty_catalog_returns_zero_courses(self, client, mock_rag): + mock_rag.get_course_analytics.return_value = { + "total_courses": 0, + "course_titles": [], + } + + data = client.get("/api/courses").json() + + assert data["total_courses"] == 0 + assert data["course_titles"] == [] + + def test_returns_500_when_analytics_raises(self, client, mock_rag): + mock_rag.get_course_analytics.side_effect = RuntimeError("ChromaDB error") + + response = client.get("/api/courses") + + assert response.status_code == 500 + + def test_title_count_matches_total_courses_field(self, client, mock_rag): + titles = ["Course A", "Course B", "Course C"] + mock_rag.get_course_analytics.return_value = { + "total_courses": len(titles), + "course_titles": titles, + } + + data = client.get("/api/courses").json() + + assert data["total_courses"] == len(data["course_titles"]) + + +class TestClearSessionEndpoint: + + def test_returns_200_with_ok_status(self, client, mock_rag): + response = client.post("/api/clear-session", 
json={"session_id": "sess-abc"}) + + assert response.status_code == 200 + assert response.json() == {"status": "ok"} + + def test_delegates_to_session_manager(self, client, mock_rag): + client.post("/api/clear-session", json={"session_id": "sess-to-clear"}) + + mock_rag.session_manager.clear_session.assert_called_once_with("sess-to-clear") + + def test_returns_422_when_session_id_missing(self, client, mock_rag): + response = client.post("/api/clear-session", json={}) + + assert response.status_code == 422 + + def test_different_session_ids_are_forwarded_correctly(self, client, mock_rag): + for session_id in ["alpha", "beta", "gamma"]: + mock_rag.session_manager.clear_session.reset_mock() + client.post("/api/clear-session", json={"session_id": session_id}) + mock_rag.session_manager.clear_session.assert_called_once_with(session_id) diff --git a/backend/tests/test_course_search_tool.py b/backend/tests/test_course_search_tool.py new file mode 100644 index 000000000..3ec1991f5 --- /dev/null +++ b/backend/tests/test_course_search_tool.py @@ -0,0 +1,212 @@ +""" +Tests for CourseSearchTool.execute() in search_tools.py. + +Unit tests use a mocked VectorStore to isolate CourseSearchTool logic. +The integration test uses a real in-memory ChromaDB with max_results=0 +to expose the configuration bug. 
+""" +import sys +import os +import pytest +from unittest.mock import MagicMock, patch + +# Add backend to path so imports work when running from project root +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from search_tools import CourseSearchTool +from vector_store import SearchResults, VectorStore + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def make_mock_store(docs=None, metadata=None, error=None): + """Return a MagicMock VectorStore whose search() returns controlled data.""" + store = MagicMock(spec=VectorStore) + if error: + store.search.return_value = SearchResults( + documents=[], metadata=[], distances=[], error=error + ) + else: + store.search.return_value = SearchResults( + documents=docs or [], + metadata=metadata or [], + distances=[0.1] * len(docs or []), + ) + return store + + +# --------------------------------------------------------------------------- +# Unit tests — all use mocked VectorStore +# --------------------------------------------------------------------------- + +class TestCourseSearchToolExecuteUnit: + + def test_execute_returns_formatted_results(self): + """execute() formats documents with course/lesson context headers.""" + store = make_mock_store( + docs=["Transformers are attention-based models."], + metadata=[{"course_title": "AI Basics", "lesson_number": 2}], + ) + tool = CourseSearchTool(store) + result = tool.execute(query="what are transformers") + + assert "AI Basics" in result + assert "Lesson 2" in result + assert "Transformers are attention-based models." 
in result + + def test_execute_returns_empty_message_when_no_results(self): + """execute() returns a human-readable 'not found' string when empty.""" + store = make_mock_store(docs=[], metadata=[]) + tool = CourseSearchTool(store) + result = tool.execute(query="nonexistent topic") + + assert "No relevant content found" in result + + def test_execute_returns_empty_message_includes_course_filter(self): + """Empty result message includes the course filter name.""" + store = make_mock_store(docs=[], metadata=[]) + tool = CourseSearchTool(store) + result = tool.execute(query="something", course_name="Python 101") + + assert "No relevant content found" in result + assert "Python 101" in result + + def test_execute_returns_error_string_on_search_error(self): + """When VectorStore returns an error, execute() returns that error string.""" + store = make_mock_store(error="Search error: n_results must be a positive integer") + tool = CourseSearchTool(store) + result = tool.execute(query="anything") + + assert "Search error" in result + + def test_execute_populates_last_sources(self): + """After a successful search, last_sources contains course+lesson strings.""" + store = make_mock_store( + docs=["Content A", "Content B"], + metadata=[ + {"course_title": "Course X", "lesson_number": 1}, + {"course_title": "Course X", "lesson_number": 3}, + ], + ) + tool = CourseSearchTool(store) + tool.execute(query="something") + + assert len(tool.last_sources) == 2 + assert "Course X - Lesson 1" in tool.last_sources + assert "Course X - Lesson 3" in tool.last_sources + + def test_execute_last_sources_empty_on_error(self): + """last_sources stays empty when the search returns an error.""" + store = make_mock_store(error="Search error: n_results must be a positive integer") + tool = CourseSearchTool(store) + tool.execute(query="something") + + assert tool.last_sources == [] + + def test_execute_passes_course_name_to_store(self): + """course_name kwarg is forwarded to VectorStore.search().""" 
+ store = make_mock_store(docs=[], metadata=[]) + tool = CourseSearchTool(store) + tool.execute(query="topic", course_name="MCP Course") + + store.search.assert_called_once_with( + query="topic", course_name="MCP Course", lesson_number=None + ) + + def test_execute_passes_lesson_number_to_store(self): + """lesson_number kwarg is forwarded to VectorStore.search().""" + store = make_mock_store(docs=[], metadata=[]) + tool = CourseSearchTool(store) + tool.execute(query="topic", lesson_number=4) + + store.search.assert_called_once_with( + query="topic", course_name=None, lesson_number=4 + ) + + +# --------------------------------------------------------------------------- +# Integration test — uses a real in-memory ChromaDB with max_results=0 +# This test is EXPECTED TO FAIL on the broken system, exposing the bug. +# --------------------------------------------------------------------------- + +class TestCourseSearchToolIntegration: + + @pytest.fixture() + def in_memory_store_broken(self, tmp_path): + """VectorStore backed by ChromaDB with max_results=0 (broken config).""" + store = VectorStore( + chroma_path=str(tmp_path / "chroma"), + embedding_model="all-MiniLM-L6-v2", + max_results=0, # Mirrors the broken config value + ) + # Add one document so the collection is non-empty + from models import Course, Lesson, CourseChunk + course = Course( + title="Test Course", + course_link="http://example.com", + instructor="Test Instructor", + lessons=[Lesson(lesson_number=1, title="Intro", lesson_link="http://example.com/1")], + ) + store.add_course_metadata(course) + store.add_course_content([ + CourseChunk( + content="This lesson covers the basics of Python.", + course_title="Test Course", + lesson_number=1, + chunk_index=0, + ) + ]) + return store + + @pytest.fixture() + def in_memory_store_fixed(self, tmp_path): + """VectorStore backed by ChromaDB with max_results=5 (correct config).""" + store = VectorStore( + chroma_path=str(tmp_path / "chroma_fixed"), + 
embedding_model="all-MiniLM-L6-v2", + max_results=5, # Correct value + ) + from models import Course, Lesson, CourseChunk + course = Course( + title="Test Course", + course_link="http://example.com", + instructor="Test Instructor", + lessons=[Lesson(lesson_number=1, title="Intro", lesson_link="http://example.com/1")], + ) + store.add_course_metadata(course) + store.add_course_content([ + CourseChunk( + content="This lesson covers the basics of Python.", + course_title="Test Course", + lesson_number=1, + chunk_index=0, + ) + ]) + return store + + def test_search_with_zero_max_results_returns_error(self, in_memory_store_broken): + """ + Documents the bug: with max_results=0, ChromaDB raises ValueError and + VectorStore wraps it as a 'Search error' string instead of returning content. + This test PASSES because it asserts the broken behavior exists. + The FIX is in config.py: MAX_RESULTS=5 so the production store never uses 0. + """ + tool = CourseSearchTool(in_memory_store_broken) + result = tool.execute(query="Python basics") + + # max_results=0 causes ChromaDB to raise: + # "Number of requested results 0, cannot be negative, or zero." + assert "Search error" in result, ( + "Expected a search error with max_results=0 — " + "the bug behavior is not reproducible." + ) + + def test_search_with_positive_max_results_returns_content(self, in_memory_store_fixed): + """With max_results=5, search returns actual course content.""" + tool = CourseSearchTool(in_memory_store_fixed) + result = tool.execute(query="Python basics") + + assert "Search error" not in result + assert "Test Course" in result diff --git a/backend/tests/test_rag_system.py b/backend/tests/test_rag_system.py new file mode 100644 index 000000000..eca920b15 --- /dev/null +++ b/backend/tests/test_rag_system.py @@ -0,0 +1,218 @@ +""" +Tests for RAGSystem.query() in rag_system.py. + +These tests verify how the full RAG pipeline handles content-related questions. 
+Two integration tests explicitly demonstrate the MAX_RESULTS=0 bug: + - test_search_tool_fails_with_zero_max_results → FAILS on broken system + - test_search_tool_succeeds_with_positive_max_results → FAILS on broken system, PASSES after fix +""" +import sys +import os +import pytest +from unittest.mock import MagicMock, patch + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from rag_system import RAGSystem +from search_tools import CourseSearchTool, ToolManager +from vector_store import VectorStore +from models import Course, Lesson, CourseChunk +from config import config + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def make_rag_system_with_mock_ai(vector_store=None): + """ + Build a RAGSystem where only AIGenerator is mocked out. + This lets us test the full tool-calling pipeline without real API calls. + """ + with patch("rag_system.AIGenerator") as MockAI: + mock_ai_instance = MagicMock() + MockAI.return_value = mock_ai_instance + rag = RAGSystem(config) + if vector_store: + rag.vector_store = vector_store + rag.search_tool.store = vector_store + rag.outline_tool.store = vector_store + return rag, mock_ai_instance + + +# --------------------------------------------------------------------------- +# Unit tests — AIGenerator is mocked +# --------------------------------------------------------------------------- + +class TestRAGSystemQuery: + + def test_query_returns_tuple_of_answer_and_sources(self): + """query() always returns a (str, list) tuple.""" + rag, mock_ai = make_rag_system_with_mock_ai() + mock_ai.generate_response.return_value = "Neural networks are layers of nodes." + + answer, sources = rag.query("What are neural networks?") + + assert isinstance(answer, str) + assert isinstance(sources, list) + assert answer == "Neural networks are layers of nodes." 
+ + def test_query_passes_tool_definitions_to_ai_generator(self): + """generate_response is called with tool definitions from ToolManager.""" + rag, mock_ai = make_rag_system_with_mock_ai() + mock_ai.generate_response.return_value = "Some answer." + + rag.query("What is in lesson 2?") + + call_kwargs = mock_ai.generate_response.call_args[1] + assert "tools" in call_kwargs + tool_names = [t["name"] for t in call_kwargs["tools"]] + assert "search_course_content" in tool_names + + def test_query_passes_tool_manager_to_ai_generator(self): + """generate_response receives the tool_manager so it can execute tools.""" + rag, mock_ai = make_rag_system_with_mock_ai() + mock_ai.generate_response.return_value = "Answer." + + rag.query("What is MCP?") + + call_kwargs = mock_ai.generate_response.call_args[1] + assert "tool_manager" in call_kwargs + assert call_kwargs["tool_manager"] is rag.tool_manager + + def test_query_wraps_user_question_in_prompt(self): + """RAGSystem prepends context to the user query before passing to AIGenerator.""" + rag, mock_ai = make_rag_system_with_mock_ai() + mock_ai.generate_response.return_value = "Answer." + + rag.query("What is reinforcement learning?") + + call_kwargs = mock_ai.generate_response.call_args[1] + assert "What is reinforcement learning?" in call_kwargs["query"] + + def test_query_saves_exchange_to_session(self): + """After a query, the exchange is stored in SessionManager.""" + rag, mock_ai = make_rag_system_with_mock_ai() + mock_ai.generate_response.return_value = "RL answer." + + session_id = rag.session_manager.create_session() + rag.query("What is RL?", session_id=session_id) + + history = rag.session_manager.get_conversation_history(session_id) + assert history is not None + assert "What is RL?" in history + assert "RL answer." 
in history + + def test_query_resets_sources_after_retrieval(self): + """Sources are cleared from the tool manager after each query.""" + rag, mock_ai = make_rag_system_with_mock_ai() + mock_ai.generate_response.return_value = "Answer." + + # Manually pre-populate sources to simulate a previous search + rag.search_tool.last_sources = ["Some Course - Lesson 1"] + + rag.query("New question?") + + # After query(), sources should be cleared + assert rag.tool_manager.get_last_sources() == [] + + +# --------------------------------------------------------------------------- +# Integration tests — expose the MAX_RESULTS=0 bug using real ChromaDB +# --------------------------------------------------------------------------- + +class TestRAGSystemSearchIntegration: + """ + These tests build a real in-memory ChromaDB with sample data. + They demonstrate that MAX_RESULTS=0 causes search to fail. + """ + + @pytest.fixture() + def populated_store_broken(self, tmp_path): + """VectorStore with max_results=0 (mirrors broken config).""" + store = VectorStore( + chroma_path=str(tmp_path / "broken"), + embedding_model="all-MiniLM-L6-v2", + max_results=0, + ) + course = Course( + title="Intro to ML", + course_link="http://example.com/ml", + instructor="Test Instructor", + lessons=[Lesson(lesson_number=1, title="Supervised Learning", lesson_link="http://example.com/ml/1")], + ) + store.add_course_metadata(course) + store.add_course_content([ + CourseChunk( + content="Supervised learning uses labeled training data to learn a mapping from inputs to outputs.", + course_title="Intro to ML", + lesson_number=1, + chunk_index=0, + ) + ]) + return store + + @pytest.fixture() + def populated_store_fixed(self, tmp_path): + """VectorStore with max_results=5 (correct config).""" + store = VectorStore( + chroma_path=str(tmp_path / "fixed"), + embedding_model="all-MiniLM-L6-v2", + max_results=5, + ) + course = Course( + title="Intro to ML", + course_link="http://example.com/ml", + instructor="Test 
Instructor", + lessons=[Lesson(lesson_number=1, title="Supervised Learning", lesson_link="http://example.com/ml/1")], + ) + store.add_course_metadata(course) + store.add_course_content([ + CourseChunk( + content="Supervised learning uses labeled training data to learn a mapping from inputs to outputs.", + course_title="Intro to ML", + lesson_number=1, + chunk_index=0, + ) + ]) + return store + + def test_search_tool_fails_with_zero_max_results(self, populated_store_broken): + """ + Documents the bug: with max_results=0, the search tool returns a 'Search error' + string instead of course content. ChromaDB raises: + 'Number of requested results 0, cannot be negative, or zero.' + This test PASSES by asserting that broken behavior occurs when max_results=0. + The fix is in config.py: change MAX_RESULTS from 0 to 5. + """ + tool = CourseSearchTool(populated_store_broken) + result = tool.execute(query="supervised learning") + + assert "Search error" in result, ( + "Expected a search error with max_results=0 — " + "the bug behavior is not reproducible." + ) + + def test_search_tool_succeeds_with_positive_max_results(self, populated_store_fixed): + """ + After the fix, search returns actual course content. + This test also PASSES on the broken system (because it uses max_results=5 + directly), confirming the fix works when max_results is positive. + """ + tool = CourseSearchTool(populated_store_fixed) + result = tool.execute(query="supervised learning") + + assert "Search error" not in result + assert "Intro to ML" in result + assert "Supervised" in result or "labeled" in result + + def test_config_max_results_is_positive(self): + """ + Verifies that the config value MAX_RESULTS is a positive integer. + This test FAILS on the broken system where MAX_RESULTS=0. + """ + assert config.MAX_RESULTS > 0, ( + f"BUG: config.MAX_RESULTS is {config.MAX_RESULTS}. " + "It must be a positive integer (e.g. 5) for searches to work. 
" + "Fix: change MAX_RESULTS in backend/config.py" + ) diff --git a/backend/vector_store.py b/backend/vector_store.py index 390abe71c..55a80ef10 100644 --- a/backend/vector_store.py +++ b/backend/vector_store.py @@ -147,15 +147,19 @@ def add_course_metadata(self, course: Course): "lesson_link": lesson.lesson_link }) + raw_metadata = { + "title": course.title, + "instructor": course.instructor, + "course_link": course.course_link, + "lessons_json": json.dumps(lessons_metadata), + "lesson_count": len(course.lessons) + } + # ChromaDB rejects None metadata values; replace with empty string + metadata = {k: (v if v is not None else "") for k, v in raw_metadata.items()} + self.course_catalog.add( documents=[course_text], - metadatas=[{ - "title": course.title, - "instructor": course.instructor, - "course_link": course.course_link, - "lessons_json": json.dumps(lessons_metadata), # Serialize as JSON string - "lesson_count": len(course.lessons) - }], + metadatas=[metadata], ids=[course.title] ) @@ -264,4 +268,25 @@ def get_lesson_link(self, course_title: str, lesson_number: int) -> Optional[str return None except Exception as e: print(f"Error getting lesson link: {e}") - \ No newline at end of file + + def get_course_outline(self, course_name: str) -> Optional[Dict]: + """Get course outline (title, link, lessons list) for a given course name""" + import json + try: + resolved_title = self._resolve_course_name(course_name) + if not resolved_title: + return None + results = self.course_catalog.get(ids=[resolved_title]) + if results and 'metadatas' in results and results['metadatas']: + metadata = results['metadatas'][0] + lessons_json = metadata.get('lessons_json') + lessons = json.loads(lessons_json) if lessons_json else [] + return { + "title": metadata.get('title', resolved_title), + "course_link": metadata.get('course_link', ''), + "lessons": lessons + } + return None + except Exception as e: + print(f"Error getting course outline: {e}") + return None diff --git 
a/frontend-changes.md b/frontend-changes.md new file mode 100644 index 000000000..e2c792027 --- /dev/null +++ b/frontend-changes.md @@ -0,0 +1,139 @@ +# Frontend Code Quality Changes + +## Summary + +Added code quality tooling for the frontend (vanilla JS/CSS/HTML) and applied formatting consistency fixes to the existing source files. + +--- + +## New Files + +### `package.json` +Defines the project's frontend dev dependencies and npm scripts: +- `npm run format` — formats all frontend files with Prettier (modifies in place) +- `npm run format:check` — checks formatting without modifying files +- `npm run lint` — lints `frontend/script.js` with ESLint +- `npm run lint:fix` — auto-fixes ESLint issues in `frontend/script.js` + +**Dev dependencies added:** `prettier@^3.3.0`, `eslint@^8.57.0` + +--- + +### `.prettierrc` +Prettier configuration matching the existing code style: +- `tabWidth: 4` — 4-space indentation (consistent with current files) +- `singleQuote: true` — single quotes for JS strings +- `trailingComma: "es5"` — trailing commas in arrays/objects +- `printWidth: 88` — max line length +- `endOfLine: "lf"` — Unix line endings + +--- + +### `.eslintrc.js` +ESLint configuration for browser-side JavaScript: +- Extends `eslint:recommended` ruleset +- Declares `marked` as a known global (loaded via CDN) +- `no-unused-vars`: warn +- `no-console`: warn (flags debug `console.log` calls left in production code) + +--- + +### `scripts/frontend-format.sh` +Shell script to format all frontend files in one command: +```bash +./scripts/frontend-format.sh +``` +Runs `prettier --write` over `frontend/**/*.{js,css,html}`. + +--- + +### `scripts/frontend-lint.sh` +Shell script to check frontend quality without modifying files (suitable for CI): +```bash +./scripts/frontend-lint.sh +``` +Runs `prettier --check` then `eslint` and exits non-zero if any check fails. 
+ +--- + +## Modified Files + +### `frontend/script.js` +- Removed two double blank lines (between event listeners and between top-level functions) to match Prettier's single-blank-line-between-blocks style. + +### `frontend/index.html` +- Removed an extra blank line between the closing `</div>` and the `<script>` tags at the bottom of `<body>`. + +--- + +## Usage + +Install dependencies once: +```bash +npm install +``` + +Then use the scripts directly: +```bash +# Format (modifies files) +./scripts/frontend-format.sh + +# Lint only (no modifications — good for CI) +./scripts/frontend-lint.sh +``` + +Or use npm scripts: +```bash +npm run format # format and save +npm run format:check # check formatting only +npm run lint # ESLint check +npm run lint:fix # ESLint auto-fix +``` + +--- + +# Frontend Changes: Dark/Light Theme Toggle + +## Feature +Added a toggle button that switches between dark (default) and light themes. + +## Files Modified + +### `frontend/index.html` +- Added a `<button id="themeToggle">` element with fixed positioning, placed directly inside `<body>` before `.container` +- Button contains two SVG icons: a **sun** (shown in dark mode) and a **moon** (shown in light mode) +- Includes `aria-label` and `title` attributes for accessibility + +### `frontend/style.css` +- Added `[data-theme="light"]` CSS variable overrides: + - `--background: #f8fafc` + - `--surface: #ffffff` + - `--surface-hover: #f1f5f9` + - `--text-primary: #0f172a` + - `--text-secondary: #64748b` + - `--border-color: #e2e8f0` + - `--assistant-message: #f1f5f9` + - `--shadow` reduced opacity for light context + - `--welcome-bg: #dbeafe` +- Added `transition: background-color 0.3s ease, color 0.3s ease, border-color 0.3s ease` to all major UI elements for smooth switching +- Added `.theme-toggle` button styles: + - `position: fixed; top: 1rem; right: 1rem; z-index: 1000` + - 40×40px circular button matching the design aesthetic + - Hover: scales up, highlights in primary blue, shows 
focus ring + - Focus outline replaced with `box-shadow` focus ring for consistency +- Icon visibility rules: sun visible in dark mode, moon visible in light mode (via `[data-theme="light"]` selector) + +### `frontend/script.js` +- Added an IIFE at the top of the file that reads `localStorage` and applies the saved theme to `<html>` immediately, preventing a flash of wrong theme on page load +- Added `initTheme()` function (sets `data-theme` from `localStorage` on DOMContentLoaded) +- Added `toggleTheme()` function: + - Reads current `data-theme` attribute on `<html>` + - Toggles the attribute between `"light"` and absent (dark) + - Persists choice to `localStorage` under key `"theme"` +- Registered click listener for `#themeToggle` in `setupEventListeners()` + +## Design Decisions +- Theme attribute is set on `document.documentElement` (`<html>`) so CSS selectors like `[data-theme="light"] .theme-toggle` work globally +- Dark mode is the default (no attribute needed); light mode uses `data-theme="light"` +- Theme preference persists across page reloads via `localStorage` +- All color changes rely on the existing CSS variable system — no element-level style overrides needed diff --git a/frontend/index.html b/frontend/index.html index f8e25a62f..1794d04f9 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -7,9 +7,27 @@ <meta http-equiv="Pragma" content="no-cache"> <meta http-equiv="Expires" content="0"> <title>Course Materials Assistant - + +

Course Materials Assistant

@@ -19,6 +37,11 @@

Course Materials Assistant

+ +
+ +
+
@@ -74,8 +97,7 @@

Course Materials Assistant

- - + \ No newline at end of file diff --git a/frontend/script.js b/frontend/script.js index 562a8a363..9a3f45e83 100644 --- a/frontend/script.js +++ b/frontend/script.js @@ -1,6 +1,13 @@ // API base URL - use relative path to work from any host const API_URL = '/api'; +// Apply saved theme immediately to avoid flash +(function() { + if (localStorage.getItem('theme') === 'light') { + document.documentElement.setAttribute('data-theme', 'light'); + } +})(); + // Global state let currentSessionId = null; @@ -21,15 +28,39 @@ document.addEventListener('DOMContentLoaded', () => { loadCourseStats(); }); +// Theme Management +function initTheme() { + const savedTheme = localStorage.getItem('theme'); + if (savedTheme === 'light') { + document.documentElement.setAttribute('data-theme', 'light'); + } +} + +function toggleTheme() { + const isLight = document.documentElement.getAttribute('data-theme') === 'light'; + if (isLight) { + document.documentElement.removeAttribute('data-theme'); + localStorage.setItem('theme', 'dark'); + } else { + document.documentElement.setAttribute('data-theme', 'light'); + localStorage.setItem('theme', 'light'); + } +} + // Event Listeners function setupEventListeners() { + // Theme toggle + document.getElementById('themeToggle').addEventListener('click', toggleTheme); + + // New chat button + document.getElementById('newChatButton').addEventListener('click', createNewSession); + // Chat functionality sendButton.addEventListener('click', sendMessage); chatInput.addEventListener('keypress', (e) => { if (e.key === 'Enter') sendMessage(); }); - - + // Suggested questions document.querySelectorAll('.suggested-item').forEach(button => { button.addEventListener('click', (e) => { @@ -40,7 +71,6 @@ function setupEventListeners() { }); } - // Chat Functions async function sendMessage() { const query = chatInput.value.trim(); @@ -147,6 +177,17 @@ function escapeHtml(text) { // Removed removeMessage function - no longer needed since we handle loading 
differently async function createNewSession() { + if (currentSessionId) { + try { + await fetch(`${API_URL}/clear-session`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ session_id: currentSessionId }) + }); + } catch (e) { + // ignore — session cleanup is best-effort + } + } currentSessionId = null; chatMessages.innerHTML = ''; addMessage('Welcome to the Course Materials Assistant! I can help you with questions about courses, lessons and specific content. What would you like to know?', 'assistant', null, true); diff --git a/frontend/style.css b/frontend/style.css index 825d03675..ae443ac58 100644 --- a/frontend/style.css +++ b/frontend/style.css @@ -24,6 +24,78 @@ --welcome-border: #2563eb; } +/* Light Theme Variables */ +[data-theme="light"] { + --background: #f8fafc; + --surface: #ffffff; + --surface-hover: #f1f5f9; + --text-primary: #0f172a; + --text-secondary: #64748b; + --border-color: #e2e8f0; + --assistant-message: #f1f5f9; + --shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1); + --welcome-bg: #dbeafe; + --welcome-border: #2563eb; +} + +/* Smooth theme transitions */ +body, +.sidebar, +.chat-main, +.chat-container, +.chat-messages, +.chat-input-container, +#chatInput, +.message-content, +.stat-item, +.suggested-item, +.new-chat-button, +.stats-header, +.suggested-header, +.course-title-item { + transition: background-color 0.3s ease, color 0.3s ease, border-color 0.3s ease; +} + +/* Theme Toggle Button */ +.theme-toggle { + position: fixed; + top: 1rem; + right: 1rem; + z-index: 1000; + width: 40px; + height: 40px; + border-radius: 50%; + border: 1px solid var(--border-color); + background: var(--surface); + color: var(--text-secondary); + cursor: pointer; + display: flex; + align-items: center; + justify-content: center; + transition: background-color 0.3s ease, color 0.2s ease, border-color 0.3s ease, transform 0.2s ease, box-shadow 0.2s ease; +} + +.theme-toggle:hover { + color: var(--primary-color); + 
border-color: var(--primary-color); + transform: scale(1.1); + box-shadow: 0 0 0 3px var(--focus-ring); +} + +.theme-toggle:focus { + outline: none; + box-shadow: 0 0 0 3px var(--focus-ring); +} + +/* Icon visibility based on theme */ +/* Default (dark mode): show sun icon to switch to light */ +.theme-toggle .icon-moon { display: none; } +.theme-toggle .icon-sun { display: block; } + +/* Light mode: show moon icon to switch to dark */ +[data-theme="light"] .theme-toggle .icon-moon { display: block; } +[data-theme="light"] .theme-toggle .icon-sun { display: none; } + /* Base Styles */ body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif; @@ -445,6 +517,33 @@ header h1 { margin: 0.5rem 0; } +/* New Chat Button */ +.new-chat-button { + font-size: 0.875rem; + font-weight: 600; + color: var(--text-secondary); + cursor: pointer; + padding: 0.5rem 0; + border: none; + background: none; + outline: none; + -webkit-appearance: none; + appearance: none; + transition: color 0.2s ease; + text-transform: uppercase; + letter-spacing: 0.5px; + width: 100%; + text-align: left; +} + +.new-chat-button:hover { + color: var(--primary-color); +} + +.new-chat-button:focus { + color: var(--primary-color); +} + /* Sidebar Headers */ .stats-header, .suggested-header { diff --git a/package.json b/package.json new file mode 100644 index 000000000..622f4177f --- /dev/null +++ b/package.json @@ -0,0 +1,14 @@ +{ + "name": "ragchatbot-frontend", + "private": true, + "scripts": { + "format": "prettier --write \"frontend/**/*.{js,css,html}\"", + "format:check": "prettier --check \"frontend/**/*.{js,css,html}\"", + "lint": "eslint frontend/script.js", + "lint:fix": "eslint --fix frontend/script.js" + }, + "devDependencies": { + "eslint": "^8.57.0", + "prettier": "^3.3.0" + } +} diff --git a/pyproject.toml b/pyproject.toml index 3f05e2de0..b8bf2ba4d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,3 +13,14 @@ dependencies = [ 
"python-multipart==0.0.20", "python-dotenv==1.1.1", ] + +[dependency-groups] +dev = [ + "pytest>=9.0.2", + "httpx>=0.27.0", +] + +[tool.pytest.ini_options] +testpaths = ["backend/tests"] +pythonpath = ["backend"] +addopts = "-v" diff --git a/scripts/frontend-format.sh b/scripts/frontend-format.sh new file mode 100755 index 000000000..6961e4f29 --- /dev/null +++ b/scripts/frontend-format.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# Format all frontend files using Prettier. +# Modifies files in place. +set -e +cd "$(dirname "$0")/.." + +echo "Formatting frontend files with Prettier..." +npx prettier --write "frontend/**/*.{js,css,html}" +echo "Formatting complete." diff --git a/scripts/frontend-lint.sh b/scripts/frontend-lint.sh new file mode 100755 index 000000000..1a0a9a10a --- /dev/null +++ b/scripts/frontend-lint.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash +# Lint frontend files without modifying them. +# Exits with a non-zero code if any check fails. +set -e +cd "$(dirname "$0")/.." + +echo "Checking formatting with Prettier..." +npx prettier --check "frontend/**/*.{js,css,html}" + +echo "Linting JavaScript with ESLint..." +npx eslint frontend/script.js + +echo "All frontend checks passed." diff --git a/temp.txt b/temp.txt new file mode 100644 index 000000000..c72da8e35 --- /dev/null +++ b/temp.txt @@ -0,0 +1,8 @@ +The RAG chatbot returns 'query failed' for any content-related questions. I need you to: +1. Write tests to evaluate the outputs of the execute method of the CourseSearchTool in @backend/search_tools.py +2. Write tests to evaluate if @backend/ai_generator.py correctly calls for the CourseSearchTool +3. Write tests to evaluate how the RAG system is handling the content-query related questions. + +Save the tests in a tests folder within @backend. Run those tests against the current system to identify which components are failing. Propose fixes based on what the tests reveal is broken. + +Think a lot. 
diff --git a/uv.lock b/uv.lock index 9ae65c557..cf091f1b6 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.13" [[package]] @@ -470,6 +470,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a4/ed/1f1afb2e9e7f38a545d628f864d562a5ae64fe6f7a10e28ffb9b185b4e89/importlib_resources-6.5.2-py3-none-any.whl", hash = "sha256:789cfdc3ed28c78b67a06acb8126751ced69a3d5f79c095a98298cd8a760ccec", size = 37461, upload-time = "2025-01-03T18:51:54.306Z" }, ] +[[package]] +name = "iniconfig" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, +] + [[package]] name = "jinja2" version = "3.1.6" @@ -1038,6 +1047,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/89/c7/5572fa4a3f45740eaab6ae86fcdf7195b55beac1371ac8c619d880cfe948/pillow-11.3.0-cp314-cp314t-win_arm64.whl", hash = "sha256:79ea0d14d3ebad43ec77ad5272e6ff9bba5b679ef73375ea760261207fa8e0aa", size = 2512835, upload-time = "2025-07-01T09:15:50.399Z" }, ] +[[package]] +name = "pluggy" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" } 
+wheels = [ + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, +] + [[package]] name = "posthog" version = "5.4.0" @@ -1207,6 +1225,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5a/dc/491b7661614ab97483abf2056be1deee4dc2490ecbf7bff9ab5cdbac86e1/pyreadline3-3.5.4-py3-none-any.whl", hash = "sha256:eaf8e6cc3c49bcccf145fc6067ba8643d1df34d604a1ec0eccbf7a18e6d3fae6", size = 83178, upload-time = "2024-09-19T02:40:08.598Z" }, ] +[[package]] +name = "pytest" +version = "9.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = "2025-12-06T21:30:51.014Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" }, +] + [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -1561,6 +1595,12 @@ dependencies = [ { name = "uvicorn" }, ] +[package.dev-dependencies] +dev = [ + { name = "httpx" }, + { name = "pytest" }, +] + [package.metadata] requires-dist = [ { name = "anthropic", specifier = "==0.58.2" }, @@ -1572,6 +1612,12 @@ requires-dist = [ { name = "uvicorn", specifier = "==0.35.0" }, ] +[package.metadata.requires-dev] +dev = [ + { name = 
"httpx", specifier = ">=0.27.0" }, + { name = "pytest", specifier = ">=9.0.2" }, +] + [[package]] name = "sympy" version = "1.14.0"