diff --git a/.eslintrc.js b/.eslintrc.js
new file mode 100644
index 000000000..96d9178bc
--- /dev/null
+++ b/.eslintrc.js
@@ -0,0 +1,17 @@
+module.exports = {
+ env: {
+ browser: true,
+ es2021: true,
+ },
+ extends: ['eslint:recommended'],
+ parserOptions: {
+ ecmaVersion: 2021,
+ },
+ globals: {
+ marked: 'readonly',
+ },
+ rules: {
+ 'no-unused-vars': 'warn',
+ 'no-console': 'warn',
+ },
+};
diff --git a/.github/workflows/claude-code-review.yml b/.github/workflows/claude-code-review.yml
new file mode 100644
index 000000000..b5e8cfd4d
--- /dev/null
+++ b/.github/workflows/claude-code-review.yml
@@ -0,0 +1,44 @@
+name: Claude Code Review
+
+on:
+ pull_request:
+ types: [opened, synchronize, ready_for_review, reopened]
+ # Optional: Only run on specific file changes
+ # paths:
+ # - "src/**/*.ts"
+ # - "src/**/*.tsx"
+ # - "src/**/*.js"
+ # - "src/**/*.jsx"
+
+jobs:
+ claude-review:
+ # Optional: Filter by PR author
+ # if: |
+ # github.event.pull_request.user.login == 'external-contributor' ||
+ # github.event.pull_request.user.login == 'new-developer' ||
+ # github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR'
+
+ runs-on: ubuntu-latest
+ permissions:
+ contents: read
+ pull-requests: read
+ issues: read
+ id-token: write
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 1
+
+ - name: Run Claude Code Review
+ id: claude-review
+ uses: anthropics/claude-code-action@v1
+ with:
+ claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
+ plugin_marketplaces: 'https://github.com/anthropics/claude-code.git'
+ plugins: 'code-review@claude-code-plugins'
+ prompt: '/code-review:code-review ${{ github.repository }}/pull/${{ github.event.pull_request.number }}'
+ # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
+ # or https://code.claude.com/docs/en/cli-reference for available options
+
diff --git a/.github/workflows/claude.yml b/.github/workflows/claude.yml
new file mode 100644
index 000000000..d300267f1
--- /dev/null
+++ b/.github/workflows/claude.yml
@@ -0,0 +1,50 @@
+name: Claude Code
+
+on:
+ issue_comment:
+ types: [created]
+ pull_request_review_comment:
+ types: [created]
+ issues:
+ types: [opened, assigned]
+ pull_request_review:
+ types: [submitted]
+
+jobs:
+ claude:
+ if: |
+ (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) ||
+ (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) ||
+ (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) ||
+ (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude')))
+ runs-on: ubuntu-latest
+ permissions:
+ contents: read
+ pull-requests: read
+ issues: read
+ id-token: write
+ actions: read # Required for Claude to read CI results on PRs
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 1
+
+ - name: Run Claude Code
+ id: claude
+ uses: anthropics/claude-code-action@v1
+ with:
+ claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
+
+ # This is an optional setting that allows Claude to read CI results on PRs
+ additional_permissions: |
+ actions: read
+
+ # Optional: Give a custom prompt to Claude. If this is not specified, Claude will perform the instructions specified in the comment that tagged it.
+ # prompt: 'Update the pull request description to include a summary of changes.'
+
+ # Optional: Add claude_args to customize behavior and configuration
+ # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
+ # or https://code.claude.com/docs/en/cli-reference for available options
+ # claude_args: '--allowed-tools Bash(gh pr:*)'
+
diff --git a/.prettierrc b/.prettierrc
new file mode 100644
index 000000000..80668882f
--- /dev/null
+++ b/.prettierrc
@@ -0,0 +1,11 @@
+{
+ "printWidth": 88,
+ "tabWidth": 4,
+ "useTabs": false,
+ "semi": true,
+ "singleQuote": true,
+ "trailingComma": "es5",
+ "bracketSpacing": true,
+ "arrowParens": "always",
+ "endOfLine": "lf"
+}
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 000000000..35d101ea9
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,97 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Development Commands
+
+```bash
+# Run the application
+./run.sh
+# or manually:
+cd backend && uv run uvicorn app:app --reload --port 8000
+
+# Install dependencies
+uv sync
+
+# Install dev dependencies (required for linting/formatting)
+uv sync --group dev
+
+
+# Add a new dependency
+uv add package_name
+
+# Run all tests
+uv run pytest
+
+# Run a single test file
+uv run pytest backend/tests/test_rag_system.py
+
+# Run a single test
+uv run pytest backend/tests/test_rag_system.py::TestClassName::test_method_name
+
+# Format code (isort → black modify files, then flake8 → mypy check only)
+./scripts/format.sh
+
+# Lint only, no modifications
+./scripts/lint.sh
+```
+
+- Web UI: http://localhost:8000
+- API docs: http://localhost:8000/docs
+- Requires `ANTHROPIC_API_KEY` in a `.env` file at the project root
+
+## Architecture
+
+This is a RAG (Retrieval-Augmented Generation) chatbot for course materials. FastAPI serves both the API and the vanilla JS frontend as static files.
+
+### API endpoints
+
+- `POST /api/query` — main query endpoint, returns `{ answer, sources, source_links, session_id }`
+- `GET /api/courses` — returns course catalog stats `{ total_courses, course_titles }`
+- `POST /api/clear-session` — clears a session by `{ session_id }`
+
+### Query flow
+
+1. Frontend (`frontend/script.js`) sends `POST /api/query` with `{ query, session_id }`
+2. `app.py` creates a session if none exists, delegates to `RAGSystem.query()`
+3. `RAGSystem` fetches conversation history from `SessionManager`, then calls `AIGenerator.generate_response()`
+4. `AIGenerator` runs a **tool-calling loop** (max 2 rounds) with the Claude API:
+ - Claude may call `search_course_content` (semantic chunk search) or `get_course_outline` (lesson list)
+ - Tool results are appended to the message list and sent back to Claude
+ - After max rounds, a final API call without tools forces a text response
+5. `ToolManager` collects `last_sources` (and `last_source_links`, when the tool provides them) from whichever tool ran last
+6. Response, sources, and lesson links are returned to the frontend
+7. Frontend renders the answer as Markdown (`marked.js`) with a collapsible sources block
+
+### Key design decisions
+
+- **Course name resolution**: Partial/fuzzy course names are resolved via a semantic search against the `course_catalog` ChromaDB collection before filtering `course_content`. This lets Claude pass "MCP" and still find "Introduction to MCP Servers".
+- **Dual ChromaDB collections**: `course_catalog` stores one document per course (title + metadata including `lessons_json`). `course_content` stores all text chunks with `course_title`/`lesson_number` metadata for filtered search.
+- **Session storage**: Sessions are in-memory only — they are lost on server restart. `SessionManager` keeps the last 2 exchange pairs (4 messages) per session. Conversation history is injected into the system prompt, not the message list.
+- **AI generation config**: `AIGenerator` uses `temperature=0` and `max_tokens=800`. Model is set in `config.py` (`ANTHROPIC_MODEL`). These are not exposed via env vars — change them in code.
+- **Deduplication on startup**: `add_course_folder()` checks existing titles in `course_catalog` and skips already-ingested courses.
+
+### Document format
+
+Course files (`.txt`, `.pdf`, `.docx`) in `docs/` must follow this structure (shown for `.txt`; `.pdf`/`.docx` are also parsed and require the same header fields):
+
+```
+Course Title:
+Course Link:
+Course Instructor:
+
+Lesson 0:
+Lesson Link:
+
+
+Lesson 1:
+...
+```
+
+`DocumentProcessor` splits content into sentence-aware chunks (800 chars, 100 char overlap). The first chunk of each lesson is prefixed with `"Lesson N content: ..."` for retrieval context.
+
+### Adding a new search tool
+
+1. Create a class extending `Tool` (ABC in `search_tools.py`) implementing `get_tool_definition()` and `execute()`
+2. Register it: `self.tool_manager.register_tool(your_tool)` in `RAGSystem.__init__()`
+3. If it should surface sources in the UI, add a `last_sources` instance attribute (and optionally `last_source_links` for lesson links) — `ToolManager.get_last_sources()` checks all registered tools for these
diff --git a/backend-tool-refactor.md b/backend-tool-refactor.md
new file mode 100644
index 000000000..de23ae5c7
--- /dev/null
+++ b/backend-tool-refactor.md
@@ -0,0 +1,28 @@
+Refactor @backend/ai_generator.py to support sequential tool calling where Claude can make up to 2 tool calls in separate API rounds.
+
+Current behavior:
+- Claude makes 1 tool call → tools are removed from API params → final response
+- If Claude wants another tool call after seeing results, it can't (gets empty response)
+
+Desired behavior:
+- Each tool call should be a separate API request where Claude can reason about previous results
+- Support for complex queries requiring multiple searches for comparisons, multi-part questions, or when information from different courses/lessons is needed
+
+Example flow:
+1. User: "Search for a course that discusses the same topic as lesson 4 of course X"
+2. Claude: get course outline for course X → gets title of lesson 4
+3. Claude: uses the title to search for a course that discusses the same topic → returns course information
+4. Claude: provides complete answer
+
+Requirements:
+- Maximum 2 sequential rounds per user query
+- Terminate when: (a) 2 rounds completed, (b) Claude's response has no tool_use blocks, or (c) tool call fails
+- Preserve conversation context between rounds
+- Handle tool execution errors gracefully
+
+Notes:
+- Update the system prompt in @backend/ai_generator.py
+- Update the test @backend/tests/test_ai_generator.py
+- Write tests that verify the external behavior (API calls made, tools executed, results returned) rather than internal state details.
+
+Use two parallel subagents to brainstorm possible plans. Do not implement any code.
diff --git a/backend/ai_generator.py b/backend/ai_generator.py
index 0363ca90c..b154b1001 100644
--- a/backend/ai_generator.py
+++ b/backend/ai_generator.py
@@ -1,135 +1,127 @@
-import anthropic
-from typing import List, Optional, Dict, Any
-
-class AIGenerator:
- """Handles interactions with Anthropic's Claude API for generating responses"""
-
- # Static system prompt to avoid rebuilding on each call
- SYSTEM_PROMPT = """ You are an AI assistant specialized in course materials and educational content with access to a comprehensive search tool for course information.
-
-Search Tool Usage:
-- Use the search tool **only** for questions about specific course content or detailed educational materials
-- **One search per query maximum**
-- Synthesize search results into accurate, fact-based responses
-- If search yields no results, state this clearly without offering alternatives
-
-Response Protocol:
-- **General knowledge questions**: Answer using existing knowledge without searching
-- **Course-specific questions**: Search first, then answer
-- **No meta-commentary**:
- - Provide direct answers only — no reasoning process, search explanations, or question-type analysis
- - Do not mention "based on the search results"
-
-
-All responses must be:
-1. **Brief, Concise and focused** - Get to the point quickly
-2. **Educational** - Maintain instructional value
-3. **Clear** - Use accessible language
-4. **Example-supported** - Include relevant examples when they aid understanding
-Provide only the direct answer to what was asked.
-"""
-
- def __init__(self, api_key: str, model: str):
- self.client = anthropic.Anthropic(api_key=api_key)
- self.model = model
-
- # Pre-build base API parameters
- self.base_params = {
- "model": self.model,
- "temperature": 0,
- "max_tokens": 800
- }
-
- def generate_response(self, query: str,
- conversation_history: Optional[str] = None,
- tools: Optional[List] = None,
- tool_manager=None) -> str:
- """
- Generate AI response with optional tool usage and conversation context.
-
- Args:
- query: The user's question or request
- conversation_history: Previous messages for context
- tools: Available tools the AI can use
- tool_manager: Manager to execute tools
-
- Returns:
- Generated response as string
- """
-
- # Build system content efficiently - avoid string ops when possible
- system_content = (
- f"{self.SYSTEM_PROMPT}\n\nPrevious conversation:\n{conversation_history}"
- if conversation_history
- else self.SYSTEM_PROMPT
- )
-
- # Prepare API call parameters efficiently
- api_params = {
- **self.base_params,
- "messages": [{"role": "user", "content": query}],
- "system": system_content
- }
-
- # Add tools if available
- if tools:
- api_params["tools"] = tools
- api_params["tool_choice"] = {"type": "auto"}
-
- # Get response from Claude
- response = self.client.messages.create(**api_params)
-
- # Handle tool execution if needed
- if response.stop_reason == "tool_use" and tool_manager:
- return self._handle_tool_execution(response, api_params, tool_manager)
-
- # Return direct response
- return response.content[0].text
-
- def _handle_tool_execution(self, initial_response, base_params: Dict[str, Any], tool_manager):
- """
- Handle execution of tool calls and get follow-up response.
-
- Args:
- initial_response: The response containing tool use requests
- base_params: Base API parameters
- tool_manager: Manager to execute tools
-
- Returns:
- Final response text after tool execution
- """
- # Start with existing messages
- messages = base_params["messages"].copy()
-
- # Add AI's tool use response
- messages.append({"role": "assistant", "content": initial_response.content})
-
- # Execute all tool calls and collect results
- tool_results = []
- for content_block in initial_response.content:
- if content_block.type == "tool_use":
- tool_result = tool_manager.execute_tool(
- content_block.name,
- **content_block.input
- )
-
- tool_results.append({
- "type": "tool_result",
- "tool_use_id": content_block.id,
- "content": tool_result
- })
-
- # Add tool results as single message
- if tool_results:
- messages.append({"role": "user", "content": tool_results})
-
- # Prepare final API call without tools
- final_params = {
- **self.base_params,
- "messages": messages,
- "system": base_params["system"]
- }
-
- # Get final response
- final_response = self.client.messages.create(**final_params)
- return final_response.content[0].text
\ No newline at end of file
+import anthropic
+from typing import List, Optional
+
+class AIGenerator:
+ """Handles interactions with Anthropic's Claude API for generating responses"""
+
+ # Static system prompt to avoid rebuilding on each call
+ SYSTEM_PROMPT = """ You are an AI assistant specialized in course materials and educational content with access to a comprehensive search tool for course information.
+
+Search Tool Usage:
+- Use `search_course_content` for questions about specific course content or detailed educational materials
+- Use `get_course_outline` for questions about course structure, lesson list, or what topics a course covers — it returns the course title, course link, and all lessons with their numbers and titles
+- **Up to 2 sequential tool calls per query**
+- Synthesize results into accurate, fact-based responses
+- If a tool yields no results, state this clearly without offering alternatives
+
+Response Protocol:
+- **General knowledge questions**: Answer using existing knowledge without searching
+- **Course-specific questions**: Search first, then answer
+- **No meta-commentary**:
+ - Provide direct answers only — no reasoning process, search explanations, or question-type analysis
+ - Do not mention "based on the search results"
+
+
+All responses must be:
+1. **Brief, Concise and focused** - Get to the point quickly
+2. **Educational** - Maintain instructional value
+3. **Clear** - Use accessible language
+4. **Example-supported** - Include relevant examples when they aid understanding
+Provide only the direct answer to what was asked.
+"""
+
+ MAX_ROUNDS = 2
+
+ def __init__(self, api_key: str, model: str):
+ self.client = anthropic.Anthropic(api_key=api_key)
+ self.model = model
+
+ # Pre-build base API parameters
+ self.base_params = {
+ "model": self.model,
+ "temperature": 0,
+ "max_tokens": 800
+ }
+
+ def generate_response(self, query: str,
+ conversation_history: Optional[str] = None,
+ tools: Optional[List] = None,
+ tool_manager=None) -> str:
+ """
+ Generate AI response with optional tool usage and conversation context.
+ Supports up to MAX_ROUNDS sequential tool-call rounds before forcing a
+ final text response.
+
+ Args:
+ query: The user's question or request
+ conversation_history: Previous messages for context
+ tools: Available tools the AI can use
+ tool_manager: Manager to execute tools
+
+ Returns:
+ Generated response as string
+ """
+
+ # Build system content efficiently - avoid string ops when possible
+ system_content = (
+ f"{self.SYSTEM_PROMPT}\n\nPrevious conversation:\n{conversation_history}"
+ if conversation_history
+ else self.SYSTEM_PROMPT
+ )
+
+ messages = [{"role": "user", "content": query}]
+
+ # Prepare initial API call parameters
+ api_params = {
+ **self.base_params,
+ "messages": messages,
+ "system": system_content
+ }
+
+ if tools:
+ api_params["tools"] = tools
+ api_params["tool_choice"] = {"type": "auto"}
+
+ # Tool-calling loop: up to MAX_ROUNDS sequential rounds
+ round_count = 0
+ while round_count < self.MAX_ROUNDS:
+ response = self.client.messages.create(**api_params)
+
+ # Early exit: no tool use requested or no manager to execute them
+ if response.stop_reason != "tool_use" or not tool_manager:
+ return response.content[0].text
+
+ # Append assistant turn (contains tool-use blocks)
+ messages.append({"role": "assistant", "content": response.content})
+
+ # Execute all tool calls in this round
+ tool_results = []
+ for block in response.content:
+ if block.type == "tool_use":
+ result = tool_manager.execute_tool(block.name, **block.input)
+ tool_results.append({
+ "type": "tool_result",
+ "tool_use_id": block.id,
+ "content": result
+ })
+
+ messages.append({"role": "user", "content": tool_results})
+
+ # Rebuild api_params with updated messages; keep tools for next round
+ api_params = {
+ **self.base_params,
+ "messages": messages,
+ "system": system_content,
+ "tools": tools,
+ "tool_choice": {"type": "auto"}
+ }
+ round_count += 1
+
+ # Max rounds reached — force a text response by calling without tools
+ final_params = {
+ **self.base_params,
+ "messages": messages,
+ "system": system_content
+ }
+ final_response = self.client.messages.create(**final_params)
+ return final_response.content[0].text
diff --git a/backend/app.py b/backend/app.py
index 5a69d741d..601dd93b5 100644
--- a/backend/app.py
+++ b/backend/app.py
@@ -51,6 +51,10 @@ class CourseStats(BaseModel):
total_courses: int
course_titles: List[str]
+class ClearSessionRequest(BaseModel):
+ """Request model for clearing a session"""
+ session_id: str
+
# API Endpoints
@app.post("/api/query", response_model=QueryResponse)
@@ -73,6 +77,12 @@ async def query_documents(request: QueryRequest):
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
+@app.post("/api/clear-session")
+async def clear_session(request: ClearSessionRequest):
+ """Clear a conversation session"""
+ rag_system.session_manager.clear_session(request.session_id)
+ return {"status": "ok"}
+
@app.get("/api/courses", response_model=CourseStats)
async def get_course_stats():
"""Get course analytics and statistics"""
diff --git a/backend/config.py b/backend/config.py
index d9f6392ef..fb9b6ee69 100644
--- a/backend/config.py
+++ b/backend/config.py
@@ -18,7 +18,7 @@ class Config:
# Document processing settings
CHUNK_SIZE: int = 800 # Size of text chunks for vector storage
CHUNK_OVERLAP: int = 100 # Characters to overlap between chunks
- MAX_RESULTS: int = 5 # Maximum search results to return
+ MAX_RESULTS: int = 5 # Maximum search results to return
MAX_HISTORY: int = 2 # Number of conversation messages to remember
# Database paths
diff --git a/backend/rag_system.py b/backend/rag_system.py
index 50d848c8e..443649f0e 100644
--- a/backend/rag_system.py
+++ b/backend/rag_system.py
@@ -4,7 +4,7 @@
from vector_store import VectorStore
from ai_generator import AIGenerator
from session_manager import SessionManager
-from search_tools import ToolManager, CourseSearchTool
+from search_tools import ToolManager, CourseSearchTool, CourseOutlineTool
from models import Course, Lesson, CourseChunk
class RAGSystem:
@@ -23,6 +23,8 @@ def __init__(self, config):
self.tool_manager = ToolManager()
self.search_tool = CourseSearchTool(self.vector_store)
self.tool_manager.register_tool(self.search_tool)
+ self.outline_tool = CourseOutlineTool(self.vector_store)
+ self.tool_manager.register_tool(self.outline_tool)
def add_course_document(self, file_path: str) -> Tuple[Course, int]:
"""
diff --git a/backend/search_tools.py b/backend/search_tools.py
index adfe82352..73e44cfce 100644
--- a/backend/search_tools.py
+++ b/backend/search_tools.py
@@ -113,6 +113,49 @@ def _format_results(self, results: SearchResults) -> str:
return "\n\n".join(formatted)
+class CourseOutlineTool(Tool):
+ """Tool for retrieving a course's full lesson outline from course metadata"""
+
+ def __init__(self, vector_store: VectorStore):
+ self.store = vector_store
+ self.last_sources = []
+
+ def get_tool_definition(self) -> Dict[str, Any]:
+ return {
+ "name": "get_course_outline",
+ "description": "Get the complete outline of a course: its title, course link, and all lessons (number and title)",
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "course_name": {
+ "type": "string",
+ "description": "Course title (partial matches work, e.g. 'MCP', 'Introduction')"
+ }
+ },
+ "required": ["course_name"]
+ }
+ }
+
+ def execute(self, course_name: str) -> str:
+ outline = self.store.get_course_outline(course_name)
+ if not outline:
+ return f"No course found matching '{course_name}'."
+
+ title = outline['title']
+ course_link = outline['course_link']
+ lessons = outline['lessons']
+
+ self.last_sources = [title]
+
+ lines = [f"Course: {title}", f"Link: {course_link}", "", "Lessons:"]
+ for lesson in lessons:
+ num = lesson.get('lesson_number', '')
+ lesson_title = lesson.get('lesson_title', '')
+ lines.append(f" Lesson {num}: {lesson_title}")
+
+ return "\n".join(lines)
+
+
class ToolManager:
"""Manages available tools for the AI"""
diff --git a/backend/tests/__init__.py b/backend/tests/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py
new file mode 100644
index 000000000..5605b419f
--- /dev/null
+++ b/backend/tests/conftest.py
@@ -0,0 +1,68 @@
+"""
+Shared pytest fixtures for the RAG chatbot test suite.
+
+The FastAPI app in app.py has two side effects on import that must be suppressed
+in tests:
+ 1. RAGSystem(config) — connects to ChromaDB and loads embeddings
+ 2. app.mount("/", StaticFiles(directory="../frontend")) — the frontend directory
+ does not exist in the test environment
+
+Both are patched before the module is imported so the app loads cleanly.
+"""
+import sys
+import os
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import pytest
+from unittest.mock import MagicMock, patch
+
+
+def _import_app_with_patches():
+ """
+ Import app.py while RAGSystem and StaticFiles are mocked out.
+
+ The patches must be active during the import because app.py calls
+ RAGSystem(config) and StaticFiles(...) at module level. Once the module
+ is loaded those names are bound locally, so stopping the patches
+ afterwards is safe.
+ """
+ mock_rag = MagicMock()
+ with patch("rag_system.RAGSystem", return_value=mock_rag), \
+ patch("fastapi.staticfiles.StaticFiles"):
+ import app as app_module
+ # Ensure the module-level rag_system variable points to our mock
+ app_module.rag_system = mock_rag
+ return app_module, mock_rag
+
+
+_app_module, _mock_rag_instance = _import_app_with_patches()
+
+
+@pytest.fixture
+def mock_rag():
+ """
+ The mock RAGSystem instance wired into the FastAPI app.
+
+ Call tracking and side effects are cleared between tests so that
+ assertions in one test cannot bleed into the next.
+ """
+ _mock_rag_instance.reset_mock(side_effect=True)
+ return _mock_rag_instance
+
+
+@pytest.fixture
+def client():
+ """FastAPI TestClient backed by the patched app."""
+ from fastapi.testclient import TestClient
+ return TestClient(_app_module.app)
+
+
+@pytest.fixture
+def sample_query_payload():
+ return {"query": "What is machine learning?", "session_id": "test-session-123"}
+
+
+@pytest.fixture
+def sample_rag_response():
+ return ("Machine learning is a subset of AI.", ["Course A - Lesson 1"])
diff --git a/backend/tests/test_ai_generator.py b/backend/tests/test_ai_generator.py
new file mode 100644
index 000000000..823240184
--- /dev/null
+++ b/backend/tests/test_ai_generator.py
@@ -0,0 +1,511 @@
+"""
+Tests for AIGenerator in ai_generator.py.
+
+All tests mock the Anthropic client to avoid real API calls.
+They verify that AIGenerator correctly:
+ - Calls the search_course_content tool for content queries
+ - Processes tool_use responses by executing tools and feeding results back
+ - Returns plain text when no tool use is needed
+"""
+import sys
+import os
+import pytest
+from unittest.mock import MagicMock, patch, call
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from ai_generator import AIGenerator
+
+
+# ---------------------------------------------------------------------------
+# Helpers to build mock Anthropic response objects
+# ---------------------------------------------------------------------------
+
+def make_text_content(text):
+ block = MagicMock()
+ block.type = "text"
+ block.text = text
+ return block
+
+
+def make_tool_use_content(tool_name, tool_input, tool_id="tool_call_1"):
+ block = MagicMock()
+ block.type = "tool_use"
+ block.name = tool_name
+ block.input = tool_input
+ block.id = tool_id
+ return block
+
+
+def make_response(stop_reason, content_blocks):
+ response = MagicMock()
+ response.stop_reason = stop_reason
+ response.content = content_blocks
+ return response
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+class TestAIGeneratorDirectResponse:
+
+ @patch("ai_generator.anthropic.Anthropic")
+ def test_generate_response_returns_text_for_general_query(self, mock_anthropic_cls):
+ """When Claude responds with end_turn, the text is returned directly."""
+ mock_client = MagicMock()
+ mock_anthropic_cls.return_value = mock_client
+ mock_client.messages.create.return_value = make_response(
+ stop_reason="end_turn",
+ content_blocks=[make_text_content("Paris is the capital of France.")],
+ )
+
+ gen = AIGenerator(api_key="test-key", model="claude-test")
+ result = gen.generate_response(query="What is the capital of France?")
+
+ assert result == "Paris is the capital of France."
+
+ @patch("ai_generator.anthropic.Anthropic")
+ def test_generate_response_includes_tools_in_api_call(self, mock_anthropic_cls):
+ """When tools are provided, they are included in the API call parameters."""
+ mock_client = MagicMock()
+ mock_anthropic_cls.return_value = mock_client
+ mock_client.messages.create.return_value = make_response(
+ stop_reason="end_turn",
+ content_blocks=[make_text_content("Some answer.")],
+ )
+
+ tool_def = {"name": "search_course_content", "description": "...", "input_schema": {}}
+ gen = AIGenerator(api_key="test-key", model="claude-test")
+ gen.generate_response(query="What is in lesson 1?", tools=[tool_def])
+
+ call_kwargs = mock_client.messages.create.call_args[1]
+ assert "tools" in call_kwargs
+ assert call_kwargs["tools"] == [tool_def]
+ assert call_kwargs["tool_choice"] == {"type": "auto"}
+
+ @patch("ai_generator.anthropic.Anthropic")
+ def test_generate_response_no_tools_if_not_provided(self, mock_anthropic_cls):
+ """When no tools are passed, the API call has no tools key."""
+ mock_client = MagicMock()
+ mock_anthropic_cls.return_value = mock_client
+ mock_client.messages.create.return_value = make_response(
+ stop_reason="end_turn",
+ content_blocks=[make_text_content("Answer.")],
+ )
+
+ gen = AIGenerator(api_key="test-key", model="claude-test")
+ gen.generate_response(query="Hello")
+
+ call_kwargs = mock_client.messages.create.call_args[1]
+ assert "tools" not in call_kwargs
+
+
+class TestAIGeneratorToolExecution:
+
+ @patch("ai_generator.anthropic.Anthropic")
+ def test_tool_is_executed_on_tool_use_stop_reason(self, mock_anthropic_cls):
+ """When stop_reason is tool_use, the tool is executed and the final text is returned."""
+ mock_client = MagicMock()
+ mock_anthropic_cls.return_value = mock_client
+
+ tool_block = make_tool_use_content(
+ "search_course_content", {"query": "transformers"}, "id_1"
+ )
+ first_response = make_response(stop_reason="tool_use", content_blocks=[tool_block])
+ final_response = make_response(
+ stop_reason="end_turn",
+ content_blocks=[make_text_content("Transformers use attention.")],
+ )
+ mock_client.messages.create.side_effect = [first_response, final_response]
+
+ tool_manager = MagicMock()
+ tool_manager.execute_tool.return_value = "Transformers are attention models."
+
+ gen = AIGenerator(api_key="test-key", model="claude-test")
+ result = gen.generate_response(
+ query="What are transformers?",
+ tools=[{"name": "search_course_content"}],
+ tool_manager=tool_manager,
+ )
+
+ assert result == "Transformers use attention."
+ assert mock_client.messages.create.call_count == 2
+ tool_manager.execute_tool.assert_called_once_with(
+ "search_course_content", query="transformers"
+ )
+
+ @patch("ai_generator.anthropic.Anthropic")
+ def test_tool_result_sent_back_as_user_message(self, mock_anthropic_cls):
+ """Tool result is appended to the conversation as a user message with tool_result type."""
+ mock_client = MagicMock()
+ mock_anthropic_cls.return_value = mock_client
+
+ tool_block = make_tool_use_content(
+ "search_course_content", {"query": "MCP"}, "call_abc"
+ )
+ first_response = make_response(stop_reason="tool_use", content_blocks=[tool_block])
+ final_response = make_response(
+ stop_reason="end_turn",
+ content_blocks=[make_text_content("MCP stands for Model Context Protocol.")],
+ )
+ mock_client.messages.create.side_effect = [first_response, final_response]
+
+ tool_manager = MagicMock()
+ tool_manager.execute_tool.return_value = "MCP lesson content here."
+
+ gen = AIGenerator(api_key="test-key", model="claude-test")
+ gen.generate_response(
+ query="What is MCP?",
+ tools=[{"name": "search_course_content"}],
+ tool_manager=tool_manager,
+ )
+
+ # The second API call should include a user message with tool_result
+ second_call_kwargs = mock_client.messages.create.call_args_list[1][1]
+ messages = second_call_kwargs["messages"]
+
+ # Find the user message containing tool results
+ tool_result_message = next(
+ (m for m in messages if m["role"] == "user" and isinstance(m["content"], list)),
+ None,
+ )
+ assert tool_result_message is not None
+ tool_result_block = tool_result_message["content"][0]
+ assert tool_result_block["type"] == "tool_result"
+ assert tool_result_block["tool_use_id"] == "call_abc"
+ assert tool_result_block["content"] == "MCP lesson content here."
+
+ @patch("ai_generator.anthropic.Anthropic")
+ def test_final_api_call_has_no_tools(self, mock_anthropic_cls):
+ """After MAX_ROUNDS of tool use, the post-loop final API call has no tools."""
+ mock_client = MagicMock()
+ mock_anthropic_cls.return_value = mock_client
+
+ tool_block1 = make_tool_use_content("search_course_content", {"query": "AI"}, "id_1")
+ tool_block2 = make_tool_use_content("get_course_outline", {"course_name": "AI"}, "id_2")
+ round1_response = make_response(stop_reason="tool_use", content_blocks=[tool_block1])
+ round2_response = make_response(stop_reason="tool_use", content_blocks=[tool_block2])
+ final_response = make_response(
+ stop_reason="end_turn",
+ content_blocks=[make_text_content("AI answer.")],
+ )
+ mock_client.messages.create.side_effect = [round1_response, round2_response, final_response]
+
+ tool_manager = MagicMock()
+ tool_manager.execute_tool.return_value = "Some search result."
+
+ gen = AIGenerator(api_key="test-key", model="claude-test")
+ gen.generate_response(
+ query="Tell me about AI",
+ tools=[{"name": "search_course_content"}],
+ tool_manager=tool_manager,
+ )
+
+ # Third call (post-loop) should NOT include tools
+ assert mock_client.messages.create.call_count == 3
+ third_call_kwargs = mock_client.messages.create.call_args_list[2][1]
+ assert "tools" not in third_call_kwargs
+
+ @patch("ai_generator.anthropic.Anthropic")
+ def test_generate_response_with_conversation_history(self, mock_anthropic_cls):
+ """Conversation history is injected into the system prompt, not messages."""
+ mock_client = MagicMock()
+ mock_anthropic_cls.return_value = mock_client
+ mock_client.messages.create.return_value = make_response(
+ stop_reason="end_turn",
+ content_blocks=[make_text_content("Follow-up answer.")],
+ )
+
+ gen = AIGenerator(api_key="test-key", model="claude-test")
+ gen.generate_response(
+ query="What about lesson 2?",
+ conversation_history="User: What is lesson 1?\nAssistant: Lesson 1 covers X.",
+ )
+
+ call_kwargs = mock_client.messages.create.call_args[1]
+ system_content = call_kwargs["system"]
+ assert "Previous conversation" in system_content
+ assert "What is lesson 1?" in system_content
+ # History should be in system, not in messages
+ assert len(call_kwargs["messages"]) == 1
+
+
+class TestAIGeneratorSearchToolCalling:
+
+ @patch("ai_generator.anthropic.Anthropic")
+ def test_content_query_triggers_search_tool_call(self, mock_anthropic_cls):
+ """
+ Verifies the full loop: content query → Claude requests search_course_content
+ tool → tool is executed → final answer is returned.
+ """
+ mock_client = MagicMock()
+ mock_anthropic_cls.return_value = mock_client
+
+ tool_block = make_tool_use_content(
+ "search_course_content",
+ {"query": "what does lesson 1 cover", "course_name": "AI Course"},
+ "search_id_1",
+ )
+ first_response = make_response(stop_reason="tool_use", content_blocks=[tool_block])
+ final_response = make_response(
+ stop_reason="end_turn",
+ content_blocks=[make_text_content("Lesson 1 covers neural networks.")],
+ )
+ mock_client.messages.create.side_effect = [first_response, final_response]
+
+ tool_manager = MagicMock()
+ tool_manager.execute_tool.return_value = (
+ "[AI Course - Lesson 1]\nThis lesson introduces neural networks."
+ )
+
+ gen = AIGenerator(api_key="test-key", model="claude-test")
+ search_tool_def = {
+ "name": "search_course_content",
+ "description": "Search course materials",
+ "input_schema": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]},
+ }
+ result = gen.generate_response(
+ query="Answer this question about course materials: What does lesson 1 cover?",
+ tools=[search_tool_def],
+ tool_manager=tool_manager,
+ )
+
+ assert result == "Lesson 1 covers neural networks."
+ tool_manager.execute_tool.assert_called_once_with(
+ "search_course_content",
+ query="what does lesson 1 cover",
+ course_name="AI Course",
+ )
+
+
+class TestAIGeneratorTwoRoundToolCalling:
+ """Tests for sequential 2-round tool-calling behavior."""
+
+ @patch("ai_generator.anthropic.Anthropic")
+ def test_two_sequential_tool_calls_makes_three_api_calls(self, mock_anthropic_cls):
+ """Two rounds of tool use followed by end_turn results in exactly 3 API calls."""
+ mock_client = MagicMock()
+ mock_anthropic_cls.return_value = mock_client
+
+ tb1 = make_tool_use_content("get_course_outline", {"course_name": "X"}, "id_1")
+ tb2 = make_tool_use_content("search_course_content", {"query": "topic"}, "id_2")
+ mock_client.messages.create.side_effect = [
+ make_response("tool_use", [tb1]),
+ make_response("tool_use", [tb2]),
+ make_response("end_turn", [make_text_content("Final answer.")]),
+ ]
+
+ tool_manager = MagicMock()
+ tool_manager.execute_tool.return_value = "some result"
+
+ gen = AIGenerator(api_key="test-key", model="claude-test")
+ result = gen.generate_response(
+ query="Search for a course on the same topic as lesson 4 of course X",
+ tools=[{"name": "get_course_outline"}, {"name": "search_course_content"}],
+ tool_manager=tool_manager,
+ )
+
+ assert mock_client.messages.create.call_count == 3
+ assert result == "Final answer."
+ assert tool_manager.execute_tool.call_count == 2
+
+ @patch("ai_generator.anthropic.Anthropic")
+ def test_tools_included_in_second_round_api_call(self, mock_anthropic_cls):
+ """The second API call (round 2) still includes tools so Claude can use them again."""
+ mock_client = MagicMock()
+ mock_anthropic_cls.return_value = mock_client
+
+ tb1 = make_tool_use_content("get_course_outline", {"course_name": "X"}, "id_1")
+ tb2 = make_tool_use_content("search_course_content", {"query": "topic"}, "id_2")
+ mock_client.messages.create.side_effect = [
+ make_response("tool_use", [tb1]),
+ make_response("tool_use", [tb2]),
+ make_response("end_turn", [make_text_content("Done.")]),
+ ]
+
+ tool_manager = MagicMock()
+ tool_manager.execute_tool.return_value = "result"
+
+ gen = AIGenerator(api_key="test-key", model="claude-test")
+ gen.generate_response(
+ query="Multi-step query",
+ tools=[{"name": "search_course_content"}],
+ tool_manager=tool_manager,
+ )
+
+ second_call_kwargs = mock_client.messages.create.call_args_list[1][1]
+ assert "tools" in second_call_kwargs
+
+ @patch("ai_generator.anthropic.Anthropic")
+ def test_final_call_after_max_rounds_has_no_tools(self, mock_anthropic_cls):
+ """The post-loop final call (3rd) after hitting MAX_ROUNDS has no tools."""
+ mock_client = MagicMock()
+ mock_anthropic_cls.return_value = mock_client
+
+ tb1 = make_tool_use_content("search_course_content", {"query": "a"}, "id_1")
+ tb2 = make_tool_use_content("search_course_content", {"query": "b"}, "id_2")
+ mock_client.messages.create.side_effect = [
+ make_response("tool_use", [tb1]),
+ make_response("tool_use", [tb2]),
+ make_response("end_turn", [make_text_content("Answer.")]),
+ ]
+
+ tool_manager = MagicMock()
+ tool_manager.execute_tool.return_value = "result"
+
+ gen = AIGenerator(api_key="test-key", model="claude-test")
+ gen.generate_response(
+ query="Query",
+ tools=[{"name": "search_course_content"}],
+ tool_manager=tool_manager,
+ )
+
+ third_call_kwargs = mock_client.messages.create.call_args_list[2][1]
+ assert "tools" not in third_call_kwargs
+
+ @patch("ai_generator.anthropic.Anthropic")
+ def test_message_list_grows_with_each_round(self, mock_anthropic_cls):
+ """After 2 rounds, the final API call receives 5 messages in the correct order."""
+ mock_client = MagicMock()
+ mock_anthropic_cls.return_value = mock_client
+
+ tb1 = make_tool_use_content("get_course_outline", {"course_name": "X"}, "id_1")
+ tb2 = make_tool_use_content("search_course_content", {"query": "topic"}, "id_2")
+ r1 = make_response("tool_use", [tb1])
+ r2 = make_response("tool_use", [tb2])
+ mock_client.messages.create.side_effect = [
+ r1, r2,
+ make_response("end_turn", [make_text_content("Done.")]),
+ ]
+
+ tool_manager = MagicMock()
+ tool_manager.execute_tool.return_value = "result"
+
+ gen = AIGenerator(api_key="test-key", model="claude-test")
+ gen.generate_response(
+ query="Complex query",
+ tools=[{"name": "search_course_content"}],
+ tool_manager=tool_manager,
+ )
+
+ final_call_messages = mock_client.messages.create.call_args_list[2][1]["messages"]
+ assert len(final_call_messages) == 5
+ assert final_call_messages[0]["role"] == "user" # original query
+ assert final_call_messages[1]["role"] == "assistant" # round 1 tool-use
+ assert final_call_messages[2]["role"] == "user" # round 1 tool-results
+ assert final_call_messages[3]["role"] == "assistant" # round 2 tool-use
+ assert final_call_messages[4]["role"] == "user" # round 2 tool-results
+
+ @patch("ai_generator.anthropic.Anthropic")
+ def test_tool_error_does_not_terminate_loop(self, mock_anthropic_cls):
+ """A tool returning an error string is passed as context; the loop continues."""
+ mock_client = MagicMock()
+ mock_anthropic_cls.return_value = mock_client
+
+ tb = make_tool_use_content("search_course_content", {"query": "x"}, "id_err")
+ mock_client.messages.create.side_effect = [
+ make_response("tool_use", [tb]),
+ make_response("end_turn", [make_text_content("Sorry, search failed.")]),
+ ]
+
+ tool_manager = MagicMock()
+ tool_manager.execute_tool.return_value = "Search error: n_results must be positive"
+
+ gen = AIGenerator(api_key="test-key", model="claude-test")
+ result = gen.generate_response(
+ query="Find something",
+ tools=[{"name": "search_course_content"}],
+ tool_manager=tool_manager,
+ )
+
+ assert mock_client.messages.create.call_count == 2
+ assert result == "Sorry, search failed."
+
+ # Error string passed as tool_result content in second call
+ second_call_messages = mock_client.messages.create.call_args_list[1][1]["messages"]
+ tool_result_msg = next(
+ m for m in second_call_messages
+ if m["role"] == "user" and isinstance(m["content"], list)
+ )
+ assert tool_result_msg["content"][0]["content"] == "Search error: n_results must be positive"
+
+ @patch("ai_generator.anthropic.Anthropic")
+ def test_early_exit_when_no_tool_use_in_first_response(self, mock_anthropic_cls):
+ """When the first response has end_turn, only 1 API call is made."""
+ mock_client = MagicMock()
+ mock_anthropic_cls.return_value = mock_client
+ mock_client.messages.create.return_value = make_response(
+ "end_turn", [make_text_content("Direct answer.")]
+ )
+
+ gen = AIGenerator(api_key="test-key", model="claude-test")
+ result = gen.generate_response(
+ query="What is 2+2?",
+ tools=[{"name": "search_course_content"}],
+ tool_manager=MagicMock(),
+ )
+
+ assert mock_client.messages.create.call_count == 1
+ assert result == "Direct answer."
+
+ @patch("ai_generator.anthropic.Anthropic")
+ def test_early_exit_after_first_round_no_second_tool_use(self, mock_anthropic_cls):
+ """When round 2 returns end_turn, exactly 2 API calls are made (no third call)."""
+ mock_client = MagicMock()
+ mock_anthropic_cls.return_value = mock_client
+
+ tb = make_tool_use_content("search_course_content", {"query": "topic"}, "id_1")
+ mock_client.messages.create.side_effect = [
+ make_response("tool_use", [tb]),
+ make_response("end_turn", [make_text_content("Answer after one search.")]),
+ ]
+
+ tool_manager = MagicMock()
+ tool_manager.execute_tool.return_value = "search result"
+
+ gen = AIGenerator(api_key="test-key", model="claude-test")
+ result = gen.generate_response(
+ query="Find info on topic",
+ tools=[{"name": "search_course_content"}],
+ tool_manager=tool_manager,
+ )
+
+ assert mock_client.messages.create.call_count == 2
+ assert result == "Answer after one search."
+
+ @patch("ai_generator.anthropic.Anthropic")
+ def test_two_tool_results_in_single_round_appended_correctly(self, mock_anthropic_cls):
+ """When a single round contains 2 tool-use blocks, both are executed and appended."""
+ mock_client = MagicMock()
+ mock_anthropic_cls.return_value = mock_client
+
+ tb1 = make_tool_use_content("search_course_content", {"query": "a"}, "id_a")
+ tb2 = make_tool_use_content("get_course_outline", {"course_name": "X"}, "id_b")
+ mock_client.messages.create.side_effect = [
+ make_response("tool_use", [tb1, tb2]),
+ make_response("end_turn", [make_text_content("Both results used.")]),
+ ]
+
+ tool_manager = MagicMock()
+ tool_manager.execute_tool.return_value = "result"
+
+ gen = AIGenerator(api_key="test-key", model="claude-test")
+ result = gen.generate_response(
+ query="Compare two things",
+ tools=[{"name": "search_course_content"}],
+ tool_manager=tool_manager,
+ )
+
+ assert tool_manager.execute_tool.call_count == 2
+ assert result == "Both results used."
+
+ # Second API call should have a user message with 2 tool_result dicts
+ second_call_messages = mock_client.messages.create.call_args_list[1][1]["messages"]
+ tool_result_msg = next(
+ m for m in second_call_messages
+ if m["role"] == "user" and isinstance(m["content"], list)
+ )
+ assert len(tool_result_msg["content"]) == 2
+ assert tool_result_msg["content"][0]["tool_use_id"] == "id_a"
+ assert tool_result_msg["content"][1]["tool_use_id"] == "id_b"
diff --git a/backend/tests/test_api_endpoints.py b/backend/tests/test_api_endpoints.py
new file mode 100644
index 000000000..cb222225d
--- /dev/null
+++ b/backend/tests/test_api_endpoints.py
@@ -0,0 +1,171 @@
+"""
+Tests for the FastAPI endpoints in app.py.
+
+Fixtures (client, mock_rag) are provided by conftest.py.
+Each test configures mock_rag return values/side effects for the scenario
+under test, then inspects the HTTP response.
+
+Endpoints covered:
+ POST /api/query
+ GET /api/courses
+ POST /api/clear-session
+"""
+
+
+class TestQueryEndpoint:
+
+ def test_returns_200_for_valid_request(self, client, mock_rag):
+ mock_rag.query.return_value = ("ML is a subset of AI.", ["Course A - Lesson 1"])
+ mock_rag.session_manager.create_session.return_value = "new-session"
+
+ response = client.post("/api/query", json={"query": "What is ML?"})
+
+ assert response.status_code == 200
+
+ def test_response_body_has_answer_sources_session_id(self, client, mock_rag):
+ mock_rag.query.return_value = ("ML is a subset of AI.", ["Course A - Lesson 1"])
+ mock_rag.session_manager.create_session.return_value = "new-session"
+
+ data = client.post("/api/query", json={"query": "What is ML?"}).json()
+
+ assert data["answer"] == "ML is a subset of AI."
+ assert data["sources"] == ["Course A - Lesson 1"]
+ assert data["session_id"] == "new-session"
+
+ def test_creates_session_when_none_provided(self, client, mock_rag):
+ mock_rag.query.return_value = ("Answer.", [])
+ mock_rag.session_manager.create_session.return_value = "generated-id"
+
+ data = client.post("/api/query", json={"query": "Hello"}).json()
+
+ mock_rag.session_manager.create_session.assert_called_once()
+ assert data["session_id"] == "generated-id"
+
+ def test_uses_provided_session_id_without_creating_new_one(self, client, mock_rag):
+ mock_rag.query.return_value = ("Answer.", [])
+
+ data = client.post(
+ "/api/query", json={"query": "Hello", "session_id": "existing-sess"}
+ ).json()
+
+ mock_rag.session_manager.create_session.assert_not_called()
+ assert data["session_id"] == "existing-sess"
+
+ def test_passes_query_and_session_id_to_rag_system(self, client, mock_rag):
+ mock_rag.query.return_value = ("Answer.", [])
+
+ client.post("/api/query", json={"query": "Test query", "session_id": "sess-1"})
+
+ mock_rag.query.assert_called_once_with("Test query", "sess-1")
+
+ def test_returns_500_when_rag_raises(self, client, mock_rag):
+ mock_rag.session_manager.create_session.return_value = "sess"
+ mock_rag.query.side_effect = RuntimeError("DB unavailable")
+
+ response = client.post("/api/query", json={"query": "Fail"})
+
+ assert response.status_code == 500
+
+ def test_returns_422_when_query_field_missing(self, client, mock_rag):
+ response = client.post("/api/query", json={"session_id": "sess"})
+
+ assert response.status_code == 422
+
+ def test_empty_sources_list_is_valid(self, client, mock_rag):
+ mock_rag.query.return_value = ("General answer.", [])
+ mock_rag.session_manager.create_session.return_value = "s"
+
+ data = client.post("/api/query", json={"query": "Hello"}).json()
+
+ assert data["sources"] == []
+
+ def test_multiple_sources_are_returned(self, client, mock_rag):
+ mock_rag.query.return_value = (
+ "Answer citing two lessons.",
+ ["Course A - Lesson 1", "Course B - Lesson 3"],
+ )
+ mock_rag.session_manager.create_session.return_value = "s"
+
+ data = client.post("/api/query", json={"query": "Multi-source question"}).json()
+
+ assert len(data["sources"]) == 2
+ assert "Course A - Lesson 1" in data["sources"]
+ assert "Course B - Lesson 3" in data["sources"]
+
+
+class TestCoursesEndpoint:
+
+ def test_returns_200(self, client, mock_rag):
+ mock_rag.get_course_analytics.return_value = {
+ "total_courses": 1,
+ "course_titles": ["Intro to AI"],
+ }
+
+ response = client.get("/api/courses")
+
+ assert response.status_code == 200
+
+ def test_response_contains_total_courses_and_titles(self, client, mock_rag):
+ mock_rag.get_course_analytics.return_value = {
+ "total_courses": 2,
+ "course_titles": ["AI Basics", "ML Fundamentals"],
+ }
+
+ data = client.get("/api/courses").json()
+
+ assert data["total_courses"] == 2
+ assert data["course_titles"] == ["AI Basics", "ML Fundamentals"]
+
+ def test_empty_catalog_returns_zero_courses(self, client, mock_rag):
+ mock_rag.get_course_analytics.return_value = {
+ "total_courses": 0,
+ "course_titles": [],
+ }
+
+ data = client.get("/api/courses").json()
+
+ assert data["total_courses"] == 0
+ assert data["course_titles"] == []
+
+ def test_returns_500_when_analytics_raises(self, client, mock_rag):
+ mock_rag.get_course_analytics.side_effect = RuntimeError("ChromaDB error")
+
+ response = client.get("/api/courses")
+
+ assert response.status_code == 500
+
+ def test_title_count_matches_total_courses_field(self, client, mock_rag):
+ titles = ["Course A", "Course B", "Course C"]
+ mock_rag.get_course_analytics.return_value = {
+ "total_courses": len(titles),
+ "course_titles": titles,
+ }
+
+ data = client.get("/api/courses").json()
+
+ assert data["total_courses"] == len(data["course_titles"])
+
+
+class TestClearSessionEndpoint:
+
+ def test_returns_200_with_ok_status(self, client, mock_rag):
+ response = client.post("/api/clear-session", json={"session_id": "sess-abc"})
+
+ assert response.status_code == 200
+ assert response.json() == {"status": "ok"}
+
+ def test_delegates_to_session_manager(self, client, mock_rag):
+ client.post("/api/clear-session", json={"session_id": "sess-to-clear"})
+
+ mock_rag.session_manager.clear_session.assert_called_once_with("sess-to-clear")
+
+ def test_returns_422_when_session_id_missing(self, client, mock_rag):
+ response = client.post("/api/clear-session", json={})
+
+ assert response.status_code == 422
+
+ def test_different_session_ids_are_forwarded_correctly(self, client, mock_rag):
+ for session_id in ["alpha", "beta", "gamma"]:
+ mock_rag.session_manager.clear_session.reset_mock()
+ client.post("/api/clear-session", json={"session_id": session_id})
+ mock_rag.session_manager.clear_session.assert_called_once_with(session_id)
diff --git a/backend/tests/test_course_search_tool.py b/backend/tests/test_course_search_tool.py
new file mode 100644
index 000000000..3ec1991f5
--- /dev/null
+++ b/backend/tests/test_course_search_tool.py
@@ -0,0 +1,212 @@
+"""
+Tests for CourseSearchTool.execute() in search_tools.py.
+
+Unit tests use a mocked VectorStore to isolate CourseSearchTool logic.
+The integration test uses a real in-memory ChromaDB with max_results=0
+to expose the configuration bug.
+"""
+import sys
+import os
+import pytest
+from unittest.mock import MagicMock, patch
+
+# Add backend to path so imports work when running from project root
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from search_tools import CourseSearchTool
+from vector_store import SearchResults, VectorStore
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def make_mock_store(docs=None, metadata=None, error=None):
+ """Return a MagicMock VectorStore whose search() returns controlled data."""
+ store = MagicMock(spec=VectorStore)
+ if error:
+ store.search.return_value = SearchResults(
+ documents=[], metadata=[], distances=[], error=error
+ )
+ else:
+ store.search.return_value = SearchResults(
+ documents=docs or [],
+ metadata=metadata or [],
+ distances=[0.1] * len(docs or []),
+ )
+ return store
+
+
+# ---------------------------------------------------------------------------
+# Unit tests — all use mocked VectorStore
+# ---------------------------------------------------------------------------
+
+class TestCourseSearchToolExecuteUnit:
+
+ def test_execute_returns_formatted_results(self):
+ """execute() formats documents with course/lesson context headers."""
+ store = make_mock_store(
+ docs=["Transformers are attention-based models."],
+ metadata=[{"course_title": "AI Basics", "lesson_number": 2}],
+ )
+ tool = CourseSearchTool(store)
+ result = tool.execute(query="what are transformers")
+
+ assert "AI Basics" in result
+ assert "Lesson 2" in result
+ assert "Transformers are attention-based models." in result
+
+ def test_execute_returns_empty_message_when_no_results(self):
+ """execute() returns a human-readable 'not found' string when empty."""
+ store = make_mock_store(docs=[], metadata=[])
+ tool = CourseSearchTool(store)
+ result = tool.execute(query="nonexistent topic")
+
+ assert "No relevant content found" in result
+
+ def test_execute_returns_empty_message_includes_course_filter(self):
+ """Empty result message includes the course filter name."""
+ store = make_mock_store(docs=[], metadata=[])
+ tool = CourseSearchTool(store)
+ result = tool.execute(query="something", course_name="Python 101")
+
+ assert "No relevant content found" in result
+ assert "Python 101" in result
+
+ def test_execute_returns_error_string_on_search_error(self):
+ """When VectorStore returns an error, execute() returns that error string."""
+ store = make_mock_store(error="Search error: n_results must be a positive integer")
+ tool = CourseSearchTool(store)
+ result = tool.execute(query="anything")
+
+ assert "Search error" in result
+
+ def test_execute_populates_last_sources(self):
+ """After a successful search, last_sources contains course+lesson strings."""
+ store = make_mock_store(
+ docs=["Content A", "Content B"],
+ metadata=[
+ {"course_title": "Course X", "lesson_number": 1},
+ {"course_title": "Course X", "lesson_number": 3},
+ ],
+ )
+ tool = CourseSearchTool(store)
+ tool.execute(query="something")
+
+ assert len(tool.last_sources) == 2
+ assert "Course X - Lesson 1" in tool.last_sources
+ assert "Course X - Lesson 3" in tool.last_sources
+
+ def test_execute_last_sources_empty_on_error(self):
+ """last_sources stays empty when the search returns an error."""
+ store = make_mock_store(error="Search error: n_results must be a positive integer")
+ tool = CourseSearchTool(store)
+ tool.execute(query="something")
+
+ assert tool.last_sources == []
+
+ def test_execute_passes_course_name_to_store(self):
+ """course_name kwarg is forwarded to VectorStore.search()."""
+ store = make_mock_store(docs=[], metadata=[])
+ tool = CourseSearchTool(store)
+ tool.execute(query="topic", course_name="MCP Course")
+
+ store.search.assert_called_once_with(
+ query="topic", course_name="MCP Course", lesson_number=None
+ )
+
+ def test_execute_passes_lesson_number_to_store(self):
+ """lesson_number kwarg is forwarded to VectorStore.search()."""
+ store = make_mock_store(docs=[], metadata=[])
+ tool = CourseSearchTool(store)
+ tool.execute(query="topic", lesson_number=4)
+
+ store.search.assert_called_once_with(
+ query="topic", course_name=None, lesson_number=4
+ )
+
+
+# ---------------------------------------------------------------------------
+# Integration test — uses a real in-memory ChromaDB with max_results=0
+# This test PASSES by asserting the error behavior, documenting the bug.
+# ---------------------------------------------------------------------------
+
+class TestCourseSearchToolIntegration:
+
+ @pytest.fixture()
+ def in_memory_store_broken(self, tmp_path):
+ """VectorStore backed by ChromaDB with max_results=0 (broken config)."""
+ store = VectorStore(
+ chroma_path=str(tmp_path / "chroma"),
+ embedding_model="all-MiniLM-L6-v2",
+ max_results=0, # Mirrors the broken config value
+ )
+ # Add one document so the collection is non-empty
+ from models import Course, Lesson, CourseChunk
+ course = Course(
+ title="Test Course",
+ course_link="http://example.com",
+ instructor="Test Instructor",
+ lessons=[Lesson(lesson_number=1, title="Intro", lesson_link="http://example.com/1")],
+ )
+ store.add_course_metadata(course)
+ store.add_course_content([
+ CourseChunk(
+ content="This lesson covers the basics of Python.",
+ course_title="Test Course",
+ lesson_number=1,
+ chunk_index=0,
+ )
+ ])
+ return store
+
+ @pytest.fixture()
+ def in_memory_store_fixed(self, tmp_path):
+ """VectorStore backed by ChromaDB with max_results=5 (correct config)."""
+ store = VectorStore(
+ chroma_path=str(tmp_path / "chroma_fixed"),
+ embedding_model="all-MiniLM-L6-v2",
+ max_results=5, # Correct value
+ )
+ from models import Course, Lesson, CourseChunk
+ course = Course(
+ title="Test Course",
+ course_link="http://example.com",
+ instructor="Test Instructor",
+ lessons=[Lesson(lesson_number=1, title="Intro", lesson_link="http://example.com/1")],
+ )
+ store.add_course_metadata(course)
+ store.add_course_content([
+ CourseChunk(
+ content="This lesson covers the basics of Python.",
+ course_title="Test Course",
+ lesson_number=1,
+ chunk_index=0,
+ )
+ ])
+ return store
+
+ def test_search_with_zero_max_results_returns_error(self, in_memory_store_broken):
+ """
+ Documents the bug: with max_results=0, ChromaDB raises ValueError and
+ VectorStore wraps it as a 'Search error' string instead of returning content.
+ This test PASSES because it asserts the broken behavior exists.
+ The FIX is in config.py: MAX_RESULTS=5 so the production store never uses 0.
+ """
+ tool = CourseSearchTool(in_memory_store_broken)
+ result = tool.execute(query="Python basics")
+
+ # max_results=0 causes ChromaDB to raise:
+ # "Number of requested results 0, cannot be negative, or zero."
+ assert "Search error" in result, (
+ "Expected a search error with max_results=0 — "
+ "the bug behavior is not reproducible."
+ )
+
+ def test_search_with_positive_max_results_returns_content(self, in_memory_store_fixed):
+ """With max_results=5, search returns actual course content."""
+ tool = CourseSearchTool(in_memory_store_fixed)
+ result = tool.execute(query="Python basics")
+
+ assert "Search error" not in result
+ assert "Test Course" in result
diff --git a/backend/tests/test_rag_system.py b/backend/tests/test_rag_system.py
new file mode 100644
index 000000000..eca920b15
--- /dev/null
+++ b/backend/tests/test_rag_system.py
@@ -0,0 +1,218 @@
+"""
+Tests for RAGSystem.query() in rag_system.py.
+
+These tests verify how the full RAG pipeline handles content-related questions.
+Two integration tests explicitly demonstrate the MAX_RESULTS=0 bug:
+ - test_search_tool_fails_with_zero_max_results → PASSES by asserting the broken behavior
+ - test_config_max_results_is_positive → FAILS on the broken system, PASSES after the fix
+"""
+import sys
+import os
+import pytest
+from unittest.mock import MagicMock, patch
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from rag_system import RAGSystem
+from search_tools import CourseSearchTool, ToolManager
+from vector_store import VectorStore
+from models import Course, Lesson, CourseChunk
+from config import config
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def make_rag_system_with_mock_ai(vector_store=None):
+ """
+ Build a RAGSystem where only AIGenerator is mocked out.
+ This lets us test the full tool-calling pipeline without real API calls.
+ """
+ with patch("rag_system.AIGenerator") as MockAI:
+ mock_ai_instance = MagicMock()
+ MockAI.return_value = mock_ai_instance
+ rag = RAGSystem(config)
+ if vector_store:
+ rag.vector_store = vector_store
+ rag.search_tool.store = vector_store
+ rag.outline_tool.store = vector_store
+ return rag, mock_ai_instance
+
+
+# ---------------------------------------------------------------------------
+# Unit tests — AIGenerator is mocked
+# ---------------------------------------------------------------------------
+
+class TestRAGSystemQuery:
+
+ def test_query_returns_tuple_of_answer_and_sources(self):
+ """query() always returns a (str, list) tuple."""
+ rag, mock_ai = make_rag_system_with_mock_ai()
+ mock_ai.generate_response.return_value = "Neural networks are layers of nodes."
+
+ answer, sources = rag.query("What are neural networks?")
+
+ assert isinstance(answer, str)
+ assert isinstance(sources, list)
+ assert answer == "Neural networks are layers of nodes."
+
+ def test_query_passes_tool_definitions_to_ai_generator(self):
+ """generate_response is called with tool definitions from ToolManager."""
+ rag, mock_ai = make_rag_system_with_mock_ai()
+ mock_ai.generate_response.return_value = "Some answer."
+
+ rag.query("What is in lesson 2?")
+
+ call_kwargs = mock_ai.generate_response.call_args[1]
+ assert "tools" in call_kwargs
+ tool_names = [t["name"] for t in call_kwargs["tools"]]
+ assert "search_course_content" in tool_names
+
+ def test_query_passes_tool_manager_to_ai_generator(self):
+ """generate_response receives the tool_manager so it can execute tools."""
+ rag, mock_ai = make_rag_system_with_mock_ai()
+ mock_ai.generate_response.return_value = "Answer."
+
+ rag.query("What is MCP?")
+
+ call_kwargs = mock_ai.generate_response.call_args[1]
+ assert "tool_manager" in call_kwargs
+ assert call_kwargs["tool_manager"] is rag.tool_manager
+
+ def test_query_wraps_user_question_in_prompt(self):
+ """RAGSystem prepends context to the user query before passing to AIGenerator."""
+ rag, mock_ai = make_rag_system_with_mock_ai()
+ mock_ai.generate_response.return_value = "Answer."
+
+ rag.query("What is reinforcement learning?")
+
+ call_kwargs = mock_ai.generate_response.call_args[1]
+ assert "What is reinforcement learning?" in call_kwargs["query"]
+
+ def test_query_saves_exchange_to_session(self):
+ """After a query, the exchange is stored in SessionManager."""
+ rag, mock_ai = make_rag_system_with_mock_ai()
+ mock_ai.generate_response.return_value = "RL answer."
+
+ session_id = rag.session_manager.create_session()
+ rag.query("What is RL?", session_id=session_id)
+
+ history = rag.session_manager.get_conversation_history(session_id)
+ assert history is not None
+ assert "What is RL?" in history
+ assert "RL answer." in history
+
+ def test_query_resets_sources_after_retrieval(self):
+ """Sources are cleared from the tool manager after each query."""
+ rag, mock_ai = make_rag_system_with_mock_ai()
+ mock_ai.generate_response.return_value = "Answer."
+
+ # Manually pre-populate sources to simulate a previous search
+ rag.search_tool.last_sources = ["Some Course - Lesson 1"]
+
+ rag.query("New question?")
+
+ # After query(), sources should be cleared
+ assert rag.tool_manager.get_last_sources() == []
+
+
+# ---------------------------------------------------------------------------
+# Integration tests — expose the MAX_RESULTS=0 bug using real ChromaDB
+# ---------------------------------------------------------------------------
+
+class TestRAGSystemSearchIntegration:
+ """
+ These tests build a real in-memory ChromaDB with sample data.
+ They demonstrate that MAX_RESULTS=0 causes search to fail.
+ """
+
+ @pytest.fixture()
+ def populated_store_broken(self, tmp_path):
+ """VectorStore with max_results=0 (mirrors broken config)."""
+ store = VectorStore(
+ chroma_path=str(tmp_path / "broken"),
+ embedding_model="all-MiniLM-L6-v2",
+ max_results=0,
+ )
+ course = Course(
+ title="Intro to ML",
+ course_link="http://example.com/ml",
+ instructor="Test Instructor",
+ lessons=[Lesson(lesson_number=1, title="Supervised Learning", lesson_link="http://example.com/ml/1")],
+ )
+ store.add_course_metadata(course)
+ store.add_course_content([
+ CourseChunk(
+ content="Supervised learning uses labeled training data to learn a mapping from inputs to outputs.",
+ course_title="Intro to ML",
+ lesson_number=1,
+ chunk_index=0,
+ )
+ ])
+ return store
+
+ @pytest.fixture()
+ def populated_store_fixed(self, tmp_path):
+ """VectorStore with max_results=5 (correct config)."""
+ store = VectorStore(
+ chroma_path=str(tmp_path / "fixed"),
+ embedding_model="all-MiniLM-L6-v2",
+ max_results=5,
+ )
+ course = Course(
+ title="Intro to ML",
+ course_link="http://example.com/ml",
+ instructor="Test Instructor",
+ lessons=[Lesson(lesson_number=1, title="Supervised Learning", lesson_link="http://example.com/ml/1")],
+ )
+ store.add_course_metadata(course)
+ store.add_course_content([
+ CourseChunk(
+ content="Supervised learning uses labeled training data to learn a mapping from inputs to outputs.",
+ course_title="Intro to ML",
+ lesson_number=1,
+ chunk_index=0,
+ )
+ ])
+ return store
+
+ def test_search_tool_fails_with_zero_max_results(self, populated_store_broken):
+ """
+ Documents the bug: with max_results=0, the search tool returns a 'Search error'
+ string instead of course content. ChromaDB raises:
+ 'Number of requested results 0, cannot be negative, or zero.'
+ This test PASSES by asserting that broken behavior occurs when max_results=0.
+ The fix is in config.py: change MAX_RESULTS from 0 to 5.
+ """
+ tool = CourseSearchTool(populated_store_broken)
+ result = tool.execute(query="supervised learning")
+
+ assert "Search error" in result, (
+ "Expected a search error with max_results=0 — "
+ "the bug behavior is not reproducible."
+ )
+
+ def test_search_tool_succeeds_with_positive_max_results(self, populated_store_fixed):
+ """
+ After the fix, search returns actual course content.
+        This test PASSES even on the broken system (because it uses max_results=5
+ directly), confirming the fix works when max_results is positive.
+ """
+ tool = CourseSearchTool(populated_store_fixed)
+ result = tool.execute(query="supervised learning")
+
+ assert "Search error" not in result
+ assert "Intro to ML" in result
+ assert "Supervised" in result or "labeled" in result
+
+ def test_config_max_results_is_positive(self):
+ """
+ Verifies that the config value MAX_RESULTS is a positive integer.
+ This test FAILS on the broken system where MAX_RESULTS=0.
+ """
+ assert config.MAX_RESULTS > 0, (
+ f"BUG: config.MAX_RESULTS is {config.MAX_RESULTS}. "
+ "It must be a positive integer (e.g. 5) for searches to work. "
+ "Fix: change MAX_RESULTS in backend/config.py"
+ )
diff --git a/backend/vector_store.py b/backend/vector_store.py
index 390abe71c..55a80ef10 100644
--- a/backend/vector_store.py
+++ b/backend/vector_store.py
@@ -147,15 +147,19 @@ def add_course_metadata(self, course: Course):
"lesson_link": lesson.lesson_link
})
+ raw_metadata = {
+ "title": course.title,
+ "instructor": course.instructor,
+ "course_link": course.course_link,
+ "lessons_json": json.dumps(lessons_metadata),
+ "lesson_count": len(course.lessons)
+ }
+ # ChromaDB rejects None metadata values; replace with empty string
+ metadata = {k: (v if v is not None else "") for k, v in raw_metadata.items()}
+
self.course_catalog.add(
documents=[course_text],
- metadatas=[{
- "title": course.title,
- "instructor": course.instructor,
- "course_link": course.course_link,
- "lessons_json": json.dumps(lessons_metadata), # Serialize as JSON string
- "lesson_count": len(course.lessons)
- }],
+ metadatas=[metadata],
ids=[course.title]
)
@@ -264,4 +268,25 @@ def get_lesson_link(self, course_title: str, lesson_number: int) -> Optional[str
return None
except Exception as e:
print(f"Error getting lesson link: {e}")
-
\ No newline at end of file
+
+ def get_course_outline(self, course_name: str) -> Optional[Dict]:
+ """Get course outline (title, link, lessons list) for a given course name"""
+ import json
+ try:
+ resolved_title = self._resolve_course_name(course_name)
+ if not resolved_title:
+ return None
+ results = self.course_catalog.get(ids=[resolved_title])
+ if results and 'metadatas' in results and results['metadatas']:
+ metadata = results['metadatas'][0]
+ lessons_json = metadata.get('lessons_json')
+ lessons = json.loads(lessons_json) if lessons_json else []
+ return {
+ "title": metadata.get('title', resolved_title),
+ "course_link": metadata.get('course_link', ''),
+ "lessons": lessons
+ }
+ return None
+ except Exception as e:
+ print(f"Error getting course outline: {e}")
+ return None
diff --git a/frontend-changes.md b/frontend-changes.md
new file mode 100644
index 000000000..e2c792027
--- /dev/null
+++ b/frontend-changes.md
@@ -0,0 +1,139 @@
+# Frontend Code Quality Changes
+
+## Summary
+
+Added code quality tooling for the frontend (vanilla JS/CSS/HTML) and applied formatting consistency fixes to the existing source files.
+
+---
+
+## New Files
+
+### `package.json`
+Defines the project's frontend dev dependencies and npm scripts:
+- `npm run format` — formats all frontend files with Prettier (modifies in place)
+- `npm run format:check` — checks formatting without modifying files
+- `npm run lint` — lints `frontend/script.js` with ESLint
+- `npm run lint:fix` — auto-fixes ESLint issues in `frontend/script.js`
+
+**Dev dependencies added:** `prettier@^3.3.0`, `eslint@^8.57.0`
+
+---
+
+### `.prettierrc`
+Prettier configuration matching the existing code style:
+- `tabWidth: 4` — 4-space indentation (consistent with current files)
+- `singleQuote: true` — single quotes for JS strings
+- `trailingComma: "es5"` — trailing commas in arrays/objects
+- `printWidth: 88` — max line length
+- `endOfLine: "lf"` — Unix line endings
+
+---
+
+### `.eslintrc.js`
+ESLint configuration for browser-side JavaScript:
+- Extends `eslint:recommended` ruleset
+- Declares `marked` as a known global (loaded via CDN)
+- `no-unused-vars`: warn
+- `no-console`: warn (flags debug `console.log` calls left in production code)
+
+---
+
+### `scripts/frontend-format.sh`
+Shell script to format all frontend files in one command:
+```bash
+./scripts/frontend-format.sh
+```
+Runs `prettier --write` over `frontend/**/*.{js,css,html}`.
+
+---
+
+### `scripts/frontend-lint.sh`
+Shell script to check frontend quality without modifying files (suitable for CI):
+```bash
+./scripts/frontend-lint.sh
+```
+Runs `prettier --check` then `eslint` and exits non-zero if any check fails.
+
+---
+
+## Modified Files
+
+### `frontend/script.js`
+- Removed two double blank lines (between event listeners and between top-level functions) to match Prettier's single-blank-line-between-blocks style.
+
+### `frontend/index.html`
+- Removed an extra blank line between the closing `` and the `
-
+