From 22ba3ab227b0a511e6bc00cbba1b765c28b19d0a Mon Sep 17 00:00:00 2001 From: Anik Bhattacharjee Date: Tue, 31 Mar 2026 16:17:23 -0400 Subject: [PATCH] LCORE-1595: Design doc/s for MCP Apps integration This PR introduces design docs for introducing support for the MCP Apps extension into Lightspeed core, such that responses from tools can include interactive UI components, instead of a "text wall that summarizes complex data" Signed-off-by: Anik Bhattacharjee --- .../example_mcp_server.md | 174 ++++ .../mcp-apps-dependency-graph.md | 68 ++ .../mcp-apps-for-ui-components/mcp-apps.md | 952 ++++++++++++++++++ 3 files changed, 1194 insertions(+) create mode 100644 docs/design/mcp-apps-for-ui-components/example_mcp_server.md create mode 100644 docs/design/mcp-apps-for-ui-components/mcp-apps-dependency-graph.md create mode 100644 docs/design/mcp-apps-for-ui-components/mcp-apps.md diff --git a/docs/design/mcp-apps-for-ui-components/example_mcp_server.md b/docs/design/mcp-apps-for-ui-components/example_mcp_server.md new file mode 100644 index 000000000..b4fb953ba --- /dev/null +++ b/docs/design/mcp-apps-for-ui-components/example_mcp_server.md @@ -0,0 +1,174 @@ +**Example MCP Server Implementing MCP Apps pattern:** + +```python +from mcp.server import Server +from mcp.types import Tool, TextContent, Resource +import mcp.server.stdio + +server = Server("example-dashboard-server") + +# Step 1: Define tool with _meta.ui.resourceUri +@server.list_tools() +async def list_tools() -> list[Tool]: + return [ + Tool( + name="get_metrics", + description="Get system metrics with interactive dashboard", + inputSchema={ + "type": "object", + "properties": { + "timeframe": { + "type": "string", + "enum": ["1h", "24h", "7d"], + "default": "1h" + } + } + }, + # MCP Apps metadata - declares UI resource + _meta={ + "ui": { + "resourceUri": "ui://example-dashboard-server/metrics-dashboard" + } + } + ) + ] + +# Step 2: Implement the tool (returns data) +@server.call_tool() +async def 
call_tool(name: str, arguments: dict) -> list[TextContent]: + if name == "get_metrics": + timeframe = arguments.get("timeframe", "1h") + + # Return raw data as JSON + metrics_data = { + "cpu": [45, 52, 48, 61, 55], + "memory": [2.1, 2.3, 2.2, 2.5, 2.4], + "timeframe": timeframe + } + + return [TextContent( + type="text", + text=str(metrics_data) + )] + +# Step 3: Implement resource handler (returns UI) +@server.list_resources() +async def list_resources() -> list[Resource]: + return [ + Resource( + uri="ui://example-dashboard-server/metrics-dashboard", + name="Metrics Dashboard", + mimeType="text/html", + description="Interactive metrics visualization" + ) + ] + +@server.read_resource() +async def read_resource(uri: str) -> str: + if uri == "ui://example-dashboard-server/metrics-dashboard": + # Return HTML/JavaScript UI component + return """ + + + Metrics Dashboard + + + + +

<h1>System Metrics Dashboard</h1>

+ + + + +""" + + raise ValueError(f"Unknown resource: {uri}") + +# Run server +async def main(): + async with mcp.server.stdio.stdio_server() as (read_stream, write_stream): + await server.run( + read_stream, + write_stream, + server.create_initialization_options() + ) +``` + +**Key Pattern Elements:** + +1. **Tool Definition** (`@server.list_tools()`): + - Tool has `_meta.ui.resourceUri` pointing to UI resource + - Tool returns data as JSON/text + +2. **Resource Definition** (`@server.list_resources()`): + - Declares available UI resources with `ui://` URIs + - Specifies MIME type (typically `text/html`) + +3. **Resource Implementation** (`@server.read_resource()`): + - Returns complete HTML/JavaScript UI component + - UI uses `@modelcontextprotocol/ext-apps` for bidirectional communication + - `app.ontoolresult` receives data from tool execution + - `app.callTool()` enables UI to call tools back (e.g., refresh button) + +**Flow:** +1. LLM calls `get_metrics` tool +2. Host sees `_meta.ui.resourceUri` in tool definition +3. Host fetches UI via `read_resource(uri="ui://example-dashboard-server/metrics-dashboard")` +4. Host renders HTML in sandboxed iframe +5. UI receives tool result via postMessage +6. User clicks "Refresh" → UI calls `get_metrics` again via host \ No newline at end of file diff --git a/docs/design/mcp-apps-for-ui-components/mcp-apps-dependency-graph.md b/docs/design/mcp-apps-for-ui-components/mcp-apps-dependency-graph.md new file mode 100644 index 000000000..d275ebed1 --- /dev/null +++ b/docs/design/mcp-apps-for-ui-components/mcp-apps-dependency-graph.md @@ -0,0 +1,68 @@ +# MCP Apps Implementation Dependency Graph + +## Visual Dependency Graph + +```mermaid +graph TD + %% Llama Stack Work (External) + T1[LCORE-1727 Implement llama-stack
<br/>Resources API] + T2[LCORE-1807 Implement llama-stack
<br/>Tool Invocation API] + + %% Core Dependencies + T3[LCORE-???? Upgrade llama-stack
<br/>dependencies] + + %% Foundation Layer - Can start in parallel after upgrade + T4[LCORE-1808 Implement
<br/>ToolDefinitionCache] + T6[LCORE-1810 Extend ToolResultSummary
<br/>with ui_resource field] + T12[LCORE-???? Implement
<br/>/v1/tools/invoke endpoint] + + %% Implementation Layer + T5[LCORE-???? Update /v1/tools
<br/>to populate cache] + T7[LCORE-1811 Implement inline
<br/>UI resource fetching] + + %% Integration Layer + T8[LCORE-1812 Wire tool cache
through query flow] + + %% Testing Layer + T10[LCORE-1813 Add integration tests] + T11[LCORE-1816 Add E2E tests] + + %% Documentation Layer + T14[LCORE-1815 Documentation] + + %% Dependencies + T1 --> T3 + T2 --> T3 + + T3 --> T4 + T3 --> T6 + T3 --> T12 + + T4 --> T5 + T4 --> T7 + + T6 --> T7 + + T7 --> T8 + T7 --> T10 + + T8 --> T10 + T8 --> T11 + + T12 --> T14 + + T7 --> T14 + + %% Styling + classDef external fill:#ffcccc,stroke:#cc0000,stroke-width:2px + classDef foundation fill:#ccffcc,stroke:#00cc00,stroke-width:2px + classDef implementation fill:#ccccff,stroke:#0000cc,stroke-width:2px + classDef testing fill:#ffffcc,stroke:#cccc00,stroke-width:2px + classDef docs fill:#ffccff,stroke:#cc00cc,stroke-width:2px + + class T1,T2 external + class T3,T4,T6,T12 foundation + class T5,T7,T8 implementation + class T10,T11 testing + class T14 docs +``` diff --git a/docs/design/mcp-apps-for-ui-components/mcp-apps.md b/docs/design/mcp-apps-for-ui-components/mcp-apps.md new file mode 100644 index 000000000..8150e066b --- /dev/null +++ b/docs/design/mcp-apps-for-ui-components/mcp-apps.md @@ -0,0 +1,952 @@ +# MCP Apps Integration Design + +## Summary + +This document outlines the design for integrating [MCP Apps](https://modelcontextprotocol.io/docs/extensions/apps) support into Lightspeed Core Stack (LCS). MCP Apps enable MCP servers to return interactive UI components (charts, tables, dashboards) that render directly in conversation streams, replacing text-only responses with rich, interactive visualizations. + +**Key Goal:** Enable Lightspeed to act as an MCP Host capable of discovering, fetching, and returning UI resources with full HTML content from MCP servers that support the MCP Apps extension. 
+ +**Chosen Approach:** Extend Llama Stack with two new APIs: +- Resources API for fetching UI resources ([issue #5430](https://github.com/llamastack/llama-stack/issues/5430)) +- Tool Invocation API for bidirectional communication ([issue #5512](https://github.com/llamastack/llama-stack/issues/5512)) + +**Implementation Strategy:** +1. Contribute/wait for Resources API and Tool Invocation API in llama-stack +2. Upgrade llama-stack dependencies when available +3. Fetch UI resources **inline during query processing** via `client.resources.read()` +4. Include full HTML content directly in query response `ui_resource.content` field +5. Add `POST /v1/tools/invoke` endpoint for direct tool calls from MCP Apps UIs + +**Key Benefits:** +- **Single request flow**: Clients receive tool results with enriched UI resources together +- **Direct tool invocation**: MCP Apps UIs can call tools deterministically without LLM overhead +- **Minimal custom code**: Leverages llama-stack implementation, benefits entire ecosystem + +This approach minimizes custom code in Lightspeed while providing maximum value to the broader llama-stack community. + +## Background + +### Current State + +Lightspeed Core Stack already supports: +- MCP server registration (static via config, dynamic via API) +- Tool discovery from MCP servers via Llama Stack +- Tool invocation with result capture in text format +- MCP authentication (file-based, K8s, client-provided, OAuth, header propagation) + +**Limitations:** +- Tools return only text/JSON results +- Complex data (performance metrics, resource lists, cost analysis) returned as "text walls" +- No support for interactive UI components (charts, dashboards etc) + +### MCP Apps Protocol Overview + +Based on [official MCP Apps documentation](https://modelcontextprotocol.io/docs/extensions/apps): + +**Core Concepts:** +1. **UI Resources**: HTML/JavaScript interfaces served via `ui://` URI scheme +2. 
**Tool Metadata**: Tools declare UI capabilities via `_meta.ui.resourceUri` field +3. **Host Rendering**: Clients fetch UI resources and render in sandboxed iframes +4. **Bidirectional Communication**: postMessage-based JSON-RPC protocol between UI and host + +**Example Tool with UI:** +```json +{ + "name": "visualize_data", + "description": "Visualize data as an interactive chart", + "inputSchema": { ... }, + "_meta": { + "ui": { + "resourceUri": "ui://charts/interactive" + } + } +} +``` + +**Protocol Flow:** +``` +1. LLM selects tool with _meta.ui.resourceUri +2. Host fetches UI resource from MCP server (ui:// → HTML) +3. Host renders HTML in sandboxed iframe +4. UI initializes with ui/initialize handshake +5. Tool result pushed to UI via postMessage +6. UI can call tools back via tools/call requests +``` + +### Problem Statement + +**User Request:** +> "The goal of this RFE is to integrate support for the MCP Apps extension into Lightspeed core. This will allow Lightspeed developers to move beyond plain text responses by returning interactive UI components—such as charts, tables, and dashboards—that render directly within the conversation stream." 
+ +**Example Use Cases:** +- Kubernetes cluster metrics → Interactive dashboards instead of JSON dumps +- Real-time monitoring → Live-updating dashboards + +## Architecture Analysis + +### Current Lightspeed Query Flow + +``` +┌──────────────┐ +│ Client (UI) │ +└──────┬───────┘ + │ POST /v1/query + ↓ +┌────────────────────────────────┐ +│ Lightspeed Core Stack │ +│ /app/endpoints/query.py │ +│ - prepare_responses_params() │ +│ - build_mcp_headers() │ +└──────┬─────────────────────────┘ + │ Responses API call + ↓ +┌────────────────────────────────┐ +│ Llama Stack (0.5.2) │ +│ - Model inference │ +│ - MCP tool orchestration │ +└──────┬─────────────────────────┘ + │ MCP protocol (SSE) + ↓ +┌────────────────────────────────┐ +│ MCP Server (e.g., kube-mcp) │ +│ - Tool execution │ +│ - Returns text/JSON results │ +└────────────────────────────────┘ +``` + +## Design + +### Extend Llama Stack with Resources API + +**Approach:** Contribute Resources API support to llama-stack (via [issue #5430](https://github.com/llamastack/llama-stack/issues/5430)), then upgrade and integrate + +**Implementation Plan:** + +1. **Phase 1: Llama Stack Enhancement** + - Implement Resources API in llama-stack based on [issue #5430](https://github.com/llamastack/llama-stack/issues/5430) + - Add `list_mcp_resources()` and `read_mcp_resource()` functions to `mcp.py` + - Expose `/v1/resources/list` and `/v1/resources/read` endpoints + - Release new llama-stack version with MCP Apps support + +2. **Phase 2: Lightspeed Integration** + - Upgrade `llama-stack` and `llama-stack-client` dependencies + - Implement `ToolDefinitionCache` to store `_meta.ui` fields + - Extend `ToolResultSummary` with `ui_resource` field (includes full HTML content) + - Modify `build_tool_call_summary()` to fetch UI resources inline during query processing + +**Architecture:** +``` +┌──────────────┐ +│ Client (UI) │ +└──────┬───────┘ + │ POST /v1/query + ↓ +┌────────────────────────────────┐ +│ Lightspeed Core Stack │ +│ 1. 
Process query via Llama │ +│ 2. Detect tool with _meta.ui │ +│ 3. Call resources API inline │ ← Fetch during query processing +│ 4. Include HTML in response │ +└──────┬─────────────────────────┘ + │ POST /v1/resources/read + ↓ +┌────────────────────────────────┐ +│ Llama Stack (Resources API) │ +│ - read_mcp_resource() │ ← Core implementation +│ - Handle SSE/HTTP transports │ +└──────┬─────────────────────────┘ + │ resources/read (MCP protocol) + ↓ +┌────────────────────────────────┐ +│ MCP Server (e.g., kube-mcp) │ +│ - Return UI resource HTML │ +└────────────────────────────────┘ +``` + +**Pros:** +- Minimal custom code in Lightspeed +- Leverages official llama-stack implementation +- Maintains clean architectural layer separation +- Reuses existing auth/session management from llama-stack +- Community benefit: MCP Apps support for all llama-stack users + +**Cons:** +- Depends on llama-stack development timeline +- Requires coordination with llama-stack maintainers + + +### Data Model Changes + +The primary heavy lifting for resource retrieval will be handled by the Llama Stack Resources API. Lightspeed simply calls `client.resources.read()` during query processing to fetch UI content inline. + +1.1 UI Resource Metadata + +We will introduce a `UIResourceMetadata` model to encapsulate the details required for the frontend to render interactive components. + +```python +class UIResourceMetadata(BaseModel): + """UI resource content for MCP Apps.""" + resource_uri: str # e.g., ui://server/path + server_name: str # Hosting MCP server + content: str # HTML content (or base64 if binary) + mime_type: str = "text/html" # Content type + is_binary: bool = False # Whether content is base64-encoded +``` + +1.2 Extended Tool Result Summary + +The existing `ToolResultSummary` will be updated to include an optional `ui_resource` field. This will signal to the frontend that a tool output has an associated interactive interface. 
+ +**Current Model** (in `src/utils/types.py`): +```python +class ToolResultSummary(BaseModel): + """Model representing a result from a tool call.""" + + id: str + status: str + content: str + type: str + round: int +``` + +**Extended Model**: +```python +class ToolResultSummary(BaseModel): + """Model representing a result from a tool call.""" + + id: str + status: str + content: str + type: str + round: int + ui_resource: Optional[UIResourceMetadata] = Field( + None, + description="UI resource metadata for MCP Apps (if tool supports interactive UI)" + ) +``` + +**Field Behavior**: +- `ui_resource` is optional (defaults to `None`) +- Only populated when tool definition contains `_meta.ui.resourceUri` +- Contains full HTML content, not just metadata (inline fetching approach) +- Clients should check if field is present before attempting to render UI component + +1.3 Tool Definition Cache + +**Cache Invalidation Strategy:** + +The cache will be updated with MCP server tool definitions on every `GET /v1/tools` call: + +**Implementation:** + +```python +"""Tool definition caching for MCP Apps metadata lookup.""" + +from typing import Any, Optional +from datetime import datetime +from utils.types import Singleton + +class ToolDefinitionCache(metaclass=Singleton): + """Cache for tool definitions with _meta.ui information. + + Cache is refreshed on-demand when update_from_toolgroups() is called, + typically during GET /v1/tools requests. + """ + + _tools: dict[str, dict[str, Any]] = {} + _last_update: Optional[datetime] = None + + def update_from_toolgroups(self, toolgroups: list[Any]) -> None: + """Update cache from Llama Stack toolgroups response. 
+ + Args: + toolgroups: List of toolgroup objects from client.toolgroups.list() + """ + # Clear existing cache + self._tools.clear() + + # Iterate toolgroups and cache tools with their metadata + for toolgroup in toolgroups: + if hasattr(toolgroup, 'tools'): + for tool in toolgroup.tools: + # Store full tool definition including _meta + self._tools[tool.name] = { + "name": tool.name, + "description": tool.description, + "_meta": tool.metadata if hasattr(tool, 'metadata') else {}, + } + + self._last_update = datetime.utcnow() + + def get_tool_metadata(self, tool_name: str) -> Optional[dict[str, Any]]: + """Get cached tool definition with metadata. + + Args: + tool_name: Name of the tool + + Returns: + Tool definition dict with _meta field, or None if not found + """ + return self._tools.get(tool_name) + + def get_last_update(self) -> Optional[datetime]: + """Get timestamp of last cache update. + + Returns: + Datetime of last update, or None if never updated + """ + return self._last_update + + def clear(self) -> None: + """Clear the tool cache.""" + self._tools.clear() + self._last_update = None +``` + +2. Implementation Roadmap + +The section discusses the implementation in lightspeed-stack, and assumes that the [Resources API is available +in llama-stack](https://github.com/llamastack/llama-stack/issues/5430). + +**Llama Stack Integration** + +Upgrade llama-stack and llama-stack-client dependencies to the release that Resources API is available in. This phase establishes the foundation for calling `client.resources.read()` to fetch UI resources. + +**Response Enrichment** + +The response pipeline will be modified to detect UI-capable tools and fetch their HTML content: + +**Step 1: Update `/v1/tools` endpoint to populate cache** + +```python +# In src/app/endpoints/tools.py + +from utils.tool_cache import ToolDefinitionCache + +@router.get("/tools", ...) 
+async def list_tools(...): + # Existing code + toolgroups_response = await client.toolgroups.list() + + # NEW: Refresh cache on every tools list request (on-demand invalidation) + ToolDefinitionCache().update_from_toolgroups(toolgroups_response.data) + + # Rest of existing code + return ListToolsResponse(...) +``` + +**Cache Invalidation:** +- Cache refreshes on every `GET /v1/tools` call +- Ensures metadata is always in sync with MCP server state +- No risk of stale `_meta.ui` references + +**Step 2: Fetch UI resources inline during query processing** + +**Implementation Example:** + +```python +# In src/utils/responses.py - build_tool_call_summary() + +async def build_tool_call_summary( + output_item: ResponseOutput, + tool_cache: Optional[ToolDefinitionCache] = None, + mcp_server_url: Optional[str] = None, +) -> tuple[Optional[ToolCallSummary], Optional[ToolResultSummary]]: + # ... existing code ... + + if item_type == "mcp_call": + mcp_call_item = cast(MCPCall, output_item) + + # Build standard tool result + tool_result = ToolResultSummary( + id=mcp_call_item.call_id, + status="success", + content=result_content, + type="mcp_call", + round=current_round, + ) + + # NEW: Check for UI resource and fetch inline + if tool_cache: + tool_def = tool_cache.get_tool_metadata(mcp_call_item.name) + if tool_def and "_meta" in tool_def and "ui" in tool_def["_meta"]: + resource_uri = tool_def["_meta"]["ui"].get("resourceUri") + if resource_uri and mcp_server_url: + try: + # Fetch UI resource from llama-stack + from client import client + resource_response = await client.resources.read( + mcp_endpoint={"uri": mcp_server_url}, + uri=resource_uri, + ) + + # Include full content in response + tool_result.ui_resource = UIResourceMetadata( + resource_uri=resource_uri, + server_name=extract_server_name(mcp_server_url), + content=resource_response.content, + mime_type=resource_response.mime_type, + is_binary=resource_response.is_binary, + ) + except Exception as e: + 
logger.warning(f"Failed to fetch UI resource {resource_uri}: {e}") + # Continue without ui_resource - tool result still valid + + return tool_call, tool_result +``` + +**Key Simplification:** +UI resources are fetched **during query processing** and included directly in the response. No separate endpoint needed - the client receives everything in one request. + +**Step 3: Add direct tool invocation endpoint** + +For MCP Apps bidirectional communication, add `POST /v1/tools/invoke` endpoint: + +```python +# In src/app/endpoints/tools.py + +@router.post("/tools/invoke") +@authorize(Action.INVOKE_TOOL) +async def invoke_tool_endpoint( + request: InvokeToolRequest, + auth: Annotated[AuthTuple, Depends(get_auth_dependency())], + mcp_headers: McpHeaders = Depends(mcp_headers_dependency), +) -> InvokeToolResponse: + """Invoke a tool directly (for MCP Apps bidirectional communication).""" + client = AsyncLlamaStackClientHolder().get_client() + + result = await client.tools.invoke( + tool_name=request.tool_name, + arguments=request.arguments, + extra_headers=build_mcp_headers(configuration, mcp_headers, request.headers, token), + ) + + return InvokeToolResponse(tool_name=request.tool_name, result=result) +``` + +**Dependency:** Requires llama-stack to expose `invoke_tool` as HTTP endpoint. See [llama-stack issue #5512](https://github.com/llamastack/llama-stack/issues/5512). + +3. API Specification + +**Modified Query Response (POST /v1/query)** + +The system will enrich tool results with the full UI resource content fetched from llama-stack. 
+ +**Example Response:** + +```json +{ + "response": "Here are the namespaces in an interactive table:", + "tool_results": [ + { + "id": "call-abc", + "status": "success", + "content": "{\"namespaces\": [\"default\", \"kube-system\"]}", + "type": "mcp_call", + "round": 1, + "ui_resource": { + "resource_uri": "ui://kube-mcp/namespaces-table", + "server_name": "kube-mcp", + "content": "...", + "mime_type": "text/html", + "is_binary": false + } + } + ] +} +``` + +**Benefits of Inline Content:** +- **Single request**: Client receives tool result + UI in one response +- **Simpler auth**: No need for client to authenticate twice +- **Atomic rendering**: All data needed arrives together +- **No extra endpoints**: Eliminates `/v1/mcp-ui-resources` complexity + +3.1. Bidirectional Communication Architecture + +**Key Insight:** Lightspeed-stack is **NOT involved** in the bidirectional communication between the client and the MCP Apps UI. This communication happens entirely client-side via postMessage. 
+ +**Communication Flow:** + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Step 1: Initial Query │ +│ │ +│ Client App → POST /v1/query → Lightspeed-stack │ +│ Response includes: tool_result.ui_resource.content (HTML) │ +└─────────────────────────────────────────────────────────────────┘ + +┌─────────────────────────────────────────────────────────────────┐ +│ Step 2: Client Renders UI (Lightspeed-stack done) │ +│ │ +│ ┌──────────────────────────────────────────────────────────┐ │ +│ │ Client Application (Browser/Desktop) │ │ +│ │ │ │ +│ │ ┌────────────────────────────────────────────────────┐ │ │ +│ │ │ Sandboxed iframe │ │ │ +│ │ │ (ui_resource.content rendered) │ │ │ +│ │ │ │ │ │ +│ │ │ │ │ │ +│ │ │ import { App } from '@mcp/ext-apps' │ │ │ +│ │ │ │ │ │ +│ │ │ app.ontoolresult = (result) => { │ │ │ +│ │ │ renderTable(result.content) // Display data │ │ │ +│ │ │ } │ │ │ +│ │ └────────────────────────────────────────────────────┘ │ │ +│ │ │ │ +│ │ Client JS receives tool_result.content │ │ +│ │ Client sends to iframe via postMessage ─────────────────┤ │ +│ └──────────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────┘ + +┌─────────────────────────────────────────────────────────────────┐ +│ Step 3: UI Calls Tool (Client → Lightspeed, NOT iframe direct) │ +│ │ +│ ┌────────────────────────────────────────────────────────┐ │ +│ │ iframe │ │ +│ │ app.callTool("get_pod_details", {name: "pod-1"}) ──┐│ │ +│ └────────────────────────────────────────────────────────│┘ │ +│ │ │ +│ Client JS receives postMessage ←─────────────────────────┘ │ +│ Client makes NEW HTTP request ↓ │ +│ │ +│ POST /v1/tools/invoke → Lightspeed-stack │ +│ Response includes tool result │ +│ Client sends to iframe via postMessage │ +└─────────────────────────────────────────────────────────────────┘ +``` + +**Lightspeed-stack's Responsibilities:** + +| Responsibility | Lightspeed-stack | Client 
Application | +|----------------|------------------|-------------------| +| Fetch HTML from llama-stack | ✅ Yes | ❌ No | +| Include HTML in response | ✅ Yes | ❌ No | +| Render iframe | ❌ No | ✅ Yes | +| Handle postMessage events | ❌ No | ✅ Yes | +| Send tool results to iframe | ❌ No | ✅ Yes | +| Receive tool calls from iframe | ❌ No | ✅ Yes | +| Execute tool calls | ✅ Yes (via /v1/tools/invoke) | ❌ No | + +**Key Points:** + +1. **Lightspeed-stack is stateless**: No WebSocket, no SSE connection to client, no postMessage handling +2. **UI communicates with client, not server**: All postMessage happens between iframe and client JS +3. **Tool calls from UI = new HTTP requests**: When UI calls a tool, client makes a fresh `POST /v1/tools/invoke` +4. **Client implements MCP Apps protocol**: Client must use `@modelcontextprotocol/ext-apps` library or equivalent +5. **Direct tool invocation required**: Uses `/v1/tools/invoke` endpoint (not `/v1/query`) for deterministic, fast execution + +**Client Implementation Example:** + +```javascript +// Client-side code (NOT lightspeed-stack) +const response = await fetch('/v1/query', { + method: 'POST', + body: JSON.stringify({ query: 'List namespaces' }) +}); + +const data = await response.json(); + +// If tool result has UI resource +if (data.tool_results[0].ui_resource) { + const iframe = document.createElement('iframe'); + iframe.sandbox = 'allow-scripts'; + iframe.srcdoc = data.tool_results[0].ui_resource.content; + document.body.appendChild(iframe); + + // Listen for tool calls from UI + window.addEventListener('message', async (event) => { + if (event.source === iframe.contentWindow) { + const message = JSON.parse(event.data); + + if (message.method === 'tools/call') { + // Make direct tool invocation request to lightspeed-stack + const toolResponse = await fetch('/v1/tools/invoke', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + tool_name: message.params.name, + arguments: 
message.params.arguments + }) + }); + + const toolResult = await toolResponse.json(); + + // Send result back to iframe + iframe.contentWindow.postMessage( + JSON.stringify({ id: message.id, result: toolResult.result }), + '*' + ); + } + } + }); + + // Send initial tool result to iframe + iframe.contentWindow.postMessage( + JSON.stringify({ + method: 'ui/toolResult', + params: { content: data.tool_results[0].content } + }), + '*' + ); +} +``` + +**Why This Architecture?** + +- **Stateless backend**: Lightspeed-stack remains a simple REST API, no WebSocket overhead +- **Client flexibility**: Different clients (web, desktop, mobile) can implement postMessage handling differently +- **Security**: Iframe sandbox prevents UI from directly accessing lightspeed-stack APIs +- **Standard HTTP**: Tool calls from UI are just normal query requests + +3.2. Client Compatibility + +**Lightspeed-stack Client Landscape:** + +| Client | Interface | MCP Apps Support | +|--------|-----------|------------------| +| **OpenShift Lightspeed** | Web (OpenShift Console) | ✅ **Full support** (HTML/iframe rendering) | +| **Ansible Lightspeed** | Web/VS Code | ✅ **Full support** (if web-based) | +| **RHEL Lightspeed** | CLI (terminal) | ❌ **No support** (text-only, no HTML rendering) | + +**Design Decision: Always Include ui_resource** + +The `ui_resource` field will be included in responses whenever a tool has `_meta.ui.resourceUri`, regardless of client type. + +**Rationale:** + +1. **Graceful degradation**: CLI clients simply ignore the `ui_resource` field and use `content` (text/JSON) +2. **Simplicity**: No capability negotiation or conditional logic needed +3. **Backward compatibility**: Existing clients already ignore unknown JSON fields +4. **Bandwidth**: HTML content is small (~10-50KB), not a bottleneck +5. 
**Future-proof**: Clients can opt-in to MCP Apps support without server changes + +**Client Behavior:** + +```python +# Web client (OpenShift Console) - renders UI +response = await query(...) +if response.tool_results[0].ui_resource: + render_iframe(response.tool_results[0].ui_resource.content) +else: + display_text(response.tool_results[0].content) + +# CLI client (RHEL Lightspeed) - ignores UI +response = await query(...) +display_text(response.tool_results[0].content) # Ignores ui_resource +``` + +**Guarantees:** + +- Tool result `content` field **always present** (CLI clients get usable data) +- `ui_resource` field is **optional** (clients can safely ignore) +- **No breaking changes** to existing clients +- **Progressive enhancement** for web clients + +3.3. Response Format: Regular vs Streaming API + +Both query endpoints (`POST /v1/query` and `POST /v1/streaming_query`) include `ui_resource` data, but differ in delivery timing. + +**Regular Query API (POST /v1/query)** + +Returns a single JSON response with all data: + +```json +{ + "completion_message": { + "role": "assistant", + "content": "Based on the pod resource usage, here are the top consumers...", + "stop_reason": "end_of_turn", + "tool_calls": [...], + "tool_results": [ + { + "tool_name": "get_pod_resource_usage", + "content": "{\"pods\": [...]}", + "ui_resource": { + "resource_uri": "ui://kube-mcp/pod-usage-chart", + "server_name": "kubernetes-mcp-server", + "content": "...", + "mime_type": "text/html", + "is_binary": false + } + } + ] + } +} +``` + +**Characteristics:** +- Single HTTP response +- All data arrives together atomically +- Client receives `ui_resource` and completion text simultaneously +- Simple to parse - standard JSON deserialization + +**Streaming Query API (POST /v1/streaming_query)** + +Returns Server-Sent Events (SSE) stream with multiple events: + +``` +event: tool_call +data: {"tool_name": "get_pod_resource_usage", "arguments": {...}} + +event: tool_result +data: { + 
"tool_name": "get_pod_resource_usage", + "content": "{\"pods\": [...]}", + "ui_resource": { + "resource_uri": "ui://kube-mcp/pod-usage-chart", + "server_name": "kubernetes-mcp-server", + "content": "...", + "mime_type": "text/html", + "is_binary": false + } +} + +event: chunk +data: {"delta": "Based on the "} + +event: chunk +data: {"delta": "pod resource usage, "} + +event: chunk +data: {"delta": "here are the top consumers..."} + +event: done +data: {"stop_reason": "end_of_turn"} +``` + +**Characteristics:** +- Multiple SSE events streamed over time +- `ui_resource` arrives in `tool_result` event (immediately after tool execution) +- Completion text streams word-by-word in subsequent `chunk` events +- Client can render UI component before completion text finishes streaming + +**Key UX Difference:** + +| Aspect | Regular API | Streaming API | +|--------|-------------|---------------| +| **User perception** | All content appears at once | UI appears first, text streams after | +| **Time to first UI render** | When response completes | As soon as tool executes | +| **Perceived latency** | Higher (wait for full response) | Lower (progressive rendering) | + +**Implementation Notes:** + +- Both formats include identical `ui_resource` structure +- HTML content (~5-50KB) is sent as single complete string in both formats +- HTML is NOT chunked across multiple events in streaming mode +- Streaming mode provides better UX for long-running queries where tool execution completes before text generation + +**Client Implementation Differences:** + +```javascript +// Regular API - simple fetch +const response = await fetch('/v1/query', { + method: 'POST', + body: JSON.stringify({ query: '...' 
}) +}); +const data = await response.json(); +if (data.tool_results[0]?.ui_resource) { + renderIframe(data.tool_results[0].ui_resource.content); +} + +// Streaming API - SSE handling +const eventSource = new EventSource('/v1/streaming_query'); +eventSource.addEventListener('tool_result', (event) => { + const data = JSON.parse(event.data); + if (data.ui_resource) { + renderIframe(data.ui_resource.content); // Render immediately + } +}); +eventSource.addEventListener('chunk', (event) => { + const data = JSON.parse(event.data); + appendTextDelta(data.delta); // Stream text after UI +}); +``` + +3.4. Direct Tool Invocation Endpoint + +MCP Apps UIs need to call tools directly when users interact (e.g., clicking "Refresh" in a dashboard). + +A direct tool invocation endpoint is required for deterministic, fast, token-free tool execution. See [llama-stack issue #5512](https://github.com/llamastack/llama-stack/issues/5512) for full justification and proposed API design for a `POST /v1/tools/invoke` endpoint. + +**Request:** +```json +{ + "tool_name": "get_pod_metrics", + "arguments": { "namespace": "production" } +} +``` + +**Response:** +```json +{ + "tool_name": "get_pod_metrics", + "result": { "pods": [...] }, + "error": null +} +``` + +**Implementation Dependencies:** + +1. **Llama Stack**: Must expose `invoke_tool` as HTTP endpoint ([issue #5512](https://github.com/llamastack/llama-stack/issues/5512)) +2. **Lightspeed-stack**: Add passthrough endpoint `POST /v1/tools/invoke` that calls llama-stack +3. 
**Authorization**: Use same auth model as `/v1/query` (RBAC action: `INVOKE_TOOL`)
+
+**Lightspeed Implementation:**
+
+```python
+# src/app/endpoints/tools.py
+
+@router.post("/tools/invoke")
+@authorize(Action.INVOKE_TOOL)
+async def invoke_tool_endpoint(
+    request: InvokeToolRequest,
+    auth: Annotated[AuthTuple, Depends(get_auth_dependency())],
+    mcp_headers: McpHeaders = Depends(mcp_headers_dependency),
+) -> InvokeToolResponse:
+    """Invoke a tool directly (for MCP Apps bidirectional communication)."""
+    client = AsyncLlamaStackClientHolder().get_client()
+
+    result = await client.tools.invoke(
+        tool_name=request.tool_name,
+        arguments=request.arguments,
+        extra_headers=build_mcp_headers(...),
+    )
+
+    return InvokeToolResponse(
+        tool_name=request.tool_name,
+        result=result,
+    )
+```
+
+4. Security Considerations
+
+**Client-Side Sandbox Isolation**
+- UI resources MUST be rendered in sandboxed iframes
+- Recommended sandbox attributes: `sandbox="allow-scripts"`
+- The `allow-scripts` sandbox enables JavaScript execution while blocking:
+  - Access to parent window origin (no `allow-same-origin`)
+  - Form submissions (no `allow-forms`)
+  - Navigation of top-level browsing context (no `allow-top-navigation`)
+  - Pop-ups (no `allow-popups`)
+- This provides maximum isolation while enabling MCP Apps UI functionality
+
+**Server-Side Validation**
+- MCP servers are trusted (pre-registered in config)
+- UI resources fetched with same auth as tool execution
+- Rate limiting applied at query level (no separate endpoint to limit)
+
+**Content Trust Model**
+- HTML content comes from registered MCP servers only
+- No user-provided or untrusted HTML content
+- UI resources isolated from parent page context via iframe sandbox
+
+5. 
Testing Strategy
+
+**Unit Tests:**
+- `test_tool_cache.py` - Tool definition caching and invalidation
+- `test_ui_resource_enrichment.py` - ToolResultSummary.ui_resource population with inline HTML
+- `test_llama_stack_resource_client.py` - Mock llama-stack resources API calls
+
+**Integration Tests:**
+- Test full query flow with MCP Apps-enabled tool
+- Verify QueryResponse includes ui_resource with full HTML content
+- Verify tool results without `_meta.ui` don't include ui_resource field
+
+**E2E Tests (Behave):**
+```gherkin
+Feature: MCP Apps UI Resource Integration
+
+  Scenario: Query tool with UI resource returns inline HTML
+    Given an MCP server "test-mcp" is registered
+    And the tool "visualize_data" has ui resource "ui://test-mcp/chart"
+    When I send a query "visualize my data"
+    Then the response includes tool_result with ui_resource
+    And ui_resource.resource_uri is "ui://test-mcp/chart"
+    And ui_resource.content contains "<html>"
+    And ui_resource.content contains "@modelcontextprotocol/ext-apps"
+    And ui_resource.mime_type is "text/html"
+
+  Scenario: Query tool without UI resource omits ui_resource field
+    Given an MCP server "test-mcp" is registered
+    And the tool "simple_tool" has no ui resource
+    When I send a query "run simple tool"
+    Then the response includes tool_result without ui_resource field
+```
+
+### Open Questions
+
+**Q1: Llama Stack Resources API Implementation**
+- Status: Proposed via [llama-stack issue #5430](https://github.com/llamastack/llama-stack/issues/5430)
+- **Action:** Monitor issue for maintainer feedback and implementation timeline
+
+**Q2: Llama Stack Tool Invocation Endpoint**
+- Status: Proposed via [llama-stack issue #5512](https://github.com/llamastack/llama-stack/issues/5512)
+- Requirement: Bidirectional communication for MCP Apps UIs
+- **Action:** Monitor issue; MCP Apps support is blocked until this is available
+
+**Q3: Should we add X-MCP-Apps-Support header in future?**
+- Current: Always include `ui_resource` 
(simple, graceful degradation)
+- Future enhancement: Optional header to skip `ui_resource` for CLI clients
+- **Action:** Monitor bandwidth usage; add header in v2 if needed
+
+
+## References
+
+- [MCP Apps Official Documentation](https://modelcontextprotocol.io/docs/extensions/apps)
+- [MCP Apps GitHub Repository](https://github.com/modelcontextprotocol/ext-apps/)
+- [MCP Apps Blog Post (Jan 26, 2026)](https://blog.modelcontextprotocol.io/posts/2026-01-26-mcp-apps/)
+- [MCP Apps Build Guide](https://modelcontextprotocol.io/extensions/apps/build)
+- [MCP Apps API Documentation](https://apps.extensions.modelcontextprotocol.io/api/)
+- [Llama Stack Resources API Proposal (Issue #5430)](https://github.com/llamastack/llama-stack/issues/5430)
+- [Llama Stack Tool Invocation Endpoint Proposal (Issue #5512)](https://github.com/llamastack/llama-stack/issues/5512)
+- [Lightspeed Core Stack Demo (MCP Integration)](demo.md)
+
+## Appendix A: Example MCP Server with UI
+
+**Tool Definition:**
+```json
+{
+  "name": "k8s_pod_metrics",
+  "description": "Get pod resource metrics",
+  "inputSchema": {
+    "type": "object",
+    "properties": {
+      "namespace": {"type": "string"}
+    }
+  },
+  "_meta": {
+    "ui": {
+      "resourceUri": "ui://kube-mcp/pod-metrics-dashboard"
+    }
+  }
+}
+```
+
+**UI Resource (ui://kube-mcp/pod-metrics-dashboard):**
+```html
+<!DOCTYPE html>
+<html>
+<head>
+  <title>Pod Metrics Dashboard</title>
+  <script type="module" src="https://unpkg.com/@modelcontextprotocol/ext-apps"></script>
+</head>
+<body>
+  <div id="pod-metrics-dashboard"></div>
+</body>
+</html>
+```