diff --git a/packages/junior/src/chat/respond.ts b/packages/junior/src/chat/respond.ts index e56da16c..df39b39b 100644 --- a/packages/junior/src/chat/respond.ts +++ b/packages/junior/src/chat/respond.ts @@ -39,10 +39,9 @@ import type { ThreadArtifactsState } from "@/chat/state/artifacts"; import type { ConversationPendingAuthState } from "@/chat/state/conversation"; import { createTools } from "@/chat/tools"; import { resolveChannelCapabilities } from "@/chat/tools/channel-capabilities"; -import type { ToolDefinition } from "@/chat/tools/definition"; import { toActiveMcpCatalogSummaries } from "@/chat/tools/skill/mcp-tool-summary"; import type { ImageGenerateToolDeps } from "@/chat/tools/types"; -import { isAdvisorToolAllowed } from "@/chat/tools/advisor/tool"; +import { createAdvisorToolDefinitions } from "@/chat/tools/advisor/tool"; import { GEN_AI_PROVIDER_NAME, completeObject, @@ -898,7 +897,17 @@ export async function generateAssistantReply( } }; const agentTools = createAgentTools( - tools as Record>, + tools, + skillSandbox, + spanContext, + context.onStatus, + sandboxExecutor, + capabilityRuntime, + pluginAuth, + onToolCall, + ); + advisorTools = createAgentTools( + createAdvisorToolDefinitions(tools), skillSandbox, spanContext, context.onStatus, @@ -907,7 +916,6 @@ export async function generateAssistantReply( pluginAuth, onToolCall, ); - advisorTools = agentTools.filter((tool) => isAdvisorToolAllowed(tool.name)); // Keep Pi's native tool schema static for the whole turn. Ideally this // would use provider-native tool loading/search APIs, but Pi's generic // AgentTool surface cannot yet express OpenAI/Anthropic deferred MCP tools. diff --git a/packages/junior/src/chat/tools/advisor/tool.ts b/packages/junior/src/chat/tools/advisor/tool.ts index 8a156fdc..ef3f80a3 100644 --- a/packages/junior/src/chat/tools/advisor/tool.ts +++ b/packages/junior/src/chat/tools/advisor/tool.ts @@ -26,7 +26,7 @@ import { getAdvisorSessionKey, type AdvisorSessionStore, } from "@/chat/tools/advisor/session-store"; -import { tool } from "@/chat/tools/definition"; +import { tool, type ToolDefinition } from "@/chat/tools/definition"; import { escapeXml } from "@/chat/xml"; export type AdvisorErrorCode = @@ -57,26 +57,15 @@ export interface AdvisorToolRuntimeContext { streamFn?: StreamFn; } -const ADVISOR_ALLOWED_TOOL_NAMES = new Set([ - "bash", - "readFile", - "searchMcpTools", - "slackCanvasRead", - "slackChannelListMessages", - "slackListGetItems", - "systemTime", - "webFetch", - "webSearch", -]); - const ADVISOR_TOOL_DESCRIPTION = - "Ask a stronger advisor for deep technical guidance. Call this when the task has a hard reasoning core: algorithm design, architecture, concurrency, security-sensitive logic, data modeling, unclear requirements, repeated failures, difficult debugging, broad refactors, or final review of nontrivial work. Pass a focused question plus curated context containing the exact evidence, constraints, current plan, alternatives, command output, code snippets, or diffs the advisor should start from. The advisor does not automatically receive the parent transcript, keeps its own advisor history for this parent conversation, can use inspection tools to verify evidence, can reason deeply, and returns guidance for you to apply and verify. Follow-up calls can build on prior advisor guidance but must include any new evidence or changed constraints. Use it after initial orientation reads when repository context matters, before committing to a non-obvious implementation plan, when changing approach, when stuck, and before declaring complex work complete. Do not use it for greetings, simple deterministic edits, routine formatting, or tasks where the next action is already obvious from fresh tool output."; + "Ask a stronger advisor for deep technical guidance. Call this when the task has a hard reasoning core: algorithm design, architecture, concurrency, security-sensitive logic, data modeling, unclear requirements, repeated failures, difficult debugging, broad refactors, or final review of nontrivial work. Pass a focused question plus curated context containing the exact evidence, constraints, current plan, alternatives, command output, code snippets, or diffs the advisor should start from. The advisor does not automatically receive the parent transcript, keeps its own advisor history for this parent conversation, can use read-only inspection tools to verify evidence, can reason deeply, and returns guidance for you to apply and verify. Follow-up calls can build on prior advisor guidance but must include any new evidence or changed constraints. Use it after initial orientation reads when repository context matters, before committing to a non-obvious implementation plan, when changing approach, when stuck, and before declaring complex work complete. Do not use it for greetings, simple deterministic edits, routine formatting, or tasks where the next action is already obvious from fresh tool output."; const ADVISOR_SYSTEM_PROMPT = [ "You are a senior technical advisor for the executor.", - "Analyze the executor-supplied context deeply. Use inspection tools when direct inspection or verification would materially improve the advice.", + "Analyze the executor-supplied context deeply. Use read-only tools when direct inspection or verification would materially improve the advice.", "Distinguish evidence from inference. Treat the advisor task as the focus for this call and the executor context as the starting evidence packet.", "Do not assume access to parent transcript or tool output that was not included or gathered in this advisor call.", + "Use only the read-only tools provided to you.", "Do not make user-visible side effects, post Slack messages, or mutate files. If a mutating action is needed, recommend it to the executor instead.", "Identify the hard part, recommend a concrete plan or correction, call out blocking risks, and propose focused verification.", "If the supplied context is insufficient, say exactly what additional evidence the executor needs to gather before acting.", @@ -116,9 +105,26 @@ function success(memo: string): AdvisorToolResult { }; } -/** Return whether a normal executor tool is safe to expose to the advisor. */ -export function isAdvisorToolAllowed(toolName: string): boolean { - return ADVISOR_ALLOWED_TOOL_NAMES.has(toolName); +function hasReadOnlyToolAnnotations( + annotations: ToolDefinition["annotations"], +): boolean { + return ( + annotations?.readOnlyHint === true && annotations.destructiveHint !== true + ); +} + +/** Build the advisor's read-only tool definition subset. */ +export function createAdvisorToolDefinitions( + definitions: Record>, +): Record> { + return Object.fromEntries( + Object.entries(definitions).filter( + ([name, definition]) => + name !== "callMcpTool" && + name !== "searchMcpTools" && + hasReadOnlyToolAnnotations(definition.annotations), + ), + ); } /** Create the advisor tool backed by conversation-scoped message history. */ diff --git a/packages/junior/src/chat/tools/definition.ts b/packages/junior/src/chat/tools/definition.ts index 1c7e5cfc..cfedc6b8 100644 --- a/packages/junior/src/chat/tools/definition.ts +++ b/packages/junior/src/chat/tools/definition.ts @@ -1,8 +1,10 @@ +import type { ToolAnnotations } from "@modelcontextprotocol/sdk/types.js"; import type { Static, TSchema } from "@sinclair/typebox"; export interface ToolDefinition { description: string; inputSchema: TInputSchema; + annotations?: ToolAnnotations; execute?: ( input: Static, options: { experimental_context?: unknown }, diff --git a/packages/junior/src/chat/tools/index.ts b/packages/junior/src/chat/tools/index.ts index 7f7b7ff7..ee3efc64 100644 --- a/packages/junior/src/chat/tools/index.ts +++ b/packages/junior/src/chat/tools/index.ts @@ -23,6 +23,7 @@ import { } from "@/chat/tools/slack/list-tools"; import { createSystemTimeTool } from "@/chat/tools/system-time"; import { createAdvisorTool } from "@/chat/tools/advisor/tool"; +import type { ToolDefinition } from "@/chat/tools/definition"; import type { ToolHooks, ToolRuntimeContext, @@ -82,7 +83,7 @@ export function createTools( context: ToolRuntimeContext, ) { const state = createToolState(hooks, context); - const tools: Record = { + const tools: Record> = { loadSkill: createLoadSkillTool(availableSkills, { onSkillLoaded: hooks.onSkillLoaded, }), diff --git a/packages/junior/src/chat/tools/sandbox/read-file.ts b/packages/junior/src/chat/tools/sandbox/read-file.ts index 8d75cca8..ed0a5f08 100644 --- a/packages/junior/src/chat/tools/sandbox/read-file.ts +++ b/packages/junior/src/chat/tools/sandbox/read-file.ts @@ -5,6 +5,7 @@ export function createReadFileTool() { return tool({ description: "Read a file from the sandbox workspace. Use when you need exact file contents to verify facts or make edits safely. Do not use for broad discovery when search tools are better.", + annotations: { readOnlyHint: true, destructiveHint: false }, inputSchema: Type.Object( { path: Type.String({ diff --git a/packages/junior/src/chat/tools/slack/canvas-tools.ts b/packages/junior/src/chat/tools/slack/canvas-tools.ts index 642a53a8..5ed15546 100644 --- a/packages/junior/src/chat/tools/slack/canvas-tools.ts +++ b/packages/junior/src/chat/tools/slack/canvas-tools.ts @@ -237,6 +237,7 @@ export function createSlackCanvasReadTool() { return tool({ description: "Read a Slack canvas the bot has access to (including canvases the bot created) by canvas ID or Slack canvas/docs URL. Use when the user shares a Slack canvas link (https://*.slack.com/docs/... or /canvas/...) or references a canvas ID and you need its contents. Do not use for generic web pages — use webFetch for those.", + annotations: { readOnlyHint: true, destructiveHint: false }, inputSchema: Type.Object({ canvas: Type.String({ minLength: 1, diff --git a/packages/junior/src/chat/tools/slack/channel-list-messages.ts b/packages/junior/src/chat/tools/slack/channel-list-messages.ts index 9bb7e0a6..09f9e902 100644 --- a/packages/junior/src/chat/tools/slack/channel-list-messages.ts +++ b/packages/junior/src/chat/tools/slack/channel-list-messages.ts @@ -10,6 +10,7 @@ export function createSlackChannelListMessagesTool( return tool({ description: "List channel messages from Slack history in the active channel context. Use when the user asks for recent or historical channel context outside this thread. Do not use for live monitoring or when current thread context already answers the question.", + annotations: { readOnlyHint: true, destructiveHint: false }, inputSchema: Type.Object({ limit: Type.Optional( Type.Integer({ diff --git a/packages/junior/src/chat/tools/slack/list-tools.ts b/packages/junior/src/chat/tools/slack/list-tools.ts index b520c773..b841f865 100644 --- a/packages/junior/src/chat/tools/slack/list-tools.ts +++ b/packages/junior/src/chat/tools/slack/list-tools.ts @@ -133,6 +133,7 @@ export function createSlackListGetItemsTool(state: ToolState) { return tool({ description: "Read items from the active Slack list tracked in artifact context. Use when the user asks for task status, open items, or list contents. Do not use when list state is already known from the immediately prior result.", + annotations: { readOnlyHint: true, destructiveHint: false }, inputSchema: Type.Object({ limit: Type.Optional( Type.Integer({ diff --git a/packages/junior/src/chat/tools/system-time.ts b/packages/junior/src/chat/tools/system-time.ts index 6966e387..afe76d5e 100644 --- a/packages/junior/src/chat/tools/system-time.ts +++ b/packages/junior/src/chat/tools/system-time.ts @@ -5,6 +5,7 @@ export function createSystemTimeTool() { return tool({ description: "Return current system time in UTC and local ISO formats. Use when the user asks for current time/date context. Do not use as a substitute for historical or timezone-conversion research.", + annotations: { readOnlyHint: true, destructiveHint: false }, inputSchema: Type.Object({}), execute: async () => { const now = new Date(); @@ -12,9 +13,11 @@ export function createSystemTimeTool() { ok: true, unix_ms: now.getTime(), iso_utc: now.toISOString(), - iso_local: new Date(now.getTime() - now.getTimezoneOffset() * 60000).toISOString().replace("Z", ""), - timezone_offset_minutes: now.getTimezoneOffset() + iso_local: new Date(now.getTime() - now.getTimezoneOffset() * 60000) + .toISOString() + .replace("Z", ""), + timezone_offset_minutes: now.getTimezoneOffset(), }; - } + }, }); } diff --git a/packages/junior/src/chat/tools/web/fetch-tool.ts b/packages/junior/src/chat/tools/web/fetch-tool.ts index c398c3d0..1c5b1395 100644 --- a/packages/junior/src/chat/tools/web/fetch-tool.ts +++ b/packages/junior/src/chat/tools/web/fetch-tool.ts @@ -41,6 +41,11 @@ export function createWebFetchTool(hooks: ToolHooks) { return tool({ description: "Fetch and extract readable content from a specific URL. Use when you need details from a known page or document. Do not use for discovery when search is the first step.", + annotations: { + readOnlyHint: true, + destructiveHint: false, + openWorldHint: true, + }, inputSchema: Type.Object({ url: Type.String({ minLength: 1, diff --git a/packages/junior/src/chat/tools/web/search.ts b/packages/junior/src/chat/tools/web/search.ts index 689b9524..3461e192 100644 --- a/packages/junior/src/chat/tools/web/search.ts +++ b/packages/junior/src/chat/tools/web/search.ts @@ -76,6 +76,11 @@ export function createWebSearchTool() { return tool({ description: "Search public web sources and return top snippets/URLs. Use when you need discovery or source candidates. Do not use when the user already provided a specific URL to inspect.", + annotations: { + readOnlyHint: true, + destructiveHint: false, + openWorldHint: true, + }, inputSchema: Type.Object({ query: Type.String({ minLength: 1, diff --git a/packages/junior/tests/integration/advisor/advisor-tool.test.ts b/packages/junior/tests/integration/advisor/advisor-tool.test.ts index d189156c..d4744f4e 100644 --- a/packages/junior/tests/integration/advisor/advisor-tool.test.ts +++ b/packages/junior/tests/integration/advisor/advisor-tool.test.ts @@ -7,11 +7,12 @@ import { createTools } from "@/chat/tools"; import { resolveChannelCapabilities } from "@/chat/tools/channel-capabilities"; import type { AdvisorSessionStore } from "@/chat/tools/advisor/session-store"; import { + createAdvisorToolDefinitions, createAdvisorTool, - isAdvisorToolAllowed, type AdvisorToolResult, type AdvisorToolRuntimeContext, } from "@/chat/tools/advisor/tool"; +import { tool } from "@/chat/tools/definition"; type StreamResponse = Awaited>; @@ -145,39 +146,58 @@ describe("advisor tool", () => { expect(JSON.stringify(contexts[0])).toContain("inspectEvidence"); }); - it("keeps write and user-visible tools out of the advisor tool set", () => { - expect( - [ - "bash", - "readFile", - "searchMcpTools", - "slackCanvasRead", - "slackChannelListMessages", - "slackListGetItems", - "systemTime", - "webFetch", - "webSearch", - ].every(isAdvisorToolAllowed), - ).toBe(true); + it("builds the advisor tool set from read-only metadata", () => { + const readOnlyTool = tool({ + description: "Read only", + annotations: { readOnlyHint: true, destructiveHint: false }, + inputSchema: Type.Object({}), + }); + const conflictingTool = tool({ + description: "Conflicting", + annotations: { readOnlyHint: true, destructiveHint: true }, + inputSchema: Type.Object({}), + }); + const writeTool = tool({ + description: "Write", + inputSchema: Type.Object({}), + }); - expect( - [ - "advisor", - "attachFile", - "callMcpTool", - "imageGenerate", - "loadSkill", - "reportProgress", - "slackCanvasCreate", - "slackCanvasUpdate", - "slackChannelPostMessage", - "slackListAddItems", - "slackListCreate", - "slackListUpdateItem", - "slackMessageAddReaction", - "writeFile", - ].some(isAdvisorToolAllowed), - ).toBe(false); + const advisorDefinitions = createAdvisorToolDefinitions({ + attachFile: writeTool, + conflictingTool, + readFile: readOnlyTool, + slackCanvasCreate: writeTool, + slackCanvasRead: readOnlyTool, + writeFile: writeTool, + }); + + expect(Object.keys(advisorDefinitions).sort()).toEqual([ + "readFile", + "slackCanvasRead", + ]); + }); + + it("exposes the expected real read-only tool definitions to the advisor", () => { + const advisorDefinitions = createAdvisorToolDefinitions( + createTools( + [], + {}, + { + channelCapabilities: resolveChannelCapabilities("C12345"), + sandbox: {} as any, + }, + ), + ); + + expect(Object.keys(advisorDefinitions).sort()).toEqual([ + "readFile", + "slackCanvasRead", + "slackChannelListMessages", + "slackListGetItems", + "systemTime", + "webFetch", + "webSearch", + ]); }); it("continues the advisor session across calls in a parent conversation", async () => { diff --git a/specs/advisor-tool-spec.md b/specs/advisor-tool-spec.md index c710070b..738d3b74 100644 --- a/specs/advisor-tool-spec.md +++ b/specs/advisor-tool-spec.md @@ -18,7 +18,7 @@ The core contract is intentionally small: - The executor calls `advisor({ question, context })`. - The advisor gets its own conversation-scoped Pi message history. - The executor passes the current evidence explicitly; the parent transcript is not forked or implicitly forwarded. -- The advisor can use inspection tools from the normal tool layer, but not recursive, write, or user-visible tools. +- The advisor can use tools from the normal tool layer that are annotated read-only, but not recursive, write, or user-visible tools. - The advisor returns guidance; it does not own implementation. ## Prior Art @@ -32,7 +32,7 @@ The core contract is intentionally small: - Forking the main transcript into a hidden advisor conversation. - Maintaining replay hashes, call records, idempotency bookkeeping, or per-turn call counters. - Building a general multi-agent orchestration framework. -- Adding a separate read-only tool sandbox in V1. The advisor receives an explicit inspection-tool allowlist; the executor remains responsible for side effects. +- Adding a separate read-only tool sandbox in V1. The advisor receives the host-filtered read-only tool subset; the executor remains responsible for side effects. ## Configuration @@ -54,7 +54,7 @@ Input: - `question`: required focused advisor question or decision point. - `context`: required curated evidence packet with the requirements, constraints, current plan, alternatives, code snippets, diffs, command output, and open questions the advisor should start from. -The tool description is the executor-facing trigger policy. It must say the advisor is stronger, tool-backed, does not automatically receive the parent transcript, keeps advisor history for the parent conversation, receives an inspection-oriented tool subset, and is for hard reasoning rather than routine work. +The tool description is the executor-facing trigger policy. It must say the advisor is stronger, tool-backed, does not automatically receive the parent transcript, keeps advisor history for the parent conversation, receives a read-only tool subset, and is for hard reasoning rather than routine work. ## Runtime Contract @@ -62,7 +62,7 @@ The tool description is the executor-facing trigger policy. It must say the advi 2. Build one advisor request message with `` and `` sections. 3. Load advisor messages from `junior::advisor_session`. 4. Create a Pi `Agent` with the advisor model, thinking level, system prompt, and advisor-allowed tools. -5. Expose only the advisor inspection-tool allowlist. Do not expose recursive, write, user-visible, or unconstrained external-action tools. +5. Expose only read-only tool definitions to the advisor. A tool is advisor-readable only when `readOnlyHint: true` and `destructiveHint` is not `true`. Do not expose recursive, write, user-visible, or unconstrained external-action tools. 6. Assign the loaded messages to `advisorAgent.state.messages`. 7. Run `advisorAgent.prompt(requestMessage)`. 8. On success, save `advisorAgent.state.messages` back to the advisor session key. @@ -90,6 +90,7 @@ It must require the advisor to: - Use tools when inspection or verification would materially improve the advice. - Distinguish evidence from inference. - Avoid assuming access to parent transcript or tool output that was not supplied or gathered in the advisor run. +- Use only tools annotated as read-only. - Avoid user-visible side effects and file mutation; recommend mutating actions to the executor instead. - Identify the hard part, recommend a concrete plan or correction, call out blocking risks, and propose focused verification. - Say what evidence is missing when the supplied context is insufficient. @@ -126,9 +127,11 @@ Coverage must prove: - advisor config defaults, overrides, and invalid config handling - tool exposure only when advisor runtime context is configured - explicit executor context reaches the advisor -- advisor receives inspection tools while write and user-visible tools are excluded +- advisor receives read-only tools while write and user-visible tools are excluded - advisor messages persist and restore across calls in the same parent conversation +Future MCP advisor access needs a separate nested-agent auth and resume contract before `searchMcpTools` or `callMcpTool` can be exposed to the advisor. + ## References - `./testing/index.md`