diff --git a/AGENTS.md b/AGENTS.md index f78fb0e9..081249a2 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -100,6 +100,7 @@ Co-Authored-By: (agent model name) - `specs/chat-architecture-spec.md` (chat composition, service, and test-seam architecture contract) - `specs/slack-agent-delivery-spec.md` (Slack entry surfaces, reply delivery, continuation, files, images, and resume behavior contract) - `specs/slack-outbound-contract-spec.md` (Slack outbound boundary, message/file/reaction safety rules, and markdown-to-`mrkdwn` ownership) +- `specs/slack-rendering-spec.md` (Slack `mrkdwn` output contract: allow-list / forbid-list for the Slack surface — draft) - `specs/skill-capabilities-spec.md` (capability declaration + broker/injection contract) - `specs/oauth-flows-spec.md` (OAuth authorization code flow + Slack UX contract) - `specs/harness-agent-spec.md` (agent loop and output contract) diff --git a/packages/junior-evals/evals/core/slack-mrkdwn-hygiene.eval.ts b/packages/junior-evals/evals/core/slack-mrkdwn-hygiene.eval.ts new file mode 100644 index 00000000..081a9744 --- /dev/null +++ b/packages/junior-evals/evals/core/slack-mrkdwn-hygiene.eval.ts @@ -0,0 +1,88 @@ +import { describe } from "vitest"; +import { mention, rubric, slackEval } from "../helpers"; + +describe("Slack mrkdwn hygiene", () => { + slackEval("rewrites markdown tables into Slack-safe output", { + events: [ + mention( + "Give me a short comparison table of Sentry, Bugsnag, and Rollbar focused on deployment model and tracing.", + ), + ], + overrides: { + reply_timeout_ms: 120_000, + }, + requireSandboxReady: false, + taskTimeout: 150_000, + timeout: 210_000, + criteria: rubric({ + contract: + "Comparison output is Slack-safe: it may use bullets or a fenced code block, but it must not leave a raw markdown pipe table as the visible reply.", + pass: [ + "assistant_posts contains a single reply that compares Sentry, Bugsnag, and Rollbar.", + "If the reply uses a table-like layout, it is fenced as code or otherwise reformatted for Slack-safe rendering.", + ], + fail: [ + "Do not emit an unfenced markdown table with a separator row such as `| --- | --- |`.", + "Do not leave a raw pipe-table as the final visible reply.", + ], + }), + }); + + slackEval( + "uses single-asterisk bold, single-tilde strike, and Slack link syntax", + { + events: [ + mention( + "In one short Slack reply, bold the word 'ready', strike through the word 'draft', and link the label 'docs' to https://docs.slack.dev/ .", + ), + ], + overrides: { + reply_timeout_ms: 120_000, + }, + requireSandboxReady: false, + taskTimeout: 150_000, + timeout: 210_000, + criteria: rubric({ + contract: + "Emphasis and link syntax follow Slack `mrkdwn`: single-asterisk bold, single-tilde strike, and `` links. CommonMark/GFM equivalents are forbidden.", + pass: [ + "assistant_posts contains a single reply that addresses the bold, strike, and link asks.", + "Bold uses `*ready*` (single asterisks).", + "Strike uses `~draft~` (single tildes).", + "The docs link appears as `` or the bare URL.", + ], + fail: [ + "Do not emit `**ready**` (CommonMark bold).", + "Do not emit `~~draft~~` (CommonMark strike).", + "Do not emit `[docs](https://docs.slack.dev/)` (CommonMark link).", + ], + }), + }, + ); + + slackEval("uses bold section labels instead of markdown headings", { + events: [ + mention( + "Give me a two-section Slack reply with short headings 'Summary' and 'Next steps', each with one sentence under it.", + ), + ], + overrides: { + reply_timeout_ms: 120_000, + }, + requireSandboxReady: false, + taskTimeout: 150_000, + timeout: 210_000, + criteria: rubric({ + contract: + "Section structure uses a bold label on its own line. Markdown heading syntax is forbidden because Slack does not render it.", + pass: [ + "assistant_posts contains a single reply with two sections.", + "Each section label appears as `*Summary*` and `*Next steps*` on their own lines (bold labels), followed by a sentence.", + ], + fail: [ + "Do not emit `# Summary`, `## Summary`, `### Summary`, or any other markdown heading syntax.", + "Do not emit `**Summary**` (CommonMark bold).", + ], + }), + }); +}); diff --git a/packages/junior-evals/evals/helpers.ts b/packages/junior-evals/evals/helpers.ts index da50fd41..86906f84 100644 --- a/packages/junior-evals/evals/helpers.ts +++ b/packages/junior-evals/evals/helpers.ts @@ -61,7 +61,6 @@ const canvasSchema = z.object({ "Initial markdown body written into the created Slack canvas during the turn", ), }); - const evalOutputSchema = z.object({ assistant_posts: z .array(assistantPostSchema) diff --git a/packages/junior/src/chat/prompt.ts b/packages/junior/src/chat/prompt.ts index ec646e25..6e3c5f16 100644 --- a/packages/junior/src/chat/prompt.ts +++ b/packages/junior/src/chat/prompt.ts @@ -252,6 +252,45 @@ function baseSystemPrompt(): string { ].join("\n"); } +function buildSlackOutputContract(params: { + maxInlineChars: number; + maxInlineLines: number; +}): string { + return [ + ``, + "Your reply is delivered as plain Slack `mrkdwn` text. Slack `mrkdwn` is a strict, smaller syntax than CommonMark or GitHub-Flavored Markdown — anything outside the allow-list below renders as literal characters.", + "", + "Allowed mrkdwn (you may use these):", + "- `*bold*` — surround with single asterisks. Slack does NOT support `**bold**`; it renders the asterisks literally.", + "- `_italic_` — surround with single underscores.", + "- `~strike~` — surround with single tildes. Slack does NOT support `~~strike~~`.", + "- `` `inline code` `` and triple-backtick fenced code blocks for code, commands, and monospaced snippets.", + "- `> quoted text` at the start of a line for block quotes. A blank line ends the quote.", + "- `` for hyperlinks with a label. A bare `https://example.com` auto-links without a label. Slack does NOT support `[Label](https://example.com)` — it renders literally.", + "- `<@USERID>`, `<#CHANNELID>`, `` for user, channel, and group mentions. Use the raw IDs exposed elsewhere in this prompt.", + "- `- item` or `* item` at the start of a line for bullet lists. Numbered lists (`1. item`) render but indent awkwardly — prefer bullets.", + "- A bold label on its own line (`*Section*`) in place of markdown headings.", + "", + "Forbidden (do NOT emit these — they render as literal characters or broken formatting):", + "- Markdown tables using pipes and dashes (`| col | col |` / `|---|---|`). Slack renders the pipes verbatim. When you need tabular data, use short bulleted lists grouped by row, or a fenced code block with manually aligned columns.", + "- Markdown headings (`#`, `##`, `###`, and so on). Use a bold label on its own line instead.", + "- Markdown link syntax (`[label](url)`). Rewrite as `` or a bare URL.", + "- CommonMark bold/strike doubles (`**bold**`, `~~strike~~`). Use the single-delimiter forms above.", + "- HTML tags, image embeds, and raw Slack Block Kit JSON.", + "", + "Other response rules:", + "- Keep responses brief and scannable. Lead with the answer; add detail only when depth is warranted.", + `- Prefer a single compact thread reply when the full answer comfortably fits within this inline budget (${params.maxInlineChars} chars / ${params.maxInlineLines} lines).`, + "- When canvas creation is available and a research or document-style answer would likely need continuation, multiple sections, or future reference value, create a Slack canvas and keep the thread reply to a short summary plus the canvas link.", + "- Typical canvas-first cases include long-form research summaries, timelines, bios or profiles, structured notes, plans, comparisons, and other reusable reference documents.", + "- Do not create a canvas for short factual answers that fit cleanly in one normal thread reply.", + "- For tool-heavy research, discovery, or source-checking requests, do not send an initial acknowledgement. Start the visible reply only once you can present the actual answer.", + "- Do not narrate tool execution or emit repeated status updates in the visible reply.", + "- End every turn with a single final user-facing response in the format above.", + "", + ].join("\n"); +} + function formatReferenceFilesSection(): string[] { const files = listReferenceFiles(); if (files.length === 0) { @@ -274,6 +313,7 @@ function formatReferenceFilesSection(): string[] { ]; } +/** Build the canonical system prompt from repo policy, runtime context, and active skill state. */ export function buildSystemPrompt(params: { availableSkills: SkillMetadata[]; activeSkills: Skill[]; @@ -562,25 +602,10 @@ export function buildSystemPrompt(params: { "- If no skill is a clear fit, continue with normal tool usage.", ].join("\n"), ), - renderTag( - "output-contract", - [ - "Always produce output that follows this contract:", - ``, - "- Use Slack-friendly markdown, not full CommonMark. Prefer bold section labels over markdown headings, and use bullets and short code blocks when helpful.", - "- Keep normal responses brief and scannable.", - "- If depth is needed, start with a concise summary and then provide fuller detail.", - "- Prefer a single compact thread reply when the full answer comfortably fits within this inline budget.", - "- When canvas creation is available and a research or document-style answer would likely need continuation, multiple sections, or future reference value, create a Slack canvas and keep the thread reply to a short summary plus the canvas link.", - "- Typical canvas-first cases include long-form research summaries, timelines, bios or profiles, structured notes, plans, comparisons, and other reusable reference documents.", - "- Do not create a canvas for short factual answers that fit cleanly in one normal thread reply.", - "- For tool-heavy research, discovery, or source-checking requests, do not send an initial acknowledgment. Start the visible reply only once you can present the actual answer.", - "- Do not narrate tool execution or repeated status updates in the visible reply.", - "- Avoid tables and markdown links like `[label](url)` unless explicitly requested. Prefer plain URLs or Slack-native entities when exact rendering matters.", - "- End every turn with a final user-facing markdown response.", - "", - ].join("\n"), - ), + buildSlackOutputContract({ + maxInlineChars: slackOutputPolicy.maxInlineChars, + maxInlineLines: slackOutputPolicy.maxInlineLines, + }), availableSkillsSection, activeSkillsSection, ...(activeToolsSection ? [activeToolsSection] : []), diff --git a/packages/junior/src/chat/runtime/reply-executor.ts b/packages/junior/src/chat/runtime/reply-executor.ts index 794e7696..e04afceb 100644 --- a/packages/junior/src/chat/runtime/reply-executor.ts +++ b/packages/junior/src/chat/runtime/reply-executor.ts @@ -127,6 +127,7 @@ interface ReplyExecutorDeps { services: ReplyExecutorServices; } +/** Create the main turn executor that prepares context, runs the agent, and delivers the reply. */ export function createReplyToThread(deps: ReplyExecutorDeps) { return async function replyToThread( thread: Thread, @@ -445,8 +446,7 @@ export function createReplyToThread(deps: ReplyExecutorDeps) { traceId: getActiveTraceId(), usage: reply.diagnostics.usage, }); - const shouldUseSlackFooter = - Boolean(replyFooter) && + const shouldUseSlackApiReplyDelivery = Boolean(channelId && threadTs) && (thread.adapter as { name?: string } | undefined)?.name === "slack"; @@ -454,12 +454,12 @@ export function createReplyToThread(deps: ReplyExecutorDeps) { // completed after the visible reply has been accepted by Slack. if (plannedPosts.length > 0) { let sent: SentMessage | undefined; - if (shouldUseSlackFooter) { + if (shouldUseSlackApiReplyDelivery) { const slackChannelId = channelId; const slackThreadTs = threadTs; if (!slackChannelId || !slackThreadTs) { throw new Error( - "Slack footer delivery requires a concrete channel and thread timestamp", + "Slack reply delivery requires a concrete channel and thread timestamp", ); } diff --git a/packages/junior/src/chat/slack/footer.ts b/packages/junior/src/chat/slack/footer.ts index d4062585..f7ea4431 100644 --- a/packages/junior/src/chat/slack/footer.ts +++ b/packages/junior/src/chat/slack/footer.ts @@ -1,23 +1,6 @@ import type { AgentTurnUsage } from "@/chat/usage"; -interface SlackMrkdwnTextObject { - text: string; - type: "mrkdwn"; -} - -interface SlackSectionBlock { - text: SlackMrkdwnTextObject; - type: "section"; -} - -interface SlackContextBlock { - elements: SlackMrkdwnTextObject[]; - type: "context"; -} - -export type SlackMessageBlock = SlackSectionBlock | SlackContextBlock; - -export interface SlackReplyFooterItem { +interface SlackReplyFooterItem { label: string; value: string; } @@ -26,13 +9,6 @@ export interface SlackReplyFooter { items: SlackReplyFooterItem[]; } -function escapeSlackMrkdwn(text: string): string { - return text - .replaceAll("&", "&") - .replaceAll("<", "<") - .replaceAll(">", ">"); -} - function formatSlackTokenCount(value: number): string { return new Intl.NumberFormat("en-US").format(value); } @@ -122,30 +98,3 @@ export function buildSlackReplyFooter(args: { return items.length > 0 ? { items } : undefined; } - -/** Build Slack blocks for a finalized reply plus its optional metadata footer. */ -export function buildSlackReplyBlocks( - text: string, - footer: SlackReplyFooter | undefined, -): SlackMessageBlock[] | undefined { - if (!text.trim() || !footer?.items.length) { - return undefined; - } - - return [ - { - type: "section", - text: { - type: "mrkdwn", - text, - }, - }, - { - type: "context", - elements: footer.items.map((item) => ({ - type: "mrkdwn", - text: `*${escapeSlackMrkdwn(item.label)}:* ${escapeSlackMrkdwn(item.value)}`, - })), - }, - ]; -} diff --git a/packages/junior/src/chat/slack/markdown-table.ts b/packages/junior/src/chat/slack/markdown-table.ts new file mode 100644 index 00000000..9c565bd3 --- /dev/null +++ b/packages/junior/src/chat/slack/markdown-table.ts @@ -0,0 +1,129 @@ +export interface MarkdownTableMatch { + after: string; + before: string; + rows: string[][]; +} + +function getMarkdownFenceDelimiter(line: string): "```" | "~~~" | null { + const trimmed = line.trimStart(); + if (trimmed.startsWith("```")) { + return "```"; + } + if (trimmed.startsWith("~~~")) { + return "~~~"; + } + return null; +} + +/** Parse a single markdown table row while preserving Slack link cells. */ +export function parseMarkdownTableRow(line: string): string[] | null { + if (!line.includes("|")) { + return null; + } + + const normalized = line.trim().replace(/^\|/, "").replace(/\|$/, ""); + const cells: string[] = []; + let current = ""; + let insideSlackLink = false; + + for (const char of normalized) { + if (char === "<") { + insideSlackLink = true; + current += char; + continue; + } + if (char === ">") { + insideSlackLink = false; + current += char; + continue; + } + if (char === "|" && !insideSlackLink) { + cells.push(current.trim()); + current = ""; + continue; + } + current += char; + } + + cells.push(current.trim()); + return cells.length >= 2 ? cells : null; +} + +/** Return true when a line is a markdown table separator row. */ +export function isMarkdownTableSeparator(line: string): boolean { + const cells = parseMarkdownTableRow(line); + return ( + cells !== null && + cells.length >= 2 && + cells.every((cell) => /^:?-{3,}:?$/.test(cell)) + ); +} + +/** Render parsed markdown-table rows into a Slack-safe ASCII code block. */ +export function renderMarkdownTableCodeBlock(rows: string[][]): string { + const columnCount = Math.max(...rows.map((row) => row.length)); + const normalizedRows = rows.map((row) => + Array.from({ length: columnCount }, (_unused, index) => row[index] ?? ""), + ); + const widths = Array.from({ length: columnCount }, (_unused, index) => + Math.max(3, ...normalizedRows.map((row) => (row[index] ?? "").length)), + ); + + const formatRow = (row: string[]) => + row + .map((cell, index) => cell.padEnd(widths[index] ?? 3)) + .join(" | ") + .trimEnd(); + + const header = formatRow(normalizedRows[0] ?? []); + const separator = widths + .map((width) => "-".repeat(Math.max(3, width))) + .join(" | "); + const body = normalizedRows.slice(1).map(formatRow); + + return ["```", header, separator, ...body, "```"].join("\n"); +} + +/** Extract the first simple markdown table plus surrounding text. */ +export function extractFirstMarkdownTable( + text: string, +): MarkdownTableMatch | null { + const lines = text.split("\n"); + let activeFence: "```" | "~~~" | null = null; + + for (let index = 0; index < lines.length; index += 1) { + const fenceDelimiter = getMarkdownFenceDelimiter(lines[index] ?? ""); + if (fenceDelimiter) { + activeFence = + activeFence === fenceDelimiter ? null : (activeFence ?? fenceDelimiter); + continue; + } + if (activeFence) { + continue; + } + + const header = parseMarkdownTableRow(lines[index] ?? ""); + if (!header || !isMarkdownTableSeparator(lines[index + 1] ?? "")) { + continue; + } + + const rows = [header]; + let nextIndex = index + 2; + while (nextIndex < lines.length) { + const row = parseMarkdownTableRow(lines[nextIndex] ?? ""); + if (!row) { + break; + } + rows.push(row); + nextIndex += 1; + } + + return { + after: lines.slice(nextIndex).join("\n"), + before: lines.slice(0, index).join("\n"), + rows, + }; + } + + return null; +} diff --git a/packages/junior/src/chat/slack/mrkdwn.ts b/packages/junior/src/chat/slack/mrkdwn.ts index dcc8c10b..218b779b 100644 --- a/packages/junior/src/chat/slack/mrkdwn.ts +++ b/packages/junior/src/chat/slack/mrkdwn.ts @@ -1,7 +1,522 @@ import { truncateStatusText } from "@/chat/runtime/status-format"; +import { + isMarkdownTableSeparator, + parseMarkdownTableRow, + renderMarkdownTableCodeBlock, +} from "@/chat/slack/markdown-table"; + +const PROTECTED_SLACK_SEGMENT_PATTERN = + /(```[\s\S]*?```|~~~[\s\S]*?~~~|`[^`\n]+`)/g; + +function normalizeSlackMarkdownSegment(text: string): string { + let normalized = stripMarkdownHtmlComments(text); + normalized = normalizeMarkdownHeadings(normalized); + normalized = normalizeCommonMarkEmphasis(normalized); + normalized = normalizeMarkdownLinks(normalized); + normalized = normalizeWrappedRawUrls(normalized); + normalized = normalizeMarkdownTables(normalized); + return escapeSlackControlChars(normalized); +} + +function normalizeUnprotectedSlackMarkdown(text: string): string { + let out = ""; + let lastIndex = 0; + + for (const match of text.matchAll(PROTECTED_SLACK_SEGMENT_PATTERN)) { + const index = match.index ?? 0; + if (index > lastIndex) { + out += normalizeSlackMarkdownSegment(text.slice(lastIndex, index)); + } + out += match[0]; + lastIndex = index + match[0].length; + } + + if (lastIndex < text.length) { + out += normalizeSlackMarkdownSegment(text.slice(lastIndex)); + } + + return out; +} + +function normalizeMarkdownHeadings(text: string): string { + return text.replace(/^#{1,6}\s+(.+)$/gm, (_match, heading) => { + const normalized = String(heading) + .trim() + .replace(/\s+#+\s*$/, "") + .trim(); + return normalized ? `*${normalized}*` : ""; + }); +} + +function normalizeCommonMarkEmphasis(text: string): string { + return text + .replace(/\*\*\*([^\s*](?:[^\n]*?[^\s*])?)\*\*\*/g, "*$1*") + .replace(/\*\*([^\s*](?:[^\n]*?[^\s*])?)\*\*/g, "*$1*") + .replace(/~~([^\s~](?:[^\n]*?[^\s~])?)~~/g, "~$1~"); +} + +function removeMarkdownHtmlComments(text: string): string { + let normalized = text; + + while (true) { + const start = normalized.indexOf("", start + 4); + if (end === -1) { + return normalized; + } + + normalized = `${normalized.slice(0, start)}${normalized.slice(end + 3)}`; + } +} + +function stripMarkdownHtmlComments(text: string): string { + return removeMarkdownHtmlComments(text).replaceAll("\n## Summary\nDone"), + ).toBe("Intro\n\n*Summary*\n\nDone"); + }); + + it("escapes dangling html comment openers after stripping comments", () => { + expect(renderSlackMrkdwn("Intro -- outro")).toBe( + "Intro <!-- outro", + ); + }); + + it("preserves already-valid Slack links, mentions, and formatting", () => { + expect( + renderSlackMrkdwn(" <@U123> *ready*"), + ).toBe(" <@U123> *ready*"); + }); + + it("escapes literal Slack control characters in prose", () => { + expect(renderSlackMrkdwn("1 < 2 & 3 > 2")).toBe("1 < 2 & 3 > 2"); + }); + + it("preserves block quotes and special Slack tokens while escaping prose", () => { + expect( + renderSlackMrkdwn( + "> quoted\n\n<#C123>\n3 < 4", + ), + ).toBe( + "> quoted\n\n\n\n<#C123>\n\n3 < 4", + ); + }); + + it("does not double-escape existing Slack entities", () => { + expect(renderSlackMrkdwn("Fish & Chips <3")).toBe( + "Fish & Chips <3", + ); + }); + + it("does not rewrite markdown syntax inside code fences", () => { + const input = [ + "```", + "## Summary", + "**ready**", + "[docs](https://docs.slack.dev/)", + "", + "| Name | Score |", + "| --- | --- |", + "```", + ].join("\n"); + + expect(renderSlackMrkdwn(input)).toBe(input); + }); }); describe("buildSlackOutputMessage", () => { @@ -183,52 +332,52 @@ describe("splitSlackReplyText", () => { }); }); -describe("ensureBlockSpacing", () => { +describe("renderSlackMrkdwn spacing", () => { it("inserts blank line between prose and list", () => { - expect(ensureBlockSpacing("done.\n- #37\n- #38")).toBe( + expect(renderSlackMrkdwn("done.\n- #37\n- #38")).toBe( "done.\n\n- #37\n- #38", ); }); it("preserves existing blank line between prose and list", () => { - expect(ensureBlockSpacing("done.\n\n- #37\n- #38")).toBe( + expect(renderSlackMrkdwn("done.\n\n- #37\n- #38")).toBe( "done.\n\n- #37\n- #38", ); }); it("keeps consecutive list items compact", () => { - expect(ensureBlockSpacing("- #37\n- #38")).toBe("- #37\n- #38"); + expect(renderSlackMrkdwn("- #37\n- #38")).toBe("- #37\n- #38"); }); it("inserts blank line between prose lines", () => { - expect(ensureBlockSpacing("sentence one.\nsentence two.")).toBe( + expect(renderSlackMrkdwn("sentence one.\nsentence two.")).toBe( "sentence one.\n\nsentence two.", ); }); it("preserves code block contents", () => { const input = "text\n```\ncode\ncode\n```\ntext"; - const result = ensureBlockSpacing(input); + const result = renderSlackMrkdwn(input); expect(result).toBe("text\n\n```\ncode\ncode\n```\n\ntext"); }); it("preserves already-spaced blocks", () => { - expect(ensureBlockSpacing("a\n\nb")).toBe("a\n\nb"); + expect(renderSlackMrkdwn("a\n\nb")).toBe("a\n\nb"); }); it("inserts blank lines around list block within prose", () => { - expect(ensureBlockSpacing("done:\n* a\n* b\nfin.")).toBe( + expect(renderSlackMrkdwn("done:\n* a\n* b\nfin.")).toBe( "done:\n\n* a\n* b\n\nfin.", ); }); it("handles ordered list items", () => { - expect(ensureBlockSpacing("intro\n1. first\n2. second\nend")).toBe( + expect(renderSlackMrkdwn("intro\n1. first\n2. second\nend")).toBe( "intro\n\n1. first\n2. second\n\nend", ); }); it("handles bullet list with •", () => { - expect(ensureBlockSpacing("intro\n• a\n• b")).toBe("intro\n\n• a\n• b"); + expect(renderSlackMrkdwn("intro\n• a\n• b")).toBe("intro\n\n• a\n• b"); }); }); diff --git a/packages/junior/tests/unit/services/turn-checkpoint.test.ts b/packages/junior/tests/unit/services/turn-checkpoint.test.ts index ba1b0162..f6500013 100644 --- a/packages/junior/tests/unit/services/turn-checkpoint.test.ts +++ b/packages/junior/tests/unit/services/turn-checkpoint.test.ts @@ -1,21 +1,35 @@ -import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import type { AgentMessage } from "@mariozechner/pi-agent-core"; -import { persistAuthPauseCheckpoint } from "@/chat/services/turn-checkpoint"; -import { disconnectStateAdapter } from "@/chat/state/adapter"; -import { - getAgentTurnSessionCheckpoint, - upsertAgentTurnSessionCheckpoint, -} from "@/chat/state/turn-session-store"; + +let disconnectStateAdapter: typeof import("@/chat/state/adapter").disconnectStateAdapter; +let getAgentTurnSessionCheckpoint: typeof import("@/chat/state/turn-session-store").getAgentTurnSessionCheckpoint; +let upsertAgentTurnSessionCheckpoint: typeof import("@/chat/state/turn-session-store").upsertAgentTurnSessionCheckpoint; +let persistAuthPauseCheckpoint: typeof import("@/chat/services/turn-checkpoint").persistAuthPauseCheckpoint; describe("persistAuthPauseCheckpoint", () => { beforeEach(async () => { process.env.JUNIOR_STATE_ADAPTER = "memory"; + vi.resetModules(); + + const stateAdapterModule = await import("@/chat/state/adapter"); + disconnectStateAdapter = stateAdapterModule.disconnectStateAdapter; await disconnectStateAdapter(); + + const turnSessionStoreModule = + await import("@/chat/state/turn-session-store"); + getAgentTurnSessionCheckpoint = + turnSessionStoreModule.getAgentTurnSessionCheckpoint; + upsertAgentTurnSessionCheckpoint = + turnSessionStoreModule.upsertAgentTurnSessionCheckpoint; + + ({ persistAuthPauseCheckpoint } = + await import("@/chat/services/turn-checkpoint")); }); afterEach(async () => { - await disconnectStateAdapter(); + await disconnectStateAdapter?.(); delete process.env.JUNIOR_STATE_ADAPTER; + vi.resetModules(); }); it("reuses the latest stored transcript when the auth pause captured no messages", async () => { diff --git a/packages/junior/tests/unit/slack/footer.test.ts b/packages/junior/tests/unit/slack/footer.test.ts index dc494178..432da2a4 100644 --- a/packages/junior/tests/unit/slack/footer.test.ts +++ b/packages/junior/tests/unit/slack/footer.test.ts @@ -1,8 +1,5 @@ import { describe, expect, it } from "vitest"; -import { - buildSlackReplyBlocks, - buildSlackReplyFooter, -} from "@/chat/slack/footer"; +import { buildSlackReplyFooter } from "@/chat/slack/footer"; describe("buildSlackReplyFooter", () => { it("returns compact footer items for available diagnostics", () => { @@ -77,56 +74,3 @@ describe("buildSlackReplyFooter", () => { }); }); }); - -describe("buildSlackReplyBlocks", () => { - it("renders the reply body plus a Slack context footer block", () => { - const footer = buildSlackReplyFooter({ - conversationId: "slack:C123:1700000000.000100", - durationMs: 1250, - traceId: "trace_123", - usage: { - inputTokens: 400, - outputTokens: 250, - }, - }); - - expect(buildSlackReplyBlocks("Hello world", footer)).toEqual([ - { - type: "section", - text: { - type: "mrkdwn", - text: "Hello world", - }, - }, - { - type: "context", - elements: [ - { - type: "mrkdwn", - text: "*ID:* slack:C123:1700000000.000100", - }, - { - type: "mrkdwn", - text: "*Tokens:* 650", - }, - { - type: "mrkdwn", - text: "*Time:* 1.3s", - }, - { - type: "mrkdwn", - text: "*Trace:* trace_123", - }, - ], - }, - ]); - }); - - it("does not emit blocks when the reply has no visible text", () => { - const footer = buildSlackReplyFooter({ - conversationId: "slack:C123:1700000000.000100", - }); - - expect(buildSlackReplyBlocks(" ", footer)).toBeUndefined(); - }); -}); diff --git a/packages/junior/tests/unit/slack/reply-blocks.test.ts b/packages/junior/tests/unit/slack/reply-blocks.test.ts new file mode 100644 index 00000000..0b5110da --- /dev/null +++ b/packages/junior/tests/unit/slack/reply-blocks.test.ts @@ -0,0 +1,141 @@ +import { describe, expect, it } from "vitest"; +import { buildSlackReplyFooter } from "@/chat/slack/footer"; +import { buildSlackReplyBlocks } from "@/chat/slack/reply-blocks"; + +describe("buildSlackReplyBlocks", () => { + it("wraps plain replies in an expanded section block", () => { + expect(buildSlackReplyBlocks("Hello world", undefined)).toEqual([ + { + type: "section", + expand: true, + text: { + type: "mrkdwn", + text: "Hello world", + }, + }, + ]); + }); + + it("renders a native table block when the original reply contained a table", () => { + expect( + buildSlackReplyBlocks( + [ + "```", + "Service | Docs", + "------- | -------------------------------", + "Slack | ", + "```", + ].join("\n"), + undefined, + { + richSourceText: [ + "| Service | Docs |", + "| --- | --- |", + "| Slack | [Slack](https://docs.slack.dev/) |", + ].join("\n"), + }, + ), + ).toEqual([ + { + type: "table", + rows: [ + [ + { type: "raw_text", text: "Service" }, + { type: "raw_text", text: "Docs" }, + ], + [ + { type: "raw_text", text: "Slack" }, + { + type: "rich_text", + elements: [ + { + type: "rich_text_section", + elements: [ + { + type: "link", + url: "https://docs.slack.dev/", + text: "Slack", + }, + ], + }, + ], + }, + ], + ], + }, + ]); + }); + + it("does not upgrade fenced code blocks that only look like markdown tables", () => { + const fencedTable = ["```", "| Col | Value |", "| --- | --- |", "```"].join( + "\n", + ); + + expect( + buildSlackReplyBlocks(fencedTable, undefined, { + richSourceText: fencedTable, + }), + ).toEqual([ + { + type: "section", + expand: true, + text: { + type: "mrkdwn", + text: fencedTable, + }, + }, + ]); + }); + + it("renders the reply body plus a Slack context footer block", () => { + const footer = buildSlackReplyFooter({ + conversationId: "slack:C123:1700000000.000100", + durationMs: 1250, + traceId: "trace_123", + usage: { + inputTokens: 400, + outputTokens: 250, + }, + }); + + expect(buildSlackReplyBlocks("Hello world", footer)).toEqual([ + { + type: "section", + expand: true, + text: { + type: "mrkdwn", + text: "Hello world", + }, + }, + { + type: "context", + elements: [ + { + type: "mrkdwn", + text: "*ID:* slack:C123:1700000000.000100", + }, + { + type: "mrkdwn", + text: "*Tokens:* 650", + }, + { + type: "mrkdwn", + text: "*Time:* 1.3s", + }, + { + type: "mrkdwn", + text: "*Trace:* trace_123", + }, + ], + }, + ]); + }); + + it("does not emit blocks when the reply has no visible text", () => { + const footer = buildSlackReplyFooter({ + conversationId: "slack:C123:1700000000.000100", + }); + + expect(buildSlackReplyBlocks(" ", footer)).toBeUndefined(); + }); +}); diff --git a/specs/index.md b/specs/index.md index 281fad49..467fff18 100644 --- a/specs/index.md +++ b/specs/index.md @@ -14,6 +14,7 @@ - 2026-03-21: Added canonical chat architecture spec. - 2026-04-15: Added canonical Slack agent delivery spec. - 2026-04-16: Added canonical Slack write contract spec. +- 2026-04-17: Added draft Slack output contract spec (`slack-rendering-spec.md`) covering the `mrkdwn` allow-list and forbid-list for the Slack surface. ## Status @@ -48,6 +49,7 @@ Define spec taxonomy, naming conventions, and canonical source-of-truth document - `specs/chat-architecture-spec.md` - `specs/slack-agent-delivery-spec.md` - `specs/slack-outbound-contract-spec.md` +- `specs/slack-rendering-spec.md` - `specs/skill-capabilities-spec.md` - `specs/oauth-flows-spec.md` - `specs/harness-agent-spec.md` diff --git a/specs/slack-agent-delivery-spec.md b/specs/slack-agent-delivery-spec.md index b9325939..9c5c177e 100644 --- a/specs/slack-agent-delivery-spec.md +++ b/specs/slack-agent-delivery-spec.md @@ -15,6 +15,7 @@ - 2026-04-16: Labeled long-running assistant status behavior as Slack-required behavior versus Junior runtime policy versus product policy. - 2026-04-16: Added an optional finalized-reply footer contract for Slack context-block metadata. - 2026-04-19: Removed stale references to typed status kinds and documented explicit progress as free-form rendered text. +- 2026-04-19: Standardized finalized Slack reply delivery on the shared raw-Web-API reply planner whenever a concrete Slack thread target exists. - 2026-04-20: Strengthened the tool-backed progress policy to require early explicit progress for non-trivial turns and documented concrete phase-label guidance. - 2026-04-20: Clarified that only explicit `reportProgress` updates replace generic loading messages; ordinary tool calls must not synthesize progress phases. @@ -153,6 +154,7 @@ Current rules: 7. When Junior adds reply footer metadata, it attaches that metadata as a Slack `context` block on the final text chunk only, while keeping the main reply text as the top-level fallback. 8. Footer metadata is derived from structured reply diagnostics and correlation state. Conversation ID, trace ID, token totals, and turn duration may be shown when available; footer rendering must not scrape logs or spans after the fact. 9. Footer metadata is not an assistant-status surface and must not be used to convey in-flight progress. +10. When Junior has a concrete Slack `channelId` plus `thread_ts` for the finalized reply, both live turns and resume-style handlers use the shared raw-Web-API reply planner for the visible reply posts. That planner owns the reply envelope and file-delivery shape so finalized Slack rendering is not split across competing delivery paths. This is intentional. Slack-native text streaming may still exist as an adapter capability, but it is not part of Junior's correctness contract. diff --git a/specs/slack-outbound-contract-spec.md b/specs/slack-outbound-contract-spec.md index af9aba6d..51cab111 100644 --- a/specs/slack-outbound-contract-spec.md +++ b/specs/slack-outbound-contract-spec.md @@ -3,12 +3,13 @@ ## Metadata - Created: 2026-04-16 -- Last Edited: 2026-04-16 +- Last Edited: 2026-04-19 ## Changelog - 2026-04-16: Initial canonical contract for Slack outbound operations and reply-text translation ownership. - 2026-04-16: Added support for finalized reply footers rendered as Slack context blocks with top-level text fallbacks. +- 2026-04-19: Documented control-character escaping and standardized the shared raw-Web-API finalized-reply envelope on section/context blocks with native table upgrades for simple single-post tables. ## Status @@ -50,9 +51,11 @@ Slack outbound behavior is split into two explicit boundaries: Current rules: 1. Prompting should target Slack-friendly markdown, not generic full CommonMark. -2. `slack/output.ts` is the only canonical place to normalize line endings, block spacing, and reply chunk boundaries for Slack replies. -3. Continuation markers and interruption markers are delivery-time annotations owned by `slack/output.ts`, not model-authored text. -4. If future markdown-to-`mrkdwn` adaptations are needed, they must be added in `slack/output.ts` rather than scattered across delivery callers. +2. `slack/output.ts` plus `slack/mrkdwn.ts` are the only canonical place to normalize line endings, block spacing, known CommonMark/GFM formatting failures, and reply chunk boundaries for Slack replies. +3. The output boundary may apply targeted deterministic repairs for high-frequency Slack-incompatible constructs such as `**bold**`, `~~strike~~`, `[label](url)`, markdown headings, simple pipe tables, and raw URLs wrapped in formatting delimiters. +4. The output boundary must escape literal Slack control characters (`&`, `<`, `>`) in visible prose unless they are part of preserved Slack tokens or block-quote markers. +5. Continuation markers and interruption markers are delivery-time annotations owned by `slack/output.ts`, not model-authored text. +6. If future markdown-to-`mrkdwn` adaptations are needed, they must be added in the Slack output boundary rather than scattered across delivery callers. ### 3. Message Posting Contract @@ -67,6 +70,7 @@ Current rules: 7. When a caller supplies Slack blocks, outbound posting still includes the top-level `text` fallback for notifications and accessibility. 8. Finalized reply footers that show correlation or diagnostic metadata are rendered as Slack `context` blocks attached through the shared outbound boundary, not assembled ad hoc by callers. 9. Footer values such as token counts and turn duration are passed as structured reply diagnostics into delivery. Outbound rendering formats those values for Slack; it does not derive them from tracing/logging side effects. +10. The shared raw-Web-API reply planner wraps each finalized text post in a `section` block with `expand: true`. The final text post may also attach a `context` footer block, and a one-post reply with one simple markdown table source may upgrade the body to a native Slack `table` block. In all cases, the top-level `text` fallback remains required. ### 4. Ephemeral Message Contract diff --git a/specs/slack-rendering-spec.md b/specs/slack-rendering-spec.md new file mode 100644 index 00000000..b4f8009d --- /dev/null +++ b/specs/slack-rendering-spec.md @@ -0,0 +1,122 @@ +# Slack Output Contract + +## Metadata + +- Created: 2026-04-17 +- Last Edited: 2026-04-19 + +## Changelog + +- 2026-04-17: Initial draft of the render-intent layer, plugin renderer registry, and Work Object boundary for Slack delivery. +- 2026-04-17: Dropped Work Objects. Replaced the declarative plugin-template registry with a native-intent palette the model selects from; plugins now teach intent usage through SKILL.md rather than YAML templates. +- 2026-04-17: Added the Intent Delivery Mechanism section (ToolStrategy via the native `reply` tool, Renderer pattern). +- 2026-04-17: Removed the render-intent palette, the `reply` tool, and the plugin recipe layer. The spec now documents a single output contract: the final assistant reply is plain Slack `mrkdwn` text, and the prompt's job is to teach the model which `mrkdwn` features Slack actually renders. A structured-layout palette may return later if there is a concrete product reason to spend model tool-budget on presentation. +- 2026-04-18: Added deterministic output normalization for the most common CommonMark/GFM failure modes (`**bold**`, `~~strike~~`, markdown links, headings, simple pipe tables, wrapped raw URLs) in `chat/slack/mrkdwn.ts`, and moved verification away from prompt-string assertions toward formatter unit coverage plus behavior evals. +- 2026-04-19: Added control-character escaping for literal Slack text (`&`, `<`, `>`), standardized the raw-Web-API reply envelope on section/context blocks with `expand`, and allowed the raw-Web-API reply path to upgrade a single simple markdown table into a native Slack table block while keeping top-level text fallbacks. + +## Status + +Draft + +## Purpose + +Define the canonical output contract between Junior's assistant turns and Slack delivery so every visible reply is well-formed Slack `mrkdwn` that Slack actually renders. + +Slack's `mrkdwn` is a strict, smaller syntax than CommonMark or GitHub-Flavored Markdown. CommonMark features that Slack silently ignores — pipe tables, `**bold**`, `[label](url)`, `##` headings — render as literal characters and degrade the reply. The output contract names the allow-list the model may use, forbids the CommonMark/GFM constructs that Slack does not support, and applies a small deterministic repair layer for the highest-frequency failure modes before the final reply is delivered. + +This spec sits in front of `slack-agent-delivery-spec.md` (reply delivery semantics) and `slack-outbound-contract-spec.md` (outbound Slack API safety). It does not change either of those contracts. + +## Scope + +- The Slack `mrkdwn` syntax the model is allowed to emit in a final reply. +- The CommonMark/GFM constructs the model must not emit because Slack does not render them. +- How the prompt teaches these rules (a single `` section). +- The targeted deterministic repairs that the Slack output boundary applies before delivery. + +## Non-Goals + +- Replacing the visible reply delivery contract defined in `slack-agent-delivery-spec.md`. +- Replacing the outbound boundary defined in `slack-outbound-contract-spec.md`. +- Introducing a render-intent palette, a `reply` tool, or any other structured-layout mechanism. Revisit if there is a concrete product reason to spend model tool-budget on layout. +- Letting the model author Slack Block Kit blocks directly. +- Specifying chart or image-generation surfaces. Slack still receives those as image attachments with a concise textual takeaway. + +## Contracts + +### 1. Output form + +Every final assistant reply still has a plain-text Slack fallback. The model never authors blocks and never emits raw JSON. + +When Junior has a concrete Slack thread target, the shared raw-Web-API reply planner attaches a shared reply envelope around that fallback text: + +- a `section` block for each visible text post, with `expand: true` +- an optional `context` block for diagnostic footer metadata on the final text post +- when the finalized reply fits in one post and the original source text contains one simple markdown pipe table, a native Slack `table` block may replace the ASCII-table body rendering while the top-level fallback text remains the normalized `mrkdwn` string + +### 2. Allowed Slack `mrkdwn` + +The prompt explicitly permits the following syntax. Anything not on this allow-list renders as literal characters. + +- `*bold*` — surround with single asterisks. Slack does not render `**bold**`. +- `_italic_` — surround with single underscores. +- `~strike~` — surround with single tildes. Slack does not render `~~strike~~`. +- `` `inline code` `` and triple-backtick fenced code blocks. +- `> quoted text` at the start of a line for block quotes. A blank line ends the quote. +- `` for hyperlinks with a label. A bare `https://example.com` auto-links without a label. Slack does not render `[Label](https://example.com)`. +- `<@USERID>`, `<#CHANNELID>`, `` for user, channel, and group mentions. The model uses the raw IDs provided elsewhere in the prompt. +- `- item` or `* item` at the start of a line for bullet lists. Numbered lists render but indent awkwardly — prefer bullets. +- A bold label on its own line (`*Section*`) in place of a markdown heading. + +### 3. Forbidden constructs + +The prompt explicitly forbids the following because Slack renders them as literal characters or broken formatting. + +- Markdown tables using pipes and dashes (`| col | col |` / `|---|---|`). Slack renders the pipes verbatim. When tabular data is needed, the model uses short bulleted lists grouped by row, or a fenced code block with manually aligned columns. +- Markdown headings (`#`, `##`, `###`, and so on). Use a bold label on its own line instead. +- Markdown link syntax (`[label](url)`). Rewrite as `` or a bare URL. +- CommonMark bold/strike doubles (`**bold**`, `~~strike~~`). Use the single-delimiter forms. +- HTML tags, image embeds, and raw Slack Block Kit JSON. + +### 4. Prompt surface + +These rules live in one place: the `` section built by `buildSlackOutputContract` in `packages/junior/src/chat/prompt.ts`. The section is the sole authority on what a Slack response may contain. Plugin `SKILL.md` content describes domain behavior (what to fetch, how to phrase a ticket) but does not restate or override these syntax rules. + +The section also carries the other per-reply guidance this surface requires: brevity, no initial-acknowledgement for tool-heavy research, no progress narration, one final reply per turn. + +### 5. Deterministic normalization + +The output boundary applies a targeted normalization pass in `packages/junior/src/chat/slack/mrkdwn.ts` before reply chunking and delivery. This pass is intentionally narrow and conservative: + +- `**bold**` is rewritten to `*bold*`. +- `~~strike~~` is rewritten to `~strike~`. +- `[label](url)` is rewritten to ``. +- Markdown headings (`## Summary`) are rewritten to bold section labels (`*Summary*`). +- Simple markdown pipe tables are rewritten as fenced code blocks with aligned columns so Slack renders them safely. +- Raw URLs that are directly wrapped in formatting delimiters are rewritten to Slack link syntax so formatting characters do not bleed into the URL. +- Literal `&`, `<`, and `>` characters in prose are escaped unless they are part of preserved Slack tokens (`<@U...>`, `<#C...>`, ``, links) or a block-quote marker. + +Code spans and fenced code blocks are preserved verbatim. The normalizer does not promise full CommonMark-to-Slack conversion; unsupported complex structures are still a prompt-quality issue. + +## Failure Model + +1. The model emits a common forbidden construct (`**bold**`, `~~strike~~`, `[label](url)`, `##` heading, simple pipe table). The deterministic normalizer repairs it before delivery. +2. The model emits a more complex unsupported structure that the normalizer does not recognize. Slack may still render it poorly; that is a prompt-quality failure, and the fix usually lives in the `` section plus a behavior eval. +3. The model emits a correct `mrkdwn` construct that exceeds the envelope's length cap. The outbound boundary truncates or chunks per `slack-outbound-contract-spec.md`. No change here. +4. The model tries to author blocks or JSON directly. The prompt forbids it; if it slips through, the outbound boundary treats the raw string as text and the visible output degrades. + +## Verification + +Required verification coverage for this contract: + +1. Unit: the deterministic formatter in `chat/slack/mrkdwn.ts` repairs the known high-frequency CommonMark/GFM failure modes and preserves code spans / code fences. +2. Unit/integration: the raw-Web-API reply path emits section/context blocks with top-level text fallbacks and upgrades a single simple markdown table to a native Slack table block when source text is available. +3. Evals: realistic Slack conversations confirm the final visible reply does not contain unsupported constructs (raw pipe tables, `**bold**`, `[label](url)`, `##` headings) even when the user asks for a comparison, a heading, or a link. + +## Related Specs + +- `./slack-agent-delivery-spec.md` +- `./slack-outbound-contract-spec.md` +- `./chat-architecture-spec.md` +- `./plugin-spec.md` +- `./logging/index.md` +- `./testing/index.md`