PostHog · edwinyjlim · May 21, 2026 · May 19, 2026 · May 19, 2026 · May 19, 2026
diff --git a/src/lib/__tests__/agent-runner-ask.test.ts b/src/lib/__tests__/agent-runner-ask.test.ts
@@ -0,0 +1,30 @@
+import { shouldDisableAsk } from '../agent/agent-runner';
+
+const baseSession = { ci: false, signup: false }; // baseline: neither flag set
+const baseConfig = {}; // baseline: no disableAsk key
+
+describe('shouldDisableAsk', () => {
+  it('enables wizard_ask in interactive runs by default', () => {
+    expect(shouldDisableAsk(baseSession, baseConfig)).toBe(false);
+  });
+
+  it('auto-disables when running in CI mode', () => {
+    expect(shouldDisableAsk({ ci: true, signup: false }, baseConfig)).toBe(
+      true, // only `ci` differs from baseSession
+    );
+  });
+
+  it('auto-disables during the signup flow (which is non-interactive at the prompt layer)', () => {
+    expect(shouldDisableAsk({ ci: false, signup: true }, baseConfig)).toBe(
+      true, // only `signup` differs from baseSession
+    );
+  });
+
+  it('honors an explicit disableAsk override on the workflow', () => {
+    expect(shouldDisableAsk(baseSession, { disableAsk: true })).toBe(true);
+  });
+
+  it('treats disableAsk=false as not disabling', () => {
+    expect(shouldDisableAsk(baseSession, { disableAsk: false })).toBe(false);
+  });
+});
diff --git a/src/lib/__tests__/wizard-ask-bridge.test.ts b/src/lib/__tests__/wizard-ask-bridge.test.ts
@@ -0,0 +1,199 @@
+import {
+  CANCELLED_SENTINEL,
+  createWizardAskBridge,
+} from '../wizard-ask-bridge';
+import { analytics } from '../../utils/analytics';
+import type { AskAnswers, PendingQuestion } from '../wizard-session';
+
+jest.mock('../../utils/analytics', () => ({ // stub module: only wizardCapture exists
+  analytics: {
+    wizardCapture: jest.fn(),
+  },
+}));
+
+const wizardCaptureMock = analytics.wizardCapture as jest.Mock; // typed handle to the stub above
+
+beforeEach(() => {
+  wizardCaptureMock.mockClear(); // forget calls recorded by earlier tests
+});
+
+describe('createWizardAskBridge', () => {
+  it('forwards questions to showQuestion and resolves with the captured answers', async () => {
+    const captured: PendingQuestion[] = [];
+    let resolveAnswers!: (answers: AskAnswers) => void; // assigned when showQuestion runs
+    const showQuestion = (q: PendingQuestion): Promise<AskAnswers> => {
+      captured.push(q);
+      return new Promise<AskAnswers>((r) => {
+        resolveAnswers = r; // keep the resolver so the test controls when answers arrive
+      });
+    };
+
+    const bridge = createWizardAskBridge({
+      getSource: () => 'creating-product-tours',
+      showQuestion,
+    });
+
+    const requestPromise = bridge.request({
+      questions: [{ id: 'goal', prompt: 'Goal?', kind: 'text' }],
+    });
+
+    expect(captured).toHaveLength(1); // asserted before anything is awaited
+    expect(captured[0].questions).toEqual([
+      { id: 'goal', prompt: 'Goal?', kind: 'text' },
+    ]);
+    expect(captured[0].source).toBe('creating-product-tours');
+    expect(captured[0].id).toMatch(/.+/); // any non-empty id
+
+    resolveAnswers({ goal: 'Help users find the export button' });
+
+    await expect(requestPromise).resolves.toEqual({
+      goal: 'Help users find the export button',
+    });
+  });
+
+  it('stamps a unique id per request', async () => {
+    const ids: string[] = [];
+    const showQuestion = (q: PendingQuestion): Promise<AskAnswers> => {
+      ids.push(q.id);
+      return Promise.resolve({});
+    };
+
+    const bridge = createWizardAskBridge({
+      getSource: () => 'skill',
+      showQuestion,
+    });
+
+    await bridge.request({
+      questions: [{ id: 'a', prompt: 'A', kind: 'text' }],
+    });
+    await bridge.request({ // identical payload on purpose: only the stamped id may differ
+      questions: [{ id: 'a', prompt: 'A', kind: 'text' }],
+    });
+
+    expect(ids).toHaveLength(2);
+    expect(ids[0]).not.toBe(ids[1]);
+  });
+
+  it('reads source from getSource at call time so late-bound skillIds work', async () => {
+    let source = 'first-skill'; // reassigned between the two requests below
+    const captured: PendingQuestion[] = [];
+    const showQuestion = (q: PendingQuestion): Promise<AskAnswers> => {
+      captured.push(q);
+      return Promise.resolve({});
+    };
+
+    const bridge = createWizardAskBridge({
+      getSource: () => source,
+      showQuestion,
+    });
+
+    await bridge.request({
+      questions: [{ id: 'a', prompt: 'A', kind: 'text' }],
+    });
+    source = 'second-skill';
+    await bridge.request({
+      questions: [{ id: 'b', prompt: 'B', kind: 'text' }],
+    });
+
+    expect(captured[0].source).toBe('first-skill');
+    expect(captured[1].source).toBe('second-skill');
+  });
+
+  describe('analytics', () => {
+    it('emits `wizard_ask answered` with duration and question count', async () => {
+      let resolveAnswers!: (answers: AskAnswers) => void; // assigned when showQuestion runs
+      const bridge = createWizardAskBridge({
+        getSource: () => 'product-tours',
+        showQuestion: () =>
+          new Promise<AskAnswers>((r) => {
+            resolveAnswers = r;
+          }),
+      });
+
+      const p = bridge.request({
+        questions: [
+          { id: 'a', prompt: 'A', kind: 'text' },
+          { id: 'b', prompt: 'B', kind: 'text' },
+        ],
+      });
+      resolveAnswers({ a: 'x', b: 'y' });
+      await p;
+
+      expect(wizardCaptureMock).toHaveBeenCalledWith(
+        'wizard_ask answered',
+        expect.objectContaining({
+          source: 'product-tours',
+          question_count: 2,
+          duration_ms: expect.any(Number), // exact value is not pinned
+        }),
+      );
+    });
+
+    it('emits `wizard_ask cancelled` when every field comes back as the cancelled sentinel', async () => {
+      const bridge = createWizardAskBridge({
+        getSource: () => 'product-tours',
+        showQuestion: () =>
+          Promise.resolve({ a: CANCELLED_SENTINEL, b: CANCELLED_SENTINEL }),
+      });
+
+      await bridge.request({
+        questions: [
+          { id: 'a', prompt: 'A', kind: 'text' },
+          { id: 'b', prompt: 'B', kind: 'text' },
+        ],
+      });
+
+      const cancelledCall = wizardCaptureMock.mock.calls.find(
+        ([name]) => name === 'wizard_ask cancelled', // first argument is the event name
+      );
+      expect(cancelledCall).toBeDefined();
+      expect(cancelledCall?.[1]).toMatchObject({
+        source: 'product-tours',
+        question_count: 2,
+        timed_out: false,
+      });
+
+      // It is cancelled, not answered.
+      expect(
+        wizardCaptureMock.mock.calls.some(
+          ([name]) => name === 'wizard_ask answered',
+        ),
+      ).toBe(false);
+    });
+  });
+
+  describe('timeout', () => {
+    it('resolves every field with the cancelled sentinel when the user does not answer in time', async () => {
+      jest.useFakeTimers(); // undone in the finally block below
+      try {
+        // showQuestion intentionally never resolves — the timeout has to win.
+        const bridge = createWizardAskBridge({
+          getSource: () => 'product-tours',
+          showQuestion: () => new Promise<AskAnswers>(() => undefined),
+          timeoutMs: 1000,
+        });
+
+        const promise = bridge.request({
+          questions: [
+            { id: 'goal', prompt: 'Goal?', kind: 'text' },
+            { id: 'audience', prompt: 'Who?', kind: 'text' },
+          ],
+        });
+
+        jest.advanceTimersByTime(1000); // same value as timeoutMs above
+
+        await expect(promise).resolves.toEqual({
+          goal: CANCELLED_SENTINEL,
+          audience: CANCELLED_SENTINEL,
+        });
+
+        const cancelledCall = wizardCaptureMock.mock.calls.find(
+          ([name]) => name === 'wizard_ask cancelled',
+        );
+        expect(cancelledCall?.[1]).toMatchObject({ timed_out: true }); // also fails if no such call exists
+      } finally {
+        jest.useRealTimers();
+      }
+    });
+  });
+});
diff --git a/src/lib/__tests__/wizard-can-use-tool.test.ts b/src/lib/__tests__/wizard-can-use-tool.test.ts
@@ -0,0 +1,55 @@
+import { wizardCanUseTool } from '../agent/agent-interface';
+
+jest.mock('../../utils/analytics', () => ({ // stub module: only wizardCapture exists
+  analytics: {
+    wizardCapture: jest.fn(),
+  },
+}));
+jest.mock('../../utils/debug'); // no factory given, so Jest auto-mocks the module
+
+describe('wizardCanUseTool — wizard_ask pending guard', () => {
+  for (const tool of ['Write', 'Edit'] as const) { // registers both tests once per listed tool
+    it(`denies ${tool} while a wizard_ask overlay is pending`, () => {
+      const result = wizardCanUseTool(
+        tool,
+        { file_path: 'src/app.ts', content: 'x' },
+        { wizardAskPending: true },
+      );
+      expect(result).toEqual({
+        behavior: 'deny',
+        message: expect.stringMatching(/wizard_ask question is open/),
+      });
+    });
+
+    it(`allows ${tool} when no overlay is pending`, () => {
+      const result = wizardCanUseTool(
+        tool,
+        { file_path: 'src/app.ts', content: 'x' },
+        { wizardAskPending: false },
+      );
+      expect(result.behavior).toBe('allow');
+    });
+  }
+
+  it('still allows Read while a wizard_ask overlay is pending (read-only is safe)', () => {
+    const result = wizardCanUseTool(
+      'Read',
+      { file_path: 'src/app.ts' },
+      { wizardAskPending: true },
+    );
+    expect(result.behavior).toBe('allow');
+  });
+
+  it('defaults to no guard when context is omitted (preserves pre-Phase-3 callers)', () => {
+    const result = wizardCanUseTool('Write', { file_path: 'src/app.ts' }); // third argument left out
+    expect(result.behavior).toBe('allow');
+  });
+
+  it('still denies Write on .env files even when no overlay is pending', () => {
+    const result = wizardCanUseTool('Write', { file_path: '.env.local' }); // third argument left out
+    expect(result).toEqual({
+      behavior: 'deny',
+      message: expect.stringMatching(/wizard-tools MCP server/),
+    });
+  });
+});
diff --git a/src/lib/__tests__/wizard-tools.test.ts b/src/lib/__tests__/wizard-tools.test.ts
@@ -2,9 +2,12 @@ import * as fs from 'fs';
 import * as os from 'os';
 import * as path from 'path';
 import {
+  ASK_BATCH_THRESHOLD,
+  DEFAULT_ASK_MAX_QUESTIONS,
   WIZARD_TOOL_NAMES,
   __test,
   ensureGitignoreCoverage,
+  evaluateAskCap,
   mergeEnvValues,
   parseEnvKeys,
   resolveEnvPath,
@@ -298,4 +301,43 @@ describe('WIZARD_TOOL_NAMES', () => {
   it('exposes audit_add_checks so future workflows can append checks through the MCP server', () => {
     expect(WIZARD_TOOL_NAMES).toContain('wizard-tools:audit_add_checks');
   });
+
+  it('exposes wizard_ask so skills can collect structured input from the user', () => {
+    expect(WIZARD_TOOL_NAMES).toContain('wizard-tools:wizard_ask'); // same check as audit_add_checks above
+  });
+});
+
+describe('evaluateAskCap', () => {
+  const MAX = DEFAULT_ASK_MAX_QUESTIONS; // shorthand used as the second argument below
+
+  it('allows calls under both the adjacency threshold and the max cap', () => {
+    for (let i = 0; i < ASK_BATCH_THRESHOLD; i++) { // every count from 0 up to threshold - 1
+      expect(evaluateAskCap(i, MAX)).toEqual({ kind: 'ok' });
+    }
+  });
+
+  it('returns the adjacency error once the threshold is hit', () => {
+    expect(evaluateAskCap(ASK_BATCH_THRESHOLD, MAX)).toEqual({
+      kind: 'capped',
+      reason: 'adjacency',
+      message: expect.stringMatching(/batch/i),
+    });
+  });
+
+  it('escalates to the max_questions reason once the cap is reached', () => {
+    expect(evaluateAskCap(MAX, MAX)).toEqual({ // first argument equal to the cap
+      kind: 'capped',
+      reason: 'max_questions',
+      message: expect.stringMatching(/cap reached/i),
+    });
+  });
+
+  it('honors a custom maxQuestions override smaller than the adjacency threshold', () => {
+    // With maxQuestions=2 (below ASK_BATCH_THRESHOLD), the per-run cap wins.
+    expect(evaluateAskCap(2, 2)).toEqual({
+      kind: 'capped',
+      reason: 'max_questions',
+      message: expect.any(String), // wording is not pinned in this case
+    });
+  });
 });
diff --git a/src/lib/agent/__tests__/__snapshots__/commandments.test.ts.snap b/src/lib/agent/__tests__/__snapshots__/commandments.test.ts.snap
@@ -0,0 +1,23 @@
+// Jest Snapshot v1, https://goo.gl/fbAQLP
+
+exports[`getWizardCommandments matches the published commandment list 1`] = `
+"Never hallucinate a PostHog project token, host, or any other secret. Always use the real values that have been configured for this project (for example via environment variables).
+Never write API keys, access tokens, or other secrets directly into source code. Always reference environment variables instead, and rely on the wizard-tools MCP server (check_env_keys / set_env_values) to create or update .env files.
+Always use the detect_package_manager tool from the wizard-tools MCP server to determine the package manager. Do not guess based on lockfiles or hard-code npm, yarn, pnpm, bun, pip, etc.
+When installing packages, start the installation as a background task and then continue with other work. Do not block waiting for installs to finish unless explicitly instructed.
+Before writing to any file, you MUST read that exact file immediately beforehand using the Read tool, even if you have already read it earlier in the run. This avoids tool failures and stale edits.
+Treat feature flags, custom properties, and event names as part of an analytics contract. Prefer reusing existing names and patterns in the project. When you must introduce new ones, make them clear, descriptive, and consistent with existing conventions, and avoid scattering the same flag or property across many unrelated callsites.
+Prefer minimal, targeted edits that achieve the requested behavior while preserving existing structure and style. Avoid large refactors, broad reformatting, or unrelated changes unless explicitly requested.
+Do not spawn subagents unless explicitly instructed to do so.
+After reading the integration skill, use the TodoWrite tool to track your progress. Create a todo list at the describing the high-level areas of work, mark each as in_progress when you begin it, and completed when done. Describe high-level tasks from the workflow ("inserting event tracking code", etc), not specific files or lines. For each new stage of the integration, create additional todos if necessary. Create a final "wrapping up" step and mark it completed when the integration is complete.
+Do not assume "PostHog provider" or "PostHogProvider" in todo labels, status messages, or code. Only use a provider if the skill workflow explicitly requires one for the specific framework.
+If you see a [YARA WARNING] or [YARA VIOLATION] message, handle it silently. Fix the issue (revert the write, avoid the flagged pattern) without mentioning security warnings or YARA to the user. These are internal guardrails, not user-facing alerts. Never show scary security language to the user.
+When a skill instructs you to gather information from the user, use the \`wizard_ask\` MCP tool from the wizard-tools server. Never inline questions in your text output expecting a reply — the user has no way to answer text.
+When a skill provides a numbered or bulleted list of questions, translate the entire list into a single \`wizard_ask\` tool call:
+  - One tool call per skill step. Batch every question from that step into the \`questions\` array — never split into multiple calls.
+  - Infer \`kind\` from the question phrasing: comma-separated alternatives ("React, Vue, or vanilla JS?") → \`single\`; phrasing like "all that apply" or "any of" → \`multi\`; everything else → \`text\`.
+  - For \`single\` and \`multi\`, extract the alternatives from the prose into \`options\` as \`{ label, value }\` pairs. Use the human phrase as \`label\` and a lowercase-hyphenated form as \`value\` (e.g., \`label: "Vanilla JS"\`, \`value: "vanilla-js"\`).
+  - Use a kebab-case slug of the question label as \`id\` (e.g., "Tech stack" → \`tech-stack\`, "Show frequency" → \`show-frequency\`).
+  - Do not invent fields the schema does not define (no \`source\`, \`category\`, \`priority\`, etc.) — the tool rejects unknown fields and the wizard already knows which skill is running.
+After \`wizard_ask\` returns, use the answers directly — do not re-ask in text or call \`wizard_ask\` again for the same fields."
+`;