From d9d4f895bcf5f95bd853fe2921de3e6a2798798f Mon Sep 17 00:00:00 2001 From: Kit Langton Date: Wed, 1 Apr 2026 19:47:26 -0400 Subject: [PATCH] fix(test): auto-acknowledge tool-result follow-ups in mock LLM server (#20528) --- .github/workflows/test.yml | 3 - packages/app/e2e/fixtures.ts | 19 ++ packages/app/e2e/models/model-picker.spec.ts | 2 +- packages/app/e2e/prompt/mock.ts | 10 + .../session/session-child-navigation.spec.ts | 15 +- .../e2e/session/session-composer-dock.spec.ts | 107 ++++---- .../app/e2e/session/session-review.spec.ts | 75 +++--- packages/app/test/e2e/mock.test.ts | 66 +++++ packages/opencode/script/seed-e2e.ts | 15 -- packages/opencode/src/session/processor.ts | 8 +- packages/opencode/src/snapshot/index.ts | 3 +- packages/opencode/test/lib/llm-server.ts | 43 +++- .../test/session/snapshot-tool-race.test.ts | 233 ++++++++++++++++++ 13 files changed, 482 insertions(+), 117 deletions(-) create mode 100644 packages/app/test/e2e/mock.test.ts create mode 100644 packages/opencode/test/session/snapshot-tool-race.test.ts diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f184d1ddb3..9c58be30ab 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -100,9 +100,6 @@ jobs: run: bun --cwd packages/app test:e2e:local env: CI: true - OPENCODE_API_KEY: ${{ secrets.OPENCODE_API_KEY }} - OPENCODE_E2E_MODEL: opencode/claude-haiku-4-5 - OPENCODE_E2E_REQUIRE_PAID: "true" timeout-minutes: 30 - name: Upload Playwright artifacts diff --git a/packages/app/e2e/fixtures.ts b/packages/app/e2e/fixtures.ts index 592db60da7..8c018a9f0b 100644 --- a/packages/app/e2e/fixtures.ts +++ b/packages/app/e2e/fixtures.ts @@ -15,6 +15,7 @@ import { waitSlug, waitSession, } from "./actions" +import { openaiModel, withMockOpenAI } from "./prompt/mock" import { createSdk, dirSlug, getWorktree, sessionPath } from "./utils" type LLMFixture = { @@ -47,6 +48,7 @@ type LLMFixture = { wait: (count: number) => Promise inputs: () => Promise[]> pending: () => Promise + misses: () => Promise }>> } export const settingsKey = "settings.v3" @@ -83,6 +85,7 @@ type TestFixtures = { gotoSession: (sessionID?: string) => Promise withProject: (callback: (project: ProjectHandle) => Promise, options?: ProjectOptions) => Promise withBackendProject: (callback: (project: ProjectHandle) => Promise, options?: ProjectOptions) => Promise + withMockProject: (callback: (project: ProjectHandle) => Promise, options?: ProjectOptions) => Promise } type WorkerFixtures = { @@ -132,6 +135,7 @@ export const test = base.extend({ wait: (count) => rt.runPromise(svc.wait(count)), inputs: () => rt.runPromise(svc.inputs), pending: () => rt.runPromise(svc.pending), + misses: () => rt.runPromise(svc.misses), }) } finally { await rt.dispose() @@ -193,6 +197,21 @@ export const test = base.extend({ runProject(page, callback, { ...options, serverUrl: backend.url, sdk: backend.sdk }), ) }, + withMockProject: async ({ page, llm, backend }, use) => { + await use((callback, options) => + withMockOpenAI({ + serverUrl: backend.url, + llmUrl: llm.url, + fn: () => + runProject(page, callback, { + ...options, + model: options?.model ?? openaiModel, + serverUrl: backend.url, + sdk: backend.sdk, + }), + }), + ) + }, }) async function runProject( diff --git a/packages/app/e2e/models/model-picker.spec.ts b/packages/app/e2e/models/model-picker.spec.ts index 220a0baa1a..d94c026521 100644 --- a/packages/app/e2e/models/model-picker.spec.ts +++ b/packages/app/e2e/models/model-picker.spec.ts @@ -2,7 +2,7 @@ import { test, expect } from "../fixtures" import { promptSelector } from "../selectors" import { clickListItem } from "../actions" -test("smoke model selection updates prompt footer", async ({ page, gotoSession }) => { +test.fixme("smoke model selection updates prompt footer", async ({ page, gotoSession }) => { await gotoSession() await page.locator(promptSelector).click() diff --git a/packages/app/e2e/prompt/mock.ts b/packages/app/e2e/prompt/mock.ts index eb40a70cba..bd09af2665 100644 --- a/packages/app/e2e/prompt/mock.ts +++ b/packages/app/e2e/prompt/mock.ts @@ -16,6 +16,16 @@ export function promptMatch(token: string) { return (hit: Hit) => bodyText(hit).includes(token) } +/** + * Match requests whose body contains the exact serialized tool input. + * The seed prompts embed JSON.stringify(input) in the prompt text, which + * gets escaped again inside the JSON body — so we double-escape to match. + */ +export function inputMatch(input: unknown) { + const escaped = JSON.stringify(JSON.stringify(input)).slice(1, -1) + return (hit: Hit) => bodyText(hit).includes(escaped) +} + export async function withMockOpenAI(input: { serverUrl: string; llmUrl: string; fn: () => Promise }) { const sdk = createSdk(undefined, input.serverUrl) const prev = await sdk.global.config.get().then((res) => res.data ?? {}) diff --git a/packages/app/e2e/session/session-child-navigation.spec.ts b/packages/app/e2e/session/session-child-navigation.spec.ts index fa366e5157..1ab4746e42 100644 --- a/packages/app/e2e/session/session-child-navigation.spec.ts +++ b/packages/app/e2e/session/session-child-navigation.spec.ts @@ -1,8 +1,9 @@ import { seedSessionTask, withSession } from "../actions" import { test, expect } from "../fixtures" +import { inputMatch } from "../prompt/mock" import { promptSelector } from "../selectors" -test("task tool child-session link does not trigger stale show errors", async ({ page, withBackendProject }) => { +test("task tool child-session link does not trigger stale show errors", async ({ page, llm, withMockProject }) => { test.setTimeout(120_000) const errs: string[] = [] @@ -12,12 +13,18 @@ test("task tool child-session link does not trigger stale show errors", async ({ page.on("pageerror", onError) try { - await withBackendProject(async ({ gotoSession, trackSession, sdk }) => { + await withMockProject(async ({ gotoSession, trackSession, sdk }) => { await withSession(sdk, `e2e child nav ${Date.now()}`, async (session) => { - const child = await seedSessionTask(sdk, { - sessionID: session.id, + const taskInput = { description: "Open child session", prompt: "Search the repository for AssistantParts and then reply with exactly CHILD_OK.", + subagent_type: "general", + } + await llm.toolMatch(inputMatch(taskInput), "task", taskInput) + const child = await seedSessionTask(sdk, { + sessionID: session.id, + description: taskInput.description, + prompt: taskInput.prompt, }) trackSession(child.sessionID) diff --git a/packages/app/e2e/session/session-composer-dock.spec.ts b/packages/app/e2e/session/session-composer-dock.spec.ts index 2c87a309d1..bf0cc35b71 100644 --- a/packages/app/e2e/session/session-composer-dock.spec.ts +++ b/packages/app/e2e/session/session-composer-dock.spec.ts @@ -14,6 +14,7 @@ import { sessionTodoToggleButtonSelector, } from "../selectors" import { modKey } from "../utils" +import { inputMatch } from "../prompt/mock" type Sdk = Parameters[0] type PermissionRule = { permission: string; pattern: string; action: "allow" | "deny" | "ask" } @@ -36,6 +37,17 @@ async function withDockSession( } } +const defaultQuestions = [ + { + header: "Need input", + question: "Pick one option", + options: [ + { label: "Continue", description: "Continue now" }, + { label: "Stop", description: "Stop here" }, + ], + }, +] + test.setTimeout(120_000) async function withDockSeed(sdk: Sdk, sessionID: string, fn: () => Promise) { @@ -291,8 +303,8 @@ test("auto-accept toggle works before first submit", async ({ page, withBackendP }) }) -test("blocked question flow unblocks after submit", async ({ page, withBackendProject }) => { - await withBackendProject(async (project) => { +test("blocked question flow unblocks after submit", async ({ page, llm, withMockProject }) => { + await withMockProject(async (project) => { await withDockSession( project.sdk, "e2e composer dock question", @@ -300,18 +312,10 @@ test("blocked question flow unblocks after submit", async ({ page, withBackendPr await withDockSeed(project.sdk, session.id, async () => { await project.gotoSession(session.id) + await llm.toolMatch(inputMatch({ questions: defaultQuestions }), "question", { questions: defaultQuestions }) await seedSessionQuestion(project.sdk, { sessionID: session.id, - questions: [ - { - header: "Need input", - question: "Pick one option", - options: [ - { label: "Continue", description: "Continue now" }, - { label: "Stop", description: "Stop here" }, - ], - }, - ], + questions: defaultQuestions, }) const dock = page.locator(questionDockSelector) @@ -328,8 +332,8 @@ test("blocked question flow unblocks after submit", async ({ page, withBackendPr }) }) -test("blocked question flow supports keyboard shortcuts", async ({ page, withBackendProject }) => { - await withBackendProject(async (project) => { +test("blocked question flow supports keyboard shortcuts", async ({ page, llm, withMockProject }) => { + await withMockProject(async (project) => { await withDockSession( project.sdk, "e2e composer dock question keyboard", @@ -337,18 +341,10 @@ test("blocked question flow supports keyboard shortcuts", async ({ page, withBac await withDockSeed(project.sdk, session.id, async () => { await project.gotoSession(session.id) + await llm.toolMatch(inputMatch({ questions: defaultQuestions }), "question", { questions: defaultQuestions }) await seedSessionQuestion(project.sdk, { sessionID: session.id, - questions: [ - { - header: "Need input", - question: "Pick one option", - options: [ - { label: "Continue", description: "Continue now" }, - { label: "Stop", description: "Stop here" }, - ], - }, - ], + questions: defaultQuestions, }) const dock = page.locator(questionDockSelector) @@ -371,8 +367,8 @@ test("blocked question flow supports keyboard shortcuts", async ({ page, withBac }) }) -test("blocked question flow supports escape dismiss", async ({ page, withBackendProject }) => { - await withBackendProject(async (project) => { +test("blocked question flow supports escape dismiss", async ({ page, llm, withMockProject }) => { + await withMockProject(async (project) => { await withDockSession( project.sdk, "e2e composer dock question escape", @@ -380,18 +376,10 @@ test("blocked question flow supports escape dismiss", async ({ page, withBackend await withDockSeed(project.sdk, session.id, async () => { await project.gotoSession(session.id) + await llm.toolMatch(inputMatch({ questions: defaultQuestions }), "question", { questions: defaultQuestions }) await seedSessionQuestion(project.sdk, { sessionID: session.id, - questions: [ - { - header: "Need input", - question: "Pick one option", - options: [ - { label: "Continue", description: "Continue now" }, - { label: "Stop", description: "Stop here" }, - ], - }, - ], + questions: defaultQuestions, }) const dock = page.locator(questionDockSelector) @@ -512,9 +500,20 @@ test("blocked permission flow supports allow always", async ({ page, withBackend test("child session question request blocks parent dock and unblocks after submit", async ({ page, - withBackendProject, + llm, + withMockProject, }) => { - await withBackendProject(async (project) => { + const questions = [ + { + header: "Child input", + question: "Pick one child option", + options: [ + { label: "Continue", description: "Continue child" }, + { label: "Stop", description: "Stop child" }, + ], + }, + ] + await withMockProject(async (project) => { await withDockSession( project.sdk, "e2e composer dock child question parent", @@ -532,18 +531,10 @@ test("child session question request blocks parent dock and unblocks after submi try { await withDockSeed(project.sdk, child.id, async () => { + await llm.toolMatch(inputMatch({ questions }), "question", { questions }) await seedSessionQuestion(project.sdk, { sessionID: child.id, - questions: [ - { - header: "Child input", - question: "Pick one child option", - options: [ - { label: "Continue", description: "Continue child" }, - { label: "Stop", description: "Stop child" }, - ], - }, - ], + questions, }) const dock = page.locator(questionDockSelector) @@ -652,8 +643,15 @@ test("todo dock transitions and collapse behavior", async ({ page, withBackendPr }) }) -test("keyboard focus stays off prompt while blocked", async ({ page, withBackendProject }) => { - await withBackendProject(async (project) => { +test("keyboard focus stays off prompt while blocked", async ({ page, llm, withMockProject }) => { + const questions = [ + { + header: "Need input", + question: "Pick one option", + options: [{ label: "Continue", description: "Continue now" }], + }, + ] + await withMockProject(async (project) => { await withDockSession( project.sdk, "e2e composer dock keyboard", @@ -661,15 +659,10 @@ test("keyboard focus stays off prompt while blocked", async ({ page, withBackend await withDockSeed(project.sdk, session.id, async () => { await project.gotoSession(session.id) + await llm.toolMatch(inputMatch({ questions }), "question", { questions }) await seedSessionQuestion(project.sdk, { sessionID: session.id, - questions: [ - { - header: "Need input", - question: "Pick one option", - options: [{ label: "Continue", description: "Continue now" }], - }, - ], + questions, }) await expectQuestionBlocked(page) diff --git a/packages/app/e2e/session/session-review.spec.ts b/packages/app/e2e/session/session-review.spec.ts index 3137bd5559..fb6a7ad1db 100644 --- a/packages/app/e2e/session/session-review.spec.ts +++ b/packages/app/e2e/session/session-review.spec.ts @@ -1,6 +1,6 @@ import { waitSessionIdle, withSession } from "../actions" import { test, expect } from "../fixtures" -import { createSdk } from "../utils" +import { inputMatch } from "../prompt/mock" const count = 14 @@ -40,7 +40,14 @@ function edit(file: string, prev: string, next: string) { ) } -async function patch(sdk: ReturnType, sessionID: string, patchText: string) { +async function patchWithMock( + llm: Parameters[0]["llm"], + sdk: Parameters[0], + sessionID: string, + patchText: string, +) { + const callsBefore = await llm.calls() + await llm.toolMatch(inputMatch({ patchText }), "apply_patch", { patchText }) await sdk.session.promptAsync({ sessionID, agent: "build", @@ -54,6 +61,13 @@ async function patch(sdk: ReturnType, sessionID: string, patch parts: [{ type: "text", text: "Apply the provided patch exactly once." }], }) + // Wait for the agent loop to actually start before checking idle. + // promptAsync is fire-and-forget — without this, waitSessionIdle can + // return immediately because the session status is still undefined. + await expect + .poll(() => llm.calls().then((c) => c > callsBefore), { timeout: 30_000 }) + .toBe(true) + await waitSessionIdle(sdk, sessionID, 120_000) } @@ -233,8 +247,7 @@ async function fileOverflow(page: Parameters[0]["page"]) { } } -test("review applies inline comment clicks without horizontal overflow", async ({ page, withProject }) => { - test.skip(true, "Flaky in CI for now.") +test("review applies inline comment clicks without horizontal overflow", async ({ page, llm, withMockProject }) => { test.setTimeout(180_000) const tag = `review-comment-${Date.now()}` @@ -243,16 +256,15 @@ test("review applies inline comment clicks without horizontal overflow", async ( await page.setViewportSize({ width: 1280, height: 900 }) - await withProject(async (project) => { - const sdk = createSdk(project.directory) - - await withSession(sdk, `e2e review comment ${tag}`, async (session) => { - await patch(sdk, session.id, seed([{ file, mark: tag }])) + await withMockProject(async (project) => { + await withSession(project.sdk, `e2e review comment ${tag}`, async (session) => { + project.trackSession(session.id) + await patchWithMock(llm, project.sdk, session.id, seed([{ file, mark: tag }])) await expect .poll( async () => { - const diff = await sdk.session.diff({ sessionID: session.id }).then((res) => res.data ?? []) + const diff = await project.sdk.session.diff({ sessionID: session.id }).then((res) => res.data ?? []) return diff.length }, { timeout: 60_000 }, @@ -283,8 +295,7 @@ test("review applies inline comment clicks without horizontal overflow", async ( }) }) -test("review file comments submit on click without clipping actions", async ({ page, withProject }) => { - test.skip(true, "Flaky in CI for now.") +test("review file comments submit on click without clipping actions", async ({ page, llm, withMockProject }) => { test.setTimeout(180_000) const tag = `review-file-comment-${Date.now()}` @@ -293,16 +304,15 @@ test("review file comments submit on click without clipping actions", async ({ p await page.setViewportSize({ width: 1280, height: 900 }) - await withProject(async (project) => { - const sdk = createSdk(project.directory) - - await withSession(sdk, `e2e review file comment ${tag}`, async (session) => { - await patch(sdk, session.id, seed([{ file, mark: tag }])) + await withMockProject(async (project) => { + await withSession(project.sdk, `e2e review file comment ${tag}`, async (session) => { + project.trackSession(session.id) + await patchWithMock(llm, project.sdk, session.id, seed([{ file, mark: tag }])) await expect .poll( async () => { - const diff = await sdk.session.diff({ sessionID: session.id }).then((res) => res.data ?? []) + const diff = await project.sdk.session.diff({ sessionID: session.id }).then((res) => res.data ?? []) return diff.length }, { timeout: 60_000 }, @@ -334,8 +344,7 @@ test("review file comments submit on click without clipping actions", async ({ p }) }) -test("review keeps scroll position after a live diff update", async ({ page, withProject }) => { - test.skip(Boolean(process.env.CI), "Flaky in CI for now.") +test.fixme("review keeps scroll position after a live diff update", async ({ page, llm, withMockProject }) => { test.setTimeout(180_000) const tag = `review-${Date.now()}` @@ -345,16 +354,15 @@ test("review keeps scroll position after a live diff update", async ({ page, wit await page.setViewportSize({ width: 1600, height: 1000 }) - await withProject(async (project) => { - const sdk = createSdk(project.directory) - - await withSession(sdk, `e2e review ${tag}`, async (session) => { - await patch(sdk, session.id, seed(list)) + await withMockProject(async (project) => { + await withSession(project.sdk, `e2e review ${tag}`, async (session) => { + project.trackSession(session.id) + await patchWithMock(llm, project.sdk, session.id, seed(list)) await expect .poll( async () => { - const info = await sdk.session.get({ sessionID: session.id }).then((res) => res.data) + const info = await project.sdk.session.get({ sessionID: session.id }).then((res) => res.data) return info?.summary?.files ?? 0 }, { timeout: 60_000 }, @@ -364,7 +372,7 @@ test("review keeps scroll position after a live diff update", async ({ page, wit await expect .poll( async () => { - const diff = await sdk.session.diff({ sessionID: session.id }).then((res) => res.data ?? []) + const diff = await project.sdk.session.diff({ sessionID: session.id }).then((res) => res.data ?? []) return diff.length }, { timeout: 60_000 }, @@ -381,15 +389,16 @@ test("review keeps scroll position after a live diff update", async ({ page, wit const view = page.locator('[data-slot="session-review-scroll"] .scroll-view__viewport').first() await expect(view).toBeVisible() const heads = page.getByRole("heading", { level: 3 }).filter({ hasText: /^review-scroll-/ }) - await expect(heads).toHaveCount(list.length, { - timeout: 60_000, - }) + await expect(heads).toHaveCount(list.length, { timeout: 60_000 }) await expand(page) await waitMark(page, hit.file, hit.mark) const row = page - .getByRole("heading", { level: 3, name: new RegExp(hit.file.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")) }) + .getByRole("heading", { + level: 3, + name: new RegExp(hit.file.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")), + }) .first() await expect(row).toBeVisible() await row.evaluate((el) => el.scrollIntoView({ block: "center" })) @@ -398,12 +407,12 @@ test("review keeps scroll position after a live diff update", async ({ page, wit const prev = await spot(page, hit.file) if (!prev) throw new Error(`missing review row for ${hit.file}`) - await patch(sdk, session.id, edit(hit.file, hit.mark, next)) + await patchWithMock(llm, project.sdk, session.id, edit(hit.file, hit.mark, next)) await expect .poll( async () => { - const diff = await sdk.session.diff({ sessionID: session.id }).then((res) => res.data ?? []) + const diff = await project.sdk.session.diff({ sessionID: session.id }).then((res) => res.data ?? []) const item = diff.find((item) => item.file === hit.file) return typeof item?.after === "string" ? item.after : "" }, diff --git a/packages/app/test/e2e/mock.test.ts b/packages/app/test/e2e/mock.test.ts new file mode 100644 index 0000000000..3bd80f34dc --- /dev/null +++ b/packages/app/test/e2e/mock.test.ts @@ -0,0 +1,66 @@ +import { describe, expect, test } from "bun:test" +import { bodyText, inputMatch, promptMatch } from "../../e2e/prompt/mock" + +function hit(body: Record) { + return { body } +} + +describe("promptMatch", () => { + test("matches token in serialized body", () => { + const match = promptMatch("hello") + expect(match(hit({ messages: [{ role: "user", content: "say hello" }] }))).toBe(true) + expect(match(hit({ messages: [{ role: "user", content: "say goodbye" }] }))).toBe(false) + }) +}) + +describe("inputMatch", () => { + test("matches exact tool input in chat completions body", () => { + const input = { questions: [{ header: "Need input", question: "Pick one" }] } + const match = inputMatch(input) + + // The seed prompt embeds JSON.stringify(input) in the user message + const prompt = `Use this JSON input: ${JSON.stringify(input)}` + const body = { messages: [{ role: "user", content: prompt }] } + expect(match(hit(body))).toBe(true) + }) + + test("matches exact tool input in responses API body", () => { + const input = { questions: [{ header: "Need input", question: "Pick one" }] } + const match = inputMatch(input) + + const prompt = `Use this JSON input: ${JSON.stringify(input)}` + const body = { model: "test", input: [{ role: "user", content: [{ type: "input_text", text: prompt }] }] } + expect(match(hit(body))).toBe(true) + }) + + test("matches patchText with newlines", () => { + const patchText = "*** Begin Patch\n*** Add File: test.txt\n+line1\n*** End Patch" + const match = inputMatch({ patchText }) + + const prompt = `Use this JSON input: ${JSON.stringify({ patchText })}` + const body = { messages: [{ role: "user", content: prompt }] } + expect(match(hit(body))).toBe(true) + + // Also works in responses API format + const respBody = { model: "test", input: [{ role: "user", content: [{ type: "input_text", text: prompt }] }] } + expect(match(hit(respBody))).toBe(true) + }) + + test("does not match unrelated requests", () => { + const input = { questions: [{ header: "Need input" }] } + const match = inputMatch(input) + + expect(match(hit({ messages: [{ role: "user", content: "hello" }] }))).toBe(false) + expect(match(hit({ model: "test", input: [] }))).toBe(false) + }) + + test("does not match partial input", () => { + const input = { questions: [{ header: "Need input", question: "Pick one" }] } + const match = inputMatch(input) + + // Only header, missing question + const partial = `Use this JSON input: ${JSON.stringify({ questions: [{ header: "Need input" }] })}` + const body = { messages: [{ role: "user", content: partial }] } + expect(match(hit(body))).toBe(false) + }) +}) diff --git a/packages/opencode/script/seed-e2e.ts b/packages/opencode/script/seed-e2e.ts index 3f247fa0e9..f5bd7194f2 100644 --- a/packages/opencode/script/seed-e2e.ts +++ b/packages/opencode/script/seed-e2e.ts @@ -2,7 +2,6 @@ const dir = process.env.OPENCODE_E2E_PROJECT_DIR ?? process.cwd() const title = process.env.OPENCODE_E2E_SESSION_TITLE ?? "E2E Session" const text = process.env.OPENCODE_E2E_MESSAGE ?? "Seeded for UI e2e" const model = process.env.OPENCODE_E2E_MODEL ?? "opencode/gpt-5-nano" -const requirePaid = process.env.OPENCODE_E2E_REQUIRE_PAID === "true" const parts = model.split("/") const providerID = parts[0] ?? "opencode" const modelID = parts[1] ?? "gpt-5-nano" @@ -12,7 +11,6 @@ const seed = async () => { const { Instance } = await import("../src/project/instance") const { InstanceBootstrap } = await import("../src/project/bootstrap") const { Config } = await import("../src/config/config") - const { Provider } = await import("../src/provider/provider") const { Session } = await import("../src/session") const { MessageID, PartID } = await import("../src/session/schema") const { Project } = await import("../src/project/project") @@ -27,19 +25,6 @@ const seed = async () => { await Config.waitForDependencies() await ToolRegistry.ids() - if (requirePaid && providerID === "opencode" && !process.env.OPENCODE_API_KEY) { - throw new Error("OPENCODE_API_KEY is required when OPENCODE_E2E_REQUIRE_PAID=true") - } - - const info = await Provider.getModel(ProviderID.make(providerID), ModelID.make(modelID)) - if (requirePaid) { - const paid = - info.cost.input > 0 || info.cost.output > 0 || info.cost.cache.read > 0 || info.cost.cache.write > 0 - if (!paid) { - throw new Error(`OPENCODE_E2E_MODEL must resolve to a paid model: ${providerID}/${modelID}`) - } - } - const session = await Session.create({ title }) const messageID = MessageID.ascending() const partID = PartID.ascending() diff --git a/packages/opencode/src/session/processor.ts b/packages/opencode/src/session/processor.ts index 2482e40fb3..abc101f180 100644 --- a/packages/opencode/src/session/processor.ts +++ b/packages/opencode/src/session/processor.ts @@ -84,13 +84,17 @@ export namespace SessionProcessor { const status = yield* SessionStatus.Service const create = Effect.fn("SessionProcessor.create")(function* (input: Input) { + // Pre-capture snapshot before the LLM stream starts. The AI SDK + // may execute tools internally before emitting start-step events, + // so capturing inside the event handler can be too late. + const initialSnapshot = yield* snapshot.track() const ctx: ProcessorContext = { assistantMessage: input.assistantMessage, sessionID: input.sessionID, model: input.model, toolcalls: {}, shouldBreak: false, - snapshot: undefined, + snapshot: initialSnapshot, blocked: false, needsCompaction: false, currentText: undefined, @@ -250,7 +254,7 @@ export namespace SessionProcessor { throw value.error case "start-step": - ctx.snapshot = yield* snapshot.track() + if (!ctx.snapshot) ctx.snapshot = yield* snapshot.track() yield* session.updatePart({ id: PartID.ascending(), messageID: ctx.assistantMessage.id, diff --git a/packages/opencode/src/snapshot/index.ts b/packages/opencode/src/snapshot/index.ts index 1eecad702c..aa62c02715 100644 --- a/packages/opencode/src/snapshot/index.ts +++ b/packages/opencode/src/snapshot/index.ts @@ -174,7 +174,8 @@ export namespace Snapshot { } const tracked = diff.text.split("\0").filter(Boolean) - const all = Array.from(new Set([...tracked, ...other.text.split("\0").filter(Boolean)])) + const untracked = other.text.split("\0").filter(Boolean) + const all = Array.from(new Set([...tracked, ...untracked])) if (!all.length) return const large = (yield* Effect.all( diff --git a/packages/opencode/test/lib/llm-server.ts b/packages/opencode/test/lib/llm-server.ts index 1c624cd0d5..747693d70b 100644 --- a/packages/opencode/test/lib/llm-server.ts +++ b/packages/opencode/test/lib/llm-server.ts @@ -584,6 +584,30 @@ function hit(url: string, body: unknown) { } satisfies Hit } +/** Auto-acknowledging tool-result follow-ups avoids requiring tests to queue two responses per tool call. */ +function isToolResultFollowUp(body: unknown): boolean { + if (!body || typeof body !== "object") return false + // OpenAI chat format: last message has role "tool" + if ("messages" in body && Array.isArray(body.messages)) { + const last = body.messages[body.messages.length - 1] + return last?.role === "tool" + } + // Responses API: input contains function_call_output + if ("input" in body && Array.isArray(body.input)) { + return body.input.some((item: Record) => item?.type === "function_call_output") + } + return false +} + +function requestSummary(body: unknown): string { + if (!body || typeof body !== "object") return "empty body" + if ("messages" in body && Array.isArray(body.messages)) { + const roles = body.messages.map((m: Record) => m.role).join(",") + return `messages=[${roles}]` + } + return `keys=[${Object.keys(body).join(",")}]` +} + namespace TestLLMServer { export interface Service { readonly url: string @@ -604,6 +628,7 @@ namespace TestLLMServer { readonly wait: (count: number) => Effect.Effect readonly inputs: Effect.Effect[]> readonly pending: Effect.Effect + readonly misses: Effect.Effect } } @@ -617,6 +642,7 @@ export class TestLLMServer extends ServiceMap.Service { list = [...list, ...input.map((value) => ({ item: item(value) }))] @@ -646,7 +672,21 @@ export class TestLLMServer extends ServiceMap.Service ({}))) const current = hit(req.originalUrl, body) const next = pull(current) - if (!next) return HttpServerResponse.text("unexpected request", { status: 500 }) + if (!next) { + // Auto-acknowledge tool-result follow-ups so tests only need to + // queue one response per tool call instead of two. + if (isToolResultFollowUp(body)) { + hits = [...hits, current] + yield* notify() + const auto: Sse = { type: "sse", head: [role()], tail: [textLine("ok"), finishLine("stop")] } + if (mode === "responses") return send(responses(auto, modelFrom(body))) + return send(auto) + } + misses = [...misses, current] + const summary = requestSummary(body) + console.warn(`[TestLLMServer] unmatched request: ${req.originalUrl} (${summary}, pending=${list.length})`) + return HttpServerResponse.text(`unexpected request: ${summary}`, { status: 500 }) + } hits = [...hits, current] yield* notify() if (next.type !== "sse") return fail(next) @@ -725,6 +765,7 @@ export class TestLLMServer extends ServiceMap.Service hits.map((hit) => hit.body)), pending: Effect.sync(() => list.length), + misses: Effect.sync(() => [...misses]), }) }), ).pipe(Layer.provide(HttpRouter.layer), Layer.provide(NodeHttpServer.layer(() => Http.createServer(), { port: 0 }))) diff --git a/packages/opencode/test/session/snapshot-tool-race.test.ts b/packages/opencode/test/session/snapshot-tool-race.test.ts new file mode 100644 index 0000000000..b6c2718c67 --- /dev/null +++ b/packages/opencode/test/session/snapshot-tool-race.test.ts @@ -0,0 +1,233 @@ +/** + * Reproducer for snapshot race condition with instant tool execution. + * + * When the mock LLM returns a tool call response instantly, the AI SDK + * processes the tool call and executes the tool (e.g. apply_patch) before + * the processor's start-step handler can capture a pre-tool snapshot. + * Both the "before" and "after" snapshots end up with the same git tree + * hash, so computeDiff returns empty and the session summary shows 0 files. + * + * This is a real bug: the snapshot system assumes it can capture state + * before tools run by hooking into start-step, but the AI SDK executes + * tools internally during multi-step processing before emitting events. + */ +import { expect } from "bun:test" +import { Effect } from "effect" +import fs from "fs/promises" +import path from "path" +import { Session } from "../../src/session" +import { LLM } from "../../src/session/llm" +import { SessionPrompt } from "../../src/session/prompt" +import { SessionSummary } from "../../src/session/summary" +import { MessageV2 } from "../../src/session/message-v2" +import { Log } from "../../src/util/log" +import { provideTmpdirServer } from "../fixture/fixture" +import { testEffect } from "../lib/effect" +import { TestLLMServer } from "../lib/llm-server" + +// Same layer setup as prompt-effect.test.ts +import { NodeFileSystem } from "@effect/platform-node" +import { Layer } from "effect" +import { Agent as AgentSvc } from "../../src/agent/agent" +import { Bus } from "../../src/bus" +import { Command } from "../../src/command" +import { Config } from "../../src/config/config" +import { FileTime } from "../../src/file/time" +import { LSP } from "../../src/lsp" +import { MCP } from "../../src/mcp" +import { Permission } from "../../src/permission" +import { Plugin } from "../../src/plugin" +import { Provider as ProviderSvc } from "../../src/provider/provider" +import { SessionCompaction } from "../../src/session/compaction" +import { SessionProcessor } from "../../src/session/processor" +import { SessionStatus } from "../../src/session/status" +import { Shell } from "../../src/shell/shell" +import { Snapshot } from "../../src/snapshot" +import { ToolRegistry } from "../../src/tool/registry" +import { Truncate } from "../../src/tool/truncate" +import { AppFileSystem } from "../../src/filesystem" +import * as CrossSpawnSpawner from "../../src/effect/cross-spawn-spawner" + +Log.init({ print: false }) + +const mcp = Layer.succeed( + MCP.Service, + MCP.Service.of({ + status: () => Effect.succeed({}), + clients: () => Effect.succeed({}), + tools: () => Effect.succeed({}), + prompts: () => Effect.succeed({}), + resources: () => Effect.succeed({}), + add: () => Effect.succeed({ status: { status: "disabled" as const } }), + connect: () => Effect.void, + disconnect: () => Effect.void, + getPrompt: () => Effect.succeed(undefined), + readResource: () => Effect.succeed(undefined), + startAuth: () => Effect.die("unexpected MCP auth"), + authenticate: () => Effect.die("unexpected MCP auth"), + finishAuth: () => Effect.die("unexpected MCP auth"), + removeAuth: () => Effect.void, + supportsOAuth: () => Effect.succeed(false), + hasStoredTokens: () => Effect.succeed(false), + getAuthStatus: () => Effect.succeed("not_authenticated" as const), + }), +) + +const lsp = Layer.succeed( + LSP.Service, + LSP.Service.of({ + init: () => Effect.void, + status: () => Effect.succeed([]), + hasClients: () => Effect.succeed(false), + touchFile: () => Effect.void, + diagnostics: () => Effect.succeed({}), + hover: () => Effect.succeed(undefined), + definition: () => Effect.succeed([]), + references: () => Effect.succeed([]), + implementation: () => Effect.succeed([]), + documentSymbol: () => Effect.succeed([]), + workspaceSymbol: () => Effect.succeed([]), + prepareCallHierarchy: () => Effect.succeed([]), + incomingCalls: () => Effect.succeed([]), + outgoingCalls: () => Effect.succeed([]), + }), +) + +const filetime = Layer.succeed( + FileTime.Service, + FileTime.Service.of({ + read: () => Effect.void, + get: () => Effect.succeed(undefined), + assert: () => Effect.void, + withLock: (_filepath, fn) => Effect.promise(fn), + }), +) + +const status = SessionStatus.layer.pipe(Layer.provideMerge(Bus.layer)) +const infra = Layer.mergeAll(NodeFileSystem.layer, CrossSpawnSpawner.defaultLayer) + +function makeHttp() { + const deps = Layer.mergeAll( + Session.defaultLayer, + Snapshot.defaultLayer, + LLM.defaultLayer, + AgentSvc.defaultLayer, + Command.defaultLayer, + Permission.layer, + Plugin.defaultLayer, + Config.defaultLayer, + ProviderSvc.defaultLayer, + filetime, + lsp, + mcp, + AppFileSystem.defaultLayer, + status, + ).pipe(Layer.provideMerge(infra)) + const registry = ToolRegistry.layer.pipe(Layer.provideMerge(deps)) + const trunc = Truncate.layer.pipe(Layer.provideMerge(deps)) + const proc = SessionProcessor.layer.pipe(Layer.provideMerge(deps)) + const compact = SessionCompaction.layer.pipe(Layer.provideMerge(proc), Layer.provideMerge(deps)) + return Layer.mergeAll( + TestLLMServer.layer, + SessionPrompt.layer.pipe( + Layer.provideMerge(compact), + Layer.provideMerge(proc), + Layer.provideMerge(registry), + Layer.provideMerge(trunc), + Layer.provideMerge(deps), + ), + ) +} + +const it = testEffect(makeHttp()) + +const providerCfg = (url: string) => ({ + provider: { + test: { + name: "Test", + id: "test", + env: [], + npm: "@ai-sdk/openai-compatible", + models: { + "test-model": { + id: "test-model", + name: "Test Model", + attachment: false, + reasoning: false, + temperature: false, + tool_call: true, + release_date: "2025-01-01", + limit: { context: 100000, output: 10000 }, + cost: { input: 0, output: 0 }, + options: {}, + }, + }, + options: { + apiKey: "test-key", + baseURL: url, + }, + }, + }, +}) + +it.live("tool execution produces non-empty session diff (snapshot race)", () => + provideTmpdirServer( + Effect.fnUntraced(function* ({ dir, llm }) { + const prompt = yield* SessionPrompt.Service + const sessions = yield* Session.Service + + const session = yield* sessions.create({ + title: "snapshot race test", + permission: [{ permission: "*", pattern: "*", action: "allow" }], + }) + + // Use bash tool (always registered) to create a file + const command = `echo 'snapshot race test content' > ${path.join(dir, "race-test.txt")}` + yield* llm.toolMatch( + (hit) => JSON.stringify(hit.body).includes("create the file"), + "bash", + { command, description: "create test file" }, + ) + yield* llm.textMatch( + (hit) => JSON.stringify(hit.body).includes("bash"), + "done", + ) + + // Seed user message + yield* prompt.prompt({ + sessionID: session.id, + agent: "build", + noReply: true, + parts: [{ type: "text", text: "create the file" }], + }) + + // Run the agent loop + const result = yield* prompt.loop({ sessionID: session.id }) + expect(result.info.role).toBe("assistant") + + // Verify the file was created + const filePath = path.join(dir, "race-test.txt") + const fileExists = yield* Effect.promise(() => + fs.access(filePath).then(() => true).catch(() => false), + ) + expect(fileExists).toBe(true) + + // Verify the tool call completed (in the first assistant message) + const allMsgs = yield* Effect.promise(() => MessageV2.filterCompacted(MessageV2.stream(session.id))) + const tool = allMsgs + .flatMap((m) => m.parts) + .find((p): p is MessageV2.ToolPart => p.type === "tool" && p.tool === "bash") + expect(tool?.state.status).toBe("completed") + + // Poll for diff — summarize() is fire-and-forget + let diff: Awaited> = [] + for (let i = 0; i < 50; i++) { + diff = yield* Effect.promise(() => SessionSummary.diff({ sessionID: session.id })) + if (diff.length > 0) break + yield* Effect.sleep("100 millis") + } + expect(diff.length).toBeGreaterThan(0) + }), + { git: true, config: providerCfg }, + ), +)