test(app): add golden-path e2e fixtures

2026-04-01 22:48:24 -04:00 · 2026-04-01 22:48:24 -04:00 · ca3ccbab8d
parent 811c7e2494
commit ca3ccbab8d
7 changed files with 343 additions and 179 deletions
--- a/packages/app/e2e/backend.ts
+++ b/packages/app/e2e/backend.ts
@ -62,7 +62,7 @@ function tail(input: string[]) {
  return input.slice(-40).join("")
 }

-export async function startBackend(label: string): Promise<Handle> {
+export async function startBackend(label: string, input?: { llmUrl?: string }): Promise<Handle> {
  const port = await freePort()
  const sandbox = await fs.mkdtemp(path.join(os.tmpdir(), `opencode-e2e-${label}-`))
  const appDir = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "..")
@ -80,6 +80,7 @@ export async function startBackend(label: string): Promise<Handle> {
    XDG_STATE_HOME: path.join(sandbox, "state"),
    OPENCODE_CLIENT: "app",
    OPENCODE_STRICT_CONFIG_DEPS: "true",
+    OPENCODE_E2E_LLM_URL: input?.llmUrl,
  } satisfies Record<string, string | undefined>
  const out: string[] = []
  const err: string[] = []
--- a/packages/app/e2e/fixtures.ts
+++ b/packages/app/e2e/fixtures.ts
@ -12,11 +12,14 @@ import {
  setHealthPhase,
  seedProjects,
  sessionIDFromUrl,
-  waitSlug,
  waitSession,
+  waitSessionIdle,
+  waitSessionSaved,
+  waitSlug,
 } from "./actions"
 import { openaiModel, withMockOpenAI } from "./prompt/mock"
-import { createSdk, dirSlug, getWorktree, sessionPath } from "./utils"
+import { promptSelector } from "./selectors"
+import { createSdk, dirSlug, getWorktree, resolveDirectory, sessionPath } from "./utils"

 type LLMFixture = {
  url: string
@ -51,6 +54,23 @@ type LLMFixture = {
  misses: () => Promise<Array<{ url: URL; body: Record<string, unknown> }>>
 }

+type LLMWorker = LLMFixture & { // shape of the worker-scoped `_llm` fixture: the LLMFixture surface plus `reset`
+  reset: () => Promise<void> // runs the mock server's reset, which empties its hits, list, waits and misses
+}
+
+type AssistantFixture = { // test-facing handle built from the `llm` fixture by the `assistant` fixture
+  reply: (value: string, opts?: { usage?: Usage }) => Promise<void> // bound to llm.text by the `assistant` fixture
+  tool: (name: string, input: unknown) => Promise<void>
+  toolHang: (name: string, input: unknown) => Promise<void>
+  reason: (value: string, opts?: { text?: string; usage?: Usage }) => Promise<void>
+  fail: (message?: unknown) => Promise<void>
+  error: (status: number, body: unknown) => Promise<void>
+  hang: () => Promise<void>
+  hold: (value: string, wait: PromiseLike<unknown>) => Promise<void>
+  calls: () => Promise<number> // bound to llm.calls (the mock server reports hits.length)
+  pending: () => Promise<number> // bound to llm.pending; the `llm` fixture treats a non-zero value after a test as an error
+}
+
 export const settingsKey = "settings.v3"

 const seedModel = (() => {
@ -79,8 +99,15 @@ type ProjectOptions = {
  beforeGoto?: (project: { directory: string; sdk: ReturnType<typeof createSdk> }) => Promise<void>
 }

+type ProjectFixture = ProjectHandle & { // ProjectHandle plus the open/prompt helpers built by makeProject
+  open: (options?: ProjectOptions) => Promise<void> // creates the test project and navigates to it; returns early if already open
+  prompt: (text: string) => Promise<string> // submits `text` through the prompt input; resolves with the session id parsed from the URL
+}
+
 type TestFixtures = {
  llm: LLMFixture
+  assistant: AssistantFixture
+  project: ProjectFixture
  sdk: ReturnType<typeof createSdk>
  gotoSession: (sessionID?: string) => Promise<void>
  withProject: <T>(callback: (project: ProjectHandle) => Promise<T>, options?: ProjectOptions) => Promise<T>
@ -89,6 +116,7 @@ type TestFixtures = {
 }

 type WorkerFixtures = {
+  _llm: LLMWorker
  backend: {
    url: string
    sdk: (directory?: string) => ReturnType<typeof createSdk>
@ -98,9 +126,42 @@ type WorkerFixtures = {
 }

 export const test = base.extend<TestFixtures, WorkerFixtures>({
+  _llm: [ // worker-scoped mock LLM server; the `backend` and `llm` fixtures depend on it
+    async ({}, use) => {
+      const rt = ManagedRuntime.make(TestLLMServer.layer)
+      try {
+        const svc = await rt.runPromise(TestLLMServer.asEffect())
+        await use({ // expose each service member as a promise-returning function executed on `rt`
+          url: svc.url,
+          push: (...input) => rt.runPromise(svc.push(...input)),
+          pushMatch: (match, ...input) => rt.runPromise(svc.pushMatch(match, ...input)),
+          textMatch: (match, value, opts) => rt.runPromise(svc.textMatch(match, value, opts)),
+          toolMatch: (match, name, input) => rt.runPromise(svc.toolMatch(match, name, input)),
+          text: (value, opts) => rt.runPromise(svc.text(value, opts)),
+          tool: (name, input) => rt.runPromise(svc.tool(name, input)),
+          toolHang: (name, input) => rt.runPromise(svc.toolHang(name, input)),
+          reason: (value, opts) => rt.runPromise(svc.reason(value, opts)),
+          fail: (message) => rt.runPromise(svc.fail(message)),
+          error: (status, body) => rt.runPromise(svc.error(status, body)),
+          hang: () => rt.runPromise(svc.hang),
+          hold: (value, wait) => rt.runPromise(svc.hold(value, wait)),
+          reset: () => rt.runPromise(svc.reset), // called by the per-test `llm` fixture before each test
+          hits: () => rt.runPromise(svc.hits),
+          calls: () => rt.runPromise(svc.calls),
+          wait: (count) => rt.runPromise(svc.wait(count)),
+          inputs: () => rt.runPromise(svc.inputs),
+          pending: () => rt.runPromise(svc.pending),
+          misses: () => rt.runPromise(svc.misses),
+        })
+      } finally {
+        await rt.dispose() // runs even if `use` rejects
+      }
+    },
+    { scope: "worker" }, // one instance per Playwright worker rather than per test
+  ],
  backend: [
-    async ({}, use, workerInfo) => {
-      const handle = await startBackend(`w${workerInfo.workerIndex}`)
+    async ({ _llm }, use, workerInfo) => {
+      const handle = await startBackend(`w${workerInfo.workerIndex}`, { llmUrl: _llm.url })
      try {
        await use({
          url: handle.url,
@ -112,35 +173,48 @@ export const test = base.extend<TestFixtures, WorkerFixtures>({
    },
    { scope: "worker" },
  ],
-  llm: async ({}, use) => {
-    const rt = ManagedRuntime.make(TestLLMServer.layer)
-    try {
-      const svc = await rt.runPromise(TestLLMServer.asEffect())
-      await use({
-        url: svc.url,
-        push: (...input) => rt.runPromise(svc.push(...input)),
-        pushMatch: (match, ...input) => rt.runPromise(svc.pushMatch(match, ...input)),
-        textMatch: (match, value, opts) => rt.runPromise(svc.textMatch(match, value, opts)),
-        toolMatch: (match, name, input) => rt.runPromise(svc.toolMatch(match, name, input)),
-        text: (value, opts) => rt.runPromise(svc.text(value, opts)),
-        tool: (name, input) => rt.runPromise(svc.tool(name, input)),
-        toolHang: (name, input) => rt.runPromise(svc.toolHang(name, input)),
-        reason: (value, opts) => rt.runPromise(svc.reason(value, opts)),
-        fail: (message) => rt.runPromise(svc.fail(message)),
-        error: (status, body) => rt.runPromise(svc.error(status, body)),
-        hang: () => rt.runPromise(svc.hang),
-        hold: (value, wait) => rt.runPromise(svc.hold(value, wait)),
-        hits: () => rt.runPromise(svc.hits),
-        calls: () => rt.runPromise(svc.calls),
-        wait: (count) => rt.runPromise(svc.wait(count)),
-        inputs: () => rt.runPromise(svc.inputs),
-        pending: () => rt.runPromise(svc.pending),
-        misses: () => rt.runPromise(svc.misses),
-      })
-    } finally {
-      await rt.dispose()
+  llm: async ({ _llm }, use) => {
+    await _llm.reset()
+    await use({
+      url: _llm.url,
+      push: _llm.push,
+      pushMatch: _llm.pushMatch,
+      textMatch: _llm.textMatch,
+      toolMatch: _llm.toolMatch,
+      text: _llm.text,
+      tool: _llm.tool,
+      toolHang: _llm.toolHang,
+      reason: _llm.reason,
+      fail: _llm.fail,
+      error: _llm.error,
+      hang: _llm.hang,
+      hold: _llm.hold,
+      hits: _llm.hits,
+      calls: _llm.calls,
+      wait: _llm.wait,
+      inputs: _llm.inputs,
+      pending: _llm.pending,
+      misses: _llm.misses,
+    })
+    const pending = await _llm.pending()
+    if (pending > 0) {
+      throw new Error(`TestLLMServer still has ${pending} queued response(s) after the test finished`)
    }
  },
+  assistant: async ({ llm }, use) => { // narrower view over the per-test `llm` fixture
+    await use({
+      reply: llm.text, // the only renamed member: reply -> llm.text
+      tool: llm.tool,
+      toolHang: llm.toolHang,
+      reason: llm.reason,
+      fail: llm.fail,
+      error: llm.error,
+      hang: llm.hang,
+      hold: llm.hold,
+      calls: llm.calls,
+      pending: llm.pending,
+    })
+  },
  page: async ({ page }, use) => {
    let boundary: string | undefined
    setHealthPhase(page, "test")
@ -166,8 +240,7 @@ export const test = base.extend<TestFixtures, WorkerFixtures>({
  },
  directory: [
    async ({}, use) => {
-      const directory = await getWorktree()
-      await use(directory)
+      await use(await getWorktree())
    },
    { scope: "worker" },
  ],
@ -189,6 +262,14 @@ export const test = base.extend<TestFixtures, WorkerFixtures>({
    }
    await use(gotoSession)
  },
+  project: async ({ page, llm, backend }, use) => { // per-test project handle built by makeProject
+    const item = makeProject(page, llm, backend)
+    try {
+      await use(item.project)
+    } finally {
+      await item.cleanup() // runs even if `use` rejects; returns early when project.open() was never called
+    }
+  },
  withProject: async ({ page }, use) => {
    await use((callback, options) => runProject(page, callback, options))
  },
@ -214,6 +295,137 @@ export const test = base.extend<TestFixtures, WorkerFixtures>({
  },
 })

+/**
+ * Builds the object behind the `project` fixture together with its teardown.
+ *
+ * Nothing is created until `project.open()` runs; until then every member that
+ * needs the project throws "project.open() must be called first". Sessions and
+ * extra directories recorded through `trackSession` / `trackDirectory` (as well
+ * as sessions discovered by `gotoSession` and `prompt`) are removed by the
+ * returned `cleanup`.
+ *
+ * @param page Playwright page driven by the test.
+ * @param llm Mock LLM handle; `prompt` queues a default "ok" reply on it when nothing is pending.
+ * @param backend Backend URL and per-directory SDK factory.
+ * @returns `project`, the handle exposed to tests, and `cleanup`, the teardown to run afterwards.
+ */
+function makeProject(
+  page: Page,
+  llm: LLMFixture,
+  backend: { url: string; sdk: (directory?: string) => ReturnType<typeof createSdk> },
+) {
+  // Set by open(); undefined before open() and again after cleanup().
+  let state:
+    | {
+        directory: string
+        slug: string
+        sdk: ReturnType<typeof createSdk>
+        sessions: Map<string, string>
+        dirs: Set<string>
+      }
+    | undefined
+
+  // Returns the open project state or throws when open() has not run yet.
+  const need = () => {
+    if (state) return state
+    throw new Error("project.open() must be called first")
+  }
+
+  // Records a session for cleanup, keyed by id, with the directory it belongs to.
+  const trackSession = (sessionID: string, directory?: string) => {
+    const cur = need()
+    cur.sessions.set(sessionID, directory ?? cur.directory)
+  }
+
+  // Records an additional directory for cleanup; the project directory itself is handled separately.
+  const trackDirectory = (directory: string) => {
+    const cur = need()
+    if (directory !== cur.directory) cur.dirs.add(directory)
+  }
+
+  // Navigates to the project (optionally a specific session) and tracks whatever session the URL ends up on.
+  const gotoSession = async (sessionID?: string) => {
+    const cur = need()
+    await page.goto(sessionPath(cur.directory, sessionID))
+    await waitSession(page, { directory: cur.directory, sessionID, serverUrl: backend.url })
+    const current = sessionIDFromUrl(page.url())
+    if (current) trackSession(current)
+  }
+
+  // Creates the test project, seeds storage and navigates to it. Returns early if already open.
+  const open = async (options?: ProjectOptions) => {
+    if (state) return
+    const directory = await createTestProject({ serverUrl: backend.url })
+    const sdk = backend.sdk(directory)
+    // Register the project before running anything else that can throw, so that
+    // cleanup() still removes the directory when setup or seeding fails.
+    state = {
+      directory,
+      slug: "",
+      sdk,
+      sessions: new Map(),
+      dirs: new Set(),
+    }
+    await options?.setup?.(directory)
+    await seedStorage(page, {
+      directory,
+      extra: options?.extra,
+      model: options?.model,
+      serverUrl: backend.url,
+    })
+    await options?.beforeGoto?.({ directory, sdk })
+    await gotoSession()
+    need().slug = await waitSlug(page)
+  }
+
+  // Submits `text` through the prompt input and resolves with the resulting session id.
+  const prompt = async (text: string) => {
+    const cur = need()
+    // Make sure the mock LLM has something to answer with.
+    if ((await llm.pending()) === 0) {
+      await llm.text("ok")
+    }
+
+    const composer = page.locator(promptSelector).first()
+    await expect(composer).toBeVisible()
+    await composer.click()
+    await composer.fill(text)
+    await composer.press("Enter")
+
+    await expect(page).toHaveURL(/\/session\/[^/?#]+/, { timeout: 30_000 })
+    const sessionID = sessionIDFromUrl(page.url())
+    if (!sessionID) throw new Error(`Failed to parse session id from url: ${page.url()}`)
+
+    // Ask the page which directory the current model belongs to; fall back to
+    // the project directory when the hook is missing or evaluation fails.
+    const current = await page
+      .evaluate(() => {
+        const win = window as E2EWindow
+        const next = win.__opencode_e2e?.model?.current
+        if (!next) return null
+        return { dir: next.dir, sessionID: next.sessionID }
+      })
+      .catch(() => null as { dir?: string; sessionID?: string } | null)
+    const directory = current?.dir
+      ? await resolveDirectory(current.dir, backend.url).catch(() => cur.directory)
+      : cur.directory
+
+    trackSession(sessionID, directory)
+    await waitSessionSaved(directory, sessionID, 30_000, backend.url)
+    // Best effort: a rejection from the idle wait is ignored here.
+    // NOTE(review): presumably so callers can assert on the reply themselves — confirm.
+    await waitSessionIdle(backend.sdk(directory), sessionID, 30_000).catch(() => undefined)
+    return sessionID
+  }
+
+  // Removes tracked sessions, tracked directories and the project directory. Returns early if never opened.
+  const cleanup = async () => {
+    const cur = state
+    if (!cur) return
+    setHealthPhase(page, "cleanup")
+    try {
+      await Promise.allSettled(
+        Array.from(cur.sessions, ([sessionID, directory]) =>
+          cleanupSession({ sessionID, directory, serverUrl: backend.url }),
+        ),
+      )
+      await Promise.allSettled(Array.from(cur.dirs, (directory) => cleanupTestProject(directory)))
+      await cleanupTestProject(cur.directory)
+    } finally {
+      // Always drop the state and restore the health phase, even when removing
+      // the project directory rejects; the rejection still propagates.
+      state = undefined
+      setHealthPhase(page, "test")
+    }
+  }
+
+  return {
+    project: {
+      open,
+      prompt,
+      gotoSession,
+      trackSession,
+      trackDirectory,
+      // Getters defer to need(), so they throw until open() has run.
+      get directory() {
+        return need().directory
+      },
+      get slug() {
+        return need().slug
+      },
+      get sdk() {
+        return need().sdk
+      },
+    },
+    cleanup,
+  }
+}
+
 async function runProject<T>(
  page: Page,
  callback: (project: ProjectHandle) => Promise<T>,
--- a/packages/app/e2e/prompt/prompt-async.spec.ts
+++ b/packages/app/e2e/prompt/prompt-async.spec.ts
@ -1,52 +1,25 @@
 import { test, expect } from "../fixtures"
 import { promptSelector } from "../selectors"
-import { assistantText, sessionIDFromUrl, withSession } from "../actions"
-import { openaiModel, promptMatch, titleMatch, withMockOpenAI } from "./mock"
+import { assistantText, withSession } from "../actions"

 const text = (value: string | null) => (value ?? "").replace(/\u200B/g, "").trim()

 // Regression test for Issue #12453: the synchronous POST /message endpoint holds
 // the connection open while the agent works, causing "Failed to fetch" over
 // VPN/Tailscale. The fix switches to POST /prompt_async which returns immediately.
-test("prompt succeeds when sync message endpoint is unreachable", async ({
-  page,
-  llm,
-  backend,
-  withBackendProject,
-}) => {
+test("prompt succeeds when sync message endpoint is unreachable", async ({ page, project, assistant }) => {
  test.setTimeout(120_000)

  // Simulate Tailscale/VPN killing the long-lived sync connection
  await page.route("**/session/*/message", (route) => route.abort("connectionfailed"))

-  await withMockOpenAI({
-    serverUrl: backend.url,
-    llmUrl: llm.url,
-    fn: async () => {
-      const token = `E2E_ASYNC_${Date.now()}`
-      await llm.textMatch(titleMatch, "E2E Title")
-      await llm.textMatch(promptMatch(token), token)
+  const token = `E2E_ASYNC_${Date.now()}`
+  await project.open()
+  await assistant.reply(token)
+  const sessionID = await project.prompt(`Reply with exactly: ${token}`)

-      await withBackendProject(
-        async (project) => {
-          await page.locator(promptSelector).click()
-          await page.keyboard.type(`Reply with exactly: ${token}`)
-          await page.keyboard.press("Enter")
-
-          await expect(page).toHaveURL(/\/session\/[^/?#]+/, { timeout: 30_000 })
-          const sessionID = sessionIDFromUrl(page.url())!
-          project.trackSession(sessionID)
-
-          await expect.poll(() => llm.calls()).toBeGreaterThanOrEqual(1)
-
-          await expect.poll(() => assistantText(project.sdk, sessionID), { timeout: 90_000 }).toContain(token)
-        },
-        {
-          model: openaiModel,
-        },
-      )
-    },
-  })
+  await expect.poll(() => assistant.calls()).toBeGreaterThanOrEqual(1)
+  await expect.poll(() => assistantText(project.sdk, sessionID), { timeout: 90_000 }).toContain(token)
 })

 test("failed prompt send restores the composer input", async ({ page, sdk, gotoSession }) => {
--- a/packages/app/e2e/prompt/prompt-history.spec.ts
+++ b/packages/app/e2e/prompt/prompt-history.spec.ts
@ -1,10 +1,9 @@
 import type { ToolPart } from "@opencode-ai/sdk/v2/client"
 import type { Page } from "@playwright/test"
 import { test, expect } from "../fixtures"
-import { assistantText, sessionIDFromUrl } from "../actions"
+import { assistantText } from "../actions"
 import { promptSelector } from "../selectors"
 import { createSdk } from "../utils"
-import { openaiModel, promptMatch, titleMatch, withMockOpenAI } from "./mock"

 const text = (value: string | null) => (value ?? "").replace(/\u200B/g, "").trim()
 type Sdk = ReturnType<typeof createSdk>
@ -43,73 +42,45 @@ async function shell(sdk: Sdk, sessionID: string, cmd: string, token: string) {
    .toContain(token)
 }

-test("prompt history restores unsent draft with arrow navigation", async ({
-  page,
-  llm,
-  backend,
-  withBackendProject,
-}) => {
+test("prompt history restores unsent draft with arrow navigation", async ({ page, project, assistant }) => {
  test.setTimeout(120_000)

-  await withMockOpenAI({
-    serverUrl: backend.url,
-    llmUrl: llm.url,
-    fn: async () => {
-      const firstToken = `E2E_HISTORY_ONE_${Date.now()}`
-      const secondToken = `E2E_HISTORY_TWO_${Date.now()}`
-      const first = `Reply with exactly: ${firstToken}`
-      const second = `Reply with exactly: ${secondToken}`
-      const draft = `draft ${Date.now()}`
+  const firstToken = `E2E_HISTORY_ONE_${Date.now()}`
+  const secondToken = `E2E_HISTORY_TWO_${Date.now()}`
+  const first = `Reply with exactly: ${firstToken}`
+  const second = `Reply with exactly: ${secondToken}`
+  const draft = `draft ${Date.now()}`

-      await llm.textMatch(titleMatch, "E2E Title")
-      await llm.textMatch(promptMatch(firstToken), firstToken)
-      await llm.textMatch(promptMatch(secondToken), secondToken)
+  await project.open()
+  await assistant.reply(firstToken)
+  const sessionID = await project.prompt(first)
+  await wait(page, "")
+  await reply(project.sdk, sessionID, firstToken)

-      await withBackendProject(
-        async (project) => {
-          const prompt = page.locator(promptSelector)
+  await assistant.reply(secondToken)
+  await project.prompt(second)
+  await wait(page, "")
+  await reply(project.sdk, sessionID, secondToken)

-          await prompt.click()
-          await page.keyboard.type(first)
-          await page.keyboard.press("Enter")
-          await wait(page, "")
+  const prompt = page.locator(promptSelector)
+  await prompt.click()
+  await page.keyboard.type(draft)
+  await wait(page, draft)

-          await expect(page).toHaveURL(/\/session\/[^/?#]+/, { timeout: 30_000 })
-          const sessionID = sessionIDFromUrl(page.url())!
-          project.trackSession(sessionID)
-          await reply(project.sdk, sessionID, firstToken)
+  await prompt.fill("")
+  await wait(page, "")

-          await prompt.click()
-          await page.keyboard.type(second)
-          await page.keyboard.press("Enter")
-          await wait(page, "")
-          await reply(project.sdk, sessionID, secondToken)
+  await page.keyboard.press("ArrowUp")
+  await wait(page, second)

-          await prompt.click()
-          await page.keyboard.type(draft)
-          await wait(page, draft)
+  await page.keyboard.press("ArrowUp")
+  await wait(page, first)

-          await prompt.fill("")
-          await wait(page, "")
+  await page.keyboard.press("ArrowDown")
+  await wait(page, second)

-          await page.keyboard.press("ArrowUp")
-          await wait(page, second)
-
-          await page.keyboard.press("ArrowUp")
-          await wait(page, first)
-
-          await page.keyboard.press("ArrowDown")
-          await wait(page, second)
-
-          await page.keyboard.press("ArrowDown")
-          await wait(page, "")
-        },
-        {
-          model: openaiModel,
-        },
-      )
-    },
-  })
+  await page.keyboard.press("ArrowDown")
+  await wait(page, "")
 })

 test.fixme("shell history stays separate from normal prompt history", async ({ page, sdk, gotoSession }) => {
--- a/packages/app/e2e/prompt/prompt.spec.ts
+++ b/packages/app/e2e/prompt/prompt.spec.ts
@ -1,9 +1,7 @@
 import { test, expect } from "../fixtures"
-import { promptSelector } from "../selectors"
-import { assistantText, sessionIDFromUrl } from "../actions"
-import { openaiModel, promptMatch, titleMatch, withMockOpenAI } from "./mock"
+import { assistantText } from "../actions"

-test("can send a prompt and receive a reply", async ({ page, llm, backend, withBackendProject }) => {
+test("can send a prompt and receive a reply", async ({ page, project, assistant }) => {
  test.setTimeout(120_000)

  const pageErrors: string[] = []
@ -13,41 +11,13 @@ test("can send a prompt and receive a reply", async ({ page, llm, backend, withB
  page.on("pageerror", onPageError)

  try {
-    await withMockOpenAI({
-      serverUrl: backend.url,
-      llmUrl: llm.url,
-      fn: async () => {
-        const token = `E2E_OK_${Date.now()}`
+    const token = `E2E_OK_${Date.now()}`
+    await project.open()
+    await assistant.reply(token)
+    const sessionID = await project.prompt(`Reply with exactly: ${token}`)

-        await llm.textMatch(titleMatch, "E2E Title")
-        await llm.textMatch(promptMatch(token), token)
-
-        await withBackendProject(
-          async (project) => {
-            const prompt = page.locator(promptSelector)
-            await prompt.click()
-            await page.keyboard.type(`Reply with exactly: ${token}`)
-            await page.keyboard.press("Enter")
-
-            await expect(page).toHaveURL(/\/session\/[^/?#]+/, { timeout: 30_000 })
-
-            const sessionID = (() => {
-              const id = sessionIDFromUrl(page.url())
-              if (!id) throw new Error(`Failed to parse session id from url: ${page.url()}`)
-              return id
-            })()
-            project.trackSession(sessionID)
-
-            await expect.poll(() => llm.calls()).toBeGreaterThanOrEqual(1)
-
-            await expect.poll(() => assistantText(project.sdk, sessionID), { timeout: 30_000 }).toContain(token)
-          },
-          {
-            model: openaiModel,
-          },
-        )
-      },
-    })
+    await expect.poll(() => assistant.calls()).toBeGreaterThanOrEqual(1)
+    await expect.poll(() => assistantText(project.sdk, sessionID), { timeout: 30_000 }).toContain(token)
  } finally {
    page.off("pageerror", onPageError)
  }
--- a/packages/opencode/src/provider/provider.ts
+++ b/packages/opencode/src/provider/provider.ts
@ -114,6 +114,12 @@ export namespace Provider {
    })
  }

+  function e2eURL() { // returns OPENCODE_E2E_LLM_URL when it is a non-empty string, otherwise undefined
+    const url = Env.get("OPENCODE_E2E_LLM_URL")
+    if (typeof url !== "string" || url === "") return // unset or empty: no override
+    return url
+  }
+
  type BundledSDK = {
    languageModel(modelId: string): LanguageModelV3
  }
@ -1450,6 +1456,17 @@ export namespace Provider {
        if (s.models.has(key)) return s.models.get(key)!

        return yield* Effect.promise(async () => {
+          const url = e2eURL()
+          if (url) {
+            const language = createOpenAI({
+              name: model.providerID,
+              apiKey: "test-key",
+              baseURL: url,
+            }).responses(model.api.id)
+            s.models.set(key, language)
+            return language
+          }
+
          const provider = s.providers[model.providerID]
          const sdk = await resolveSDK(model, s)

--- a/packages/opencode/test/lib/llm-server.ts
+++ b/packages/opencode/test/lib/llm-server.ts
@ -599,6 +599,11 @@ function isToolResultFollowUp(body: unknown): boolean {
  return false
 }

+function isTitleRequest(body: unknown): boolean { // true when the serialized body contains the title-generation prompt text
+  if (!body || typeof body !== "object") return false // only non-null object bodies are inspected
+  return JSON.stringify(body).includes("Generate a title for this conversation") // substring match anywhere in the JSON
+}
+
 function requestSummary(body: unknown): string {
  if (!body || typeof body !== "object") return "empty body"
  if ("messages" in body && Array.isArray(body.messages)) {
@ -623,6 +628,7 @@ namespace TestLLMServer {
    readonly error: (status: number, body: unknown) => Effect.Effect<void>
    readonly hang: Effect.Effect<void>
    readonly hold: (value: string, wait: PromiseLike<unknown>) => Effect.Effect<void>
+    readonly reset: Effect.Effect<void>
    readonly hits: Effect.Effect<Hit[]>
    readonly calls: Effect.Effect<number>
    readonly wait: (count: number) => Effect.Effect<void>
@ -671,21 +677,29 @@ export class TestLLMServer extends ServiceMap.Service<TestLLMServer, TestLLMServ
        const req = yield* HttpServerRequest.HttpServerRequest
        const body = yield* req.json.pipe(Effect.orElseSucceed(() => ({})))
        const current = hit(req.originalUrl, body)
+        if (isTitleRequest(body)) {
+          hits = [...hits, current]
+          yield* notify()
+          const auto: Sse = { type: "sse", head: [role()], tail: [textLine("E2E Title"), finishLine("stop")] }
+          if (mode === "responses") return send(responses(auto, modelFrom(body)))
+          return send(auto)
+        }
+        // Auto-acknowledge tool-result follow-ups so tests only need to
+        // queue one response per tool call instead of two.
+        if (isToolResultFollowUp(body)) {
+          hits = [...hits, current]
+          yield* notify()
+          const auto: Sse = { type: "sse", head: [role()], tail: [textLine("ok"), finishLine("stop")] }
+          if (mode === "responses") return send(responses(auto, modelFrom(body)))
+          return send(auto)
+        }
        const next = pull(current)
        if (!next) {
-          // Auto-acknowledge tool-result follow-ups so tests only need to
-          // queue one response per tool call instead of two.
-          if (isToolResultFollowUp(body)) {
-            hits = [...hits, current]
-            yield* notify()
-            const auto: Sse = { type: "sse", head: [role()], tail: [textLine("ok"), finishLine("stop")] }
-            if (mode === "responses") return send(responses(auto, modelFrom(body)))
-            return send(auto)
-          }
-          misses = [...misses, current]
-          const summary = requestSummary(body)
-          console.warn(`[TestLLMServer] unmatched request: ${req.originalUrl} (${summary}, pending=${list.length})`)
-          return HttpServerResponse.text(`unexpected request: ${summary}`, { status: 500 })
+          hits = [...hits, current]
+          yield* notify()
+          const auto: Sse = { type: "sse", head: [role()], tail: [textLine("ok"), finishLine("stop")] }
+          if (mode === "responses") return send(responses(auto, modelFrom(body)))
+          return send(auto)
        }
        hits = [...hits, current]
        yield* notify()
@ -755,6 +769,12 @@ export class TestLLMServer extends ServiceMap.Service<TestLLMServer, TestLLMServ
        hold: Effect.fn("TestLLMServer.hold")(function* (value: string, wait: PromiseLike<unknown>) {
          queue(reply().wait(wait).text(value).stop().item())
        }),
+        reset: Effect.sync(() => { // empties all four server-side arrays in one synchronous step
+          hits = []
+          list = []
+          waits = []
+          misses = []
+        }),
        hits: Effect.sync(() => [...hits]),
        calls: Effect.sync(() => hits.length),
        wait: Effect.fn("TestLLMServer.wait")(function* (count: number) {