test(app): add golden-path e2e fixtures

pull/20593/head
Kit Langton 2026-04-01 22:48:24 -04:00
parent 811c7e2494
commit ca3ccbab8d
7 changed files with 343 additions and 179 deletions

View File

@ -62,7 +62,7 @@ function tail(input: string[]) {
return input.slice(-40).join("")
}
export async function startBackend(label: string): Promise<Handle> {
export async function startBackend(label: string, input?: { llmUrl?: string }): Promise<Handle> {
const port = await freePort()
const sandbox = await fs.mkdtemp(path.join(os.tmpdir(), `opencode-e2e-${label}-`))
const appDir = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "..")
@ -80,6 +80,7 @@ export async function startBackend(label: string): Promise<Handle> {
XDG_STATE_HOME: path.join(sandbox, "state"),
OPENCODE_CLIENT: "app",
OPENCODE_STRICT_CONFIG_DEPS: "true",
OPENCODE_E2E_LLM_URL: input?.llmUrl,
} satisfies Record<string, string | undefined>
const out: string[] = []
const err: string[] = []

View File

@ -12,11 +12,14 @@ import {
setHealthPhase,
seedProjects,
sessionIDFromUrl,
waitSlug,
waitSession,
waitSessionIdle,
waitSessionSaved,
waitSlug,
} from "./actions"
import { openaiModel, withMockOpenAI } from "./prompt/mock"
import { createSdk, dirSlug, getWorktree, sessionPath } from "./utils"
import { promptSelector } from "./selectors"
import { createSdk, dirSlug, getWorktree, resolveDirectory, sessionPath } from "./utils"
type LLMFixture = {
url: string
@ -51,6 +54,23 @@ type LLMFixture = {
misses: () => Promise<Array<{ url: URL; body: Record<string, unknown> }>>
}
type LLMWorker = LLMFixture & {
reset: () => Promise<void>
}
type AssistantFixture = {
reply: (value: string, opts?: { usage?: Usage }) => Promise<void>
tool: (name: string, input: unknown) => Promise<void>
toolHang: (name: string, input: unknown) => Promise<void>
reason: (value: string, opts?: { text?: string; usage?: Usage }) => Promise<void>
fail: (message?: unknown) => Promise<void>
error: (status: number, body: unknown) => Promise<void>
hang: () => Promise<void>
hold: (value: string, wait: PromiseLike<unknown>) => Promise<void>
calls: () => Promise<number>
pending: () => Promise<number>
}
export const settingsKey = "settings.v3"
const seedModel = (() => {
@ -79,8 +99,15 @@ type ProjectOptions = {
beforeGoto?: (project: { directory: string; sdk: ReturnType<typeof createSdk> }) => Promise<void>
}
type ProjectFixture = ProjectHandle & {
open: (options?: ProjectOptions) => Promise<void>
prompt: (text: string) => Promise<string>
}
type TestFixtures = {
llm: LLMFixture
assistant: AssistantFixture
project: ProjectFixture
sdk: ReturnType<typeof createSdk>
gotoSession: (sessionID?: string) => Promise<void>
withProject: <T>(callback: (project: ProjectHandle) => Promise<T>, options?: ProjectOptions) => Promise<T>
@ -89,6 +116,7 @@ type TestFixtures = {
}
type WorkerFixtures = {
_llm: LLMWorker
backend: {
url: string
sdk: (directory?: string) => ReturnType<typeof createSdk>
@ -98,9 +126,42 @@ type WorkerFixtures = {
}
export const test = base.extend<TestFixtures, WorkerFixtures>({
_llm: [
async ({}, use) => {
const rt = ManagedRuntime.make(TestLLMServer.layer)
try {
const svc = await rt.runPromise(TestLLMServer.asEffect())
await use({
url: svc.url,
push: (...input) => rt.runPromise(svc.push(...input)),
pushMatch: (match, ...input) => rt.runPromise(svc.pushMatch(match, ...input)),
textMatch: (match, value, opts) => rt.runPromise(svc.textMatch(match, value, opts)),
toolMatch: (match, name, input) => rt.runPromise(svc.toolMatch(match, name, input)),
text: (value, opts) => rt.runPromise(svc.text(value, opts)),
tool: (name, input) => rt.runPromise(svc.tool(name, input)),
toolHang: (name, input) => rt.runPromise(svc.toolHang(name, input)),
reason: (value, opts) => rt.runPromise(svc.reason(value, opts)),
fail: (message) => rt.runPromise(svc.fail(message)),
error: (status, body) => rt.runPromise(svc.error(status, body)),
hang: () => rt.runPromise(svc.hang),
hold: (value, wait) => rt.runPromise(svc.hold(value, wait)),
reset: () => rt.runPromise(svc.reset),
hits: () => rt.runPromise(svc.hits),
calls: () => rt.runPromise(svc.calls),
wait: (count) => rt.runPromise(svc.wait(count)),
inputs: () => rt.runPromise(svc.inputs),
pending: () => rt.runPromise(svc.pending),
misses: () => rt.runPromise(svc.misses),
})
} finally {
await rt.dispose()
}
},
{ scope: "worker" },
],
backend: [
async ({}, use, workerInfo) => {
const handle = await startBackend(`w${workerInfo.workerIndex}`)
async ({ _llm }, use, workerInfo) => {
const handle = await startBackend(`w${workerInfo.workerIndex}`, { llmUrl: _llm.url })
try {
await use({
url: handle.url,
@ -112,35 +173,48 @@ export const test = base.extend<TestFixtures, WorkerFixtures>({
},
{ scope: "worker" },
],
llm: async ({}, use) => {
const rt = ManagedRuntime.make(TestLLMServer.layer)
try {
const svc = await rt.runPromise(TestLLMServer.asEffect())
await use({
url: svc.url,
push: (...input) => rt.runPromise(svc.push(...input)),
pushMatch: (match, ...input) => rt.runPromise(svc.pushMatch(match, ...input)),
textMatch: (match, value, opts) => rt.runPromise(svc.textMatch(match, value, opts)),
toolMatch: (match, name, input) => rt.runPromise(svc.toolMatch(match, name, input)),
text: (value, opts) => rt.runPromise(svc.text(value, opts)),
tool: (name, input) => rt.runPromise(svc.tool(name, input)),
toolHang: (name, input) => rt.runPromise(svc.toolHang(name, input)),
reason: (value, opts) => rt.runPromise(svc.reason(value, opts)),
fail: (message) => rt.runPromise(svc.fail(message)),
error: (status, body) => rt.runPromise(svc.error(status, body)),
hang: () => rt.runPromise(svc.hang),
hold: (value, wait) => rt.runPromise(svc.hold(value, wait)),
hits: () => rt.runPromise(svc.hits),
calls: () => rt.runPromise(svc.calls),
wait: (count) => rt.runPromise(svc.wait(count)),
inputs: () => rt.runPromise(svc.inputs),
pending: () => rt.runPromise(svc.pending),
misses: () => rt.runPromise(svc.misses),
})
} finally {
await rt.dispose()
llm: async ({ _llm }, use) => {
await _llm.reset()
await use({
url: _llm.url,
push: _llm.push,
pushMatch: _llm.pushMatch,
textMatch: _llm.textMatch,
toolMatch: _llm.toolMatch,
text: _llm.text,
tool: _llm.tool,
toolHang: _llm.toolHang,
reason: _llm.reason,
fail: _llm.fail,
error: _llm.error,
hang: _llm.hang,
hold: _llm.hold,
hits: _llm.hits,
calls: _llm.calls,
wait: _llm.wait,
inputs: _llm.inputs,
pending: _llm.pending,
misses: _llm.misses,
})
const pending = await _llm.pending()
if (pending > 0) {
throw new Error(`TestLLMServer still has ${pending} queued response(s) after the test finished`)
}
},
assistant: async ({ llm }, use) => {
await use({
reply: llm.text,
tool: llm.tool,
toolHang: llm.toolHang,
reason: llm.reason,
fail: llm.fail,
error: llm.error,
hang: llm.hang,
hold: llm.hold,
calls: llm.calls,
pending: llm.pending,
})
},
page: async ({ page }, use) => {
let boundary: string | undefined
setHealthPhase(page, "test")
@ -166,8 +240,7 @@ export const test = base.extend<TestFixtures, WorkerFixtures>({
},
directory: [
async ({}, use) => {
const directory = await getWorktree()
await use(directory)
await use(await getWorktree())
},
{ scope: "worker" },
],
@ -189,6 +262,14 @@ export const test = base.extend<TestFixtures, WorkerFixtures>({
}
await use(gotoSession)
},
project: async ({ page, llm, backend }, use) => {
const item = makeProject(page, llm, backend)
try {
await use(item.project)
} finally {
await item.cleanup()
}
},
withProject: async ({ page }, use) => {
await use((callback, options) => runProject(page, callback, options))
},
@ -214,6 +295,137 @@ export const test = base.extend<TestFixtures, WorkerFixtures>({
},
})
function makeProject(
page: Page,
llm: LLMFixture,
backend: { url: string; sdk: (directory?: string) => ReturnType<typeof createSdk> },
) {
let state:
| {
directory: string
slug: string
sdk: ReturnType<typeof createSdk>
sessions: Map<string, string>
dirs: Set<string>
}
| undefined
const need = () => {
if (state) return state
throw new Error("project.open() must be called first")
}
const trackSession = (sessionID: string, directory?: string) => {
const cur = need()
cur.sessions.set(sessionID, directory ?? cur.directory)
}
const trackDirectory = (directory: string) => {
const cur = need()
if (directory !== cur.directory) cur.dirs.add(directory)
}
const gotoSession = async (sessionID?: string) => {
const cur = need()
await page.goto(sessionPath(cur.directory, sessionID))
await waitSession(page, { directory: cur.directory, sessionID, serverUrl: backend.url })
const current = sessionIDFromUrl(page.url())
if (current) trackSession(current)
}
const open = async (options?: ProjectOptions) => {
if (state) return
const directory = await createTestProject({ serverUrl: backend.url })
const sdk = backend.sdk(directory)
await options?.setup?.(directory)
await seedStorage(page, {
directory,
extra: options?.extra,
model: options?.model,
serverUrl: backend.url,
})
state = {
directory,
slug: "",
sdk,
sessions: new Map(),
dirs: new Set(),
}
await options?.beforeGoto?.({ directory, sdk })
await gotoSession()
need().slug = await waitSlug(page)
}
const prompt = async (text: string) => {
const cur = need()
if ((await llm.pending()) === 0) {
await llm.text("ok")
}
const prompt = page.locator(promptSelector).first()
await expect(prompt).toBeVisible()
await prompt.click()
await prompt.fill(text)
await prompt.press("Enter")
await expect(page).toHaveURL(/\/session\/[^/?#]+/, { timeout: 30_000 })
const sessionID = sessionIDFromUrl(page.url())
if (!sessionID) throw new Error(`Failed to parse session id from url: ${page.url()}`)
const current = await page
.evaluate(() => {
const win = window as E2EWindow
const next = win.__opencode_e2e?.model?.current
if (!next) return null
return { dir: next.dir, sessionID: next.sessionID }
})
.catch(() => null as { dir?: string; sessionID?: string } | null)
const directory = current?.dir
? await resolveDirectory(current.dir, backend.url).catch(() => cur.directory)
: cur.directory
trackSession(sessionID, directory)
await waitSessionSaved(directory, sessionID, 30_000, backend.url)
await waitSessionIdle(backend.sdk(directory), sessionID, 30_000).catch(() => undefined)
return sessionID
}
const cleanup = async () => {
const cur = state
if (!cur) return
setHealthPhase(page, "cleanup")
await Promise.allSettled(
Array.from(cur.sessions, ([sessionID, directory]) =>
cleanupSession({ sessionID, directory, serverUrl: backend.url }),
),
)
await Promise.allSettled(Array.from(cur.dirs, (directory) => cleanupTestProject(directory)))
await cleanupTestProject(cur.directory)
state = undefined
setHealthPhase(page, "test")
}
return {
project: {
open,
prompt,
gotoSession,
trackSession,
trackDirectory,
get directory() {
return need().directory
},
get slug() {
return need().slug
},
get sdk() {
return need().sdk
},
},
cleanup,
}
}
async function runProject<T>(
page: Page,
callback: (project: ProjectHandle) => Promise<T>,

View File

@ -1,52 +1,25 @@
import { test, expect } from "../fixtures"
import { promptSelector } from "../selectors"
import { assistantText, sessionIDFromUrl, withSession } from "../actions"
import { openaiModel, promptMatch, titleMatch, withMockOpenAI } from "./mock"
import { assistantText, withSession } from "../actions"
const text = (value: string | null) => (value ?? "").replace(/\u200B/g, "").trim()
// Regression test for Issue #12453: the synchronous POST /message endpoint holds
// the connection open while the agent works, causing "Failed to fetch" over
// VPN/Tailscale. The fix switches to POST /prompt_async which returns immediately.
test("prompt succeeds when sync message endpoint is unreachable", async ({
page,
llm,
backend,
withBackendProject,
}) => {
test("prompt succeeds when sync message endpoint is unreachable", async ({ page, project, assistant }) => {
test.setTimeout(120_000)
// Simulate Tailscale/VPN killing the long-lived sync connection
await page.route("**/session/*/message", (route) => route.abort("connectionfailed"))
await withMockOpenAI({
serverUrl: backend.url,
llmUrl: llm.url,
fn: async () => {
const token = `E2E_ASYNC_${Date.now()}`
await llm.textMatch(titleMatch, "E2E Title")
await llm.textMatch(promptMatch(token), token)
const token = `E2E_ASYNC_${Date.now()}`
await project.open()
await assistant.reply(token)
const sessionID = await project.prompt(`Reply with exactly: ${token}`)
await withBackendProject(
async (project) => {
await page.locator(promptSelector).click()
await page.keyboard.type(`Reply with exactly: ${token}`)
await page.keyboard.press("Enter")
await expect(page).toHaveURL(/\/session\/[^/?#]+/, { timeout: 30_000 })
const sessionID = sessionIDFromUrl(page.url())!
project.trackSession(sessionID)
await expect.poll(() => llm.calls()).toBeGreaterThanOrEqual(1)
await expect.poll(() => assistantText(project.sdk, sessionID), { timeout: 90_000 }).toContain(token)
},
{
model: openaiModel,
},
)
},
})
await expect.poll(() => assistant.calls()).toBeGreaterThanOrEqual(1)
await expect.poll(() => assistantText(project.sdk, sessionID), { timeout: 90_000 }).toContain(token)
})
test("failed prompt send restores the composer input", async ({ page, sdk, gotoSession }) => {

View File

@ -1,10 +1,9 @@
import type { ToolPart } from "@opencode-ai/sdk/v2/client"
import type { Page } from "@playwright/test"
import { test, expect } from "../fixtures"
import { assistantText, sessionIDFromUrl } from "../actions"
import { assistantText } from "../actions"
import { promptSelector } from "../selectors"
import { createSdk } from "../utils"
import { openaiModel, promptMatch, titleMatch, withMockOpenAI } from "./mock"
const text = (value: string | null) => (value ?? "").replace(/\u200B/g, "").trim()
type Sdk = ReturnType<typeof createSdk>
@ -43,73 +42,45 @@ async function shell(sdk: Sdk, sessionID: string, cmd: string, token: string) {
.toContain(token)
}
test("prompt history restores unsent draft with arrow navigation", async ({
page,
llm,
backend,
withBackendProject,
}) => {
test("prompt history restores unsent draft with arrow navigation", async ({ page, project, assistant }) => {
test.setTimeout(120_000)
await withMockOpenAI({
serverUrl: backend.url,
llmUrl: llm.url,
fn: async () => {
const firstToken = `E2E_HISTORY_ONE_${Date.now()}`
const secondToken = `E2E_HISTORY_TWO_${Date.now()}`
const first = `Reply with exactly: ${firstToken}`
const second = `Reply with exactly: ${secondToken}`
const draft = `draft ${Date.now()}`
const firstToken = `E2E_HISTORY_ONE_${Date.now()}`
const secondToken = `E2E_HISTORY_TWO_${Date.now()}`
const first = `Reply with exactly: ${firstToken}`
const second = `Reply with exactly: ${secondToken}`
const draft = `draft ${Date.now()}`
await llm.textMatch(titleMatch, "E2E Title")
await llm.textMatch(promptMatch(firstToken), firstToken)
await llm.textMatch(promptMatch(secondToken), secondToken)
await project.open()
await assistant.reply(firstToken)
const sessionID = await project.prompt(first)
await wait(page, "")
await reply(project.sdk, sessionID, firstToken)
await withBackendProject(
async (project) => {
const prompt = page.locator(promptSelector)
await assistant.reply(secondToken)
await project.prompt(second)
await wait(page, "")
await reply(project.sdk, sessionID, secondToken)
await prompt.click()
await page.keyboard.type(first)
await page.keyboard.press("Enter")
await wait(page, "")
const prompt = page.locator(promptSelector)
await prompt.click()
await page.keyboard.type(draft)
await wait(page, draft)
await expect(page).toHaveURL(/\/session\/[^/?#]+/, { timeout: 30_000 })
const sessionID = sessionIDFromUrl(page.url())!
project.trackSession(sessionID)
await reply(project.sdk, sessionID, firstToken)
await prompt.fill("")
await wait(page, "")
await prompt.click()
await page.keyboard.type(second)
await page.keyboard.press("Enter")
await wait(page, "")
await reply(project.sdk, sessionID, secondToken)
await page.keyboard.press("ArrowUp")
await wait(page, second)
await prompt.click()
await page.keyboard.type(draft)
await wait(page, draft)
await page.keyboard.press("ArrowUp")
await wait(page, first)
await prompt.fill("")
await wait(page, "")
await page.keyboard.press("ArrowDown")
await wait(page, second)
await page.keyboard.press("ArrowUp")
await wait(page, second)
await page.keyboard.press("ArrowUp")
await wait(page, first)
await page.keyboard.press("ArrowDown")
await wait(page, second)
await page.keyboard.press("ArrowDown")
await wait(page, "")
},
{
model: openaiModel,
},
)
},
})
await page.keyboard.press("ArrowDown")
await wait(page, "")
})
test.fixme("shell history stays separate from normal prompt history", async ({ page, sdk, gotoSession }) => {

View File

@ -1,9 +1,7 @@
import { test, expect } from "../fixtures"
import { promptSelector } from "../selectors"
import { assistantText, sessionIDFromUrl } from "../actions"
import { openaiModel, promptMatch, titleMatch, withMockOpenAI } from "./mock"
import { assistantText } from "../actions"
test("can send a prompt and receive a reply", async ({ page, llm, backend, withBackendProject }) => {
test("can send a prompt and receive a reply", async ({ page, project, assistant }) => {
test.setTimeout(120_000)
const pageErrors: string[] = []
@ -13,41 +11,13 @@ test("can send a prompt and receive a reply", async ({ page, llm, backend, withB
page.on("pageerror", onPageError)
try {
await withMockOpenAI({
serverUrl: backend.url,
llmUrl: llm.url,
fn: async () => {
const token = `E2E_OK_${Date.now()}`
const token = `E2E_OK_${Date.now()}`
await project.open()
await assistant.reply(token)
const sessionID = await project.prompt(`Reply with exactly: ${token}`)
await llm.textMatch(titleMatch, "E2E Title")
await llm.textMatch(promptMatch(token), token)
await withBackendProject(
async (project) => {
const prompt = page.locator(promptSelector)
await prompt.click()
await page.keyboard.type(`Reply with exactly: ${token}`)
await page.keyboard.press("Enter")
await expect(page).toHaveURL(/\/session\/[^/?#]+/, { timeout: 30_000 })
const sessionID = (() => {
const id = sessionIDFromUrl(page.url())
if (!id) throw new Error(`Failed to parse session id from url: ${page.url()}`)
return id
})()
project.trackSession(sessionID)
await expect.poll(() => llm.calls()).toBeGreaterThanOrEqual(1)
await expect.poll(() => assistantText(project.sdk, sessionID), { timeout: 30_000 }).toContain(token)
},
{
model: openaiModel,
},
)
},
})
await expect.poll(() => assistant.calls()).toBeGreaterThanOrEqual(1)
await expect.poll(() => assistantText(project.sdk, sessionID), { timeout: 30_000 }).toContain(token)
} finally {
page.off("pageerror", onPageError)
}

View File

@ -114,6 +114,12 @@ export namespace Provider {
})
}
function e2eURL() {
const url = Env.get("OPENCODE_E2E_LLM_URL")
if (typeof url !== "string" || url === "") return
return url
}
type BundledSDK = {
languageModel(modelId: string): LanguageModelV3
}
@ -1450,6 +1456,17 @@ export namespace Provider {
if (s.models.has(key)) return s.models.get(key)!
return yield* Effect.promise(async () => {
const url = e2eURL()
if (url) {
const language = createOpenAI({
name: model.providerID,
apiKey: "test-key",
baseURL: url,
}).responses(model.api.id)
s.models.set(key, language)
return language
}
const provider = s.providers[model.providerID]
const sdk = await resolveSDK(model, s)

View File

@ -599,6 +599,11 @@ function isToolResultFollowUp(body: unknown): boolean {
return false
}
function isTitleRequest(body: unknown): boolean {
if (!body || typeof body !== "object") return false
return JSON.stringify(body).includes("Generate a title for this conversation")
}
function requestSummary(body: unknown): string {
if (!body || typeof body !== "object") return "empty body"
if ("messages" in body && Array.isArray(body.messages)) {
@ -623,6 +628,7 @@ namespace TestLLMServer {
readonly error: (status: number, body: unknown) => Effect.Effect<void>
readonly hang: Effect.Effect<void>
readonly hold: (value: string, wait: PromiseLike<unknown>) => Effect.Effect<void>
readonly reset: Effect.Effect<void>
readonly hits: Effect.Effect<Hit[]>
readonly calls: Effect.Effect<number>
readonly wait: (count: number) => Effect.Effect<void>
@ -671,21 +677,29 @@ export class TestLLMServer extends ServiceMap.Service<TestLLMServer, TestLLMServ
const req = yield* HttpServerRequest.HttpServerRequest
const body = yield* req.json.pipe(Effect.orElseSucceed(() => ({})))
const current = hit(req.originalUrl, body)
if (isTitleRequest(body)) {
hits = [...hits, current]
yield* notify()
const auto: Sse = { type: "sse", head: [role()], tail: [textLine("E2E Title"), finishLine("stop")] }
if (mode === "responses") return send(responses(auto, modelFrom(body)))
return send(auto)
}
// Auto-acknowledge tool-result follow-ups so tests only need to
// queue one response per tool call instead of two.
if (isToolResultFollowUp(body)) {
hits = [...hits, current]
yield* notify()
const auto: Sse = { type: "sse", head: [role()], tail: [textLine("ok"), finishLine("stop")] }
if (mode === "responses") return send(responses(auto, modelFrom(body)))
return send(auto)
}
const next = pull(current)
if (!next) {
// Auto-acknowledge tool-result follow-ups so tests only need to
// queue one response per tool call instead of two.
if (isToolResultFollowUp(body)) {
hits = [...hits, current]
yield* notify()
const auto: Sse = { type: "sse", head: [role()], tail: [textLine("ok"), finishLine("stop")] }
if (mode === "responses") return send(responses(auto, modelFrom(body)))
return send(auto)
}
misses = [...misses, current]
const summary = requestSummary(body)
console.warn(`[TestLLMServer] unmatched request: ${req.originalUrl} (${summary}, pending=${list.length})`)
return HttpServerResponse.text(`unexpected request: ${summary}`, { status: 500 })
hits = [...hits, current]
yield* notify()
const auto: Sse = { type: "sse", head: [role()], tail: [textLine("ok"), finishLine("stop")] }
if (mode === "responses") return send(responses(auto, modelFrom(body)))
return send(auto)
}
hits = [...hits, current]
yield* notify()
@ -755,6 +769,12 @@ export class TestLLMServer extends ServiceMap.Service<TestLLMServer, TestLLMServ
hold: Effect.fn("TestLLMServer.hold")(function* (value: string, wait: PromiseLike<unknown>) {
queue(reply().wait(wait).text(value).stop().item())
}),
reset: Effect.sync(() => {
hits = []
list = []
waits = []
misses = []
}),
hits: Effect.sync(() => [...hits]),
calls: Effect.sync(() => hits.length),
wait: Effect.fn("TestLLMServer.wait")(function* (count: number) {