From 544b179ee7404d3de044edd5c900445017c004ca Mon Sep 17 00:00:00 2001 From: James Long Date: Tue, 24 Mar 2026 16:17:58 -0400 Subject: [PATCH] notes --- .../src/provider/sdk/mock/PROTOCOL.md | 208 +++++++++++++++++ .../opencode/src/provider/sdk/mock/README.md | 209 +----------------- .../provider/sdk/mock/runner/serve.test.ts | 52 +++++ 3 files changed, 270 insertions(+), 199 deletions(-) create mode 100644 packages/opencode/src/provider/sdk/mock/PROTOCOL.md create mode 100644 packages/opencode/src/provider/sdk/mock/runner/serve.test.ts diff --git a/packages/opencode/src/provider/sdk/mock/PROTOCOL.md b/packages/opencode/src/provider/sdk/mock/PROTOCOL.md new file mode 100644 index 0000000000..6a9c0b917f --- /dev/null +++ b/packages/opencode/src/provider/sdk/mock/PROTOCOL.md @@ -0,0 +1,208 @@ +# Mock RPC + +Deterministic model scripting for tests. + +--- + +## Overview + +The mock provider lets test harnesses script exactly what the model should emit. Instead of hitting a real API, the user message contains a JSON object that describes each step of the conversation. This makes test scenarios fully deterministic and reproducible. + +--- + +## Understand the protocol + +The user message text is a JSON object with a `steps` array. Each step is an array of actions that the model emits on that turn. + +```json +{ + "steps": [ + [{ "type": "text", "content": "Hello" }], + [{ "type": "text", "content": "Goodbye" }] + ] +} +``` + +The mock model reads the **last** user message in the prompt to find this JSON. + +--- + +## Know how steps are selected + +The model picks which step to execute by counting messages with `role: "tool"` in the prompt. This count represents how many tool-result rounds have occurred. + +- **Step 0** runs on the first call (no tool results yet). +- **Step 1** runs after the first tool-result round. +- **Step N** runs after the Nth tool-result round. + +If the step index is out of bounds, the model emits an empty set of actions. 
+ +--- + +## Use the `text` action + +Emits a text block. + +```json +{ "type": "text", "content": "Some response text" } +``` + +| Field | Type | Description | +|-----------|--------|----------------------| +| `content` | string | The text to emit. | + +--- + +## Use the `tool_call` action + +Calls a tool. The input object is passed as-is. + +```json +{ "type": "tool_call", "name": "write", "input": { "filePath": "a.txt", "content": "hi" } } +``` + +| Field | Type | Description | +|---------|--------|---------------------------------| +| `name` | string | Name of the tool to call. | +| `input` | object | Arguments passed to the tool. | + +--- + +## Use the `thinking` action + +Emits a reasoning/thinking block. + +```json +{ "type": "thinking", "content": "Let me consider the options..." } +``` + +| Field | Type | Description | +|-----------|--------|----------------------------| +| `content` | string | The thinking text to emit. | + +--- + +## Use the `list_tools` action + +Responds with a JSON text block listing all available tools and their schemas. Useful for test scripts that need to discover tool names. No additional fields. + +```json +{ "type": "list_tools" } +``` + +--- + +## Use the `error` action + +Emits an error chunk. + +```json +{ "type": "error", "message": "something went wrong" } +``` + +| Field | Type | Description | +|-----------|--------|------------------------| +| `message` | string | The error message. | + +--- + +## Know the finish reason + +The finish reason is auto-inferred from the actions in the current step. If any action has `type: "tool_call"`, the finish reason is `"tool-calls"`. Otherwise it is `"stop"`. + +Token usage is always reported as `{ inputTokens: 10, outputTokens: 20, totalTokens: 30 }`. + +--- + +## Handle invalid JSON + +If the user message is not valid JSON or doesn't have a `steps` array, the model falls back to a default text response. This keeps backward compatibility with tests that don't use the RPC protocol. 
+ +--- + +## Examples + +### Simple text response + +```json +{ + "steps": [ + [{ "type": "text", "content": "Hello from the mock model" }] + ] +} +``` + +### Tool discovery + +```json +{ + "steps": [ + [{ "type": "list_tools" }] + ] +} +``` + +### Single tool call + +```json +{ + "steps": [ + [{ "type": "tool_call", "name": "read", "input": { "filePath": "config.json" } }] + ] +} +``` + +### Multi-turn tool use + +Step 0 calls a tool. Step 1 runs after the tool result comes back and emits a text response. + +```json +{ + "steps": [ + [{ "type": "tool_call", "name": "write", "input": { "filePath": "a.txt", "content": "hi" } }], + [{ "type": "text", "content": "Done writing the file." }] + ] +} +``` + +### Thinking and text + +```json +{ + "steps": [ + [ + { "type": "thinking", "content": "The user wants a greeting." }, + { "type": "text", "content": "Hey there!" } + ] + ] +} +``` + +### Multiple actions in one step + +A single step can contain any combination of actions. + +```json +{ + "steps": [ + [ + { "type": "text", "content": "I'll create two files." }, + { "type": "tool_call", "name": "write", "input": { "filePath": "a.txt", "content": "aaa" } }, + { "type": "tool_call", "name": "write", "input": { "filePath": "b.txt", "content": "bbb" } } + ], + [ + { "type": "text", "content": "Both files created." } + ] + ] +} +``` + +### Error simulation + +```json +{ + "steps": [ + [{ "type": "error", "message": "rate limit exceeded" }] + ] +} +``` diff --git a/packages/opencode/src/provider/sdk/mock/README.md b/packages/opencode/src/provider/sdk/mock/README.md index 6a9c0b917f..4808dd3e14 100644 --- a/packages/opencode/src/provider/sdk/mock/README.md +++ b/packages/opencode/src/provider/sdk/mock/README.md @@ -1,208 +1,19 @@ -# Mock RPC +I got it to the point where it can run a full mock session -Deterministic model scripting for tests. +Run the server with `./src/provider/sdk/mock/run`. 
It runs in a sandbox to make sure it doesn't interact with the outside world unexpectedly. ---- +Then run `bun run src/provider/sdk/mock/runner/index.ts` to drive a session and get a log. -## Overview +There is also `bun run src/provider/sdk/mock/runner/diff.ts` which will drive two sessions at once and compare them. This is annoying right now because you have to run two servers. This would let you compare the differences between versions, though. -The mock provider lets test harnesses script exactly what the model should emit. Instead of hitting a real API, the user message contains a JSON object that describes each step of the conversation. This makes test scenarios fully deterministic and reproducible. +## Coverage ---- +I also have an experiment in `serve.test.ts` which runs the server as a bun test, which gives us access to coverage info. Run it like this: -## Understand the protocol - -The user message text is a JSON object with a `steps` array. Each step is an array of actions that the model emits on that turn. - -```json -{ - "steps": [ - [{ "type": "text", "content": "Hello" }], - [{ "type": "text", "content": "Goodbye" }] - ] -} +``` +bun test --coverage --coverage-reporter=lcov --timeout 0 src/provider/sdk/mock/runner/serve.test.ts ``` -The mock model reads the **last** user message in the prompt to find this JSON. +That will give you a `lcov.info` file. Convert it to HTML with this: ---- - -## Know how steps are selected - -The model picks which step to execute by counting messages with `role: "tool"` in the prompt. This count represents how many tool-result rounds have occurred. - -- **Step 0** runs on the first call (no tool results yet). -- **Step 1** runs after the first tool-result round. -- **Step N** runs after the Nth tool-result round. - -If the step index is out of bounds, the model emits an empty set of actions. - ---- - -## Use the `text` action - -Emits a text block. 
- -```json -{ "type": "text", "content": "Some response text" } -``` - -| Field | Type | Description | -|-----------|--------|----------------------| -| `content` | string | The text to emit. | - ---- - -## Use the `tool_call` action - -Calls a tool. The input object is passed as-is. - -```json -{ "type": "tool_call", "name": "write", "input": { "filePath": "a.txt", "content": "hi" } } -``` - -| Field | Type | Description | -|---------|--------|---------------------------------| -| `name` | string | Name of the tool to call. | -| `input` | object | Arguments passed to the tool. | - ---- - -## Use the `thinking` action - -Emits a reasoning/thinking block. - -```json -{ "type": "thinking", "content": "Let me consider the options..." } -``` - -| Field | Type | Description | -|-----------|--------|----------------------------| -| `content` | string | The thinking text to emit. | - ---- - -## Use the `list_tools` action - -Responds with a JSON text block listing all available tools and their schemas. Useful for test scripts that need to discover tool names. No additional fields. - -```json -{ "type": "list_tools" } -``` - ---- - -## Use the `error` action - -Emits an error chunk. - -```json -{ "type": "error", "message": "something went wrong" } -``` - -| Field | Type | Description | -|-----------|--------|------------------------| -| `message` | string | The error message. | - ---- - -## Know the finish reason - -The finish reason is auto-inferred from the actions in the current step. If any action has `type: "tool_call"`, the finish reason is `"tool-calls"`. Otherwise it is `"stop"`. - -Token usage is always reported as `{ inputTokens: 10, outputTokens: 20, totalTokens: 30 }`. - ---- - -## Handle invalid JSON - -If the user message is not valid JSON or doesn't have a `steps` array, the model falls back to a default text response. This keeps backward compatibility with tests that don't use the RPC protocol. 
- ---- - -## Examples - -### Simple text response - -```json -{ - "steps": [ - [{ "type": "text", "content": "Hello from the mock model" }] - ] -} -``` - -### Tool discovery - -```json -{ - "steps": [ - [{ "type": "list_tools" }] - ] -} -``` - -### Single tool call - -```json -{ - "steps": [ - [{ "type": "tool_call", "name": "read", "input": { "filePath": "config.json" } }] - ] -} -``` - -### Multi-turn tool use - -Step 0 calls a tool. Step 1 runs after the tool result comes back and emits a text response. - -```json -{ - "steps": [ - [{ "type": "tool_call", "name": "write", "input": { "filePath": "a.txt", "content": "hi" } }], - [{ "type": "text", "content": "Done writing the file." }] - ] -} -``` - -### Thinking and text - -```json -{ - "steps": [ - [ - { "type": "thinking", "content": "The user wants a greeting." }, - { "type": "text", "content": "Hey there!" } - ] - ] -} -``` - -### Multiple actions in one step - -A single step can contain any combination of actions. - -```json -{ - "steps": [ - [ - { "type": "text", "content": "I'll create two files." }, - { "type": "tool_call", "name": "write", "input": { "filePath": "a.txt", "content": "aaa" } }, - { "type": "tool_call", "name": "write", "input": { "filePath": "b.txt", "content": "bbb" } } - ], - [ - { "type": "text", "content": "Both files created." 
} - ] - ] -} -``` - -### Error simulation - -```json -{ - "steps": [ - [{ "type": "error", "message": "rate limit exceeded" }] - ] -} -``` +genhtml coverage/lcov.info -o coverage/html && open coverage/html/index.html \ No newline at end of file diff --git a/packages/opencode/src/provider/sdk/mock/runner/serve.test.ts b/packages/opencode/src/provider/sdk/mock/runner/serve.test.ts new file mode 100644 index 0000000000..b9c3ad8072 --- /dev/null +++ b/packages/opencode/src/provider/sdk/mock/runner/serve.test.ts @@ -0,0 +1,52 @@ +import { vfsPlugin } from "../plugin" +Bun.plugin(vfsPlugin) + +/** + * Starts the mock opencode server inside bun:test so coverage instrumentation works. + * + * Usage: + * bun test --coverage --coverage-reporter=lcov --timeout 0 src/provider/sdk/mock/runner/serve.test.ts + * + * The server runs until the process is killed (Ctrl-C). + * Coverage data is flushed on exit. + */ + +import { test } from "bun:test" +import { Log } from "../../../../util/log" +import { Server } from "../../../../server/server" +import { Global } from "../../../../global" +import { Filesystem } from "../../../../util/filesystem" +import { JsonMigration } from "../../../../storage/json-migration" +import { Database } from "../../../../storage/db" +import path from "path" + +const PORT = 4096; + +test("serve", async () => { + process.env.AGENT = "1" + process.env.OPENCODE = "1" + process.env.OPENCODE_PID = String(process.pid) + + await Log.init({ print: false, dev: true, level: "DEBUG" }) + + const marker = path.join(Global.Path.data, "opencode.db") + if (!(await Filesystem.exists(marker))) { + console.log("Running one-time database migration...") + await JsonMigration.run(Database.Client().$client, { + progress: (event) => { + const pct = Math.floor((event.current / event.total) * 100) + if (event.current === event.total || pct % 25 === 0) { + console.log(` migration: ${pct}%`) + } + }, + }) + console.log("Migration complete.") + } + + const server = Server.listen({ 
 port: PORT, hostname: "127.0.0.1" }) + console.log(`opencode server listening on http://${server.hostname}:${server.port}`) + + // keep alive until killed: this promise never settles, so the server.stop() call after it is never reached + await new Promise(() => {}) + await server.stop() +})