feat: add structured output (JSON schema) support
Add outputFormat option to session.prompt() for requesting structured JSON output. When type is 'json_schema', injects a StructuredOutput tool that validates model output against the provided schema.

- Add OutputFormat schema types (text, json_schema) to message-v2.ts
- Add structured_output field to AssistantMessage
- Add StructuredOutputError for validation failures
- Implement createStructuredOutputTool helper in prompt.ts
- Integrate structured output into agent loop with retry support
- Regenerate OpenAPI spec with new types
- Add unit tests for schema validation

pull/8161/head
parent
4275907df6
commit
32ef11da1f
|
|
@ -17,6 +17,13 @@ import { type SystemError } from "bun"
|
|||
export namespace MessageV2 {
|
||||
export const OutputLengthError = NamedError.create("MessageOutputLengthError", z.object({}))
|
||||
export const AbortedError = NamedError.create("MessageAbortedError", z.object({ message: z.string() }))
|
||||
/**
 * Named error attached to an assistant message when structured (JSON schema)
 * output was requested but could not be produced.
 *
 * Payload fields:
 * - `message`: human-readable description of the failure
 * - `retries`: numeric retry counter supplied by whoever raises the error
 */
export const StructuredOutputError = NamedError.create(
  "StructuredOutputError",
  z.object({ message: z.string(), retries: z.number() }),
)
|
||||
export const AuthError = NamedError.create(
|
||||
"ProviderAuthError",
|
||||
z.object({
|
||||
|
|
@ -37,6 +44,28 @@ export namespace MessageV2 {
|
|||
)
|
||||
export type APIError = z.infer<typeof APIError.Schema>
|
||||
|
||||
/** Output format variant tagged `type: "text"`; carries no further options. */
export const OutputFormatText = z
  .object({ type: z.literal("text") })
  .meta({ ref: "OutputFormatText" })
|
||||
|
||||
/**
 * Output format variant tagged `type: "json_schema"`.
 *
 * - `schema`: the JSON Schema document, accepted as an open string-keyed record
 * - `retryCount`: non-negative integer; defaults to 2 when omitted
 */
export const OutputFormatJsonSchema = z
  .object({
    type: z.literal("json_schema"),
    schema: z.record(z.string(), z.any()).meta({ ref: "JSONSchema" }),
    retryCount: z.number().int().min(0).default(2),
  })
  .meta({ ref: "OutputFormatJsonSchema" })
|
||||
|
||||
/** All supported output formats, discriminated on the `type` field. */
export const OutputFormat = z
  .discriminatedUnion("type", [OutputFormatText, OutputFormatJsonSchema])
  .meta({ ref: "OutputFormat" })
/** Parsed shape of {@link OutputFormat}. */
export type OutputFormat = z.infer<typeof OutputFormat>
|
||||
|
||||
const PartBase = z.object({
|
||||
id: z.string(),
|
||||
sessionID: z.string(),
|
||||
|
|
@ -294,6 +323,7 @@ export namespace MessageV2 {
|
|||
time: z.object({
|
||||
created: z.number(),
|
||||
}),
|
||||
outputFormat: OutputFormat.optional(),
|
||||
summary: z
|
||||
.object({
|
||||
title: z.string().optional(),
|
||||
|
|
@ -345,6 +375,7 @@ export namespace MessageV2 {
|
|||
NamedError.Unknown.Schema,
|
||||
OutputLengthError.Schema,
|
||||
AbortedError.Schema,
|
||||
StructuredOutputError.Schema,
|
||||
APIError.Schema,
|
||||
])
|
||||
.optional(),
|
||||
|
|
@ -371,6 +402,7 @@ export namespace MessageV2 {
|
|||
write: z.number(),
|
||||
}),
|
||||
}),
|
||||
structured_output: z.any().optional(),
|
||||
finish: z.string().optional(),
|
||||
}).meta({
|
||||
ref: "AssistantMessage",
|
||||
|
|
|
|||
|
|
@ -47,6 +47,14 @@ import { Shell } from "@/shell/shell"
|
|||
// @ts-ignore
|
||||
globalThis.AI_SDK_LOG_WARNINGS = false
|
||||
|
||||
/** Description text passed to the model for the injected StructuredOutput tool. */
const STRUCTURED_OUTPUT_DESCRIPTION = [
  "Use this tool to return your final response in the requested structured format.",
  "",
  "IMPORTANT:",
  "- You MUST call this tool exactly once at the end of your response",
  "- The input must be valid JSON matching the required schema",
  "- Complete all necessary research and tool calls BEFORE calling this tool",
  "- This tool provides your final answer - no further actions are taken after calling it",
].join("\n")
|
||||
|
||||
export namespace SessionPrompt {
|
||||
const log = Log.create({ service: "session.prompt" })
|
||||
export const OUTPUT_TOKEN_MAX = Flag.OPENCODE_EXPERIMENTAL_OUTPUT_TOKEN_MAX || 32_000
|
||||
|
|
@ -84,11 +92,12 @@ export namespace SessionPrompt {
|
|||
.object({
|
||||
providerID: z.string(),
|
||||
modelID: z.string(),
|
||||
})
|
||||
})
|
||||
.optional(),
|
||||
agent: z.string().optional(),
|
||||
noReply: z.boolean().optional(),
|
||||
tools: z.record(z.string(), z.boolean()).optional(),
|
||||
outputFormat: MessageV2.OutputFormat.optional(),
|
||||
system: z.string().optional(),
|
||||
parts: z.array(
|
||||
z.discriminatedUnion("type", [
|
||||
|
|
@ -238,6 +247,11 @@ export namespace SessionPrompt {
|
|||
|
||||
using _ = defer(() => cancel(sessionID))
|
||||
|
||||
// Structured output state
|
||||
// Note: On session resumption, state is reset but outputFormat is preserved
|
||||
// on the user message and will be retrieved from lastUser below
|
||||
let structuredOutput: unknown | undefined
|
||||
|
||||
let step = 0
|
||||
while (true) {
|
||||
SessionStatus.set(sessionID, { type: "busy" })
|
||||
|
|
@ -514,6 +528,10 @@ export namespace SessionPrompt {
|
|||
model,
|
||||
tools: lastUser.tools,
|
||||
processor,
|
||||
outputFormat: lastUser.outputFormat ?? { type: "text" },
|
||||
onStructuredOutputSuccess: (output) => {
|
||||
structuredOutput = output
|
||||
},
|
||||
})
|
||||
|
||||
if (step === 1) {
|
||||
|
|
@ -547,6 +565,28 @@ export namespace SessionPrompt {
|
|||
tools,
|
||||
model,
|
||||
})
|
||||
|
||||
// Handle structured output logic
|
||||
const outputFormat = lastUser.outputFormat ?? { type: "text" }
|
||||
|
||||
if (result === "stop" && !processor.message.error) {
|
||||
// Check if structured output was captured successfully
|
||||
if (structuredOutput !== undefined) {
|
||||
// Store structured output on the final assistant message
|
||||
processor.message.structured_output = structuredOutput
|
||||
await Session.updateMessage(processor.message)
|
||||
break
|
||||
} else if (outputFormat.type === "json_schema") {
|
||||
// Model stopped without calling StructuredOutput tool
|
||||
processor.message.error = new MessageV2.StructuredOutputError({
|
||||
message: "Model did not produce structured output",
|
||||
retries: 0,
|
||||
}).toObject()
|
||||
await Session.updateMessage(processor.message)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if (result === "stop") break
|
||||
continue
|
||||
}
|
||||
|
|
@ -569,12 +609,15 @@ export namespace SessionPrompt {
|
|||
return Provider.defaultModel()
|
||||
}
|
||||
|
||||
async function resolveTools(input: {
|
||||
/** @internal Exported for testing */
|
||||
export async function resolveTools(input: {
|
||||
agent: Agent.Info
|
||||
model: Provider.Model
|
||||
sessionID: string
|
||||
tools?: Record<string, boolean>
|
||||
processor: SessionProcessor.Info
|
||||
outputFormat: MessageV2.OutputFormat
|
||||
onStructuredOutputSuccess?: (output: unknown) => void
|
||||
}) {
|
||||
using _ = log.time("resolveTools")
|
||||
const tools: Record<string, AITool> = {}
|
||||
|
|
@ -711,9 +754,49 @@ export namespace SessionPrompt {
|
|||
}
|
||||
tools[key] = item
|
||||
}
|
||||
|
||||
// Inject StructuredOutput tool if JSON schema mode enabled
|
||||
if (input.outputFormat.type === "json_schema" && input.onStructuredOutputSuccess) {
|
||||
tools["StructuredOutput"] = createStructuredOutputTool({
|
||||
schema: input.outputFormat.schema,
|
||||
onSuccess: input.onStructuredOutputSuccess,
|
||||
})
|
||||
}
|
||||
|
||||
return tools
|
||||
}
|
||||
|
||||
/**
 * Builds the synthetic `StructuredOutput` tool that the model calls to hand
 * back its final answer in the caller-supplied JSON schema.
 *
 * @param input.schema    JSON Schema describing the expected answer. A
 *                        top-level `$schema` key is stripped before the schema
 *                        is used as the tool's input schema.
 * @param input.onSuccess Invoked with the tool arguments once they pass the
 *                        shallow shape check below.
 * @returns A tool whose `execute` captures the arguments and reports success.
 * @throws (from `execute`) `Error` when the schema declares `type: "object"`
 *         and the arguments are not an object or lack a `required` key.
 *
 * @internal Exported for testing
 */
export function createStructuredOutputTool(input: {
  schema: Record<string, any>
  onSuccess: (output: unknown) => void
}): AITool {
  // `$schema` only names the JSON Schema dialect; it is not needed for tool input.
  const { $schema: _dialect, ...toolSchema } = input.schema

  return tool({
    id: "StructuredOutput" as any,
    description: STRUCTURED_OUTPUT_DESCRIPTION,
    inputSchema: jsonSchema(toolSchema as any),
    async execute(args) {
      // NOTE(review): `jsonSchema()` is called without a `validate` callback,
      // and in that case the AI SDK appears to pass tool input through without
      // runtime validation - confirm against the SDK version in use. Do not
      // assume `args` matches the schema; reject the obviously malformed cases
      // here so bad input is never recorded as a successful structured output.
      assertTopLevelShape(toolSchema, args)
      input.onSuccess(args)
      return {
        output: "Structured output captured successfully.",
        title: "Structured Output",
        metadata: { valid: true },
      }
    },
    toModelOutput(result) {
      return {
        type: "text",
        value: result.output,
      }
    },
  })
}

/**
 * Shallow, dependency-free sanity check of `value` against an object schema:
 * verifies that it is a plain (non-array, non-null) object and that every key
 * listed in `required` is present. This is NOT full JSON Schema validation.
 * Schemas whose top-level `type` is not exactly `"object"` are left unchecked.
 */
function assertTopLevelShape(schema: Record<string, unknown>, value: unknown): void {
  if (schema.type !== "object") return
  if (typeof value !== "object" || value === null || Array.isArray(value)) {
    throw new Error("StructuredOutput input must be a JSON object matching the required schema")
  }
  const required: unknown[] = Array.isArray(schema.required) ? schema.required : []
  const missing = required.filter(
    (key): key is string => typeof key === "string" && !Object.prototype.hasOwnProperty.call(value, key),
  )
  if (missing.length > 0) {
    throw new Error(`StructuredOutput input is missing required properties: ${missing.join(", ")}`)
  }
}
|
||||
|
||||
async function createUserMessage(input: PromptInput) {
|
||||
const agent = await Agent.get(input.agent ?? (await Agent.defaultAgent()))
|
||||
const info: MessageV2.Info = {
|
||||
|
|
@ -727,6 +810,7 @@ export namespace SessionPrompt {
|
|||
agent: agent.name,
|
||||
model: input.model ?? agent.model ?? (await lastModel(input.sessionID)),
|
||||
system: input.system,
|
||||
outputFormat: input.outputFormat,
|
||||
}
|
||||
|
||||
const parts = await Promise.all(
|
||||
|
|
|
|||
|
|
@ -0,0 +1,253 @@
|
|||
import { describe, expect, test } from "bun:test"
|
||||
import { MessageV2 } from "../../src/session/message-v2"
|
||||
import { SessionPrompt } from "../../src/session/prompt"
|
||||
|
||||
// Schema-level checks for the MessageV2.OutputFormat discriminated union.
describe("structured-output.OutputFormat", () => {
  // Route every case through the same non-throwing parse call.
  const parse = (candidate: unknown) => MessageV2.OutputFormat.safeParse(candidate)

  test("parses text format", () => {
    const parsed = parse({ type: "text" })
    expect(parsed.success).toBe(true)
    if (!parsed.success) return
    expect(parsed.data.type).toBe("text")
  })

  test("parses json_schema format with defaults", () => {
    const parsed = parse({
      type: "json_schema",
      schema: { type: "object", properties: { name: { type: "string" } } },
    })
    expect(parsed.success).toBe(true)
    if (!parsed.success) return

    const format = parsed.data
    expect(format.type).toBe("json_schema")
    if (format.type !== "json_schema") return
    // retryCount was omitted above, so the schema default applies
    expect(format.retryCount).toBe(2)
  })

  test("parses json_schema format with custom retryCount", () => {
    const parsed = parse({
      type: "json_schema",
      schema: { type: "object" },
      retryCount: 5,
    })
    expect(parsed.success).toBe(true)
    if (!parsed.success || parsed.data.type !== "json_schema") return
    expect(parsed.data.retryCount).toBe(5)
  })

  test("rejects invalid type", () => {
    expect(parse({ type: "invalid" }).success).toBe(false)
  })

  test("rejects json_schema without schema", () => {
    expect(parse({ type: "json_schema" }).success).toBe(false)
  })

  test("rejects negative retryCount", () => {
    const candidate = {
      type: "json_schema",
      schema: { type: "object" },
      retryCount: -1,
    }
    expect(parse(candidate).success).toBe(false)
  })
})
|
||||
|
||||
// Construction, serialization and type-guard checks for StructuredOutputError.
describe("structured-output.StructuredOutputError", () => {
  // Small factory so each case only states the payload it cares about.
  const makeError = (message: string, retries: number) =>
    new MessageV2.StructuredOutputError({ message, retries })

  test("creates error with message and retries", () => {
    const failure = makeError("Failed to validate", 3)

    expect(failure.name).toBe("StructuredOutputError")
    expect(failure.data.message).toBe("Failed to validate")
    expect(failure.data.retries).toBe(3)
  })

  test("converts to object correctly", () => {
    const plain = makeError("Test error", 2).toObject()

    expect(plain.name).toBe("StructuredOutputError")
    expect(plain.data.message).toBe("Test error")
    expect(plain.data.retries).toBe(2)
  })

  test("isInstance correctly identifies error", () => {
    const failure = makeError("Test", 1)
    const { isInstance } = MessageV2.StructuredOutputError

    expect(isInstance(failure)).toBe(true)
    expect(isInstance({ name: "other" })).toBe(false)
  })
})
|
||||
|
||||
// Checks that the user message schema treats outputFormat as an optional field.
describe("structured-output.UserMessage", () => {
  // Fresh minimal user message per call; extra fields are merged on top.
  const userMessage = (extra: Record<string, unknown> = {}) => ({
    id: "test-id",
    sessionID: "test-session",
    role: "user",
    time: { created: Date.now() },
    agent: "default",
    model: { providerID: "anthropic", modelID: "claude-3" },
    ...extra,
  })

  test("user message accepts outputFormat", () => {
    const candidate = userMessage({
      outputFormat: {
        type: "json_schema",
        schema: { type: "object" },
      },
    })
    expect(MessageV2.User.safeParse(candidate).success).toBe(true)
  })

  test("user message works without outputFormat (optional)", () => {
    expect(MessageV2.User.safeParse(userMessage()).success).toBe(true)
  })
})
|
||||
|
||||
// Checks that the assistant message schema treats structured_output as optional.
describe("structured-output.AssistantMessage", () => {
  // Smallest assistant message the tests rely on; shared by both cases.
  const baseAssistantMessage = {
    id: "test-id",
    sessionID: "test-session",
    role: "assistant" as const,
    parentID: "parent-id",
    modelID: "claude-3",
    providerID: "anthropic",
    mode: "default",
    agent: "default",
    path: { cwd: "/test", root: "/test" },
    cost: 0.001,
    tokens: { input: 100, output: 50, reasoning: 0, cache: { read: 0, write: 0 } },
    time: { created: Date.now() },
  }

  test("assistant message accepts structured_output", () => {
    const payload = { company: "Anthropic", founded: 2021 }
    const parsed = MessageV2.Assistant.safeParse({
      ...baseAssistantMessage,
      structured_output: { company: "Anthropic", founded: 2021 },
    })

    expect(parsed.success).toBe(true)
    if (!parsed.success) return
    expect(parsed.data.structured_output).toEqual(payload)
  })

  test("assistant message works without structured_output (optional)", () => {
    const parsed = MessageV2.Assistant.safeParse(baseAssistantMessage)
    expect(parsed.success).toBe(true)
  })
})
|
||||
|
||||
// Unit checks for the StructuredOutput tool factory exposed by SessionPrompt.
describe("structured-output.createStructuredOutputTool", () => {
  const ignore = () => {}

  // Builds the tool under test; the success callback defaults to a no-op.
  const build = (schema: Record<string, any>, onSuccess: (output: unknown) => void = ignore) =>
    SessionPrompt.createStructuredOutputTool({ schema, onSuccess })

  test("creates tool with correct id", () => {
    const built = build({ type: "object", properties: { name: { type: "string" } } })

    expect(built.id).toBe("StructuredOutput")
  })

  test("creates tool with description", () => {
    const built = build({ type: "object" })

    expect(built.description).toContain("structured format")
  })

  test("creates tool with schema as inputSchema", () => {
    const built = build({
      type: "object",
      properties: {
        company: { type: "string" },
        founded: { type: "number" },
      },
      required: ["company"],
    })

    // AI SDK wraps schema in { jsonSchema: {...} }
    expect(built.inputSchema).toBeDefined()
    const wrapped = built.inputSchema as any
    expect(wrapped.jsonSchema?.properties?.company).toBeDefined()
    expect(wrapped.jsonSchema?.properties?.founded).toBeDefined()
  })

  test("strips $schema property from inputSchema", () => {
    const built = build({
      $schema: "http://json-schema.org/draft-07/schema#",
      type: "object",
      properties: { name: { type: "string" } },
    })

    // AI SDK wraps schema in { jsonSchema: {...} }
    const wrapped = built.inputSchema as any
    expect(wrapped.jsonSchema?.$schema).toBeUndefined()
  })

  test("execute calls onSuccess with args", async () => {
    let seen: unknown
    const built = build({ type: "object", properties: { name: { type: "string" } } }, (output) => {
      seen = output
    })

    const payload = { name: "Test Company" }
    const outcome = await built.execute(payload, {
      toolCallId: "test-call-id",
      messages: [],
      abortSignal: undefined as any,
    })

    expect(seen).toEqual(payload)
    expect(outcome.output).toBe("Structured output captured successfully.")
    expect(outcome.metadata.valid).toBe(true)
  })

  test("toModelOutput returns text value", () => {
    const built = build({ type: "object" })

    const rendered = built.toModelOutput({
      output: "Test output",
      title: "Test",
      metadata: { valid: true },
    })

    expect(rendered.type).toBe("text")
    expect(rendered.value).toBe("Test output")
  })
})
|
||||
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue