feat: enable prompt caching and cache token tracking for google-vertex-anthropic (#20266)
Signed-off-by: Major Hayden <major@mhtx.net>
pull/18291/merge
parent
4dd866d5c4
commit
26cc924ea2
|
|
@@ -280,6 +280,7 @@ export namespace ProviderTransform {
|
|||
msgs = normalizeMessages(msgs, model, options)
|
||||
if (
|
||||
(model.providerID === "anthropic" ||
|
||||
model.providerID === "google-vertex-anthropic" ||
|
||||
model.api.id.includes("anthropic") ||
|
||||
model.api.id.includes("claude") ||
|
||||
model.id.includes("anthropic") ||
|
||||
|
|
|
|||
|
|
@@ -257,6 +257,9 @@ export namespace Session {
|
|||
const cacheReadInputTokens = safe(input.usage.cachedInputTokens ?? 0)
|
||||
const cacheWriteInputTokens = safe(
|
||||
(input.metadata?.["anthropic"]?.["cacheCreationInputTokens"] ??
|
||||
// google-vertex-anthropic returns metadata under "vertex" key
|
||||
// (AnthropicMessagesLanguageModel custom provider key from 'vertex.anthropic.messages')
|
||||
input.metadata?.["vertex"]?.["cacheCreationInputTokens"] ??
|
||||
// @ts-expect-error
|
||||
input.metadata?.["bedrock"]?.["usage"]?.["cacheWriteInputTokens"] ??
|
||||
// @ts-expect-error
|
||||
|
|
|
|||
|
|
@@ -1792,6 +1792,58 @@ describe("ProviderTransform.message - cache control on gateway", () => {
|
|||
},
|
||||
})
|
||||
})
|
||||
|
||||
test("google-vertex-anthropic applies cache control", () => {
|
||||
const model = createModel({
|
||||
providerID: "google-vertex-anthropic",
|
||||
api: {
|
||||
id: "google-vertex-anthropic",
|
||||
url: "https://us-central1-aiplatform.googleapis.com",
|
||||
npm: "@ai-sdk/google-vertex/anthropic",
|
||||
},
|
||||
id: "claude-sonnet-4@20250514",
|
||||
})
|
||||
const msgs = [
|
||||
{
|
||||
role: "system",
|
||||
content: "You are a helpful assistant",
|
||||
},
|
||||
{
|
||||
role: "user",
|
||||
content: "Hello",
|
||||
},
|
||||
] as any[]
|
||||
|
||||
const result = ProviderTransform.message(msgs, model, {}) as any[]
|
||||
|
||||
expect(result[0].providerOptions).toEqual({
|
||||
anthropic: {
|
||||
cacheControl: {
|
||||
type: "ephemeral",
|
||||
},
|
||||
},
|
||||
openrouter: {
|
||||
cacheControl: {
|
||||
type: "ephemeral",
|
||||
},
|
||||
},
|
||||
bedrock: {
|
||||
cachePoint: {
|
||||
type: "default",
|
||||
},
|
||||
},
|
||||
openaiCompatible: {
|
||||
cache_control: {
|
||||
type: "ephemeral",
|
||||
},
|
||||
},
|
||||
copilot: {
|
||||
copilot_cache_control: {
|
||||
type: "ephemeral",
|
||||
},
|
||||
},
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe("ProviderTransform.variants", () => {
|
||||
|
|
|
|||
|
|
@@ -1199,4 +1199,26 @@ describe("session.getUsage", () => {
|
|||
expect(result.tokens.total).toBe(1500)
|
||||
},
|
||||
)
|
||||
|
||||
test("extracts cache write tokens from vertex metadata key", () => {
|
||||
const model = createModel({ context: 100_000, output: 32_000, npm: "@ai-sdk/google-vertex/anthropic" })
|
||||
const result = Session.getUsage({
|
||||
model,
|
||||
usage: {
|
||||
inputTokens: 1000,
|
||||
outputTokens: 500,
|
||||
totalTokens: 1500,
|
||||
cachedInputTokens: 200,
|
||||
},
|
||||
metadata: {
|
||||
vertex: {
|
||||
cacheCreationInputTokens: 300,
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
expect(result.tokens.input).toBe(500)
|
||||
expect(result.tokens.cache.read).toBe(200)
|
||||
expect(result.tokens.cache.write).toBe(300)
|
||||
})
|
||||
})
|
||||
|
|
|
|||
Loading…
Reference in New Issue