fix(session): fix token usage double-counting w/ anthropic & bedrock due to AI SDK v6 upgrade (#19758)
Co-authored-by: Aiden Cline <63023139+rekram1-node@users.noreply.github.com>
Co-authored-by: Aiden Cline <aidenpcline@gmail.com>
pull/19924/head
parent
5c15755a10
commit
72c77d0e7b
|
|
@ -32,7 +32,6 @@ import { ModelID, ProviderID } from "@/provider/schema"
|
|||
import { Permission } from "@/permission"
|
||||
import { Global } from "@/global"
|
||||
import type { LanguageModelV2Usage } from "@ai-sdk/provider"
|
||||
import { iife } from "@/util/iife"
|
||||
import { Effect, Layer, Scope, ServiceMap } from "effect"
|
||||
import { makeRuntime } from "@/effect/run-service"
|
||||
|
||||
|
|
@ -265,27 +264,12 @@ export namespace Session {
|
|||
0) as number,
|
||||
)
|
||||
|
||||
// OpenRouter provides inputTokens as the total count of input tokens (including cached).
|
||||
// AFAIK other providers (OpenAI/Gemini etc.) do it the same way e.g. vercel/ai#8794 (comment)
|
||||
// Anthropic does it differently though - inputTokens doesn't include cached tokens.
|
||||
// It looks like OpenCode's cost calculation assumes all providers return inputTokens the same way Anthropic does (the getUsage logic was likely originally implemented against Anthropic), so it causes incorrect cost calculations for OpenRouter and others.
|
||||
const excludesCachedTokens = !!(input.metadata?.["anthropic"] || input.metadata?.["bedrock"])
|
||||
const adjustedInputTokens = safe(
|
||||
excludesCachedTokens ? inputTokens : inputTokens - cacheReadInputTokens - cacheWriteInputTokens,
|
||||
)
|
||||
// AI SDK v6 normalized inputTokens to include cached tokens across all providers
|
||||
// (including Anthropic/Bedrock which previously excluded them). Always subtract cache
|
||||
// tokens to get the non-cached input count for separate cost calculation.
|
||||
const adjustedInputTokens = safe(inputTokens - cacheReadInputTokens - cacheWriteInputTokens)
|
||||
|
||||
const total = iife(() => {
|
||||
// Anthropic doesn't provide total_tokens, also ai sdk will vastly undercount if we
|
||||
// don't compute from components
|
||||
if (
|
||||
input.model.api.npm === "@ai-sdk/anthropic" ||
|
||||
input.model.api.npm === "@ai-sdk/amazon-bedrock" ||
|
||||
input.model.api.npm === "@ai-sdk/google-vertex/anthropic"
|
||||
) {
|
||||
return adjustedInputTokens + outputTokens + cacheReadInputTokens + cacheWriteInputTokens
|
||||
}
|
||||
return input.usage.totalTokens
|
||||
})
|
||||
const total = input.usage.totalTokens
|
||||
|
||||
const tokens = {
|
||||
total,
|
||||
|
|
|
|||
|
|
@ -964,8 +964,9 @@ describe("session.getUsage", () => {
|
|||
expect(result.tokens.cache.write).toBe(300)
|
||||
})
|
||||
|
||||
test("does not subtract cached tokens for anthropic provider", () => {
|
||||
test("subtracts cached tokens for anthropic provider", () => {
|
||||
const model = createModel({ context: 100_000, output: 32_000 })
|
||||
// AI SDK v6 normalizes inputTokens to include cached tokens for all providers
|
||||
const result = Session.getUsage({
|
||||
model,
|
||||
usage: {
|
||||
|
|
@ -979,7 +980,7 @@ describe("session.getUsage", () => {
|
|||
},
|
||||
})
|
||||
|
||||
expect(result.tokens.input).toBe(1000)
|
||||
expect(result.tokens.input).toBe(800)
|
||||
expect(result.tokens.cache.read).toBe(200)
|
||||
})
|
||||
|
||||
|
|
@ -1043,11 +1044,10 @@ describe("session.getUsage", () => {
|
|||
"computes total from components for %s models",
|
||||
(npm) => {
|
||||
const model = createModel({ context: 100_000, output: 32_000, npm })
|
||||
// AI SDK v6: inputTokens includes cached tokens for all providers
|
||||
const usage = {
|
||||
inputTokens: 1000,
|
||||
outputTokens: 500,
|
||||
// These providers typically report total as input + output only,
|
||||
// excluding cache read/write.
|
||||
totalTokens: 1500,
|
||||
cachedInputTokens: 200,
|
||||
}
|
||||
|
|
@ -1064,10 +1064,12 @@ describe("session.getUsage", () => {
|
|||
},
|
||||
})
|
||||
|
||||
expect(result.tokens.input).toBe(1000)
|
||||
// inputTokens (1000) includes cache, so adjusted = 1000 - 200 - 300 = 500
|
||||
expect(result.tokens.input).toBe(500)
|
||||
expect(result.tokens.cache.read).toBe(200)
|
||||
expect(result.tokens.cache.write).toBe(300)
|
||||
expect(result.tokens.total).toBe(2000)
|
||||
// total = adjusted (500) + output (500) + cacheRead (200) + cacheWrite (300)
|
||||
expect(result.tokens.total).toBe(1500)
|
||||
return
|
||||
}
|
||||
|
||||
|
|
@ -1081,10 +1083,12 @@ describe("session.getUsage", () => {
|
|||
},
|
||||
})
|
||||
|
||||
expect(result.tokens.input).toBe(1000)
|
||||
// inputTokens (1000) includes cache, so adjusted = 1000 - 200 - 300 = 500
|
||||
expect(result.tokens.input).toBe(500)
|
||||
expect(result.tokens.cache.read).toBe(200)
|
||||
expect(result.tokens.cache.write).toBe(300)
|
||||
expect(result.tokens.total).toBe(2000)
|
||||
// total = adjusted (500) + output (500) + cacheRead (200) + cacheWrite (300)
|
||||
expect(result.tokens.total).toBe(1500)
|
||||
},
|
||||
)
|
||||
})
|
||||
|
|
|
|||
Loading…
Reference in New Issue