fix(session): fix token usage double-counting w/ anthropic & bedrock due to AI SDK v6 upgrade (#19758)

Co-authored-by: Aiden Cline <63023139+rekram1-node@users.noreply.github.com>
Co-authored-by: Aiden Cline <aidenpcline@gmail.com>
pull/19924/head
ualtinok 2026-03-29 19:40:10 +02:00 committed by GitHub
parent 5c15755a10
commit 72c77d0e7b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 17 additions and 29 deletions

View File

@@ -32,7 +32,6 @@ import { ModelID, ProviderID } from "@/provider/schema"
import { Permission } from "@/permission"
import { Global } from "@/global"
import type { LanguageModelV2Usage } from "@ai-sdk/provider"
import { iife } from "@/util/iife"
import { Effect, Layer, Scope, ServiceMap } from "effect"
import { makeRuntime } from "@/effect/run-service"
@@ -265,27 +264,12 @@ export namespace Session {
0) as number,
)
// OpenRouter provides inputTokens as the total count of input tokens (including cached).
// AFAIK other providers (OpenRouter/OpenAI/Gemini etc.) do it the same way e.g. vercel/ai#8794 (comment)
// Anthropic does it differently though - inputTokens doesn't include cached tokens.
// It looks like OpenCode's cost calculation assumes all providers return inputTokens the same way Anthropic does (I'm guessing getUsage logic was originally implemented with anthropic), so it's causing incorrect cost calculation for OpenRouter and others.
const excludesCachedTokens = !!(input.metadata?.["anthropic"] || input.metadata?.["bedrock"])
const adjustedInputTokens = safe(
excludesCachedTokens ? inputTokens : inputTokens - cacheReadInputTokens - cacheWriteInputTokens,
)
// AI SDK v6 normalized inputTokens to include cached tokens across all providers
// (including Anthropic/Bedrock which previously excluded them). Always subtract cache
// tokens to get the non-cached input count for separate cost calculation.
const adjustedInputTokens = safe(inputTokens - cacheReadInputTokens - cacheWriteInputTokens)
const total = iife(() => {
// Anthropic doesn't provide total_tokens, also ai sdk will vastly undercount if we
// don't compute from components
if (
input.model.api.npm === "@ai-sdk/anthropic" ||
input.model.api.npm === "@ai-sdk/amazon-bedrock" ||
input.model.api.npm === "@ai-sdk/google-vertex/anthropic"
) {
return adjustedInputTokens + outputTokens + cacheReadInputTokens + cacheWriteInputTokens
}
return input.usage.totalTokens
})
const total = input.usage.totalTokens
const tokens = {
total,

View File

@@ -964,8 +964,9 @@ describe("session.getUsage", () => {
expect(result.tokens.cache.write).toBe(300)
})
test("does not subtract cached tokens for anthropic provider", () => {
test("subtracts cached tokens for anthropic provider", () => {
const model = createModel({ context: 100_000, output: 32_000 })
// AI SDK v6 normalizes inputTokens to include cached tokens for all providers
const result = Session.getUsage({
model,
usage: {
@@ -979,7 +980,7 @@ describe("session.getUsage", () => {
},
})
expect(result.tokens.input).toBe(1000)
expect(result.tokens.input).toBe(800)
expect(result.tokens.cache.read).toBe(200)
})
@@ -1043,11 +1044,10 @@ describe("session.getUsage", () => {
"computes total from components for %s models",
(npm) => {
const model = createModel({ context: 100_000, output: 32_000, npm })
// AI SDK v6: inputTokens includes cached tokens for all providers
const usage = {
inputTokens: 1000,
outputTokens: 500,
// These providers typically report total as input + output only,
// excluding cache read/write.
totalTokens: 1500,
cachedInputTokens: 200,
}
@@ -1064,10 +1064,12 @@ describe("session.getUsage", () => {
},
})
expect(result.tokens.input).toBe(1000)
// inputTokens (1000) includes cache, so adjusted = 1000 - 200 - 300 = 500
expect(result.tokens.input).toBe(500)
expect(result.tokens.cache.read).toBe(200)
expect(result.tokens.cache.write).toBe(300)
expect(result.tokens.total).toBe(2000)
// total = adjusted (500) + output (500) + cacheRead (200) + cacheWrite (300)
expect(result.tokens.total).toBe(1500)
return
}
@@ -1081,10 +1083,12 @@ describe("session.getUsage", () => {
},
})
expect(result.tokens.input).toBe(1000)
// inputTokens (1000) includes cache, so adjusted = 1000 - 200 - 300 = 500
expect(result.tokens.input).toBe(500)
expect(result.tokens.cache.read).toBe(200)
expect(result.tokens.cache.write).toBe(300)
expect(result.tokens.total).toBe(2000)
// total = adjusted (500) + output (500) + cacheRead (200) + cacheWrite (300)
expect(result.tokens.total).toBe(1500)
},
)
})