From 72c77d0e7b488dd1c0c62287b56623915b469506 Mon Sep 17 00:00:00 2001
From: ualtinok
Date: Sun, 29 Mar 2026 19:40:10 +0200
Subject: [PATCH] fix(session): fix token usage double-counting w/ anthropic &
 bedrock due to AI SDK v6 upgrade (#19758)

Co-authored-by: Aiden Cline <63023139+rekram1-node@users.noreply.github.com>
Co-authored-by: Aiden Cline
---
 packages/opencode/src/session/index.ts       | 26 ++++++----------------
 .../opencode/test/session/compaction.test.ts | 20 ++++++++------
 2 files changed, 17 insertions(+), 29 deletions(-)

diff --git a/packages/opencode/src/session/index.ts b/packages/opencode/src/session/index.ts
index eb01739c15..371091722e 100644
--- a/packages/opencode/src/session/index.ts
+++ b/packages/opencode/src/session/index.ts
@@ -32,7 +32,6 @@ import { ModelID, ProviderID } from "@/provider/schema"
 import { Permission } from "@/permission"
 import { Global } from "@/global"
 import type { LanguageModelV2Usage } from "@ai-sdk/provider"
-import { iife } from "@/util/iife"
 import { Effect, Layer, Scope, ServiceMap } from "effect"
 import { makeRuntime } from "@/effect/run-service"
 
@@ -265,27 +264,12 @@ export namespace Session {
         0) as number,
     )
 
-    // OpenRouter provides inputTokens as the total count of input tokens (including cached).
-    // AFAIK other providers (OpenRouter/OpenAI/Gemini etc.) do it the same way e.g. vercel/ai#8794 (comment)
-    // Anthropic does it differently though - inputTokens doesn't include cached tokens.
-    // It looks like OpenCode's cost calculation assumes all providers return inputTokens the same way Anthropic does (I'm guessing getUsage logic was originally implemented with anthropic), so it's causing incorrect cost calculation for OpenRouter and others.
-    const excludesCachedTokens = !!(input.metadata?.["anthropic"] || input.metadata?.["bedrock"])
-    const adjustedInputTokens = safe(
-      excludesCachedTokens ? inputTokens : inputTokens - cacheReadInputTokens - cacheWriteInputTokens,
-    )
+    // AI SDK v6 normalized inputTokens to include cached tokens across all providers
+    // (including Anthropic/Bedrock which previously excluded them). Always subtract cache
+    // tokens to get the non-cached input count for separate cost calculation.
+    const adjustedInputTokens = safe(inputTokens - cacheReadInputTokens - cacheWriteInputTokens)
 
-    const total = iife(() => {
-      // Anthropic doesn't provide total_tokens, also ai sdk will vastly undercount if we
-      // don't compute from components
-      if (
-        input.model.api.npm === "@ai-sdk/anthropic" ||
-        input.model.api.npm === "@ai-sdk/amazon-bedrock" ||
-        input.model.api.npm === "@ai-sdk/google-vertex/anthropic"
-      ) {
-        return adjustedInputTokens + outputTokens + cacheReadInputTokens + cacheWriteInputTokens
-      }
-      return input.usage.totalTokens
-    })
+    const total = input.usage.totalTokens
 
     const tokens = {
       total,
diff --git a/packages/opencode/test/session/compaction.test.ts b/packages/opencode/test/session/compaction.test.ts
index 9c8559c35a..8f29b77880 100644
--- a/packages/opencode/test/session/compaction.test.ts
+++ b/packages/opencode/test/session/compaction.test.ts
@@ -964,8 +964,9 @@ describe("session.getUsage", () => {
     expect(result.tokens.cache.write).toBe(300)
   })
 
-  test("does not subtract cached tokens for anthropic provider", () => {
+  test("subtracts cached tokens for anthropic provider", () => {
     const model = createModel({ context: 100_000, output: 32_000 })
+    // AI SDK v6 normalizes inputTokens to include cached tokens for all providers
     const result = Session.getUsage({
       model,
       usage: {
@@ -979,7 +980,7 @@
       },
     })
 
-    expect(result.tokens.input).toBe(1000)
+    expect(result.tokens.input).toBe(800)
     expect(result.tokens.cache.read).toBe(200)
   })
 
@@ -1043,11 +1044,10 @@ describe("session.getUsage", () => {
     "computes total from components for %s models",
     (npm) => {
       const model = createModel({ context: 100_000, output: 32_000, npm })
+      // AI SDK v6: inputTokens includes cached tokens for all providers
      const usage = {
         inputTokens: 1000,
         outputTokens: 500,
-        // These providers typically report total as input + output only,
-        // excluding cache read/write.
         totalTokens: 1500,
         cachedInputTokens: 200,
       }
@@ -1064,10 +1064,12 @@
           },
         })
 
-        expect(result.tokens.input).toBe(1000)
+        // inputTokens (1000) includes cache, so adjusted = 1000 - 200 - 300 = 500
+        expect(result.tokens.input).toBe(500)
         expect(result.tokens.cache.read).toBe(200)
         expect(result.tokens.cache.write).toBe(300)
-        expect(result.tokens.total).toBe(2000)
+        // total is taken directly from usage.totalTokens (1500), which matches adjusted (500) + output (500) + cacheRead (200) + cacheWrite (300)
+        expect(result.tokens.total).toBe(1500)
         return
       }
 
@@ -1081,10 +1083,12 @@
         },
       })
 
-      expect(result.tokens.input).toBe(1000)
+      // inputTokens (1000) includes cache, so adjusted = 1000 - 200 - 300 = 500
+      expect(result.tokens.input).toBe(500)
      expect(result.tokens.cache.read).toBe(200)
      expect(result.tokens.cache.write).toBe(300)
-      expect(result.tokens.total).toBe(2000)
+      // total is taken directly from usage.totalTokens (1500), which matches adjusted (500) + output (500) + cacheRead (200) + cacheWrite (300)
+      expect(result.tokens.total).toBe(1500)
     },
   )
 })
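
Note for reviewers (not part of the patch): the sketch below walks through the new math with the figures used in the updated tests. The type and function names are made up for illustration and are not opencode's actual getUsage API; the point is only that, with AI SDK v6 reporting inputTokens with cache included, the non-cached input falls out by subtraction and totalTokens can be trusted as reported.

// All names below are hypothetical; this is not opencode's actual API.
type UsageSketch = {
  inputTokens: number // AI SDK v6: already includes cached tokens
  outputTokens: number
  totalTokens: number
  cacheReadTokens: number
  cacheWriteTokens: number
}

// Mirrors the patched logic: subtract cache tokens from the normalized
// inputTokens, and take totalTokens as reported by the SDK.
function tokensSketch(u: UsageSketch) {
  const input = u.inputTokens - u.cacheReadTokens - u.cacheWriteTokens
  return {
    input,
    output: u.outputTokens,
    cache: { read: u.cacheReadTokens, write: u.cacheWriteTokens },
    total: u.totalTokens,
  }
}

// Figures from the updated tests.
const t = tokensSketch({
  inputTokens: 1000, // cache included, per AI SDK v6
  outputTokens: 500,
  totalTokens: 1500,
  cacheReadTokens: 200,
  cacheWriteTokens: 300,
})
// t.input === 500 and t.total === 1500. The old Anthropic/Bedrock branch kept
// input at 1000 and summed components to 2000, counting the cached tokens twice.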
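
The separate buckets matter because cached tokens are billed at different rates than fresh input. A second sketch, with made-up per-million-token prices (not any provider's or opencode's real rates), shows what the old Anthropic/Bedrock branch was double-charging:

// Hypothetical prices, per million tokens, for illustration only.
const price = { input: 3.0, output: 15.0, cacheRead: 0.3, cacheWrite: 3.75 }
const perM = (count: number, rate: number) => (count / 1_000_000) * rate

// Split produced by this patch: 500 fresh input, 200 cache reads, 300 cache writes.
const fixed =
  perM(500, price.input) + perM(500, price.output) + perM(200, price.cacheRead) + perM(300, price.cacheWrite)

// Pre-patch Anthropic/Bedrock behavior after the v6 upgrade: the 500 cached
// tokens stayed inside the input bucket and were billed again at the input rate.
const doubleCounted =
  perM(1000, price.input) + perM(500, price.output) + perM(200, price.cacheRead) + perM(300, price.cacheWrite)

// doubleCounted - fixed === perM(500, price.input): the cached tokens charged twice.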