diff --git a/packages/llm/src/protocols/gemini.ts b/packages/llm/src/protocols/gemini.ts index ff6f3f83ec..a59b0e5017 100644 --- a/packages/llm/src/protocols/gemini.ts +++ b/packages/llm/src/protocols/gemini.ts @@ -285,14 +285,12 @@ const fromRequest = Effect.fn("Gemini.fromRequest")(function* (request: LLMReque // `cachedContentTokenCount` subset. `candidatesTokenCount` is *exclusive* // of `thoughtsTokenCount` — visible-only, not a total — so we sum the two // to produce the inclusive `outputTokens` the rest of the contract expects. +// Output is left undefined when the visible component is missing, so we +// don't fabricate an inclusive number from a partial breakdown. const mapUsage = (usage: GeminiUsage | undefined) => { if (!usage) return undefined const cached = usage.cachedContentTokenCount const nonCached = ProviderShared.subtractTokens(usage.promptTokenCount, cached) - // `candidatesTokenCount` is visible-only; sum with thoughts to produce the - // inclusive `outputTokens` the contract expects. Only compute the total - // when the visible component is reported — otherwise we'd fabricate an - // inclusive number from a partial breakdown. const outputTokens = usage.candidatesTokenCount !== undefined ? usage.candidatesTokenCount + (usage.thoughtsTokenCount ?? 0) diff --git a/packages/llm/src/protocols/shared.ts b/packages/llm/src/protocols/shared.ts index 3b9886553a..a07d38bd19 100644 --- a/packages/llm/src/protocols/shared.ts +++ b/packages/llm/src/protocols/shared.ts @@ -43,12 +43,10 @@ export interface ToolAccumulator { * when at least one is defined. Returns `undefined` when neither input nor * output is known so routes don't publish a misleading `0`. * - * Under the additive `LLM.Usage` contract, `inputTokens` and `outputTokens` - * are the non-cached input and visible output only. 
The provider-supplied - * `total` is the source of truth when present; the computed fallback - * under-counts cache and reasoning by design and exists mainly so - * Anthropic-style providers (which don't surface a total) still get a - * sensible aggregate on the input + output axes. + * Under the `LLM.Usage` contract, `inputTokens` and `outputTokens` are + * inclusive totals, so the computed fallback already covers cache reads / + * writes and reasoning. The fallback exists mainly for Anthropic-style + * providers, which don't surface a top-level total. */ export const totalTokens = ( inputTokens: number | undefined, @@ -69,7 +67,8 @@ export const totalTokens = ( * * If `total` is `undefined`, returns `undefined` (we don't fabricate * counts). If `subtrahend` is `undefined`, returns `total` unchanged. The - * provider-native breakdown stays available on `Usage.native` for debugging. + * provider-native breakdown stays available on `Usage.providerMetadata` + * for debugging. */ export const subtractTokens = ( total: number | undefined,