fix: harden Kimi Anthropic thinking budgets

This commit is contained in:
Peter Steinberger
2026-05-11 03:59:47 +01:00
parent 8f17356392
commit 25d68d3713
2 changed files with 111 additions and 16 deletions

View File

@@ -307,7 +307,7 @@ describe("kimi tool-call markup wrapper", () => {
});
});
it("enables Kimi Anthropic thinking with a bounded high budget", () => {
it("enables Kimi Anthropic thinking with a high budget and enough output room", () => {
const { streamFn: baseStreamFn, getCapturedPayload } = createPayloadCapturingStream();
const wrapped = wrapKimiProviderStream({
@@ -328,11 +328,67 @@ describe("kimi tool-call markup wrapper", () => {
);
expect(getCapturedPayload()).toEqual({
max_tokens: 9216,
max_tokens: 16000,
thinking: { type: "enabled", budget_tokens: 8192 },
});
});
it("adds the default Kimi Anthropic thinking budget for explicit enabled params", () => {
  // Each entry is a different way of configuring "thinking enabled" without a budget;
  // all of them are expected to produce the same captured payload.
  const enabledSpellings = ["enabled", true, { type: "enabled" }] as const;
  const expectedPayload = {
    max_tokens: 16000,
    thinking: { type: "enabled", budget_tokens: 1024 },
  };

  enabledSpellings.forEach((configuredThinking) => {
    const { streamFn: capturingStreamFn, getCapturedPayload: readPayload } =
      createPayloadCapturingStream();
    const wrapperParams = {
      provider: "kimi",
      modelId: "kimi-code",
      extraParams: { thinking: configuredThinking },
      streamFn: capturingStreamFn,
    } as never;
    const model = {
      api: "anthropic-messages",
      provider: "kimi",
      id: "kimi-code",
    } as Model<"anthropic-messages">;
    const context = { messages: [] } as Context;

    const wrapped = wrapKimiProviderStream(wrapperParams);
    void wrapped(model, context, {});

    expect(readPayload()).toEqual(expectedPayload);
  });
});
it("uses the session Kimi Anthropic budget for explicit enabled params when available", () => {
  const { streamFn: capturingStreamFn, getCapturedPayload: readPayload } =
    createPayloadCapturingStream();

  // Thinking is enabled explicitly without a budget while a session level is also supplied.
  const wrapperParams = {
    provider: "kimi",
    modelId: "kimi-code",
    extraParams: { thinking: "enabled" },
    thinkingLevel: "medium",
    streamFn: capturingStreamFn,
  } as never;
  const model = {
    api: "anthropic-messages",
    provider: "kimi",
    id: "kimi-code",
  } as Model<"anthropic-messages">;
  const context = { messages: [] } as Context;

  const wrapped = wrapKimiProviderStream(wrapperParams);
  void wrapped(model, context, {});

  // The expected budget is the one this suite associates with the "medium" level.
  const expectedPayload = {
    max_tokens: 16000,
    thinking: { type: "enabled", budget_tokens: 4096 },
  };
  expect(readPayload()).toEqual(expectedPayload);
});
it("preserves explicit Kimi Anthropic thinking budgets", () => {
const { streamFn: baseStreamFn, getCapturedPayload } = createPayloadCapturingStream();
@@ -355,11 +411,39 @@ describe("kimi tool-call markup wrapper", () => {
);
expect(getCapturedPayload()).toEqual({
max_tokens: 5120,
max_tokens: 16000,
thinking: { type: "enabled", budget_tokens: 4096 },
});
});
it("preserves larger Kimi Anthropic max_tokens values", () => {
  // Seed the capturing stream with a max_tokens value above the 16000 seen in the other cases.
  const seededPayload = { max_tokens: 32768 };
  const { streamFn: capturingStreamFn, getCapturedPayload: readPayload } =
    createPayloadCapturingStream(seededPayload);

  const wrapperParams = {
    provider: "kimi",
    modelId: "kimi-code",
    thinkingLevel: "high",
    streamFn: capturingStreamFn,
  } as never;
  const model = {
    api: "anthropic-messages",
    provider: "kimi",
    id: "kimi-code",
  } as Model<"anthropic-messages">;
  const context = { messages: [] } as Context;

  const wrapped = wrapKimiProviderStream(wrapperParams);
  void wrapped(model, context, {});

  // The seeded max_tokens must come through unchanged alongside the thinking budget.
  const expectedPayload = {
    max_tokens: 32768,
    thinking: { type: "enabled", budget_tokens: 8192 },
  };
  expect(readPayload()).toEqual(expectedPayload);
});
it("bounds Kimi Anthropic thinking for session thinking levels", () => {
const cases = [
["minimal", 1024],
@@ -391,7 +475,7 @@ describe("kimi tool-call markup wrapper", () => {
);
expect(getCapturedPayload()).toEqual({
max_tokens: budgetTokens + 1024,
max_tokens: 16000,
thinking: { type: "enabled", budget_tokens: budgetTokens },
});
}

View File

@@ -42,6 +42,7 @@ const KIMI_ANTHROPIC_THINKING_BUDGETS: Record<Exclude<KimiThinkingLevel, "off">,
max: 8192,
};
// Added on top of the thinking budget when sizing `max_tokens`; presumably the room
// kept for visible (non-thinking) output — confirm against provider docs.
const KIMI_ANTHROPIC_VISIBLE_OUTPUT_RESERVE_TOKENS = 1024;
// Lower bound applied to `max_tokens` when thinking is enabled with a budget.
const KIMI_ANTHROPIC_MIN_OUTPUT_TOKENS = 16000;
function normalizeKimiThinkingBudgetTokens(value: unknown): number | undefined {
if (typeof value !== "number" || !Number.isFinite(value)) {
@@ -51,19 +52,27 @@ function normalizeKimiThinkingBudgetTokens(value: unknown): number | undefined {
return normalized >= 1024 ? normalized : undefined;
}
function clampKimiAnthropicMaxTokens(
/**
 * Coerces a candidate `max_tokens` value into a positive whole number.
 *
 * @param value - Value of unknown type to validate.
 * @returns The value rounded down when it is a finite number whose floor is at
 *   least 1; `undefined` for anything else (non-numbers, NaN, infinities, and
 *   numbers below 1).
 */
function normalizeKimiAnthropicMaxTokens(value: unknown): number | undefined {
  if (typeof value === "number" && Number.isFinite(value)) {
    const wholeTokens = Math.floor(value);
    if (wholeTokens >= 1) {
      return wholeTokens;
    }
  }
  return undefined;
}
// Adjusts `payloadObj.max_tokens` in place when the thinking config is enabled and
// carries a budget; returns without touching the payload otherwise.
// NOTE(review): this listing shows two variants of the sizing logic back to back — a cap
// (`limit`, Math.min) and a floor (`required`, Math.max) — and declares `current` twice.
// Presumably only the floor variant belongs to the final function; confirm against the real file.
function ensureKimiAnthropicMaxTokens(
payloadObj: Record<string, unknown>,
thinkingConfig: KimiThinkingConfig,
): void {
if (thinkingConfig.type !== "enabled" || thinkingConfig.budget_tokens === undefined) {
return;
}
// Cap variant: budget plus the visible-output reserve; an existing finite max_tokens
// is floored and can only be lowered to this limit, never raised.
const limit = thinkingConfig.budget_tokens + KIMI_ANTHROPIC_VISIBLE_OUTPUT_RESERVE_TOKENS;
const current =
typeof payloadObj.max_tokens === "number" && Number.isFinite(payloadObj.max_tokens)
? Math.floor(payloadObj.max_tokens)
: undefined;
payloadObj.max_tokens = current === undefined ? limit : Math.min(current, limit);
// Floor variant: at least KIMI_ANTHROPIC_MIN_OUTPUT_TOKENS and at least budget plus
// reserve; a larger existing max_tokens is kept, a smaller or invalid one is replaced.
const required = Math.max(
KIMI_ANTHROPIC_MIN_OUTPUT_TOKENS,
thinkingConfig.budget_tokens + KIMI_ANTHROPIC_VISIBLE_OUTPUT_RESERVE_TOKENS,
);
const current = normalizeKimiAnthropicMaxTokens(payloadObj.max_tokens);
payloadObj.max_tokens = current === undefined ? required : Math.max(current, required);
}
function normalizeKimiThinkingType(value: unknown): KimiThinkingType | undefined {
@@ -123,16 +132,18 @@ export function resolveKimiThinkingConfig(params: {
thinkingLevel?: KimiThinkingLevel;
}): KimiThinkingConfig {
const configured = normalizeKimiThinkingConfig(params.configuredThinking);
const levelBudgetTokens = resolveKimiAnthropicThinkingBudgetTokens(params.thinkingLevel);
if (configured) {
return configured;
return configured.type === "enabled" && configured.budget_tokens === undefined
? { type: "enabled", budget_tokens: levelBudgetTokens ?? 1024 }
: configured;
}
if (!params.thinkingLevel || params.thinkingLevel === "off") {
return { type: "disabled" };
}
const budgetTokens = resolveKimiAnthropicThinkingBudgetTokens(params.thinkingLevel);
return budgetTokens === undefined
return levelBudgetTokens === undefined
? { type: "enabled" }
: { type: "enabled", budget_tokens: budgetTokens };
: { type: "enabled", budget_tokens: levelBudgetTokens };
}
export function resolveKimiThinkingType(params: {
@@ -319,7 +330,7 @@ export function createKimiThinkingWrapper(
payloadObj.thinking =
model.api === "anthropic-messages" ? { ...normalized } : { type: normalized.type };
if (model.api === "anthropic-messages") {
clampKimiAnthropicMaxTokens(payloadObj, normalized);
ensureKimiAnthropicMaxTokens(payloadObj, normalized);
}
delete payloadObj.reasoning;
delete payloadObj.reasoning_effort;