mirror of
https://github.com/moltbot/moltbot.git
synced 2026-05-13 23:56:07 +00:00
fix: harden Kimi Anthropic thinking budgets
This commit is contained in:
@@ -307,7 +307,7 @@ describe("kimi tool-call markup wrapper", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("enables Kimi Anthropic thinking with a bounded high budget", () => {
|
||||
it("enables Kimi Anthropic thinking with a high budget and enough output room", () => {
|
||||
const { streamFn: baseStreamFn, getCapturedPayload } = createPayloadCapturingStream();
|
||||
|
||||
const wrapped = wrapKimiProviderStream({
|
||||
@@ -328,11 +328,67 @@ describe("kimi tool-call markup wrapper", () => {
|
||||
);
|
||||
|
||||
expect(getCapturedPayload()).toEqual({
|
||||
max_tokens: 9216,
|
||||
max_tokens: 16000,
|
||||
thinking: { type: "enabled", budget_tokens: 8192 },
|
||||
});
|
||||
});
|
||||
|
||||
it("adds the default Kimi Anthropic thinking budget for explicit enabled params", () => {
  // Three spellings of an explicit "thinking on" request; none of them carries a budget.
  const enabledSpellings = ["enabled", true, { type: "enabled" }] as const;

  // The same payload is expected for every spelling.
  const expectedPayload = {
    max_tokens: 16000,
    thinking: { type: "enabled", budget_tokens: 1024 },
  };

  enabledSpellings.forEach((configuredThinking) => {
    const { streamFn: capturingStreamFn, getCapturedPayload: readCapturedPayload } =
      createPayloadCapturingStream();

    // No thinkingLevel is passed here, so only the configured value drives the wrapper.
    const wrapperOptions = {
      provider: "kimi",
      modelId: "kimi-code",
      extraParams: { thinking: configuredThinking },
      streamFn: capturingStreamFn,
    };
    const wrapped = wrapKimiProviderStream(wrapperOptions as never);

    const model = {
      api: "anthropic-messages",
      provider: "kimi",
      id: "kimi-code",
    } as Model<"anthropic-messages">;
    const context = { messages: [] } as Context;

    // The returned stream is intentionally ignored; only the captured payload matters.
    void wrapped(model, context, {});

    expect(readCapturedPayload()).toEqual(expectedPayload);
  });
});
|
||||
|
||||
it("uses the session Kimi Anthropic budget for explicit enabled params when available", () => {
  const { streamFn: capturingStreamFn, getCapturedPayload: readCapturedPayload } =
    createPayloadCapturingStream();

  // Thinking is enabled explicitly without a budget, alongside a "medium" session level.
  const wrapperOptions = {
    provider: "kimi",
    modelId: "kimi-code",
    extraParams: { thinking: "enabled" },
    thinkingLevel: "medium",
    streamFn: capturingStreamFn,
  };
  const wrapped = wrapKimiProviderStream(wrapperOptions as never);

  const model = {
    api: "anthropic-messages",
    provider: "kimi",
    id: "kimi-code",
  } as Model<"anthropic-messages">;
  const context = { messages: [] } as Context;

  // The returned stream is intentionally ignored; only the captured payload matters.
  void wrapped(model, context, {});

  // The expected budget is 4096 rather than the 1024 used when no session level is given.
  const expectedPayload = {
    max_tokens: 16000,
    thinking: { type: "enabled", budget_tokens: 4096 },
  };
  expect(readCapturedPayload()).toEqual(expectedPayload);
});
|
||||
|
||||
it("preserves explicit Kimi Anthropic thinking budgets", () => {
|
||||
const { streamFn: baseStreamFn, getCapturedPayload } = createPayloadCapturingStream();
|
||||
|
||||
@@ -355,11 +411,39 @@ describe("kimi tool-call markup wrapper", () => {
|
||||
);
|
||||
|
||||
expect(getCapturedPayload()).toEqual({
|
||||
max_tokens: 5120,
|
||||
max_tokens: 16000,
|
||||
thinking: { type: "enabled", budget_tokens: 4096 },
|
||||
});
|
||||
});
|
||||
|
||||
it("preserves larger Kimi Anthropic max_tokens values", () => {
  // NOTE(review): the argument presumably seeds the payload the capturing stream
  // hands to the wrapper — confirm against createPayloadCapturingStream.
  const initialPayload = { max_tokens: 32768 };
  const { streamFn: capturingStreamFn, getCapturedPayload: readCapturedPayload } =
    createPayloadCapturingStream(initialPayload);

  const wrapperOptions = {
    provider: "kimi",
    modelId: "kimi-code",
    thinkingLevel: "high",
    streamFn: capturingStreamFn,
  };
  const wrapped = wrapKimiProviderStream(wrapperOptions as never);

  const model = {
    api: "anthropic-messages",
    provider: "kimi",
    id: "kimi-code",
  } as Model<"anthropic-messages">;
  const context = { messages: [] } as Context;

  // The returned stream is intentionally ignored; only the captured payload matters.
  void wrapped(model, context, {});

  // max_tokens is expected to stay at 32768 instead of being reduced to 16000.
  const expectedPayload = {
    max_tokens: 32768,
    thinking: { type: "enabled", budget_tokens: 8192 },
  };
  expect(readCapturedPayload()).toEqual(expectedPayload);
});
|
||||
|
||||
it("bounds Kimi Anthropic thinking for session thinking levels", () => {
|
||||
const cases = [
|
||||
["minimal", 1024],
|
||||
@@ -391,7 +475,7 @@ describe("kimi tool-call markup wrapper", () => {
|
||||
);
|
||||
|
||||
expect(getCapturedPayload()).toEqual({
|
||||
max_tokens: budgetTokens + 1024,
|
||||
max_tokens: 16000,
|
||||
thinking: { type: "enabled", budget_tokens: budgetTokens },
|
||||
});
|
||||
}
|
||||
|
||||
@@ -42,6 +42,7 @@ const KIMI_ANTHROPIC_THINKING_BUDGETS: Record<Exclude<KimiThinkingLevel, "off">,
|
||||
max: 8192,
|
||||
};
|
||||
// Tokens added on top of `budget_tokens` when this file computes a `max_tokens`
// value for an enabled thinking config (presumably headroom for the non-thinking
// part of the response — confirm against the provider's accounting).
const KIMI_ANTHROPIC_VISIBLE_OUTPUT_RESERVE_TOKENS = 1024;
|
||||
// Lower bound applied to `max_tokens` by ensureKimiAnthropicMaxTokens when thinking
// is enabled with a budget: the required value is the larger of this constant and
// `budget_tokens` plus the visible-output reserve.
const KIMI_ANTHROPIC_MIN_OUTPUT_TOKENS = 16000;
|
||||
|
||||
function normalizeKimiThinkingBudgetTokens(value: unknown): number | undefined {
|
||||
if (typeof value !== "number" || !Number.isFinite(value)) {
|
||||
@@ -51,19 +52,27 @@ function normalizeKimiThinkingBudgetTokens(value: unknown): number | undefined {
|
||||
return normalized >= 1024 ? normalized : undefined;
|
||||
}
|
||||
|
||||
/**
 * Coerces a raw `max_tokens` value into a positive integer.
 *
 * @param value - Value of unknown type, e.g. read straight off a request payload.
 * @returns `value` floored to an integer when it is a finite number whose floor is
 *   greater than zero; `undefined` for non-numbers, `NaN`, infinities, and anything
 *   that floors to zero or below.
 */
function normalizeKimiAnthropicMaxTokens(value: unknown): number | undefined {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return undefined;
  }
  // Floor before the sign check so fractional values below 1 (e.g. 0.5) are rejected.
  const normalized = Math.floor(value);
  return normalized > 0 ? normalized : undefined;
}
|
||||
|
||||
/**
 * Raises `payloadObj.max_tokens` so an enabled thinking budget has enough output room.
 *
 * Does nothing unless `thinkingConfig` is `enabled` and carries a `budget_tokens`
 * value. Otherwise the required minimum is the larger of
 * `KIMI_ANTHROPIC_MIN_OUTPUT_TOKENS` and
 * `budget_tokens + KIMI_ANTHROPIC_VISIBLE_OUTPUT_RESERVE_TOKENS`. An existing
 * `max_tokens` is kept when it is already at least that large; a missing or invalid
 * one is replaced by the required minimum.
 *
 * @param payloadObj - Request payload; its `max_tokens` property is mutated in place.
 * @param thinkingConfig - Thinking configuration for the request.
 */
function ensureKimiAnthropicMaxTokens(
  payloadObj: Record<string, unknown>,
  thinkingConfig: KimiThinkingConfig,
): void {
  if (thinkingConfig.type !== "enabled" || thinkingConfig.budget_tokens === undefined) {
    return;
  }
  const required = Math.max(
    KIMI_ANTHROPIC_MIN_OUTPUT_TOKENS,
    thinkingConfig.budget_tokens + KIMI_ANTHROPIC_VISIBLE_OUTPUT_RESERVE_TOKENS,
  );
  // Only ever raise max_tokens: a caller-supplied value above the floor is preserved.
  const current = normalizeKimiAnthropicMaxTokens(payloadObj.max_tokens);
  payloadObj.max_tokens = current === undefined ? required : Math.max(current, required);
}
|
||||
|
||||
function normalizeKimiThinkingType(value: unknown): KimiThinkingType | undefined {
|
||||
@@ -123,16 +132,18 @@ export function resolveKimiThinkingConfig(params: {
|
||||
thinkingLevel?: KimiThinkingLevel;
|
||||
}): KimiThinkingConfig {
|
||||
const configured = normalizeKimiThinkingConfig(params.configuredThinking);
|
||||
const levelBudgetTokens = resolveKimiAnthropicThinkingBudgetTokens(params.thinkingLevel);
|
||||
if (configured) {
|
||||
return configured;
|
||||
return configured.type === "enabled" && configured.budget_tokens === undefined
|
||||
? { type: "enabled", budget_tokens: levelBudgetTokens ?? 1024 }
|
||||
: configured;
|
||||
}
|
||||
if (!params.thinkingLevel || params.thinkingLevel === "off") {
|
||||
return { type: "disabled" };
|
||||
}
|
||||
const budgetTokens = resolveKimiAnthropicThinkingBudgetTokens(params.thinkingLevel);
|
||||
return budgetTokens === undefined
|
||||
return levelBudgetTokens === undefined
|
||||
? { type: "enabled" }
|
||||
: { type: "enabled", budget_tokens: budgetTokens };
|
||||
: { type: "enabled", budget_tokens: levelBudgetTokens };
|
||||
}
|
||||
|
||||
export function resolveKimiThinkingType(params: {
|
||||
@@ -319,7 +330,7 @@ export function createKimiThinkingWrapper(
|
||||
payloadObj.thinking =
|
||||
model.api === "anthropic-messages" ? { ...normalized } : { type: normalized.type };
|
||||
if (model.api === "anthropic-messages") {
|
||||
clampKimiAnthropicMaxTokens(payloadObj, normalized);
|
||||
ensureKimiAnthropicMaxTokens(payloadObj, normalized);
|
||||
}
|
||||
delete payloadObj.reasoning;
|
||||
delete payloadObj.reasoning_effort;
|
||||
|
||||
Reference in New Issue
Block a user