From d44553dade33191c2eba7df9fb9b0def91639369 Mon Sep 17 00:00:00 2001 From: "clawsweeper[bot]" <274271284+clawsweeper[bot]@users.noreply.github.com> Date: Tue, 12 May 2026 12:08:07 +0000 Subject: [PATCH] fix(gateway): wire max_completion_tokens/max_tokens through openai-http (#81013) Summary: - The branch adds Chat Completions token-cap fields to the Gateway request type, forwards them as agent stream parameters, and documents/tests the behavior. - Reproducibility: yes. Source inspection gives a high-confidence current-main path: send `max_completion_tokens` or `max_tokens` to `/v1/chat/completions` and observe that the current handler never sets `streamParams.maxTokens`. Automerge notes: - PR branch already contained follow-up commit before automerge: fix(gateway): wire max_completion_tokens/max_tokens through openai-http Validation: - ClawSweeper review passed for head a9c39f7d4a8020847ed7da6ee74773609b9e8c88. - Required merge gates passed before the squash merge. Prepared head SHA: a9c39f7d4a8020847ed7da6ee74773609b9e8c88 Review: https://github.com/openclaw/openclaw/pull/81013#issuecomment-4430303959 Co-authored-by: Bingsen Co-authored-by: clawsweeper <274271284+clawsweeper[bot]@users.noreply.github.com> --- CHANGELOG.md | 1 + docs/gateway/openai-http-api.md | 4 +++ src/gateway/openai-http.test.ts | 61 +++++++++++++++++++++++++++++++++ src/gateway/openai-http.ts | 12 +++++++ 4 files changed, 78 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2723587fc61..5e0f8dcfbd6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ Docs: https://docs.openclaw.ai ### Changes +- Gateway/OpenAI HTTP: honor `max_completion_tokens` and `max_tokens` on inbound `/v1/chat/completions` requests so client-provided token caps reach the upstream provider via `streamParams.maxTokens`, with `max_completion_tokens` taking precedence when both are sent. Thanks @Lellansin. 
- Models/OpenAI CLI auth: make `openclaw models auth login --provider openai` start the ChatGPT/Codex account login by default, while `--method api-key` remains the explicit OpenAI API-key setup path. - Google/Gemini: normalize retired Gemini 3 Pro Preview ids inside SDK OAuth auth-result default config patches, so helper-built provider auth flows emit `google/gemini-3.1-pro-preview` for Gemini 3.1 testing. - Google/Gemini: normalize retired Gemini 3 Pro Preview ids returned by direct `openclaw models auth login --set-default` provider auth flows before writing config, so Gemini testing targets `google/gemini-3.1-pro-preview`. diff --git a/docs/gateway/openai-http-api.md b/docs/gateway/openai-http-api.md index 3e2f6cfe1d4..9eac4412fc6 100644 --- a/docs/gateway/openai-http-api.md +++ b/docs/gateway/openai-http-api.md @@ -201,6 +201,10 @@ Set `stream: true` to receive Server-Sent Events (SSE): - `tool_choice`: `"auto"`, `"none"` - `messages[*].role: "tool"` follow-up turns - `messages[*].tool_call_id` for binding tool results back to a prior tool call +- `max_completion_tokens`: number; per-call cap for total completion tokens (reasoning tokens included). Current OpenAI Chat Completions field name; preferred when both `max_completion_tokens` and `max_tokens` are sent. +- `max_tokens`: number; legacy alias accepted for backwards compatibility. Ignored when `max_completion_tokens` is also present. + +When either field is set, the value is forwarded to the upstream provider via the agent stream-param channel. The actual wire field name sent to the upstream provider is chosen by the provider transport: `max_completion_tokens` for OpenAI-family endpoints, and `max_tokens` for providers that only accept the legacy name (such as Mistral and Chutes). 
### Unsupported variants diff --git a/src/gateway/openai-http.test.ts b/src/gateway/openai-http.test.ts index 77eb23fffc7..65fca33f7f0 100644 --- a/src/gateway/openai-http.test.ts +++ b/src/gateway/openai-http.test.ts @@ -1273,6 +1273,67 @@ describe("OpenAI-compatible HTTP API (e2e)", () => { } }); + it("forwards inbound max_completion_tokens and max_tokens into streamParams", async () => { + const port = enabledPort; + const mockAgentOnce = (payloads: Array<{ text: string }>) => { + agentCommand.mockClear(); + agentCommand.mockResolvedValueOnce({ payloads } as never); + }; + const getFirstAgentMaxTokens = () => { + const opts = (agentCommand.mock.calls[0] as unknown[] | undefined)?.[0]; + return (opts as { streamParams?: { maxTokens?: number } } | undefined)?.streamParams + ?.maxTokens; + }; + + { + mockAgentOnce([{ text: "hello" }]); + const res = await postChatCompletions(port, { + model: "openclaw", + max_completion_tokens: 256, + messages: [{ role: "user", content: "hi" }], + }); + expect(res.status).toBe(200); + expect(getFirstAgentMaxTokens()).toBe(256); + await res.text(); + } + + { + mockAgentOnce([{ text: "hello" }]); + const res = await postChatCompletions(port, { + model: "openclaw", + max_tokens: 128, + messages: [{ role: "user", content: "hi" }], + }); + expect(res.status).toBe(200); + expect(getFirstAgentMaxTokens()).toBe(128); + await res.text(); + } + + { + mockAgentOnce([{ text: "hello" }]); + const res = await postChatCompletions(port, { + model: "openclaw", + max_completion_tokens: 64, + max_tokens: 999, + messages: [{ role: "user", content: "hi" }], + }); + expect(res.status).toBe(200); + expect(getFirstAgentMaxTokens()).toBe(64); + await res.text(); + } + + { + mockAgentOnce([{ text: "hello" }]); + const res = await postChatCompletions(port, { + model: "openclaw", + messages: [{ role: "user", content: "hi" }], + }); + expect(res.status).toBe(200); + expect(getFirstAgentMaxTokens()).toBeUndefined(); + await res.text(); + } + }); + it("returns 
429 for repeated failed auth when gateway.auth.rateLimit is configured", async () => { testState.gatewayAuth = { mode: "token", diff --git a/src/gateway/openai-http.ts b/src/gateway/openai-http.ts index d186270bdfd..95bf730d540 100644 --- a/src/gateway/openai-http.ts +++ b/src/gateway/openai-http.ts @@ -73,6 +73,8 @@ type OpenAiChatCompletionRequest = { tool_choice?: unknown; messages?: unknown; user?: unknown; + max_tokens?: unknown; + max_completion_tokens?: unknown; }; const DEFAULT_OPENAI_CHAT_COMPLETIONS_BODY_BYTES = 20 * 1024 * 1024; @@ -132,6 +134,7 @@ function buildAgentCommandInput(params: { messageChannel: string; senderIsOwner: boolean; abortSignal?: AbortSignal; + streamParams?: { maxTokens?: number }; }) { return { message: params.prompt.message, @@ -147,6 +150,7 @@ function buildAgentCommandInput(params: { senderIsOwner: params.senderIsOwner, allowModelOverride: true as const, abortSignal: params.abortSignal, + streamParams: params.streamParams, }; } @@ -813,6 +817,13 @@ export async function handleOpenAiHttpRequest( const streamIncludeUsage = stream && resolveIncludeUsageForStreaming(payload); const model = typeof payload.model === "string" ? payload.model : "openclaw"; const user = typeof payload.user === "string" ? payload.user : undefined; + const maxTokens = + typeof payload.max_completion_tokens === "number" + ? payload.max_completion_tokens + : typeof payload.max_tokens === "number" + ? payload.max_tokens + : undefined; + const streamParams = maxTokens !== undefined ? { maxTokens } : undefined; const { agentId, sessionKey, messageChannel } = resolveGatewayRequestContext({ req, @@ -897,6 +908,7 @@ export async function handleOpenAiHttpRequest( messageChannel, abortSignal: abortController.signal, senderIsOwner, + streamParams, }); if (!stream) {