From d44553dade33191c2eba7df9fb9b0def91639369 Mon Sep 17 00:00:00 2001 From: "clawsweeper[bot]" <274271284+clawsweeper[bot]@users.noreply.github.com> Date: Tue, 12 May 2026 12:08:07 +0000 Subject: [PATCH] fix(gateway): wire max_completion_tokens/max_tokens through openai-http (#81013) Summary: - The branch adds Chat Completions token-cap fields to the Gateway request type, forwards them as agent stream parameters, and documents/tests the behavior. - Reproducibility: yes. Source inspection gives a high-confidence current-main path: send `max_completion_tokens` or `max_tokens` to `/v1/chat/completions` and observe that the current handler never sets `streamParams.maxTokens`. Automerge notes: - PR branch already contained follow-up commit before automerge: fix(gateway): wire max_completion_tokens/max_tokens through openai-http Validation: - ClawSweeper review passed for head a9c39f7d4a8020847ed7da6ee74773609b9e8c88. - Required merge gates passed before the squash merge. Prepared head SHA: a9c39f7d4a8020847ed7da6ee74773609b9e8c88 Review: https://github.com/openclaw/openclaw/pull/81013#issuecomment-4430303959 Co-authored-by: Bingsen Co-authored-by: clawsweeper <274271284+clawsweeper[bot]@users.noreply.github.com> --- CHANGELOG.md | 1 + docs/gateway/openai-http-api.md | 4 +++ src/gateway/openai-http.test.ts | 61 +++++++++++++++++++++++++++++++++ src/gateway/openai-http.ts | 12 +++++++ 4 files changed, 78 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2723587fc61..5e0f8dcfbd6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ Docs: https://docs.openclaw.ai ### Changes +- Gateway/OpenAI HTTP: honor `max_completion_tokens` and `max_tokens` on inbound `/v1/chat/completions` requests so client-provided token caps reach the upstream provider via `streamParams.maxTokens`, with `max_completion_tokens` taking precedence when both are sent. Thanks @Lellansin. 
- Models/OpenAI CLI auth: make `openclaw models auth login --provider openai` start the ChatGPT/Codex account login by default, while `--method api-key` remains the explicit OpenAI API-key setup path. - Google/Gemini: normalize retired Gemini 3 Pro Preview ids inside SDK OAuth auth-result default config patches, so helper-built provider auth flows emit `google/gemini-3.1-pro-preview` for Gemini 3.1 testing. - Google/Gemini: normalize retired Gemini 3 Pro Preview ids returned by direct `openclaw models auth login --set-default` provider auth flows before writing config, so Gemini testing targets `google/gemini-3.1-pro-preview`. diff --git a/docs/gateway/openai-http-api.md b/docs/gateway/openai-http-api.md index 3e2f6cfe1d4..9eac4412fc6 100644 --- a/docs/gateway/openai-http-api.md +++ b/docs/gateway/openai-http-api.md @@ -201,6 +201,10 @@ Set `stream: true` to receive Server-Sent Events (SSE): - `tool_choice`: `"auto"`, `"none"` - `messages[*].role: "tool"` follow-up turns - `messages[*].tool_call_id` for binding tool results back to a prior tool call +- `max_completion_tokens`: number; per-call cap for total completion tokens (reasoning tokens included). Current OpenAI Chat Completions field name; preferred when both `max_completion_tokens` and `max_tokens` are sent. +- `max_tokens`: number; legacy alias accepted for backwards compatibility. Ignored when `max_completion_tokens` is also present. + +When either field is set, the value is forwarded to the upstream provider via the agent stream-param channel. The actual wire field name sent to the upstream provider is chosen by the provider transport: `max_completion_tokens` for OpenAI-family endpoints, and `max_tokens` for providers that only accept the legacy name (such as Mistral and Chutes). 
### Unsupported variants diff --git a/src/gateway/openai-http.test.ts b/src/gateway/openai-http.test.ts index 77eb23fffc7..65fca33f7f0 100644 --- a/src/gateway/openai-http.test.ts +++ b/src/gateway/openai-http.test.ts @@ -1273,6 +1273,67 @@ describe("OpenAI-compatible HTTP API (e2e)", () => { } }); + it("forwards inbound max_completion_tokens and max_tokens into streamParams", async () => { + const port = enabledPort; + const mockAgentOnce = (payloads: Array<{ text: string }>) => { + agentCommand.mockClear(); + agentCommand.mockResolvedValueOnce({ payloads } as never); + }; + const getFirstAgentMaxTokens = () => { + const opts = (agentCommand.mock.calls[0] as unknown[] | undefined)?.[0]; + return (opts as { streamParams?: { maxTokens?: number } } | undefined)?.streamParams + ?.maxTokens; + }; + + { + mockAgentOnce([{ text: "hello" }]); + const res = await postChatCompletions(port, { + model: "openclaw", + max_completion_tokens: 256, + messages: [{ role: "user", content: "hi" }], + }); + expect(res.status).toBe(200); + expect(getFirstAgentMaxTokens()).toBe(256); + await res.text(); + } + + { + mockAgentOnce([{ text: "hello" }]); + const res = await postChatCompletions(port, { + model: "openclaw", + max_tokens: 128, + messages: [{ role: "user", content: "hi" }], + }); + expect(res.status).toBe(200); + expect(getFirstAgentMaxTokens()).toBe(128); + await res.text(); + } + + { + mockAgentOnce([{ text: "hello" }]); + const res = await postChatCompletions(port, { + model: "openclaw", + max_completion_tokens: 64, + max_tokens: 999, + messages: [{ role: "user", content: "hi" }], + }); + expect(res.status).toBe(200); + expect(getFirstAgentMaxTokens()).toBe(64); + await res.text(); + } + + { + mockAgentOnce([{ text: "hello" }]); + const res = await postChatCompletions(port, { + model: "openclaw", + messages: [{ role: "user", content: "hi" }], + }); + expect(res.status).toBe(200); + expect(getFirstAgentMaxTokens()).toBeUndefined(); + await res.text(); + } + }); + it("returns 
429 for repeated failed auth when gateway.auth.rateLimit is configured", async () => { testState.gatewayAuth = { mode: "token", diff --git a/src/gateway/openai-http.ts b/src/gateway/openai-http.ts index d186270bdfd..95bf730d540 100644 --- a/src/gateway/openai-http.ts +++ b/src/gateway/openai-http.ts @@ -73,6 +73,8 @@ type OpenAiChatCompletionRequest = { tool_choice?: unknown; messages?: unknown; user?: unknown; + max_tokens?: unknown; + max_completion_tokens?: unknown; }; const DEFAULT_OPENAI_CHAT_COMPLETIONS_BODY_BYTES = 20 * 1024 * 1024; @@ -132,6 +134,7 @@ function buildAgentCommandInput(params: { messageChannel: string; senderIsOwner: boolean; abortSignal?: AbortSignal; + streamParams?: { maxTokens?: number }; }) { return { message: params.prompt.message, @@ -147,6 +150,7 @@ function buildAgentCommandInput(params: { senderIsOwner: params.senderIsOwner, allowModelOverride: true as const, abortSignal: params.abortSignal, + streamParams: params.streamParams, }; } @@ -813,6 +817,13 @@ export async function handleOpenAiHttpRequest( const streamIncludeUsage = stream && resolveIncludeUsageForStreaming(payload); const model = typeof payload.model === "string" ? payload.model : "openclaw"; const user = typeof payload.user === "string" ? payload.user : undefined; + const maxTokens = + typeof payload.max_completion_tokens === "number" + ? payload.max_completion_tokens + : typeof payload.max_tokens === "number" + ? payload.max_tokens + : undefined; + const streamParams = maxTokens !== undefined ? { maxTokens } : undefined; const { agentId, sessionKey, messageChannel } = resolveGatewayRequestContext({ req, @@ -897,6 +908,7 @@ export async function handleOpenAiHttpRequest( messageChannel, abortSignal: abortController.signal, senderIsOwner, + streamParams, }); if (!stream) {