fix(gateway): wire max_completion_tokens/max_tokens through openai-http (#81013)

Summary:
- The branch adds Chat Completions token-cap fields to the Gateway request type, forwards them as agent stream parameters, and documents/tests the behavior.
- Reproducibility: yes. Source inspection gives a high-confidence repro on current main: send `max_completion_tokens` (or `max_tokens`) to `/v1/chat/completions` and observe that the current handler never sets `streamParams.maxTokens`; see the sketch below.
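
A minimal repro sketch in TypeScript, assuming a gateway listening locally; the base URL and port are placeholders, not part of this change:

```ts
// Hypothetical local gateway address; adjust to your deployment.
const BASE = "http://127.0.0.1:3000";

const res = await fetch(`${BASE}/v1/chat/completions`, {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({
    model: "openclaw",
    max_completion_tokens: 64, // pre-fix main drops this; post-fix it reaches streamParams.maxTokens
    messages: [{ role: "user", content: "hi" }],
  }),
});
console.log(res.status, await res.text());
```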

Automerge notes:
- The PR branch already contained a follow-up commit before automerge: fix(gateway): wire max_completion_tokens/max_tokens through openai-http

Validation:
- ClawSweeper review passed for head a9c39f7d4a.
- Required merge gates passed before the squash merge.

Prepared head SHA: a9c39f7d4a
Review: https://github.com/openclaw/openclaw/pull/81013#issuecomment-4430303959

Co-authored-by: Bingsen <dingheng.huang@urbanic.com>
Co-authored-by: clawsweeper <274271284+clawsweeper[bot]@users.noreply.github.com>
Authored by: clawsweeper[bot]
Date: 2026-05-12 12:08:07 +00:00
Committed by: GitHub
Parent: be7ca5d828
Commit: d44553dade
4 changed files with 78 additions and 0 deletions

View File

@@ -29,6 +29,7 @@ Docs: https://docs.openclaw.ai
### Changes
- Gateway/OpenAI HTTP: honor `max_completion_tokens` and `max_tokens` on inbound `/v1/chat/completions` requests so client-provided token caps reach the upstream provider via `streamParams.maxTokens`, with `max_completion_tokens` taking precedence when both are sent. Thanks @Lellansin.
- Models/OpenAI CLI auth: make `openclaw models auth login --provider openai` start the ChatGPT/Codex account login by default, while `--method api-key` remains the explicit OpenAI API-key setup path.
- Google/Gemini: normalize retired Gemini 3 Pro Preview ids inside SDK OAuth auth-result default config patches, so helper-built provider auth flows emit `google/gemini-3.1-pro-preview` for Gemini 3.1 testing.
- Google/Gemini: normalize retired Gemini 3 Pro Preview ids returned by direct `openclaw models auth login --set-default` provider auth flows before writing config, so Gemini testing targets `google/gemini-3.1-pro-preview`.

View File

@@ -201,6 +201,10 @@ Set `stream: true` to receive Server-Sent Events (SSE):
- `tool_choice`: `"auto"`, `"none"`
- `messages[*].role: "tool"` follow-up turns
- `messages[*].tool_call_id` for binding tool results back to a prior tool call
- `max_completion_tokens`: number; per-call cap for total completion tokens (reasoning tokens included). Current OpenAI Chat Completions field name; preferred when both `max_completion_tokens` and `max_tokens` are sent.
- `max_tokens`: number; legacy alias accepted for backwards compatibility. Ignored when `max_completion_tokens` is also present.
When either field is set, the value is forwarded to the upstream provider via the agent stream-param channel. The actual wire field name sent to the upstream provider is chosen by the provider transport: `max_completion_tokens` for OpenAI-family endpoints, and `max_tokens` for providers that only accept the legacy name (such as Mistral and Chutes).
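
For illustration, a hedged request-body sketch exercising the precedence rule (the values are arbitrary):

```ts
// Both caps sent: max_completion_tokens wins and max_tokens is ignored.
const body = {
  model: "openclaw",
  max_completion_tokens: 64, // forwarded upstream as the completion-token cap
  max_tokens: 999, // ignored because max_completion_tokens is present
  messages: [{ role: "user", content: "hi" }],
};
```
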
### Unsupported variants

View File

@@ -1273,6 +1273,67 @@ describe("OpenAI-compatible HTTP API (e2e)", () => {
}
});
it("forwards inbound max_completion_tokens and max_tokens into streamParams", async () => {
const port = enabledPort;
const mockAgentOnce = (payloads: Array<{ text: string }>) => {
agentCommand.mockClear();
agentCommand.mockResolvedValueOnce({ payloads } as never);
};
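// Reads streamParams.maxTokens from the first agentCommand invocation.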
const getFirstAgentMaxTokens = () => {
const opts = (agentCommand.mock.calls[0] as unknown[] | undefined)?.[0];
return (opts as { streamParams?: { maxTokens?: number } } | undefined)?.streamParams
?.maxTokens;
};
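// Case 1: max_completion_tokens alone is forwarded as streamParams.maxTokens.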
{
mockAgentOnce([{ text: "hello" }]);
const res = await postChatCompletions(port, {
model: "openclaw",
max_completion_tokens: 256,
messages: [{ role: "user", content: "hi" }],
});
expect(res.status).toBe(200);
expect(getFirstAgentMaxTokens()).toBe(256);
await res.text();
}
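// Case 2: legacy max_tokens alone is also forwarded.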
{
mockAgentOnce([{ text: "hello" }]);
const res = await postChatCompletions(port, {
model: "openclaw",
max_tokens: 128,
messages: [{ role: "user", content: "hi" }],
});
expect(res.status).toBe(200);
expect(getFirstAgentMaxTokens()).toBe(128);
await res.text();
}
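// Case 3: when both fields are present, max_completion_tokens wins.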
{
mockAgentOnce([{ text: "hello" }]);
const res = await postChatCompletions(port, {
model: "openclaw",
max_completion_tokens: 64,
max_tokens: 999,
messages: [{ role: "user", content: "hi" }],
});
expect(res.status).toBe(200);
expect(getFirstAgentMaxTokens()).toBe(64);
await res.text();
}
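// Case 4: with neither field set, streamParams.maxTokens stays undefined.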
{
mockAgentOnce([{ text: "hello" }]);
const res = await postChatCompletions(port, {
model: "openclaw",
messages: [{ role: "user", content: "hi" }],
});
expect(res.status).toBe(200);
expect(getFirstAgentMaxTokens()).toBeUndefined();
await res.text();
}
});
it("returns 429 for repeated failed auth when gateway.auth.rateLimit is configured", async () => {
testState.gatewayAuth = {
mode: "token",

View File

@@ -73,6 +73,8 @@ type OpenAiChatCompletionRequest = {
tool_choice?: unknown;
messages?: unknown;
user?: unknown;
max_tokens?: unknown;
max_completion_tokens?: unknown;
};
const DEFAULT_OPENAI_CHAT_COMPLETIONS_BODY_BYTES = 20 * 1024 * 1024;
@@ -132,6 +134,7 @@ function buildAgentCommandInput(params: {
messageChannel: string;
senderIsOwner: boolean;
abortSignal?: AbortSignal;
streamParams?: { maxTokens?: number };
}) {
return {
message: params.prompt.message,
@@ -147,6 +150,7 @@ function buildAgentCommandInput(params: {
senderIsOwner: params.senderIsOwner,
allowModelOverride: true as const,
abortSignal: params.abortSignal,
streamParams: params.streamParams,
};
}
@@ -813,6 +817,13 @@ export async function handleOpenAiHttpRequest(
const streamIncludeUsage = stream && resolveIncludeUsageForStreaming(payload);
const model = typeof payload.model === "string" ? payload.model : "openclaw";
const user = typeof payload.user === "string" ? payload.user : undefined;
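// Prefer the current OpenAI field (max_completion_tokens); fall back to the legacy max_tokens alias.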
const maxTokens =
typeof payload.max_completion_tokens === "number"
? payload.max_completion_tokens
: typeof payload.max_tokens === "number"
? payload.max_tokens
: undefined;
const streamParams = maxTokens !== undefined ? { maxTokens } : undefined;
const { agentId, sessionKey, messageChannel } = resolveGatewayRequestContext({
req,
@@ -897,6 +908,7 @@ export async function handleOpenAiHttpRequest(
messageChannel,
abortSignal: abortController.signal,
senderIsOwner,
streamParams,
});
if (!stream) {