fix(gateway): wire max_completion_tokens/max_tokens through openai-http (#81013)

Summary:
- The branch adds Chat Completions token-cap fields to the Gateway request type, forwards them as agent stream parameters, and documents/tests the behavior.
- Reproducibility: yes. Source inspection gives a high-confidence repro on current main: send `max_completion_tokens` (or `max_tokens`) to `/v1/chat/completions` and observe that the current handler never sets `streamParams.maxTokens`; see the sketch below.
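
A minimal repro sketch in TypeScript, assuming a gateway listening locally; the base URL and port are placeholders, not part of this change:

```ts
// Hypothetical local gateway address; adjust to your deployment.
const BASE = "http://127.0.0.1:3000";

const res = await fetch(`${BASE}/v1/chat/completions`, {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({
    model: "openclaw",
    max_completion_tokens: 64, // pre-fix main drops this; post-fix it reaches streamParams.maxTokens
    messages: [{ role: "user", content: "hi" }],
  }),
});
console.log(res.status, await res.text());
```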

Automerge notes:
- The PR branch already contained a follow-up commit before automerge: fix(gateway): wire max_completion_tokens/max_tokens through openai-http

Validation:
- ClawSweeper review passed for head a9c39f7d4a.
- Required merge gates passed before the squash merge.

Prepared head SHA: a9c39f7d4a
Review: https://github.com/openclaw/openclaw/pull/81013#issuecomment-4430303959

Co-authored-by: Bingsen <dingheng.huang@urbanic.com>
Co-authored-by: clawsweeper <274271284+clawsweeper[bot]@users.noreply.github.com>
Authored by: clawsweeper[bot]
Date: 2026-05-12 12:08:07 +00:00
Committed by: GitHub
Parent: be7ca5d828
Commit: d44553dade
4 changed files with 78 additions and 0 deletions

View File

@@ -29,6 +29,7 @@ Docs: https://docs.openclaw.ai
### Changes
- Gateway/OpenAI HTTP: honor `max_completion_tokens` and `max_tokens` on inbound `/v1/chat/completions` requests so client-provided token caps reach the upstream provider via `streamParams.maxTokens`, with `max_completion_tokens` taking precedence when both are sent. Thanks @Lellansin.
- Models/OpenAI CLI auth: make `openclaw models auth login --provider openai` start the ChatGPT/Codex account login by default, while `--method api-key` remains the explicit OpenAI API-key setup path.
- Google/Gemini: normalize retired Gemini 3 Pro Preview ids inside SDK OAuth auth-result default config patches, so helper-built provider auth flows emit `google/gemini-3.1-pro-preview` for Gemini 3.1 testing.
- Google/Gemini: normalize retired Gemini 3 Pro Preview ids returned by direct `openclaw models auth login --set-default` provider auth flows before writing config, so Gemini testing targets `google/gemini-3.1-pro-preview`.

View File

@@ -201,6 +201,10 @@ Set `stream: true` to receive Server-Sent Events (SSE):
- `tool_choice`: `"auto"`, `"none"`
- `messages[*].role: "tool"` follow-up turns
- `messages[*].tool_call_id` for binding tool results back to a prior tool call
- `max_completion_tokens`: number; per-call cap for total completion tokens (reasoning tokens included). Current OpenAI Chat Completions field name; preferred when both `max_completion_tokens` and `max_tokens` are sent.
- `max_tokens`: number; legacy alias accepted for backwards compatibility. Ignored when `max_completion_tokens` is also present.
When either field is set, the value is forwarded to the upstream provider via the agent stream-param channel. The actual wire field name sent to the upstream provider is chosen by the provider transport: `max_completion_tokens` for OpenAI-family endpoints, and `max_tokens` for providers that only accept the legacy name (such as Mistral and Chutes).
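
For illustration, a hedged request-body sketch exercising the precedence rule (the values are arbitrary):

```ts
// Both caps sent: max_completion_tokens wins and max_tokens is ignored.
const body = {
  model: "openclaw",
  max_completion_tokens: 64, // forwarded upstream as the completion-token cap
  max_tokens: 999, // ignored because max_completion_tokens is present
  messages: [{ role: "user", content: "hi" }],
};
```
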
### Unsupported variants

View File

@@ -1273,6 +1273,67 @@ describe("OpenAI-compatible HTTP API (e2e)", () => {
}
});
it("forwards inbound max_completion_tokens and max_tokens into streamParams", async () => {
const port = enabledPort;
const mockAgentOnce = (payloads: Array<{ text: string }>) => {
agentCommand.mockClear();
agentCommand.mockResolvedValueOnce({ payloads } as never);
};
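// Reads streamParams.maxTokens from the first agentCommand invocation.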
const getFirstAgentMaxTokens = () => {
const opts = (agentCommand.mock.calls[0] as unknown[] | undefined)?.[0];
return (opts as { streamParams?: { maxTokens?: number } } | undefined)?.streamParams
?.maxTokens;
};
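// Case 1: max_completion_tokens alone is forwarded as streamParams.maxTokens.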
{
mockAgentOnce([{ text: "hello" }]);
const res = await postChatCompletions(port, {
model: "openclaw",
max_completion_tokens: 256,
messages: [{ role: "user", content: "hi" }],
});
expect(res.status).toBe(200);
expect(getFirstAgentMaxTokens()).toBe(256);
await res.text();
}
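// Case 2: legacy max_tokens alone is also forwarded.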
{
mockAgentOnce([{ text: "hello" }]);
const res = await postChatCompletions(port, {
model: "openclaw",
max_tokens: 128,
messages: [{ role: "user", content: "hi" }],
});
expect(res.status).toBe(200);
expect(getFirstAgentMaxTokens()).toBe(128);
await res.text();
}
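// Case 3: when both fields are present, max_completion_tokens wins.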
{
mockAgentOnce([{ text: "hello" }]);
const res = await postChatCompletions(port, {
model: "openclaw",
max_completion_tokens: 64,
max_tokens: 999,
messages: [{ role: "user", content: "hi" }],
});
expect(res.status).toBe(200);
expect(getFirstAgentMaxTokens()).toBe(64);
await res.text();
}
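// Case 4: with neither field set, streamParams.maxTokens stays undefined.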
{
mockAgentOnce([{ text: "hello" }]);
const res = await postChatCompletions(port, {
model: "openclaw",
messages: [{ role: "user", content: "hi" }],
});
expect(res.status).toBe(200);
expect(getFirstAgentMaxTokens()).toBeUndefined();
await res.text();
}
});
it("returns 429 for repeated failed auth when gateway.auth.rateLimit is configured", async () => {
testState.gatewayAuth = {
mode: "token",

View File

@@ -73,6 +73,8 @@ type OpenAiChatCompletionRequest = {
tool_choice?: unknown;
messages?: unknown;
user?: unknown;
max_tokens?: unknown;
max_completion_tokens?: unknown;
};
const DEFAULT_OPENAI_CHAT_COMPLETIONS_BODY_BYTES = 20 * 1024 * 1024;
@@ -132,6 +134,7 @@ function buildAgentCommandInput(params: {
messageChannel: string;
senderIsOwner: boolean;
abortSignal?: AbortSignal;
streamParams?: { maxTokens?: number };
}) {
return {
message: params.prompt.message,
@@ -147,6 +150,7 @@ function buildAgentCommandInput(params: {
senderIsOwner: params.senderIsOwner,
allowModelOverride: true as const,
abortSignal: params.abortSignal,
streamParams: params.streamParams,
};
}
@@ -813,6 +817,13 @@ export async function handleOpenAiHttpRequest(
const streamIncludeUsage = stream && resolveIncludeUsageForStreaming(payload);
const model = typeof payload.model === "string" ? payload.model : "openclaw";
const user = typeof payload.user === "string" ? payload.user : undefined;
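// Prefer the current OpenAI field (max_completion_tokens); fall back to the legacy max_tokens alias.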
const maxTokens =
typeof payload.max_completion_tokens === "number"
? payload.max_completion_tokens
: typeof payload.max_tokens === "number"
? payload.max_tokens
: undefined;
const streamParams = maxTokens !== undefined ? { maxTokens } : undefined;
const { agentId, sessionKey, messageChannel } = resolveGatewayRequestContext({
req,
@@ -897,6 +908,7 @@ export async function handleOpenAiHttpRequest(
messageChannel,
abortSignal: abortController.signal,
senderIsOwner,
streamParams,
});
if (!stream) {