mirror of
https://github.com/moltbot/moltbot.git
synced 2026-05-13 23:56:07 +00:00
fix(gateway): wire max_completion_tokens/max_tokens through openai-http (#81013)
Summary: - The branch adds Chat Completions token-cap fields to the Gateway request type, forwards them as agent stream parameters, and documents/tests the behavior. - Reproducibility: yes. Source inspection gives a high-confidence current-main path: send `max_completion_tokens` / `max_tokens` to `/v1/chat/completions` and observe that the current handler never sets `streamParams.maxTokens`. Automerge notes: - PR branch already contained follow-up commit before automerge: fix(gateway): wire max_completion_tokens/max_tokens through openai-http Validation: - ClawSweeper review passed for head a9c39f7d4a. - Required merge gates passed before the squash merge. Prepared head SHA: a9c39f7d4a Review: https://github.com/openclaw/openclaw/pull/81013#issuecomment-4430303959 Co-authored-by: Bingsen <dingheng.huang@urbanic.com> Co-authored-by: clawsweeper <274271284+clawsweeper[bot]@users.noreply.github.com>
This commit is contained in:
@@ -29,6 +29,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
### Changes
|
||||
|
||||
- Gateway/OpenAI HTTP: honor `max_completion_tokens` and `max_tokens` on inbound `/v1/chat/completions` requests so client-provided token caps reach the upstream provider via `streamParams.maxTokens`, with `max_completion_tokens` taking precedence when both are sent. Thanks @Lellansin.
|
||||
- Models/OpenAI CLI auth: make `openclaw models auth login --provider openai` start the ChatGPT/Codex account login by default, while `--method api-key` remains the explicit OpenAI API-key setup path.
|
||||
- Google/Gemini: normalize retired Gemini 3 Pro Preview ids inside SDK OAuth auth-result default config patches, so helper-built provider auth flows emit `google/gemini-3.1-pro-preview` for Gemini 3.1 testing.
|
||||
- Google/Gemini: normalize retired Gemini 3 Pro Preview ids returned by direct `openclaw models auth login --set-default` provider auth flows before writing config, so Gemini testing targets `google/gemini-3.1-pro-preview`.
|
||||
|
||||
@@ -201,6 +201,10 @@ Set `stream: true` to receive Server-Sent Events (SSE):
|
||||
- `tool_choice`: `"auto"`, `"none"`
|
||||
- `messages[*].role: "tool"` follow-up turns
|
||||
- `messages[*].tool_call_id` for binding tool results back to a prior tool call
|
||||
- `max_completion_tokens`: number; per-call cap for total completion tokens (reasoning tokens included). Current OpenAI Chat Completions field name; preferred when both `max_completion_tokens` and `max_tokens` are sent.
|
||||
- `max_tokens`: number; legacy alias accepted for backwards compatibility. Ignored when `max_completion_tokens` is also present.
|
||||
|
||||
When either field is set, the value is forwarded to the upstream provider via the agent stream-param channel. The actual wire field name sent to the upstream provider is chosen by the provider transport: `max_completion_tokens` for OpenAI-family endpoints, and `max_tokens` for providers that only accept the legacy name (such as Mistral and Chutes).
|
||||
|
||||
### Unsupported variants
|
||||
|
||||
|
||||
@@ -1273,6 +1273,67 @@ describe("OpenAI-compatible HTTP API (e2e)", () => {
|
||||
}
|
||||
});
|
||||
|
||||
it("forwards inbound max_completion_tokens and max_tokens into streamParams", async () => {
|
||||
const port = enabledPort;
|
||||
const mockAgentOnce = (payloads: Array<{ text: string }>) => {
|
||||
agentCommand.mockClear();
|
||||
agentCommand.mockResolvedValueOnce({ payloads } as never);
|
||||
};
|
||||
const getFirstAgentMaxTokens = () => {
|
||||
const opts = (agentCommand.mock.calls[0] as unknown[] | undefined)?.[0];
|
||||
return (opts as { streamParams?: { maxTokens?: number } } | undefined)?.streamParams
|
||||
?.maxTokens;
|
||||
};
|
||||
|
||||
{
|
||||
mockAgentOnce([{ text: "hello" }]);
|
||||
const res = await postChatCompletions(port, {
|
||||
model: "openclaw",
|
||||
max_completion_tokens: 256,
|
||||
messages: [{ role: "user", content: "hi" }],
|
||||
});
|
||||
expect(res.status).toBe(200);
|
||||
expect(getFirstAgentMaxTokens()).toBe(256);
|
||||
await res.text();
|
||||
}
|
||||
|
||||
{
|
||||
mockAgentOnce([{ text: "hello" }]);
|
||||
const res = await postChatCompletions(port, {
|
||||
model: "openclaw",
|
||||
max_tokens: 128,
|
||||
messages: [{ role: "user", content: "hi" }],
|
||||
});
|
||||
expect(res.status).toBe(200);
|
||||
expect(getFirstAgentMaxTokens()).toBe(128);
|
||||
await res.text();
|
||||
}
|
||||
|
||||
{
|
||||
mockAgentOnce([{ text: "hello" }]);
|
||||
const res = await postChatCompletions(port, {
|
||||
model: "openclaw",
|
||||
max_completion_tokens: 64,
|
||||
max_tokens: 999,
|
||||
messages: [{ role: "user", content: "hi" }],
|
||||
});
|
||||
expect(res.status).toBe(200);
|
||||
expect(getFirstAgentMaxTokens()).toBe(64);
|
||||
await res.text();
|
||||
}
|
||||
|
||||
{
|
||||
mockAgentOnce([{ text: "hello" }]);
|
||||
const res = await postChatCompletions(port, {
|
||||
model: "openclaw",
|
||||
messages: [{ role: "user", content: "hi" }],
|
||||
});
|
||||
expect(res.status).toBe(200);
|
||||
expect(getFirstAgentMaxTokens()).toBeUndefined();
|
||||
await res.text();
|
||||
}
|
||||
});
|
||||
|
||||
it("returns 429 for repeated failed auth when gateway.auth.rateLimit is configured", async () => {
|
||||
testState.gatewayAuth = {
|
||||
mode: "token",
|
||||
|
||||
@@ -73,6 +73,8 @@ type OpenAiChatCompletionRequest = {
|
||||
tool_choice?: unknown;
|
||||
messages?: unknown;
|
||||
user?: unknown;
|
||||
max_tokens?: unknown;
|
||||
max_completion_tokens?: unknown;
|
||||
};
|
||||
|
||||
const DEFAULT_OPENAI_CHAT_COMPLETIONS_BODY_BYTES = 20 * 1024 * 1024;
|
||||
@@ -132,6 +134,7 @@ function buildAgentCommandInput(params: {
|
||||
messageChannel: string;
|
||||
senderIsOwner: boolean;
|
||||
abortSignal?: AbortSignal;
|
||||
streamParams?: { maxTokens?: number };
|
||||
}) {
|
||||
return {
|
||||
message: params.prompt.message,
|
||||
@@ -147,6 +150,7 @@ function buildAgentCommandInput(params: {
|
||||
senderIsOwner: params.senderIsOwner,
|
||||
allowModelOverride: true as const,
|
||||
abortSignal: params.abortSignal,
|
||||
streamParams: params.streamParams,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -813,6 +817,13 @@ export async function handleOpenAiHttpRequest(
|
||||
const streamIncludeUsage = stream && resolveIncludeUsageForStreaming(payload);
|
||||
const model = typeof payload.model === "string" ? payload.model : "openclaw";
|
||||
const user = typeof payload.user === "string" ? payload.user : undefined;
|
||||
const maxTokens =
|
||||
typeof payload.max_completion_tokens === "number"
|
||||
? payload.max_completion_tokens
|
||||
: typeof payload.max_tokens === "number"
|
||||
? payload.max_tokens
|
||||
: undefined;
|
||||
const streamParams = maxTokens !== undefined ? { maxTokens } : undefined;
|
||||
|
||||
const { agentId, sessionKey, messageChannel } = resolveGatewayRequestContext({
|
||||
req,
|
||||
@@ -897,6 +908,7 @@ export async function handleOpenAiHttpRequest(
|
||||
messageChannel,
|
||||
abortSignal: abortController.signal,
|
||||
senderIsOwner,
|
||||
streamParams,
|
||||
});
|
||||
|
||||
if (!stream) {
|
||||
|
||||
Reference in New Issue
Block a user