diff --git a/CHANGELOG.md b/CHANGELOG.md index 8cbf15708d1..66261a1206f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- OpenAI-compatible models: strip prior assistant reasoning fields from replayed Chat Completions history by default, preventing oMLX/vLLM Qwen follow-up turns from rejecting or stalling on stale `reasoning` payloads. Fixes #46637. Thanks @zipzagster and @lexhoefsloot. - CLI/onboarding: give non-Azure custom providers a safe generated context window and heal legacy 4k wizard entries without overwriting explicit valid small model limits, preventing first-turn compaction loops. Fixes #79428. (#79911) Thanks @Jefsky. - OpenAI-compatible models: add `compat.strictMessageKeys` to strip Chat Completions replay messages to `role` and `content` for strict providers that reject OpenAI-style tool and metadata keys. Fixes #50374. Thanks @choutos. - Ollama: stop native `/api/chat` requests from copying catalog `contextWindow` or `maxTokens` into `options.num_ctx` unless `params.num_ctx` is explicitly configured, avoiding pathological prompt-ingestion latency on local large-context models. Fixes #62267. Thanks @BenSHPD. diff --git a/docs/reference/transcript-hygiene.md b/docs/reference/transcript-hygiene.md index 0233f49e2aa..fbd0e713ea6 100644 --- a/docs/reference/transcript-hygiene.md +++ b/docs/reference/transcript-hygiene.md @@ -124,12 +124,15 @@ inter-session user turns that only have provenance metadata. - Missing OpenAI Responses-family tool outputs are synthesized as `aborted` to match Codex replay normalization. - No thought signature stripping. -**OpenAI-compatible Gemma 4** +**OpenAI-compatible Chat Completions** -- Historical assistant thinking/reasoning blocks are stripped before replay so local - OpenAI-compatible Gemma 4 servers do not receive prior-turn reasoning content. 
+- Historical assistant thinking/reasoning blocks are stripped before replay so + local and proxy-style OpenAI-compatible servers do not receive prior-turn + reasoning fields such as `reasoning` or `reasoning_content`. - Current same-turn tool-call continuations keep the assistant reasoning block attached to the tool call until the tool result has been replayed. +- Provider-owned exceptions can opt out when their wire protocol requires + replayed reasoning metadata; Gemma 4 model IDs are still stripped after an opt-out. **Google (Generative AI / Gemini CLI / Antigravity)** diff --git a/extensions/deepseek/index.ts b/extensions/deepseek/index.ts index 8f93bb9eadf..e64d856398c 100644 --- a/extensions/deepseek/index.ts +++ b/extensions/deepseek/index.ts @@ -45,7 +45,10 @@ export default defineSingleProviderPluginEntry({ }), matchesContextOverflowError: ({ errorMessage }) => /\bdeepseek\b.*(?:input.*too long|context.*exceed)/i.test(errorMessage), - ...buildProviderReplayFamilyHooks({ family: "openai-compatible" }), + ...buildProviderReplayFamilyHooks({ + family: "openai-compatible", + dropReasoningFromHistory: false, + }), wrapStreamFn: (ctx) => createDeepSeekV4ThinkingWrapper(ctx.streamFn, ctx.thinkingLevel), resolveThinkingProfile: ({ modelId }) => resolveDeepSeekV4ThinkingProfile(modelId), isModernModelRef: ({ modelId }) => Boolean(resolveDeepSeekV4ThinkingProfile(modelId)), diff --git a/extensions/zai/index.ts b/extensions/zai/index.ts index 210d5b3f0a1..64122b37e29 100644 --- a/extensions/zai/index.ts +++ b/extensions/zai/index.ts @@ -18,8 +18,8 @@ import { validateApiKeyInput, } from "openclaw/plugin-sdk/provider-auth-api-key"; import { + buildProviderReplayFamilyHooks, normalizeModelCompat, - OPENAI_COMPATIBLE_REPLAY_HOOKS, } from "openclaw/plugin-sdk/provider-model-shared"; import { createPayloadPatchStreamWrapper, @@ -319,7 +319,10 @@ export default definePluginEntry({ }), ], resolveDynamicModel: (ctx) => resolveGlm5ForwardCompatModel(ctx), - ...OPENAI_COMPATIBLE_REPLAY_HOOKS, + ...buildProviderReplayFamilyHooks({ 
+ family: "openai-compatible", + dropReasoningFromHistory: false, + }), prepareExtraParams: (ctx) => defaultToolStreamExtraParams(ctx.extraParams), wrapStreamFn: (ctx) => wrapZaiStreamFn(ctx), resolveThinkingProfile: () => ({ diff --git a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts index e3edebdfc3e..d82beea5b4b 100644 --- a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts +++ b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts @@ -1196,7 +1196,7 @@ describe("sanitizeSessionHistory", () => { ]); }); - it("strips prior assistant reasoning for Gemma 4 OpenAI-compatible replay", async () => { + it("strips prior assistant reasoning for Qwen-style OpenAI-compatible replay", async () => { setNonGoogleModelApi(); const messages = castAgentMessages([ @@ -1215,8 +1215,8 @@ describe("sanitizeSessionHistory", () => { const result = await sanitizeSessionHistory({ messages, modelApi: "openai-completions", - provider: "lmstudio", - modelId: "google/gemma-4-26b-a4b-it", + provider: "vllm", + modelId: "Qwen3.6-27B", sessionManager: makeMockSessionManager(), sessionId: TEST_SESSION_ID, }); @@ -1226,7 +1226,7 @@ describe("sanitizeSessionHistory", () => { ]); }); - it("preserves current Gemma 4 tool-call reasoning during tool continuation replay", async () => { + it("preserves current OpenAI-compatible tool-call reasoning during tool continuation replay", async () => { setNonGoogleModelApi(); const messages = castAgentMessages([ @@ -1251,8 +1251,8 @@ describe("sanitizeSessionHistory", () => { const result = await sanitizeSessionHistory({ messages, modelApi: "openai-completions", - provider: "lmstudio", - modelId: "google/gemma-4-26b-a4b-it", + provider: "vllm", + modelId: "Qwen3.6-27B", sessionManager: makeMockSessionManager(), sessionId: TEST_SESSION_ID, }); diff --git a/src/agents/transcript-policy.test.ts b/src/agents/transcript-policy.test.ts index 
5f40865024c..707e5ad71db 100644 --- a/src/agents/transcript-policy.test.ts +++ b/src/agents/transcript-policy.test.ts @@ -346,20 +346,20 @@ describe("resolveTranscriptPolicy", () => { expect(policy.validateAnthropicTurns).toBe(true); }); - it("strips historical reasoning for Gemma 4 on OpenAI-compatible providers", () => { + it("strips historical reasoning for strict OpenAI-compatible providers", () => { const policy = resolveTranscriptPolicy({ provider: "custom-openai-proxy", - modelId: "google/gemma-4-26b-a4b-it", + modelId: "qwen3.6-27b", modelApi: "openai-completions", }); expect(policy.dropReasoningFromHistory).toBe(true); - const gemma3Policy = resolveTranscriptPolicy({ + const responsesPolicy = resolveTranscriptPolicy({ provider: "custom-openai-proxy", - modelId: "google/gemma-3-27b-it", - modelApi: "openai-completions", + modelId: "qwen3.6-27b", + modelApi: "openai-responses", }); - expect(gemma3Policy.dropReasoningFromHistory).toBe(false); + expect(responsesPolicy.dropReasoningFromHistory).toBe(false); }); it("falls back to unowned transport defaults when no owning plugin exists", () => { diff --git a/src/agents/transcript-policy.ts b/src/agents/transcript-policy.ts index 7422dcaac21..adb265c7c1e 100644 --- a/src/agents/transcript-policy.ts +++ b/src/agents/transcript-policy.ts @@ -7,10 +7,7 @@ import type { ProviderRuntimeModel } from "../plugins/provider-runtime-model.typ import type { ProviderReplayPolicy } from "../plugins/types.js"; import { normalizeLowercaseStringOrEmpty } from "../shared/string-coerce.js"; import { normalizeProviderId } from "./model-selection.js"; -import { - isGemma4ModelRequiringReasoningStrip, - isGoogleModelApi, -} from "./pi-embedded-helpers/google.js"; +import { isGoogleModelApi } from "./pi-embedded-helpers/google.js"; import type { ToolCallIdMode } from "./tool-call-id.js"; export type TranscriptSanitizeMode = "full" | "images-only"; @@ -146,9 +143,7 @@ function buildUnownedProviderTransportReplayFallback(params: { 
...(isAnthropic && modelDisablesReasoningEffort(params.model) ? { dropThinkingBlocks: true } : {}), - ...(isStrictOpenAiCompatible && isGemma4ModelRequiringReasoningStrip(modelId) - ? { dropReasoningFromHistory: true } - : {}), + ...(isStrictOpenAiCompatible ? { dropReasoningFromHistory: true } : {}), ...(isGoogle || isStrictOpenAiCompatible ? { applyAssistantFirstOrderingFix: true } : {}), ...(isGoogle || isStrictOpenAiCompatible ? { validateGeminiTurns: true } : {}), ...(isAnthropic || isStrictOpenAiCompatible || isClaudeOpenAiResponses diff --git a/src/plugin-sdk/provider-model-shared.test.ts b/src/plugin-sdk/provider-model-shared.test.ts index 5ff4b08c97c..981f22fe771 100644 --- a/src/plugin-sdk/provider-model-shared.test.ts +++ b/src/plugin-sdk/provider-model-shared.test.ts @@ -22,6 +22,7 @@ describe("buildProviderReplayFamilyHooks", () => { sanitizeToolCallIds: true, applyAssistantFirstOrderingFix: true, validateGeminiTurns: true, + dropReasoningFromHistory: true, }, hasSanitizeReplayHistory: false, reasoningMode: undefined, @@ -196,6 +197,7 @@ describe("buildProviderReplayFamilyHooks", () => { const nativeIdsHooks = buildProviderReplayFamilyHooks({ family: "openai-compatible", sanitizeToolCallIds: false, + dropReasoningFromHistory: false, }); const nativeIdsPolicy = nativeIdsHooks.buildReplayPolicy?.({ provider: "moonshot", diff --git a/src/plugin-sdk/provider-model-shared.ts b/src/plugin-sdk/provider-model-shared.ts index da35431c3e4..27c5e4378f1 100644 --- a/src/plugin-sdk/provider-model-shared.ts +++ b/src/plugin-sdk/provider-model-shared.ts @@ -172,7 +172,11 @@ type ProviderReplayFamilyHooks = Pick< >; type BuildProviderReplayFamilyHooksOptions = - | { family: "openai-compatible"; sanitizeToolCallIds?: boolean } + | { + family: "openai-compatible"; + sanitizeToolCallIds?: boolean; + dropReasoningFromHistory?: boolean; + } | { family: "anthropic-by-model" } | { family: "native-anthropic-by-model" } | { family: "google-gemini" } @@ -187,7 +191,10 @@ export 
function buildProviderReplayFamilyHooks( ): ProviderReplayFamilyHooks { switch (options.family) { case "openai-compatible": { - const policyOptions = { sanitizeToolCallIds: options.sanitizeToolCallIds }; + const policyOptions = { + sanitizeToolCallIds: options.sanitizeToolCallIds, + dropReasoningFromHistory: options.dropReasoningFromHistory, + }; return { buildReplayPolicy: (ctx: ProviderReplayPolicyContext) => buildOpenAICompatibleReplayPolicy(ctx.modelApi, { diff --git a/src/plugins/provider-replay-helpers.test.ts b/src/plugins/provider-replay-helpers.test.ts index 5e8ac1aedcb..59091722135 100644 --- a/src/plugins/provider-replay-helpers.test.ts +++ b/src/plugins/provider-replay-helpers.test.ts @@ -35,10 +35,10 @@ describe("provider replay helpers", () => { expect(policy).not.toHaveProperty("toolCallIdMode"); }); - it("drops historical reasoning for Gemma 4 openai-completions replay", () => { + it("drops historical reasoning for OpenAI-compatible chat completions replay", () => { expect( buildOpenAICompatibleReplayPolicy("openai-completions", { - modelId: "google/gemma-4-26b-a4b-it", + modelId: "qwen3.6-27b", }), ).toMatchObject({ dropReasoningFromHistory: true, @@ -46,8 +46,17 @@ describe("provider replay helpers", () => { expect( buildOpenAICompatibleReplayPolicy("openai-completions", { modelId: "google/gemma-3-27b-it", + dropReasoningFromHistory: false, }), ).not.toHaveProperty("dropReasoningFromHistory"); + expect( + buildOpenAICompatibleReplayPolicy("openai-completions", { + modelId: "google/gemma-4-26b-a4b-it", + dropReasoningFromHistory: false, + }), + ).toMatchObject({ + dropReasoningFromHistory: true, + }); expect( buildOpenAICompatibleReplayPolicy("openai-responses", { modelId: "google/gemma-4-26b-a4b-it", diff --git a/src/plugins/provider-replay-helpers.ts b/src/plugins/provider-replay-helpers.ts index 4358576aaf4..8df8cbd2b77 100644 --- a/src/plugins/provider-replay-helpers.ts +++ b/src/plugins/provider-replay-helpers.ts @@ -12,7 +12,11 @@ import type 
{ export function buildOpenAICompatibleReplayPolicy( modelApi: string | null | undefined, - options: { sanitizeToolCallIds?: boolean; modelId?: string | null } = {}, + options: { + sanitizeToolCallIds?: boolean; + modelId?: string | null; + dropReasoningFromHistory?: boolean; + } = {}, ): ProviderReplayPolicy | undefined { if ( modelApi !== "openai-completions" && @@ -24,6 +28,7 @@ export function buildOpenAICompatibleReplayPolicy( } const sanitizeToolCallIds = options.sanitizeToolCallIds ?? true; + const dropReasoningFromHistory = options.dropReasoningFromHistory ?? true; return { ...(sanitizeToolCallIds @@ -40,7 +45,8 @@ export function buildOpenAICompatibleReplayPolicy( validateGeminiTurns: false, validateAnthropicTurns: false, }), - ...(modelApi === "openai-completions" && isGemma4ModelId(options.modelId) + ...(modelApi === "openai-completions" && + (dropReasoningFromHistory || isGemma4ModelId(options.modelId)) ? { dropReasoningFromHistory: true } : {}), };