fix: strip OpenAI-compatible replay reasoning

This commit is contained in:
Peter Steinberger
2026-05-10 10:08:26 +01:00
parent 0235040840
commit 9444b2ad9b
11 changed files with 60 additions and 31 deletions

View File

@@ -21,6 +21,7 @@ Docs: https://docs.openclaw.ai
### Fixes ### Fixes
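- OpenAI-compatible models: strip prior assistant reasoning fields (such as `reasoning` and `reasoning_content`) from replayed Chat Completions history by default, so oMLX/vLLM servers running Qwen no longer reject or stall on follow-up turns that carry stale `reasoning` payloads. Providers whose wire protocol requires replayed reasoning can opt out with `dropReasoningFromHistory: false`. Fixes #46637. Thanks @zipzagster and @lexhoefsloot.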
- CLI/onboarding: give non-Azure custom providers a safe generated context window and heal legacy 4k wizard entries without overwriting explicit valid small model limits, preventing first-turn compaction loops. Fixes #79428. (#79911) Thanks @Jefsky. - CLI/onboarding: give non-Azure custom providers a safe generated context window and heal legacy 4k wizard entries without overwriting explicit valid small model limits, preventing first-turn compaction loops. Fixes #79428. (#79911) Thanks @Jefsky.
- OpenAI-compatible models: add `compat.strictMessageKeys` to strip Chat Completions replay messages to `role` and `content` for strict providers that reject OpenAI-style tool and metadata keys. Fixes #50374. Thanks @choutos. - OpenAI-compatible models: add `compat.strictMessageKeys` to strip Chat Completions replay messages to `role` and `content` for strict providers that reject OpenAI-style tool and metadata keys. Fixes #50374. Thanks @choutos.
- Ollama: stop native `/api/chat` requests from copying catalog `contextWindow` or `maxTokens` into `options.num_ctx` unless `params.num_ctx` is explicitly configured, avoiding pathological prompt-ingestion latency on local large-context models. Fixes #62267. Thanks @BenSHPD. - Ollama: stop native `/api/chat` requests from copying catalog `contextWindow` or `maxTokens` into `options.num_ctx` unless `params.num_ctx` is explicitly configured, avoiding pathological prompt-ingestion latency on local large-context models. Fixes #62267. Thanks @BenSHPD.

View File

@@ -124,12 +124,15 @@ inter-session user turns that only have provenance metadata.
- Missing OpenAI Responses-family tool outputs are synthesized as `aborted` to match Codex replay normalization. - Missing OpenAI Responses-family tool outputs are synthesized as `aborted` to match Codex replay normalization.
- No thought signature stripping. - No thought signature stripping.
**OpenAI-compatible Gemma 4** **OpenAI-compatible Chat Completions**
- Historical assistant thinking/reasoning blocks are stripped before replay so local - Historical assistant thinking/reasoning blocks are stripped before replay so
OpenAI-compatible Gemma 4 servers do not receive prior-turn reasoning content. local and proxy-style OpenAI-compatible servers do not receive prior-turn
reasoning fields such as `reasoning` or `reasoning_content`.
- Current same-turn tool-call continuations keep the assistant reasoning block - Current same-turn tool-call continuations keep the assistant reasoning block
attached to the tool call until the tool result has been replayed. attached to the tool call until the tool result has been replayed.
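- Provider-owned exceptions can opt out when their wire protocol requires
replayed reasoning metadata, by passing `dropReasoningFromHistory: false` to
`buildProviderReplayFamilyHooks({ family: "openai-compatible" })`. Gemma 4
model ids on `openai-completions` are still stripped even when a provider
opts out.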
**Google (Generative AI / Gemini CLI / Antigravity)** **Google (Generative AI / Gemini CLI / Antigravity)**

View File

@@ -45,7 +45,10 @@ export default defineSingleProviderPluginEntry({
}), }),
matchesContextOverflowError: ({ errorMessage }) => matchesContextOverflowError: ({ errorMessage }) =>
/\bdeepseek\b.*(?:input.*too long|context.*exceed)/i.test(errorMessage), /\bdeepseek\b.*(?:input.*too long|context.*exceed)/i.test(errorMessage),
...buildProviderReplayFamilyHooks({ family: "openai-compatible" }), ...buildProviderReplayFamilyHooks({
family: "openai-compatible",
dropReasoningFromHistory: false,
}),
wrapStreamFn: (ctx) => createDeepSeekV4ThinkingWrapper(ctx.streamFn, ctx.thinkingLevel), wrapStreamFn: (ctx) => createDeepSeekV4ThinkingWrapper(ctx.streamFn, ctx.thinkingLevel),
resolveThinkingProfile: ({ modelId }) => resolveDeepSeekV4ThinkingProfile(modelId), resolveThinkingProfile: ({ modelId }) => resolveDeepSeekV4ThinkingProfile(modelId),
isModernModelRef: ({ modelId }) => Boolean(resolveDeepSeekV4ThinkingProfile(modelId)), isModernModelRef: ({ modelId }) => Boolean(resolveDeepSeekV4ThinkingProfile(modelId)),

View File

@@ -18,8 +18,8 @@ import {
validateApiKeyInput, validateApiKeyInput,
} from "openclaw/plugin-sdk/provider-auth-api-key"; } from "openclaw/plugin-sdk/provider-auth-api-key";
import { import {
buildProviderReplayFamilyHooks,
normalizeModelCompat, normalizeModelCompat,
OPENAI_COMPATIBLE_REPLAY_HOOKS,
} from "openclaw/plugin-sdk/provider-model-shared"; } from "openclaw/plugin-sdk/provider-model-shared";
import { import {
createPayloadPatchStreamWrapper, createPayloadPatchStreamWrapper,
@@ -319,7 +319,10 @@ export default definePluginEntry({
}), }),
], ],
resolveDynamicModel: (ctx) => resolveGlm5ForwardCompatModel(ctx), resolveDynamicModel: (ctx) => resolveGlm5ForwardCompatModel(ctx),
...OPENAI_COMPATIBLE_REPLAY_HOOKS, ...buildProviderReplayFamilyHooks({
family: "openai-compatible",
dropReasoningFromHistory: false,
}),
prepareExtraParams: (ctx) => defaultToolStreamExtraParams(ctx.extraParams), prepareExtraParams: (ctx) => defaultToolStreamExtraParams(ctx.extraParams),
wrapStreamFn: (ctx) => wrapZaiStreamFn(ctx), wrapStreamFn: (ctx) => wrapZaiStreamFn(ctx),
resolveThinkingProfile: () => ({ resolveThinkingProfile: () => ({

View File

@@ -1196,7 +1196,7 @@ describe("sanitizeSessionHistory", () => {
]); ]);
}); });
it("strips prior assistant reasoning for Gemma 4 OpenAI-compatible replay", async () => { it("strips prior assistant reasoning for Qwen-style OpenAI-compatible replay", async () => {
setNonGoogleModelApi(); setNonGoogleModelApi();
const messages = castAgentMessages([ const messages = castAgentMessages([
@@ -1215,8 +1215,8 @@ describe("sanitizeSessionHistory", () => {
const result = await sanitizeSessionHistory({ const result = await sanitizeSessionHistory({
messages, messages,
modelApi: "openai-completions", modelApi: "openai-completions",
provider: "lmstudio", provider: "vllm",
modelId: "google/gemma-4-26b-a4b-it", modelId: "Qwen3.6-27B",
sessionManager: makeMockSessionManager(), sessionManager: makeMockSessionManager(),
sessionId: TEST_SESSION_ID, sessionId: TEST_SESSION_ID,
}); });
@@ -1226,7 +1226,7 @@ describe("sanitizeSessionHistory", () => {
]); ]);
}); });
it("preserves current Gemma 4 tool-call reasoning during tool continuation replay", async () => { it("preserves current OpenAI-compatible tool-call reasoning during tool continuation replay", async () => {
setNonGoogleModelApi(); setNonGoogleModelApi();
const messages = castAgentMessages([ const messages = castAgentMessages([
@@ -1251,8 +1251,8 @@ describe("sanitizeSessionHistory", () => {
const result = await sanitizeSessionHistory({ const result = await sanitizeSessionHistory({
messages, messages,
modelApi: "openai-completions", modelApi: "openai-completions",
provider: "lmstudio", provider: "vllm",
modelId: "google/gemma-4-26b-a4b-it", modelId: "Qwen3.6-27B",
sessionManager: makeMockSessionManager(), sessionManager: makeMockSessionManager(),
sessionId: TEST_SESSION_ID, sessionId: TEST_SESSION_ID,
}); });

View File

@@ -346,20 +346,20 @@ describe("resolveTranscriptPolicy", () => {
expect(policy.validateAnthropicTurns).toBe(true); expect(policy.validateAnthropicTurns).toBe(true);
}); });
it("strips historical reasoning for Gemma 4 on OpenAI-compatible providers", () => { it("strips historical reasoning for strict OpenAI-compatible providers", () => {
const policy = resolveTranscriptPolicy({ const policy = resolveTranscriptPolicy({
provider: "custom-openai-proxy", provider: "custom-openai-proxy",
modelId: "google/gemma-4-26b-a4b-it", modelId: "qwen3.6-27b",
modelApi: "openai-completions", modelApi: "openai-completions",
}); });
expect(policy.dropReasoningFromHistory).toBe(true); expect(policy.dropReasoningFromHistory).toBe(true);
const gemma3Policy = resolveTranscriptPolicy({ const responsesPolicy = resolveTranscriptPolicy({
provider: "custom-openai-proxy", provider: "custom-openai-proxy",
modelId: "google/gemma-3-27b-it", modelId: "qwen3.6-27b",
modelApi: "openai-completions", modelApi: "openai-responses",
}); });
expect(gemma3Policy.dropReasoningFromHistory).toBe(false); expect(responsesPolicy.dropReasoningFromHistory).toBe(false);
}); });
it("falls back to unowned transport defaults when no owning plugin exists", () => { it("falls back to unowned transport defaults when no owning plugin exists", () => {

View File

@@ -7,10 +7,7 @@ import type { ProviderRuntimeModel } from "../plugins/provider-runtime-model.typ
import type { ProviderReplayPolicy } from "../plugins/types.js"; import type { ProviderReplayPolicy } from "../plugins/types.js";
import { normalizeLowercaseStringOrEmpty } from "../shared/string-coerce.js"; import { normalizeLowercaseStringOrEmpty } from "../shared/string-coerce.js";
import { normalizeProviderId } from "./model-selection.js"; import { normalizeProviderId } from "./model-selection.js";
import { import { isGoogleModelApi } from "./pi-embedded-helpers/google.js";
isGemma4ModelRequiringReasoningStrip,
isGoogleModelApi,
} from "./pi-embedded-helpers/google.js";
import type { ToolCallIdMode } from "./tool-call-id.js"; import type { ToolCallIdMode } from "./tool-call-id.js";
export type TranscriptSanitizeMode = "full" | "images-only"; export type TranscriptSanitizeMode = "full" | "images-only";
@@ -146,9 +143,7 @@ function buildUnownedProviderTransportReplayFallback(params: {
...(isAnthropic && modelDisablesReasoningEffort(params.model) ...(isAnthropic && modelDisablesReasoningEffort(params.model)
? { dropThinkingBlocks: true } ? { dropThinkingBlocks: true }
: {}), : {}),
...(isStrictOpenAiCompatible && isGemma4ModelRequiringReasoningStrip(modelId) ...(isStrictOpenAiCompatible ? { dropReasoningFromHistory: true } : {}),
? { dropReasoningFromHistory: true }
: {}),
...(isGoogle || isStrictOpenAiCompatible ? { applyAssistantFirstOrderingFix: true } : {}), ...(isGoogle || isStrictOpenAiCompatible ? { applyAssistantFirstOrderingFix: true } : {}),
...(isGoogle || isStrictOpenAiCompatible ? { validateGeminiTurns: true } : {}), ...(isGoogle || isStrictOpenAiCompatible ? { validateGeminiTurns: true } : {}),
...(isAnthropic || isStrictOpenAiCompatible || isClaudeOpenAiResponses ...(isAnthropic || isStrictOpenAiCompatible || isClaudeOpenAiResponses

View File

@@ -22,6 +22,7 @@ describe("buildProviderReplayFamilyHooks", () => {
sanitizeToolCallIds: true, sanitizeToolCallIds: true,
applyAssistantFirstOrderingFix: true, applyAssistantFirstOrderingFix: true,
validateGeminiTurns: true, validateGeminiTurns: true,
dropReasoningFromHistory: true,
}, },
hasSanitizeReplayHistory: false, hasSanitizeReplayHistory: false,
reasoningMode: undefined, reasoningMode: undefined,
@@ -196,6 +197,7 @@ describe("buildProviderReplayFamilyHooks", () => {
const nativeIdsHooks = buildProviderReplayFamilyHooks({ const nativeIdsHooks = buildProviderReplayFamilyHooks({
family: "openai-compatible", family: "openai-compatible",
sanitizeToolCallIds: false, sanitizeToolCallIds: false,
dropReasoningFromHistory: false,
}); });
const nativeIdsPolicy = nativeIdsHooks.buildReplayPolicy?.({ const nativeIdsPolicy = nativeIdsHooks.buildReplayPolicy?.({
provider: "moonshot", provider: "moonshot",

View File

@@ -172,7 +172,11 @@ type ProviderReplayFamilyHooks = Pick<
>; >;
type BuildProviderReplayFamilyHooksOptions = type BuildProviderReplayFamilyHooksOptions =
| { family: "openai-compatible"; sanitizeToolCallIds?: boolean } | {
family: "openai-compatible";
sanitizeToolCallIds?: boolean;
dropReasoningFromHistory?: boolean;
}
| { family: "anthropic-by-model" } | { family: "anthropic-by-model" }
| { family: "native-anthropic-by-model" } | { family: "native-anthropic-by-model" }
| { family: "google-gemini" } | { family: "google-gemini" }
@@ -187,7 +191,10 @@ export function buildProviderReplayFamilyHooks(
): ProviderReplayFamilyHooks { ): ProviderReplayFamilyHooks {
switch (options.family) { switch (options.family) {
case "openai-compatible": { case "openai-compatible": {
const policyOptions = { sanitizeToolCallIds: options.sanitizeToolCallIds }; const policyOptions = {
sanitizeToolCallIds: options.sanitizeToolCallIds,
dropReasoningFromHistory: options.dropReasoningFromHistory,
};
return { return {
buildReplayPolicy: (ctx: ProviderReplayPolicyContext) => buildReplayPolicy: (ctx: ProviderReplayPolicyContext) =>
buildOpenAICompatibleReplayPolicy(ctx.modelApi, { buildOpenAICompatibleReplayPolicy(ctx.modelApi, {

View File

@@ -35,10 +35,10 @@ describe("provider replay helpers", () => {
expect(policy).not.toHaveProperty("toolCallIdMode"); expect(policy).not.toHaveProperty("toolCallIdMode");
}); });
it("drops historical reasoning for Gemma 4 openai-completions replay", () => { it("drops historical reasoning for OpenAI-compatible chat completions replay", () => {
expect( expect(
buildOpenAICompatibleReplayPolicy("openai-completions", { buildOpenAICompatibleReplayPolicy("openai-completions", {
modelId: "google/gemma-4-26b-a4b-it", modelId: "qwen3.6-27b",
}), }),
).toMatchObject({ ).toMatchObject({
dropReasoningFromHistory: true, dropReasoningFromHistory: true,
@@ -46,8 +46,17 @@ describe("provider replay helpers", () => {
expect( expect(
buildOpenAICompatibleReplayPolicy("openai-completions", { buildOpenAICompatibleReplayPolicy("openai-completions", {
modelId: "google/gemma-3-27b-it", modelId: "google/gemma-3-27b-it",
dropReasoningFromHistory: false,
}), }),
).not.toHaveProperty("dropReasoningFromHistory"); ).not.toHaveProperty("dropReasoningFromHistory");
expect(
buildOpenAICompatibleReplayPolicy("openai-completions", {
modelId: "google/gemma-4-26b-a4b-it",
dropReasoningFromHistory: false,
}),
).toMatchObject({
dropReasoningFromHistory: true,
});
expect( expect(
buildOpenAICompatibleReplayPolicy("openai-responses", { buildOpenAICompatibleReplayPolicy("openai-responses", {
modelId: "google/gemma-4-26b-a4b-it", modelId: "google/gemma-4-26b-a4b-it",

View File

@@ -12,7 +12,11 @@ import type {
export function buildOpenAICompatibleReplayPolicy( export function buildOpenAICompatibleReplayPolicy(
modelApi: string | null | undefined, modelApi: string | null | undefined,
options: { sanitizeToolCallIds?: boolean; modelId?: string | null } = {}, options: {
sanitizeToolCallIds?: boolean;
modelId?: string | null;
dropReasoningFromHistory?: boolean;
} = {},
): ProviderReplayPolicy | undefined { ): ProviderReplayPolicy | undefined {
if ( if (
modelApi !== "openai-completions" && modelApi !== "openai-completions" &&
@@ -24,6 +28,7 @@ export function buildOpenAICompatibleReplayPolicy(
} }
const sanitizeToolCallIds = options.sanitizeToolCallIds ?? true; const sanitizeToolCallIds = options.sanitizeToolCallIds ?? true;
const dropReasoningFromHistory = options.dropReasoningFromHistory ?? true;
return { return {
...(sanitizeToolCallIds ...(sanitizeToolCallIds
@@ -40,7 +45,8 @@ export function buildOpenAICompatibleReplayPolicy(
validateGeminiTurns: false, validateGeminiTurns: false,
validateAnthropicTurns: false, validateAnthropicTurns: false,
}), }),
...(modelApi === "openai-completions" && isGemma4ModelId(options.modelId) ...(modelApi === "openai-completions" &&
(dropReasoningFromHistory || isGemma4ModelId(options.modelId))
? { dropReasoningFromHistory: true } ? { dropReasoningFromHistory: true }
: {}), : {}),
}; };