fix: strip OpenAI-compatible replay reasoning

This commit is contained in:
Peter Steinberger
2026-05-10 10:08:26 +01:00
parent 0235040840
commit 9444b2ad9b
11 changed files with 60 additions and 31 deletions

View File

@@ -21,6 +21,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- OpenAI-compatible models: strip prior assistant reasoning fields from replayed Chat Completions history by default, preventing oMLX/vLLM Qwen follow-up turns from rejecting or stalling on stale `reasoning` payloads. Fixes #46637. Thanks @zipzagster and @lexhoefsloot.
- CLI/onboarding: give non-Azure custom providers a safe generated context window and heal legacy 4k wizard entries without overwriting explicit valid small model limits, preventing first-turn compaction loops. Fixes #79428. (#79911) Thanks @Jefsky.
- OpenAI-compatible models: add `compat.strictMessageKeys` to strip Chat Completions replay messages to `role` and `content` for strict providers that reject OpenAI-style tool and metadata keys. Fixes #50374. Thanks @choutos.
- Ollama: stop native `/api/chat` requests from copying catalog `contextWindow` or `maxTokens` into `options.num_ctx` unless `params.num_ctx` is explicitly configured, avoiding pathological prompt-ingestion latency on local large-context models. Fixes #62267. Thanks @BenSHPD.

View File

@@ -124,12 +124,15 @@ inter-session user turns that only have provenance metadata.
- Missing OpenAI Responses-family tool outputs are synthesized as `aborted` to match Codex replay normalization.
- No thought signature stripping.
**OpenAI-compatible Gemma 4**
**OpenAI-compatible Chat Completions**
- Historical assistant thinking/reasoning blocks are stripped before replay so local
OpenAI-compatible Gemma 4 servers do not receive prior-turn reasoning content.
- Historical assistant thinking/reasoning blocks are stripped before replay so
local and proxy-style OpenAI-compatible servers do not receive prior-turn
reasoning fields such as `reasoning` or `reasoning_content`.
- Current same-turn tool-call continuations keep the assistant reasoning block
attached to the tool call until the tool result has been replayed.
- Provider-owned exceptions can opt out of this reasoning stripping when their
  wire protocol requires replayed reasoning metadata.
**Google (Generative AI / Gemini CLI / Antigravity)**

View File

@@ -45,7 +45,10 @@ export default defineSingleProviderPluginEntry({
}),
matchesContextOverflowError: ({ errorMessage }) =>
/\bdeepseek\b.*(?:input.*too long|context.*exceed)/i.test(errorMessage),
...buildProviderReplayFamilyHooks({ family: "openai-compatible" }),
...buildProviderReplayFamilyHooks({
family: "openai-compatible",
dropReasoningFromHistory: false,
}),
wrapStreamFn: (ctx) => createDeepSeekV4ThinkingWrapper(ctx.streamFn, ctx.thinkingLevel),
resolveThinkingProfile: ({ modelId }) => resolveDeepSeekV4ThinkingProfile(modelId),
isModernModelRef: ({ modelId }) => Boolean(resolveDeepSeekV4ThinkingProfile(modelId)),

View File

@@ -18,8 +18,8 @@ import {
validateApiKeyInput,
} from "openclaw/plugin-sdk/provider-auth-api-key";
import {
buildProviderReplayFamilyHooks,
normalizeModelCompat,
OPENAI_COMPATIBLE_REPLAY_HOOKS,
} from "openclaw/plugin-sdk/provider-model-shared";
import {
createPayloadPatchStreamWrapper,
@@ -319,7 +319,10 @@ export default definePluginEntry({
}),
],
resolveDynamicModel: (ctx) => resolveGlm5ForwardCompatModel(ctx),
...OPENAI_COMPATIBLE_REPLAY_HOOKS,
...buildProviderReplayFamilyHooks({
family: "openai-compatible",
dropReasoningFromHistory: false,
}),
prepareExtraParams: (ctx) => defaultToolStreamExtraParams(ctx.extraParams),
wrapStreamFn: (ctx) => wrapZaiStreamFn(ctx),
resolveThinkingProfile: () => ({

View File

@@ -1196,7 +1196,7 @@ describe("sanitizeSessionHistory", () => {
]);
});
it("strips prior assistant reasoning for Gemma 4 OpenAI-compatible replay", async () => {
it("strips prior assistant reasoning for Qwen-style OpenAI-compatible replay", async () => {
setNonGoogleModelApi();
const messages = castAgentMessages([
@@ -1215,8 +1215,8 @@ describe("sanitizeSessionHistory", () => {
const result = await sanitizeSessionHistory({
messages,
modelApi: "openai-completions",
provider: "lmstudio",
modelId: "google/gemma-4-26b-a4b-it",
provider: "vllm",
modelId: "Qwen3.6-27B",
sessionManager: makeMockSessionManager(),
sessionId: TEST_SESSION_ID,
});
@@ -1226,7 +1226,7 @@ describe("sanitizeSessionHistory", () => {
]);
});
it("preserves current Gemma 4 tool-call reasoning during tool continuation replay", async () => {
it("preserves current OpenAI-compatible tool-call reasoning during tool continuation replay", async () => {
setNonGoogleModelApi();
const messages = castAgentMessages([
@@ -1251,8 +1251,8 @@ describe("sanitizeSessionHistory", () => {
const result = await sanitizeSessionHistory({
messages,
modelApi: "openai-completions",
provider: "lmstudio",
modelId: "google/gemma-4-26b-a4b-it",
provider: "vllm",
modelId: "Qwen3.6-27B",
sessionManager: makeMockSessionManager(),
sessionId: TEST_SESSION_ID,
});

View File

@@ -346,20 +346,20 @@ describe("resolveTranscriptPolicy", () => {
expect(policy.validateAnthropicTurns).toBe(true);
});
it("strips historical reasoning for Gemma 4 on OpenAI-compatible providers", () => {
it("strips historical reasoning for strict OpenAI-compatible providers", () => {
const policy = resolveTranscriptPolicy({
provider: "custom-openai-proxy",
modelId: "google/gemma-4-26b-a4b-it",
modelId: "qwen3.6-27b",
modelApi: "openai-completions",
});
expect(policy.dropReasoningFromHistory).toBe(true);
const gemma3Policy = resolveTranscriptPolicy({
const responsesPolicy = resolveTranscriptPolicy({
provider: "custom-openai-proxy",
modelId: "google/gemma-3-27b-it",
modelApi: "openai-completions",
modelId: "qwen3.6-27b",
modelApi: "openai-responses",
});
expect(gemma3Policy.dropReasoningFromHistory).toBe(false);
expect(responsesPolicy.dropReasoningFromHistory).toBe(false);
});
it("falls back to unowned transport defaults when no owning plugin exists", () => {

View File

@@ -7,10 +7,7 @@ import type { ProviderRuntimeModel } from "../plugins/provider-runtime-model.typ
import type { ProviderReplayPolicy } from "../plugins/types.js";
import { normalizeLowercaseStringOrEmpty } from "../shared/string-coerce.js";
import { normalizeProviderId } from "./model-selection.js";
import {
isGemma4ModelRequiringReasoningStrip,
isGoogleModelApi,
} from "./pi-embedded-helpers/google.js";
import { isGoogleModelApi } from "./pi-embedded-helpers/google.js";
import type { ToolCallIdMode } from "./tool-call-id.js";
export type TranscriptSanitizeMode = "full" | "images-only";
@@ -146,9 +143,7 @@ function buildUnownedProviderTransportReplayFallback(params: {
...(isAnthropic && modelDisablesReasoningEffort(params.model)
? { dropThinkingBlocks: true }
: {}),
...(isStrictOpenAiCompatible && isGemma4ModelRequiringReasoningStrip(modelId)
? { dropReasoningFromHistory: true }
: {}),
...(isStrictOpenAiCompatible ? { dropReasoningFromHistory: true } : {}),
...(isGoogle || isStrictOpenAiCompatible ? { applyAssistantFirstOrderingFix: true } : {}),
...(isGoogle || isStrictOpenAiCompatible ? { validateGeminiTurns: true } : {}),
...(isAnthropic || isStrictOpenAiCompatible || isClaudeOpenAiResponses

View File

@@ -22,6 +22,7 @@ describe("buildProviderReplayFamilyHooks", () => {
sanitizeToolCallIds: true,
applyAssistantFirstOrderingFix: true,
validateGeminiTurns: true,
dropReasoningFromHistory: true,
},
hasSanitizeReplayHistory: false,
reasoningMode: undefined,
@@ -196,6 +197,7 @@ describe("buildProviderReplayFamilyHooks", () => {
const nativeIdsHooks = buildProviderReplayFamilyHooks({
family: "openai-compatible",
sanitizeToolCallIds: false,
dropReasoningFromHistory: false,
});
const nativeIdsPolicy = nativeIdsHooks.buildReplayPolicy?.({
provider: "moonshot",

View File

@@ -172,7 +172,11 @@ type ProviderReplayFamilyHooks = Pick<
>;
type BuildProviderReplayFamilyHooksOptions =
| { family: "openai-compatible"; sanitizeToolCallIds?: boolean }
| {
family: "openai-compatible";
sanitizeToolCallIds?: boolean;
dropReasoningFromHistory?: boolean;
}
| { family: "anthropic-by-model" }
| { family: "native-anthropic-by-model" }
| { family: "google-gemini" }
@@ -187,7 +191,10 @@ export function buildProviderReplayFamilyHooks(
): ProviderReplayFamilyHooks {
switch (options.family) {
case "openai-compatible": {
const policyOptions = { sanitizeToolCallIds: options.sanitizeToolCallIds };
const policyOptions = {
sanitizeToolCallIds: options.sanitizeToolCallIds,
dropReasoningFromHistory: options.dropReasoningFromHistory,
};
return {
buildReplayPolicy: (ctx: ProviderReplayPolicyContext) =>
buildOpenAICompatibleReplayPolicy(ctx.modelApi, {

View File

@@ -35,10 +35,10 @@ describe("provider replay helpers", () => {
expect(policy).not.toHaveProperty("toolCallIdMode");
});
it("drops historical reasoning for Gemma 4 openai-completions replay", () => {
it("drops historical reasoning for OpenAI-compatible chat completions replay", () => {
expect(
buildOpenAICompatibleReplayPolicy("openai-completions", {
modelId: "google/gemma-4-26b-a4b-it",
modelId: "qwen3.6-27b",
}),
).toMatchObject({
dropReasoningFromHistory: true,
@@ -46,8 +46,17 @@ describe("provider replay helpers", () => {
expect(
buildOpenAICompatibleReplayPolicy("openai-completions", {
modelId: "google/gemma-3-27b-it",
dropReasoningFromHistory: false,
}),
).not.toHaveProperty("dropReasoningFromHistory");
expect(
buildOpenAICompatibleReplayPolicy("openai-completions", {
modelId: "google/gemma-4-26b-a4b-it",
dropReasoningFromHistory: false,
}),
).toMatchObject({
dropReasoningFromHistory: true,
});
expect(
buildOpenAICompatibleReplayPolicy("openai-responses", {
modelId: "google/gemma-4-26b-a4b-it",

View File

@@ -12,7 +12,11 @@ import type {
export function buildOpenAICompatibleReplayPolicy(
modelApi: string | null | undefined,
options: { sanitizeToolCallIds?: boolean; modelId?: string | null } = {},
options: {
sanitizeToolCallIds?: boolean;
modelId?: string | null;
dropReasoningFromHistory?: boolean;
} = {},
): ProviderReplayPolicy | undefined {
if (
modelApi !== "openai-completions" &&
@@ -24,6 +28,7 @@ export function buildOpenAICompatibleReplayPolicy(
}
const sanitizeToolCallIds = options.sanitizeToolCallIds ?? true;
const dropReasoningFromHistory = options.dropReasoningFromHistory ?? true;
return {
...(sanitizeToolCallIds
@@ -40,7 +45,8 @@ export function buildOpenAICompatibleReplayPolicy(
validateGeminiTurns: false,
validateAnthropicTurns: false,
}),
...(modelApi === "openai-completions" && isGemma4ModelId(options.modelId)
...(modelApi === "openai-completions" &&
(dropReasoningFromHistory || isGemma4ModelId(options.modelId))
? { dropReasoningFromHistory: true }
: {}),
};