fix: strip OpenAI-compatible replay reasoning

This commit is contained in:
Peter Steinberger
2026-05-10 10:08:26 +01:00
parent 0235040840
commit 9444b2ad9b
11 changed files with 60 additions and 31 deletions

View File

@@ -21,6 +21,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- OpenAI-compatible models: strip prior assistant reasoning fields from replayed Chat Completions history by default, preventing oMLX/vLLM Qwen follow-up turns from rejecting or stalling on stale `reasoning` payloads. Fixes #46637. Thanks @zipzagster and @lexhoefsloot.
- CLI/onboarding: give non-Azure custom providers a safe generated context window and heal legacy 4k wizard entries without overwriting explicit valid small model limits, preventing first-turn compaction loops. Fixes #79428. (#79911) Thanks @Jefsky.
- OpenAI-compatible models: add `compat.strictMessageKeys` to strip Chat Completions replay messages to `role` and `content` for strict providers that reject OpenAI-style tool and metadata keys. Fixes #50374. Thanks @choutos.
- Ollama: stop native `/api/chat` requests from copying catalog `contextWindow` or `maxTokens` into `options.num_ctx` unless `params.num_ctx` is explicitly configured, avoiding pathological prompt-ingestion latency on local large-context models. Fixes #62267. Thanks @BenSHPD.

View File

@@ -124,12 +124,15 @@ inter-session user turns that only have provenance metadata.
- Missing OpenAI Responses-family tool outputs are synthesized as `aborted` to match Codex replay normalization.
- No thought signature stripping.
**OpenAI-compatible Gemma 4**
**OpenAI-compatible Chat Completions**
- Historical assistant thinking/reasoning blocks are stripped before replay so local
OpenAI-compatible Gemma 4 servers do not receive prior-turn reasoning content.
- Historical assistant thinking/reasoning blocks are stripped before replay so
local and proxy-style OpenAI-compatible servers do not receive prior-turn
reasoning fields such as `reasoning` or `reasoning_content`.
- Current same-turn tool-call continuations keep the assistant reasoning block
attached to the tool call until the tool result has been replayed.
- Provider-owned exceptions can opt out of this reasoning stripping when their
  wire protocol requires replayed reasoning metadata.
**Google (Generative AI / Gemini CLI / Antigravity)**

View File

@@ -45,7 +45,10 @@ export default defineSingleProviderPluginEntry({
}),
matchesContextOverflowError: ({ errorMessage }) =>
/\bdeepseek\b.*(?:input.*too long|context.*exceed)/i.test(errorMessage),
...buildProviderReplayFamilyHooks({ family: "openai-compatible" }),
...buildProviderReplayFamilyHooks({
family: "openai-compatible",
dropReasoningFromHistory: false,
}),
wrapStreamFn: (ctx) => createDeepSeekV4ThinkingWrapper(ctx.streamFn, ctx.thinkingLevel),
resolveThinkingProfile: ({ modelId }) => resolveDeepSeekV4ThinkingProfile(modelId),
isModernModelRef: ({ modelId }) => Boolean(resolveDeepSeekV4ThinkingProfile(modelId)),

View File

@@ -18,8 +18,8 @@ import {
validateApiKeyInput,
} from "openclaw/plugin-sdk/provider-auth-api-key";
import {
buildProviderReplayFamilyHooks,
normalizeModelCompat,
OPENAI_COMPATIBLE_REPLAY_HOOKS,
} from "openclaw/plugin-sdk/provider-model-shared";
import {
createPayloadPatchStreamWrapper,
@@ -319,7 +319,10 @@ export default definePluginEntry({
}),
],
resolveDynamicModel: (ctx) => resolveGlm5ForwardCompatModel(ctx),
...OPENAI_COMPATIBLE_REPLAY_HOOKS,
...buildProviderReplayFamilyHooks({
family: "openai-compatible",
dropReasoningFromHistory: false,
}),
prepareExtraParams: (ctx) => defaultToolStreamExtraParams(ctx.extraParams),
wrapStreamFn: (ctx) => wrapZaiStreamFn(ctx),
resolveThinkingProfile: () => ({

View File

@@ -1196,7 +1196,7 @@ describe("sanitizeSessionHistory", () => {
]);
});
it("strips prior assistant reasoning for Gemma 4 OpenAI-compatible replay", async () => {
it("strips prior assistant reasoning for Qwen-style OpenAI-compatible replay", async () => {
setNonGoogleModelApi();
const messages = castAgentMessages([
@@ -1215,8 +1215,8 @@ describe("sanitizeSessionHistory", () => {
const result = await sanitizeSessionHistory({
messages,
modelApi: "openai-completions",
provider: "lmstudio",
modelId: "google/gemma-4-26b-a4b-it",
provider: "vllm",
modelId: "Qwen3.6-27B",
sessionManager: makeMockSessionManager(),
sessionId: TEST_SESSION_ID,
});
@@ -1226,7 +1226,7 @@ describe("sanitizeSessionHistory", () => {
]);
});
it("preserves current Gemma 4 tool-call reasoning during tool continuation replay", async () => {
it("preserves current OpenAI-compatible tool-call reasoning during tool continuation replay", async () => {
setNonGoogleModelApi();
const messages = castAgentMessages([
@@ -1251,8 +1251,8 @@ describe("sanitizeSessionHistory", () => {
const result = await sanitizeSessionHistory({
messages,
modelApi: "openai-completions",
provider: "lmstudio",
modelId: "google/gemma-4-26b-a4b-it",
provider: "vllm",
modelId: "Qwen3.6-27B",
sessionManager: makeMockSessionManager(),
sessionId: TEST_SESSION_ID,
});

View File

@@ -346,20 +346,20 @@ describe("resolveTranscriptPolicy", () => {
expect(policy.validateAnthropicTurns).toBe(true);
});
it("strips historical reasoning for Gemma 4 on OpenAI-compatible providers", () => {
it("strips historical reasoning for strict OpenAI-compatible providers", () => {
const policy = resolveTranscriptPolicy({
provider: "custom-openai-proxy",
modelId: "google/gemma-4-26b-a4b-it",
modelId: "qwen3.6-27b",
modelApi: "openai-completions",
});
expect(policy.dropReasoningFromHistory).toBe(true);
const gemma3Policy = resolveTranscriptPolicy({
const responsesPolicy = resolveTranscriptPolicy({
provider: "custom-openai-proxy",
modelId: "google/gemma-3-27b-it",
modelApi: "openai-completions",
modelId: "qwen3.6-27b",
modelApi: "openai-responses",
});
expect(gemma3Policy.dropReasoningFromHistory).toBe(false);
expect(responsesPolicy.dropReasoningFromHistory).toBe(false);
});
it("falls back to unowned transport defaults when no owning plugin exists", () => {

View File

@@ -7,10 +7,7 @@ import type { ProviderRuntimeModel } from "../plugins/provider-runtime-model.typ
import type { ProviderReplayPolicy } from "../plugins/types.js";
import { normalizeLowercaseStringOrEmpty } from "../shared/string-coerce.js";
import { normalizeProviderId } from "./model-selection.js";
import {
isGemma4ModelRequiringReasoningStrip,
isGoogleModelApi,
} from "./pi-embedded-helpers/google.js";
import { isGoogleModelApi } from "./pi-embedded-helpers/google.js";
import type { ToolCallIdMode } from "./tool-call-id.js";
export type TranscriptSanitizeMode = "full" | "images-only";
@@ -146,9 +143,7 @@ function buildUnownedProviderTransportReplayFallback(params: {
...(isAnthropic && modelDisablesReasoningEffort(params.model)
? { dropThinkingBlocks: true }
: {}),
...(isStrictOpenAiCompatible && isGemma4ModelRequiringReasoningStrip(modelId)
? { dropReasoningFromHistory: true }
: {}),
...(isStrictOpenAiCompatible ? { dropReasoningFromHistory: true } : {}),
...(isGoogle || isStrictOpenAiCompatible ? { applyAssistantFirstOrderingFix: true } : {}),
...(isGoogle || isStrictOpenAiCompatible ? { validateGeminiTurns: true } : {}),
...(isAnthropic || isStrictOpenAiCompatible || isClaudeOpenAiResponses

View File

@@ -22,6 +22,7 @@ describe("buildProviderReplayFamilyHooks", () => {
sanitizeToolCallIds: true,
applyAssistantFirstOrderingFix: true,
validateGeminiTurns: true,
dropReasoningFromHistory: true,
},
hasSanitizeReplayHistory: false,
reasoningMode: undefined,
@@ -196,6 +197,7 @@ describe("buildProviderReplayFamilyHooks", () => {
const nativeIdsHooks = buildProviderReplayFamilyHooks({
family: "openai-compatible",
sanitizeToolCallIds: false,
dropReasoningFromHistory: false,
});
const nativeIdsPolicy = nativeIdsHooks.buildReplayPolicy?.({
provider: "moonshot",

View File

@@ -172,7 +172,11 @@ type ProviderReplayFamilyHooks = Pick<
>;
type BuildProviderReplayFamilyHooksOptions =
| { family: "openai-compatible"; sanitizeToolCallIds?: boolean }
| {
family: "openai-compatible";
sanitizeToolCallIds?: boolean;
dropReasoningFromHistory?: boolean;
}
| { family: "anthropic-by-model" }
| { family: "native-anthropic-by-model" }
| { family: "google-gemini" }
@@ -187,7 +191,10 @@ export function buildProviderReplayFamilyHooks(
): ProviderReplayFamilyHooks {
switch (options.family) {
case "openai-compatible": {
const policyOptions = { sanitizeToolCallIds: options.sanitizeToolCallIds };
const policyOptions = {
sanitizeToolCallIds: options.sanitizeToolCallIds,
dropReasoningFromHistory: options.dropReasoningFromHistory,
};
return {
buildReplayPolicy: (ctx: ProviderReplayPolicyContext) =>
buildOpenAICompatibleReplayPolicy(ctx.modelApi, {

View File

@@ -35,10 +35,10 @@ describe("provider replay helpers", () => {
expect(policy).not.toHaveProperty("toolCallIdMode");
});
it("drops historical reasoning for Gemma 4 openai-completions replay", () => {
it("drops historical reasoning for OpenAI-compatible chat completions replay", () => {
expect(
buildOpenAICompatibleReplayPolicy("openai-completions", {
modelId: "google/gemma-4-26b-a4b-it",
modelId: "qwen3.6-27b",
}),
).toMatchObject({
dropReasoningFromHistory: true,
@@ -46,8 +46,17 @@ describe("provider replay helpers", () => {
expect(
buildOpenAICompatibleReplayPolicy("openai-completions", {
modelId: "google/gemma-3-27b-it",
dropReasoningFromHistory: false,
}),
).not.toHaveProperty("dropReasoningFromHistory");
expect(
buildOpenAICompatibleReplayPolicy("openai-completions", {
modelId: "google/gemma-4-26b-a4b-it",
dropReasoningFromHistory: false,
}),
).toMatchObject({
dropReasoningFromHistory: true,
});
expect(
buildOpenAICompatibleReplayPolicy("openai-responses", {
modelId: "google/gemma-4-26b-a4b-it",

View File

@@ -12,7 +12,11 @@ import type {
export function buildOpenAICompatibleReplayPolicy(
modelApi: string | null | undefined,
options: { sanitizeToolCallIds?: boolean; modelId?: string | null } = {},
options: {
sanitizeToolCallIds?: boolean;
modelId?: string | null;
dropReasoningFromHistory?: boolean;
} = {},
): ProviderReplayPolicy | undefined {
if (
modelApi !== "openai-completions" &&
@@ -24,6 +28,7 @@ export function buildOpenAICompatibleReplayPolicy(
}
const sanitizeToolCallIds = options.sanitizeToolCallIds ?? true;
const dropReasoningFromHistory = options.dropReasoningFromHistory ?? true;
return {
...(sanitizeToolCallIds
@@ -40,7 +45,8 @@ export function buildOpenAICompatibleReplayPolicy(
validateGeminiTurns: false,
validateAnthropicTurns: false,
}),
...(modelApi === "openai-completions" && isGemma4ModelId(options.modelId)
...(modelApi === "openai-completions" &&
(dropReasoningFromHistory || isGemma4ModelId(options.modelId))
? { dropReasoningFromHistory: true }
: {}),
};