fix: cap hard context guard references

Val Alexander
2026-04-27 13:10:33 -05:00
parent 1c00247ebe
commit 9c80383639
3 changed files with 16 additions and 2 deletions


@@ -319,7 +319,7 @@ Compatibility notes for stricter OpenAI-compatible backends:
 OpenClaw process RSS/heap snapshot in diagnostics. For LM Studio/Ollama
 memory pressure, match that timestamp against the server log or macOS crash /
 jetsam log to confirm whether the model server was killed.
-- OpenClaw derives context-window preflight thresholds from the detected model window, or from the uncapped model window when `agents.defaults.contextTokens` lowers the effective window. It warns below 20% with an **8k** floor and blocks below 10% with a **4k** floor. If you hit that preflight, raise the server/model context limit or choose a larger model.
+- OpenClaw derives context-window preflight thresholds from the detected model window, or from the uncapped model window when `agents.defaults.contextTokens` lowers the effective window. It warns below 20% with an **8k** floor. Hard blocks use the 10% threshold with a **4k** floor, capped to the effective context window so oversized model metadata cannot reject an otherwise valid user cap. If you hit that preflight, raise the server/model context limit or choose a larger model.
 - Context errors? Lower `contextWindow` or raise your server limit.
 - OpenAI-compatible server returns `messages[].content ... expected a string`?
   Add `compat.requiresStringContent: true` on that model entry.
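
The threshold derivation described in that note can be sketched as follows. This is a minimal illustration of the ratios, floors, and cap, not OpenClaw's actual implementation; `deriveThresholds` and the constant names are assumptions made for this example.

```typescript
// Illustrative sketch of the preflight threshold math described above.
// Names and structure are assumptions, not the real OpenClaw API.
const WARN_RATIO = 0.2;   // warn below 20% of the reference window
const BLOCK_RATIO = 0.1;  // hard-block below 10% of the reference window
const WARN_FLOOR = 8_000; // 8k warn floor
const HARD_FLOOR = 4_000; // 4k hard floor

function deriveThresholds(effectiveTokens: number, referenceTokens?: number) {
  // Use the uncapped model window as the reference when the user cap
  // lowered the effective window; otherwise fall back to the effective one.
  const reference = referenceTokens ?? effectiveTokens;
  const warnBelow = Math.max(WARN_FLOOR, Math.floor(reference * WARN_RATIO));
  // Cap the hard minimum to the effective window so inflated model
  // metadata cannot reject an otherwise valid user-configured cap.
  const rawHardMin = Math.max(HARD_FLOOR, Math.floor(reference * BLOCK_RATIO));
  const hardMin = Math.min(rawHardMin, Math.max(effectiveTokens, HARD_FLOOR));
  return {
    warnBelow,
    hardMin,
    shouldWarn: effectiveTokens < warnBelow,
    shouldBlock: effectiveTokens < hardMin,
  };
}
```

With a 20k user cap against a 1-billion-token reference window (the case exercised by the new test below), the uncapped hard minimum would be 100,000,000; the cap reduces it to 20,000, so the configuration warns but is not blocked.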


@@ -267,6 +267,16 @@ describe("context-window-guard", () => {
     expect(guard.shouldBlock).toBe(false);
   });
+  it("does not let inflated reference metadata hard-block a valid effective cap", () => {
+    const guard = evaluateContextWindowGuard({
+      info: { tokens: 20_000, referenceTokens: 1_000_000_000, source: "agentContextTokens" },
+    });
+    expect(guard.hardMinTokens).toBe(20_000);
+    expect(guard.warnBelowTokens).toBe(200_000_000);
+    expect(guard.shouldWarn).toBe(true);
+    expect(guard.shouldBlock).toBe(false);
+  });
   it("adds a local-model hint to warning messages for localhost endpoints", () => {
     const guard = evaluateContextWindowGuard({
       info: { tokens: 6_000, source: "model" },


@@ -174,7 +174,11 @@ export function evaluateContextWindowGuard(params: {
     1,
     Math.floor(params.warnBelowTokens ?? resolvedThresholds.warnBelowTokens),
   );
-  const hardMin = Math.max(1, Math.floor(params.hardMinTokens ?? resolvedThresholds.hardMinTokens));
+  const defaultHardMin = Math.min(
+    resolvedThresholds.hardMinTokens,
+    Math.max(tokens, CONTEXT_WINDOW_HARD_MIN_TOKENS),
+  );
+  const hardMin = Math.max(1, Math.floor(params.hardMinTokens ?? defaultHardMin));
   return {
     ...params.info,
     tokens,