diff --git a/README.md b/README.md
index 7a3b80b..4151aca 100644
--- a/README.md
+++ b/README.md
@@ -8,6 +8,8 @@ Enable Opencode to authenticate against **Antigravity** (Google's IDE) via OAuth
 
 - **Google OAuth sign-in** (multi-account via `opencode auth login`) with automatic token refresh
 - **Multi-account load balancing** Automatically cycle through multiple Google accounts to maximize rate limits
+- **Real-time SSE streaming** including thinking blocks and incremental output
+- **Advanced Claude support** Interleaved thinking, stable multi-turn signatures, and validated tool calling
 - **Automatic endpoint fallback** between Antigravity API endpoints (daily → autopush → prod)
 - **Antigravity API compatibility** for OpenAI-style requests
 - **Debug logging** for requests and responses
@@ -193,6 +195,24 @@ The `/connect` command in the TUI adds accounts non-destructively — it will ne
 - If Google revokes a refresh token (`invalid_grant`), that account is automatically removed from the pool
 - Rerun `opencode auth login` to re-add the account
 
+## Streaming & thinking
+
+This plugin supports **real-time SSE streaming**, meaning you see thinking blocks and text output incrementally as they are generated.
+
+### Claude Thinking & Tools
+
+For models like `claude-opus-4-5-thinking`:
+
+- **Interleaved Thinking:** The plugin automatically enables `anthropic-beta: interleaved-thinking-2025-05-14`. This allows Claude to think *between* tool calls and after tool results, improving complex reasoning.
+- **Smart System Hints:** A system instruction is silently added to encourage the model to "think" before and during tool use.
+- **Multi-turn Stability:** Thinking signatures are cached and restored using a stable `sessionId`, preventing "invalid signature" errors in long conversations.
+- **Thinking Budget Safety:** If a thinking budget is enabled, the plugin ensures output token limits are high enough to avoid budget-related errors.
+- **Tool Use:** Tool calls and responses are assigned proper IDs, and tool calling is set to validated mode for better Claude compatibility.
+
+> **Limit:** Once the final text response starts streaming, you won't see new thinking blocks for that turn, even if the model generates them internally.
+
+**Troubleshooting:** If you see signature errors in multi-turn tool loops, restart `opencode` to reset the plugin session/signature cache.
+
 ## Debugging
 
 Enable verbose logging:
diff --git a/package.json b/package.json
index b4a9753..cec5d82 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "opencode-antigravity-auth",
-  "version": "1.1.2",
+  "version": "1.2.0",
   "description": "Google Antigravity IDE OAuth auth plugin for Opencode - access Gemini 3 Pro and Claude 4.5 using Google credentials",
   "main": "./dist/index.js",
   "types": "./dist/index.d.ts",
diff --git a/src/plugin.ts b/src/plugin.ts
index 10d708e..02932d6 100644
--- a/src/plugin.ts
+++ b/src/plugin.ts
@@ -324,6 +324,7 @@ export const createAntigravityPlugin = (providerId: string) => async (
             projectId?: string;
             endpoint?: string;
             effectiveModel?: string;
+            sessionId?: string;
             toolDebugMissing?: number;
             toolDebugSummary?: string;
             toolDebugPayload?: string;
@@ -534,6 +535,7 @@ export const createAntigravityPlugin = (providerId: string) => async (
                       projectId: prepared.projectId,
                       endpoint: prepared.endpoint,
                       effectiveModel: prepared.effectiveModel,
+                      sessionId: prepared.sessionId,
                       toolDebugMissing: prepared.toolDebugMissing,
                       toolDebugSummary: prepared.toolDebugSummary,
                       toolDebugPayload: prepared.toolDebugPayload,
@@ -553,6 +555,7 @@ export const createAntigravityPlugin = (providerId: string) => async (
                       projectId: prepared.projectId,
                       endpoint: prepared.endpoint,
                       effectiveModel: prepared.effectiveModel,
+                      sessionId: prepared.sessionId,
                       toolDebugMissing: prepared.toolDebugMissing,
                       toolDebugSummary: prepared.toolDebugSummary,
                       toolDebugPayload: prepared.toolDebugPayload,
@@ -580,6 +583,7 @@ export const createAntigravityPlugin = (providerId: string) => async (
                     projectId: prepared.projectId,
                     endpoint: prepared.endpoint,
                     effectiveModel: prepared.effectiveModel,
+                    sessionId: prepared.sessionId,
                     toolDebugMissing: prepared.toolDebugMissing,
                     toolDebugSummary: prepared.toolDebugSummary,
                     toolDebugPayload: prepared.toolDebugPayload,
@@ -596,6 +600,7 @@ export const createAntigravityPlugin = (providerId: string) => async (
                   prepared.projectId,
                   prepared.endpoint,
                   prepared.effectiveModel,
+                  prepared.sessionId,
                   prepared.toolDebugMissing,
                   prepared.toolDebugSummary,
                   prepared.toolDebugPayload,
@@ -627,6 +632,7 @@ export const createAntigravityPlugin = (providerId: string) => async (
                 lastFailure.projectId,
                 lastFailure.endpoint,
                 lastFailure.effectiveModel,
+                lastFailure.sessionId,
                 lastFailure.toolDebugMissing,
                 lastFailure.toolDebugSummary,
                 lastFailure.toolDebugPayload,
diff --git a/src/plugin/auth.test.ts b/src/plugin/auth.test.ts
new file mode 100644
index 0000000..692b1ad
--- /dev/null
+++ b/src/plugin/auth.test.ts
@@ -0,0 +1,195 @@
+import { beforeEach, describe, expect, it, vi } from "vitest";
+
+import { isOAuthAuth, parseRefreshParts, formatRefreshParts, accessTokenExpired } from "./auth";
+import type { OAuthAuthDetails, ApiKeyAuthDetails } from "./types";
+
+describe("isOAuthAuth", () => {
+  it("returns true for oauth auth type", () => {
+    const auth: OAuthAuthDetails = {
+      type: "oauth",
+      refresh: "token|project",
+      access: "access-token",
+      expires: Date.now() + 3600000,
+    };
+    expect(isOAuthAuth(auth)).toBe(true);
+  });
+
+  it("returns false for api_key auth type", () => {
+    const auth: ApiKeyAuthDetails = {
+      type: "api_key",
+      key: "some-api-key",
+    };
+    expect(isOAuthAuth(auth)).toBe(false);
+  });
+});
+
+describe("parseRefreshParts", () => {
+  it("parses refresh token with all parts", () => {
+    const result = parseRefreshParts("refreshToken|projectId|managedProjectId");
+    expect(result).toEqual({
+      refreshToken: "refreshToken",
+      projectId: "projectId",
+      managedProjectId: "managedProjectId",
+    });
+  });
+
+  it("parses refresh token with only refresh and project", () => {
+    const result = parseRefreshParts("refreshToken|projectId");
+    expect(result).toEqual({
+      refreshToken: "refreshToken",
+      projectId: "projectId",
+      managedProjectId: undefined,
+    });
+  });
+
+  it("parses refresh token with only refresh token", () => {
+    const result = parseRefreshParts("refreshToken");
+    expect(result).toEqual({
+      refreshToken: "refreshToken",
+      projectId: undefined,
+      managedProjectId: undefined,
+    });
+  });
+
+  it("handles empty string", () => {
+    const result = parseRefreshParts("");
+    expect(result).toEqual({
+      refreshToken: "",
+      projectId: undefined,
+      managedProjectId: undefined,
+    });
+  });
+
+  it("handles empty parts", () => {
+    const result = parseRefreshParts("refreshToken||managedProjectId");
+    expect(result).toEqual({
+      refreshToken: "refreshToken",
+      projectId: undefined,
+      managedProjectId: "managedProjectId",
+    });
+  });
+
+  it("handles undefined/null-like input", () => {
+    // @ts-expect-error - testing edge case
+    const result = parseRefreshParts(undefined);
+    expect(result).toEqual({
+      refreshToken: "",
+      projectId: undefined,
+      managedProjectId: undefined,
+    });
+  });
+});
+
+describe("formatRefreshParts", () => {
+  it("formats all parts", () => {
+    const result = formatRefreshParts({
+      refreshToken: "refreshToken",
+      projectId: "projectId",
+      managedProjectId: "managedProjectId",
+    });
+    expect(result).toBe("refreshToken|projectId|managedProjectId");
+  });
+
+  it("formats without managed project id", () => {
+    const result = formatRefreshParts({
+      refreshToken: "refreshToken",
+      projectId: "projectId",
+    });
+    expect(result).toBe("refreshToken|projectId");
+  });
+
+  it("formats without project id but with managed project id", () => {
+    const result = formatRefreshParts({
+      refreshToken: "refreshToken",
+      managedProjectId: "managedProjectId",
+    });
+    expect(result).toBe("refreshToken||managedProjectId");
+  });
+
+  it("formats with only refresh token", () => {
+    const result = formatRefreshParts({
+      refreshToken: "refreshToken",
+    });
+    expect(result).toBe("refreshToken|");
+  });
+
+  it("round-trips correctly with parseRefreshParts", () => {
+    const original = {
+      refreshToken: "rt123",
+      projectId: "proj456",
+      managedProjectId: "managed789",
+    };
+    const formatted = formatRefreshParts(original);
+    const parsed = parseRefreshParts(formatted);
+    expect(parsed).toEqual(original);
+  });
+});
+
+describe("accessTokenExpired", () => {
+  beforeEach(() => {
+    vi.useRealTimers();
+  });
+
+  it("returns true when access token is missing", () => {
+    const auth: OAuthAuthDetails = {
+      type: "oauth",
+      refresh: "token",
+      access: undefined,
+      expires: Date.now() + 3600000,
+    };
+    expect(accessTokenExpired(auth)).toBe(true);
+  });
+
+  it("returns true when expires is missing", () => {
+    const auth: OAuthAuthDetails = {
+      type: "oauth",
+      refresh: "token",
+      access: "access-token",
+      expires: undefined,
+    };
+    expect(accessTokenExpired(auth)).toBe(true);
+  });
+
+  it("returns true when token is expired", () => {
+    const auth: OAuthAuthDetails = {
+      type: "oauth",
+      refresh: "token",
+      access: "access-token",
+      expires: Date.now() - 1000, // expired 1 second ago
+    };
+    expect(accessTokenExpired(auth)).toBe(true);
+  });
+
+  it("returns true when token expires within buffer period (60 seconds)", () => {
+    const auth: OAuthAuthDetails = {
+      type: "oauth",
+      refresh: "token",
+      access: "access-token",
+      expires: Date.now() + 30000, // expires in 30 seconds (within 60s buffer)
+    };
+    expect(accessTokenExpired(auth)).toBe(true);
+  });
+
+  it("returns false when token is valid and outside buffer period", () => {
+    const auth: OAuthAuthDetails = {
+      type: "oauth",
+      refresh: "token",
+      access: "access-token",
+      expires: Date.now() + 120000, // expires in 2 minutes
+    };
+    expect(accessTokenExpired(auth)).toBe(false);
+  });
+
+  it("returns false when token expires exactly at buffer boundary", () => {
+    vi.useFakeTimers();
+    vi.setSystemTime(new Date(0));
+
+    const auth: OAuthAuthDetails = {
+      type: "oauth",
+      refresh: "token",
+      access: "access-token",
+      expires: 60001, // expires 60001ms from now, just outside 60s buffer
+    };
+    expect(accessTokenExpired(auth)).toBe(false);
+  });
+});
diff --git a/src/plugin/cache.test.ts b/src/plugin/cache.test.ts
new file mode 100644
index 0000000..9d828c9
--- /dev/null
+++ b/src/plugin/cache.test.ts
@@ -0,0 +1,295 @@
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+
+import {
+  resolveCachedAuth,
+  storeCachedAuth,
+  clearCachedAuth,
+  cacheSignature,
+  getCachedSignature,
+  clearSignatureCache,
+} from "./cache";
+import type { OAuthAuthDetails } from "./types";
+
+function createAuth(overrides: Partial<OAuthAuthDetails> = {}): OAuthAuthDetails {
+  return {
+    type: "oauth",
+    refresh: "refresh-token|project-id",
+    access: "access-token",
+    expires: Date.now() + 3600000,
+    ...overrides,
+  };
+}
+
+describe("Auth Cache", () => {
+  beforeEach(() => {
+    vi.useRealTimers();
+    clearCachedAuth();
+  });
+
+  afterEach(() => {
+    clearCachedAuth();
+  });
+
+  describe("resolveCachedAuth", () => {
+    it("returns input auth when no cache exists and caches it", () => {
+      const auth = createAuth();
+      const result = resolveCachedAuth(auth);
+      expect(result).toEqual(auth);
+    });
+
+    it("returns input auth when refresh key is empty", () => {
+      const auth = createAuth({ refresh: "" });
+      const result = resolveCachedAuth(auth);
+      expect(result).toEqual(auth);
+    });
+
+    it("returns input auth when it has valid (unexpired) access token", () => {
+      const oldAuth = createAuth({ access: "old-access", expires: Date.now() + 3600000 });
+      resolveCachedAuth(oldAuth); // cache it
+
+      const newAuth = createAuth({ access: "new-access", expires: Date.now() + 7200000 });
+      const result = resolveCachedAuth(newAuth);
+      expect(result.access).toBe("new-access");
+    });
+
+    it("returns cached auth when input auth is expired but cached is valid", () => {
+      vi.useFakeTimers();
+      vi.setSystemTime(new Date(0));
+
+      const validAuth = createAuth({
+        access: "valid-access",
+        expires: 3600000, // expires at t=3600000
+      });
+      resolveCachedAuth(validAuth); // cache it
+
+      // Now create an expired auth with the same refresh token
+      const expiredAuth = createAuth({
+        access: "expired-access",
+        expires: 30000, // expires within buffer (60s)
+      });
+
+      const result = resolveCachedAuth(expiredAuth);
+      expect(result.access).toBe("valid-access");
+    });
+
+    it("returns input auth when both are expired (updates cache)", () => {
+      vi.useFakeTimers();
+      vi.setSystemTime(new Date(0));
+
+      const expiredCached = createAuth({
+        access: "cached-expired",
+        expires: 30000, // expired within buffer
+      });
+      resolveCachedAuth(expiredCached);
+
+      const expiredNew = createAuth({
+        access: "new-expired",
+        expires: 20000, // also expired within buffer
+      });
+
+      const result = resolveCachedAuth(expiredNew);
+      expect(result.access).toBe("new-expired");
+    });
+  });
+
+  describe("storeCachedAuth", () => {
+    it("stores auth in cache", () => {
+      const auth = createAuth({ access: "stored-access" });
+      storeCachedAuth(auth);
+
+      const expiredAuth = createAuth({ access: "expired", expires: Date.now() - 1000 });
+      const result = resolveCachedAuth(expiredAuth);
+      expect(result.access).toBe("stored-access");
+    });
+
+    it("does nothing when refresh key is empty", () => {
+      const auth = createAuth({ refresh: "", access: "no-key-access" });
+      storeCachedAuth(auth);
+
+      // Should not be retrievable since key was empty
+      const testAuth = createAuth({ refresh: "", access: "test" });
+      const result = resolveCachedAuth(testAuth);
+      expect(result.access).toBe("test"); // returns the input, not cached
+    });
+
+    it("does nothing when refresh key is whitespace only", () => {
+      const auth = createAuth({ refresh: "   ", access: "whitespace-access" });
+      storeCachedAuth(auth);
+
+      const testAuth = createAuth({ refresh: "   ", access: "test" });
+      const result = resolveCachedAuth(testAuth);
+      expect(result.access).toBe("test");
+    });
+  });
+
+  describe("clearCachedAuth", () => {
+    it("clears all cache when no argument provided", () => {
+      storeCachedAuth(createAuth({ refresh: "token1|p", access: "access1" }));
+      storeCachedAuth(createAuth({ refresh: "token2|p", access: "access2" }));
+
+      clearCachedAuth();
+
+      const auth1 = createAuth({ refresh: "token1|p", access: "new1" });
+      const auth2 = createAuth({ refresh: "token2|p", access: "new2" });
+
+      expect(resolveCachedAuth(auth1).access).toBe("new1");
+      expect(resolveCachedAuth(auth2).access).toBe("new2");
+    });
+
+    it("clears specific refresh token from cache", () => {
+      storeCachedAuth(createAuth({ refresh: "token1|p", access: "access1" }));
+      storeCachedAuth(createAuth({ refresh: "token2|p", access: "access2" }));
+
+      clearCachedAuth("token1|p");
+
+      // token1 should be cleared
+      const expiredAuth1 = createAuth({ refresh: "token1|p", access: "new1", expires: Date.now() - 1000 });
+      expect(resolveCachedAuth(expiredAuth1).access).toBe("new1");
+
+      // token2 should still be cached
+      const expiredAuth2 = createAuth({ refresh: "token2|p", access: "new2", expires: Date.now() - 1000 });
+      expect(resolveCachedAuth(expiredAuth2).access).toBe("access2");
+    });
+  });
+});
+
+describe("Signature Cache", () => {
+  beforeEach(() => {
+    vi.useRealTimers();
+    clearSignatureCache();
+  });
+
+  afterEach(() => {
+    clearSignatureCache();
+  });
+
+  describe("cacheSignature", () => {
+    it("caches a signature for session and text", () => {
+      cacheSignature("session1", "thinking text", "sig123");
+      const result = getCachedSignature("session1", "thinking text");
+      expect(result).toBe("sig123");
+    });
+
+    it("does nothing when sessionId is empty", () => {
+      cacheSignature("", "text", "sig");
+      expect(getCachedSignature("", "text")).toBeUndefined();
+    });
+
+    it("does nothing when text is empty", () => {
+      cacheSignature("session", "", "sig");
+      expect(getCachedSignature("session", "")).toBeUndefined();
+    });
+
+    it("does nothing when signature is empty", () => {
+      cacheSignature("session", "text", "");
+      expect(getCachedSignature("session", "text")).toBeUndefined();
+    });
+
+    it("stores multiple signatures per session", () => {
+      cacheSignature("session1", "text1", "sig1");
+      cacheSignature("session1", "text2", "sig2");
+
+      expect(getCachedSignature("session1", "text1")).toBe("sig1");
+      expect(getCachedSignature("session1", "text2")).toBe("sig2");
+    });
+
+    it("stores signatures for different sessions independently", () => {
+      cacheSignature("session1", "text", "sig1");
+      cacheSignature("session2", "text", "sig2");
+
+      expect(getCachedSignature("session1", "text")).toBe("sig1");
+      expect(getCachedSignature("session2", "text")).toBe("sig2");
+    });
+  });
+
+  describe("getCachedSignature", () => {
+    it("returns undefined when session not found", () => {
+      expect(getCachedSignature("unknown", "text")).toBeUndefined();
+    });
+
+    it("returns undefined when text not found in session", () => {
+      cacheSignature("session", "known-text", "sig");
+      expect(getCachedSignature("session", "unknown-text")).toBeUndefined();
+    });
+
+    it("returns undefined when sessionId is empty", () => {
+      expect(getCachedSignature("", "text")).toBeUndefined();
+    });
+
+    it("returns undefined when text is empty", () => {
+      expect(getCachedSignature("session", "")).toBeUndefined();
+    });
+
+    it("returns undefined when signature is expired", () => {
+      vi.useFakeTimers();
+      vi.setSystemTime(new Date(0));
+
+      cacheSignature("session", "text", "sig");
+
+      // Advance time past TTL (1 hour = 3600000ms)
+      vi.setSystemTime(new Date(3600001));
+
+      expect(getCachedSignature("session", "text")).toBeUndefined();
+    });
+
+    it("returns signature when not expired", () => {
+      vi.useFakeTimers();
+      vi.setSystemTime(new Date(0));
+
+      cacheSignature("session", "text", "sig");
+
+      // Advance time but stay within TTL
+      vi.setSystemTime(new Date(3599999));
+
+      expect(getCachedSignature("session", "text")).toBe("sig");
+    });
+  });
+
+  describe("clearSignatureCache", () => {
+    it("clears all signature cache when no argument provided", () => {
+      cacheSignature("session1", "text", "sig1");
+      cacheSignature("session2", "text", "sig2");
+
+      clearSignatureCache();
+
+      expect(getCachedSignature("session1", "text")).toBeUndefined();
+      expect(getCachedSignature("session2", "text")).toBeUndefined();
+    });
+
+    it("clears specific session from cache", () => {
+      cacheSignature("session1", "text", "sig1");
+      cacheSignature("session2", "text", "sig2");
+
+      clearSignatureCache("session1");
+
+      expect(getCachedSignature("session1", "text")).toBeUndefined();
+      expect(getCachedSignature("session2", "text")).toBe("sig2");
+    });
+  });
+
+  describe("cache eviction", () => {
+    it("evicts entries when at capacity", () => {
+      vi.useFakeTimers();
+      vi.setSystemTime(new Date(0));
+
+      // Fill cache with 100 entries (MAX_ENTRIES_PER_SESSION)
+      for (let i = 0; i < 100; i++) {
+        vi.setSystemTime(new Date(i * 1000)); // stagger timestamps
+        cacheSignature("session", `text-${i}`, `sig-${i}`);
+      }
+
+      // Reset time to check entries
+      vi.setSystemTime(new Date(100 * 1000));
+
+      // Adding one more should trigger eviction
+      cacheSignature("session", "new-text", "new-sig");
+
+      // New entry should exist
+      expect(getCachedSignature("session", "new-text")).toBe("new-sig");
+
+      // Some old entries should have been evicted (oldest 25%)
+      // Entry at index 0 (timestamp 0) should be evicted
+      expect(getCachedSignature("session", "text-0")).toBeUndefined();
+    });
+  });
+});
diff --git a/src/plugin/cache.ts b/src/plugin/cache.ts
index 5b5f8c9..847355f 100644
--- a/src/plugin/cache.ts
+++ b/src/plugin/cache.ts
@@ -63,3 +63,104 @@ export function clearCachedAuth(refresh?: string): void {
     authCache.delete(key);
   }
 }
+
+// ============================================================================
+// Thinking Signature Cache (for Claude multi-turn conversations)
+// ============================================================================
+
+interface SignatureEntry {
+  signature: string;
+  timestamp: number;
+}
+
+// Map: sessionId -> Map<textHash, SignatureEntry>
+const signatureCache = new Map<string, Map<string, SignatureEntry>>();
+
+// Cache entries expire after 1 hour
+const SIGNATURE_CACHE_TTL_MS = 60 * 60 * 1000;
+
+// Maximum entries per session to prevent memory bloat
+const MAX_ENTRIES_PER_SESSION = 100;
+
+/**
+ * Simple hash function for text content.
+ */
+function hashText(text: string): string {
+  let hash = 0;
+  for (let i = 0; i < text.length; i++) {
+    const char = text.charCodeAt(i);
+    hash = ((hash << 5) - hash) + char;
+    hash = hash & hash; // Convert to 32bit integer
+  }
+  return hash.toString(36);
+}
+
+/**
+ * Caches a thinking signature for a given session and text.
+ * Used for Claude models that require signed thinking blocks in multi-turn conversations.
+ */
+export function cacheSignature(sessionId: string, text: string, signature: string): void {
+  if (!sessionId || !text || !signature) return;
+
+  let sessionCache = signatureCache.get(sessionId);
+  if (!sessionCache) {
+    sessionCache = new Map();
+    signatureCache.set(sessionId, sessionCache);
+  }
+
+  // Evict old entries if we're at capacity
+  if (sessionCache.size >= MAX_ENTRIES_PER_SESSION) {
+    const now = Date.now();
+    for (const [key, entry] of sessionCache.entries()) {
+      if (now - entry.timestamp > SIGNATURE_CACHE_TTL_MS) {
+        sessionCache.delete(key);
+      }
+    }
+    // If still at capacity, remove oldest entries
+    if (sessionCache.size >= MAX_ENTRIES_PER_SESSION) {
+      const entries = Array.from(sessionCache.entries())
+        .sort((a, b) => a[1].timestamp - b[1].timestamp);
+      const toRemove = entries.slice(0, Math.floor(MAX_ENTRIES_PER_SESSION / 4));
+      for (const [key] of toRemove) {
+        sessionCache.delete(key);
+      }
+    }
+  }
+
+  const textHash = hashText(text);
+  sessionCache.set(textHash, { signature, timestamp: Date.now() });
+}
+
+/**
+ * Retrieves a cached signature for a given session and text.
+ * Returns undefined if not found or expired.
+ */
+export function getCachedSignature(sessionId: string, text: string): string | undefined {
+  if (!sessionId || !text) return undefined;
+
+  const sessionCache = signatureCache.get(sessionId);
+  if (!sessionCache) return undefined;
+
+  const textHash = hashText(text);
+  const entry = sessionCache.get(textHash);
+  if (!entry) return undefined;
+
+  // Check if expired
+  if (Date.now() - entry.timestamp > SIGNATURE_CACHE_TTL_MS) {
+    sessionCache.delete(textHash);
+    return undefined;
+  }
+
+  return entry.signature;
+}
+
+/**
+ * Clears signature cache for a specific session or all sessions.
+ */
+export function clearSignatureCache(sessionId?: string): void {
+  if (sessionId) {
+    signatureCache.delete(sessionId);
+  } else {
+    signatureCache.clear();
+  }
+}
diff --git a/src/plugin/request-helpers.test.ts b/src/plugin/request-helpers.test.ts
new file mode 100644
index 0000000..7f2db97
--- /dev/null
+++ b/src/plugin/request-helpers.test.ts
@@ -0,0 +1,531 @@
+import { describe, expect, it } from "vitest";
+
+import {
+  isThinkingCapableModel,
+  extractThinkingConfig,
+  resolveThinkingConfig,
+  filterUnsignedThinkingBlocks,
+  transformThinkingParts,
+  normalizeThinkingConfig,
+  parseAntigravityApiBody,
+  extractUsageMetadata,
+  extractUsageFromSsePayload,
+  rewriteAntigravityPreviewAccessError,
+  DEFAULT_THINKING_BUDGET,
+} from "./request-helpers";
+
+describe("isThinkingCapableModel", () => {
+  it("returns true for models with 'thinking' in name", () => {
+    expect(isThinkingCapableModel("claude-thinking")).toBe(true);
+    expect(isThinkingCapableModel("CLAUDE-THINKING-4")).toBe(true);
+    expect(isThinkingCapableModel("model-thinking-v1")).toBe(true);
+  });
+
+  it("returns true for models with 'gemini-3' in name", () => {
+    expect(isThinkingCapableModel("gemini-3-pro")).toBe(true);
+    expect(isThinkingCapableModel("GEMINI-3-flash")).toBe(true);
+    expect(isThinkingCapableModel("gemini-3")).toBe(true);
+  });
+
+  it("returns true for models with 'opus' in name", () => {
+    expect(isThinkingCapableModel("claude-opus")).toBe(true);
+    expect(isThinkingCapableModel("claude-4-opus")).toBe(true);
+    expect(isThinkingCapableModel("OPUS")).toBe(true);
+  });
+
+  it("returns false for non-thinking models", () => {
+    expect(isThinkingCapableModel("claude-sonnet")).toBe(false);
+    expect(isThinkingCapableModel("gemini-2-pro")).toBe(false);
+    expect(isThinkingCapableModel("gpt-4")).toBe(false);
+  });
+});
+
+describe("extractThinkingConfig", () => {
+  it("extracts thinkingConfig from generationConfig", () => {
+    const result = extractThinkingConfig(
+      {},
+      { thinkingConfig: { includeThoughts: true, thinkingBudget: 8000 } },
+      undefined,
+    );
+    expect(result).toEqual({ includeThoughts: true, thinkingBudget: 8000 });
+  });
+
+  it("extracts thinkingConfig from extra_body", () => {
+    const result = extractThinkingConfig(
+      {},
+      undefined,
+      { thinkingConfig: { includeThoughts: true, thinkingBudget: 4000 } },
+    );
+    expect(result).toEqual({ includeThoughts: true, thinkingBudget: 4000 });
+  });
+
+  it("extracts thinkingConfig from requestPayload directly", () => {
+    const result = extractThinkingConfig(
+      { thinkingConfig: { includeThoughts: false, thinkingBudget: 2000 } },
+      undefined,
+      undefined,
+    );
+    expect(result).toEqual({ includeThoughts: false, thinkingBudget: 2000 });
+  });
+
+  it("prioritizes generationConfig over extra_body", () => {
+    const result = extractThinkingConfig(
+      {},
+      { thinkingConfig: { includeThoughts: true, thinkingBudget: 8000 } },
+      { thinkingConfig: { includeThoughts: false, thinkingBudget: 4000 } },
+    );
+    expect(result).toEqual({ includeThoughts: true, thinkingBudget: 8000 });
+  });
+
+  it("converts Anthropic-style thinking config", () => {
+    const result = extractThinkingConfig(
+      { thinking: { type: "enabled", budgetTokens: 10000 } },
+      undefined,
+      undefined,
+    );
+    expect(result).toEqual({ includeThoughts: true, thinkingBudget: 10000 });
+  });
+
+  it("uses default budget for Anthropic-style without budgetTokens", () => {
+    const result = extractThinkingConfig(
+      { thinking: { type: "enabled" } },
+      undefined,
+      undefined,
+    );
+    expect(result).toEqual({ includeThoughts: true, thinkingBudget: DEFAULT_THINKING_BUDGET });
+  });
+
+  it("returns undefined when no config found", () => {
+    expect(extractThinkingConfig({}, undefined, undefined)).toBeUndefined();
+  });
+
+  it("uses default budget when thinkingBudget not specified", () => {
+    const result = extractThinkingConfig(
+      {},
+      { thinkingConfig: { includeThoughts: true } },
+      undefined,
+    );
+    expect(result).toEqual({ includeThoughts: true, thinkingBudget: DEFAULT_THINKING_BUDGET });
+  });
+});
+
+describe("resolveThinkingConfig", () => {
+  it("keeps thinking enabled for Claude models with assistant history", () => {
+    const result = resolveThinkingConfig(
+      { includeThoughts: true, thinkingBudget: 8000 },
+      true, // isThinkingModel
+      true, // isClaudeModel
+      true, // hasAssistantHistory
+    );
+    expect(result).toEqual({ includeThoughts: true, thinkingBudget: 8000 });
+  });
+
+  it("enables thinking for thinking-capable models without user config", () => {
+    const result = resolveThinkingConfig(
+      undefined,
+      true, // isThinkingModel
+      false, // isClaudeModel
+      false, // hasAssistantHistory
+    );
+    expect(result).toEqual({ includeThoughts: true, thinkingBudget: DEFAULT_THINKING_BUDGET });
+  });
+
+  it("respects user config for non-Claude models", () => {
+    const userConfig = { includeThoughts: false, thinkingBudget: 5000 };
+    const result = resolveThinkingConfig(
+      userConfig,
+      true,
+      false,
+      false,
+    );
+    expect(result).toEqual(userConfig);
+  });
+
+  it("returns user config for Claude without history", () => {
+    const userConfig = { includeThoughts: true, thinkingBudget: 8000 };
+    const result = resolveThinkingConfig(
+      userConfig,
+      true,
+      true, // isClaudeModel
+      false, // no history
+    );
+    expect(result).toEqual(userConfig);
+  });
+
+  it("returns undefined for non-thinking model without user config", () => {
+    const result = resolveThinkingConfig(
+      undefined,
+      false, // not thinking model
+      false,
+      false,
+    );
+    expect(result).toBeUndefined();
+  });
+});
+
+describe("filterUnsignedThinkingBlocks", () => {
+  it("filters out unsigned thinking parts", () => {
+    const contents = [
+      {
+        role: "model",
+        parts: [
+          { type: "thinking", text: "thinking without signature" },
+          { type: "text", text: "visible text" },
+        ],
+      },
+    ];
+    const result = filterUnsignedThinkingBlocks(contents);
+    expect(result[0].parts).toHaveLength(1);
+    expect(result[0].parts[0].type).toBe("text");
+  });
+
+  it("keeps signed thinking parts with valid signatures", () => {
+    const validSignature = "a".repeat(60);
+    const contents = [
+      {
+        role: "model",
+        parts: [
+          { type: "thinking", text: "thinking with signature", signature: validSignature },
+          { type: "text", text: "visible text" },
+        ],
+      },
+    ];
+    const result = filterUnsignedThinkingBlocks(contents);
+    expect(result[0].parts).toHaveLength(2);
+    expect(result[0].parts[0].signature).toBe(validSignature);
+  });
+
+  it("filters thinking parts with short signatures", () => {
+    const contents = [
+      {
+        role: "model",
+        parts: [
+          { type: "thinking", text: "thinking with short signature", signature: "sig123" },
+          { type: "text", text: "visible text" },
+        ],
+      },
+    ];
+    const result = filterUnsignedThinkingBlocks(contents);
+    expect(result[0].parts).toHaveLength(1);
+    expect(result[0].parts[0].type).toBe("text");
+  });
+
+  it("handles Gemini-style thought parts with valid signatures", () => {
+    const validSignature = "b".repeat(55);
+    const contents = [
+      {
+        role: "model",
+        parts: [
+          { thought: true, text: "no signature" },
+          { thought: true, text: "has signature", thoughtSignature: validSignature },
+        ],
+      },
+    ];
+    const result = filterUnsignedThinkingBlocks(contents);
+    expect(result[0].parts).toHaveLength(1);
+    expect(result[0].parts[0].thoughtSignature).toBe(validSignature);
+  });
+
+  it("filters Gemini-style thought parts with short signatures", () => {
+    const contents = [
+      {
+        role: "model",
+        parts: [
+          { thought: true, text: "has short signature", thoughtSignature: "sig" },
+        ],
+      },
+    ];
+    const result = filterUnsignedThinkingBlocks(contents);
+    expect(result[0].parts).toHaveLength(0);
+  });
+
+  it("preserves non-thinking parts", () => {
+    const contents = [
+      {
+        role: "user",
+        parts: [{ text: "hello" }],
+      },
+    ];
+    const result = filterUnsignedThinkingBlocks(contents);
+    expect(result).toEqual(contents);
+  });
+
+  it("handles empty parts array", () => {
+    const contents = [{ role: "model", parts: [] }];
+    const result = filterUnsignedThinkingBlocks(contents);
+    expect(result[0].parts).toEqual([]);
+  });
+
+  it("handles missing parts", () => {
+    const contents = [{ role: "model" }];
+    const result = filterUnsignedThinkingBlocks(contents);
+    expect(result).toEqual(contents);
+  });
+});
+
+describe("transformThinkingParts", () => {
+  it("transforms Anthropic-style thinking blocks to reasoning", () => {
+    const response = {
+      content: [
+        { type: "thinking", thinking: "my thoughts" },
+        { type: "text", text: "visible" },
+      ],
+    };
+    const result = transformThinkingParts(response) as any;
+    expect(result.content[0].type).toBe("reasoning");
+    expect(result.content[0].thought).toBe(true);
+    expect(result.reasoning_content).toBe("my thoughts");
+  });
+
+  it("transforms Gemini-style candidates", () => {
+    const response = {
+      candidates: [
+        {
+          content: {
+            parts: [
+              { thought: true, text: "thinking here" },
+              { text: "output" },
+            ],
+          },
+        },
+      ],
+    };
+    const result = transformThinkingParts(response) as any;
+    expect(result.candidates[0].content.parts[0].type).toBe("reasoning");
+    expect(result.candidates[0].reasoning_content).toBe("thinking here");
+  });
+
+  it("handles non-object input", () => {
+    expect(transformThinkingParts(null)).toBeNull();
+    expect(transformThinkingParts(undefined)).toBeUndefined();
+    expect(transformThinkingParts("string")).toBe("string");
+  });
+
+  it("preserves other response properties", () => {
+    const response = {
+      content: [],
+      id: "resp-123",
+      model: "claude-4",
+    };
+    const result = transformThinkingParts(response) as any;
+    expect(result.id).toBe("resp-123");
+    expect(result.model).toBe("claude-4");
+  });
+});
+
+describe("normalizeThinkingConfig", () => {
+  it("returns undefined for non-object input", () => {
+    expect(normalizeThinkingConfig(null)).toBeUndefined();
+    expect(normalizeThinkingConfig(undefined)).toBeUndefined();
+    expect(normalizeThinkingConfig("string")).toBeUndefined();
+  });
+
+  it("normalizes valid config", () => {
+    const result = normalizeThinkingConfig({
+      thinkingBudget: 8000,
+      includeThoughts: true,
+    });
+    expect(result).toEqual({
+      thinkingBudget: 8000,
+      includeThoughts: true,
+    });
+  });
+
+  it("handles snake_case property names", () => {
+    const result = normalizeThinkingConfig({
+      thinking_budget: 4000,
+      include_thoughts: true,
+    });
+    expect(result).toEqual({
+      thinkingBudget: 4000,
+      includeThoughts: true,
+    });
+  });
+
+  it("disables includeThoughts when budget is 0", () => {
+    const result = normalizeThinkingConfig({
+      thinkingBudget: 0,
+      includeThoughts: true,
+    });
+    expect(result?.includeThoughts).toBe(false);
+  });
+
+  it("returns undefined when both values are absent/undefined", () => {
+    const result = normalizeThinkingConfig({});
+    expect(result).toBeUndefined();
+  });
+
+  it("handles non-finite budget values", () => {
+    const result = normalizeThinkingConfig({
+      thinkingBudget: Infinity,
+      includeThoughts: true,
+    });
+    // When budget is non-finite (undefined), includeThoughts is forced to false
+    expect(result).toEqual({ includeThoughts: false });
+  });
+});
+
+describe("parseAntigravityApiBody", () => {
+  it("parses valid JSON object", () => {
+    const result = parseAntigravityApiBody('{"response": {"text": "hello"}}');
+    expect(result).toEqual({ response: { text: "hello" } });
+  });
+
+  it("extracts first object from array", () => {
+    const result = parseAntigravityApiBody('[{"response": "first"}, {"response": "second"}]');
+    expect(result).toEqual({ response: "first" });
+  });
+
+  it("returns null for invalid JSON", () => {
+    expect(parseAntigravityApiBody("not json")).toBeNull();
+  });
+
+  it("returns null for empty array", () => {
+    expect(parseAntigravityApiBody("[]")).toBeNull();
+  });
+
+  it("returns null for primitive values", () => {
+    expect(parseAntigravityApiBody('"string"')).toBeNull();
+    expect(parseAntigravityApiBody("123")).toBeNull();
+  });
+
+  it("handles array with null values", () => {
+    const result = parseAntigravityApiBody('[null, {"valid": true}]');
+    expect(result).toEqual({ valid: true });
+  });
+});
+
+describe("extractUsageMetadata", () => {
+  it("extracts usage from response.usageMetadata", () => {
+    const body = {
+      response: {
+        usageMetadata: {
+          totalTokenCount: 1000,
+          promptTokenCount: 500,
+          candidatesTokenCount: 500,
+          cachedContentTokenCount: 100,
+        },
+      },
+    };
+    const result = extractUsageMetadata(body);
+    expect(result).toEqual({
+      totalTokenCount: 1000,
+      promptTokenCount: 500,
+      candidatesTokenCount: 500,
+      cachedContentTokenCount: 100,
+    });
+  });
+
+  it("returns null when no usageMetadata", () => {
+    expect(extractUsageMetadata({ response: {} })).toBeNull();
+    expect(extractUsageMetadata({})).toBeNull();
+  });
+
+  it("handles partial usage data", () => {
+    const body = {
+      response: {
+        usageMetadata: {
+          totalTokenCount: 1000,
+        },
+      },
+    };
+    const result = extractUsageMetadata(body);
+    expect(result).toEqual({
+      totalTokenCount: 1000,
+      promptTokenCount: undefined,
+      candidatesTokenCount: undefined,
+      cachedContentTokenCount: undefined,
+    });
+  });
+
+  it("filters non-finite numbers", () => {
+    const body = {
+      response: {
+        usageMetadata: {
+          totalTokenCount: Infinity,
+          promptTokenCount: NaN,
+          candidatesTokenCount: 100,
+        },
+      },
+    };
+    const result = extractUsageMetadata(body);
+    expect(result?.totalTokenCount).toBeUndefined();
+    expect(result?.promptTokenCount).toBeUndefined();
+    expect(result?.candidatesTokenCount).toBe(100);
+  });
+});
+
+describe("extractUsageFromSsePayload", () => {
+  it("extracts usage from SSE data line", () => {
+    const payload = `data: {"response": {"usageMetadata": {"totalTokenCount": 500}}}`;
+    const result = extractUsageFromSsePayload(payload);
+    expect(result?.totalTokenCount).toBe(500);
+  });
+
+  it("handles multiple SSE lines", () => {
+    const payload = `data: {"response": {}}
+data: {"response": {"usageMetadata": {"totalTokenCount": 1000}}}`;
+    const result = extractUsageFromSsePayload(payload);
+    expect(result?.totalTokenCount).toBe(1000);
+  });
+
+  it("returns null when no usage found", () => {
+    const payload = `data: {"response": {"text": "hello"}}`;
+    const result = extractUsageFromSsePayload(payload);
+    expect(result).toBeNull();
+  });
+
+  it("ignores non-data lines", () => {
+    const payload = `: keepalive
+event: message
+data: {"response": {"usageMetadata": {"totalTokenCount": 200}}}`;
+    const result = extractUsageFromSsePayload(payload);
+    expect(result?.totalTokenCount).toBe(200);
+  });
+
+  it("handles malformed JSON gracefully", () => {
+    const payload = `data: not json
+data: {"response": {"usageMetadata": {"totalTokenCount": 300}}}`;
+    const result = extractUsageFromSsePayload(payload);
+    expect(result?.totalTokenCount).toBe(300);
+  });
+});
+
+describe("rewriteAntigravityPreviewAccessError", () => {
+  it("returns null for non-404 status", () => {
+    const body = { error: { message: "Not found" } };
+    expect(rewriteAntigravityPreviewAccessError(body, 400)).toBeNull();
+    expect(rewriteAntigravityPreviewAccessError(body, 500)).toBeNull();
+  });
+
+  it("rewrites error for Antigravity model on 404", () => {
+    const body = { error: { message: "Model not found" } };
+    const result = rewriteAntigravityPreviewAccessError(body, 404, "claude-opus");
+    expect(result?.error?.message).toContain("Model not found");
+    expect(result?.error?.message).toContain("preview access");
+  });
+
+  it("rewrites error when error message contains antigravity", () => {
+    const body = { error: { message: "antigravity model unavailable" } };
+    const result = rewriteAntigravityPreviewAccessError(body, 404);
+    expect(result?.error?.message).toContain("preview access");
+  });
+
+  it("returns null for 404 with non-antigravity model", () => {
+    const body = { error: { message: "Model not found" } };
+    const result = rewriteAntigravityPreviewAccessError(body, 404, "gemini-pro");
+    expect(result).toBeNull();
+  });
+
+  it("provides default message when error message is empty", () => {
+    const body = { error: { message: "" } };
+    const result = rewriteAntigravityPreviewAccessError(body, 404, "opus-model");
+    expect(result?.error?.message).toContain("Antigravity preview features are not enabled");
+  });
+
+  it("detects Claude models in requested model name", () => {
+    const body = { error: {} };
+    const result = rewriteAntigravityPreviewAccessError(body, 404, "claude-3-sonnet");
+    expect(result?.error?.message).toContain("preview access");
+  });
+});
diff --git a/src/plugin/request-helpers.ts b/src/plugin/request-helpers.ts
index 90fca15..b416ddf 100644
--- a/src/plugin/request-helpers.ts
+++ b/src/plugin/request-helpers.ts
@@ -89,19 +89,17 @@ export function extractThinkingConfig(
 
 /**
  * Determines the final thinking configuration based on model capabilities and user settings.
- * Claude models require signed thinking blocks for multi-turn conversations.
- * Since previous thinking blocks may lack signatures, we disable thinking for Claude multi-turn.
+ * For Claude thinking models, we keep thinking enabled even in multi-turn conversations.
+ * The filterUnsignedThinkingBlocks function will handle signature validation/restoration.
  */
 export function resolveThinkingConfig(
   userConfig: ThinkingConfig | undefined,
   isThinkingModel: boolean,
-  isClaudeModel: boolean,
-  hasAssistantHistory: boolean,
+  _isClaudeModel: boolean,
+  _hasAssistantHistory: boolean,
 ): ThinkingConfig | undefined {
-  if (isClaudeModel && hasAssistantHistory) {
-    return { includeThoughts: false, thinkingBudget: 0 };
-  }
-
+  // For thinking-capable models (including Claude thinking models), enable thinking by default
+  // The signature validation/restoration is handled by filterUnsignedThinkingBlocks
   if (isThinkingModel && !userConfig) {
     return { includeThoughts: true, thinkingBudget: DEFAULT_THINKING_BUDGET };
   }
@@ -121,34 +119,187 @@ function isThinkingPart(part: Record<string, unknown>): boolean {
 
 /**
  * Checks if a thinking part has a valid signature.
+ * A valid signature is a non-empty string with at least 50 characters.
  */
 function hasValidSignature(part: Record<string, unknown>): boolean {
-  if (part.thought === true) {
-    return Boolean(part.thoughtSignature);
+  const signature = part.thought === true ? part.thoughtSignature : part.signature;
+  return typeof signature === "string" && signature.length >= 50;
+}
+
+/**
+ * Gets the text content from a thinking part.
+ */
+function getThinkingText(part: Record<string, unknown>): string {
+  if (typeof part.text === "string") return part.text;
+  if (typeof part.thinking === "string") return part.thinking;
+
+  // Some SDKs wrap thinking in an object like { text: "...", cache_control: {...} }
+  if (part.thinking && typeof part.thinking === "object") {
+    const maybeText = (part.thinking as any).text ?? (part.thinking as any).thinking;
+    if (typeof maybeText === "string") return maybeText;
   }
-  return Boolean(part.signature);
+
+  return "";
+}
+
+/**
+ * Recursively strips cache_control and providerOptions from any object.
+ * These fields can be injected by SDKs, but Claude rejects them inside thinking blocks.
+ */
+function stripCacheControlRecursively(obj: unknown): unknown {
+  if (obj === null || obj === undefined) return obj;
+  if (typeof obj !== "object") return obj;
+  if (Array.isArray(obj)) return obj.map(item => stripCacheControlRecursively(item));
+
+  const result: Record<string, unknown> = {};
+  for (const [key, value] of Object.entries(obj as Record<string, unknown>)) {
+    if (key === "cache_control" || key === "providerOptions") continue;
+    result[key] = stripCacheControlRecursively(value);
+  }
+  return result;
+}
+
+/**
+ * Sanitizes a thinking part by keeping only the allowed fields.
+ * In particular, ensures `thinking` is a string (not an object with cache_control).
+ */
+function sanitizeThinkingPart(part: Record<string, unknown>): Record<string, unknown> {
+  // Gemini-style thought blocks: { thought: true, text, thoughtSignature }
+  if (part.thought === true) {
+    const sanitized: Record<string, unknown> = { thought: true };
+
+    if (part.text !== undefined) {
+      // If text is wrapped, extract the inner string.
+      if (typeof part.text === "object" && part.text !== null) {
+        const maybeText = (part.text as any).text;
+        sanitized.text = typeof maybeText === "string" ? maybeText : part.text;
+      } else {
+        sanitized.text = part.text;
+      }
+    }
+
+    if (part.thoughtSignature !== undefined) sanitized.thoughtSignature = part.thoughtSignature;
+    return sanitized;
+  }
+
+  // Anthropic-style thinking blocks: { type: "thinking", thinking, signature }
+  if (part.type === "thinking" || part.thinking !== undefined) {
+    const sanitized: Record<string, unknown> = { type: "thinking" };
+
+    let thinkingContent: unknown = part.thinking ?? part.text;
+    if (thinkingContent !== undefined && typeof thinkingContent === "object" && thinkingContent !== null) {
+      const maybeText = (thinkingContent as any).text ?? (thinkingContent as any).thinking;
+      thinkingContent = typeof maybeText === "string" ? maybeText : "";
+    }
+
+    if (thinkingContent !== undefined) sanitized.thinking = thinkingContent;
+    if (part.signature !== undefined) sanitized.signature = part.signature;
+    return sanitized;
+  }
+
+  // Fallback: strip cache_control recursively.
+  return stripCacheControlRecursively(part) as Record<string, unknown>;
+}
+
+function filterContentArray(
+  contentArray: any[],
+  sessionId?: string,
+  getCachedSignatureFn?: (sessionId: string, text: string) => string | undefined,
+): any[] {
+  const filtered: any[] = [];
+
+  for (const item of contentArray) {
+    if (!item || typeof item !== "object") {
+      filtered.push(item);
+      continue;
+    }
+
+    if (!isThinkingPart(item)) {
+      filtered.push(item);
+      continue;
+    }
+
+    if (hasValidSignature(item)) {
+      filtered.push(sanitizeThinkingPart(item));
+      continue;
+    }
+
+    if (sessionId && getCachedSignatureFn) {
+      const text = getThinkingText(item);
+      if (text) {
+        const cachedSignature = getCachedSignatureFn(sessionId, text);
+        if (cachedSignature && cachedSignature.length >= 50) {
+          const restoredPart = { ...item };
+          if ((item as any).thought === true) {
+            (restoredPart as any).thoughtSignature = cachedSignature;
+          } else {
+            (restoredPart as any).signature = cachedSignature;
+          }
+          filtered.push(sanitizeThinkingPart(restoredPart as Record<string, unknown>));
+          continue;
+        }
+      }
+    }
+
+    // Drop unsigned/invalid thinking blocks.
+  }
+
+  return filtered;
 }
 
 /**
  * Filters out unsigned thinking blocks from contents (required by Claude API).
+ * Attempts to restore signatures from cache for thinking blocks that lack valid signatures.
+ * 
+ * @param contents - The contents array from the request
+ * @param sessionId - Optional session ID for signature cache lookup
+ * @param getCachedSignatureFn - Optional function to retrieve cached signatures
  */
-export function filterUnsignedThinkingBlocks(contents: any[]): any[] {
+export function filterUnsignedThinkingBlocks(
+  contents: any[],
+  sessionId?: string,
+  getCachedSignatureFn?: (sessionId: string, text: string) => string | undefined,
+): any[] {
   return contents.map((content: any) => {
-    if (!content || !Array.isArray(content.parts)) {
+    if (!content || typeof content !== "object") {
       return content;
     }
 
-    const filteredParts = content.parts.filter((part: any) => {
-      if (!part || typeof part !== "object") {
-        return true;
-      }
-      if (isThinkingPart(part)) {
-        return hasValidSignature(part);
-      }
-      return true;
-    });
+    // Gemini format: contents[].parts[]
+    if (Array.isArray((content as any).parts)) {
+      const filteredParts = filterContentArray((content as any).parts, sessionId, getCachedSignatureFn);
+      return { ...content, parts: filteredParts };
+    }
 
-    return { ...content, parts: filteredParts };
+    // Some Anthropic-style payloads may appear here as contents[].content[]
+    if (Array.isArray((content as any).content)) {
+      const filteredContent = filterContentArray((content as any).content, sessionId, getCachedSignatureFn);
+      return { ...content, content: filteredContent };
+    }
+
+    return content;
+  });
+}
+
+/**
+ * Filters thinking blocks from Anthropic-style messages[] payloads.
+ */
+export function filterMessagesThinkingBlocks(
+  messages: any[],
+  sessionId?: string,
+  getCachedSignatureFn?: (sessionId: string, text: string) => string | undefined,
+): any[] {
+  return messages.map((message: any) => {
+    if (!message || typeof message !== "object") {
+      return message;
+    }
+
+    if (Array.isArray((message as any).content)) {
+      const filteredContent = filterContentArray((message as any).content, sessionId, getCachedSignatureFn);
+      return { ...message, content: filteredContent };
+    }
+
+    return message;
   });
 }
 
diff --git a/src/plugin/request.ts b/src/plugin/request.ts
index 5f749bc..d783959 100644
--- a/src/plugin/request.ts
+++ b/src/plugin/request.ts
@@ -3,12 +3,14 @@ import {
   ANTIGRAVITY_HEADERS,
   ANTIGRAVITY_ENDPOINT,
 } from "../constants";
+import { cacheSignature, getCachedSignature } from "./cache";
 import { logAntigravityDebugResponse, type AntigravityDebugContext } from "./debug";
 import {
   extractThinkingConfig,
   extractUsageFromSsePayload,
   extractUsageMetadata,
   filterUnsignedThinkingBlocks,
+  filterMessagesThinkingBlocks,
   isThinkingCapableModel,
   normalizeThinkingConfig,
   parseAntigravityApiBody,
@@ -18,6 +20,20 @@ import {
   type AntigravityApiBody,
 } from "./request-helpers";
 
+/**
+ * Stable session ID for the plugin's lifetime.
+ * This is used for caching thinking signatures across multi-turn conversations.
+ * Generated once at plugin load time and reused for all requests.
+ */
+const PLUGIN_SESSION_ID = `-${Math.floor(Math.random() * 9_000_000_000_000_000_000)}`;
+
+/**
+ * Gets the stable session ID for this plugin instance.
+ */
+export function getPluginSessionId(): string {
+  return PLUGIN_SESSION_ID;
+}
+
 function generateSyntheticProjectId(): string {
   const adjectives = ["useful", "bright", "swift", "calm", "bold"];
   const nouns = ["fuze", "wave", "spark", "flow", "core"];
@@ -65,31 +81,39 @@ function transformStreamingPayload(payload: string): string {
 
 /**
  * Creates a TransformStream that processes SSE chunks incrementally,
- * transforming each line as it arrives for true streaming support.
+ * transforming each line as it arrives for true real-time streaming support.
+ * Optionally caches thinking signatures for Claude multi-turn conversations.
  */
-function createStreamingTransformer(): TransformStream<Uint8Array, Uint8Array> {
+function createStreamingTransformer(sessionId?: string): TransformStream<Uint8Array, Uint8Array> {
   const decoder = new TextDecoder();
   const encoder = new TextEncoder();
   let buffer = "";
+  // Buffer for accumulating thinking text per candidate index (for signature caching)
+  const thoughtBuffer = new Map<number, string>();
 
   return new TransformStream({
     transform(chunk, controller) {
+      // Decode chunk with stream: true to handle multi-byte characters correctly
       buffer += decoder.decode(chunk, { stream: true });
 
-      // Process complete lines
+      // Process complete lines immediately for real-time streaming
       const lines = buffer.split("\n");
       // Keep the last incomplete line in buffer
       buffer = lines.pop() || "";
 
       for (const line of lines) {
-        const transformedLine = transformSseLine(line);
+        // Transform and forward each line immediately
+        const transformedLine = transformSseLine(line, sessionId, thoughtBuffer);
         controller.enqueue(encoder.encode(transformedLine + "\n"));
       }
     },
     flush(controller) {
+      // Flush any remaining bytes from TextDecoder
+      buffer += decoder.decode();
+
       // Process any remaining data in buffer
       if (buffer) {
-        const transformedLine = transformSseLine(buffer);
+        const transformedLine = transformSseLine(buffer, sessionId, thoughtBuffer);
         controller.enqueue(encoder.encode(transformedLine));
       }
     },
@@ -98,8 +122,13 @@ function createStreamingTransformer(): TransformStream<Uint8Array, Uint8Array> {
 
 /**
  * Transforms a single SSE line, extracting and transforming the inner response.
+ * Optionally caches thinking signatures for Claude multi-turn support.
  */
-function transformSseLine(line: string): string {
+function transformSseLine(
+  line: string,
+  sessionId?: string,
+  thoughtBuffer?: Map<number, string>,
+): string {
   if (!line.startsWith("data:")) {
     return line;
   }
@@ -110,6 +139,10 @@ function transformSseLine(line: string): string {
   try {
     const parsed = JSON.parse(json) as { response?: unknown };
     if (parsed.response !== undefined) {
+      // Cache thinking signatures for Claude multi-turn support
+      if (sessionId && thoughtBuffer) {
+        cacheThinkingSignatures(parsed.response, sessionId, thoughtBuffer);
+      }
       const transformed = transformThinkingParts(parsed.response);
       return `data: ${JSON.stringify(transformed)}`;
     }
@@ -117,6 +150,58 @@ function transformSseLine(line: string): string {
   return line;
 }
 
+/**
+ * Extracts and caches thinking signatures from a response for Claude multi-turn support.
+ */
+function cacheThinkingSignatures(
+  response: unknown,
+  sessionId: string,
+  thoughtBuffer: Map<number, string>,
+): void {
+  if (!response || typeof response !== "object") return;
+
+  const resp = response as Record<string, unknown>;
+
+  // Handle Gemini-style candidates array (Claude through Antigravity uses this format)
+  if (Array.isArray(resp.candidates)) {
+    resp.candidates.forEach((candidate: any, index: number) => {
+      if (!candidate?.content?.parts) return;
+
+      candidate.content.parts.forEach((part: any) => {
+        // Collect thinking text
+        if (part.thought === true || part.type === "thinking") {
+          const text = part.text || part.thinking || "";
+          if (text) {
+            const current = thoughtBuffer.get(index) ?? "";
+            thoughtBuffer.set(index, current + text);
+          }
+        }
+
+        // Cache signature when we receive it
+        if (part.thoughtSignature) {
+          const fullText = thoughtBuffer.get(index) ?? "";
+          if (fullText && sessionId) {
+            cacheSignature(sessionId, fullText, part.thoughtSignature);
+          }
+        }
+      });
+    });
+  }
+
+  // Handle Anthropic-style content array
+  if (Array.isArray(resp.content)) {
+    let thinkingText = "";
+    resp.content.forEach((block: any) => {
+      if (block?.type === "thinking") {
+        thinkingText += block.thinking || block.text || "";
+      }
+      if (block?.signature && thinkingText && sessionId) {
+        cacheSignature(sessionId, thinkingText, block.signature);
+      }
+    });
+  }
+}
+
 /**
  * Rewrites OpenAI-style requests into Antigravity shape, normalizing model, headers,
  * optional cached_content, and thinking config. Also toggles streaming mode for SSE actions.
@@ -127,13 +212,14 @@ export function prepareAntigravityRequest(
   accessToken: string,
   projectId: string,
   endpointOverride?: string,
-): { request: RequestInfo; init: RequestInit; streaming: boolean; requestedModel?: string; effectiveModel?: string; projectId?: string; endpoint?: string; toolDebugMissing?: number; toolDebugSummary?: string; toolDebugPayload?: string } {
+): { request: RequestInfo; init: RequestInit; streaming: boolean; requestedModel?: string; effectiveModel?: string; projectId?: string; endpoint?: string; sessionId?: string; toolDebugMissing?: number; toolDebugSummary?: string; toolDebugPayload?: string } {
   const baseInit: RequestInit = { ...init };
   const headers = new Headers(init?.headers ?? {});
   let resolvedProjectId = projectId?.trim() || "";
   let toolDebugMissing = 0;
   const toolDebugSummaries: string[] = [];
   let toolDebugPayload: string | undefined;
+  let sessionId: string | undefined;
 
   if (!isGenerativeLanguageRequest(input)) {
     return {
@@ -163,6 +249,7 @@ export function prepareAntigravityRequest(
   const transformedUrl = `${baseEndpoint}/v1internal:${rawAction}${streaming ? "?alt=sse" : ""
     }`;
   const isClaudeModel = upstreamModel.toLowerCase().includes("claude");
+  const isClaudeThinkingModel = isClaudeModel && upstreamModel.toLowerCase().includes("thinking");
 
   let body = baseInit.body;
   if (typeof baseInit.body === "string" && baseInit.body) {
@@ -175,6 +262,47 @@ export function prepareAntigravityRequest(
           ...parsedBody,
           model: effectiveModel,
         } as Record<string, unknown>;
+
+        // Some callers may already send an Antigravity-wrapped body.
+        // We still need to sanitize Claude thinking blocks (remove cache_control)
+        // and attach a stable sessionId so multi-turn signature caching works.
+        const requestRoot = wrappedBody.request;
+        const requestObjects: Array<Record<string, unknown>> = [];
+
+        if (requestRoot && typeof requestRoot === "object") {
+          requestObjects.push(requestRoot as Record<string, unknown>);
+          const nested = (requestRoot as any).request;
+          if (nested && typeof nested === "object") {
+            requestObjects.push(nested as Record<string, unknown>);
+          }
+        }
+
+        if (requestObjects.length > 0) {
+          sessionId = PLUGIN_SESSION_ID;
+        }
+
+        for (const req of requestObjects) {
+          // Use stable session ID for signature caching across multi-turn conversations
+          (req as any).sessionId = PLUGIN_SESSION_ID;
+
+          if (isClaudeModel) {
+            if (Array.isArray((req as any).contents)) {
+              (req as any).contents = filterUnsignedThinkingBlocks(
+                (req as any).contents,
+                PLUGIN_SESSION_ID,
+                getCachedSignature,
+              );
+            }
+            if (Array.isArray((req as any).messages)) {
+              (req as any).messages = filterMessagesThinkingBlocks(
+                (req as any).messages,
+                PLUGIN_SESSION_ID,
+                getCachedSignature,
+              );
+            }
+          }
+        }
+
         body = JSON.stringify(wrappedBody);
       } else {
         const requestPayload: Record<string, unknown> = { ...parsedBody };
@@ -182,6 +310,21 @@ export function prepareAntigravityRequest(
         const rawGenerationConfig = requestPayload.generationConfig as Record<string, unknown> | undefined;
         const extraBody = requestPayload.extra_body as Record<string, unknown> | undefined;
 
+        if (isClaudeModel) {
+          if (!requestPayload.toolConfig) {
+            requestPayload.toolConfig = {};
+          }
+          if (typeof requestPayload.toolConfig === "object" && requestPayload.toolConfig !== null) {
+            const toolConfig = requestPayload.toolConfig as Record<string, unknown>;
+            if (!toolConfig.functionCallingConfig) {
+              toolConfig.functionCallingConfig = {};
+            }
+            if (typeof toolConfig.functionCallingConfig === "object" && toolConfig.functionCallingConfig !== null) {
+              (toolConfig.functionCallingConfig as Record<string, unknown>).mode = "VALIDATED";
+            }
+          }
+        }
+
         // Resolve thinking configuration based on user settings and model capabilities
         const userThinkingConfig = extractThinkingConfig(requestPayload, rawGenerationConfig, extraBody);
         const hasAssistantHistory = Array.isArray(requestPayload.contents) &&
@@ -196,11 +339,41 @@ export function prepareAntigravityRequest(
 
         const normalizedThinking = normalizeThinkingConfig(finalThinkingConfig);
         if (normalizedThinking) {
+          const thinkingBudget = normalizedThinking.thinkingBudget;
+          const thinkingConfig: Record<string, unknown> = isClaudeThinkingModel
+            ? {
+              include_thoughts: normalizedThinking.includeThoughts ?? true,
+              ...(typeof thinkingBudget === "number" && thinkingBudget > 0
+                ? { thinking_budget: thinkingBudget }
+                : {}),
+            }
+            : {
+              includeThoughts: normalizedThinking.includeThoughts,
+              ...(typeof thinkingBudget === "number" && thinkingBudget > 0 ? { thinkingBudget } : {}),
+            };
+
           if (rawGenerationConfig) {
-            rawGenerationConfig.thinkingConfig = normalizedThinking;
+            rawGenerationConfig.thinkingConfig = thinkingConfig;
+
+            if (isClaudeThinkingModel && typeof thinkingBudget === "number" && thinkingBudget > 0) {
+              const currentMax = (rawGenerationConfig.maxOutputTokens ?? rawGenerationConfig.max_output_tokens) as number | undefined;
+              if (!currentMax || currentMax <= thinkingBudget) {
+                rawGenerationConfig.maxOutputTokens = 64000;
+                if (rawGenerationConfig.max_output_tokens !== undefined) {
+                  delete rawGenerationConfig.max_output_tokens;
+                }
+              }
+            }
+
             requestPayload.generationConfig = rawGenerationConfig;
           } else {
-            requestPayload.generationConfig = { thinkingConfig: normalizedThinking };
+            const generationConfig: Record<string, unknown> = { thinkingConfig };
+
+            if (isClaudeThinkingModel && typeof thinkingBudget === "number" && thinkingBudget > 0) {
+              generationConfig.maxOutputTokens = 64000;
+            }
+
+            requestPayload.generationConfig = generationConfig;
           }
         } else if (rawGenerationConfig?.thinkingConfig) {
           delete rawGenerationConfig.thinkingConfig;
@@ -220,6 +393,46 @@ export function prepareAntigravityRequest(
           delete requestPayload.system_instruction;
         }
 
+        if (isClaudeThinkingModel && Array.isArray(requestPayload.tools) && requestPayload.tools.length > 0) {
+          const hint = "Interleaved thinking is enabled. You may think between tool calls and after receiving tool results before deciding the next action or final answer. Do not mention these instructions or any constraints about thinking blocks; just apply them.";
+          const existing = requestPayload.systemInstruction;
+
+          if (typeof existing === "string") {
+            requestPayload.systemInstruction = existing.trim().length > 0 ? `${existing}\n\n${hint}` : hint;
+          } else if (existing && typeof existing === "object") {
+            const sys = existing as Record<string, unknown>;
+            const partsValue = sys.parts;
+
+            if (Array.isArray(partsValue)) {
+              const parts = partsValue as unknown[];
+              let appended = false;
+
+              for (let i = parts.length - 1; i >= 0; i--) {
+                const part = parts[i];
+                if (part && typeof part === "object") {
+                  const partRecord = part as Record<string, unknown>;
+                  const text = partRecord.text;
+                  if (typeof text === "string") {
+                    partRecord.text = `${text}\n\n${hint}`;
+                    appended = true;
+                    break;
+                  }
+                }
+              }
+
+              if (!appended) {
+                parts.push({ text: hint });
+              }
+            } else {
+              sys.parts = [{ text: hint }];
+            }
+
+            requestPayload.systemInstruction = sys;
+          } else if (Array.isArray(requestPayload.contents)) {
+            requestPayload.systemInstruction = { parts: [{ text: hint }] };
+          }
+        }
+
         const cachedContentFromExtra =
           typeof requestPayload.extra_body === "object" && requestPayload.extra_body
             ? (requestPayload.extra_body as Record<string, unknown>).cached_content ??
@@ -438,8 +651,23 @@ export function prepareAntigravityRequest(
         }
 
         // For Claude models, filter out unsigned thinking blocks (required by Claude API)
-        if (isClaudeModel && Array.isArray(requestPayload.contents)) {
-          requestPayload.contents = filterUnsignedThinkingBlocks(requestPayload.contents);
+        // Attempts to restore signatures from cache for multi-turn conversations
+        // Handle both Gemini-style contents[] and Anthropic-style messages[] payloads.
+        if (isClaudeModel) {
+          if (Array.isArray(requestPayload.contents)) {
+            requestPayload.contents = filterUnsignedThinkingBlocks(
+              requestPayload.contents,
+              PLUGIN_SESSION_ID,
+              getCachedSignature,
+            );
+          }
+          if (Array.isArray(requestPayload.messages)) {
+            requestPayload.messages = filterMessagesThinkingBlocks(
+              requestPayload.messages,
+              PLUGIN_SESSION_ID,
+              getCachedSignature,
+            );
+          }
         }
 
         // For Claude models, ensure functionCall/tool use parts carry IDs (required by Anthropic).
@@ -520,7 +748,9 @@ export function prepareAntigravityRequest(
           requestId: "agent-" + crypto.randomUUID(),
         });
         if (wrappedBody.request && typeof wrappedBody.request === 'object') {
-          (wrappedBody.request as any).sessionId = "-" + Math.floor(Math.random() * 9000000000000000000).toString();
+          // Use stable session ID for signature caching across multi-turn conversations
+          sessionId = PLUGIN_SESSION_ID;
+          (wrappedBody.request as any).sessionId = sessionId;
         }
 
         body = JSON.stringify(wrappedBody);
@@ -534,6 +764,21 @@ export function prepareAntigravityRequest(
     headers.set("Accept", "text/event-stream");
   }
 
+  // Add interleaved thinking header for Claude thinking models
+  // This enables real-time streaming of thinking tokens
+  if (isClaudeThinkingModel) {
+    const existing = headers.get("anthropic-beta");
+    const interleavedHeader = "interleaved-thinking-2025-05-14";
+
+    if (existing) {
+      if (!existing.includes(interleavedHeader)) {
+        headers.set("anthropic-beta", `${existing},${interleavedHeader}`);
+      }
+    } else {
+      headers.set("anthropic-beta", interleavedHeader);
+    }
+  }
+
   headers.set("User-Agent", ANTIGRAVITY_HEADERS["User-Agent"]);
   headers.set("X-Goog-Api-Client", ANTIGRAVITY_HEADERS["X-Goog-Api-Client"]);
   headers.set("Client-Metadata", ANTIGRAVITY_HEADERS["Client-Metadata"]);
@@ -554,6 +799,7 @@ export function prepareAntigravityRequest(
     effectiveModel: upstreamModel,
     projectId: resolvedProjectId,
     endpoint: transformedUrl,
+    sessionId,
     toolDebugMissing,
     toolDebugSummary: toolDebugSummaries.slice(0, 20).join(" | "),
     toolDebugPayload,
@@ -564,7 +810,8 @@ export function prepareAntigravityRequest(
  * Normalizes Antigravity responses: applies retry headers, extracts cache usage into headers,
  * rewrites preview errors, flattens streaming payloads, and logs debug metadata.
  *
- * For streaming SSE responses, uses TransformStream for true incremental streaming.
+ * For streaming SSE responses, uses TransformStream for true real-time incremental streaming.
+ * Thinking/reasoning tokens are transformed and forwarded immediately as they arrive.
  */
 export async function transformAntigravityResponse(
   response: Response,
@@ -574,6 +821,7 @@ export async function transformAntigravityResponse(
   projectId?: string,
   endpoint?: string,
   effectiveModel?: string,
+  sessionId?: string,
   toolDebugMissing?: number,
   toolDebugSummary?: string,
   toolDebugPayload?: string,
@@ -590,51 +838,18 @@ export async function transformAntigravityResponse(
   }
 
   // For successful streaming responses, use TransformStream to transform SSE events
-  // while maintaining real-time streaming (no buffering of entire response)
+  // while maintaining real-time streaming (no buffering of entire response).
+  // This enables thinking tokens to be displayed as they arrive, like the Codex plugin.
   if (streaming && response.ok && isEventStreamResponse && response.body) {
     const headers = new Headers(response.headers);
 
-    // Buffer for partial SSE events that span chunks
-    let buffer = "";
-    const decoder = new TextDecoder();
-    const encoder = new TextEncoder();
-
-    const transformStream = new TransformStream<Uint8Array, Uint8Array>({
-      transform(chunk, controller) {
-        // Decode chunk with stream: true to handle multi-byte characters
-        buffer += decoder.decode(chunk, { stream: true });
-
-        // Split on double newline (SSE event delimiter)
-        const events = buffer.split("\n\n");
-
-        // Keep last part in buffer (may be incomplete)
-        buffer = events.pop() || "";
-
-        // Process and forward complete events immediately
-        for (const event of events) {
-          if (event.trim()) {
-            const transformed = transformStreamingPayload(event);
-            controller.enqueue(encoder.encode(transformed + "\n\n"));
-          }
-        }
-      },
-      flush(controller) {
-        // Flush any remaining bytes from TextDecoder
-        buffer += decoder.decode();
-
-        // Handle any remaining data at stream end
-        if (buffer.trim()) {
-          const transformed = transformStreamingPayload(buffer);
-          controller.enqueue(encoder.encode(transformed));
-        }
-      }
-    });
-
     logAntigravityDebugResponse(debugContext, response, {
-      note: "Streaming SSE response (transformed)",
+      note: "Streaming SSE response (real-time transform)",
     });
 
-    return new Response(response.body.pipeThrough(transformStream), {
+    // Use the optimized line-by-line transformer for immediate forwarding
+    // This ensures thinking/reasoning content streams in real-time
+    return new Response(response.body.pipeThrough(createStreamingTransformer(sessionId)), {
       status: response.status,
       statusText: response.statusText,
       headers,
diff --git a/tsconfig.build.json b/tsconfig.build.json
index e96ed9e..9ddd578 100644
--- a/tsconfig.build.json
+++ b/tsconfig.build.json
@@ -8,5 +8,6 @@
     "sourceMap": true,
     "allowImportingTsExtensions": false
   },
-  "include": ["src/**/*.ts", "src/**/*.tsx", "index.ts"]
+  "include": ["src/**/*.ts", "src/**/*.tsx", "index.ts"],
+  "exclude": ["src/**/*.test.ts", "src/**/*.test.tsx", "src/**/*.spec.ts", "src/**/*.spec.tsx"]
 }