Merge pull request #28 from NoeFabris/claude-improvements

2026-05-20 20:36:21 +00:00 · 2025-12-18 12:15:27 +00:00
parent 75a1ea8059 dd17d3516c
commit 5e735b6049
13 changed files with 2914 additions and 90 deletions
--- a/README.md
+++ b/README.md
@@ -8,6 +8,8 @@ Enable Opencode to authenticate against **Antigravity** (Google's IDE) via OAuth

 - **Google OAuth sign-in** (multi-account via `opencode auth login`) with automatic token refresh
 - **Multi-account load balancing** Automatically cycle through multiple Google accounts to maximize rate limits
+- **Real-time SSE streaming** including thinking blocks and incremental output
+- **Advanced Claude support** Interleaved thinking, stable multi-turn signatures, and validated tool calling
 - **Automatic endpoint fallback** between Antigravity API endpoints (daily → autopush → prod)
 - **Antigravity API compatibility** for OpenAI-style requests
 - **Debug logging** for requests and responses
@@ -204,6 +206,29 @@ The `/connect` command in the TUI adds accounts non-destructively — it will ne
 - If Google revokes a refresh token (`invalid_grant`), that account is automatically removed from the pool
 - Rerun `opencode auth login` to re-add the account

+## Architecture & Flow
+
+For contributors and advanced users, see the detailed documentation:
+
+- **[Claude Model Flow](docs/CLAUDE_MODEL_FLOW.md)** - Full request/response flow, improvements, and fixes
+- **[Antigravity API Spec](docs/ANTIGRAVITY_API_SPEC.md)** - API reference and schema support matrix
+
+## Streaming & Thinking
+
+This plugin supports **real-time SSE streaming**, meaning you see thinking blocks and text output incrementally as they are generated.
+
+### Claude Thinking & Tools
+
+For models like `claude-opus-4-5-thinking`:
+
+- **Interleaved Thinking:** The plugin automatically sends the `anthropic-beta: interleaved-thinking-2025-05-14` header. This allows Claude to think *between* tool calls and after tool results, improving complex reasoning.
+- **Smart System Hints:** A system instruction is silently added to encourage the model to "think" before and during tool use.
+- **Multi-turn Stability:** Thinking signatures are cached and restored using a stable `sessionId`, preventing "invalid signature" errors in long conversations.
+- **Thinking Budget Safety:** If a thinking budget is enabled, the plugin ensures output token limits are high enough to avoid budget-related errors.
+- **Tool Use:** Tool calls and responses are assigned proper IDs, and tool calling is set to validated mode for better Claude compatibility.
+
+**Troubleshooting:** If you see signature errors in multi-turn tool loops, restart `opencode` to reset the plugin session/signature cache.
+
 ## Debugging

 Enable verbose logging:
--- a/docs/ANTIGRAVITY_API_SPEC.md
+++ b/docs/ANTIGRAVITY_API_SPEC.md
@@ -0,0 +1,558 @@
+# Antigravity Unified Gateway API Specification
+
+**Version:** 1.0  
+**Last Updated:** December 14, 2025  
+**Status:** Verified by Direct API Testing
+
+---
+
+## Overview
+
+Antigravity is Google's **Unified Gateway API** for accessing multiple AI models (Claude, Gemini, GPT-OSS) through a single, consistent Gemini-style interface. It is NOT the same as Vertex AI's direct model APIs.
+
+### Key Characteristics
+
+- **Single API format** for all models (Gemini-style)
+- **Project-based access** via Google Cloud authentication
+- **Internal routing** to model backends (Vertex AI for Claude, Gemini API for Gemini)
+- **Unified response format** (`candidates[]` structure for all models)
+
+---
+
+## Endpoints
+
+| Environment | URL | Status |
+|-------------|-----|--------|
+| **Daily (Sandbox)** | `https://daily-cloudcode-pa.sandbox.googleapis.com` | ✅ Active |
+| **Production** | `https://cloudcode-pa.googleapis.com` | ✅ Active |
+| **Autopush (Sandbox)** | `https://autopush-cloudcode-pa.sandbox.googleapis.com` | ❌ Unavailable |
+
+### API Actions
+
+| Action | Path | Description |
+|--------|------|-------------|
+| Generate Content | `/v1internal:generateContent` | Non-streaming request |
+| Stream Generate | `/v1internal:streamGenerateContent?alt=sse` | Streaming (SSE) request |
+| Load Code Assist | `/v1internal:loadCodeAssist` | Project discovery |
+| Onboard User | `/v1internal:onboardUser` | User onboarding |
+
+---
+
+## Authentication
+
+### OAuth 2.0 Setup
+
+```
+Authorization URL: https://accounts.google.com/o/oauth2/auth
+Token URL: https://oauth2.googleapis.com/token
+```
+
+### Required Scopes
+
+```
+https://www.googleapis.com/auth/cloud-platform
+https://www.googleapis.com/auth/userinfo.email
+https://www.googleapis.com/auth/userinfo.profile
+https://www.googleapis.com/auth/cclog
+https://www.googleapis.com/auth/experimentsandconfigs
+```
+
+### Required Headers
+
+```http
+Authorization: Bearer {access_token}
+Content-Type: application/json
+User-Agent: antigravity/1.11.5 windows/amd64
+X-Goog-Api-Client: google-cloud-sdk vscode_cloudshelleditor/0.1
+Client-Metadata: {"ideType":"IDE_UNSPECIFIED","platform":"PLATFORM_UNSPECIFIED","pluginType":"GEMINI"}
+```
+
+For streaming requests, also include:
+```http
+Accept: text/event-stream
+```
+
+---
+
+## Available Models
+
+| Model Name | Model ID | Type | Status |
+|------------|----------|------|--------|
+| Claude Sonnet 4.5 | `claude-sonnet-4-5` | Anthropic | ✅ Verified |
+| Claude Sonnet 4.5 Thinking | `claude-sonnet-4-5-thinking` | Anthropic | ✅ Verified |
+| Claude Opus 4.5 Thinking | `claude-opus-4-5-thinking` | Anthropic | ✅ Verified |
+| Gemini 3 Pro High | `gemini-3-pro-high` | Google | ✅ Verified |
+| Gemini 3 Pro Low | `gemini-3-pro-low` | Google | ✅ Verified |
+| GPT-OSS 120B Medium | `gpt-oss-120b-medium` | Other | ✅ Verified |
+
+---
+
+## Request Format
+
+### Basic Structure
+
+```json
+{
+  "project": "{project_id}",
+  "model": "{model_id}",
+  "request": {
+    "contents": [...],
+    "generationConfig": {...},
+    "systemInstruction": {...},
+    "tools": [...]
+  },
+  "userAgent": "antigravity",
+  "requestId": "{unique_id}"
+}
+```
+
+### Contents Array (REQUIRED)
+
+**⚠️ IMPORTANT: Must use Gemini-style format. Anthropic-style `messages` array is NOT supported.**
+
+```json
+{
+  "contents": [
+    {
+      "role": "user",
+      "parts": [
+        { "text": "Your message here" }
+      ]
+    },
+    {
+      "role": "model",
+      "parts": [
+        { "text": "Assistant response" }
+      ]
+    }
+  ]
+}
+```
+
+#### Role Values
+- `user` - Human/user messages
+- `model` - Assistant responses (NOT `assistant`)
+
+### Generation Config
+
+```json
+{
+  "generationConfig": {
+    "maxOutputTokens": 1000,
+    "temperature": 0.7,
+    "topP": 0.95,
+    "topK": 40,
+    "stopSequences": ["STOP"],
+    "thinkingConfig": {
+      "thinkingBudget": 8000,
+      "includeThoughts": true
+    }
+  }
+}
+```
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `maxOutputTokens` | number | Maximum tokens in response |
+| `temperature` | number | Randomness (0.0 - 2.0) |
+| `topP` | number | Nucleus sampling threshold |
+| `topK` | number | Top-K sampling |
+| `stopSequences` | string[] | Stop generation triggers |
+| `thinkingConfig` | object | Extended thinking config |
+
+### System Instructions
+
+**⚠️ Must be an object with `parts`, NOT a plain string.**
+
+```json
+// ✅ CORRECT
+{
+  "systemInstruction": {
+    "parts": [
+      { "text": "You are a helpful assistant." }
+    ]
+  }
+}
+
+// ❌ WRONG - Will return 400 error
+{
+  "systemInstruction": "You are a helpful assistant."
+}
+```
+
+### Tools / Function Calling
+
+```json
+{
+  "tools": [
+    {
+      "functionDeclarations": [
+        {
+          "name": "get_weather",
+          "description": "Get weather for a location",
+          "parameters": {
+            "type": "object",
+            "properties": {
+              "location": {
+                "type": "string",
+                "description": "City name"
+              }
+            },
+            "required": ["location"]
+          }
+        }
+      ]
+    }
+  ]
+}
+```
+
+### Function Name Rules
+
+| Rule | Description |
+|------|-------------|
+| First character | Must be a letter (a-z, A-Z) or underscore (_) |
+| Allowed characters | `a-zA-Z0-9`, underscores (`_`), dots (`.`), colons (`:`), dashes (`-`) |
+| Max length | 64 characters |
+| Not allowed | Slashes (`/`), spaces, other special characters |
+
+**Examples:**
+- ✅ `get_weather` - Valid
+- ✅ `mcp:mongodb.query` - Valid (colons and dots allowed)
+- ✅ `read-file` - Valid (dashes allowed)
+- ❌ `mcp/query` - Invalid (slashes not allowed)
+- ❌ `123_tool` - Invalid (must start with letter or underscore)
+
+### JSON Schema Support
+
+| Feature | Status | Notes |
+|---------|--------|-------|
+| `type` | ✅ Supported | `object`, `string`, `number`, `integer`, `boolean`, `array` |
+| `properties` | ✅ Supported | Object properties |
+| `required` | ✅ Supported | Required fields array |
+| `description` | ✅ Supported | Field descriptions |
+| `enum` | ✅ Supported | Enumerated values |
+| `items` | ✅ Supported | Array item schema |
+| `anyOf` | ✅ Supported | Converted to `any_of` internally |
+| `allOf` | ✅ Supported | Converted to `all_of` internally |
+| `oneOf` | ✅ Supported | Converted to `one_of` internally |
+| `additionalProperties` | ✅ Supported | Additional properties schema |
+| `const` | ❌ NOT Supported | Use `enum: [value]` instead |
+| `$ref` | ❌ NOT Supported | Inline the schema instead |
+| `$defs` / `definitions` | ❌ NOT Supported | Inline definitions instead |
+| `$schema` | ❌ NOT Supported | Strip from schema |
+| `$id` | ❌ NOT Supported | Strip from schema |
+| `default` | ❌ NOT Supported | Strip from schema |
+| `examples` | ❌ NOT Supported | Strip from schema |
+| `title` (nested) | ⚠️ Caution | May cause issues in nested objects |
+
+**⚠️ IMPORTANT:** The following features will cause a 400 error if sent to the API:
+- `const` - Convert to `enum: [value]` instead
+- `$ref` / `$defs` - Inline the schema definitions
+- `$schema` / `$id` - Strip these metadata fields
+- `default` / `examples` - Strip these documentation fields
+
+```json
+// ❌ WRONG - Will return 400 error
+{ "type": { "const": "email" } }
+
+// ✅ CORRECT - Use enum instead
+{ "type": { "enum": ["email"] } }
+```
+
+**Note:** The plugin automatically handles these conversions via the `schema-transform.ts` module.
+
+---
+
+## Response Format
+
+### Non-Streaming Response
+
+```json
+{
+  "response": {
+    "candidates": [
+      {
+        "content": {
+          "role": "model",
+          "parts": [
+            { "text": "Response text here" }
+          ]
+        },
+        "finishReason": "STOP"
+      }
+    ],
+    "usageMetadata": {
+      "promptTokenCount": 16,
+      "candidatesTokenCount": 4,
+      "totalTokenCount": 20
+    },
+    "modelVersion": "claude-sonnet-4-5",
+    "responseId": "msg_vrtx_..."
+  },
+  "traceId": "abc123..."
+}
+```
+
+### Streaming Response (SSE)
+
+Content-Type: `text/event-stream`
+
+```
+data: {"response": {"candidates": [{"content": {"role": "model", "parts": [{"text": "Hello"}]}}], "usageMetadata": {...}, "modelVersion": "...", "responseId": "..."}, "traceId": "..."}
+
+data: {"response": {"candidates": [{"content": {"role": "model", "parts": [{"text": " world"}]}, "finishReason": "STOP"}], "usageMetadata": {...}}, "traceId": "..."}
+
+```
+
+### Response Fields
+
+| Field | Description |
+|-------|-------------|
+| `response.candidates` | Array of response candidates |
+| `response.candidates[].content.role` | Always `"model"` |
+| `response.candidates[].content.parts` | Array of content parts |
+| `response.candidates[].finishReason` | `STOP`, `MAX_TOKENS`, `OTHER` |
+| `response.usageMetadata.promptTokenCount` | Input tokens |
+| `response.usageMetadata.candidatesTokenCount` | Output tokens |
+| `response.usageMetadata.totalTokenCount` | Total tokens |
+| `response.usageMetadata.thoughtsTokenCount` | Thinking tokens (Gemini) |
+| `response.modelVersion` | Actual model used |
+| `response.responseId` | Response ID (format varies by model) |
+| `traceId` | Trace ID for debugging |
+
+### Response ID Formats
+
+| Model Type | Format | Example |
+|------------|--------|---------|
+| Claude | `msg_vrtx_...` | `msg_vrtx_01UDKZG8PWPj9mjajje8d7u7` |
+| Gemini | Base64-like | `ypM9abPqFKWl0-kPvamgqQw` |
+| GPT-OSS | Base64-like | `y5M9aZaSKq6z2roPoJ7pEA` |
+
+---
+
+## Function Call Response
+
+When the model wants to call a function:
+
+```json
+{
+  "response": {
+    "candidates": [
+      {
+        "content": {
+          "role": "model",
+          "parts": [
+            {
+              "functionCall": {
+                "name": "get_weather",
+                "args": {
+                  "location": "Paris"
+                },
+                "id": "toolu_vrtx_01PDbPTJgBJ3AJ8BCnSXvUqk"
+              }
+            }
+          ]
+        },
+        "finishReason": "OTHER"
+      }
+    ]
+  }
+}
+```
+
+### Providing Function Results
+
+```json
+{
+  "contents": [
+    { "role": "user", "parts": [{ "text": "What's the weather?" }] },
+    { "role": "model", "parts": [{ "functionCall": { "name": "get_weather", "args": {...}, "id": "..." } }] },
+    { "role": "user", "parts": [{ "functionResponse": { "name": "get_weather", "id": "...", "response": { "temperature": "22C" } } }] }
+  ]
+}
+```
+
+---
+
+## Thinking / Extended Reasoning
+
+### Thinking Config
+
+For thinking-capable models (`*-thinking`), use:
+
+```json
+{
+  "generationConfig": {
+    "maxOutputTokens": 10000,
+    "thinkingConfig": {
+      "thinkingBudget": 8000,
+      "includeThoughts": true
+    }
+  }
+}
+```
+
+**⚠️ IMPORTANT: `maxOutputTokens` must be GREATER than `thinkingBudget`**
+
+### Thinking Response (Gemini)
+
+Gemini models return thinking with signatures:
+
+```json
+{
+  "parts": [
+    {
+      "thoughtSignature": "ErADCq0DAXLI2nx...",
+      "text": "Let me think about this..."
+    },
+    {
+      "text": "The answer is..."
+    }
+  ]
+}
+```
+
+### Thinking Response (Claude)
+
+Claude thinking models may include `thought: true` parts:
+
+```json
+{
+  "parts": [
+    {
+      "thought": true,
+      "text": "Reasoning process...",
+      "thoughtSignature": "..."
+    },
+    {
+      "text": "Final answer..."
+    }
+  ]
+}
+```
+
+---
+
+## Error Responses
+
+### Error Structure
+
+```json
+{
+  "error": {
+    "code": 400,
+    "message": "Error description",
+    "status": "INVALID_ARGUMENT",
+    "details": [...]
+  }
+}
+```
+
+### Common Error Codes
+
+| Code | Status | Description |
+|------|--------|-------------|
+| 400 | `INVALID_ARGUMENT` | Invalid request format |
+| 401 | `UNAUTHENTICATED` | Invalid/expired token |
+| 403 | `PERMISSION_DENIED` | No access to resource |
+| 404 | `NOT_FOUND` | Model not found |
+| 429 | `RESOURCE_EXHAUSTED` | Rate limit exceeded |
+
+### Rate Limit Response
+
+```json
+{
+  "error": {
+    "code": 429,
+    "message": "You have exhausted your capacity on this model. Your quota will reset after 3s.",
+    "status": "RESOURCE_EXHAUSTED",
+    "details": [
+      {
+        "@type": "type.googleapis.com/google.rpc.RetryInfo",
+        "retryDelay": "3.957525076s"
+      }
+    ]
+  }
+}
+```
+
+---
+
+## NOT Supported
+
+The following Anthropic/Vertex AI features are **NOT supported**:
+
+| Feature | Error |
+|---------|-------|
+| `anthropic_version` | Unknown field |
+| `messages` array | Unknown field |
+| `max_tokens` | Unknown field |
+| Plain string `systemInstruction` | Invalid value |
+| `system_instruction` (snake_case at root) | Unknown field |
+| JSON Schema `const` | Unknown field (use `enum: [value]`) |
+| JSON Schema `$ref` | Not supported (inline instead) |
+| JSON Schema `$defs` | Not supported (inline instead) |
+| Tool names with `/` | Invalid (use `_` or `:` instead) |
+| Tool names starting with digit | Invalid (must start with letter/underscore) |
+
+---
+
+## Complete Request Example
+
+```json
+{
+  "project": "my-project-id",
+  "model": "claude-sonnet-4-5",
+  "request": {
+    "contents": [
+      {
+        "role": "user",
+        "parts": [
+          { "text": "Hello, how are you?" }
+        ]
+      }
+    ],
+    "systemInstruction": {
+      "parts": [
+        { "text": "You are a helpful assistant." }
+      ]
+    },
+    "generationConfig": {
+      "maxOutputTokens": 1000,
+      "temperature": 0.7
+    }
+  },
+  "userAgent": "antigravity",
+  "requestId": "agent-abc123"
+}
+```
+
+---
+
+## Response Headers
+
+| Header | Description |
+|--------|-------------|
+| `x-cloudaicompanion-trace-id` | Trace ID for debugging |
+| `server-timing` | Request duration |
+
+---
+
+## Comparison: Antigravity vs Vertex AI Anthropic
+
+| Feature | Antigravity | Vertex AI Anthropic |
+|---------|-------------|---------------------|
+| Endpoint | `cloudcode-pa.googleapis.com` | `aiplatform.googleapis.com` |
+| Request format | Gemini-style `contents` | Anthropic `messages` |
+| `anthropic_version` | Not used | Required |
+| Model names | Simple (`claude-sonnet-4-5`) | Versioned (`claude-4-5@date`) |
+| Response format | `candidates[]` | Anthropic `content[]` |
+| Multi-model support | Yes (Claude, Gemini, etc.) | Anthropic only |
+
+---
+
+## Changelog
+
+- **2025-12-14**: Added function calling quirks, JSON Schema support matrix, tool name rules
+- **2025-12-13**: Initial specification based on direct API testing
--- a/docs/CLAUDE_MODEL_FLOW.md
+++ b/docs/CLAUDE_MODEL_FLOW.md
@@ -0,0 +1,396 @@
+# Claude Model Flow: OpenCode → Plugin → Antigravity API
+
+**Version:** 1.1  
+**Last Updated:** December 2025  
+**Branches:** `claude-improvements`, `improve-tools-call-sanitizer`
+
+---
+
+## Overview
+
+This document explains how Claude models are handled through the Antigravity plugin, including the full request/response flow, all quirks and adaptations, and recent improvements.
+
+### Why Special Handling?
+
+Claude models via Antigravity require special handling because:
+
+1. **Gemini-style format** - Antigravity uses `contents[]` with `parts[]`, not Anthropic's `messages[]`
+2. **Thinking signatures** - Multi-turn conversations require signed thinking blocks
+3. **Tool schema restrictions** - Claude rejects unsupported JSON Schema features (`const`, `$ref`, etc.)
+4. **SDK injection** - OpenCode SDKs may inject fields (`cache_control`) that Claude rejects
+5. **OpenCode expectations** - Response format must be transformed to match OpenCode's expected structure
+
+---
+
+## Full Request Flow
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│  OpenCode Request (OpenAI-style)                            │
+│  POST to generativelanguage.googleapis.com/models/claude-*  │
+└─────────────────────────────────────────────────────────────┘
+                              ↓
+┌─────────────────────────────────────────────────────────────┐
+│  plugin.ts (fetch interceptor)                              │
+│  • Account selection & round-robin rotation                 │
+│  • Token refresh if expired                                 │
+│  • Rate limit handling (429 → switch account or wait)       │
+│  • Endpoint fallback (daily → autopush → prod)              │
+└─────────────────────────────────────────────────────────────┘
+                              ↓
+┌─────────────────────────────────────────────────────────────┐
+│  request.ts :: prepareAntigravityRequest()                  │
+│  • Detect Claude model from URL                             │
+│  • Set toolConfig.functionCallingConfig.mode = "VALIDATED"  │
+│  • Configure thinkingConfig for *-thinking models           │
+│  • Sanitize tool schemas (allowlist approach)               │
+│  • Add placeholder property for empty tool schemas          │
+│  • Filter unsigned thinking blocks from history             │
+│  • Restore signatures from cache if available               │
+│  • Assign tool call/response IDs (FIFO matching)            │
+│  • Inject interleaved-thinking system hint                  │
+│  • Add anthropic-beta: interleaved-thinking-2025-05-14      │
+│  • Wrap in Antigravity format: {project, model, request}    │
+└─────────────────────────────────────────────────────────────┘
+                              ↓
+┌─────────────────────────────────────────────────────────────┐
+│  Antigravity API                                            │
+│  POST https://cloudcode-pa.googleapis.com/v1internal:*      │
+│  • Gemini-style request format                              │
+│  • Returns SSE stream with candidates[] structure           │
+└─────────────────────────────────────────────────────────────┘
+                              ↓
+┌─────────────────────────────────────────────────────────────┐
+│  request.ts :: transformAntigravityResponse()               │
+│  • Real-time SSE TransformStream (line-by-line)             │
+│  • Cache thinking signatures for multi-turn reuse           │
+│  • Transform thought parts → reasoning format               │
+│  • Unwrap response envelope for OpenCode                    │
+│  • Extract and forward usage metadata                       │
+└─────────────────────────────────────────────────────────────┘
+                              ↓
+┌─────────────────────────────────────────────────────────────┐
+│  OpenCode Response (streamed incrementally)                 │
+│  Thinking tokens visible as they arrive                     │
+│  Format: type: "reasoning" with thought: true               │
+└─────────────────────────────────────────────────────────────┘
+```
+
+---
+
+## Claude-Specific Quirks & Adaptations
+
+This section documents all 36 quirks and adaptations required for Claude models to work properly through the Antigravity unified gateway and with OpenCode.
+
+### 1. Request Format Quirks
+
+These quirks handle the translation from OpenCode/Anthropic format to Antigravity's Gemini-style format.
+
+| # | Quirk | Problem | Adaptation |
+|---|-------|---------|------------|
+| 1 | **Message format** | Antigravity uses Gemini-style, not Anthropic | Transform `messages[]` → `contents[].parts[]` |
+| 2 | **Role names** | Claude uses `assistant` | Map to `model` for Antigravity |
+| 3 | **System instruction format** | Plain string rejected (400 error) | Wrap in `{ parts: [{ text: "..." }] }` |
+| 4 | **Field name casing** | `system_instruction` (snake_case) rejected | Convert to `systemInstruction` (camelCase) |
+
+**Example - System Instruction:**
+```json
+// ❌ WRONG - Returns 400 error
+{ "systemInstruction": "You are helpful." }
+
+// ✅ CORRECT - Wrapped format
+{ "systemInstruction": { "parts": [{ "text": "You are helpful." }] } }
+```
+
+### 2. Tool/Function Calling Quirks
+
+These quirks ensure tools work correctly with Claude's VALIDATED mode via Antigravity.
+
+| # | Quirk | Problem | Adaptation |
+|---|-------|---------|------------|
+| 5 | **VALIDATED mode** | Default mode may fail | Force `toolConfig.functionCallingConfig.mode = "VALIDATED"` |
+| 6 | **Unsupported schema features** | `const`, `$ref`, `$defs`, `default`, `examples` cause 400 | Allowlist-based `sanitizeSchema()` strips all unsupported fields |
+| 7 | **`const` keyword** | Not supported by gateway | Convert `const: "value"` → `enum: ["value"]` |
+| 8 | **Empty schemas** | VALIDATED mode fails on `{type: "object"}` with no properties | Add placeholder `reason` property with `required: ["reason"]` |
+| 9 | **Empty `items`** | `items: {}` is invalid | Convert to `items: { type: "string" }` |
+| 10 | **Tool name characters** | Special chars like `/` rejected | Replace `[^a-zA-Z0-9_-]` with `_`, max 64 chars |
+| 11 | **Tool structure variants** | SDKs send various formats (function, custom, etc.) | Normalize all to `{ functionDeclarations: [...] }` |
+| 12 | **Tool call/response IDs** | Claude requires matching IDs for function responses | Assign IDs via FIFO queue per function name |
+
+**Schema Allowlist (only these fields are kept):**
+- `type`, `properties`, `required`, `description`, `enum`, `items`, `additionalProperties`
+
+**Example - Const Conversion:**
+```json
+// ❌ WRONG - Returns 400 error
+{ "type": { "const": "email" } }
+
+// ✅ CORRECT - Converted by plugin
+{ "type": { "enum": ["email"] } }
+```
+
+**Example - Empty Schema Fix:**
+```json
+// ❌ WRONG - VALIDATED mode fails
+{ "type": "object", "properties": {} }
+
+// ✅ CORRECT - Placeholder added
+{
+  "type": "object",
+  "properties": {
+    "reason": { "type": "string", "description": "Brief explanation of why you are calling this tool" }
+  },
+  "required": ["reason"]
+}
+```
+
+### 3. Thinking Block Quirks (Multi-turn)
+
+These quirks handle Claude's requirement for signed thinking blocks in multi-turn conversations.
+
+| # | Quirk | Problem | Adaptation |
+|---|-------|---------|------------|
+| 13 | **Signature requirement** | Multi-turn needs signed thinking blocks or 400 error | Cache signatures by session ID + text hash |
+| 14 | **`cache_control` in thinking** | SDK injects, Claude rejects (400) | `stripCacheControlRecursively()` removes at any depth |
+| 15 | **`providerOptions` in thinking** | SDK injects, Claude rejects | Strip via `sanitizeThinkingPart()` |
+| 16 | **Wrapped `thinking` field** | SDK may wrap: `{ thinking: { text: "...", cache_control: {} } }` | Extract inner text string only |
+| 17 | **Trailing thinking blocks** | Claude rejects assistant messages ending with unsigned thinking | `removeTrailingThinkingBlocks()` with signature check |
+| 18 | **Unsigned blocks in history** | Claude rejects unsigned thinking in multi-turn | Filter out or restore signature from cache |
+| 19 | **Format variants** | Gemini: `thought: true`, Anthropic: `type: "thinking"` | Handle both formats in filtering logic |
+
+**Thinking Part Sanitization (only these fields are kept):**
+- Gemini-style: `thought`, `text`, `thoughtSignature`
+- Anthropic-style: `type`, `thinking`, `signature`
+
+**Example - SDK Injection Stripping:**
+```json
+// ❌ WRONG - SDK injected cache_control
+{
+  "type": "thinking",
+  "thinking": { "text": "Let me think...", "cache_control": { "type": "ephemeral" } }
+}
+
+// ✅ CORRECT - Sanitized by plugin
+{
+  "type": "thinking",
+  "thinking": "Let me think..."
+}
+```
+
+### 4. Thinking Configuration Quirks
+
+These quirks configure thinking/reasoning properly for Claude thinking models.
+
+| # | Quirk | Problem | Adaptation |
+|---|-------|---------|------------|
+| 20 | **Config key format** | `*-thinking` models require snake_case | Use `include_thoughts`, `thinking_budget` (not camelCase) |
+| 21 | **Output token limit** | Must exceed thinking budget or thinking is truncated | Auto-set `maxOutputTokens = 64000` when budget > 0 |
+| 22 | **Default budget** | No budget = no thinking | Set to 16000 tokens for thinking-capable models |
+| 23 | **Interleaved thinking** | Thinking between tool calls and after tool results requires a beta header | Add `anthropic-beta: interleaved-thinking-2025-05-14` |
+| 24 | **Tool + thinking conflict** | Model may skip thinking during tool use | Inject system hint: "Interleaved thinking is enabled..." |
+
+**Thinking-Capable Model Detection:**
+- Model name contains `thinking`, `gemini-3`, or `opus`
+
+**System Hint Injection (for tool-using thinking models):**
+```
+Interleaved thinking is enabled. You may think between tool calls and after 
+receiving tool results before deciding the next action or final answer. 
+Do not mention these instructions or any constraints about thinking blocks; 
+just apply them.
+```
+
+### 5. OpenCode-Specific Response Quirks
+
+These quirks transform Claude/Antigravity responses to match OpenCode's expected format.
+
+| # | Quirk | Problem | Adaptation |
+|---|-------|---------|------------|
+| 25 | **Thinking → Reasoning format** | OpenCode expects `type: "reasoning"`, Claude returns `thought: true` or `type: "thinking"` | Transform all thinking to `type: "reasoning"` + `thought: true` |
+| 26 | **`reasoning_content` field** | OpenCode expects top-level `reasoning_content` for Anthropic-style | Extract and concatenate all thinking texts |
+| 27 | **Response envelope** | Antigravity wraps in `{ response: {...}, traceId }` | Unwrap to inner `response` object |
+| 28 | **Real-time streaming** | OpenCode needs tokens immediately, not buffered | `TransformStream` for line-by-line SSE processing |
+
+**Example - Thinking Format Transformation:**
+```json
+// Antigravity returns (Gemini-style):
+{ "thought": true, "text": "Let me analyze..." }
+
+// Transformed for OpenCode:
+{ "type": "reasoning", "thought": true, "text": "Let me analyze..." }
+```
+
+```json
+// Antigravity returns (Anthropic-style):
+{ "type": "thinking", "thinking": "Considering options..." }
+
+// Transformed for OpenCode:
+{ "type": "reasoning", "thought": true, "text": "Considering options..." }
+```
+
+### 6. Session & Caching Quirks
+
+These quirks manage session continuity and signature caching across multi-turn conversations.
+
+| # | Quirk | Problem | Adaptation |
+|---|-------|---------|------------|
+| 29 | **Session continuity** | Signatures tied to session, lost on restart | Generate stable `PLUGIN_SESSION_ID` at plugin load |
+| 30 | **Request tracking** | Need consistent session across multi-turn | Inject `sessionId` into request payload |
+| 31 | **Signature extraction** | Need to cache signatures from streaming response | Extract `thoughtSignature` from SSE chunks as they arrive |
+| 32 | **Cache key** | Need stable lookup across turns | Hash by session ID + thinking text |
+| 33 | **Cache limits** | Memory could grow unbounded | TTL: 1 hour, max 100 entries per session |
+
+**Signature Caching Flow:**
+```
+Turn 1 (Response):
+  Claude returns: { thought: true, text: "...", thoughtSignature: "abc123..." }
+  Plugin caches: (sessionId, hash("...")) → "abc123..."
+
+Turn 2 (Request):
+  OpenCode sends thinking block without signature
+  Plugin looks up: (sessionId, hash("...")) → "abc123..."
+  Plugin restores signature before sending to Antigravity
+```
+
+### 7. Error Handling Quirks
+
+These quirks improve error handling and debugging for Claude requests.
+
+| # | Quirk | Problem | Adaptation |
+|---|-------|---------|------------|
+| 34 | **Rate limit format** | `RetryInfo.retryDelay: "3.957s"` not standard HTTP | Parse to `Retry-After` and `retry-after-ms` headers |
+| 35 | **Debug visibility** | Errors lack context for debugging | Inject model, project, endpoint, status into error message |
+| 36 | **Preview access** | 404 for unenrolled users is confusing | Rewrite with preview access link |
+
+**Example - Enhanced Error Message:**
+```
+Original error: "Model not found"
+
+Enhanced by plugin:
+"Model not found
+
+[Debug Info]
+Requested Model: claude-sonnet-4-5-thinking
+Effective Model: claude-sonnet-4-5-thinking
+Project: my-project-id
+Endpoint: https://cloudcode-pa.googleapis.com/v1internal:streamGenerateContent
+Status: 404"
+```
+
+---
+
+## Improvements from `claude-improvements` Branch
+
+| Feature | Description | Location |
+|---------|-------------|----------|
+| **Signature Caching** | Cache thinking signatures by text hash for multi-turn conversations. Prevents "invalid signature" errors. | `cache.ts` |
+| **Real-time Streaming** | `TransformStream` processes SSE line-by-line for immediate token display | `request.ts:87-121` |
+| **Interleaved Thinking** | Auto-enable `anthropic-beta: interleaved-thinking-2025-05-14` header | `request.ts:813-824` |
+| **Validated Tool Calling** | Set `functionCallingConfig.mode = "VALIDATED"` for Claude models | `request.ts:314-325` |
+| **System Hints** | Auto-inject thinking hint into system instruction for tool-using models | `request.ts:396-434` |
+| **Output Token Safety** | Auto-set `maxOutputTokens = 64000` when thinking budget is enabled | `request.ts:358-377` |
+| **Stable Session ID** | Use `PLUGIN_SESSION_ID` across all requests for consistent signature caching | `request.ts:28` |
+
+---
+
+## Fixes from `improve-tools-call-sanitizer` Branch
+
+| Fix | Problem | Solution | Location |
+|-----|---------|----------|----------|
+| **Thinking Block Sanitization** | Claude API rejects `cache_control` and `providerOptions` inside thinking blocks | `sanitizeThinkingPart()` extracts only allowed fields (`type`, `thinking`, `signature`, `thought`, `text`, `thoughtSignature`) | `request-helpers.ts:179-215` |
+| **Deep Cache Control Strip** | SDK may nest `cache_control` in wrapped objects | `stripCacheControlRecursively()` removes at any depth | `request-helpers.ts:162-173` |
+| **Trailing Thinking Preservation** | Signed trailing thinking blocks were being incorrectly removed | `removeTrailingThinkingBlocks()` now checks `hasValidSignature()` before removal | `request-helpers.ts:125-131` |
+| **Signature Validation** | Need to identify valid signatures | `hasValidSignature()` checks for string ≥50 chars | `request-helpers.ts:137-140` |
+| **Schema Sanitization** | Claude rejects `const`, `$ref`, `$defs`, `default`, `examples` | Allowlist-based `sanitizeSchema()` keeps only basic features | `request.ts:468-523` |
+| **Empty Schema Fix** | Claude VALIDATED mode fails on `{type: "object"}` with no properties | Add placeholder `reason` property with `required: ["reason"]` | `request.ts:529-539` |
+| **Const → Enum Conversion** | `const` not supported | Convert `const: "value"` to `enum: ["value"]` | `request.ts:489-491` |
+
+---
+
+## Key Components Reference
+
+### `src/plugin.ts`
+Entry point. Intercepts `fetch()` for `generativelanguage.googleapis.com` requests. Manages account pool, token refresh, rate limits, and endpoint fallbacks.
+
+### `src/plugin/request.ts`
+| Function | Purpose |
+|----------|---------|
+| `prepareAntigravityRequest()` | Transforms OpenAI-style → Antigravity wrapped format |
+| `transformAntigravityResponse()` | Processes SSE stream, caches signatures, transforms thinking parts |
+| `createStreamingTransformer()` | Real-time line-by-line SSE processing |
+| `cacheThinkingSignatures()` | Extracts and caches signatures from response stream |
+| `sanitizeSchema()` | Allowlist-based schema sanitization for tools |
+| `normalizeSchema()` | Adds placeholder for empty tool schemas |
+
+### `src/plugin/request-helpers.ts`
+| Function | Purpose |
+|----------|---------|
+| `filterUnsignedThinkingBlocks()` | Filters/sanitizes thinking blocks in `contents[]` |
+| `filterMessagesThinkingBlocks()` | Same for Anthropic-style `messages[]` |
+| `sanitizeThinkingPart()` | Normalizes thinking block structure, strips SDK fields |
+| `stripCacheControlRecursively()` | Deep removal of `cache_control` and `providerOptions` |
+| `hasValidSignature()` | Validates signature presence and length (≥50 chars) |
+| `removeTrailingThinkingBlocks()` | Removes unsigned trailing thinking from assistant messages |
+| `getThinkingText()` | Extracts text from various thinking block formats |
+| `transformThinkingParts()` | Converts thinking → reasoning format for OpenCode |
+| `isThinkingCapableModel()` | Detects thinking-capable models by name |
+| `extractThinkingConfig()` | Extracts thinking config from various request locations |
+| `resolveThinkingConfig()` | Determines final thinking config based on model capabilities |
+| `normalizeThinkingConfig()` | Validates and normalizes thinking configuration |
+
+### `src/plugin/cache.ts`
+| Function | Purpose |
+|----------|---------|
+| `cacheSignature()` | Store signature by session ID + text hash |
+| `getCachedSignature()` | Retrieve cached signature for restoration |
+| **TTL:** 1 hour | **Max:** 100 entries per session |
+
+---
+
+## Troubleshooting
+
+| Error | Cause | Solution |
+|-------|-------|----------|
+| `invalid thinking signature` | Signature lost in multi-turn | Restart `opencode` to reset signature cache |
+| `Unknown field: cache_control` | SDK injected an unsupported field | Plugin auto-strips it; update the plugin if the error persists |
+| `Unknown field: const` | Schema uses `const` keyword | Plugin auto-converts to `enum`; check schema |
+| `Unknown field: $ref` | Schema uses JSON Schema references | Inline the referenced schema instead |
+| `400 INVALID_ARGUMENT` on tools | Unsupported schema feature | Plugin auto-sanitizes; check `ANTIGRAVITY_API_SPEC.md` |
+| `Empty args object` error | Tool has no parameters | Plugin adds placeholder `reason` property |
+| `Function name invalid` | Tool name contains `/` or starts with digit | Plugin auto-sanitizes names |
+| Thinking not visible | Thinking budget exhausted or output limit too low | Plugin auto-configures; check model config |
+| Thinking stops during tool use | Model not using interleaved thinking | Plugin injects system hint; ensure `*-thinking` model |
+| `404 NOT_FOUND` on model | Preview access not enabled | Request preview access via provided link |
+| Rate limited (429) | Quota exhausted | Plugin extracts `Retry-After`; wait or switch account |
+
+---
+
+## Changelog
+
+### `improve-tools-call-sanitizer` Branch
+
+| Commit | Description |
+|--------|-------------|
+| `ae86e3a` | Enhanced `removeTrailingThinkingBlocks` to preserve blocks with valid signatures |
+| `08f9da9` | Added thinking block sanitization (`sanitizeThinkingPart`, `stripCacheControlRecursively`, `hasValidSignature`) |
+
+### `claude-improvements` Branch
+
+| Commit | Description |
+|--------|-------------|
+| `314ac9d` | Added thinking signature caching for multi-turn stability |
+| `5a28b41` | Initial Claude improvements with streaming, interleaved thinking, validated tools |
+
+### Documentation
+
+| Version | Description |
+|---------|-------------|
+| 1.1 | Added comprehensive "Claude-Specific Quirks & Adaptations" section with 36 quirks |
+| 1.0 | Initial documentation with flow diagram and branch summaries |
+
+---
+
+## See Also
+
+- [ANTIGRAVITY_API_SPEC.md](./ANTIGRAVITY_API_SPEC.md) - Full Antigravity API reference
+- [README.md](../README.md) - Plugin setup and usage
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
  "name": "opencode-antigravity-auth",
-  "version": "1.1.4",
+  "version": "1.2.0",
  "description": "Google Antigravity IDE OAuth auth plugin for Opencode - access Gemini 3 Pro and Claude 4.5 using Google credentials",
  "main": "./dist/index.js",
  "types": "./dist/index.d.ts",
--- a/src/plugin.ts
+++ b/src/plugin.ts
@@ -324,6 +324,7 @@ export const createAntigravityPlugin = (providerId: string) => async (
            projectId?: string;
            endpoint?: string;
            effectiveModel?: string;
+            sessionId?: string;
            toolDebugMissing?: number;
            toolDebugSummary?: string;
            toolDebugPayload?: string;
@@ -534,6 +535,7 @@ export const createAntigravityPlugin = (providerId: string) => async (
                      projectId: prepared.projectId,
                      endpoint: prepared.endpoint,
                      effectiveModel: prepared.effectiveModel,
+                      sessionId: prepared.sessionId,
                      toolDebugMissing: prepared.toolDebugMissing,
                      toolDebugSummary: prepared.toolDebugSummary,
                      toolDebugPayload: prepared.toolDebugPayload,
@@ -553,6 +555,7 @@ export const createAntigravityPlugin = (providerId: string) => async (
                      projectId: prepared.projectId,
                      endpoint: prepared.endpoint,
                      effectiveModel: prepared.effectiveModel,
+                      sessionId: prepared.sessionId,
                      toolDebugMissing: prepared.toolDebugMissing,
                      toolDebugSummary: prepared.toolDebugSummary,
                      toolDebugPayload: prepared.toolDebugPayload,
@@ -580,6 +583,7 @@ export const createAntigravityPlugin = (providerId: string) => async (
                    projectId: prepared.projectId,
                    endpoint: prepared.endpoint,
                    effectiveModel: prepared.effectiveModel,
+                    sessionId: prepared.sessionId,
                    toolDebugMissing: prepared.toolDebugMissing,
                    toolDebugSummary: prepared.toolDebugSummary,
                    toolDebugPayload: prepared.toolDebugPayload,
@@ -596,6 +600,7 @@ export const createAntigravityPlugin = (providerId: string) => async (
                  prepared.projectId,
                  prepared.endpoint,
                  prepared.effectiveModel,
+                  prepared.sessionId,
                  prepared.toolDebugMissing,
                  prepared.toolDebugSummary,
                  prepared.toolDebugPayload,
@@ -627,6 +632,7 @@ export const createAntigravityPlugin = (providerId: string) => async (
                lastFailure.projectId,
                lastFailure.endpoint,
                lastFailure.effectiveModel,
+                lastFailure.sessionId,
                lastFailure.toolDebugMissing,
                lastFailure.toolDebugSummary,
                lastFailure.toolDebugPayload,
--- a/src/plugin/auth.test.ts
+++ b/src/plugin/auth.test.ts
@@ -0,0 +1,195 @@
+import { beforeEach, describe, expect, it, vi } from "vitest";
+
+import { isOAuthAuth, parseRefreshParts, formatRefreshParts, accessTokenExpired } from "./auth";
+import type { OAuthAuthDetails, ApiKeyAuthDetails } from "./types";
+
+describe("isOAuthAuth", () => {
+  it("returns true for oauth auth type", () => {
+    const auth: OAuthAuthDetails = {
+      type: "oauth",
+      refresh: "token|project",
+      access: "access-token",
+      expires: Date.now() + 3600000,
+    };
+    expect(isOAuthAuth(auth)).toBe(true);
+  });
+
+  it("returns false for api_key auth type", () => {
+    const auth: ApiKeyAuthDetails = {
+      type: "api_key",
+      key: "some-api-key",
+    };
+    expect(isOAuthAuth(auth)).toBe(false);
+  });
+});
+
+describe("parseRefreshParts", () => {
+  it("parses refresh token with all parts", () => {
+    const result = parseRefreshParts("refreshToken|projectId|managedProjectId");
+    expect(result).toEqual({
+      refreshToken: "refreshToken",
+      projectId: "projectId",
+      managedProjectId: "managedProjectId",
+    });
+  });
+
+  it("parses refresh token with only refresh and project", () => {
+    const result = parseRefreshParts("refreshToken|projectId");
+    expect(result).toEqual({
+      refreshToken: "refreshToken",
+      projectId: "projectId",
+      managedProjectId: undefined,
+    });
+  });
+
+  it("parses refresh token with only refresh token", () => {
+    const result = parseRefreshParts("refreshToken");
+    expect(result).toEqual({
+      refreshToken: "refreshToken",
+      projectId: undefined,
+      managedProjectId: undefined,
+    });
+  });
+
+  it("handles empty string", () => {
+    const result = parseRefreshParts("");
+    expect(result).toEqual({
+      refreshToken: "",
+      projectId: undefined,
+      managedProjectId: undefined,
+    });
+  });
+
+  it("handles empty parts", () => {
+    const result = parseRefreshParts("refreshToken||managedProjectId");
+    expect(result).toEqual({
+      refreshToken: "refreshToken",
+      projectId: undefined,
+      managedProjectId: "managedProjectId",
+    });
+  });
+
+  it("handles undefined/null-like input", () => {
+    // @ts-expect-error - testing edge case
+    const result = parseRefreshParts(undefined);
+    expect(result).toEqual({
+      refreshToken: "",
+      projectId: undefined,
+      managedProjectId: undefined,
+    });
+  });
+});
+
+describe("formatRefreshParts", () => {
+  it("formats all parts", () => {
+    const result = formatRefreshParts({
+      refreshToken: "refreshToken",
+      projectId: "projectId",
+      managedProjectId: "managedProjectId",
+    });
+    expect(result).toBe("refreshToken|projectId|managedProjectId");
+  });
+
+  it("formats without managed project id", () => {
+    const result = formatRefreshParts({
+      refreshToken: "refreshToken",
+      projectId: "projectId",
+    });
+    expect(result).toBe("refreshToken|projectId");
+  });
+
+  it("formats without project id but with managed project id", () => {
+    const result = formatRefreshParts({
+      refreshToken: "refreshToken",
+      managedProjectId: "managedProjectId",
+    });
+    expect(result).toBe("refreshToken||managedProjectId");
+  });
+
+  it("formats with only refresh token", () => {
+    const result = formatRefreshParts({
+      refreshToken: "refreshToken",
+    });
+    expect(result).toBe("refreshToken|");
+  });
+
+  it("round-trips correctly with parseRefreshParts", () => {
+    const original = {
+      refreshToken: "rt123",
+      projectId: "proj456",
+      managedProjectId: "managed789",
+    };
+    const formatted = formatRefreshParts(original);
+    const parsed = parseRefreshParts(formatted);
+    expect(parsed).toEqual(original);
+  });
+});
+
+describe("accessTokenExpired", () => {
+  beforeEach(() => {
+    vi.useRealTimers();
+  });
+
+  it("returns true when access token is missing", () => {
+    const auth: OAuthAuthDetails = {
+      type: "oauth",
+      refresh: "token",
+      access: undefined,
+      expires: Date.now() + 3600000,
+    };
+    expect(accessTokenExpired(auth)).toBe(true);
+  });
+
+  it("returns true when expires is missing", () => {
+    const auth: OAuthAuthDetails = {
+      type: "oauth",
+      refresh: "token",
+      access: "access-token",
+      expires: undefined,
+    };
+    expect(accessTokenExpired(auth)).toBe(true);
+  });
+
+  it("returns true when token is expired", () => {
+    const auth: OAuthAuthDetails = {
+      type: "oauth",
+      refresh: "token",
+      access: "access-token",
+      expires: Date.now() - 1000, // expired 1 second ago
+    };
+    expect(accessTokenExpired(auth)).toBe(true);
+  });
+
+  it("returns true when token expires within buffer period (60 seconds)", () => {
+    const auth: OAuthAuthDetails = {
+      type: "oauth",
+      refresh: "token",
+      access: "access-token",
+      expires: Date.now() + 30000, // expires in 30 seconds (within 60s buffer)
+    };
+    expect(accessTokenExpired(auth)).toBe(true);
+  });
+
+  it("returns false when token is valid and outside buffer period", () => {
+    const auth: OAuthAuthDetails = {
+      type: "oauth",
+      refresh: "token",
+      access: "access-token",
+      expires: Date.now() + 120000, // expires in 2 minutes
+    };
+    expect(accessTokenExpired(auth)).toBe(false);
+  });
+
+  it("returns false when token expires just past the buffer boundary", () => {
+    vi.useFakeTimers();
+    vi.setSystemTime(new Date(0)); // pin "now" to t=0 so `expires` below is an exact offset
+
+    const auth: OAuthAuthDetails = {
+      type: "oauth",
+      refresh: "token",
+      access: "access-token",
+      expires: 60001, // 1ms beyond the 60s buffer measured from the pinned clock
+    };
+    expect(accessTokenExpired(auth)).toBe(false);
+  });
+});
--- a/src/plugin/cache.test.ts
+++ b/src/plugin/cache.test.ts
@@ -0,0 +1,295 @@
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+
+import {
+  resolveCachedAuth,
+  storeCachedAuth,
+  clearCachedAuth,
+  cacheSignature,
+  getCachedSignature,
+  clearSignatureCache,
+} from "./cache";
+import type { OAuthAuthDetails } from "./types";
+
+// Test fixture: OAuth auth expiring one hour from now; fields in `overrides` replace the defaults.
+function createAuth(overrides: Partial<OAuthAuthDetails> = {}): OAuthAuthDetails {
+  const oneHourFromNow = Date.now() + 3600000;
+  const defaults: OAuthAuthDetails = {
+    type: "oauth", refresh: "refresh-token|project-id",
+    access: "access-token", expires: oneHourFromNow,
+  };
+  return { ...defaults, ...overrides };
+}
+
+describe("Auth Cache", () => {
+  beforeEach(() => {
+    vi.useRealTimers();
+    clearCachedAuth();
+  });
+
+  afterEach(() => {
+    clearCachedAuth();
+  });
+
+  describe("resolveCachedAuth", () => {
+    it("returns input auth when no cache exists and caches it", () => {
+      const auth = createAuth();
+      const result = resolveCachedAuth(auth);
+      expect(result).toEqual(auth);
+    });
+
+    it("returns input auth when refresh key is empty", () => {
+      const auth = createAuth({ refresh: "" });
+      const result = resolveCachedAuth(auth);
+      expect(result).toEqual(auth);
+    });
+
+    it("returns input auth when it has valid (unexpired) access token", () => {
+      const oldAuth = createAuth({ access: "old-access", expires: Date.now() + 3600000 });
+      resolveCachedAuth(oldAuth); // cache it
+
+      const newAuth = createAuth({ access: "new-access", expires: Date.now() + 7200000 });
+      const result = resolveCachedAuth(newAuth);
+      expect(result.access).toBe("new-access");
+    });
+
+    it("returns cached auth when input auth is expired but cached is valid", () => {
+      vi.useFakeTimers();
+      vi.setSystemTime(new Date(0));
+
+      const validAuth = createAuth({
+        access: "valid-access",
+        expires: 3600000, // expires at t=3600000
+      });
+      resolveCachedAuth(validAuth); // cache it
+
+      // Now create an expired auth with the same refresh token
+      const expiredAuth = createAuth({
+        access: "expired-access",
+        expires: 30000, // expires within buffer (60s)
+      });
+
+      const result = resolveCachedAuth(expiredAuth);
+      expect(result.access).toBe("valid-access");
+    });
+
+    it("returns input auth when both are expired (updates cache)", () => {
+      vi.useFakeTimers();
+      vi.setSystemTime(new Date(0));
+
+      const expiredCached = createAuth({
+        access: "cached-expired",
+        expires: 30000, // expired within buffer
+      });
+      resolveCachedAuth(expiredCached);
+
+      const expiredNew = createAuth({
+        access: "new-expired",
+        expires: 20000, // also expired within buffer
+      });
+
+      const result = resolveCachedAuth(expiredNew);
+      expect(result.access).toBe("new-expired");
+    });
+  });
+
+  describe("storeCachedAuth", () => {
+    it("stores auth in cache", () => {
+      const auth = createAuth({ access: "stored-access" });
+      storeCachedAuth(auth);
+
+      const expiredAuth = createAuth({ access: "expired", expires: Date.now() - 1000 });
+      const result = resolveCachedAuth(expiredAuth);
+      expect(result.access).toBe("stored-access");
+    });
+
+    it("does nothing when refresh key is empty", () => {
+      const auth = createAuth({ refresh: "", access: "no-key-access" });
+      storeCachedAuth(auth);
+
+      // Should not be retrievable since key was empty
+      const testAuth = createAuth({ refresh: "", access: "test" });
+      const result = resolveCachedAuth(testAuth);
+      expect(result.access).toBe("test"); // returns the input, not cached
+    });
+
+    it("does nothing when refresh key is whitespace only", () => {
+      const auth = createAuth({ refresh: "   ", access: "whitespace-access" });
+      storeCachedAuth(auth);
+
+      const testAuth = createAuth({ refresh: "   ", access: "test" });
+      const result = resolveCachedAuth(testAuth);
+      expect(result.access).toBe("test");
+    });
+  });
+
+  describe("clearCachedAuth", () => {
+    it("clears all cache when no argument provided", () => {
+      storeCachedAuth(createAuth({ refresh: "token1|p", access: "access1" }));
+      storeCachedAuth(createAuth({ refresh: "token2|p", access: "access2" }));
+
+      clearCachedAuth();
+
+      const auth1 = createAuth({ refresh: "token1|p", access: "new1" });
+      const auth2 = createAuth({ refresh: "token2|p", access: "new2" });
+
+      expect(resolveCachedAuth(auth1).access).toBe("new1");
+      expect(resolveCachedAuth(auth2).access).toBe("new2");
+    });
+
+    it("clears specific refresh token from cache", () => {
+      storeCachedAuth(createAuth({ refresh: "token1|p", access: "access1" }));
+      storeCachedAuth(createAuth({ refresh: "token2|p", access: "access2" }));
+
+      clearCachedAuth("token1|p");
+
+      // token1 should be cleared
+      const expiredAuth1 = createAuth({ refresh: "token1|p", access: "new1", expires: Date.now() - 1000 });
+      expect(resolveCachedAuth(expiredAuth1).access).toBe("new1");
+
+      // token2 should still be cached
+      const expiredAuth2 = createAuth({ refresh: "token2|p", access: "new2", expires: Date.now() - 1000 });
+      expect(resolveCachedAuth(expiredAuth2).access).toBe("access2");
+    });
+  });
+});
+
+describe("Signature Cache", () => {
+  beforeEach(() => {
+    vi.useRealTimers();
+    clearSignatureCache();
+  });
+
+  afterEach(() => {
+    clearSignatureCache();
+  });
+
+  describe("cacheSignature", () => {
+    it("caches a signature for session and text", () => {
+      cacheSignature("session1", "thinking text", "sig123");
+      const result = getCachedSignature("session1", "thinking text");
+      expect(result).toBe("sig123");
+    });
+
+    it("does nothing when sessionId is empty", () => {
+      cacheSignature("", "text", "sig");
+      expect(getCachedSignature("", "text")).toBeUndefined();
+    });
+
+    it("does nothing when text is empty", () => {
+      cacheSignature("session", "", "sig");
+      expect(getCachedSignature("session", "")).toBeUndefined();
+    });
+
+    it("does nothing when signature is empty", () => {
+      cacheSignature("session", "text", "");
+      expect(getCachedSignature("session", "text")).toBeUndefined();
+    });
+
+    it("stores multiple signatures per session", () => {
+      cacheSignature("session1", "text1", "sig1");
+      cacheSignature("session1", "text2", "sig2");
+
+      expect(getCachedSignature("session1", "text1")).toBe("sig1");
+      expect(getCachedSignature("session1", "text2")).toBe("sig2");
+    });
+
+    it("stores signatures for different sessions independently", () => {
+      cacheSignature("session1", "text", "sig1");
+      cacheSignature("session2", "text", "sig2");
+
+      expect(getCachedSignature("session1", "text")).toBe("sig1");
+      expect(getCachedSignature("session2", "text")).toBe("sig2");
+    });
+  });
+
+  describe("getCachedSignature", () => {
+    it("returns undefined when session not found", () => {
+      expect(getCachedSignature("unknown", "text")).toBeUndefined();
+    });
+
+    it("returns undefined when text not found in session", () => {
+      cacheSignature("session", "known-text", "sig");
+      expect(getCachedSignature("session", "unknown-text")).toBeUndefined();
+    });
+
+    it("returns undefined when sessionId is empty", () => {
+      expect(getCachedSignature("", "text")).toBeUndefined();
+    });
+
+    it("returns undefined when text is empty", () => {
+      expect(getCachedSignature("session", "")).toBeUndefined();
+    });
+
+    it("returns undefined when signature is expired", () => {
+      vi.useFakeTimers();
+      vi.setSystemTime(new Date(0));
+
+      cacheSignature("session", "text", "sig");
+
+      // Advance time past TTL (1 hour = 3600000ms)
+      vi.setSystemTime(new Date(3600001));
+
+      expect(getCachedSignature("session", "text")).toBeUndefined();
+    });
+
+    it("returns signature when not expired", () => {
+      vi.useFakeTimers();
+      vi.setSystemTime(new Date(0));
+
+      cacheSignature("session", "text", "sig");
+
+      // Advance time but stay within TTL
+      vi.setSystemTime(new Date(3599999));
+
+      expect(getCachedSignature("session", "text")).toBe("sig");
+    });
+  });
+
+  describe("clearSignatureCache", () => {
+    it("clears all signature cache when no argument provided", () => {
+      cacheSignature("session1", "text", "sig1");
+      cacheSignature("session2", "text", "sig2");
+
+      clearSignatureCache();
+
+      expect(getCachedSignature("session1", "text")).toBeUndefined();
+      expect(getCachedSignature("session2", "text")).toBeUndefined();
+    });
+
+    it("clears specific session from cache", () => {
+      cacheSignature("session1", "text", "sig1");
+      cacheSignature("session2", "text", "sig2");
+
+      clearSignatureCache("session1");
+
+      expect(getCachedSignature("session1", "text")).toBeUndefined();
+      expect(getCachedSignature("session2", "text")).toBe("sig2");
+    });
+  });
+
+  describe("cache eviction", () => {
+    it("evicts entries when at capacity", () => {
+      vi.useFakeTimers();
+      vi.setSystemTime(new Date(0));
+
+      // Fill the session to capacity: 100 entries (MAX_ENTRIES_PER_SESSION)
+      for (let i = 0; i < 100; i++) {
+        vi.setSystemTime(new Date(i * 1000)); // one second apart, so every entry has a distinct timestamp
+        cacheSignature("session", `text-${i}`, `sig-${i}`);
+      }
+
+      // Advance the clock to t=100s: later than every entry, yet far inside the 1h TTL (nothing is expired)
+      vi.setSystemTime(new Date(100 * 1000));
+
+      // A 101st distinct text finds the session full and triggers eviction
+      cacheSignature("session", "new-text", "new-sig");
+
+      // New entry should exist
+      expect(getCachedSignature("session", "new-text")).toBe("new-sig");
+
+      // With no expired entries, the oldest 25% (by timestamp) are removed
+      // text-0 carries the smallest timestamp (0), so it must be among them
+      expect(getCachedSignature("session", "text-0")).toBeUndefined();
+    });
+  });
+});
--- a/src/plugin/cache.ts
+++ b/src/plugin/cache.ts
@@ -1,5 +1,6 @@
 import { accessTokenExpired } from "./auth";
 import type { OAuthAuthDetails } from "./types";
+import { createHash } from "node:crypto";

 const authCache = new Map<string, OAuthAuthDetails>();

@@ -63,3 +64,103 @@ export function clearCachedAuth(refresh?: string): void {
    authCache.delete(key);
  }
 }
+
+// ============================================================================
+// Thinking Signature Cache (for Claude multi-turn conversations)
+// ============================================================================
+
+interface SignatureEntry {
+  signature: string; // signature string exactly as passed to cacheSignature()
+  timestamp: number; // Date.now() (ms) at the moment the entry was stored
+}
+
+// Two-level map: sessionId -> (hashText(text) -> SignatureEntry)
+const signatureCache = new Map<string, Map<string, SignatureEntry>>();
+
+// Entries older than 1 hour are treated as expired on lookup and purged during eviction
+const SIGNATURE_CACHE_TTL_MS = 60 * 60 * 1000;
+
+// Per-session cap to prevent memory bloat; once reached, cacheSignature() evicts before inserting
+const MAX_ENTRIES_PER_SESSION = 100;
+
+// 16 hex chars = 64-bit key space; keeps memory bounded while making collisions extremely unlikely.
+const SIGNATURE_TEXT_HASH_HEX_LEN = 16;
+
+/**
+ * Derives the cache key for a piece of thinking text: the leading
+ * SIGNATURE_TEXT_HASH_HEX_LEN hex characters of its SHA-256 (UTF-8) digest.
+ */
+function hashText(text: string): string {
+  const digestHex = createHash("sha256").update(text, "utf8").digest("hex");
+  return digestHex.slice(0, SIGNATURE_TEXT_HASH_HEX_LEN);
+}
+
+/**
+ * Caches a thinking signature under `sessionId`, keyed by a hash of `text`.
+ * No-op if any argument is empty. Re-caching a known text refreshes it without eviction.
+ */
+export function cacheSignature(sessionId: string, text: string, signature: string): void {
+  if (!sessionId || !text || !signature) return;
+
+  let sessionCache = signatureCache.get(sessionId);
+  if (!sessionCache) {
+    sessionCache = new Map();
+    signatureCache.set(sessionId, sessionCache);
+  }
+
+  const textHash = hashText(text);
+  // Only a brand-new key grows the map; refreshing an existing key must not evict others.
+  if (!sessionCache.has(textHash) && sessionCache.size >= MAX_ENTRIES_PER_SESSION) {
+    const now = Date.now();
+    for (const [key, entry] of sessionCache.entries()) {
+      if (now - entry.timestamp > SIGNATURE_CACHE_TTL_MS) {
+        sessionCache.delete(key);
+      }
+    }
+    // If purging expired entries freed nothing, drop the oldest quarter by timestamp.
+    if (sessionCache.size >= MAX_ENTRIES_PER_SESSION) {
+      const entries = Array.from(sessionCache.entries())
+        .sort((a, b) => a[1].timestamp - b[1].timestamp);
+      const toRemove = entries.slice(0, Math.floor(MAX_ENTRIES_PER_SESSION / 4));
+      for (const [key] of toRemove) {
+        sessionCache.delete(key);
+      }
+    }
+  }
+
+  sessionCache.set(textHash, { signature, timestamp: Date.now() });
+}
+
+/**
+ * Looks up the signature cached for `text` within `sessionId`.
+ * Yields undefined on a miss; an entry past its TTL is purged and also yields undefined.
+ */
+export function getCachedSignature(sessionId: string, text: string): string | undefined {
+  if (!sessionId || !text) return undefined;
+
+  const perSession = signatureCache.get(sessionId);
+  if (!perSession) return undefined;
+
+  const key = hashText(text);
+  const cached = perSession.get(key);
+  if (!cached) return undefined;
+
+  const isStale = Date.now() - cached.timestamp > SIGNATURE_CACHE_TTL_MS;
+  if (!isStale) {
+    return cached.signature;
+  }
+  // Past its TTL: remove the entry so it no longer occupies a slot.
+  perSession.delete(key);
+  return undefined;
+}
+
+/**
+ * Drops cached signatures: one session's when a non-empty `sessionId` is given, else all of them.
+ */
+export function clearSignatureCache(sessionId?: string): void {
+  if (!sessionId) {
+    signatureCache.clear();
+    return;
+  }
+  signatureCache.delete(sessionId);
+}
--- a/src/plugin/request-helpers.test.ts
+++ b/src/plugin/request-helpers.test.ts
@@ -0,0 +1,798 @@
+import { describe, expect, it } from "vitest";
+
+import {
+  isThinkingCapableModel,
+  extractThinkingConfig,
+  resolveThinkingConfig,
+  filterUnsignedThinkingBlocks,
+  filterMessagesThinkingBlocks,
+  transformThinkingParts,
+  normalizeThinkingConfig,
+  parseAntigravityApiBody,
+  extractUsageMetadata,
+  extractUsageFromSsePayload,
+  rewriteAntigravityPreviewAccessError,
+  DEFAULT_THINKING_BUDGET,
+} from "./request-helpers";
+
+// sanitizeThinkingPart is not in this file's import list, so these cases exercise it
+// indirectly through filterUnsignedThinkingBlocks / filterMessagesThinkingBlocks.
+// The 60-character signatures used below are long enough to be treated as valid.
+describe("sanitizeThinkingPart (covered via filtering)", () => {
+  it("extracts wrapped text and strips SDK fields for Gemini-style thought blocks", () => {
+    const validSignature = "s".repeat(60);
+
+    const contents = [
+      {
+        role: "model",
+        parts: [
+          {
+            thought: true,
+            text: {
+              text: "wrapped thought",
+              cache_control: { type: "ephemeral" },
+              providerOptions: { injected: true },
+            },
+            thoughtSignature: validSignature,
+            cache_control: { type: "ephemeral" },
+            providerOptions: { injected: true },
+          },
+        ],
+      },
+    ];
+
+    const result = filterUnsignedThinkingBlocks(contents) as any;
+    expect(result[0].parts).toHaveLength(1);
+    expect(result[0].parts[0]).toEqual({
+      thought: true,
+      text: "wrapped thought",
+      thoughtSignature: validSignature,
+    });
+
+    // Ensure injected fields are removed
+    expect(result[0].parts[0].cache_control).toBeUndefined();
+    expect(result[0].parts[0].providerOptions).toBeUndefined();
+  });
+
+  it("extracts wrapped thinking text and strips SDK fields for Anthropic-style thinking blocks", () => {
+    const validSignature = "a".repeat(60);
+
+    const contents = [
+      {
+        role: "model",
+        parts: [
+          {
+            type: "thinking",
+            thinking: {
+              text: "wrapped thinking",
+              cache_control: { type: "ephemeral" },
+              providerOptions: { injected: true },
+            },
+            signature: validSignature,
+            cache_control: { type: "ephemeral" },
+            providerOptions: { injected: true },
+          },
+        ],
+      },
+    ];
+
+    const result = filterUnsignedThinkingBlocks(contents) as any;
+    expect(result[0].parts).toHaveLength(1);
+    expect(result[0].parts[0]).toEqual({
+      type: "thinking",
+      thinking: "wrapped thinking",
+      signature: validSignature,
+    });
+  });
+
+  it("preserves signatures while dropping cache_control/providerOptions during signature restoration", () => {
+    const cachedSignature = "c".repeat(60);
+    // Stub cache lookup: always reports a hit, regardless of session or text.
+    const getCachedSignatureFn = (_sessionId: string, _text: string) => cachedSignature;
+
+    const messages = [
+      {
+        role: "assistant",
+        content: [
+          {
+            type: "thinking",
+            thinking: {
+              thinking: "restore me",
+              cache_control: { type: "ephemeral" },
+            },
+            // no signature present (forces restore)
+            providerOptions: { injected: true },
+          },
+          { type: "text", text: "visible" },
+        ],
+      },
+    ];
+
+    const result = filterMessagesThinkingBlocks(messages, "session-1", getCachedSignatureFn) as any;
+    expect(result[0].content[0]).toEqual({
+      type: "thinking",
+      thinking: "restore me",
+      signature: cachedSignature,
+    });
+  });
+
+  it("falls back to recursive stripping for signed reasoning blocks and removes nested SDK fields", () => {
+    const validSignature = "z".repeat(60);
+
+    // A "reasoning" part is neither Gemini-style (thought: true) nor Anthropic-style
+    // (type "thinking" / a `thinking` field), so only the recursive strip applies.
+    const contents = [
+      {
+        role: "model",
+        parts: [
+          {
+            type: "reasoning",
+            signature: validSignature,
+            cache_control: { type: "ephemeral" },
+            providerOptions: { injected: true },
+            meta: {
+              keep: true,
+              cache_control: { nested: true },
+              arr: [
+                { providerOptions: { nested: true }, keep: 1 },
+                { cache_control: { nested: true }, keep: 2 },
+              ],
+            },
+          },
+          { type: "text", text: "visible" },
+        ],
+      },
+    ];
+
+    const result = filterUnsignedThinkingBlocks(contents) as any;
+    expect(result[0].parts[0]).toEqual({
+      type: "reasoning",
+      signature: validSignature,
+      meta: {
+        keep: true,
+        arr: [
+          { keep: 1 },
+          { keep: 2 },
+        ],
+      },
+    });
+  });
+});
+
+// Model-name heuristic: names containing "thinking", "gemini-3", or "opus" are expected
+// to be reported as thinking-capable. The upper-case inputs pin case-insensitive matching.
+describe("isThinkingCapableModel", () => {
+  it("returns true for models with 'thinking' in name", () => {
+    expect(isThinkingCapableModel("claude-thinking")).toBe(true);
+    expect(isThinkingCapableModel("CLAUDE-THINKING-4")).toBe(true);
+    expect(isThinkingCapableModel("model-thinking-v1")).toBe(true);
+  });
+
+  it("returns true for models with 'gemini-3' in name", () => {
+    expect(isThinkingCapableModel("gemini-3-pro")).toBe(true);
+    expect(isThinkingCapableModel("GEMINI-3-flash")).toBe(true);
+    expect(isThinkingCapableModel("gemini-3")).toBe(true);
+  });
+
+  it("returns true for models with 'opus' in name", () => {
+    expect(isThinkingCapableModel("claude-opus")).toBe(true);
+    expect(isThinkingCapableModel("claude-4-opus")).toBe(true);
+    expect(isThinkingCapableModel("OPUS")).toBe(true);
+  });
+
+  it("returns false for non-thinking models", () => {
+    expect(isThinkingCapableModel("claude-sonnet")).toBe(false);
+    expect(isThinkingCapableModel("gemini-2-pro")).toBe(false);
+    expect(isThinkingCapableModel("gpt-4")).toBe(false);
+  });
+});
+
+// Argument order as exercised below: (requestPayload, generationConfig, extra_body).
+// Each case supplies the config through exactly one source unless it is testing precedence.
+describe("extractThinkingConfig", () => {
+  it("extracts thinkingConfig from generationConfig", () => {
+    const result = extractThinkingConfig(
+      {},
+      { thinkingConfig: { includeThoughts: true, thinkingBudget: 8000 } },
+      undefined,
+    );
+    expect(result).toEqual({ includeThoughts: true, thinkingBudget: 8000 });
+  });
+
+  it("extracts thinkingConfig from extra_body", () => {
+    const result = extractThinkingConfig(
+      {},
+      undefined,
+      { thinkingConfig: { includeThoughts: true, thinkingBudget: 4000 } },
+    );
+    expect(result).toEqual({ includeThoughts: true, thinkingBudget: 4000 });
+  });
+
+  it("extracts thinkingConfig from requestPayload directly", () => {
+    const result = extractThinkingConfig(
+      { thinkingConfig: { includeThoughts: false, thinkingBudget: 2000 } },
+      undefined,
+      undefined,
+    );
+    expect(result).toEqual({ includeThoughts: false, thinkingBudget: 2000 });
+  });
+
+  it("prioritizes generationConfig over extra_body", () => {
+    const result = extractThinkingConfig(
+      {},
+      { thinkingConfig: { includeThoughts: true, thinkingBudget: 8000 } },
+      { thinkingConfig: { includeThoughts: false, thinkingBudget: 4000 } },
+    );
+    expect(result).toEqual({ includeThoughts: true, thinkingBudget: 8000 });
+  });
+
+  // Anthropic-style input uses `thinking: { type, budgetTokens }` instead of `thinkingConfig`.
+  it("converts Anthropic-style thinking config", () => {
+    const result = extractThinkingConfig(
+      { thinking: { type: "enabled", budgetTokens: 10000 } },
+      undefined,
+      undefined,
+    );
+    expect(result).toEqual({ includeThoughts: true, thinkingBudget: 10000 });
+  });
+
+  it("uses default budget for Anthropic-style without budgetTokens", () => {
+    const result = extractThinkingConfig(
+      { thinking: { type: "enabled" } },
+      undefined,
+      undefined,
+    );
+    expect(result).toEqual({ includeThoughts: true, thinkingBudget: DEFAULT_THINKING_BUDGET });
+  });
+
+  it("returns undefined when no config found", () => {
+    expect(extractThinkingConfig({}, undefined, undefined)).toBeUndefined();
+  });
+
+  it("uses default budget when thinkingBudget not specified", () => {
+    const result = extractThinkingConfig(
+      {},
+      { thinkingConfig: { includeThoughts: true } },
+      undefined,
+    );
+    expect(result).toEqual({ includeThoughts: true, thinkingBudget: DEFAULT_THINKING_BUDGET });
+  });
+});
+
+// Argument order as exercised below:
+// (userConfig, isThinkingModel, isClaudeModel, hasAssistantHistory).
+// The first case pins that a Claude model with assistant history keeps thinking enabled.
+describe("resolveThinkingConfig", () => {
+  it("keeps thinking enabled for Claude models with assistant history", () => {
+    const result = resolveThinkingConfig(
+      { includeThoughts: true, thinkingBudget: 8000 },
+      true, // isThinkingModel
+      true, // isClaudeModel
+      true, // hasAssistantHistory
+    );
+    expect(result).toEqual({ includeThoughts: true, thinkingBudget: 8000 });
+  });
+
+  it("enables thinking for thinking-capable models without user config", () => {
+    const result = resolveThinkingConfig(
+      undefined,
+      true, // isThinkingModel
+      false, // isClaudeModel
+      false, // hasAssistantHistory
+    );
+    expect(result).toEqual({ includeThoughts: true, thinkingBudget: DEFAULT_THINKING_BUDGET });
+  });
+
+  it("respects user config for non-Claude models", () => {
+    const userConfig = { includeThoughts: false, thinkingBudget: 5000 };
+    const result = resolveThinkingConfig(
+      userConfig,
+      true,
+      false,
+      false,
+    );
+    expect(result).toEqual(userConfig);
+  });
+
+  it("returns user config for Claude without history", () => {
+    const userConfig = { includeThoughts: true, thinkingBudget: 8000 };
+    const result = resolveThinkingConfig(
+      userConfig,
+      true,
+      true, // isClaudeModel
+      false, // no history
+    );
+    expect(result).toEqual(userConfig);
+  });
+
+  it("returns undefined for non-thinking model without user config", () => {
+    const result = resolveThinkingConfig(
+      undefined,
+      false, // not thinking model
+      false,
+      false,
+    );
+    expect(result).toBeUndefined();
+  });
+});
+
+// Gemini-shaped input (contents[].parts[]). No session ID or cache lookup is passed in
+// these cases, so a thinking part survives only if it already carries a long-enough
+// signature; short ones such as "sig123" are expected to be dropped.
+describe("filterUnsignedThinkingBlocks", () => {
+  it("filters out unsigned thinking parts", () => {
+    const contents = [
+      {
+        role: "model",
+        parts: [
+          { type: "thinking", text: "thinking without signature" },
+          { type: "text", text: "visible text" },
+        ],
+      },
+    ];
+    const result = filterUnsignedThinkingBlocks(contents);
+    expect(result[0].parts).toHaveLength(1);
+    expect(result[0].parts[0].type).toBe("text");
+  });
+
+  it("keeps signed thinking parts with valid signatures", () => {
+    const validSignature = "a".repeat(60);
+    const contents = [
+      {
+        role: "model",
+        parts: [
+          { type: "thinking", text: "thinking with signature", signature: validSignature },
+          { type: "text", text: "visible text" },
+        ],
+      },
+    ];
+    const result = filterUnsignedThinkingBlocks(contents);
+    expect(result[0].parts).toHaveLength(2);
+    expect(result[0].parts[0].signature).toBe(validSignature);
+  });
+
+  it("filters thinking parts with short signatures", () => {
+    const contents = [
+      {
+        role: "model",
+        parts: [
+          { type: "thinking", text: "thinking with short signature", signature: "sig123" },
+          { type: "text", text: "visible text" },
+        ],
+      },
+    ];
+    const result = filterUnsignedThinkingBlocks(contents);
+    expect(result[0].parts).toHaveLength(1);
+    expect(result[0].parts[0].type).toBe("text");
+  });
+
+  it("handles Gemini-style thought parts with valid signatures", () => {
+    // 55 characters: just above the 50-character validity threshold.
+    const validSignature = "b".repeat(55);
+    const contents = [
+      {
+        role: "model",
+        parts: [
+          { thought: true, text: "no signature" },
+          { thought: true, text: "has signature", thoughtSignature: validSignature },
+        ],
+      },
+    ];
+    const result = filterUnsignedThinkingBlocks(contents);
+    expect(result[0].parts).toHaveLength(1);
+    expect(result[0].parts[0].thoughtSignature).toBe(validSignature);
+  });
+
+  it("filters Gemini-style thought parts with short signatures", () => {
+    const contents = [
+      {
+        role: "model",
+        parts: [
+          { thought: true, text: "has short signature", thoughtSignature: "sig" },
+        ],
+      },
+    ];
+    const result = filterUnsignedThinkingBlocks(contents);
+    expect(result[0].parts).toHaveLength(0);
+  });
+
+  it("preserves non-thinking parts", () => {
+    const contents = [
+      {
+        role: "user",
+        parts: [{ text: "hello" }],
+      },
+    ];
+    const result = filterUnsignedThinkingBlocks(contents);
+    expect(result).toEqual(contents);
+  });
+
+  it("handles empty parts array", () => {
+    const contents = [{ role: "model", parts: [] }];
+    const result = filterUnsignedThinkingBlocks(contents);
+    expect(result[0].parts).toEqual([]);
+  });
+
+  it("handles missing parts", () => {
+    const contents = [{ role: "model" }];
+    const result = filterUnsignedThinkingBlocks(contents);
+    expect(result).toEqual(contents);
+  });
+});
+
+// Anthropic-shaped input (messages[].content[]). Cases that pass a session ID and a
+// lookup stub cover signature restoration; the rest cover plain filtering/sanitizing.
+describe("filterMessagesThinkingBlocks", () => {
+  it("filters out unsigned thinking blocks in messages[].content", () => {
+    const messages = [
+      {
+        role: "assistant",
+        content: [
+          { type: "thinking", thinking: "no signature" },
+          { type: "text", text: "visible" },
+        ],
+      },
+    ];
+
+    const result = filterMessagesThinkingBlocks(messages) as any;
+    expect(result[0].content).toHaveLength(1);
+    expect(result[0].content[0].type).toBe("text");
+  });
+
+  it("keeps signed thinking blocks with valid signatures and sanitizes injected fields", () => {
+    const validSignature = "a".repeat(60);
+    const messages = [
+      {
+        role: "assistant",
+        content: [
+          {
+            type: "thinking",
+            thinking: { text: "wrapped", cache_control: { type: "ephemeral" } },
+            signature: validSignature,
+            cache_control: { type: "ephemeral" },
+            providerOptions: { injected: true },
+          },
+          { type: "text", text: "visible" },
+        ],
+      },
+    ];
+
+    const result = filterMessagesThinkingBlocks(messages) as any;
+    expect(result[0].content[0]).toEqual({
+      type: "thinking",
+      thinking: "wrapped",
+      signature: validSignature,
+    });
+  });
+
+  it("filters thinking blocks with short signatures", () => {
+    const messages = [
+      {
+        role: "assistant",
+        content: [
+          { type: "thinking", thinking: "short sig", signature: "sig123" },
+          { type: "text", text: "visible" },
+        ],
+      },
+    ];
+
+    const result = filterMessagesThinkingBlocks(messages) as any;
+    expect(result[0].content).toEqual([{ type: "text", text: "visible" }]);
+  });
+
+  it("restores a missing signature from cache and preserves it after sanitization", () => {
+    const cachedSignature = "c".repeat(60);
+    // Stub cache lookup: always reports a hit, regardless of session or text.
+    const getCachedSignatureFn = (_sessionId: string, _text: string) => cachedSignature;
+
+    const messages = [
+      {
+        role: "assistant",
+        content: [
+          {
+            type: "thinking",
+            thinking: { thinking: "restore me", providerOptions: { injected: true } },
+            // no signature present (forces restore)
+            cache_control: { type: "ephemeral" },
+          },
+          { type: "text", text: "visible" },
+        ],
+      },
+    ];
+
+    const result = filterMessagesThinkingBlocks(messages, "session-1", getCachedSignatureFn) as any;
+    expect(result[0].content[0]).toEqual({
+      type: "thinking",
+      thinking: "restore me",
+      signature: cachedSignature,
+    });
+  });
+
+  it("handles Gemini-style thought blocks inside messages content", () => {
+    const validSignature = "b".repeat(60);
+    const messages = [
+      {
+        role: "assistant",
+        content: [
+          {
+            thought: true,
+            text: { text: "wrapped thought", cache_control: { type: "ephemeral" } },
+            thoughtSignature: validSignature,
+            providerOptions: { injected: true },
+          },
+          { type: "text", text: "visible" },
+        ],
+      },
+    ];
+
+    const result = filterMessagesThinkingBlocks(messages) as any;
+    expect(result[0].content[0]).toEqual({
+      thought: true,
+      text: "wrapped thought",
+      thoughtSignature: validSignature,
+    });
+  });
+
+  it("preserves non-thinking blocks and returns message unchanged when content is missing", () => {
+    const messages: any[] = [
+      { role: "assistant", content: [{ type: "text", text: "hello" }] },
+      { role: "assistant" },
+    ];
+
+    const result = filterMessagesThinkingBlocks(messages) as any;
+    expect(result[0]).toEqual(messages[0]);
+    expect(result[1]).toEqual(messages[1]);
+  });
+
+  it("handles non-object messages gracefully", () => {
+    // null, string and number entries must pass through without throwing.
+    const messages: any[] = [null, "string", 123, { role: "assistant", content: [] }];
+    const result = filterMessagesThinkingBlocks(messages) as any;
+    expect(result).toEqual(messages);
+  });
+});
+
+// Response-side transform: thinking/thought parts are expected to be relabelled as
+// type "reasoning", with their text surfaced as `reasoning_content` (on the response for
+// Anthropic-style content[], on the candidate for Gemini-style candidates[]).
+describe("transformThinkingParts", () => {
+  it("transforms Anthropic-style thinking blocks to reasoning", () => {
+    const response = {
+      content: [
+        { type: "thinking", thinking: "my thoughts" },
+        { type: "text", text: "visible" },
+      ],
+    };
+    const result = transformThinkingParts(response) as any;
+    expect(result.content[0].type).toBe("reasoning");
+    expect(result.content[0].thought).toBe(true);
+    expect(result.reasoning_content).toBe("my thoughts");
+  });
+
+  it("transforms Gemini-style candidates", () => {
+    const response = {
+      candidates: [
+        {
+          content: {
+            parts: [
+              { thought: true, text: "thinking here" },
+              { text: "output" },
+            ],
+          },
+        },
+      ],
+    };
+    const result = transformThinkingParts(response) as any;
+    expect(result.candidates[0].content.parts[0].type).toBe("reasoning");
+    expect(result.candidates[0].reasoning_content).toBe("thinking here");
+  });
+
+  it("handles non-object input", () => {
+    // Non-object inputs are expected back unchanged.
+    expect(transformThinkingParts(null)).toBeNull();
+    expect(transformThinkingParts(undefined)).toBeUndefined();
+    expect(transformThinkingParts("string")).toBe("string");
+  });
+
+  it("preserves other response properties", () => {
+    const response = {
+      content: [],
+      id: "resp-123",
+      model: "claude-4",
+    };
+    const result = transformThinkingParts(response) as any;
+    expect(result.id).toBe("resp-123");
+    expect(result.model).toBe("claude-4");
+  });
+});
+
+// normalizeThinkingConfig accepts loosely-typed input: camelCase or snake_case keys,
+// and is expected to return undefined when nothing usable is present.
+describe("normalizeThinkingConfig", () => {
+  it("returns undefined for non-object input", () => {
+    expect(normalizeThinkingConfig(null)).toBeUndefined();
+    expect(normalizeThinkingConfig(undefined)).toBeUndefined();
+    expect(normalizeThinkingConfig("string")).toBeUndefined();
+  });
+
+  it("normalizes valid config", () => {
+    const result = normalizeThinkingConfig({
+      thinkingBudget: 8000,
+      includeThoughts: true,
+    });
+    expect(result).toEqual({
+      thinkingBudget: 8000,
+      includeThoughts: true,
+    });
+  });
+
+  it("handles snake_case property names", () => {
+    const result = normalizeThinkingConfig({
+      thinking_budget: 4000,
+      include_thoughts: true,
+    });
+    expect(result).toEqual({
+      thinkingBudget: 4000,
+      includeThoughts: true,
+    });
+  });
+
+  it("disables includeThoughts when budget is 0", () => {
+    const result = normalizeThinkingConfig({
+      thinkingBudget: 0,
+      includeThoughts: true,
+    });
+    expect(result?.includeThoughts).toBe(false);
+  });
+
+  it("returns undefined when both values are absent/undefined", () => {
+    const result = normalizeThinkingConfig({});
+    expect(result).toBeUndefined();
+  });
+
+  it("handles non-finite budget values", () => {
+    const result = normalizeThinkingConfig({
+      thinkingBudget: Infinity,
+      includeThoughts: true,
+    });
+    // A non-finite budget is discarded (treated as absent); without a usable budget,
+    // includeThoughts is forced to false and thinkingBudget is omitted from the result.
+    expect(result).toEqual({ includeThoughts: false });
+  });
+});
+
+// parseAntigravityApiBody takes raw response text. Expected contract per the cases below:
+// a JSON object is returned as-is, a JSON array yields its first object element
+// (skipping null entries), and anything else (invalid JSON, primitives, []) yields null.
+describe("parseAntigravityApiBody", () => {
+  it("parses valid JSON object", () => {
+    const result = parseAntigravityApiBody('{"response": {"text": "hello"}}');
+    expect(result).toEqual({ response: { text: "hello" } });
+  });
+
+  it("extracts first object from array", () => {
+    const result = parseAntigravityApiBody('[{"response": "first"}, {"response": "second"}]');
+    expect(result).toEqual({ response: "first" });
+  });
+
+  it("returns null for invalid JSON", () => {
+    expect(parseAntigravityApiBody("not json")).toBeNull();
+  });
+
+  it("returns null for empty array", () => {
+    expect(parseAntigravityApiBody("[]")).toBeNull();
+  });
+
+  it("returns null for primitive values", () => {
+    expect(parseAntigravityApiBody('"string"')).toBeNull();
+    expect(parseAntigravityApiBody("123")).toBeNull();
+  });
+
+  it("handles array with null values", () => {
+    const result = parseAntigravityApiBody('[null, {"valid": true}]');
+    expect(result).toEqual({ valid: true });
+  });
+});
+
+// Usage counters are read from body.response.usageMetadata. Missing or non-finite
+// counters are expected to come back as undefined rather than being passed through.
+describe("extractUsageMetadata", () => {
+  it("extracts usage from response.usageMetadata", () => {
+    const body = {
+      response: {
+        usageMetadata: {
+          totalTokenCount: 1000,
+          promptTokenCount: 500,
+          candidatesTokenCount: 500,
+          cachedContentTokenCount: 100,
+        },
+      },
+    };
+    const result = extractUsageMetadata(body);
+    expect(result).toEqual({
+      totalTokenCount: 1000,
+      promptTokenCount: 500,
+      candidatesTokenCount: 500,
+      cachedContentTokenCount: 100,
+    });
+  });
+
+  it("returns null when no usageMetadata", () => {
+    expect(extractUsageMetadata({ response: {} })).toBeNull();
+    expect(extractUsageMetadata({})).toBeNull();
+  });
+
+  it("handles partial usage data", () => {
+    const body = {
+      response: {
+        usageMetadata: {
+          totalTokenCount: 1000,
+        },
+      },
+    };
+    const result = extractUsageMetadata(body);
+    expect(result).toEqual({
+      totalTokenCount: 1000,
+      promptTokenCount: undefined,
+      candidatesTokenCount: undefined,
+      cachedContentTokenCount: undefined,
+    });
+  });
+
+  it("filters non-finite numbers", () => {
+    const body = {
+      response: {
+        usageMetadata: {
+          totalTokenCount: Infinity,
+          promptTokenCount: NaN,
+          candidatesTokenCount: 100,
+        },
+      },
+    };
+    const result = extractUsageMetadata(body);
+    expect(result?.totalTokenCount).toBeUndefined();
+    expect(result?.promptTokenCount).toBeUndefined();
+    expect(result?.candidatesTokenCount).toBe(100);
+  });
+});
+
+// SSE payloads are newline-separated; only `data: <json>` lines are candidates.
+// The multi-line template literals below are intentionally unindented so each SSE line
+// starts at column 0.
+describe("extractUsageFromSsePayload", () => {
+  it("extracts usage from SSE data line", () => {
+    const payload = `data: {"response": {"usageMetadata": {"totalTokenCount": 500}}}`;
+    const result = extractUsageFromSsePayload(payload);
+    expect(result?.totalTokenCount).toBe(500);
+  });
+
+  it("handles multiple SSE lines", () => {
+    const payload = `data: {"response": {}}
data: {"response": {"usageMetadata": {"totalTokenCount": 1000}}}`;
+    const result = extractUsageFromSsePayload(payload);
+    expect(result?.totalTokenCount).toBe(1000);
+  });
+
+  it("returns null when no usage found", () => {
+    const payload = `data: {"response": {"text": "hello"}}`;
+    const result = extractUsageFromSsePayload(payload);
+    expect(result).toBeNull();
+  });
+
+  it("ignores non-data lines", () => {
+    const payload = `: keepalive
event: message
data: {"response": {"usageMetadata": {"totalTokenCount": 200}}}`;
+    const result = extractUsageFromSsePayload(payload);
+    expect(result?.totalTokenCount).toBe(200);
+  });
+
+  it("handles malformed JSON gracefully", () => {
+    // The first data line is not JSON; parsing must continue with the next line.
+    const payload = `data: not json
data: {"response": {"usageMetadata": {"totalTokenCount": 300}}}`;
+    const result = extractUsageFromSsePayload(payload);
+    expect(result?.totalTokenCount).toBe(300);
+  });
+});
+
+// Call shape as exercised below: (body, status, requestedModel?). A rewrite is expected
+// only for HTTP 404 when either the requested model name (e.g. containing "claude" or
+// "opus") or the error message (containing "antigravity") points at an Antigravity model;
+// otherwise the function returns null.
+describe("rewriteAntigravityPreviewAccessError", () => {
+  it("returns null for non-404 status", () => {
+    const body = { error: { message: "Not found" } };
+    expect(rewriteAntigravityPreviewAccessError(body, 400)).toBeNull();
+    expect(rewriteAntigravityPreviewAccessError(body, 500)).toBeNull();
+  });
+
+  it("rewrites error for Antigravity model on 404", () => {
+    const body = { error: { message: "Model not found" } };
+    const result = rewriteAntigravityPreviewAccessError(body, 404, "claude-opus");
+    // The original message is kept and the preview-access hint is added to it.
+    expect(result?.error?.message).toContain("Model not found");
+    expect(result?.error?.message).toContain("preview access");
+  });
+
+  it("rewrites error when error message contains antigravity", () => {
+    const body = { error: { message: "antigravity model unavailable" } };
+    const result = rewriteAntigravityPreviewAccessError(body, 404);
+    expect(result?.error?.message).toContain("preview access");
+  });
+
+  it("returns null for 404 with non-antigravity model", () => {
+    const body = { error: { message: "Model not found" } };
+    const result = rewriteAntigravityPreviewAccessError(body, 404, "gemini-pro");
+    expect(result).toBeNull();
+  });
+
+  it("provides default message when error message is empty", () => {
+    const body = { error: { message: "" } };
+    const result = rewriteAntigravityPreviewAccessError(body, 404, "opus-model");
+    expect(result?.error?.message).toContain("Antigravity preview features are not enabled");
+  });
+
+  it("detects Claude models in requested model name", () => {
+    const body = { error: {} };
+    const result = rewriteAntigravityPreviewAccessError(body, 404, "claude-3-sonnet");
+    expect(result?.error?.message).toContain("preview access");
+  });
+});
--- a/src/plugin/request-helpers.ts
+++ b/src/plugin/request-helpers.ts
@@ -89,19 +89,17 @@ export function extractThinkingConfig(

 /**
 * Determines the final thinking configuration based on model capabilities and user settings.
- * Claude models require signed thinking blocks for multi-turn conversations.
- * Since previous thinking blocks may lack signatures, we disable thinking for Claude multi-turn.
+ * For Claude thinking models, we keep thinking enabled even in multi-turn conversations.
+ * The filterUnsignedThinkingBlocks function will handle signature validation/restoration.
 */
 export function resolveThinkingConfig(
  userConfig: ThinkingConfig | undefined,
  isThinkingModel: boolean,
-  isClaudeModel: boolean,
-  hasAssistantHistory: boolean,
+  _isClaudeModel: boolean,
+  _hasAssistantHistory: boolean,
 ): ThinkingConfig | undefined {
-  if (isClaudeModel && hasAssistantHistory) {
-    return { includeThoughts: false, thinkingBudget: 0 };
-  }
-
+  // For thinking-capable models (including Claude thinking models), enable thinking by default
+  // The signature validation/restoration is handled by filterUnsignedThinkingBlocks
  if (isThinkingModel && !userConfig) {
    return { includeThoughts: true, thinkingBudget: DEFAULT_THINKING_BUDGET };
  }
@@ -119,36 +117,220 @@ function isThinkingPart(part: Record<string, unknown>): boolean {
    || part.thought === true;
 }

+/**
+ * Returns a copy of `contentArray` with unsigned thinking parts trimmed off the end.
+ * Claude API requires that assistant messages don't end with thinking blocks.
+ * Trimming stops at the first trailing element that is either not a thinking part
+ * or is a thinking part carrying a valid signature; the input array is not mutated.
+ */
+function removeTrailingThinkingBlocks(contentArray: any[]): any[] {
+  const trimmed = contentArray.slice();
+  for (let i = trimmed.length - 1; i >= 0; i -= 1) {
+    const tail = trimmed[i];
+    if (!isThinkingPart(tail) || hasValidSignature(tail)) {
+      break;
+    }
+    trimmed.pop();
+  }
+  return trimmed;
+}
+
 /**
 * Checks if a thinking part has a valid signature.
+ * A signature counts as valid when it is a string of at least 50 characters.
+ * Gemini-style parts (`thought === true`) are checked via `thoughtSignature`;
+ * every other part is checked via `signature`.
 */
 function hasValidSignature(part: Record<string, unknown>): boolean {
-  if (part.thought === true) {
-    return Boolean(part.thoughtSignature);
+  const signature = part.thought === true ? part.thoughtSignature : part.signature;
+  return typeof signature === "string" && signature.length >= 50;
+}
+
+/**
+ * Gets the text content from a thinking part.
+ *
+ * Lookup order: a string `text`, then a string `thinking`, then an object-wrapped
+ * `thinking`, then an object-wrapped `text`. For wrappers, the inner `text` field is
+ * preferred over the inner `thinking` field. Wrapped `text` must be handled here because
+ * sanitizeThinkingPart accepts that shape; without it, signature restoration would look
+ * up an empty string and the block would be dropped.
+ *
+ * @param part - A thinking part in Gemini or Anthropic style.
+ * @returns The thinking text, or "" when no string text can be found.
+ */
+function getThinkingText(part: Record<string, unknown>): string {
+  if (typeof part.text === "string") return part.text;
+  if (typeof part.thinking === "string") return part.thinking;
+
+  // Some SDKs wrap the text in an object like { text: "...", cache_control: {...} }
+  for (const wrapper of [part.thinking, part.text]) {
+    if (!wrapper || typeof wrapper !== "object") continue;
+    const maybeText = (wrapper as any).text ?? (wrapper as any).thinking;
+    if (typeof maybeText === "string") return maybeText;
   }
-  return Boolean(part.signature);
+
+  return "";
+}
+
+/**
+ * Produces a deep copy of `obj` with every `cache_control` and `providerOptions`
+ * property removed, at any nesting depth (including inside arrays).
+ * These fields can be injected by SDKs, but Claude rejects them inside thinking blocks.
+ * Primitives, null and undefined are returned unchanged; the input is never mutated.
+ */
+function stripCacheControlRecursively(obj: unknown): unknown {
+  if (obj == null || typeof obj !== "object") {
+    return obj;
+  }
+  if (Array.isArray(obj)) {
+    return obj.map((element) => stripCacheControlRecursively(element));
+  }
+
+  const source = obj as Record<string, unknown>;
+  const cleaned: Record<string, unknown> = {};
+  for (const field of Object.keys(source)) {
+    const isSdkField = field === "cache_control" || field === "providerOptions";
+    if (!isSdkField) {
+      cleaned[field] = stripCacheControlRecursively(source[field]);
+    }
+  }
+  return cleaned;
+}
+
+/**
+ * Rebuilds a thinking part from an allow-list of fields so SDK-injected extras
+ * (such as cache_control / providerOptions) never reach the API.
+ *
+ * - Gemini-style (`thought === true`): keeps `thought`, `text`, `thoughtSignature`.
+ *   A wrapped `text` object is replaced by its inner string `text` when it has one.
+ * - Anthropic-style (`type === "thinking"` or a `thinking` field): keeps `type`,
+ *   `thinking`, `signature`. A wrapped thinking object is replaced by its inner
+ *   `text`/`thinking` string, or "" when neither is a string.
+ * - Anything else: a deep copy with cache_control / providerOptions stripped.
+ */
+function sanitizeThinkingPart(part: Record<string, unknown>): Record<string, unknown> {
+  const isGeminiThought = part.thought === true;
+  if (isGeminiThought) {
+    const clean: Record<string, unknown> = { thought: true };
+
+    const rawText = part.text;
+    if (rawText !== undefined) {
+      const isWrapped = typeof rawText === "object" && rawText !== null;
+      const inner: unknown = isWrapped ? (rawText as any).text : undefined;
+      // Fall back to the raw value when there is no inner string to unwrap.
+      clean.text = isWrapped && typeof inner === "string" ? inner : rawText;
+    }
+
+    if (part.thoughtSignature !== undefined) {
+      clean.thoughtSignature = part.thoughtSignature;
+    }
+    return clean;
+  }
+
+  const isAnthropicThinking = part.type === "thinking" || part.thinking !== undefined;
+  if (isAnthropicThinking) {
+    const clean: Record<string, unknown> = { type: "thinking" };
+
+    const raw: unknown = part.thinking ?? part.text;
+    let content: unknown = raw;
+    if (typeof raw === "object" && raw !== null) {
+      const inner: unknown = (raw as any).text ?? (raw as any).thinking;
+      content = typeof inner === "string" ? inner : "";
+    }
+
+    if (content !== undefined) {
+      clean.thinking = content;
+    }
+    if (part.signature !== undefined) {
+      clean.signature = part.signature;
+    }
+    return clean;
+  }
+
+  // Unknown thinking shape: keep its fields but drop the SDK-injected ones.
+  return stripCacheControlRecursively(part) as Record<string, unknown>;
+}
+
+/**
+ * Filters one array of parts/content blocks.
+ *
+ * Non-objects and non-thinking parts pass through untouched. A thinking part is kept
+ * (after sanitizing) when it already has a valid signature, or when a signature of at
+ * least 50 characters can be restored via `getCachedSignatureFn(sessionId, text)`.
+ * Every other thinking part is dropped.
+ *
+ * @param contentArray - Parts (Gemini) or content blocks (Anthropic) to filter.
+ * @param sessionId - Optional session ID used for the signature cache lookup.
+ * @param getCachedSignatureFn - Optional lookup returning a cached signature for a text.
+ * @returns A new array; the input array and its elements are not mutated.
+ */
+function filterContentArray(
+  contentArray: any[],
+  sessionId?: string,
+  getCachedSignatureFn?: (sessionId: string, text: string) => string | undefined,
+): any[] {
+  const kept: any[] = [];
+
+  for (const entry of contentArray) {
+    const isObject = Boolean(entry) && typeof entry === "object";
+    if (!isObject || !isThinkingPart(entry)) {
+      kept.push(entry);
+      continue;
+    }
+
+    let candidate: Record<string, unknown> | undefined;
+    if (hasValidSignature(entry)) {
+      candidate = entry;
+    } else if (sessionId && getCachedSignatureFn) {
+      const thinkingText = getThinkingText(entry);
+      const restored = thinkingText ? getCachedSignatureFn(sessionId, thinkingText) : undefined;
+      if (restored && restored.length >= 50) {
+        // Gemini-style parts carry the signature under a different key.
+        const signatureKey = (entry as any).thought === true ? "thoughtSignature" : "signature";
+        candidate = { ...entry, [signatureKey]: restored };
+      }
+    }
+
+    // No candidate means the block is unsigned/invalid and is dropped.
+    if (candidate) {
+      kept.push(sanitizeThinkingPart(candidate));
+    }
+  }
+
+  return kept;
+}

 /**
 * Filters out unsigned thinking blocks from contents (required by Claude API).
+ * Attempts to restore signatures from cache for thinking blocks that lack valid signatures.
+ * Handles both Gemini-style entries (`parts[]`) and Anthropic-style entries (`content[]`);
+ * entries with neither array, and non-object entries, are returned unchanged.
+ *
+ * @param contents - The contents array from the request
+ * @param sessionId - Optional session ID for signature cache lookup
+ * @param getCachedSignatureFn - Optional function to retrieve cached signatures
+ * @returns A new array; modified entries are shallow copies with a filtered parts/content array
 */
-export function filterUnsignedThinkingBlocks(contents: any[]): any[] {
+export function filterUnsignedThinkingBlocks(
+  contents: any[],
+  sessionId?: string,
+  getCachedSignatureFn?: (sessionId: string, text: string) => string | undefined,
+): any[] {
   return contents.map((content: any) => {
-    if (!content || !Array.isArray(content.parts)) {
+    if (!content || typeof content !== "object") {
       return content;
     }

-    const filteredParts = content.parts.filter((part: any) => {
-      if (!part || typeof part !== "object") {
-        return true;
-      }
-      if (isThinkingPart(part)) {
-        return hasValidSignature(part);
-      }
-      return true;
-    });
+    // Gemini format: contents[].parts[]
+    if (Array.isArray((content as any).parts)) {
+      let filteredParts = filterContentArray((content as any).parts, sessionId, getCachedSignatureFn);

-    return { ...content, parts: filteredParts };
+      // Remove trailing thinking blocks for model role (assistant equivalent in Gemini)
+      if ((content as any).role === "model") {
+        filteredParts = removeTrailingThinkingBlocks(filteredParts);
+      }
+
+      return { ...content, parts: filteredParts };
+    }
+
+    // Some Anthropic-style payloads may appear here as contents[].content[]
+    if (Array.isArray((content as any).content)) {
+      let filteredContent = filterContentArray((content as any).content, sessionId, getCachedSignatureFn);
+
+      // Claude API requires assistant messages don't end with thinking blocks
+      if ((content as any).role === "assistant") {
+        filteredContent = removeTrailingThinkingBlocks(filteredContent);
+      }
+
+      return { ...content, content: filteredContent };
+    }
+
+    // Neither parts[] nor content[]: nothing to filter.
+    return content;
+  });
+}
+
+/**
+ * Applies thinking-block filtering to an Anthropic-style `messages[]` payload.
+ *
+ * Messages that are not objects, or whose `content` is not an array, are returned
+ * unchanged. For the rest, `content` is filtered (unsigned thinking blocks dropped or
+ * restored from cache) and, for `role === "assistant"`, trailing unsigned thinking
+ * blocks are removed because the Claude API rejects assistant messages ending in one.
+ *
+ * @param messages - The messages array from the request
+ * @param sessionId - Optional session ID for signature cache lookup
+ * @param getCachedSignatureFn - Optional function to retrieve cached signatures
+ * @returns A new array; filtered messages are shallow copies with a new `content` array
+ */
+export function filterMessagesThinkingBlocks(
+  messages: any[],
+  sessionId?: string,
+  getCachedSignatureFn?: (sessionId: string, text: string) => string | undefined,
+): any[] {
+  return messages.map((message: any) => {
+    const hasContentArray =
+      Boolean(message) && typeof message === "object" && Array.isArray((message as any).content);
+    if (!hasContentArray) {
+      return message;
+    }
+
+    const sanitized = filterContentArray((message as any).content, sessionId, getCachedSignatureFn);
+    const content =
+      (message as any).role === "assistant" ? removeTrailingThinkingBlocks(sanitized) : sanitized;
+
+    return { ...message, content };
   });
 }

--- a/src/plugin/request.ts
+++ b/src/plugin/request.ts
@@ -3,12 +3,14 @@ import {
  ANTIGRAVITY_HEADERS,
  ANTIGRAVITY_ENDPOINT,
 } from "../constants";
+import { cacheSignature, getCachedSignature } from "./cache";
 import { logAntigravityDebugResponse, type AntigravityDebugContext } from "./debug";
 import {
  extractThinkingConfig,
  extractUsageFromSsePayload,
  extractUsageMetadata,
  filterUnsignedThinkingBlocks,
+  filterMessagesThinkingBlocks,
  isThinkingCapableModel,
  normalizeThinkingConfig,
  parseAntigravityApiBody,
@@ -18,6 +20,23 @@ import {
  type AntigravityApiBody,
 } from "./request-helpers";

+/**
+ * Stable session ID for the plugin's lifetime: a "-" followed by a random UUID.
+ * Passed to the signature cache so thinking signatures can be restored across multi-turn conversations.
+ * Generated once when this module is evaluated and reused for every request it prepares.
+ */
+const PLUGIN_SESSION_ID = `-${crypto.randomUUID()}`;
+
+// maxOutputTokens applied to Claude thinking requests whose limit is unset or not above the thinking budget.
+const CLAUDE_THINKING_MAX_OUTPUT_TOKENS = 64_000;
+
+/**
+ * Returns the module-level constant PLUGIN_SESSION_ID; every call yields the same value.
+ */
+export function getPluginSessionId(): string {
+  return PLUGIN_SESSION_ID;
+}
+
 function generateSyntheticProjectId(): string {
  const adjectives = ["useful", "bright", "swift", "calm", "bold"];
  const nouns = ["fuze", "wave", "spark", "flow", "core"];
@@ -65,31 +84,39 @@ function transformStreamingPayload(payload: string): string {

 /**
 * Creates a TransformStream that processes SSE chunks incrementally,
- * transforming each line as it arrives for true streaming support.
+ * transforming each line as it arrives for true real-time streaming support.
+ * Optionally caches thinking signatures for Claude multi-turn conversations.
 */
-function createStreamingTransformer(): TransformStream<Uint8Array, Uint8Array> {
+function createStreamingTransformer(sessionId?: string): TransformStream<Uint8Array, Uint8Array> {
  const decoder = new TextDecoder();
  const encoder = new TextEncoder();
  let buffer = "";
+  // Buffer for accumulating thinking text per candidate index (for signature caching)
+  const thoughtBuffer = new Map<number, string>();

  return new TransformStream({
    transform(chunk, controller) {
+      // Decode chunk with stream: true to handle multi-byte characters correctly
      buffer += decoder.decode(chunk, { stream: true });

-      // Process complete lines
+      // Process complete lines immediately for real-time streaming
      const lines = buffer.split("\n");
      // Keep the last incomplete line in buffer
      buffer = lines.pop() || "";

      for (const line of lines) {
-        const transformedLine = transformSseLine(line);
+        // Transform and forward each line immediately
+        const transformedLine = transformSseLine(line, sessionId, thoughtBuffer);
        controller.enqueue(encoder.encode(transformedLine + "\n"));
      }
    },
    flush(controller) {
+      // Flush any remaining bytes from TextDecoder
+      buffer += decoder.decode();
+
      // Process any remaining data in buffer
      if (buffer) {
-        const transformedLine = transformSseLine(buffer);
+        const transformedLine = transformSseLine(buffer, sessionId, thoughtBuffer);
        controller.enqueue(encoder.encode(transformedLine));
      }
    },
@@ -98,8 +125,13 @@ function createStreamingTransformer(): TransformStream<Uint8Array, Uint8Array> {

 /**
 * Transforms a single SSE line, extracting and transforming the inner response.
+ * Optionally caches thinking signatures for Claude multi-turn support.
 */
-function transformSseLine(line: string): string {
+function transformSseLine(
+  line: string,
+  sessionId?: string,
+  thoughtBuffer?: Map<number, string>,
+): string {
  if (!line.startsWith("data:")) {
    return line;
  }
@@ -110,6 +142,10 @@ function transformSseLine(line: string): string {
  try {
    const parsed = JSON.parse(json) as { response?: unknown };
    if (parsed.response !== undefined) {
+      // Cache thinking signatures for Claude multi-turn support
+      if (sessionId && thoughtBuffer) {
+        cacheThinkingSignatures(parsed.response, sessionId, thoughtBuffer);
+      }
      const transformed = transformThinkingParts(parsed.response);
      return `data: ${JSON.stringify(transformed)}`;
    }
@@ -117,6 +153,58 @@ function transformSseLine(line: string): string {
  return line;
 }

+/**
+ * Extracts and caches thinking signatures from a response for Claude multi-turn support.
+ * Thinking text accumulates in `thoughtBuffer` per candidate index across calls; once a
+ * signature is cached that text is cleared so the next thinking block is keyed by its own text.
+ */
+function cacheThinkingSignatures(
+  response: unknown,
+  sessionId: string,
+  thoughtBuffer: Map<number, string>,
+): void {
+  if (!response || typeof response !== "object") return;
+
+  const resp = response as Record<string, unknown>;
+
+  // Handle Gemini-style candidates array (Claude through Antigravity uses this format)
+  if (Array.isArray(resp.candidates)) {
+    resp.candidates.forEach((candidate: any, index: number) => {
+      const parts = candidate?.content?.parts;
+      if (!Array.isArray(parts)) return;
+      for (const part of parts) {
+        if (!part || typeof part !== "object") continue; // tolerate null/primitive parts
+        // Collect thinking text
+        if (part.thought === true || part.type === "thinking") {
+          const text = part.text || part.thinking || "";
+          if (text) thoughtBuffer.set(index, (thoughtBuffer.get(index) ?? "") + text);
+        }
+
+        // Cache signature when we receive it, then reset so later blocks start from empty text
+        const fullText = thoughtBuffer.get(index) ?? "";
+        if (part.thoughtSignature && fullText && sessionId) {
+          cacheSignature(sessionId, fullText, part.thoughtSignature);
+          thoughtBuffer.delete(index);
+        }
+      }
+    });
+  }
+
+  // Handle Anthropic-style content array
+  if (Array.isArray(resp.content)) {
+    let thinkingText = "";
+    for (const block of resp.content as any[]) {
+      if (block?.type === "thinking") {
+        thinkingText += block.thinking || block.text || "";
+      }
+      if (block?.signature && thinkingText && sessionId) {
+        cacheSignature(sessionId, thinkingText, block.signature);
+        thinkingText = "";
+      }
+    }
+  }
+}
+
 /**
 * Rewrites OpenAI-style requests into Antigravity shape, normalizing model, headers,
 * optional cached_content, and thinking config. Also toggles streaming mode for SSE actions.
@@ -127,13 +215,14 @@ export function prepareAntigravityRequest(
  accessToken: string,
  projectId: string,
  endpointOverride?: string,
-): { request: RequestInfo; init: RequestInit; streaming: boolean; requestedModel?: string; effectiveModel?: string; projectId?: string; endpoint?: string; toolDebugMissing?: number; toolDebugSummary?: string; toolDebugPayload?: string } {
+): { request: RequestInfo; init: RequestInit; streaming: boolean; requestedModel?: string; effectiveModel?: string; projectId?: string; endpoint?: string; sessionId?: string; toolDebugMissing?: number; toolDebugSummary?: string; toolDebugPayload?: string } {
  const baseInit: RequestInit = { ...init };
  const headers = new Headers(init?.headers ?? {});
  let resolvedProjectId = projectId?.trim() || "";
  let toolDebugMissing = 0;
  const toolDebugSummaries: string[] = [];
  let toolDebugPayload: string | undefined;
+  let sessionId: string | undefined;

  if (!isGenerativeLanguageRequest(input)) {
    return {
@@ -163,6 +252,7 @@ export function prepareAntigravityRequest(
  const transformedUrl = `${baseEndpoint}/v1internal:${rawAction}${streaming ? "?alt=sse" : ""
    }`;
  const isClaudeModel = upstreamModel.toLowerCase().includes("claude");
+  const isClaudeThinkingModel = isClaudeModel && upstreamModel.toLowerCase().includes("thinking");

  let body = baseInit.body;
  if (typeof baseInit.body === "string" && baseInit.body) {
@@ -175,6 +265,47 @@ export function prepareAntigravityRequest(
          ...parsedBody,
          model: effectiveModel,
        } as Record<string, unknown>;
+
+        // Some callers may already send an Antigravity-wrapped body.
+        // We still need to sanitize Claude thinking blocks (remove cache_control)
+        // and attach a stable sessionId so multi-turn signature caching works.
+        const requestRoot = wrappedBody.request;
+        const requestObjects: Array<Record<string, unknown>> = [];
+
+        if (requestRoot && typeof requestRoot === "object") {
+          requestObjects.push(requestRoot as Record<string, unknown>);
+          const nested = (requestRoot as any).request;
+          if (nested && typeof nested === "object") {
+            requestObjects.push(nested as Record<string, unknown>);
+          }
+        }
+
+        if (requestObjects.length > 0) {
+          sessionId = PLUGIN_SESSION_ID;
+        }
+
+        for (const req of requestObjects) {
+          // Use stable session ID for signature caching across multi-turn conversations
+          (req as any).sessionId = PLUGIN_SESSION_ID;
+
+          if (isClaudeModel) {
+            if (Array.isArray((req as any).contents)) {
+              (req as any).contents = filterUnsignedThinkingBlocks(
+                (req as any).contents,
+                PLUGIN_SESSION_ID,
+                getCachedSignature,
+              );
+            }
+            if (Array.isArray((req as any).messages)) {
+              (req as any).messages = filterMessagesThinkingBlocks(
+                (req as any).messages,
+                PLUGIN_SESSION_ID,
+                getCachedSignature,
+              );
+            }
+          }
+        }
+
        body = JSON.stringify(wrappedBody);
      } else {
        const requestPayload: Record<string, unknown> = { ...parsedBody };
@@ -182,6 +313,21 @@ export function prepareAntigravityRequest(
        const rawGenerationConfig = requestPayload.generationConfig as Record<string, unknown> | undefined;
        const extraBody = requestPayload.extra_body as Record<string, unknown> | undefined;

+        if (isClaudeModel) {
+          if (!requestPayload.toolConfig) {
+            requestPayload.toolConfig = {};
+          }
+          if (typeof requestPayload.toolConfig === "object" && requestPayload.toolConfig !== null) {
+            const toolConfig = requestPayload.toolConfig as Record<string, unknown>;
+            if (!toolConfig.functionCallingConfig) {
+              toolConfig.functionCallingConfig = {};
+            }
+            if (typeof toolConfig.functionCallingConfig === "object" && toolConfig.functionCallingConfig !== null) {
+              (toolConfig.functionCallingConfig as Record<string, unknown>).mode = "VALIDATED";
+            }
+          }
+        }
+
        // Resolve thinking configuration based on user settings and model capabilities
        const userThinkingConfig = extractThinkingConfig(requestPayload, rawGenerationConfig, extraBody);
        const hasAssistantHistory = Array.isArray(requestPayload.contents) &&
@@ -196,11 +342,41 @@ export function prepareAntigravityRequest(

        const normalizedThinking = normalizeThinkingConfig(finalThinkingConfig);
        if (normalizedThinking) {
+          const thinkingBudget = normalizedThinking.thinkingBudget;
+          const thinkingConfig: Record<string, unknown> = isClaudeThinkingModel
+            ? {
+              include_thoughts: normalizedThinking.includeThoughts ?? true,
+              ...(typeof thinkingBudget === "number" && thinkingBudget > 0
+                ? { thinking_budget: thinkingBudget }
+                : {}),
+            }
+            : {
+              includeThoughts: normalizedThinking.includeThoughts,
+              ...(typeof thinkingBudget === "number" && thinkingBudget > 0 ? { thinkingBudget } : {}),
+            };
+
          if (rawGenerationConfig) {
-            rawGenerationConfig.thinkingConfig = normalizedThinking;
+            rawGenerationConfig.thinkingConfig = thinkingConfig;
+
+            if (isClaudeThinkingModel && typeof thinkingBudget === "number" && thinkingBudget > 0) {
+              const currentMax = (rawGenerationConfig.maxOutputTokens ?? rawGenerationConfig.max_output_tokens) as number | undefined;
+              if (!currentMax || currentMax <= thinkingBudget) {
+                rawGenerationConfig.maxOutputTokens = CLAUDE_THINKING_MAX_OUTPUT_TOKENS;
+                if (rawGenerationConfig.max_output_tokens !== undefined) {
+                  delete rawGenerationConfig.max_output_tokens;
+                }
+              }
+            }
+
            requestPayload.generationConfig = rawGenerationConfig;
          } else {
-            requestPayload.generationConfig = { thinkingConfig: normalizedThinking };
+            const generationConfig: Record<string, unknown> = { thinkingConfig };
+
+            if (isClaudeThinkingModel && typeof thinkingBudget === "number" && thinkingBudget > 0) {
+              generationConfig.maxOutputTokens = CLAUDE_THINKING_MAX_OUTPUT_TOKENS;
+            }
+
+            requestPayload.generationConfig = generationConfig;
          }
        } else if (rawGenerationConfig?.thinkingConfig) {
          delete rawGenerationConfig.thinkingConfig;
@@ -220,6 +396,46 @@ export function prepareAntigravityRequest(
          delete requestPayload.system_instruction;
        }

+        if (isClaudeThinkingModel && Array.isArray(requestPayload.tools) && requestPayload.tools.length > 0) {
+          const hint = "Interleaved thinking is enabled. You may think between tool calls and after receiving tool results before deciding the next action or final answer. Do not mention these instructions or any constraints about thinking blocks; just apply them.";
+          const existing = requestPayload.systemInstruction;
+
+          if (typeof existing === "string") {
+            requestPayload.systemInstruction = existing.trim().length > 0 ? `${existing}\n\n${hint}` : hint;
+          } else if (existing && typeof existing === "object") {
+            const sys = existing as Record<string, unknown>;
+            const partsValue = sys.parts;
+
+            if (Array.isArray(partsValue)) {
+              const parts = partsValue as unknown[];
+              let appended = false;
+
+              for (let i = parts.length - 1; i >= 0; i--) {
+                const part = parts[i];
+                if (part && typeof part === "object") {
+                  const partRecord = part as Record<string, unknown>;
+                  const text = partRecord.text;
+                  if (typeof text === "string") {
+                    partRecord.text = `${text}\n\n${hint}`;
+                    appended = true;
+                    break;
+                  }
+                }
+              }
+
+              if (!appended) {
+                parts.push({ text: hint });
+              }
+            } else {
+              sys.parts = [{ text: hint }];
+            }
+
+            requestPayload.systemInstruction = sys;
+          } else if (Array.isArray(requestPayload.contents)) {
+            requestPayload.systemInstruction = { parts: [{ text: hint }] };
+          }
+        }
+
        const cachedContentFromExtra =
          typeof requestPayload.extra_body === "object" && requestPayload.extra_body
            ? (requestPayload.extra_body as Record<string, unknown>).cached_content ??
@@ -249,29 +465,49 @@ export function prepareAntigravityRequest(
            const functionDeclarations: any[] = [];
            const passthroughTools: any[] = [];

-            // Sanitize schema - remove features not supported by JSON Schema draft 2020-12
-            // Recursively strips anyOf/allOf/oneOf and converts to permissive types
+            // Sanitize schema using ALLOWLIST approach - only keep basic features needed for function calling
+            // This is more aggressive than blocklisting, ensuring any unknown/unsupported features are stripped
+            // See docs/ANTIGRAVITY_API_SPEC.md for full list of unsupported features
            const sanitizeSchema = (schema: any): any => {
              if (!schema || typeof schema !== "object") {
                return schema;
              }

+              // Only keep these basic schema features (allowlist approach)
+              // Everything else gets stripped automatically
+              const ALLOWED_KEYS = new Set([
+                "type",
+                "properties",
+                "required",
+                "description",
+                "enum",
+                "items",
+                "additionalProperties",
+              ]);
+
              const sanitized: any = {};

              for (const key of Object.keys(schema)) {
-                // Skip anyOf/allOf/oneOf - not well supported
-                if (key === "anyOf" || key === "allOf" || key === "oneOf") {
+                // Convert "const" to "enum: [value]" (const is not supported but enum is)
+                if (key === "const") {
+                  sanitized.enum = [schema[key]];
+                  continue;
+                }
+
+                // Skip keys not in allowlist
+                if (!ALLOWED_KEYS.has(key)) {
                  continue;
                }

                const value = schema[key];

                if (key === "items" && value && typeof value === "object") {
-                  // Handle array items - if it has anyOf, replace with permissive type
-                  if (value.anyOf || value.allOf || value.oneOf) {
-                    sanitized.items = {};
+                  const sanitizedItems = sanitizeSchema(value);
+                  // Empty items schema {} is invalid - convert to permissive string type
+                  if (Object.keys(sanitizedItems).length === 0) {
+                    sanitized.items = { type: "string" };
                  } else {
-                    sanitized.items = sanitizeSchema(value);
+                    sanitized.items = sanitizedItems;
                  }
                } else if (key === "properties" && value && typeof value === "object") {
                  // Recursively sanitize properties
@@ -290,14 +526,38 @@ export function prepareAntigravityRequest(
            };

            const normalizeSchema = (schema: any) => {
+              // Helper to create a placeholder schema for empty parameter tools
+              // Antigravity API in VALIDATED mode cannot handle truly empty schemas
+              // The placeholder must be REQUIRED so the model sends a non-empty args object
+              const createPlaceholderSchema = (base: any = {}) => ({
+                ...base,
+                type: "object",
+                properties: {
+                  reason: {
+                    type: "string",
+                    description: "Brief explanation of why you are calling this tool",
+                  },
+                },
+                required: ["reason"],
+              });
+
              if (!schema || typeof schema !== "object") {
                toolDebugMissing += 1;
-                // Minimal fallback for tools without schemas
-                return { type: "object" };
+                // Fallback for tools without schemas - add dummy property for Antigravity API
+                return createPlaceholderSchema();
              }

-              // Sanitize and pass through
-              return sanitizeSchema(schema);
+              const sanitized = sanitizeSchema(schema);
+
+              // Check if schema is effectively empty (type: object with no properties)
+              if (
+                sanitized.type === "object" &&
+                (!sanitized.properties || Object.keys(sanitized.properties).length === 0)
+              ) {
+                return createPlaceholderSchema(sanitized);
+              }
+
+              return sanitized;
            };

            requestPayload.tools.forEach((tool: any, idx: number) => {
@@ -438,8 +698,23 @@ export function prepareAntigravityRequest(
        }

        // For Claude models, filter out unsigned thinking blocks (required by Claude API)
-        if (isClaudeModel && Array.isArray(requestPayload.contents)) {
-          requestPayload.contents = filterUnsignedThinkingBlocks(requestPayload.contents);
+        // Attempts to restore signatures from cache for multi-turn conversations
+        // Handle both Gemini-style contents[] and Anthropic-style messages[] payloads.
+        if (isClaudeModel) {
+          if (Array.isArray(requestPayload.contents)) {
+            requestPayload.contents = filterUnsignedThinkingBlocks(
+              requestPayload.contents,
+              PLUGIN_SESSION_ID,
+              getCachedSignature,
+            );
+          }
+          if (Array.isArray(requestPayload.messages)) {
+            requestPayload.messages = filterMessagesThinkingBlocks(
+              requestPayload.messages,
+              PLUGIN_SESSION_ID,
+              getCachedSignature,
+            );
+          }
        }

        // For Claude models, ensure functionCall/tool use parts carry IDs (required by Anthropic).
@@ -520,7 +795,9 @@ export function prepareAntigravityRequest(
          requestId: "agent-" + crypto.randomUUID(),
        });
        if (wrappedBody.request && typeof wrappedBody.request === 'object') {
-          (wrappedBody.request as any).sessionId = "-" + Math.floor(Math.random() * 9000000000000000000).toString();
+          // Use stable session ID for signature caching across multi-turn conversations
+          sessionId = PLUGIN_SESSION_ID;
+          (wrappedBody.request as any).sessionId = sessionId;
        }

        body = JSON.stringify(wrappedBody);
@@ -534,6 +811,21 @@ export function prepareAntigravityRequest(
    headers.set("Accept", "text/event-stream");
  }

+  // Add interleaved thinking header for Claude thinking models
+  // This lets Claude think between tool calls and after tool results
+  if (isClaudeThinkingModel) {
+    const existing = headers.get("anthropic-beta");
+    const interleavedHeader = "interleaved-thinking-2025-05-14";
+
+    if (existing) {
+      if (!existing.includes(interleavedHeader)) {
+        headers.set("anthropic-beta", `${existing},${interleavedHeader}`);
+      }
+    } else {
+      headers.set("anthropic-beta", interleavedHeader);
+    }
+  }
+
  headers.set("User-Agent", ANTIGRAVITY_HEADERS["User-Agent"]);
  headers.set("X-Goog-Api-Client", ANTIGRAVITY_HEADERS["X-Goog-Api-Client"]);
  headers.set("Client-Metadata", ANTIGRAVITY_HEADERS["Client-Metadata"]);
@@ -554,6 +846,7 @@ export function prepareAntigravityRequest(
    effectiveModel: upstreamModel,
    projectId: resolvedProjectId,
    endpoint: transformedUrl,
+    sessionId,
    toolDebugMissing,
    toolDebugSummary: toolDebugSummaries.slice(0, 20).join(" | "),
    toolDebugPayload,
@@ -564,7 +857,8 @@ export function prepareAntigravityRequest(
 * Normalizes Antigravity responses: applies retry headers, extracts cache usage into headers,
 * rewrites preview errors, flattens streaming payloads, and logs debug metadata.
 *
- * For streaming SSE responses, uses TransformStream for true incremental streaming.
+ * For streaming SSE responses, uses TransformStream for true real-time incremental streaming.
+ * Thinking/reasoning tokens are transformed and forwarded immediately as they arrive.
 */
 export async function transformAntigravityResponse(
  response: Response,
@@ -574,6 +868,7 @@ export async function transformAntigravityResponse(
  projectId?: string,
  endpoint?: string,
  effectiveModel?: string,
+  sessionId?: string,
  toolDebugMissing?: number,
  toolDebugSummary?: string,
  toolDebugPayload?: string,
@@ -590,51 +885,18 @@ export async function transformAntigravityResponse(
  }

  // For successful streaming responses, use TransformStream to transform SSE events
-  // while maintaining real-time streaming (no buffering of entire response)
+  // while maintaining real-time streaming (no buffering of entire response).
+  // This enables thinking tokens to be displayed as they arrive, like the Codex plugin.
  if (streaming && response.ok && isEventStreamResponse && response.body) {
    const headers = new Headers(response.headers);

-    // Buffer for partial SSE events that span chunks
-    let buffer = "";
-    const decoder = new TextDecoder();
-    const encoder = new TextEncoder();
-
-    const transformStream = new TransformStream<Uint8Array, Uint8Array>({
-      transform(chunk, controller) {
-        // Decode chunk with stream: true to handle multi-byte characters
-        buffer += decoder.decode(chunk, { stream: true });
-
-        // Split on double newline (SSE event delimiter)
-        const events = buffer.split("\n\n");
-
-        // Keep last part in buffer (may be incomplete)
-        buffer = events.pop() || "";
-
-        // Process and forward complete events immediately
-        for (const event of events) {
-          if (event.trim()) {
-            const transformed = transformStreamingPayload(event);
-            controller.enqueue(encoder.encode(transformed + "\n\n"));
-          }
-        }
-      },
-      flush(controller) {
-        // Flush any remaining bytes from TextDecoder
-        buffer += decoder.decode();
-
-        // Handle any remaining data at stream end
-        if (buffer.trim()) {
-          const transformed = transformStreamingPayload(buffer);
-          controller.enqueue(encoder.encode(transformed));
-        }
-      }
-    });
-
    logAntigravityDebugResponse(debugContext, response, {
-      note: "Streaming SSE response (transformed)",
+      note: "Streaming SSE response (real-time transform)",
    });

-    return new Response(response.body.pipeThrough(transformStream), {
+    // Use the optimized line-by-line transformer for immediate forwarding
+    // This ensures thinking/reasoning content streams in real-time
+    return new Response(response.body.pipeThrough(createStreamingTransformer(sessionId)), {
      status: response.status,
      statusText: response.statusText,
      headers,
--- a/src/plugin/types.ts
+++ b/src/plugin/types.ts
@@ -8,12 +8,17 @@ export interface OAuthAuthDetails {
  expires?: number;
 }

+export interface ApiKeyAuthDetails {
+  type: "api_key"; // literal tag identifying this member of the AuthDetails union
+  key: string; // presumably the API key value itself (confirm against callers)
+}
+
 export interface NonOAuthAuthDetails {
  type: string;
  [key: string]: unknown;
 }

-export type AuthDetails = OAuthAuthDetails | NonOAuthAuthDetails;
+export type AuthDetails = OAuthAuthDetails | ApiKeyAuthDetails | NonOAuthAuthDetails;

 export type GetAuth = () => Promise<AuthDetails>;

--- a/tsconfig.build.json
+++ b/tsconfig.build.json
@@ -8,5 +8,6 @@
    "sourceMap": true,
    "allowImportingTsExtensions": false
  },
-  "include": ["src/**/*.ts", "src/**/*.tsx", "index.ts"]
+  "include": ["src/**/*.ts", "src/**/*.tsx", "index.ts"],
+  "exclude": ["src/**/*.test.ts", "src/**/*.test.tsx", "src/**/*.spec.ts", "src/**/*.spec.tsx"]
 }