Mirror of https://github.com/anomalyco/opencode.git (synced 2026-05-13 23:52:06 +00:00)

Merge branch 'dev' into upgrade-sst-version
.github/workflows/test.yml (vendored, 5 changes)

@@ -68,6 +68,11 @@ jobs:
        env:
          OPENCODE_EXPERIMENTAL_DISABLE_FILEWATCHER: ${{ runner.os == 'Windows' && 'true' || 'false' }}

      - name: Run HttpApi exerciser gates
        if: runner.os == 'Linux'
        working-directory: packages/opencode
        run: bun run test:httpapi

      - name: Publish unit reports
        if: always()
        uses: mikepenz/action-junit-report@v6
.gitignore (vendored, 1 change)

@@ -3,6 +3,7 @@ node_modules
.worktrees
.sst
.env
.env.local
.idea
.vscode
.codex
.gitleaksignore (new file, 5 lines)

@@ -0,0 +1,5 @@
# Fake secret-looking strings used by HTTP recorder redaction tests.
afa57acfda894e0ebf3c637dd710310b705c0a2f:packages/http-recorder/test/record-replay.test.ts:generic-api-key:69
afa57acfda894e0ebf3c637dd710310b705c0a2f:packages/http-recorder/test/record-replay.test.ts:generic-api-key:92
afa57acfda894e0ebf3c637dd710310b705c0a2f:packages/http-recorder/test/record-replay.test.ts:generic-api-key:146
afa57acfda894e0ebf3c637dd710310b705c0a2f:packages/http-recorder/test/record-replay.test.ts:gcp-api-key:71
@@ -2,87 +2,62 @@
import { useTerminalDimensions, type JSX } from "@opentui/solid"
import { useBindings, useKeymapSelector } from "@opentui/keymap/solid"
import { RGBA, VignetteEffect, type KeyEvent, type Renderable } from "@opentui/core"
import { resolveBindingSections, type BindingSectionsConfig, type BindingValue } from "@opentui/keymap/extras"
import type { Binding } from "@opentui/keymap"
import { createBindingLookup, type BindingConfig } from "@opentui/keymap/extras"
import type { TuiPlugin, TuiPluginApi, TuiPluginMeta, TuiPluginModule, TuiSlotPlugin } from "@opencode-ai/plugin/tui"

const tabs = ["overview", "counter", "help"]
const command = {
  modal: "plugin.smoke.modal",
  screen: "plugin.smoke.screen",
  alert: "plugin.smoke.alert",
  confirm: "plugin.smoke.confirm",
  prompt: "plugin.smoke.prompt",
  select: "plugin.smoke.select",
  host: "plugin.smoke.host",
  home: "plugin.smoke.home",
  toast: "plugin.smoke.toast",
  dialog_close: "plugin.smoke.dialog.close",
  local_push: "plugin.smoke.local.push",
  local_pop: "plugin.smoke.local.pop",
  screen_home: "plugin.smoke.screen.home",
  screen_left: "plugin.smoke.screen.left",
  screen_right: "plugin.smoke.screen.right",
  screen_up: "plugin.smoke.screen.up",
  screen_down: "plugin.smoke.screen.down",
  screen_modal: "plugin.smoke.screen.modal",
  screen_local: "plugin.smoke.screen.local",
  screen_host: "plugin.smoke.screen.host",
  screen_alert: "plugin.smoke.screen.alert",
  screen_confirm: "plugin.smoke.screen.confirm",
  screen_prompt: "plugin.smoke.screen.prompt",
  screen_select: "plugin.smoke.screen.select",
  modal_accept: "plugin.smoke.modal.accept",
  modal_close: "plugin.smoke.modal.close",
} as const

const sectionNames = ["global", "dialog", "local", "screen", "modal"] as const
type SectionName = (typeof sectionNames)[number]
type SectionConfig = Record<string, BindingValue<Renderable, KeyEvent>>
type ResolvedSections = Record<SectionName, Binding<Renderable, KeyEvent>[]>
type SmokeKeymap = {
  sections?: Partial<Record<SectionName, SectionConfig>>
  modal: "smoke_modal",
  screen: "smoke_screen",
  alert: "smoke_alert",
  confirm: "smoke_confirm",
  prompt: "smoke_prompt",
  select: "smoke_select",
  host: "smoke_host",
  home: "smoke_home",
  toast: "smoke_toast",
  dialog_close: "smoke_dialog_close",
  local_push: "smoke_local_push",
  local_pop: "smoke_local_pop",
  screen_home: "smoke_screen_home",
  screen_left: "smoke_screen_left",
  screen_right: "smoke_screen_right",
  screen_up: "smoke_screen_up",
  screen_down: "smoke_screen_down",
  screen_modal: "smoke_screen_modal",
  screen_local: "smoke_screen_local",
  screen_host: "smoke_screen_host",
  screen_alert: "smoke_screen_alert",
  screen_confirm: "smoke_screen_confirm",
  screen_prompt: "smoke_screen_prompt",
  screen_select: "smoke_screen_select",
  modal_accept: "smoke_modal_accept",
  modal_close: "smoke_modal_close",
}

type SmokeOptions = {
  enabled?: boolean
  label?: unknown
  route?: unknown
  vignette?: unknown
  keymap?: SmokeKeymap
}
type SmokeBindings = BindingConfig<Renderable, KeyEvent>

const defaultKeymap = {
  global: {
    [command.modal]: "ctrl+shift+m",
    [command.screen]: "ctrl+shift+o",
  },
  dialog: {
    [command.dialog_close]: "escape",
  },
  local: {
    [command.local_push]: "enter,return",
    [command.local_pop]: "escape,q,backspace",
  },
  screen: {
    [command.screen_home]: "escape,ctrl+h",
    [command.screen_left]: "left,h",
    [command.screen_right]: "right,l",
    [command.screen_up]: "up,k",
    [command.screen_down]: "down,j",
    [command.screen_modal]: "ctrl+shift+m",
    [command.screen_local]: "x",
    [command.screen_host]: "z",
    [command.screen_alert]: "a",
    [command.screen_confirm]: "c",
    [command.screen_prompt]: "p",
    [command.screen_select]: "s",
  },
  modal: {
    [command.modal_accept]: "enter,return",
    [command.modal_close]: "escape",
  },
} satisfies Record<SectionName, SectionConfig>
  [command.modal]: "ctrl+shift+m",
  [command.screen]: "ctrl+shift+o",
  [command.dialog_close]: "escape",
  [command.local_push]: "enter,return",
  [command.local_pop]: "escape,q,backspace",
  [command.screen_home]: "escape,ctrl+h",
  [command.screen_left]: "left,h",
  [command.screen_right]: "right,l",
  [command.screen_up]: "up,k",
  [command.screen_down]: "down,j",
  [command.screen_modal]: "ctrl+shift+m",
  [command.screen_local]: "x",
  [command.screen_host]: "z",
  [command.screen_alert]: "a",
  [command.screen_confirm]: "c",
  [command.screen_prompt]: "p",
  [command.screen_select]: "s",
  [command.modal_accept]: "enter,return",
  [command.modal_close]: "escape",
}

const pick = (value: unknown, fallback: string) => {
  if (typeof value !== "string") return fallback

@@ -95,11 +70,14 @@ const num = (value: unknown, fallback: number) => {
  return value
}

const record = (value: unknown): value is Record<string, unknown> =>
  !!value && typeof value === "object" && !Array.isArray(value)

type Cfg = {
  label: string
  route: string
  vignette: number
  keymap: SmokeKeymap | undefined
  keybinds: SmokeBindings | undefined
}

type Route = {

@@ -116,12 +94,12 @@ type State = {
  local: number
}

const cfg = (options: SmokeOptions | undefined) => {
const cfg = (options: Record<string, unknown> | undefined) => {
  return {
    label: pick(options?.label, "smoke"),
    route: pick(options?.route, "workspace-smoke"),
    vignette: Math.max(0, num(options?.vignette, 0.35)),
    keymap: options?.keymap,
    keybinds: record(options?.keybinds) ? (options.keybinds as SmokeBindings) : undefined,
  }
}

@@ -132,21 +110,8 @@ const names = (input: Cfg) => {
  }
}

function createKeys(input: SmokeKeymap | undefined): { sections: ResolvedSections } {
  const sections = resolveBindingSections(
    {
      global: { ...defaultKeymap.global, ...input?.sections?.global },
      dialog: { ...defaultKeymap.dialog, ...input?.sections?.dialog },
      local: { ...defaultKeymap.local, ...input?.sections?.local },
      screen: { ...defaultKeymap.screen, ...input?.sections?.screen },
      modal: { ...defaultKeymap.modal, ...input?.sections?.modal },
    } satisfies BindingSectionsConfig<Renderable, KeyEvent>,
    { sections: sectionNames },
  ).sections

  return {
    sections,
  }
function createKeys(input: SmokeBindings | undefined) {
  return createBindingLookup({ ...defaultKeymap, ...input })
}

type Keys = ReturnType<typeof createKeys>

@@ -376,7 +341,7 @@ const Screen = (props: {
      },
    },
  ],
  bindings: props.keys.sections.dialog,
  bindings: props.keys.gather("smoke.dialog", [command.dialog_close]),
}))

useBindings(() => ({

@@ -395,7 +360,7 @@ const Screen = (props: {
      },
    },
  ],
  bindings: props.keys.sections.local,
  bindings: props.keys.gather("smoke.local", [command.local_push, command.local_pop]),
}))

useBindings(() => ({

@@ -478,7 +443,20 @@ const Screen = (props: {
      },
    },
  ],
  bindings: props.keys.sections.screen,
  bindings: props.keys.gather("smoke.screen", [
    command.screen_home,
    command.screen_left,
    command.screen_right,
    command.screen_up,
    command.screen_down,
    command.screen_modal,
    command.screen_local,
    command.screen_host,
    command.screen_alert,
    command.screen_confirm,
    command.screen_prompt,
    command.screen_select,
  ]),
}))
const shortcuts = useKeymapSelector((keymap) => {
  const bindings = keymap.getCommandBindings({

@@ -687,7 +665,7 @@ const Modal = (props: {
      },
    },
  ],
  bindings: props.keys.sections.modal,
  bindings: props.keys.gather("smoke.modal", [command.modal_accept, command.modal_close]),
}))
const shortcuts = useKeymapSelector((keymap) => {
  const bindings = keymap.getCommandBindings({

@@ -766,25 +744,8 @@ const home = (api: TuiPluginApi, input: Cfg) => ({
  },
  home_prompt(ctx, value) {
    const skin = look(ctx.theme.current)
    type Prompt = (props: {
      workspaceID?: string
      visible?: boolean
      disabled?: boolean
      onSubmit?: () => void
      hint?: JSX.Element
      right?: JSX.Element
      showPlaceholder?: boolean
      placeholders?: {
        normal?: string[]
        shell?: string[]
      }
    }) => JSX.Element
    type Slot = (
      props: { name: string; mode?: unknown; children?: JSX.Element } & Record<string, unknown>,
    ) => JSX.Element | null
    const ui = api.ui as TuiPluginApi["ui"] & { Prompt: Prompt; Slot: Slot }
    const Prompt = ui.Prompt
    const Slot = ui.Slot
    const Prompt = api.ui.Prompt
    const Slot = api.ui.Slot
    const normal = [
      `[SMOKE] route check for ${input.label}`,
      "[SMOKE] confirm home_prompt slot override",

@@ -1003,20 +964,29 @@ const reg = (api: TuiPluginApi, input: Cfg, keys: Keys) => {
      },
    },
  ],
  bindings: keys.sections.global,
  bindings: keys.gather("smoke.global", [
    command.modal,
    command.screen,
    command.alert,
    command.confirm,
    command.prompt,
    command.select,
    command.host,
    command.home,
    command.toast,
  ]),
})
}

const tui: TuiPlugin = async (api, options, meta) => {
  const input = options as SmokeOptions | undefined
  if (input?.enabled === false) return
  if (options?.enabled === false) return

  await api.theme.install("./smoke-theme.json")
  api.theme.set("smoke-theme")

  const value = cfg(input)
  const value = cfg(options)
  const route = names(value)
  const keys = createKeys(value.keymap)
  const keys = createKeys(value.keybinds)
  const fx = new VignetteEffect(value.vignette)
  const post = fx.apply.bind(fx)
  api.renderer.addPostProcessFn(post)

@@ -6,20 +6,12 @@
{
  "enabled": false,
  "label": "workspace",
  "keymap": {
    "sections": {
      "global": {
        "plugin.smoke.modal": "ctrl+alt+m",
        "plugin.smoke.screen": "ctrl+alt+o"
      },
      "screen": {
        "plugin.smoke.screen.home": "escape,ctrl+shift+h",
        "plugin.smoke.screen.modal": "ctrl+alt+m"
      },
      "dialog": {
        "plugin.smoke.dialog.close": "escape,q"
      }
    }
  "keybinds": {
    "smoke_modal": "ctrl+alt+m",
    "smoke_screen": "ctrl+alt+o",
    "smoke_screen_home": "escape,ctrl+shift+h",
    "smoke_screen_modal": "ctrl+alt+m",
    "smoke_dialog_close": "escape,q"
  }
}
]

@@ -9,6 +9,7 @@
### General Principles

- Keep things in one function unless composable or reusable
- Do not extract single-use helpers preemptively. Inline the logic at the call site unless the helper is reused, hides a genuinely complex boundary, or has a clear, independent name that improves the caller.
- Avoid `try`/`catch` where possible
- Avoid using the `any` type
- Use Bun APIs when possible, like `Bun.file()`
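The inlining guideline above is easiest to see side by side. A minimal TypeScript sketch (the `loadConfig` example is hypothetical, not taken from this repo):

```ts
// Before: a single-use helper that only adds indirection.
function parseConfigText(text: string): Record<string, unknown> {
  return JSON.parse(text)
}

async function loadConfig(path: string) {
  const text = await Bun.file(path).text()
  return parseConfigText(text)
}

// After: the logic lives at its only call site, per the guideline,
// and uses the Bun API (`Bun.file()`) the list recommends.
async function loadConfigInline(path: string) {
  return JSON.parse(await Bun.file(path).text()) as Record<string, unknown>
}
```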
bun.lock (117 changes)

@@ -29,7 +29,7 @@
  },
  "packages/app": {
    "name": "@opencode-ai/app",
    "version": "1.14.41",
    "version": "1.14.48",
    "dependencies": {
      "@kobalte/core": "catalog:",
      "@opencode-ai/core": "workspace:*",

@@ -85,7 +85,7 @@
  },
  "packages/console/app": {
    "name": "@opencode-ai/console-app",
    "version": "1.14.41",
    "version": "1.14.48",
    "dependencies": {
      "@cloudflare/vite-plugin": "1.15.2",
      "@ibm/plex": "6.4.1",

@@ -111,6 +111,7 @@
      "zod": "catalog:",
    },
    "devDependencies": {
      "@types/bun": "catalog:",
      "@typescript/native-preview": "catalog:",
      "@webgpu/types": "0.1.54",
      "typescript": "catalog:",

@@ -119,7 +120,7 @@
  },
  "packages/console/core": {
    "name": "@opencode-ai/console-core",
    "version": "1.14.41",
    "version": "1.14.48",
    "dependencies": {
      "@aws-sdk/client-sts": "3.782.0",
      "@jsx-email/render": "1.1.1",

@@ -146,7 +147,7 @@
  },
  "packages/console/function": {
    "name": "@opencode-ai/console-function",
    "version": "1.14.41",
    "version": "1.14.48",
    "dependencies": {
      "@ai-sdk/anthropic": "3.0.64",
      "@ai-sdk/openai": "3.0.48",

@@ -170,7 +171,7 @@
  },
  "packages/console/mail": {
    "name": "@opencode-ai/console-mail",
    "version": "1.14.41",
    "version": "1.14.48",
    "dependencies": {
      "@jsx-email/all": "2.2.3",
      "@jsx-email/cli": "1.4.3",

@@ -194,7 +195,7 @@
  },
  "packages/core": {
    "name": "@opencode-ai/core",
    "version": "1.14.41",
    "version": "1.14.48",
    "bin": {
      "opencode": "./bin/opencode",
    },

@@ -228,7 +229,7 @@
  },
  "packages/desktop": {
    "name": "@opencode-ai/desktop",
    "version": "1.14.41",
    "version": "1.14.48",
    "dependencies": {
      "drizzle-orm": "catalog:",
      "effect": "catalog:",

@@ -282,7 +283,7 @@
  },
  "packages/enterprise": {
    "name": "@opencode-ai/enterprise",
    "version": "1.14.41",
    "version": "1.14.48",
    "dependencies": {
      "@opencode-ai/core": "workspace:*",
      "@opencode-ai/ui": "workspace:*",

@@ -302,6 +303,7 @@
    "devDependencies": {
      "@cloudflare/workers-types": "catalog:",
      "@tailwindcss/vite": "catalog:",
      "@types/bun": "catalog:",
      "@types/luxon": "catalog:",
      "@typescript/native-preview": "catalog:",
      "tailwindcss": "catalog:",

@@ -311,7 +313,7 @@
  },
  "packages/function": {
    "name": "@opencode-ai/function",
    "version": "1.14.41",
    "version": "1.14.48",
    "dependencies": {
      "@octokit/auth-app": "8.0.1",
      "@octokit/rest": "catalog:",

@@ -325,9 +327,40 @@
      "typescript": "catalog:",
    },
  },
  "packages/http-recorder": {
    "name": "@opencode-ai/http-recorder",
    "version": "1.14.48",
    "dependencies": {
      "@effect/platform-node": "catalog:",
      "effect": "catalog:",
    },
    "devDependencies": {
      "@tsconfig/bun": "catalog:",
      "@types/bun": "catalog:",
      "@typescript/native-preview": "catalog:",
    },
  },
  "packages/llm": {
    "name": "@opencode-ai/llm",
    "version": "1.14.48",
    "dependencies": {
      "@smithy/eventstream-codec": "4.2.14",
      "@smithy/util-utf8": "4.2.2",
      "aws4fetch": "1.0.20",
      "effect": "catalog:",
    },
    "devDependencies": {
      "@clack/prompts": "1.0.0-alpha.1",
      "@effect/platform-node": "catalog:",
      "@opencode-ai/http-recorder": "workspace:*",
      "@tsconfig/bun": "catalog:",
      "@types/bun": "catalog:",
      "@typescript/native-preview": "catalog:",
    },
  },
  "packages/opencode": {
    "name": "opencode",
    "version": "1.14.41",
    "version": "1.14.48",
    "bin": {
      "opencode": "./bin/opencode",
    },

@@ -360,10 +393,6 @@
    "@effect/opentelemetry": "catalog:",
    "@effect/platform-node": "catalog:",
    "@gitlab/opencode-gitlab-auth": "1.3.3",
    "@hono/node-server": "1.19.11",
    "@hono/node-ws": "1.3.0",
    "@hono/standard-validator": "0.1.5",
    "@hono/zod-validator": "catalog:",
    "@lydell/node-pty": "catalog:",
    "@modelcontextprotocol/sdk": "1.27.1",
    "@octokit/graphql": "9.0.2",

@@ -383,6 +412,7 @@
    "@opentui/solid": "catalog:",
    "@parcel/watcher": "2.5.1",
    "@pierre/diffs": "catalog:",
    "@silvia-odwyer/photon-node": "0.3.4",
    "@solid-primitives/event-bus": "1.1.2",
    "@solid-primitives/scheduled": "1.5.2",
    "@standard-schema/spec": "1.0.0",

@@ -404,8 +434,6 @@
    "glob": "13.0.5",
    "google-auth-library": "10.5.0",
    "gray-matter": "4.0.3",
    "hono": "catalog:",
    "hono-openapi": "catalog:",
    "ignore": "7.0.5",
    "immer": "11.1.4",
    "jsonc-parser": "3.3.1",

@@ -432,7 +460,6 @@
    "xdg-basedir": "5.1.0",
    "yargs": "18.0.0",
    "zod": "catalog:",
    "zod-to-json-schema": "3.24.5",
  },
  "devDependencies": {
    "@babel/core": "7.28.4",

@@ -465,12 +492,11 @@
    "typescript": "catalog:",
    "vscode-languageserver-types": "3.17.5",
    "why-is-node-running": "3.2.2",
    "zod-to-json-schema": "3.24.5",
  },
  },
  "packages/plugin": {
    "name": "@opencode-ai/plugin",
    "version": "1.14.41",
    "version": "1.14.48",
    "dependencies": {
      "@opencode-ai/sdk": "workspace:*",
      "effect": "catalog:",

@@ -486,9 +512,9 @@
    "typescript": "catalog:",
  },
  "peerDependencies": {
    "@opentui/core": ">=0.2.5",
    "@opentui/keymap": ">=0.2.5",
    "@opentui/solid": ">=0.2.5",
    "@opentui/core": ">=0.2.6",
    "@opentui/keymap": ">=0.2.6",
    "@opentui/solid": ">=0.2.6",
  },
  "optionalPeers": [
    "@opentui/core",

@@ -508,7 +534,7 @@
  },
  "packages/sdk/js": {
    "name": "@opencode-ai/sdk",
    "version": "1.14.41",
    "version": "1.14.48",
    "dependencies": {
      "cross-spawn": "catalog:",
    },

@@ -523,7 +549,7 @@
  },
  "packages/slack": {
    "name": "@opencode-ai/slack",
    "version": "1.14.41",
    "version": "1.14.48",
    "dependencies": {
      "@opencode-ai/sdk": "workspace:*",
      "@slack/bolt": "^3.17.1",

@@ -558,7 +584,7 @@
  },
  "packages/ui": {
    "name": "@opencode-ai/ui",
    "version": "1.14.41",
    "version": "1.14.48",
    "dependencies": {
      "@kobalte/core": "catalog:",
      "@opencode-ai/core": "workspace:*",

@@ -607,7 +633,7 @@
  },
  "packages/web": {
    "name": "@opencode-ai/web",
    "version": "1.14.41",
    "version": "1.14.48",
    "dependencies": {
      "@astrojs/cloudflare": "12.6.3",
      "@astrojs/markdown-remark": "6.3.1",

@@ -652,6 +678,7 @@
    "solid-js@1.9.10": "patches/solid-js@1.9.10.patch",
    "@standard-community/standard-openapi@0.2.9": "patches/@standard-community%2Fstandard-openapi@0.2.9.patch",
    "@npmcli/agent@4.0.0": "patches/@npmcli%2Fagent@4.0.0.patch",
    "@silvia-odwyer/photon-node@0.3.4": "patches/@silvia-odwyer%2Fphoton-node@0.3.4.patch",
  },
  "overrides": {
    "@types/bun": "catalog:",

@@ -667,9 +694,9 @@
    "@npmcli/arborist": "9.4.0",
    "@octokit/rest": "22.0.0",
    "@openauthjs/openauth": "0.0.0-20250322224806",
    "@opentui/core": "0.2.5",
    "@opentui/keymap": "0.2.5",
    "@opentui/solid": "0.2.5",
    "@opentui/core": "0.2.6",
    "@opentui/keymap": "0.2.6",
    "@opentui/solid": "0.2.6",
    "@pierre/diffs": "1.1.0-beta.18",
    "@playwright/test": "1.59.1",
    "@sentry/solid": "10.36.0",

@@ -1210,8 +1237,6 @@

"@hono/node-server": ["@hono/node-server@1.19.11", "", { "peerDependencies": { "hono": "^4" } }, "sha512-dr8/3zEaB+p0D2n/IUrlPF1HZm586qgJNXK1a9fhg/PzdtkK7Ksd5l312tJX2yBuALqDYBlG20QEbayqPyxn+g=="],

"@hono/node-ws": ["@hono/node-ws@1.3.0", "", { "dependencies": { "ws": "^8.17.0" }, "peerDependencies": { "@hono/node-server": "^1.19.2", "hono": "^4.6.0" } }, "sha512-ju25YbbvLuXdqBCmLZLqnNYu1nbHIQjoyUqA8ApZOeL1k4skuiTcw5SW77/5SUYo2Xi2NVBJoVlfQurnKEp03Q=="],

"@hono/standard-validator": ["@hono/standard-validator@0.1.5", "", { "peerDependencies": { "@standard-schema/spec": "1.0.0", "hono": ">=3.9.0" } }, "sha512-EIyZPPwkyLn6XKwFj5NBEWHXhXbgmnVh2ceIFo5GO7gKI9WmzTjPDKnppQB0KrqKeAkq3kpoW4SIbu5X1dgx3w=="],

"@hono/zod-validator": ["@hono/zod-validator@0.4.2", "", { "peerDependencies": { "hono": ">=3.9.0", "zod": "^3.19.1" } }, "sha512-1rrlBg+EpDPhzOV4hT9pxr5+xDVmKuz6YJl+la7VCwK6ass5ldyKm5fD+umJdV2zhHD6jROoCCv8NbTwyfhT0g=="],

@@ -1558,6 +1583,10 @@

"@opencode-ai/function": ["@opencode-ai/function@workspace:packages/function"],

"@opencode-ai/http-recorder": ["@opencode-ai/http-recorder@workspace:packages/http-recorder"],

"@opencode-ai/llm": ["@opencode-ai/llm@workspace:packages/llm"],

"@opencode-ai/plugin": ["@opencode-ai/plugin@workspace:packages/plugin"],

"@opencode-ai/script": ["@opencode-ai/script@workspace:packages/script"],

@@ -1600,23 +1629,23 @@

"@opentelemetry/semantic-conventions": ["@opentelemetry/semantic-conventions@1.40.0", "", {}, "sha512-cifvXDhcqMwwTlTK04GBNeIe7yyo28Mfby85QXFe1Yk8nmi36Ab/5UQwptOx84SsoGNRg+EVSjwzfSZMy6pmlw=="],

"@opentui/core": ["@opentui/core@0.2.5", "", { "dependencies": { "bun-ffi-structs": "0.2.2", "diff": "9.0.0", "marked": "17.0.1", "string-width": "7.2.0", "strip-ansi": "7.1.2", "yoga-layout": "3.2.1" }, "optionalDependencies": { "@opentui/core-darwin-arm64": "0.2.5", "@opentui/core-darwin-x64": "0.2.5", "@opentui/core-linux-arm64": "0.2.5", "@opentui/core-linux-x64": "0.2.5", "@opentui/core-win32-arm64": "0.2.5", "@opentui/core-win32-x64": "0.2.5" }, "peerDependencies": { "web-tree-sitter": "0.25.10" } }, "sha512-A5DNOW39S60LtOcBdWYx7fuIGsPcClzbdKz9WuLp+wgy0Bt/jPw5XX6dk3k4dCX4jmhA1nX7x7680+GXLHPL6Q=="],
"@opentui/core": ["@opentui/core@0.2.6", "", { "dependencies": { "bun-ffi-structs": "0.2.2", "diff": "9.0.0", "marked": "17.0.1", "string-width": "7.2.0", "strip-ansi": "7.1.2", "yoga-layout": "3.2.1" }, "optionalDependencies": { "@opentui/core-darwin-arm64": "0.2.6", "@opentui/core-darwin-x64": "0.2.6", "@opentui/core-linux-arm64": "0.2.6", "@opentui/core-linux-x64": "0.2.6", "@opentui/core-win32-arm64": "0.2.6", "@opentui/core-win32-x64": "0.2.6" }, "peerDependencies": { "web-tree-sitter": "0.25.10" } }, "sha512-dBpMaWVM7wtW2/2TlGPrkPjg6gOL3MVU/5XXk+U1LDJB8L4q4NeYWVdzfAVNcEvgmuuCy/cVqdY2D4ei+e7MMg=="],

"@opentui/core-darwin-arm64": ["@opentui/core-darwin-arm64@0.2.5", "", { "os": "darwin", "cpu": "arm64" }, "sha512-Jdl8TN7oxV8NTaKZsUAt0B/A4hIYiyUKwXNSe4w1OchNMlgjwF1fx/7RhgHXSvWh1Fcqi1IH5FfhsmO89Aed1A=="],
"@opentui/core-darwin-arm64": ["@opentui/core-darwin-arm64@0.2.6", "", { "os": "darwin", "cpu": "arm64" }, "sha512-hR5nsxNj+059utzenTCF0kealUlibON6fLuebFUCGM/5kJnqa+shIh0XbUDFm0+F47vqVUgZufBdUuieQZIbvQ=="],

"@opentui/core-darwin-x64": ["@opentui/core-darwin-x64@0.2.5", "", { "os": "darwin", "cpu": "x64" }, "sha512-78sKg0ZvwFHzZZGJCeaSNIVi2dadDxQymHAmrK698zEgnQr4eLVVB+MxNpxJx55/z9Y+YqbfSZaobC6w6Q3y5A=="],
"@opentui/core-darwin-x64": ["@opentui/core-darwin-x64@0.2.6", "", { "os": "darwin", "cpu": "x64" }, "sha512-pJ/bH4WC/mbBaakM1YdH6TVo67jhy0KPd61bCz97w0I/PJGr8fmNKvhmMt/AwyFgOQi3FYZiEKLMpGdvUcSsrQ=="],

"@opentui/core-linux-arm64": ["@opentui/core-linux-arm64@0.2.5", "", { "os": "linux", "cpu": "arm64" }, "sha512-BtqbOjP64hKQaVd0ApHunt0MjkEEKTvxpaBwk7OhwVCoYakQBDZTZXUQ9zuPXvaHc9IF286z1PnJGLu0t11BAw=="],
"@opentui/core-linux-arm64": ["@opentui/core-linux-arm64@0.2.6", "", { "os": "linux", "cpu": "arm64" }, "sha512-9Pnd3kOxig8ii+/IqYheOPEgferylsQA0L6tKBnHQ9jRlCJOcu0Rv65Jepueh212vevdV9DzPURJnhejG06J6g=="],

"@opentui/core-linux-x64": ["@opentui/core-linux-x64@0.2.5", "", { "os": "linux", "cpu": "x64" }, "sha512-c3sEXtmOd1E5R4wfWh/MejplxgApYKqzyJ0AVMTU8pU1MHRAMwD8UFDMSVQhl7rYMTuBYPWok3IoCK2u8a2A4A=="],
"@opentui/core-linux-x64": ["@opentui/core-linux-x64@0.2.6", "", { "os": "linux", "cpu": "x64" }, "sha512-458Mx9tBzEPzfft8cSt5ZaIpEepoxBXBOL6AUVmDTKWaZ3uouraPcEKraGAyvOTDQp2XDI3R8c/2GdaR77FaUQ=="],

"@opentui/core-win32-arm64": ["@opentui/core-win32-arm64@0.2.5", "", { "os": "win32", "cpu": "arm64" }, "sha512-WlgpYkgmuvMPc2mYGJSwN7c+VGAxiZvMKwZEbS+w9PMj7sJhvY+zFrOJNFpvjbAFw8vS3Kz39km4Nj7GF8JH6w=="],
"@opentui/core-win32-arm64": ["@opentui/core-win32-arm64@0.2.6", "", { "os": "win32", "cpu": "arm64" }, "sha512-BDUrdrT1RCcVnQoHJmUut4y811jDBAEtc6GJFB4Gs265Be8SrTjVCus6p2fSQ7j9sZQ1OcjO+5+4NkheSZICDQ=="],

"@opentui/core-win32-x64": ["@opentui/core-win32-x64@0.2.5", "", { "os": "win32", "cpu": "x64" }, "sha512-4X7BHJ7Wztzj7p0E+SsN0d4goUVU7Dy2VnhnD4n65ODgVbW59iqasAvbnPLbX3ghjgKiwQ+2SD+ImCIHE6uCAA=="],
"@opentui/core-win32-x64": ["@opentui/core-win32-x64@0.2.6", "", { "os": "win32", "cpu": "x64" }, "sha512-SUYAzRJ9TSoD2Qt8kn6FJz6dbTrFEPVig5mScB4zFGgGQO/Bbod2/Q31vLS/IQrX+FDb67WaErD+kuMCnMPPLA=="],

"@opentui/keymap": ["@opentui/keymap@0.2.5", "", { "dependencies": { "@opentui/core": "0.2.5" }, "peerDependencies": { "@opentui/react": "0.2.5", "@opentui/solid": "0.2.5", "react": ">=19.2.0", "solid-js": "1.9.12" }, "optionalPeers": ["@opentui/react", "@opentui/solid", "react", "solid-js"] }, "sha512-/B6Gy9LLRRKhvyDV1rFX0p7BUN8NQOcXwTV8E0xb7ym1yREvVmij+hCRkXXddMme2HW9NmV0+RRHo4kJzJxkNQ=="],
"@opentui/keymap": ["@opentui/keymap@0.2.6", "", { "dependencies": { "@opentui/core": "0.2.6" }, "peerDependencies": { "@opentui/react": "0.2.6", "@opentui/solid": "0.2.6", "react": ">=19.2.0", "solid-js": "1.9.12" }, "optionalPeers": ["@opentui/react", "@opentui/solid", "react", "solid-js"] }, "sha512-+6OYuedrFCKVo4ryGFNwws++2VOmPcXU3PwpY0mP47gYQY2nvQ+etWIs2Y7r5eMIqUfxVCldkKsrzcEcA4tb/A=="],

"@opentui/solid": ["@opentui/solid@0.2.5", "", { "dependencies": { "@babel/core": "7.28.0", "@babel/preset-typescript": "7.27.1", "@opentui/core": "0.2.5", "babel-plugin-module-resolver": "5.0.2", "babel-preset-solid": "1.9.12", "entities": "7.0.1", "s-js": "^0.4.9" }, "peerDependencies": { "solid-js": "1.9.12" } }, "sha512-M8MxDYJzjtF8TvxB6Q7656GOSS+QIg89jD0jf/asfF4qeip5TQhNZ3ba+R1v2fVuIkQCyRJzTtOtMZiglzGKPQ=="],
"@opentui/solid": ["@opentui/solid@0.2.6", "", { "dependencies": { "@babel/core": "7.28.0", "@babel/preset-typescript": "7.27.1", "@opentui/core": "0.2.6", "babel-plugin-module-resolver": "5.0.2", "babel-preset-solid": "1.9.12", "entities": "7.0.1", "s-js": "^0.4.9" }, "peerDependencies": { "solid-js": "1.9.12" } }, "sha512-2y225WlOGi/fCaajkxBmLyVW8Cr+OmhowHdvrYcz5w2kBD15sKbJLIYu1G9DxceirT1uIyasGy2TGzRRcVkTDg=="],

"@oslojs/asn1": ["@oslojs/asn1@1.0.0", "", { "dependencies": { "@oslojs/binary": "1.0.0" } }, "sha512-zw/wn0sj0j0QKbIXfIlnEcTviaCzYOY3V5rAyjR6YtOByFtJiT574+8p9Wlach0lZH9fddD4yb9laEAIl4vXQA=="],

@@ -2014,6 +2043,8 @@

"@sigstore/verify": ["@sigstore/verify@3.1.0", "", { "dependencies": { "@sigstore/bundle": "^4.0.0", "@sigstore/core": "^3.1.0", "@sigstore/protobuf-specs": "^0.5.0" } }, "sha512-mNe0Iigql08YupSOGv197YdHpPPr+EzDZmfCgMc7RPNaZTw5aLN01nBl6CHJOh3BGtnMIj83EeN4butBchc8Ag=="],

"@silvia-odwyer/photon-node": ["@silvia-odwyer/photon-node@0.3.4", "", {}, "sha512-bnly4BKB3KDTFxrUIcgCLbaeVVS8lrAkri1pEzskpmxu9MdfGQTy8b8EgcD83ywD3RPMsIulY8xJH5Awa+t9fA=="],

"@sindresorhus/is": ["@sindresorhus/is@4.6.0", "", {}, "sha512-t09vSN3MdfsyCHoFcTRCH/iUtG7OJ0CsjzB8cjAmKc/va/kIgeDI/TxsigdncE/4be734m0cvIYwNaV4i2XqAw=="],

"@slack/bolt": ["@slack/bolt@3.22.0", "", { "dependencies": { "@slack/logger": "^4.0.0", "@slack/oauth": "^2.6.3", "@slack/socket-mode": "^1.3.6", "@slack/types": "^2.13.0", "@slack/web-api": "^6.13.0", "@types/express": "^4.16.1", "@types/promise.allsettled": "^1.0.3", "@types/tsscmp": "^1.0.0", "axios": "^1.7.4", "express": "^4.21.0", "path-to-regexp": "^8.1.0", "promise.allsettled": "^1.0.2", "raw-body": "^2.3.3", "tsscmp": "^1.0.6" } }, "sha512-iKDqGPEJDnrVwxSVlFW6OKTkijd7s4qLBeSufoBsTM0reTyfdp/5izIQVkxNfzjHi3o6qjdYbRXkYad5HBsBog=="],

@@ -5636,6 +5667,10 @@

"@opencode-ai/desktop/typescript": ["typescript@5.6.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-hjcS1mhfuyi4WW8IWtjP7brDrG2cuDZukyrYrSauoXGNgx0S7zceP07adYkJycEr56BOUTNPzbInooiN3fn1qw=="],

"@opencode-ai/llm/@smithy/eventstream-codec": ["@smithy/eventstream-codec@4.2.14", "", { "dependencies": { "@aws-crypto/crc32": "5.2.0", "@smithy/types": "^4.14.1", "@smithy/util-hex-encoding": "^4.2.2", "tslib": "^2.6.2" } }, "sha512-erZq0nOIpzfeZdCyzZjdJb4nVSKLUmSkaQUVkRGQTXs30gyUGeKnrYEg+Xe1W5gE3aReS7IgsvANwVPxSzY6Pw=="],

"@opencode-ai/llm/@smithy/util-utf8": ["@smithy/util-utf8@4.2.2", "", { "dependencies": { "@smithy/util-buffer-from": "^4.2.2", "tslib": "^2.6.2" } }, "sha512-75MeYpjdWRe8M5E3AW0O4Cx3UadweS+cwdXjwYGBW5h/gxxnbeZ877sLPX/ZJA9GVTlL/qG0dXP29JWFCD1Ayw=="],

"@opencode-ai/ui/@solid-primitives/resize-observer": ["@solid-primitives/resize-observer@2.1.3", "", { "dependencies": { "@solid-primitives/event-listener": "^2.4.3", "@solid-primitives/rootless": "^1.5.2", "@solid-primitives/static-store": "^0.1.2", "@solid-primitives/utils": "^6.3.2" }, "peerDependencies": { "solid-js": "^1.6.12" } }, "sha512-zBLje5E06TgOg93S7rGPldmhDnouNGhvfZVKOp+oG2XU8snA+GoCSSCz1M+jpNAg5Ek2EakU5UVQqL152WmdXQ=="],

"@opencode-ai/web/@shikijs/transformers": ["@shikijs/transformers@3.20.0", "", { "dependencies": { "@shikijs/core": "3.20.0", "@shikijs/types": "3.20.0" } }, "sha512-PrHHMRr3Q5W1qB/42kJW6laqFyWdhrPF2hNR9qjOm1xcSiAO3hAHo7HaVyHE6pMyevmy3i51O8kuGGXC78uK3g=="],

@@ -6712,6 +6747,8 @@

"@opencode-ai/desktop/@actions/artifact/@actions/http-client": ["@actions/http-client@2.2.3", "", { "dependencies": { "tunnel": "^0.0.6", "undici": "^5.25.4" } }, "sha512-mx8hyJi/hjFvbPokCg4uRd4ZX78t+YyRPtnKWwIl+RzNaVuFpQHfmlGVfsKEJN8LwTCvL+DfVgAM04XaHkm6bA=="],

"@opencode-ai/llm/@smithy/eventstream-codec/@smithy/types": ["@smithy/types@4.14.1", "", { "dependencies": { "tslib": "^2.6.2" } }, "sha512-59b5HtSVrVR/eYNei3BUj3DCPKD/G7EtDDe7OEJE7i7FtQFugYo6MxbotS8mVJkLNVf8gYaAlEBwwtJ9HzhWSg=="],

"@opencode-ai/web/@shikijs/transformers/@shikijs/core": ["@shikijs/core@3.20.0", "", { "dependencies": { "@shikijs/types": "3.20.0", "@shikijs/vscode-textmate": "^10.0.2", "@types/hast": "^3.0.4", "hast-util-to-html": "^9.0.5" } }, "sha512-f2ED7HYV4JEk827mtMDwe/yQ25pRiXZmtHjWF8uzZKuKiEsJR7Ce1nuQ+HhV9FzDcbIo4ObBCD9GPTzNuy9S1g=="],

"@opencode-ai/web/@shikijs/transformers/@shikijs/types": ["@shikijs/types@3.20.0", "", { "dependencies": { "@shikijs/vscode-textmate": "^10.0.2", "@types/hast": "^3.0.4" } }, "sha512-lhYAATn10nkZcBQ0BlzSbJA3wcmL5MXUUF8d2Zzon6saZDlToKaiRX60n2+ZaHJCmXEcZRWNzn+k9vplr8Jhsw=="],
@@ -38,7 +38,10 @@ const modelHttpErrorsQuery = (product: "go" | "zen") => {
  calculatedFields: [
    {
      name: "is_failed_http_status",
      expression: `IF(AND(GTE($status, "400"), NOT(EQUALS($status, "401"))), 1, 0)`,
      expression:
        product === "go"
          ? `IF(AND(GTE($status, "400"), NOT(EQUALS($status, "401")), NOT(EQUALS($status, "429"))), 1, 0)`
          : `IF(AND(GTE($status, "400"), NOT(EQUALS($status, "401"))), 1, 0)`,
    },
  ],
  calculations: [

@@ -66,7 +69,7 @@ const providerHttpErrorsQuery = (product: "go" | "zen") => {
    },
    {
      name: "is_failed_provider_http_status",
      expression: `IF(GTE($llm.error.code, "400"), 1, 0)`,
      expression: `IF(GT($llm.error.code, "400"), 1, 0)`,
    },
  ],
  calculations: [
@@ -1,8 +1,8 @@
{
  "nodeModules": {
    "x86_64-linux": "sha256-UxWxALOCC/n6JNFcu/IKjC/B9bySQmcr2riWO1Doc3s=",
    "aarch64-linux": "sha256-QLM8fPkPOukwOLR26zgZHhWEbfaEhmIqIJSjoQYOvfg=",
    "aarch64-darwin": "sha256-GjgCPpkTzqkeiLsp2P+Awtm0K0XKTJV7v9QJoGC02YU=",
    "x86_64-darwin": "sha256-pm7xhKAUBgp+zDh1KzyOlKS2TYJpSdDPnZFqFHrflSA="
    "x86_64-linux": "sha256-baGxh+hk/rPhg0xI/OdMDz6dPwncgercYNBdTPnLX9o=",
    "aarch64-linux": "sha256-VTWKq679B3Q4ZnAoQzC4VSCYA09wWecNJ+JajvjNB1U=",
    "aarch64-darwin": "sha256-orf2zIBMTiiQrt/6qCzE+o0oKhv6u8zXF9DH1Bo3lbo=",
    "x86_64-darwin": "sha256-1MZC1fadRoY4lhkmjlcUQTLYH9Q8pDI1bxd5f94f1xU="
  }
}
@@ -35,9 +35,9 @@
  "@types/cross-spawn": "6.0.6",
  "@octokit/rest": "22.0.0",
  "@hono/zod-validator": "0.4.2",
  "@opentui/core": "0.2.5",
  "@opentui/keymap": "0.2.5",
  "@opentui/solid": "0.2.5",
  "@opentui/core": "0.2.6",
  "@opentui/keymap": "0.2.6",
  "@opentui/solid": "0.2.6",
  "ulid": "3.0.1",
  "@kobalte/core": "0.13.11",
  "@types/luxon": "3.7.1",

@@ -133,6 +133,7 @@
  },
  "patchedDependencies": {
    "@npmcli/agent@4.0.0": "patches/@npmcli%2Fagent@4.0.0.patch",
    "@silvia-odwyer/photon-node@0.3.4": "patches/@silvia-odwyer%2Fphoton-node@0.3.4.patch",
    "@standard-community/standard-openapi@0.2.9": "patches/@standard-community%2Fstandard-openapi@0.2.9.patch",
    "solid-js@1.9.10": "patches/solid-js@1.9.10.patch"
  }

@@ -1,6 +1,6 @@
{
  "name": "@opencode-ai/app",
  "version": "1.14.41",
  "version": "1.14.48",
  "description": "",
  "type": "module",
  "exports": {
@@ -231,6 +231,7 @@ export function createChildStoreManager(input: {
  limit: 5,
  message: {},
  part: {},
  part_text_accum_delta: {},
})
children[key] = child
disposers.set(key, dispose)

@@ -81,6 +81,7 @@ const baseState = (input: Partial<State> = {}) =>
  limit: 10,
  message: {},
  part: {},
  part_text_accum_delta: {},
  ...input,
}) as State

@@ -211,6 +211,12 @@ export function applyDirectoryEvent(input: {
  const result = Binary.search(messages, props.messageID, (m) => m.id)
  if (result.found) messages.splice(result.index, 1)
}
const parts = draft.part[props.messageID]
if (parts) {
  for (const part of parts) {
    delete draft.part_text_accum_delta[part.id]
  }
}
delete draft.part[props.messageID]
}),
)

@@ -219,6 +225,11 @@ export function applyDirectoryEvent(input: {
case "message.part.updated": {
  const part = (event.properties as { part: Part }).part
  if (SKIP_PARTS.has(part.type)) break
  input.setStore(
    produce((draft) => {
      delete draft.part_text_accum_delta[part.id]
    }),
  )
  const parts = input.store.part[part.messageID]
  if (!parts) {
    input.setStore("part", part.messageID, [part])

@@ -240,6 +251,11 @@ export function applyDirectoryEvent(input: {
}
case "message.part.removed": {
  const props = event.properties as { messageID: string; partID: string }
  input.setStore(
    produce((draft) => {
      delete draft.part_text_accum_delta[props.partID]
    }),
  )
  const parts = input.store.part[props.messageID]
  if (!parts) break
  const result = Binary.search(parts, props.partID, (p) => p.id)

@@ -263,6 +279,7 @@ export function applyDirectoryEvent(input: {
  if (!parts) break
  const result = Binary.search(parts, props.partID, (p) => p.id)
  if (!result.found) break
  input.setStore("part_text_accum_delta", props.partID, (existing) => (existing ?? "") + props.delta)
  input.setStore(
    "part",
    props.messageID,

@@ -39,6 +39,7 @@ describe("app session cache", () => {
  part: Record<string, Part[] | undefined>
  permission: Record<string, PermissionRequest[] | undefined>
  question: Record<string, QuestionRequest[] | undefined>
  part_text_accum_delta: Record<string, string | undefined>
} = {
  session_status: { ses_1: { type: "busy" } as SessionStatus },
  session_diff: { ses_1: [] },

@@ -47,12 +48,14 @@ describe("app session cache", () => {
  part: { msg_1: [part("prt_1", "ses_1", "msg_1")] },
  permission: { ses_1: [] as PermissionRequest[] },
  question: { ses_1: [] as QuestionRequest[] },
  part_text_accum_delta: { prt_1: "streamed text" },
}

dropSessionCaches(store, ["ses_1"])

expect(store.message.ses_1).toBeUndefined()
expect(store.part.msg_1).toBeUndefined()
expect(store.part_text_accum_delta.prt_1).toBeUndefined()
expect(store.todo.ses_1).toBeUndefined()
expect(store.session_diff.ses_1).toBeUndefined()
expect(store.session_status.ses_1).toBeUndefined()

@@ -70,6 +73,7 @@ describe("app session cache", () => {
  part: Record<string, Part[] | undefined>
  permission: Record<string, PermissionRequest[] | undefined>
  question: Record<string, QuestionRequest[] | undefined>
  part_text_accum_delta: Record<string, string | undefined>
} = {
  session_status: {},
  session_diff: {},

@@ -78,6 +82,7 @@ describe("app session cache", () => {
  part: { [m.id]: [part("prt_1", "ses_1", m.id)] },
  permission: {},
  question: {},
  part_text_accum_delta: {},
}

dropSessionCaches(store, ["ses_1"])

@@ -18,6 +18,7 @@ type SessionCache = {
  part: Record<string, Part[] | undefined>
  permission: Record<string, PermissionRequest[] | undefined>
  question: Record<string, QuestionRequest[] | undefined>
  part_text_accum_delta: Record<string, string | undefined>
}

export function dropSessionCaches(store: SessionCache, sessionIDs: Iterable<string>) {

@@ -27,6 +28,9 @@ export function dropSessionCaches(store: SessionCache, sessionIDs: Iterable<stri
  for (const key of Object.keys(store.part)) {
    const parts = store.part[key]
    if (!parts?.some((part) => stale.has(part?.sessionID ?? ""))) continue
    for (const part of parts) {
      delete store.part_text_accum_delta[part.id]
    }
    delete store.part[key]
  }

@@ -72,6 +72,9 @@ export type State = {
  part: {
    [messageID: string]: Part[]
  }
  part_text_accum_delta: {
    [partID: string]: string
  }
}

export type VcsCache = {
@@ -28,6 +28,12 @@ import { createOpenSessionFileTab, createSessionTabs, getTabReorderIndex, type S
import { setSessionHandoff } from "@/pages/session/handoff"
import { useSessionLayout } from "@/pages/session/session-layout"

type RenderDiff = (SnapshotFileDiff & { file: string }) | VcsFileDiff

function renderDiff(value: SnapshotFileDiff | VcsFileDiff): value is RenderDiff {
  return typeof value.file === "string"
}

export function SessionSidePanel(props: {
  canReview: () => boolean
  diffs: () => (SnapshotFileDiff | VcsFileDiff)[]

@@ -70,7 +76,8 @@ export function SessionSidePanel(props: {
})
const treeWidth = createMemo(() => (fileOpen() ? `${layout.fileTree.width()}px` : "0px"))

const diffFiles = createMemo(() => props.diffs().map((d) => d.file))
const diffs = createMemo(() => props.diffs().filter(renderDiff))
const diffFiles = createMemo(() => diffs().map((d) => d.file))
const kinds = createMemo(() => {
  const merge = (a: "add" | "del" | "mix" | undefined, b: "add" | "del" | "mix") => {
    if (!a) return b

@@ -81,7 +88,7 @@ export function SessionSidePanel(props: {
  const normalize = (p: string) => p.replaceAll("\\\\", "/").replace(/\/+$/, "")

  const out = new Map<string, "add" | "del" | "mix">()
  for (const diff of props.diffs()) {
  for (const diff of diffs()) {
    const file = normalize(diff.file)
    const kind = diff.status === "added" ? "add" : diff.status === "deleted" ? "del" : "mix"

@@ -1,6 +1,6 @@
{
  "name": "@opencode-ai/console-app",
  "version": "1.14.41",
  "version": "1.14.48",
  "type": "module",
  "license": "MIT",
  "scripts": {

@@ -35,6 +35,7 @@
  "zod": "catalog:"
},
"devDependencies": {
  "@types/bun": "catalog:",
  "@typescript/native-preview": "catalog:",
  "@webgpu/types": "0.1.54",
  "typescript": "catalog:",
@@ -67,6 +67,7 @@
display: inline-flex;
align-items: center;
justify-content: center;
flex-shrink: 0;
padding: 0;
background: transparent;
border: none;

@@ -79,6 +80,7 @@
}

svg {
  flex-shrink: 0;
  width: 16px;
  height: 16px;
}

@@ -53,7 +53,7 @@ export function UsageSection() {
}

const calculateTotalOutputTokens = (u: Awaited<ReturnType<typeof getUsageInfo>>[0]) => {
  return u.outputTokens + (u.reasoningTokens ?? 0)
  return u.outputTokens
}

const goPrev = async () => {
@@ -889,10 +889,6 @@ export async function handler(

const inputCost = modelCost.input * inputTokens * 100
const outputCost = modelCost.output * outputTokens * 100
const reasoningCost = (() => {
  if (!reasoningTokens) return undefined
  return modelCost.output * reasoningTokens * 100
})()
const cacheReadCost = (() => {
  if (!cacheReadTokens) return undefined
  if (!modelCost.cacheRead) return undefined

@@ -909,17 +905,11 @@
  return modelCost.cacheWrite1h * cacheWrite1hTokens * 100
})()
const totalCostInCent =
  inputCost +
  outputCost +
  (reasoningCost ?? 0) +
  (cacheReadCost ?? 0) +
  (cacheWrite5mCost ?? 0) +
  (cacheWrite1hCost ?? 0)
  inputCost + outputCost + (cacheReadCost ?? 0) + (cacheWrite5mCost ?? 0) + (cacheWrite1hCost ?? 0)
return {
  totalCostInCent,
  inputCost,
  outputCost,
  reasoningCost,
  cacheReadCost,
  cacheWrite5mCost,
  cacheWrite1hCost,

@@ -941,8 +931,7 @@
) {
  const { inputTokens, outputTokens, reasoningTokens, cacheReadTokens, cacheWrite5mTokens, cacheWrite1hTokens } =
    usageInfo
  const { totalCostInCent, inputCost, outputCost, reasoningCost, cacheReadCost, cacheWrite5mCost, cacheWrite1hCost } =
    costInfo
  const { totalCostInCent, inputCost, outputCost, cacheReadCost, cacheWrite5mCost, cacheWrite1hCost } = costInfo

  logger.metric({
    "tokens.input": inputTokens,

@@ -953,14 +942,12 @@
    "tokens.cache_write_1h": cacheWrite1hTokens,
    "cost.input.microcents": centsToMicroCents(inputCost),
    "cost.output.microcents": centsToMicroCents(outputCost),
    "cost.reasoning.microcents": reasoningCost ? centsToMicroCents(reasoningCost) : undefined,
    "cost.cache_read.microcents": cacheReadCost ? centsToMicroCents(cacheReadCost) : undefined,
    "cost.cache_write.microcents": cacheWrite5mCost ? centsToMicroCents(cacheWrite5mCost) : undefined,
    "cost.total.microcents": centsToMicroCents(totalCostInCent),
    // deprecated - remove after May 20, 2026
    "cost.input": Math.round(inputCost),
    "cost.output": Math.round(outputCost),
    "cost.reasoning": reasoningCost ? Math.round(reasoningCost) : undefined,
    "cost.cache_read": cacheReadCost ? Math.round(cacheReadCost) : undefined,
    "cost.cache_write_5m": cacheWrite5mCost ? Math.round(cacheWrite5mCost) : undefined,
    "cost.cache_write_1h": cacheWrite1hCost ? Math.round(cacheWrite1hCost) : undefined,

@@ -50,7 +50,7 @@ export const openaiHelper: ProviderHelper = ({ workspaceID }) => ({
  const cacheReadTokens = usage.input_tokens_details?.cached_tokens ?? undefined
  return {
    inputTokens: inputTokens - (cacheReadTokens ?? 0),
    outputTokens: outputTokens - (reasoningTokens ?? 0),
    outputTokens,
    reasoningTokens,
    cacheReadTokens,
    cacheWrite5mTokens: undefined,
@@ -12,7 +12,7 @@
"allowJs": true,
"strict": true,
"noEmit": true,
"types": ["vite/client", "@webgpu/types"],
"types": ["vite/client", "@webgpu/types", "bun"],
"isolatedModules": true,
"paths": {
  "~/*": ["./src/*"]

@@ -1,7 +1,7 @@
{
  "$schema": "https://json.schemastore.org/package.json",
  "name": "@opencode-ai/console-core",
  "version": "1.14.41",
  "version": "1.14.48",
  "private": true,
  "type": "module",
  "license": "MIT",

@@ -1,6 +1,6 @@
{
  "name": "@opencode-ai/console-function",
  "version": "1.14.41",
  "version": "1.14.48",
  "$schema": "https://json.schemastore.org/package.json",
  "private": true,
  "type": "module",

@@ -1,6 +1,6 @@
{
  "name": "@opencode-ai/console-mail",
  "version": "1.14.41",
  "version": "1.14.48",
  "dependencies": {
    "@jsx-email/all": "2.2.3",
    "@jsx-email/cli": "1.4.3",

@@ -1,6 +1,6 @@
{
  "$schema": "https://json.schemastore.org/package.json",
  "version": "1.14.41",
  "version": "1.14.48",
  "name": "@opencode-ai/core",
  "type": "module",
  "license": "MIT",
@@ -36,7 +36,7 @@ export function zod<S extends Schema.Top>(schema: S): z.ZodType<Schema.Schema.Ty
 * mapped `.omit()` / `.extend()` surface triggers brand-intersection
 * explosions for branded primitives (`string & Brand<"SessionID">` extends
 * `object` via the brand and gets walked into the prototype by `DeepPartial`,
 * `updateSchema`, etc.), and zod's inference through `z.ZodType<T | undefined>`
 * mapped-schema helpers, and zod's inference through `z.ZodType<T | undefined>`
 * wrappers also can't reconstruct `T` cleanly. Consumers that care about the
 * post-`.omit()` shape should cast `c.req.valid(...)` to the expected type.
 */

@@ -11,9 +11,12 @@ function falsy(key: string) {
  return value === "false" || value === "0"
}

// Channels that default to the new effect-httpapi server backend. The legacy
// hono backend remains the default for stable (`prod`/`latest`) installs.
const HTTPAPI_DEFAULT_ON_CHANNELS = new Set(["dev", "beta", "local"])
// Channels where new experiments default to ON (unstable / internal users).
// Stable channels (`prod`, `latest`) stay opt-in.
const UNSTABLE_CHANNELS = new Set(["dev", "beta", "local"])
function unstableDefault(key: string) {
  return truthy(key) || (!falsy(key) && UNSTABLE_CHANNELS.has(InstallationChannel))
}

function number(key: string) {
  const value = process.env[key]

@@ -53,6 +56,9 @@ export const Flag = {
  OPENCODE_DISABLE_CLAUDE_CODE_PROMPT: OPENCODE_DISABLE_CLAUDE_CODE || truthy("OPENCODE_DISABLE_CLAUDE_CODE_PROMPT"),
  OPENCODE_DISABLE_CLAUDE_CODE_SKILLS,
  OPENCODE_DISABLE_EXTERNAL_SKILLS: truthy("OPENCODE_DISABLE_EXTERNAL_SKILLS"),
  // Default-on for dev/beta/local; opt-in for stable. Set
  // OPENCODE_EXPERIMENTAL_CUSTOMIZE_SKILL=false to force off, =true to force on.
  OPENCODE_EXPERIMENTAL_CUSTOMIZE_SKILL: unstableDefault("OPENCODE_EXPERIMENTAL_CUSTOMIZE_SKILL"),
  OPENCODE_FAKE_VCS: process.env["OPENCODE_FAKE_VCS"],
  OPENCODE_SERVER_PASSWORD: process.env["OPENCODE_SERVER_PASSWORD"],
  OPENCODE_SERVER_USERNAME: process.env["OPENCODE_SERVER_USERNAME"],

@@ -76,6 +82,7 @@ export const Flag = {
  OPENCODE_EXPERIMENTAL_LSP_TY: truthy("OPENCODE_EXPERIMENTAL_LSP_TY"),
  OPENCODE_EXPERIMENTAL_LSP_TOOL: OPENCODE_EXPERIMENTAL || truthy("OPENCODE_EXPERIMENTAL_LSP_TOOL"),
  OPENCODE_EXPERIMENTAL_PLAN_MODE: OPENCODE_EXPERIMENTAL || truthy("OPENCODE_EXPERIMENTAL_PLAN_MODE"),
  OPENCODE_EXPERIMENTAL_SCOUT: OPENCODE_EXPERIMENTAL || truthy("OPENCODE_EXPERIMENTAL_SCOUT"),
  OPENCODE_EXPERIMENTAL_MARKDOWN: !falsy("OPENCODE_EXPERIMENTAL_MARKDOWN"),
  OPENCODE_ENABLE_PARALLEL: truthy("OPENCODE_ENABLE_PARALLEL") || truthy("OPENCODE_EXPERIMENTAL_PARALLEL"),
  OPENCODE_MODELS_URL: process.env["OPENCODE_MODELS_URL"],

@@ -87,14 +94,6 @@ export const Flag = {
  OPENCODE_STRICT_CONFIG_DEPS: truthy("OPENCODE_STRICT_CONFIG_DEPS"),

  OPENCODE_WORKSPACE_ID: process.env["OPENCODE_WORKSPACE_ID"],
  // Defaults to true on dev/beta/local channels so internal users exercise the
  // new effect-httpapi server backend. Stable (`prod`/`latest`) installs stay
  // on the legacy hono backend until the rollout is complete. An explicit env
  // var ("true"/"1" or "false"/"0") always wins, providing an opt-in for
  // stable users and an escape hatch for dev/beta users.
  OPENCODE_EXPERIMENTAL_HTTPAPI:
    truthy("OPENCODE_EXPERIMENTAL_HTTPAPI") ||
    (!falsy("OPENCODE_EXPERIMENTAL_HTTPAPI") && HTTPAPI_DEFAULT_ON_CHANNELS.has(InstallationChannel)),
  OPENCODE_EXPERIMENTAL_WORKSPACES: OPENCODE_EXPERIMENTAL || truthy("OPENCODE_EXPERIMENTAL_WORKSPACES"),
  OPENCODE_EXPERIMENTAL_EVENT_SYSTEM: OPENCODE_EXPERIMENTAL || truthy("OPENCODE_EXPERIMENTAL_EVENT_SYSTEM"),
@@ -20,6 +20,7 @@ const paths = {
  data,
  bin: path.join(cache, "bin"),
  log: path.join(data, "log"),
  repos: path.join(data, "repos"),
  cache,
  config,
  state,

@@ -37,6 +38,7 @@ await Promise.all([
  fs.mkdir(Path.tmp, { recursive: true }),
  fs.mkdir(Path.log, { recursive: true }),
  fs.mkdir(Path.bin, { recursive: true }),
  fs.mkdir(Path.repos, { recursive: true }),
])

export class Service extends Context.Service<Service, Interface>()("@opencode/Global") {}

@@ -50,6 +52,7 @@ export interface Interface {
  readonly tmp: string
  readonly bin: string
  readonly log: string
  readonly repos: string
}

export function make(input: Partial<Interface> = {}): Interface {

@@ -62,6 +65,7 @@ export function make(input: Partial<Interface> = {}): Interface {
  tmp: Path.tmp,
  bin: Path.bin,
  log: Path.log,
  repos: Path.repos,
  ...input,
}
}

@@ -1,7 +1,7 @@
{
  "name": "@opencode-ai/desktop",
  "private": true,
  "version": "1.14.41",
  "version": "1.14.48",
  "type": "module",
  "license": "MIT",
  "homepage": "https://opencode.ai",

@@ -1,6 +1,6 @@
{
  "name": "@opencode-ai/enterprise",
  "version": "1.14.41",
  "version": "1.14.48",
  "private": true,
  "type": "module",
  "license": "MIT",

@@ -32,6 +32,7 @@
  "@cloudflare/workers-types": "catalog:",
  "@tailwindcss/vite": "catalog:",
  "@typescript/native-preview": "catalog:",
  "@types/bun": "catalog:",
  "@types/luxon": "catalog:",
  "tailwindcss": "catalog:",
  "typescript": "catalog:",

@@ -11,7 +11,7 @@
"allowJs": true,
"noEmit": true,
"strict": true,
"types": ["@cloudflare/workers-types", "vite/client"],
"types": ["@cloudflare/workers-types", "vite/client", "bun"],
"isolatedModules": true,
"paths": {
  "~/*": ["./src/*"]

@@ -1,7 +1,7 @@
id = "opencode"
name = "OpenCode"
description = "The open source coding agent."
version = "1.14.41"
version = "1.14.48"
schema_version = 1
authors = ["Anomaly"]
repository = "https://github.com/anomalyco/opencode"

@@ -11,26 +11,26 @@ name = "OpenCode"
icon = "./icons/opencode.svg"

[agent_servers.opencode.targets.darwin-aarch64]
archive = "https://github.com/anomalyco/opencode/releases/download/v1.14.41/opencode-darwin-arm64.zip"
archive = "https://github.com/anomalyco/opencode/releases/download/v1.14.48/opencode-darwin-arm64.zip"
cmd = "./opencode"
args = ["acp"]

[agent_servers.opencode.targets.darwin-x86_64]
archive = "https://github.com/anomalyco/opencode/releases/download/v1.14.41/opencode-darwin-x64.zip"
archive = "https://github.com/anomalyco/opencode/releases/download/v1.14.48/opencode-darwin-x64.zip"
cmd = "./opencode"
args = ["acp"]

[agent_servers.opencode.targets.linux-aarch64]
archive = "https://github.com/anomalyco/opencode/releases/download/v1.14.41/opencode-linux-arm64.tar.gz"
archive = "https://github.com/anomalyco/opencode/releases/download/v1.14.48/opencode-linux-arm64.tar.gz"
cmd = "./opencode"
args = ["acp"]

[agent_servers.opencode.targets.linux-x86_64]
archive = "https://github.com/anomalyco/opencode/releases/download/v1.14.41/opencode-linux-x64.tar.gz"
archive = "https://github.com/anomalyco/opencode/releases/download/v1.14.48/opencode-linux-x64.tar.gz"
cmd = "./opencode"
args = ["acp"]

[agent_servers.opencode.targets.windows-x86_64]
archive = "https://github.com/anomalyco/opencode/releases/download/v1.14.41/opencode-windows-x64.zip"
archive = "https://github.com/anomalyco/opencode/releases/download/v1.14.48/opencode-windows-x64.zip"
cmd = "./opencode.exe"
args = ["acp"]

@@ -1,6 +1,6 @@
{
  "name": "@opencode-ai/function",
  "version": "1.14.41",
  "version": "1.14.48",
  "$schema": "https://json.schemastore.org/package.json",
  "private": true,
  "type": "module",
packages/http-recorder/README.md (new file, 214 lines)
@@ -0,0 +1,214 @@
# @opencode-ai/http-recorder

Record and replay HTTP and WebSocket traffic for Effect's `HttpClient`. Tests
exercise real request shapes against deterministic, version-controlled
cassettes — no manual mocks, no flakes from upstream drift.

## Install

Internal package; depended on as `@opencode-ai/http-recorder` from another
workspace package.

```ts
import { HttpRecorder } from "@opencode-ai/http-recorder"
```

## Quickstart

Provide `cassetteLayer(name)` in place of (or layered over) your `HttpClient`.
By default the layer records on first run and replays on subsequent runs —
no env-var ternary at the call site, and `CI=true` forces strict replay so
missing cassettes fail loudly in CI rather than silently re-recording.

```ts
import { Effect } from "effect"
import { HttpClient, HttpClientRequest } from "effect/unstable/http"
import { HttpRecorder } from "@opencode-ai/http-recorder"

const program = Effect.gen(function* () {
  const http = yield* HttpClient.HttpClient
  const response = yield* http.execute(HttpClientRequest.get("https://api.example.com/users/1"))
  return yield* response.json
})

// Records if the cassette is missing, replays if it exists.
// In CI (CI=true) always replays — fails loudly on missing fixtures.
Effect.runPromise(program.pipe(Effect.provide(HttpRecorder.cassetteLayer("users/get-one"))))

// Force a refresh — always hits upstream and overwrites.
Effect.runPromise(program.pipe(Effect.provide(HttpRecorder.cassetteLayer("users/get-one", { mode: "record" }))))
```

## Modes

| Mode          | Behavior                                                                            |
| ------------- | ----------------------------------------------------------------------------------- |
| `auto`        | Default. Replay if the cassette exists; record if missing. `CI=true` forces replay. |
| `replay`      | Strict — match the request to a recorded interaction; error if none.                |
| `record`      | Execute upstream, append the interaction, write the cassette.                       |
| `passthrough` | Bypass the recorder entirely — just call upstream.                                  |
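
A mode can also be pinned explicitly, e.g. for a test that must never
re-record or for a quick check against the live service. A minimal sketch,
reusing the hypothetical cassette name from the quickstart:

```ts
// Strict replay: fail with a mismatch diagnostic instead of re-recording.
HttpRecorder.cassetteLayer("users/get-one", { mode: "replay" })

// Live debugging: skip the recorder and hit upstream directly.
HttpRecorder.cassetteLayer("users/get-one", { mode: "passthrough" })
```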

## Cassette format

A cassette is JSON at `test/fixtures/recordings/<name>.json`:

```json
{
  "version": 1,
  "metadata": { "name": "users/get-one", "recordedAt": "2026-05-09T..." },
  "interactions": [
    {
      "transport": "http",
      "request": { "method": "GET", "url": "...", "headers": {...}, "body": "" },
      "response": { "status": 200, "headers": {...}, "body": "..." }
    }
  ]
}
```

Cassettes are normal source files — review them, diff them, commit them.

## Request matching

By default, requests match on canonicalized method, URL, headers, and JSON
body (object keys sorted). Two dispatch strategies are available:

- **`match`** (default) — find the first recorded interaction whose request
  matches the incoming request. Same request twice returns the same response.
- **`sequential`** — return interactions in the order they were recorded,
  validating each one matches as the cursor advances. Use for ordered flows
  where the same URL is hit multiple times with meaningful state changes
  (pagination, retries, polling).

```ts
HttpRecorder.cassetteLayer("flow/poll-until-done", { dispatch: "sequential" })
```

Supply your own matcher via `match: (incoming, recorded) => boolean` for
custom equivalence (e.g. ignoring a timestamp field in the body).
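
A sketch of such a matcher, assuming the volatile value is a top-level JSON
body field named `ts` (both the field name and the cassette name here are
hypothetical):

```ts
import { HttpRecorder } from "@opencode-ai/http-recorder"

// Drop a volatile `ts` field from JSON bodies before comparing.
const stripTs = (body: string) => {
  try {
    const { ts: _ts, ...rest } = JSON.parse(body)
    return JSON.stringify(rest)
  } catch {
    return body // non-JSON bodies are compared verbatim
  }
}

HttpRecorder.cassetteLayer("jobs/submit", {
  match: (incoming, recorded) =>
    HttpRecorder.defaultMatcher(
      { ...incoming, body: stripTs(incoming.body) },
      { ...recorded, body: stripTs(recorded.body) },
    ),
})
```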

## Redaction & secret safety

Cassettes get checked in, so the recorder is aggressive about not letting
secrets escape. Redaction is configured by composing a `Redactor`:

```ts
import { HttpRecorder, Redactor } from "@opencode-ai/http-recorder"

HttpRecorder.cassetteLayer("anthropic/messages", {
  redactor: Redactor.defaults({
    requestHeaders: { allow: ["content-type", "anthropic-version"] },
    url: { transform: (url) => url.replace(/\/accounts\/[^/]+/, "/accounts/{account}") },
    body: (parsed) => ({ ...(parsed as object), user_id: "{user}" }),
  }),
})
```

`Redactor.defaults({ … })` composes the four built-in redactors with your
overrides. For full control, build the stack yourself:

```ts
const redactor = Redactor.compose(
  Redactor.requestHeaders({ allow: ["content-type", "x-custom"] }),
  Redactor.responseHeaders(),
  Redactor.url({ query: ["session-id"] }),
  Redactor.body((parsed) => /* … */),
)
```

What each layer does:

- **`requestHeaders` / `responseHeaders`** — strip headers to a small
  allow-list (request default: `content-type`, `accept`, `openai-beta`;
  response default: `content-type`). Sensitive headers within the
  allow-list (`authorization`, `cookie`, API-key headers, AWS/GCP tokens,
  …) are replaced with `[REDACTED]`.
- **`url`** — query parameters matching common secret names (`api_key`,
  `token`, `signature`, AWS signing params, …) are replaced with
  `[REDACTED]`. URL user/password are replaced. `transform` runs after
  built-in redaction for path-level scrubbing.
- **`body`** — receives the parsed JSON request body and returns a redacted
  version. No-op for non-JSON bodies.

After assembling the cassette, the recorder scans every string for known
secret patterns (Bearer tokens, `sk-…`, `sk-ant-…`, Google `AIza…` keys,
AWS access keys, GitHub tokens, PEM blocks) and for values matching any
environment variable named like a credential. If anything is found, the
cassette is **not written** and the request fails with `UnsafeCassetteError`
listing what was detected.
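
The same scan is exported as `secretFindings`, so a payload can be checked
directly before it ever reaches a cassette. A small sketch (the token value
is fake):

```ts
import { HttpRecorder } from "@opencode-ai/http-recorder"

// One finding per suspicious string: a JSON path plus the matched reason.
const findings = HttpRecorder.secretFindings({
  headers: { authorization: "Bearer fake-token-fake-token-fake" },
})
// => roughly [{ path: "headers.authorization", reason: "bearer token" }]
```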

## WebSocket recording

WebSocket support records the open frame plus client/server message
streams. It uses the shared `Cassette.Service`, so HTTP and WS interactions
can live in the same cassette.

```ts
import { HttpRecorder } from "@opencode-ai/http-recorder"
import { Effect } from "effect"

const program = Effect.gen(function* () {
  const cassette = yield* HttpRecorder.Cassette.Service
  const executor = yield* HttpRecorder.makeWebSocketExecutor({
    name: "ws/subscribe",
    cassette,
    live: liveExecutor, // your real WebSocketExecutor
  })
  // use executor.open(...)
})
```

## Inspecting cassettes programmatically

`Cassette.Service` exposes `read`, `append`, `exists`, and `list`. `read`
returns the recorded interactions for a name; `list` returns cassette names;
the file format is hidden behind the seam. Useful for CI checks:

```ts
import { HttpRecorder } from "@opencode-ai/http-recorder"
import { Effect } from "effect"

const audit = Effect.gen(function* () {
  const cassettes = yield* HttpRecorder.Cassette.Service
  const names = yield* cassettes.list()
  const issues = yield* Effect.forEach(names, (name) =>
    cassettes
      .read(name)
      .pipe(Effect.map((interactions) => ({ name, findings: HttpRecorder.secretFindings(interactions) }))),
  )
  return issues.filter((i) => i.findings.length > 0)
})
```

`cassetteLayer` is the batteries-included entry point — it provides
`Cassette.fileSystem({ directory })` automatically. If you want to provide
your own `Cassette.Service` (e.g. an in-memory adapter for the recorder's
own unit tests), use `recordingLayer` and supply `Cassette.fileSystem` /
`Cassette.memory` yourself.
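
For instance, a minimal sketch of wiring `recordingLayer` against the
in-memory adapter (`recordedInteractions` is a hypothetical seed value):

```ts
import { Layer } from "effect"
import { FetchHttpClient } from "effect/unstable/http"
import { HttpRecorder } from "@opencode-ai/http-recorder"

// Replays from a cassette seeded in memory; nothing touches the filesystem.
const layer = HttpRecorder.recordingLayer("unit/example", { mode: "replay" }).pipe(
  Layer.provide(HttpRecorder.Cassette.memory({ "unit/example": recordedInteractions })),
  Layer.provide(FetchHttpClient.layer),
)
```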

## Options reference

```ts
type RecordReplayOptions = {
  mode?: "auto" | "replay" | "record" | "passthrough" // default: "auto" (CI=true forces "replay")
  directory?: string // default: <cwd>/test/fixtures/recordings
  metadata?: Record<string, unknown> // merged into cassette.metadata
  redactor?: Redactor // default: Redactor.defaults()
  dispatch?: "match" | "sequential" // default: "match"
  match?: (incoming, recorded) => boolean // custom matcher
}
```

## Layout

| File           | Purpose                                                                          |
| -------------- | -------------------------------------------------------------------------------- |
| `effect.ts`    | `cassetteLayer` / `recordingLayer` — the `HttpClient` adapter.                   |
| `websocket.ts` | `makeWebSocketExecutor` — WebSocket record/replay.                               |
| `cassette.ts`  | `Cassette.Service` — reads/writes cassette files, accumulates state.             |
| `recorder.ts`  | Shared transport plumbing: `UnsafeCassetteError`, `appendOrFail`, `ReplayState`. |
| `redactor.ts`  | Composable `Redactor` — headers, url, body redaction.                            |
| `redaction.ts` | Lower-level header/URL primitives + secret pattern detection.                    |
| `schema.ts`    | Effect Schema definitions for the cassette JSON format.                          |
| `storage.ts`   | Path resolution, JSON encode/decode, sync existence check.                       |
| `matching.ts`  | Request matcher, canonicalization, dispatch strategies, mismatch diagnostics.    |

26
packages/http-recorder/package.json
Normal file
@@ -0,0 +1,26 @@
{
  "$schema": "https://json.schemastore.org/package.json",
  "version": "1.14.48",
  "name": "@opencode-ai/http-recorder",
  "type": "module",
  "license": "MIT",
  "private": true,
  "scripts": {
    "test": "bun test --timeout 30000",
    "test:ci": "mkdir -p .artifacts/unit && bun test --timeout 30000 --reporter=junit --reporter-outfile=.artifacts/unit/junit.xml",
    "typecheck": "tsgo --noEmit"
  },
  "exports": {
    ".": "./src/index.ts",
    "./*": "./src/*.ts"
  },
  "devDependencies": {
    "@tsconfig/bun": "catalog:",
    "@types/bun": "catalog:",
    "@typescript/native-preview": "catalog:"
  },
  "dependencies": {
    "@effect/platform-node": "catalog:",
    "effect": "catalog:"
  }
}

150
packages/http-recorder/src/cassette.ts
Normal file
@@ -0,0 +1,150 @@
import { Context, Effect, FileSystem, Layer, Schema } from "effect"
import * as fs from "node:fs"
import * as path from "node:path"
import { secretFindings, type SecretFinding } from "./redaction"
import { decodeCassette, encodeCassette, type Cassette, type CassetteMetadata, type Interaction } from "./schema"

const DEFAULT_RECORDINGS_DIR = path.resolve(process.cwd(), "test", "fixtures", "recordings")

export class CassetteNotFoundError extends Schema.TaggedErrorClass<CassetteNotFoundError>()("CassetteNotFoundError", {
  cassetteName: Schema.String,
}) {
  override get message() {
    return `Cassette "${this.cassetteName}" not found`
  }
}

export interface AppendResult {
  readonly findings: ReadonlyArray<SecretFinding>
}

export interface Interface {
  readonly read: (name: string) => Effect.Effect<ReadonlyArray<Interaction>, CassetteNotFoundError>
  readonly append: (name: string, interaction: Interaction, metadata?: CassetteMetadata) => Effect.Effect<AppendResult>
  readonly exists: (name: string) => Effect.Effect<boolean>
  readonly list: () => Effect.Effect<ReadonlyArray<string>>
}

export class Service extends Context.Service<Service, Interface>()("@opencode-ai/http-recorder/Cassette") {}

export const hasCassetteSync = (name: string, options: { readonly directory?: string } = {}) =>
  fs.existsSync(path.join(options.directory ?? DEFAULT_RECORDINGS_DIR, `${name}.json`))

const buildCassette = (
  name: string,
  interactions: ReadonlyArray<Interaction>,
  metadata: CassetteMetadata | undefined,
): Cassette => ({
  version: 1,
  metadata: { name, recordedAt: new Date().toISOString(), ...(metadata ?? {}) },
  interactions,
})

const formatCassette = (cassette: Cassette) => `${JSON.stringify(encodeCassette(cassette), null, 2)}\n`

const parseCassette = (raw: string) => decodeCassette(JSON.parse(raw))

export const fileSystem = (
  options: { readonly directory?: string } = {},
): Layer.Layer<Service, never, FileSystem.FileSystem> =>
  Layer.effect(
    Service,
    Effect.gen(function* () {
      const fs = yield* FileSystem.FileSystem
      const directory = options.directory ?? DEFAULT_RECORDINGS_DIR
      const recorded = new Map<string, { interactions: Interaction[]; findings: SecretFinding[] }>()
      const directoriesEnsured = new Set<string>()

      const cassettePath = (name: string) => path.join(directory, `${name}.json`)

      const ensureDirectory = (name: string) =>
        Effect.gen(function* () {
          const dir = path.dirname(cassettePath(name))
          if (directoriesEnsured.has(dir)) return
          yield* fs.makeDirectory(dir, { recursive: true }).pipe(Effect.orDie)
          directoriesEnsured.add(dir)
        })

      const walk = (current: string): Effect.Effect<ReadonlyArray<string>> =>
        Effect.gen(function* () {
          const entries = yield* fs.readDirectory(current).pipe(Effect.catch(() => Effect.succeed([] as string[])))
          const nested = yield* Effect.forEach(entries, (entry) => {
            const full = path.join(current, entry)
            return fs.stat(full).pipe(
              Effect.flatMap((stat) => (stat.type === "Directory" ? walk(full) : Effect.succeed([full]))),
              Effect.catch(() => Effect.succeed([] as string[])),
            )
          })
          return nested.flat()
        })

      return Service.of({
        read: (name) =>
          fs.readFileString(cassettePath(name)).pipe(
            Effect.map((raw) => parseCassette(raw).interactions),
            Effect.catch(() => Effect.fail(new CassetteNotFoundError({ cassetteName: name }))),
          ),
        append: (name, interaction, metadata) =>
          Effect.gen(function* () {
            const entry = recorded.get(name) ?? { interactions: [], findings: [] }
            if (!recorded.has(name)) recorded.set(name, entry)
            entry.interactions.push(interaction)
            entry.findings.push(...secretFindings(interaction))
            const cassette = buildCassette(name, entry.interactions, metadata)
            const findings = [...entry.findings, ...secretFindings(cassette.metadata ?? {})]
            // Only persist when the accumulated cassette is clean; callers surface findings as UnsafeCassetteError.
            if (findings.length === 0) {
              yield* ensureDirectory(name)
              yield* fs.writeFileString(cassettePath(name), formatCassette(cassette)).pipe(Effect.orDie)
            }
            return { findings }
          }),
        exists: (name) =>
          fs.access(cassettePath(name)).pipe(
            Effect.as(true),
            Effect.catch(() => Effect.succeed(false)),
          ),
        list: () =>
          walk(directory).pipe(
            Effect.map((files) =>
              files
                .filter((file) => file.endsWith(".json"))
                .map((file) =>
                  path
                    .relative(directory, file)
                    .replace(/\\/g, "/")
                    .replace(/\.json$/, ""),
                )
                .toSorted((a, b) => a.localeCompare(b)),
            ),
          ),
      })
    }),
  )

export const memory = (initial: Record<string, ReadonlyArray<Interaction>> = {}): Layer.Layer<Service> =>
  Layer.sync(Service, () => {
    const stored = new Map<string, Interaction[]>(
      Object.entries(initial).map(([name, interactions]) => [name, [...interactions]]),
    )
    const accumulatedFindings = new Map<string, SecretFinding[]>()

    return Service.of({
      read: (name) =>
        stored.has(name)
          ? Effect.succeed(stored.get(name) ?? [])
          : Effect.fail(new CassetteNotFoundError({ cassetteName: name })),
      append: (name, interaction, metadata) =>
        Effect.sync(() => {
          const existing = stored.get(name)
          if (existing) existing.push(interaction)
          else stored.set(name, [interaction])
          const findings = accumulatedFindings.get(name)
          if (findings) findings.push(...secretFindings(interaction))
          else accumulatedFindings.set(name, [...secretFindings(interaction)])
          if (metadata) accumulatedFindings.get(name)!.push(...secretFindings({ name, ...metadata }))
          return { findings: accumulatedFindings.get(name) ?? [] }
        }),
      exists: (name) => Effect.sync(() => stored.has(name)),
      list: () => Effect.sync(() => Array.from(stored.keys()).toSorted()),
    })
  })

144
packages/http-recorder/src/effect.ts
Normal file
@@ -0,0 +1,144 @@
import { NodeFileSystem } from "@effect/platform-node"
import { Effect, Layer, Option } from "effect"
import {
  FetchHttpClient,
  Headers,
  HttpBody,
  HttpClient,
  HttpClientError,
  HttpClientRequest,
  HttpClientResponse,
  UrlParams,
} from "effect/unstable/http"
import * as CassetteService from "./cassette"
import { defaultMatcher, selectMatch, selectSequential, type RequestMatcher } from "./matching"
import { appendOrFail, makeReplayState, resolveAutoMode } from "./recorder"
import { defaults, type Redactor } from "./redactor"
import { redactUrl } from "./redaction"
import { httpInteractions, type CassetteMetadata, type HttpInteraction, type ResponseSnapshot } from "./schema"

export type RecordReplayMode = "auto" | "record" | "replay" | "passthrough"

export interface RecordReplayOptions {
  readonly mode?: RecordReplayMode
  readonly directory?: string
  readonly metadata?: CassetteMetadata
  readonly redactor?: Redactor
  readonly dispatch?: "match" | "sequential"
  readonly match?: RequestMatcher
}

const BINARY_CONTENT_TYPES: ReadonlyArray<string> = ["vnd.amazon.eventstream", "octet-stream"]

const isBinaryContentType = (contentType: string | undefined) =>
  contentType !== undefined && BINARY_CONTENT_TYPES.some((token) => contentType.toLowerCase().includes(token))

const captureResponseBody = (response: HttpClientResponse.HttpClientResponse, contentType: string | undefined) =>
  isBinaryContentType(contentType)
    ? response.arrayBuffer.pipe(
        Effect.map((bytes) => ({ body: Buffer.from(bytes).toString("base64"), bodyEncoding: "base64" as const })),
      )
    : response.text.pipe(Effect.map((body) => ({ body })))

const decodeResponseBody = (snapshot: ResponseSnapshot) =>
  snapshot.bodyEncoding === "base64" ? Buffer.from(snapshot.body, "base64") : snapshot.body

export const redactedErrorRequest = (request: HttpClientRequest.HttpClientRequest) =>
  HttpClientRequest.makeWith(
    request.method,
    redactUrl(request.url),
    UrlParams.empty,
    Option.none(),
    Headers.empty,
    HttpBody.empty,
  )

const transportError = (request: HttpClientRequest.HttpClientRequest, description: string) =>
  new HttpClientError.HttpClientError({
    reason: new HttpClientError.TransportError({ request: redactedErrorRequest(request), description }),
  })

export const recordingLayer = (
  name: string,
  options: Omit<RecordReplayOptions, "directory"> = {},
): Layer.Layer<HttpClient.HttpClient, never, HttpClient.HttpClient | CassetteService.Service> =>
  Layer.effect(
    HttpClient.HttpClient,
    Effect.gen(function* () {
      const upstream = yield* HttpClient.HttpClient
      const cassetteService = yield* CassetteService.Service
      const redactor = options.redactor ?? defaults()
      const match = options.match ?? defaultMatcher
      const requested = options.mode ?? "auto"
      const mode = requested === "auto" ? yield* resolveAutoMode(cassetteService, name) : requested
      const sequential = options.dispatch === "sequential"
      const replay = yield* makeReplayState(cassetteService, name, httpInteractions)

      const snapshotRequest = (request: HttpClientRequest.HttpClientRequest) =>
        Effect.gen(function* () {
          const web = yield* HttpClientRequest.toWeb(request).pipe(Effect.orDie)
          return redactor.request({
            method: web.method,
            url: web.url,
            headers: Object.fromEntries(web.headers.entries()),
            body: yield* Effect.promise(() => web.text()),
          })
        })

      return HttpClient.make((request) => {
        if (mode === "passthrough") return upstream.execute(request)

        if (mode === "record") {
          return Effect.gen(function* () {
            const incoming = yield* snapshotRequest(request)
            const response = yield* upstream.execute(request)
            const captured = yield* captureResponseBody(response, response.headers["content-type"])
            const interaction: HttpInteraction = {
              transport: "http",
              request: incoming,
              response: redactor.response({
                status: response.status,
                headers: response.headers as Record<string, string>,
                ...captured,
              }),
            }
            yield* appendOrFail(cassetteService, name, interaction, options.metadata).pipe(
              Effect.catchTag("UnsafeCassetteError", (error) => Effect.fail(transportError(request, error.message))),
            )
            return HttpClientResponse.fromWeb(
              request,
              new Response(decodeResponseBody(interaction.response), interaction.response),
            )
          })
        }

        return Effect.gen(function* () {
          const incoming = yield* snapshotRequest(request)
          const interactions = yield* replay.load.pipe(
            Effect.mapError(() =>
              transportError(request, `Fixture "${name}" not found. Run locally to record it (CI=true forces replay).`),
            ),
          )
          const result = sequential
            ? selectSequential(interactions, incoming, match, yield* replay.cursor)
            : selectMatch(interactions, incoming, match)
          if (!result.interaction)
            return yield* Effect.fail(
              transportError(request, `Fixture "${name}" does not match the current request: ${result.detail}.`),
            )
          if (sequential) yield* replay.advance
          return HttpClientResponse.fromWeb(
            request,
            new Response(decodeResponseBody(result.interaction.response), result.interaction.response),
          )
        })
      })
    }),
  )

export const cassetteLayer = (name: string, options: RecordReplayOptions = {}): Layer.Layer<HttpClient.HttpClient> =>
  recordingLayer(name, options).pipe(
    Layer.provide(CassetteService.fileSystem({ directory: options.directory })),
    Layer.provide(FetchHttpClient.layer),
    Layer.provide(NodeFileSystem.layer),
  )

26
packages/http-recorder/src/index.ts
Normal file
@@ -0,0 +1,26 @@
export type {
  CassetteMetadata,
  HttpInteraction,
  Interaction,
  RequestSnapshot,
  ResponseSnapshot,
  WebSocketFrame,
  WebSocketInteraction,
} from "./schema"
export { CassetteNotFoundError, hasCassetteSync } from "./cassette"
export { defaultMatcher, type RequestMatcher } from "./matching"
export { redactHeaders, redactUrl, secretFindings, type SecretFinding } from "./redaction"
export { UnsafeCassetteError } from "./recorder"
export { cassetteLayer, recordingLayer, type RecordReplayMode, type RecordReplayOptions } from "./effect"
export {
  makeWebSocketExecutor,
  type WebSocketConnection,
  type WebSocketExecutor,
  type WebSocketRecordReplayOptions,
  type WebSocketRequest,
} from "./websocket"

export * as Cassette from "./cassette"
export * as Redactor from "./redactor"

export * as HttpRecorder from "."

124
packages/http-recorder/src/matching.ts
Normal file
@@ -0,0 +1,124 @@
import { Option, Schema } from "effect"
import { REDACTED, secretFindings } from "./redaction"
import type { HttpInteraction, RequestSnapshot } from "./schema"

const JsonValue = Schema.fromJsonString(Schema.Unknown)
export const decodeJson = Schema.decodeUnknownOption(JsonValue)

const isRecord = (value: unknown): value is Record<string, unknown> =>
  value !== null && typeof value === "object" && !Array.isArray(value)

export const canonicalizeJson = (value: unknown): unknown => {
  if (Array.isArray(value)) return value.map(canonicalizeJson)
  if (isRecord(value)) {
    return Object.fromEntries(
      Object.keys(value)
        .toSorted()
        .map((key) => [key, canonicalizeJson(value[key])]),
    )
  }
  return value
}

export type RequestMatcher = (incoming: RequestSnapshot, recorded: RequestSnapshot) => boolean

export const canonicalSnapshot = (snapshot: RequestSnapshot): string =>
  JSON.stringify({
    method: snapshot.method,
    url: snapshot.url,
    headers: canonicalizeJson(snapshot.headers),
    body: Option.match(decodeJson(snapshot.body), {
      onNone: () => snapshot.body,
      onSome: canonicalizeJson,
    }),
  })

export const defaultMatcher: RequestMatcher = (incoming, recorded) =>
  canonicalSnapshot(incoming) === canonicalSnapshot(recorded)

const safeText = (value: unknown) => {
  if (value === undefined) return "undefined"
  if (secretFindings(value).length > 0) return JSON.stringify(REDACTED)
  const text = JSON.stringify(value)
  if (!text) return String(value)
  return text.length > 300 ? `${text.slice(0, 300)}...` : text
}

const jsonBody = (body: string) => Option.getOrUndefined(decodeJson(body))

const valueDiffs = (expected: unknown, received: unknown, base = "$", limit = 8): ReadonlyArray<string> => {
  if (Object.is(expected, received)) return []
  if (isRecord(expected) && isRecord(received)) {
    return [...new Set([...Object.keys(expected), ...Object.keys(received)])]
      .toSorted()
      .flatMap((key) => valueDiffs(expected[key], received[key], `${base}.${key}`, limit))
      .slice(0, limit)
  }
  if (Array.isArray(expected) && Array.isArray(received)) {
    return Array.from({ length: Math.max(expected.length, received.length) }, (_, index) => index)
      .flatMap((index) => valueDiffs(expected[index], received[index], `${base}[${index}]`, limit))
      .slice(0, limit)
  }
  return [`${base} expected ${safeText(expected)}, received ${safeText(received)}`]
}

const headerDiffs = (expected: Record<string, string>, received: Record<string, string>) =>
  [...new Set([...Object.keys(expected), ...Object.keys(received)])].toSorted().flatMap((key) => {
    if (expected[key] === received[key]) return []
    if (expected[key] === undefined) return [` ${key} unexpected ${safeText(received[key])}`]
    if (received[key] === undefined) return [` ${key} missing expected ${safeText(expected[key])}`]
    return [` ${key} expected ${safeText(expected[key])}, received ${safeText(received[key])}`]
  })

export const requestDiff = (expected: RequestSnapshot, received: RequestSnapshot): ReadonlyArray<string> => {
  const lines: string[] = []
  if (expected.method !== received.method) {
    lines.push("method:", ` expected ${expected.method}, received ${received.method}`)
  }
  if (expected.url !== received.url) {
    lines.push("url:", ` expected ${expected.url}`, ` received ${received.url}`)
  }
  const headers = headerDiffs(expected.headers, received.headers)
  if (headers.length > 0) lines.push("headers:", ...headers.slice(0, 8))
  const expectedBody = jsonBody(expected.body)
  const receivedBody = jsonBody(received.body)
  const body =
    expectedBody !== undefined && receivedBody !== undefined
      ? valueDiffs(expectedBody, receivedBody).map((line) => ` ${line}`)
      : expected.body === received.body
        ? []
        : [` expected ${safeText(expected.body)}, received ${safeText(received.body)}`]
  if (body.length > 0) lines.push("body:", ...body)
  return lines
}

export const mismatchDetail = (interactions: ReadonlyArray<HttpInteraction>, incoming: RequestSnapshot): string => {
  if (interactions.length === 0) return "cassette has no recorded HTTP interactions"
  const ranked = interactions
    .map((interaction, index) => ({ index, lines: requestDiff(interaction.request, incoming) }))
    .toSorted((a, b) => a.lines.length - b.lines.length || a.index - b.index)
  const best = ranked[0]
  return ["no recorded interaction matched", `closest interaction: #${best.index + 1}`, ...best.lines].join("\n")
}

export const selectMatch = (
  interactions: ReadonlyArray<HttpInteraction>,
  incoming: RequestSnapshot,
  match: RequestMatcher,
): { readonly interaction: HttpInteraction | undefined; readonly detail: string } => {
  const interaction = interactions.find((candidate) => match(incoming, candidate.request))
  return { interaction, detail: interaction ? "" : mismatchDetail(interactions, incoming) }
}

export const selectSequential = (
  interactions: ReadonlyArray<HttpInteraction>,
  incoming: RequestSnapshot,
  match: RequestMatcher,
  index: number,
): { readonly interaction: HttpInteraction | undefined; readonly detail: string } => {
  const interaction = interactions[index]
  if (!interaction) return { interaction, detail: `interaction ${index + 1} of ${interactions.length} not recorded` }
  if (!match(incoming, interaction.request))
    return { interaction: undefined, detail: requestDiff(interaction.request, incoming).join("\n") }
  return { interaction, detail: "" }
}

73
packages/http-recorder/src/recorder.ts
Normal file
@@ -0,0 +1,73 @@
import { Effect, Ref, Schema, Scope } from "effect"
import type * as CassetteService from "./cassette"
import type { CassetteNotFoundError } from "./cassette"
import { SecretFindingSchema } from "./redaction"
import type { CassetteMetadata, Interaction } from "./schema"

export class UnsafeCassetteError extends Schema.TaggedErrorClass<UnsafeCassetteError>()("UnsafeCassetteError", {
  cassetteName: Schema.String,
  findings: Schema.Array(SecretFindingSchema),
}) {
  override get message() {
    return `Refusing to write cassette "${this.cassetteName}" because it contains possible secrets: ${this.findings
      .map((finding) => `${finding.path} (${finding.reason})`)
      .join(", ")}`
  }
}

export type ResolvedMode = "record" | "replay" | "passthrough"

const isCI = () => {
  const value = process.env.CI
  return value !== undefined && value !== "" && value !== "false" && value !== "0"
}

export const resolveAutoMode = (cassette: CassetteService.Interface, name: string): Effect.Effect<ResolvedMode> =>
  Effect.gen(function* () {
    if (isCI()) return "replay"
    return (yield* cassette.exists(name)) ? "replay" : "record"
  })

export const appendOrFail = (
  cassette: CassetteService.Interface,
  name: string,
  interaction: Interaction,
  metadata: CassetteMetadata | undefined,
): Effect.Effect<void, UnsafeCassetteError> =>
  cassette
    .append(name, interaction, metadata)
    .pipe(
      Effect.flatMap(({ findings }) =>
        findings.length === 0 ? Effect.void : Effect.fail(new UnsafeCassetteError({ cassetteName: name, findings })),
      ),
    )

export interface ReplayState<T> {
  readonly load: Effect.Effect<ReadonlyArray<T>, CassetteNotFoundError>
  readonly cursor: Effect.Effect<number>
  readonly advance: Effect.Effect<void>
}

export const makeReplayState = <T>(
  cassette: CassetteService.Interface,
  name: string,
  project: (interactions: ReadonlyArray<Interaction>) => ReadonlyArray<T>,
): Effect.Effect<ReplayState<T>, never, Scope.Scope> =>
  Effect.gen(function* () {
    const load = yield* Effect.cached(cassette.read(name).pipe(Effect.map(project)))
    const position = yield* Ref.make(0)

    // When the scope closes, die if sequential replay consumed only part of the cassette.
    yield* Effect.addFinalizer(() =>
      Effect.gen(function* () {
        const used = yield* Ref.get(position)
        if (used === 0) return
        const interactions = yield* load.pipe(Effect.orDie)
        if (used < interactions.length)
          yield* Effect.die(
            new Error(`Unused recorded interactions in ${name}: used ${used} of ${interactions.length}`),
          )
      }),
    )

    return { load, cursor: Ref.get(position), advance: Ref.update(position, (n) => n + 1) }
  })

115
packages/http-recorder/src/redaction.ts
Normal file
@@ -0,0 +1,115 @@
import { Schema } from "effect"

export const REDACTED = "[REDACTED]"

const DEFAULT_REDACT_HEADERS = [
  "authorization",
  "cookie",
  "proxy-authorization",
  "set-cookie",
  "x-api-key",
  "x-amz-security-token",
  "x-goog-api-key",
]

const DEFAULT_REDACT_QUERY = [
  "access_token",
  "api-key",
  "api_key",
  "apikey",
  "code",
  "key",
  "signature",
  "sig",
  "token",
  "x-amz-credential",
  "x-amz-security-token",
  "x-amz-signature",
]

const SECRET_PATTERNS: ReadonlyArray<{ readonly label: string; readonly pattern: RegExp }> = [
  { label: "bearer token", pattern: /\bBearer\s+[A-Za-z0-9._~+/=-]{16,}\b/i },
  { label: "API key", pattern: /\bsk-[A-Za-z0-9][A-Za-z0-9_-]{20,}\b/ },
  { label: "Anthropic API key", pattern: /\bsk-ant-[A-Za-z0-9_-]{20,}\b/ },
  { label: "Google API key", pattern: /\bAIza[0-9A-Za-z_-]{20,}\b/ },
  { label: "AWS access key", pattern: /\b(?:AKIA|ASIA)[0-9A-Z]{16}\b/ },
  { label: "GitHub token", pattern: /\bgh[pousr]_[A-Za-z0-9_]{20,}\b/ },
  { label: "private key", pattern: /-----BEGIN [A-Z ]*PRIVATE KEY-----/ },
]

const ENV_SECRET_NAMES = /(?:API|AUTH|BEARER|CREDENTIAL|KEY|PASSWORD|SECRET|TOKEN)/i
const SAFE_ENV_VALUES = new Set(["fixture", "test", "test-key"])

const envSecrets = () =>
  Object.entries(process.env).flatMap(([name, value]) => {
    if (!value) return []
    if (!ENV_SECRET_NAMES.test(name)) return []
    if (value.length < 12) return []
    if (SAFE_ENV_VALUES.has(value.toLowerCase())) return []
    return [{ name, value }]
  })

const pathFor = (base: string, key: string) => (base ? `${base}.${key}` : key)

const stringEntries = (value: unknown, base = ""): ReadonlyArray<{ readonly path: string; readonly value: string }> => {
  if (typeof value === "string") return [{ path: base, value }]
  if (Array.isArray(value)) return value.flatMap((item, index) => stringEntries(item, `${base}[${index}]`))
  if (value && typeof value === "object") {
    return Object.entries(value).flatMap(([key, child]) => stringEntries(child, pathFor(base, key)))
  }
  return []
}

const redactionSet = (values: ReadonlyArray<string> | undefined, defaults: ReadonlyArray<string>) =>
  new Set([...defaults, ...(values ?? [])].map((value) => value.toLowerCase()))

export type UrlRedactor = (url: string) => string

export const redactUrl = (
  raw: string,
  query: ReadonlyArray<string> = DEFAULT_REDACT_QUERY,
  urlRedactor?: UrlRedactor,
) => {
  if (!URL.canParse(raw)) return urlRedactor?.(raw) ?? raw
  const url = new URL(raw)
  if (url.username) url.username = REDACTED
  if (url.password) url.password = REDACTED
  const redacted = redactionSet(query, DEFAULT_REDACT_QUERY)
  for (const key of [...url.searchParams.keys()]) {
    if (redacted.has(key.toLowerCase())) url.searchParams.set(key, REDACTED)
  }
  return urlRedactor?.(url.toString()) ?? url.toString()
}

export const redactHeaders = (
  headers: Record<string, string>,
  allow: ReadonlyArray<string>,
  redact: ReadonlyArray<string> = DEFAULT_REDACT_HEADERS,
) => {
  const allowed = new Set(allow.map((name) => name.toLowerCase()))
  const redacted = redactionSet(redact, DEFAULT_REDACT_HEADERS)
  return Object.fromEntries(
    Object.entries(headers)
      .map(([name, value]) => [name.toLowerCase(), value] as const)
      .filter(([name]) => allowed.has(name))
      .map(([name, value]) => [name, redacted.has(name) ? REDACTED : value] as const)
      .toSorted(([a], [b]) => a.localeCompare(b)),
  )
}

export const SecretFindingSchema = Schema.Struct({
  path: Schema.String,
  reason: Schema.String,
})
export type SecretFinding = Schema.Schema.Type<typeof SecretFindingSchema>

export const secretFindings = (value: unknown): ReadonlyArray<SecretFinding> =>
  stringEntries(value).flatMap((entry) => [
    ...SECRET_PATTERNS.filter((item) => item.pattern.test(entry.value)).map((item) => ({
      path: entry.path,
      reason: item.label,
    })),
    ...envSecrets()
      .filter((item) => entry.value.includes(item.value))
      .map((item) => ({ path: entry.path, reason: `environment secret ${item.name}` })),
  ])

76
packages/http-recorder/src/redactor.ts
Normal file
@@ -0,0 +1,76 @@
import { Option } from "effect"
import { decodeJson } from "./matching"
import { redactHeaders, redactUrl } from "./redaction"
import type { RequestSnapshot, ResponseSnapshot } from "./schema"

export const DEFAULT_REQUEST_HEADERS: ReadonlyArray<string> = ["content-type", "accept", "openai-beta"]
export const DEFAULT_RESPONSE_HEADERS: ReadonlyArray<string> = ["content-type"]

const identity = <T>(value: T) => value

export interface Redactor {
  readonly request: (snapshot: RequestSnapshot) => RequestSnapshot
  readonly response: (snapshot: ResponseSnapshot) => ResponseSnapshot
}

export const compose = (...redactors: ReadonlyArray<Partial<Redactor>>): Redactor => {
  const requests = redactors.map((r) => r.request).filter((fn): fn is Redactor["request"] => fn !== undefined)
  const responses = redactors.map((r) => r.response).filter((fn): fn is Redactor["response"] => fn !== undefined)
  return {
    request: requests.length === 0 ? identity : (snapshot) => requests.reduce((acc, fn) => fn(acc), snapshot),
    response: responses.length === 0 ? identity : (snapshot) => responses.reduce((acc, fn) => fn(acc), snapshot),
  }
}

export interface HeaderOptions {
  readonly allow?: ReadonlyArray<string>
  readonly redact?: ReadonlyArray<string>
}

export const requestHeaders = (options: HeaderOptions = {}): Partial<Redactor> => ({
  request: (snapshot) => ({
    ...snapshot,
    headers: redactHeaders(snapshot.headers, options.allow ?? DEFAULT_REQUEST_HEADERS, options.redact),
  }),
})

export const responseHeaders = (options: HeaderOptions = {}): Partial<Redactor> => ({
  response: (snapshot) => ({
    ...snapshot,
    headers: redactHeaders(snapshot.headers, options.allow ?? DEFAULT_RESPONSE_HEADERS, options.redact),
  }),
})

export interface UrlOptions {
  readonly query?: ReadonlyArray<string>
  readonly transform?: (url: string) => string
}

export const url = (options: UrlOptions = {}): Partial<Redactor> => ({
  request: (snapshot) => ({ ...snapshot, url: redactUrl(snapshot.url, options.query, options.transform) }),
})

export const body = (transform: (parsed: unknown) => unknown): Partial<Redactor> => ({
  request: (snapshot) => ({
    ...snapshot,
    body: Option.match(decodeJson(snapshot.body), {
      onNone: () => snapshot.body,
      onSome: (parsed) => JSON.stringify(transform(parsed)),
    }),
  }),
})

export interface DefaultRedactorOverrides {
  readonly requestHeaders?: HeaderOptions
  readonly responseHeaders?: HeaderOptions
  readonly url?: UrlOptions
  readonly body?: (parsed: unknown) => unknown
}

export const defaults = (overrides: DefaultRedactorOverrides = {}): Redactor =>
  compose(
    requestHeaders(overrides.requestHeaders),
    responseHeaders(overrides.responseHeaders),
    url(overrides.url),
    ...(overrides.body ? [body(overrides.body)] : []),
  )

68
packages/http-recorder/src/schema.ts
Normal file
@@ -0,0 +1,68 @@
import { Schema } from "effect"

export const RequestSnapshotSchema = Schema.Struct({
  method: Schema.String,
  url: Schema.String,
  headers: Schema.Record(Schema.String, Schema.String),
  body: Schema.String,
})
export type RequestSnapshot = Schema.Schema.Type<typeof RequestSnapshotSchema>

export const ResponseSnapshotSchema = Schema.Struct({
  status: Schema.Number,
  headers: Schema.Record(Schema.String, Schema.String),
  body: Schema.String,
  bodyEncoding: Schema.optional(Schema.Literals(["text", "base64"])),
})
export type ResponseSnapshot = Schema.Schema.Type<typeof ResponseSnapshotSchema>

export const CassetteMetadataSchema = Schema.Record(Schema.String, Schema.Unknown)
export type CassetteMetadata = Schema.Schema.Type<typeof CassetteMetadataSchema>

export const HttpInteractionSchema = Schema.Struct({
  transport: Schema.tag("http"),
  request: RequestSnapshotSchema,
  response: ResponseSnapshotSchema,
})
export type HttpInteraction = Schema.Schema.Type<typeof HttpInteractionSchema>

export const WebSocketFrameSchema = Schema.Union([
  Schema.Struct({ kind: Schema.tag("text"), body: Schema.String }),
  Schema.Struct({ kind: Schema.tag("binary"), body: Schema.String, bodyEncoding: Schema.Literal("base64") }),
])
export type WebSocketFrame = Schema.Schema.Type<typeof WebSocketFrameSchema>

export const WebSocketInteractionSchema = Schema.Struct({
  transport: Schema.tag("websocket"),
  open: Schema.Struct({
    url: Schema.String,
    headers: Schema.Record(Schema.String, Schema.String),
  }),
  client: Schema.Array(WebSocketFrameSchema),
  server: Schema.Array(WebSocketFrameSchema),
})
export type WebSocketInteraction = Schema.Schema.Type<typeof WebSocketInteractionSchema>

export const InteractionSchema = Schema.Union([HttpInteractionSchema, WebSocketInteractionSchema]).pipe(
  Schema.toTaggedUnion("transport"),
)
export type Interaction = Schema.Schema.Type<typeof InteractionSchema>

export const isHttpInteraction = InteractionSchema.guards.http

export const isWebSocketInteraction = InteractionSchema.guards.websocket

export const httpInteractions = (interactions: ReadonlyArray<Interaction>) => interactions.filter(isHttpInteraction)

export const webSocketInteractions = (interactions: ReadonlyArray<Interaction>) =>
  interactions.filter(isWebSocketInteraction)

export const CassetteSchema = Schema.Struct({
  version: Schema.Literal(1),
  metadata: Schema.optional(CassetteMetadataSchema),
  interactions: Schema.Array(InteractionSchema),
})
export type Cassette = Schema.Schema.Type<typeof CassetteSchema>

export const decodeCassette = Schema.decodeUnknownSync(CassetteSchema)
export const encodeCassette = Schema.encodeSync(CassetteSchema)

159
packages/http-recorder/src/websocket.ts
Normal file
@@ -0,0 +1,159 @@
import { Effect, Option, Ref, Scope, Stream } from "effect"
import type { Headers } from "effect/unstable/http"
import * as CassetteService from "./cassette"
import { canonicalizeJson, decodeJson } from "./matching"
import { appendOrFail, makeReplayState, resolveAutoMode } from "./recorder"
import type { RecordReplayMode } from "./effect"
import { defaults, type Redactor } from "./redactor"
import { webSocketInteractions, type CassetteMetadata, type WebSocketFrame } from "./schema"

export interface WebSocketRequest {
  readonly url: string
  readonly headers: Headers.Headers
}

export interface WebSocketConnection<E> {
  readonly sendText: (message: string) => Effect.Effect<void, E>
  readonly messages: Stream.Stream<string | Uint8Array, E>
  readonly close: Effect.Effect<void>
}

export interface WebSocketExecutor<E> {
  readonly open: (request: WebSocketRequest) => Effect.Effect<WebSocketConnection<E>, E>
}

export interface WebSocketRecordReplayOptions<E> {
  readonly name: string
  readonly mode?: RecordReplayMode
  readonly metadata?: CassetteMetadata
  readonly cassette: CassetteService.Interface
  readonly live: WebSocketExecutor<E>
  readonly redactor?: Redactor
  readonly compareClientMessagesAsJson?: boolean
}

const headersRecord = (headers: Headers.Headers): Record<string, string> =>
  Object.fromEntries(
    Object.entries(headers as Record<string, unknown>).filter(
      (entry): entry is [string, string] => typeof entry[1] === "string",
    ),
  )

const encodeFrame = (message: string | Uint8Array): WebSocketFrame =>
  typeof message === "string"
    ? { kind: "text", body: message }
    : { kind: "binary", body: Buffer.from(message).toString("base64"), bodyEncoding: "base64" }

const decodeFrameMessage = (frame: WebSocketFrame): string | Uint8Array =>
  frame.kind === "text" ? frame.body : new Uint8Array(Buffer.from(frame.body, "base64"))

const decodeFrameText = (frame: WebSocketFrame) =>
  frame.kind === "text" ? frame.body : new TextDecoder().decode(Buffer.from(frame.body, "base64"))

const assertEqual = (message: string, actual: unknown, expected: unknown) =>
  Effect.sync(() => {
    if (JSON.stringify(actual) === JSON.stringify(expected)) return
    throw new Error(`${message}: expected ${JSON.stringify(expected)}, received ${JSON.stringify(actual)}`)
  })

const jsonOrText = (value: string) => Option.match(decodeJson(value), { onNone: () => value, onSome: canonicalizeJson })

const compareClientMessage = (actual: string, expected: WebSocketFrame | undefined, index: number, asJson: boolean) => {
  if (!expected)
    return Effect.sync(() => {
      throw new Error(`Unexpected WebSocket client frame ${index + 1}: ${actual}`)
    })
  const expectedText = decodeFrameText(expected)
  if (!asJson) return assertEqual(`WebSocket client frame ${index + 1}`, actual, expectedText)
  return assertEqual(`WebSocket client JSON frame ${index + 1}`, jsonOrText(actual), jsonOrText(expectedText))
}

export const makeWebSocketExecutor = <E>(
  options: WebSocketRecordReplayOptions<E>,
): Effect.Effect<WebSocketExecutor<E>, never, Scope.Scope> =>
  Effect.gen(function* () {
    const requested = options.mode ?? "auto"
    const mode = requested === "auto" ? yield* resolveAutoMode(options.cassette, options.name) : requested
    const redactor = options.redactor ?? defaults()
    const openSnapshot = (request: WebSocketRequest) => {
      const redacted = redactor.request({
        method: "GET",
        url: request.url,
        headers: headersRecord(request.headers),
        body: "",
      })
      return { url: redacted.url, headers: redacted.headers }
    }

    if (mode === "passthrough") return options.live

    if (mode === "record") {
      return {
        open: (request) =>
          Effect.gen(function* () {
            const client: WebSocketFrame[] = []
            const server: WebSocketFrame[] = []
            const connection = yield* options.live.open(request)
            const closed = yield* Ref.make(false)
            // The interaction is appended once, when the connection closes.
            const closeOnce = Effect.gen(function* () {
              if (yield* Ref.getAndSet(closed, true)) return
              yield* connection.close
              yield* appendOrFail(
                options.cassette,
                options.name,
                { transport: "websocket", open: openSnapshot(request), client, server },
                options.metadata,
              ).pipe(Effect.orDie)
            })
            return {
              sendText: (message) =>
                connection
                  .sendText(message)
                  .pipe(Effect.tap(() => Effect.sync(() => client.push(encodeFrame(message))))),
              messages: connection.messages.pipe(
                Stream.map((message) => {
                  server.push(encodeFrame(message))
                  return message
                }),
              ),
              close: closeOnce,
            }
          }),
      }
    }

    const replay = yield* makeReplayState(options.cassette, options.name, webSocketInteractions)

    return {
      open: (request) =>
        Effect.gen(function* () {
          const interactions = yield* replay.load.pipe(Effect.orDie)
          const index = yield* replay.cursor
          const interaction = interactions[index]
          if (!interaction) return yield* Effect.die(new Error(`No recorded WebSocket interaction for ${request.url}`))
          yield* replay.advance
          yield* assertEqual(`WebSocket open frame ${index + 1}`, openSnapshot(request), interaction.open)
          const messageIndex = yield* Ref.make(0)
          return {
            sendText: (message) =>
              Effect.gen(function* () {
                const current = yield* Ref.getAndUpdate(messageIndex, (value) => value + 1)
                yield* compareClientMessage(
                  message,
                  interaction.client[current],
                  current,
                  options.compareClientMessagesAsJson === true,
                )
              }),
            messages: Stream.fromIterable(interaction.server).pipe(Stream.map(decodeFrameMessage)),
            close: Effect.gen(function* () {
              yield* assertEqual(
                `WebSocket client frame count for interaction ${index + 1}`,
                yield* Ref.get(messageIndex),
                interaction.client.length,
              )
            }),
          }
        }),
    }
  })

10
packages/http-recorder/sst-env.d.ts
vendored
Normal file
@@ -0,0 +1,10 @@
/* This file is auto-generated by SST. Do not edit. */
/* tslint:disable */
/* eslint-disable */
/* deno-fmt-ignore-file */
/* biome-ignore-all lint: auto-generated */

/// <reference path="../../sst-env.d.ts" />

import "sst"
export {}

41
packages/http-recorder/test/fixtures/recordings/record-replay/multi-step.json
vendored
Normal file
@@ -0,0 +1,41 @@
{
  "version": 1,
  "interactions": [
    {
      "transport": "http",
      "request": {
        "method": "POST",
        "url": "https://example.test/echo",
        "headers": {
          "content-type": "application/json"
        },
        "body": "{\"step\":1}"
      },
      "response": {
        "status": 200,
        "headers": {
          "content-type": "application/json"
        },
        "body": "{\"reply\":\"first\"}"
      }
    },
    {
      "transport": "http",
      "request": {
        "method": "POST",
        "url": "https://example.test/echo",
        "headers": {
          "content-type": "application/json"
        },
        "body": "{\"step\":2}"
      },
      "response": {
        "status": 200,
        "headers": {
          "content-type": "application/json"
        },
        "body": "{\"reply\":\"second\"}"
      }
    }
  ]
}

41
packages/http-recorder/test/fixtures/recordings/record-replay/retry.json
vendored
Normal file
@@ -0,0 +1,41 @@
{
  "version": 1,
  "interactions": [
    {
      "transport": "http",
      "request": {
        "method": "POST",
        "url": "https://example.test/poll",
        "headers": {
          "content-type": "application/json"
        },
        "body": "{\"id\":\"job_1\"}"
      },
      "response": {
        "status": 200,
        "headers": {
          "content-type": "application/json"
        },
        "body": "{\"status\":\"pending\"}"
      }
    },
    {
      "transport": "http",
      "request": {
        "method": "POST",
        "url": "https://example.test/poll",
        "headers": {
          "content-type": "application/json"
        },
        "body": "{\"id\":\"job_1\"}"
      },
      "response": {
        "status": 200,
        "headers": {
          "content-type": "application/json"
        },
        "body": "{\"status\":\"complete\"}"
      }
    }
  ]
}

351
packages/http-recorder/test/record-replay.test.ts
Normal file
@@ -0,0 +1,351 @@
import { NodeFileSystem } from "@effect/platform-node"
import { describe, expect, test } from "bun:test"
import { Cause, Effect, Exit, Scope, Stream } from "effect"
import { Headers, HttpBody, HttpClient, HttpClientRequest } from "effect/unstable/http"
import * as fs from "node:fs"
import * as os from "node:os"
import * as path from "node:path"
import { HttpRecorder } from "../src"
import { redactedErrorRequest } from "../src/effect"
import type { Interaction } from "../src/schema"

const seedCassetteDirectory = (directory: string, name: string, interactions: ReadonlyArray<Interaction>) =>
  Effect.runPromise(
    Effect.gen(function* () {
      const cassette = yield* HttpRecorder.Cassette.Service
      yield* Effect.forEach(interactions, (interaction) => cassette.append(name, interaction))
    }).pipe(Effect.provide(HttpRecorder.Cassette.fileSystem({ directory })), Effect.provide(NodeFileSystem.layer)),
  )

const post = (url: string, body: object) =>
  Effect.gen(function* () {
    const http = yield* HttpClient.HttpClient
    const request = HttpClientRequest.post(url, {
      headers: { "content-type": "application/json" },
      body: HttpBody.text(JSON.stringify(body), "application/json"),
    })
    const response = yield* http.execute(request)
    return yield* response.text
  })

const run = <A, E>(effect: Effect.Effect<A, E, HttpClient.HttpClient>) =>
  Effect.runPromise(effect.pipe(Effect.provide(HttpRecorder.cassetteLayer("record-replay/multi-step"))))

const runWith = <A, E>(
  name: string,
  options: HttpRecorder.RecordReplayOptions,
  effect: Effect.Effect<A, E, HttpClient.HttpClient>,
) => Effect.runPromise(effect.pipe(Effect.provide(HttpRecorder.cassetteLayer(name, options))))

const runRecorder = <A, E>(effect: Effect.Effect<A, E, HttpRecorder.Cassette.Service | Scope.Scope>) =>
  Effect.runPromise(
    Effect.scoped(
      effect.pipe(
        Effect.provide(
          HttpRecorder.Cassette.fileSystem({ directory: fs.mkdtempSync(path.join(os.tmpdir(), "http-recorder-")) }),
        ),
        Effect.provide(NodeFileSystem.layer),
      ),
    ),
  )

const failureText = (exit: Exit.Exit<unknown, unknown>) => {
  if (Exit.isSuccess(exit)) return ""
  return Cause.prettyErrors(exit.cause).join("\n")
}

describe("http-recorder", () => {
  test("redacts sensitive URL query parameters", () => {
    expect(
      HttpRecorder.redactUrl(
        "https://example.test/path?key=secret-google-key&api_key=secret-openai-key&safe=value&X-Amz-Signature=secret-signature",
      ),
    ).toBe(
      "https://example.test/path?key=%5BREDACTED%5D&api_key=%5BREDACTED%5D&safe=value&X-Amz-Signature=%5BREDACTED%5D",
    )
  })

  test("redacts URL credentials", () => {
    expect(HttpRecorder.redactUrl("https://user:password@example.test/path?safe=value")).toBe(
      "https://%5BREDACTED%5D:%5BREDACTED%5D@example.test/path?safe=value",
    )
  })

  test("applies custom URL redaction after built-in redaction", () => {
    expect(
      HttpRecorder.redactUrl("https://example.test/accounts/real-account/path?key=secret-key", undefined, (url) =>
        url.replace("/accounts/real-account/", "/accounts/{account}/"),
      ),
    ).toBe("https://example.test/accounts/{account}/path?key=%5BREDACTED%5D")
  })

  test("redacts sensitive headers when allow-listed", () => {
    expect(
      HttpRecorder.redactHeaders(
        {
          authorization: "Bearer secret-token",
          "content-type": "application/json",
          "x-custom-token": "custom-secret",
          "x-api-key": "secret-key",
          "x-goog-api-key": "secret-google-key",
        },
        ["authorization", "content-type", "x-api-key", "x-goog-api-key", "x-custom-token"],
        ["x-custom-token"],
      ),
    ).toEqual({
      authorization: "[REDACTED]",
      "content-type": "application/json",
      "x-api-key": "[REDACTED]",
      "x-custom-token": "[REDACTED]",
      "x-goog-api-key": "[REDACTED]",
    })
  })

  test("redacts error requests without retaining headers, params, or body", () => {
    const request = HttpClientRequest.post("https://example.test/path", {
      headers: { authorization: "Bearer super-secret" },
      body: HttpBody.text("super-secret-body", "text/plain"),
    }).pipe(HttpClientRequest.setUrlParam("api_key", "super-secret-key"))

    expect(redactedErrorRequest(request).toJSON()).toMatchObject({
      url: "https://example.test/path",
      urlParams: { params: [] },
      headers: {},
      body: { _tag: "Empty" },
    })
  })

  test("detects secret-looking values without returning the secret", () => {
    expect(
      HttpRecorder.secretFindings({
        version: 1,
        interactions: [
          {
            transport: "http",
            request: {
              method: "POST",
              url: "https://example.test/path?key=sk-123456789012345678901234",
              headers: {},
              body: JSON.stringify({ nested: "AIzaSyDHibiBRvJZLsFnPYPoiTwxY4ztQ55yqCE" }),
            },
            response: {
              status: 200,
              headers: {},
              body: "Bearer abcdefghijklmnopqrstuvwxyz",
            },
          },
        ],
      }),
    ).toEqual([
      { path: "interactions[0].request.url", reason: "API key" },
      { path: "interactions[0].request.body", reason: "Google API key" },
      { path: "interactions[0].response.body", reason: "bearer token" },
    ])
  })

  test("detects secret-looking values inside metadata", () => {
    expect(
      HttpRecorder.secretFindings({
        version: 1,
        metadata: { token: "sk-123456789012345678901234" },
        interactions: [],
      }),
    ).toEqual([{ path: "metadata.token", reason: "API key" }])
  })
test("replays websocket interactions seeded into the in-memory cassette adapter", async () => {
|
||||
await Effect.runPromise(
|
||||
Effect.scoped(
|
||||
Effect.gen(function* () {
|
||||
const cassette = yield* HttpRecorder.Cassette.Service
|
||||
const executor = yield* HttpRecorder.makeWebSocketExecutor({
|
||||
name: "websocket/replay",
|
||||
cassette,
|
||||
compareClientMessagesAsJson: true,
|
||||
live: { open: () => Effect.die(new Error("unexpected live WebSocket open")) },
|
||||
})
|
||||
const connection = yield* executor.open({
|
||||
url: "wss://example.test/realtime",
|
||||
headers: Headers.fromInput({ "content-type": "application/json" }),
|
||||
})
|
||||
yield* connection.sendText(JSON.stringify({ type: "response.create" }))
|
||||
const messages: Array<string | Uint8Array> = []
|
||||
yield* connection.messages.pipe(Stream.runForEach((message) => Effect.sync(() => messages.push(message))))
|
||||
yield* connection.close
|
||||
|
||||
expect(messages).toEqual([JSON.stringify({ type: "response.completed" })])
|
||||
}).pipe(
|
||||
Effect.provide(
|
||||
HttpRecorder.Cassette.memory({
|
||||
"websocket/replay": [
|
||||
{
|
||||
transport: "websocket",
|
||||
open: { url: "wss://example.test/realtime", headers: { "content-type": "application/json" } },
|
||||
client: [{ kind: "text", body: JSON.stringify({ type: "response.create" }) }],
|
||||
server: [{ kind: "text", body: JSON.stringify({ type: "response.completed" }) }],
|
||||
},
|
||||
],
|
||||
}),
|
||||
),
|
||||
),
|
||||
),
|
||||
)
|
||||
})
|
||||
|
||||
test("records websocket interactions into the shared cassette service", async () => {
|
||||
await runRecorder(
|
||||
Effect.gen(function* () {
|
||||
const cassette = yield* HttpRecorder.Cassette.Service
|
||||
const executor = yield* HttpRecorder.makeWebSocketExecutor({
|
||||
name: "websocket/record",
|
||||
mode: "record",
|
||||
metadata: { provider: "test" },
|
||||
cassette,
|
||||
live: {
|
||||
open: () =>
|
||||
Effect.succeed({
|
||||
sendText: () => Effect.void,
|
||||
messages: Stream.fromIterable([JSON.stringify({ type: "response.completed" })]),
|
||||
close: Effect.void,
|
||||
}),
|
||||
},
|
||||
})
|
||||
const connection = yield* executor.open({
|
||||
url: "wss://example.test/realtime",
|
||||
headers: Headers.fromInput({ "content-type": "application/json" }),
|
||||
})
|
||||
yield* connection.sendText(JSON.stringify({ type: "response.create" }))
|
||||
yield* connection.messages.pipe(Stream.runDrain)
|
||||
yield* connection.close
|
||||
|
||||
expect(yield* cassette.read("websocket/record")).toMatchObject([
|
||||
{
|
||||
transport: "websocket",
|
||||
open: { url: "wss://example.test/realtime", headers: { "content-type": "application/json" } },
|
||||
client: [{ kind: "text", body: JSON.stringify({ type: "response.create" }) }],
|
||||
server: [{ kind: "text", body: JSON.stringify({ type: "response.completed" }) }],
|
||||
},
|
||||
])
|
||||
}),
|
||||
)
|
||||
})
|
||||
|
||||
test("default matcher dispatches multi-interaction cassettes by request shape", async () => {
|
||||
await run(
|
||||
Effect.gen(function* () {
|
||||
expect(yield* post("https://example.test/echo", { step: 2 })).toBe('{"reply":"second"}')
|
||||
expect(yield* post("https://example.test/echo", { step: 1 })).toBe('{"reply":"first"}')
|
||||
}),
|
||||
)
|
||||
})
|
||||
|
||||
test("sequential dispatch returns recorded responses in order for identical requests", async () => {
|
||||
await runWith(
|
||||
"record-replay/retry",
|
||||
{ dispatch: "sequential" },
|
||||
Effect.gen(function* () {
|
||||
expect(yield* post("https://example.test/poll", { id: "job_1" })).toBe('{"status":"pending"}')
|
||||
expect(yield* post("https://example.test/poll", { id: "job_1" })).toBe('{"status":"complete"}')
|
||||
}),
|
||||
)
|
||||
})
|
||||
|
||||
test("default matcher returns the first match for identical requests", async () => {
|
||||
await runWith(
|
||||
"record-replay/retry",
|
||||
{},
|
||||
Effect.gen(function* () {
|
||||
expect(yield* post("https://example.test/poll", { id: "job_1" })).toBe('{"status":"pending"}')
|
||||
expect(yield* post("https://example.test/poll", { id: "job_1" })).toBe('{"status":"pending"}')
|
||||
}),
|
||||
)
|
||||
})
|
||||
|
||||
test("sequential dispatch reports cursor exhaustion when more requests are made than recorded", async () => {
|
||||
await runWith(
|
||||
"record-replay/multi-step",
|
||||
{ dispatch: "sequential" },
|
||||
Effect.gen(function* () {
|
||||
yield* post("https://example.test/echo", { step: 1 })
|
||||
yield* post("https://example.test/echo", { step: 2 })
|
||||
const exit = yield* Effect.exit(post("https://example.test/echo", { step: 3 }))
|
||||
expect(Exit.isFailure(exit)).toBe(true)
|
||||
}),
|
||||
)
|
||||
})
|
||||
|
||||
test("sequential dispatch still validates each recorded request", async () => {
|
||||
await runWith(
|
||||
"record-replay/multi-step",
|
||||
{ dispatch: "sequential" },
|
||||
Effect.gen(function* () {
|
||||
yield* post("https://example.test/echo", { step: 1 })
|
||||
const exit = yield* Effect.exit(post("https://example.test/echo", { step: 3 }))
|
||||
expect(Exit.isFailure(exit)).toBe(true)
|
||||
expect(failureText(exit)).toContain("$.step expected 2, received 3")
|
||||
expect(yield* post("https://example.test/echo", { step: 2 })).toBe('{"reply":"second"}')
|
||||
}),
|
||||
)
|
||||
})
|
||||
|
||||
test("auto mode replays when the cassette exists", async () => {
|
||||
const directory = fs.mkdtempSync(path.join(os.tmpdir(), "http-recorder-auto-"))
|
||||
await seedCassetteDirectory(directory, "auto-replay", [
|
||||
{
|
||||
transport: "http",
|
||||
request: {
|
||||
method: "POST",
|
||||
url: "https://example.test/echo",
|
||||
headers: { "content-type": "application/json" },
|
||||
body: JSON.stringify({ step: 1 }),
|
||||
},
|
||||
response: { status: 200, headers: { "content-type": "application/json" }, body: '{"reply":"hi"}' },
|
||||
},
|
||||
])
|
||||
|
||||
const result = await runWith(
|
||||
"auto-replay",
|
||||
{ directory, mode: "auto" },
|
||||
post("https://example.test/echo", { step: 1 }),
|
||||
)
|
||||
expect(result).toBe('{"reply":"hi"}')
|
||||
})
|
||||
|
||||
test("auto mode forces replay when CI=true even if cassette is missing", async () => {
|
||||
const directory = fs.mkdtempSync(path.join(os.tmpdir(), "http-recorder-auto-ci-"))
|
||||
const previous = process.env.CI
|
||||
process.env.CI = "true"
|
||||
try {
|
||||
const exit = await Effect.runPromise(
|
||||
Effect.exit(
|
||||
post("https://example.test/echo", { step: 1 }).pipe(
|
||||
Effect.provide(HttpRecorder.cassetteLayer("missing-cassette", { directory, mode: "auto" })),
|
||||
),
|
||||
),
|
||||
)
|
||||
expect(Exit.isFailure(exit)).toBe(true)
|
||||
expect(failureText(exit)).toContain('Fixture "missing-cassette" not found')
|
||||
} finally {
|
||||
if (previous === undefined) delete process.env.CI
|
||||
else process.env.CI = previous
|
||||
}
|
||||
})
|
||||
|
||||
test("mismatch diagnostics show closest redacted request differences", async () => {
|
||||
await run(
|
||||
Effect.gen(function* () {
|
||||
const exit = yield* Effect.exit(
|
||||
post("https://example.test/echo?api_key=secret-value", { step: 3, token: "sk-123456789012345678901234" }),
|
||||
)
|
||||
const message = failureText(exit)
|
||||
expect(message).toContain("closest interaction: #1")
|
||||
expect(message).toContain("url:")
|
||||
expect(message).toContain("https://example.test/echo?api_key=%5BREDACTED%5D")
|
||||
expect(message).toContain("body:")
|
||||
expect(message).toContain("$.step expected 1, received 3")
|
||||
expect(message).toContain('$.token expected undefined, received "[REDACTED]"')
|
||||
expect(message).not.toContain("sk-123456789012345678901234")
|
||||
}),
|
||||
)
|
||||
})
|
||||
})
|
||||
15
packages/http-recorder/tsconfig.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"$schema": "https://json.schemastore.org/tsconfig",
|
||||
"extends": "@tsconfig/bun/tsconfig.json",
|
||||
"compilerOptions": {
|
||||
"lib": ["ESNext", "DOM", "DOM.Iterable"],
|
||||
"noUncheckedIndexedAccess": false,
|
||||
"plugins": [
|
||||
{
|
||||
"name": "@effect/language-service",
|
||||
"transform": "@effect/language-service/transform",
|
||||
"namespaceImportPackages": ["effect", "@effect/*"]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
294
packages/llm/AGENTS.md
Normal file
@@ -0,0 +1,294 @@
|
||||
# LLM Package Guide
|
||||
|
||||
## Effect
|
||||
|
||||
- Prefer `HttpClient.HttpClient` / `HttpClientResponse.HttpClientResponse` over web `fetch` / `Response` at package boundaries.
|
||||
- Use `Stream.Stream` for streaming data flow. Avoid ad hoc async generators or manual web reader loops unless an Effect `Stream` API cannot model the behavior.
|
||||
- Use Effect Schema codecs for JSON encode/decode (`Schema.fromJsonString(...)`) instead of direct `JSON.parse` / `JSON.stringify` in implementation code.
|
||||
- In `Effect.gen`, yield yieldable errors directly (`return yield* new MyError(...)`) instead of `Effect.fail(new MyError(...))`.
|
||||
- Use `Effect.void` instead of `Effect.succeed(undefined)` when the successful value is intentionally void.
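
A minimal sketch of the last two conventions, assuming `MyError` is a yieldable tagged-error class defined elsewhere in the package:

```ts
import { Effect } from "effect"

// MyError is assumed: any yieldable Schema tagged error works here.
const guard = (ok: boolean) =>
  Effect.gen(function* () {
    // Yield the error directly rather than wrapping it in Effect.fail(...).
    if (!ok) return yield* new MyError({ reason: "not ok" })
    // Effect.void instead of Effect.succeed(undefined) for an intentional void.
    return yield* Effect.void
  })
```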
|
||||
|
||||
## Tests
|
||||
|
||||
- Use `testEffect(...)` from `test/lib/effect.ts` for tests requiring Effect layers.
|
||||
- Keep provider tests fixture-first. Live provider calls must stay behind `RECORD=true` and required API-key checks.
|
||||
|
||||
## Architecture
|
||||
|
||||
This package is an Effect Schema-first LLM core. The Schema classes in `src/schema/` are the canonical runtime data model. Convenience functions in `src/llm.ts` are thin constructors that return those same Schema class instances; they should improve callsites without creating a second model.
|
||||
|
||||
### Request Flow
|
||||
|
||||
The intended callsite is:
|
||||
|
||||
```ts
|
||||
const request = LLM.request({
|
||||
model: OpenAI.model("gpt-4o-mini", { apiKey }),
|
||||
system: "You are concise.",
|
||||
prompt: "Say hello.",
|
||||
})
|
||||
|
||||
const response = yield* LLMClient.generate(request)
|
||||
```
|
||||
|
||||
`LLM.request(...)` builds an `LLMRequest`. `LLMClient.generate(...)` selects a registered route by `request.model.route`, builds the provider-native body, asks the route's transport for a real `HttpClientRequest.HttpClientRequest`, sends it through `RequestExecutor.Service`, parses the provider stream into common `LLMEvent`s, and finally returns an `LLMResponse`.
|
||||
|
||||
Use `LLMClient.stream(request)` when callers want incremental `LLMEvent`s. Use `LLMClient.generate(request)` when callers want those same events collected into an `LLMResponse`. Use `LLMClient.prepare<Body>(request)` to compile a request through the route pipeline without sending it — the optional `Body` type argument narrows `.body` to the route's native shape (e.g. `prepare<OpenAIChatBody>(...)` returns a `PreparedRequestOf<OpenAIChatBody>`). The runtime body is identical; the generic is a type-level assertion.
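
A minimal sketch of the `prepare` narrowing, assuming the request targets the OpenAI Chat route; the `OpenAIChatBody` import path is an assumption, not a checked export:

```ts
import { Effect } from "effect"
import { LLMClient, type LLMRequest } from "@opencode-ai/llm"
// Assumed export location for the route-native body type.
import type { OpenAIChatBody } from "@opencode-ai/llm/protocols/openai-chat"

const inspectBody = (request: LLMRequest) =>
  Effect.gen(function* () {
    // Runtime value is identical to prepare(request); the generic only narrows `.body`.
    const prepared = yield* LLMClient.prepare<OpenAIChatBody>(request)
    return prepared.body.model // typed through the native body shape
  })
```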
|
||||
|
||||
Filter or narrow `LLMEvent` streams with `LLMEvent.is.*` (camelCase guards, e.g. `events.filter(LLMEvent.is.toolCall)`). The kebab-case `LLMEvent.guards["tool-call"]` form also works but prefer `is.*` in new code.
|
||||
|
||||
### Routes
|
||||
|
||||
A route is the registered, runnable composition of four orthogonal pieces:
|
||||
|
||||
- **`Protocol`** (`src/route/protocol.ts`) — semantic API contract. Owns request body construction (`body.from`), the body schema (`body.schema`), the streaming-event schema (`stream.event`), and the event-to-`LLMEvent` state machine (`stream.step`). `Route.make(...)` validates and JSON-encodes the body from `body.schema` and decodes frames with `stream.event`. Examples: `OpenAIChat.protocol`, `OpenAIResponses.protocol`, `AnthropicMessages.protocol`, `Gemini.protocol`, `BedrockConverse.protocol`.
|
||||
- **`Endpoint`** (`src/route/endpoint.ts`) — path construction. The host always lives on `model.baseURL`; the endpoint just supplies the path. `Endpoint.path("/chat/completions")` is the common case; pass a function for paths that embed the model id or a body field (e.g. ``Endpoint.path(({ body }) => `/model/${body.modelId}/converse-stream`)``).
|
||||
- **`Auth`** (`src/route/auth.ts`) — per-request transport authentication. Routes read `model.apiKey` at request time via `Auth.bearer` (the default; sets `Authorization: Bearer <apiKey>`) or `Auth.apiKeyHeader(name)` for providers that use a custom header (Anthropic `x-api-key`, Gemini `x-goog-api-key`). Routes that need per-request signing (Bedrock SigV4, future Vertex IAM, Azure AAD) implement `Auth` as a function that signs the body and merges signed headers into the result.
|
||||
- **`Framing`** (`src/route/framing.ts`) — bytes → frames. SSE (`Framing.sse`) is shared; Bedrock keeps its AWS event-stream framing as a typed `Framing<object>` value alongside its protocol.
|
||||
|
||||
Compose them via `Route.make(...)`:
|
||||
|
||||
```ts
|
||||
export const route = Route.make({
|
||||
id: "openai-chat",
|
||||
provider: "openai",
|
||||
protocol: OpenAIChat.protocol,
|
||||
transport: HttpTransport.httpJson({
|
||||
endpoint: Endpoint.path("/chat/completions"),
|
||||
auth: Auth.bearer(),
|
||||
framing: Framing.sse,
|
||||
encodeBody,
|
||||
}),
|
||||
defaults: {
|
||||
baseURL: "https://api.openai.com/v1",
|
||||
capabilities: capabilities({ tools: { calls: true, streamingInput: true } }),
|
||||
},
|
||||
})
|
||||
```
|
||||
|
||||
The four-axis decomposition is the reason DeepSeek, TogetherAI, Cerebras, Baseten, Fireworks, and DeepInfra all reuse `OpenAIChat.protocol` verbatim — each provider deployment is a 5-15 line `Route.make(...)` call instead of a 300-400 line route clone. Bug fixes in one protocol propagate to every consumer of that protocol in a single commit.
|
||||
|
||||
When a provider ships a non-HTTP transport (OpenAI's WebSocket Responses backend, hypothetical bidirectional streaming APIs), the seam is `Transport` — `WebSocketTransport.json(...)` constructs a transport whose `prepare` builds a WebSocket URL and message and whose `frames` yields decoded text from the socket. Same protocol, different transport.
|
||||
|
||||
### URL Construction
|
||||
|
||||
`model.baseURL` is required; `Endpoint` only carries the path. Each route's `Route.make(...)` call includes a canonical URL in `defaults.baseURL` (e.g. `https://api.openai.com/v1`); provider helpers can override by passing `baseURL` in their input. Routes that have no canonical URL (OpenAI-compatible Chat, GitHub Copilot) set `baseURL: string` (required) on their input type so TypeScript catches a missing host at the call site.
|
||||
|
||||
For providers where the URL is derived from typed inputs (Azure resource name, Bedrock region), the provider helper computes `baseURL` at model construction time. Use `AtLeastOne<T>` from `route/auth-options.ts` for inputs that accept either of two derivation paths (Azure: `resourceName` or `baseURL`).
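
A hedged sketch of that derivation; the Azure URL template and option names are illustrative, not the package's actual helper:

```ts
// AtLeastOne<{ resourceName; baseURL }> enforces this at the type level; the
// runtime fallback below is the same decision written out explicitly.
const azureBaseURL = (input: { resourceName?: string; baseURL?: string }): string => {
  if (input.baseURL) return input.baseURL
  if (input.resourceName) return `https://${input.resourceName}.openai.azure.com/openai`
  throw new Error("Azure model requires either resourceName or baseURL")
}
```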
|
||||
|
||||
### Provider Definitions
|
||||
|
||||
Provider-facing APIs are defined with `Provider.make(...)` from `src/provider.ts`:
|
||||
|
||||
```ts
|
||||
export const provider = Provider.make({
|
||||
id: ProviderID.make("openai"),
|
||||
model: responses,
|
||||
apis: { responses, chat },
|
||||
})
|
||||
|
||||
export const model = provider.model
|
||||
export const apis = provider.apis
|
||||
```
|
||||
|
||||
Keep provider definitions small and explicit:
|
||||
|
||||
- Use only `id`, `model`, and optional `apis` in `Provider.make(...)`.
|
||||
- Use branded `ProviderID.make(...)` and `ModelID.make(...)` where ids are constructed directly.
|
||||
- Use `model` for the default API path and `apis` for named provider-native alternatives such as OpenAI `responses` versus `chat`.
|
||||
- Do not add author-facing `kind`, `version`, or `routes` fields.
|
||||
- Export lower-level `routes` arrays separately only when advanced internal wiring needs them.
|
||||
- Prefer `apiKey` as provider-specific sugar and `auth` as the explicit override; keep them mutually exclusive in provider option types with `ProviderAuthOption`.
|
||||
- Resolve `apiKey` → `Auth` with `AuthOptions.bearer(options, "<PROVIDER>_API_KEY")` (it honors an explicit `auth` override and falls back to `Auth.config(envVar)` so missing keys surface a typed `Authentication` error rather than a runtime crash).
|
||||
|
||||
Built-in providers are namespace modules from `src/providers/index.ts`, so aliases like `OpenAI.model(...)`, `OpenAI.responses(...)`, and `OpenAI.apis.chat(...)` are fine. External provider packages should default-export the `Provider.make(...)` result and may add named aliases if useful.
|
||||
|
||||
### Folder layout
|
||||
|
||||
```
|
||||
packages/llm/src/
|
||||
schema/ canonical Schema model, split by concern
|
||||
ids.ts branded IDs, literal types, ProviderMetadata
|
||||
options.ts Generation/Provider/Http options, Capabilities, Limits, ModelRef
|
||||
messages.ts content parts, Message, ToolDefinition, LLMRequest
|
||||
events.ts Usage, individual events, LLMEvent, PreparedRequest, LLMResponse
|
||||
errors.ts error reasons, LLMError, ToolFailure
|
||||
index.ts barrel
|
||||
llm.ts request constructors and convenience helpers
|
||||
route/
|
||||
index.ts @opencode-ai/llm/route advanced barrel
|
||||
client.ts Route.make + LLMClient.prepare/stream/generate
|
||||
executor.ts RequestExecutor service + transport error mapping
|
||||
protocol.ts Protocol type + Protocol.make
|
||||
endpoint.ts Endpoint type + Endpoint.path
|
||||
auth.ts Auth type + Auth.bearer / Auth.apiKeyHeader / Auth.passthrough
|
||||
auth-options.ts ProviderAuthOption shape, AuthOptions.bearer, AtLeastOne helper
|
||||
framing.ts Framing type + Framing.sse
|
||||
transport/ transport implementations
|
||||
index.ts Transport type + HttpTransport / WebSocketTransport namespaces
|
||||
http.ts HttpTransport.httpJson — POST + framing
|
||||
websocket.ts WebSocketTransport.json + WebSocketExecutor service
|
||||
protocols/
|
||||
shared.ts ProviderShared toolkit used inside protocol impls
|
||||
openai-chat.ts protocol + route (compose OpenAIChat.protocol)
|
||||
openai-responses.ts
|
||||
anthropic-messages.ts
|
||||
gemini.ts
|
||||
bedrock-converse.ts
|
||||
bedrock-event-stream.ts framing for AWS event-stream binary frames
|
||||
openai-compatible-chat.ts route that reuses OpenAIChat.protocol, no canonical URL
|
||||
utils/ per-protocol helpers (auth, cache, media, tool-stream, ...)
|
||||
providers/
|
||||
openai-compatible.ts generic compatible helper + family model helpers
|
||||
openai-compatible-profile.ts family defaults (deepseek, togetherai, ...)
|
||||
azure.ts / amazon-bedrock.ts / github-copilot.ts / google.ts / xai.ts / openai.ts / anthropic.ts / openrouter.ts
|
||||
tool.ts typed tool() helper
|
||||
tool-runtime.ts implementation helpers for LLMClient tool execution
|
||||
```
|
||||
|
||||
The dependency arrow points down: `providers/*.ts` files import `protocols`, `endpoint`, `auth`, and `framing`; protocols do not import provider metadata. Lower-level modules know nothing about specific providers.
|
||||
|
||||
### Shared protocol helpers
|
||||
|
||||
`ProviderShared` exports a small toolkit used inside protocol implementations to keep them focused on provider-native shapes:
|
||||
|
||||
- `joinText(parts)` — joins an array of `TextPart` (or anything with a `.text`) with newlines. Use this anywhere a protocol flattens text content into a single string for a provider field.
|
||||
- `parseToolInput(route, name, raw)` — Schema-decodes a tool-call argument string with the canonical "Invalid JSON input for `<route>` tool call `<name>`" error message. Treats empty input as `{}`.
|
||||
- `parseJson(route, raw, message)` — generic JSON-via-Schema decode for non-tool bodies.
|
||||
- `eventError(route, message, ...)` — typed `InvalidProviderOutput` constructor for stream-time decode failures.
|
||||
- `validateWith(decoder)` — maps Schema decode errors to `InvalidRequest`. `Route.make(...)` uses this for body validation; lower-level routes can reuse it.
|
||||
- `matchToolChoice(provider, choice, branches)` — branches over `LLMRequest["toolChoice"]` for provider-specific lowering.
|
||||
|
||||
If you find yourself copying a 3-to-5-line snippet between two protocols, lift it into `ProviderShared` next to these helpers rather than duplicating.
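
A hedged usage sketch of two of these helpers inside a protocol's lowering code; signatures follow the descriptions above rather than checked source, and the import path is assumed:

```ts
import { Effect } from "effect"
// Assumed barrel path; the toolkit lives in src/protocols/shared.ts.
import { ProviderShared } from "@opencode-ai/llm/protocols"

const lowerToolCall = (call: { name: string; rawInput: string }) =>
  Effect.gen(function* () {
    // Schema-decodes the argument string; empty input is treated as {}.
    const input = yield* ProviderShared.parseToolInput("openai-chat", call.name, call.rawInput)
    return { name: call.name, arguments: input }
  })

// Flattens text parts into a single provider string field.
const systemText = (parts: ReadonlyArray<{ text: string }>) => ProviderShared.joinText(parts)
```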
|
||||
|
||||
### Tools
|
||||
|
||||
Tool loops are represented in common messages and events:
|
||||
|
||||
```ts
|
||||
const call = LLM.toolCall({ id: "call_1", name: "lookup", input: { query: "weather" } })
|
||||
const result = LLM.toolMessage({ id: "call_1", name: "lookup", result: { forecast: "sunny" } })
|
||||
|
||||
const followUp = LLM.request({
|
||||
model,
|
||||
messages: [LLM.user("Weather?"), LLM.assistant([call]), result],
|
||||
})
|
||||
```
|
||||
|
||||
Routes lower these into provider-native assistant tool-call messages and tool-result messages. Streaming providers should emit `tool-input-delta` events while arguments arrive, then a final `tool-call` event with parsed input.
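
For concreteness, one streamed call should produce an event sequence like the following; ids and the delta field name are illustrative, not a checked schema:

```ts
// Two argument deltas while the provider streams, then the parsed terminal call.
const streamedToolCallEvents = [
  { type: "tool-input-delta", id: "call_1", delta: '{"query":' },
  { type: "tool-input-delta", id: "call_1", delta: '"weather"}' },
  { type: "tool-call", id: "call_1", name: "lookup", input: { query: "weather" } },
]
```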
|
||||
|
||||
### Tool runtime
|
||||
|
||||
`LLM.stream({ request, tools })` executes model-requested tools with full type safety. Plain `LLM.stream(request)` only streams the model; if `request.tools` contains schemas, tool calls are returned for the caller to handle. Use `toolExecution: "none"` to pass executable tool definitions as schemas without invoking handlers. Add `stopWhen` to opt into follow-up model rounds after tool results.
|
||||
|
||||
```ts
|
||||
const get_weather = tool({
|
||||
description: "Get current weather for a city",
|
||||
parameters: Schema.Struct({ city: Schema.String }),
|
||||
success: Schema.Struct({ temperature: Schema.Number, condition: Schema.String }),
|
||||
execute: ({ city }) =>
|
||||
Effect.gen(function* () {
|
||||
// city: string — typed from parameters Schema
|
||||
const data = yield* WeatherApi.fetch(city)
|
||||
return { temperature: data.temp, condition: data.cond }
|
||||
// return type checked against success Schema
|
||||
}),
|
||||
})
|
||||
|
||||
const events = yield* LLM.stream({
|
||||
request,
|
||||
tools: { get_weather, get_time, ... },
|
||||
stopWhen: LLM.stepCountIs(10),
|
||||
}).pipe(Stream.runCollect)
|
||||
```
|
||||
|
||||
The runtime:
|
||||
|
||||
- Adds tool definitions (derived from each tool's `parameters` Schema via `Schema.toJsonSchemaDocument`) onto `request.tools`.
|
||||
- Streams the model.
|
||||
- On `tool-call`: looks up the named tool, decodes input against `parameters` Schema, dispatches to the typed `execute`, encodes the result against `success` Schema, emits `tool-result`.
|
||||
- Emits local `tool-result` events in the same step by default.
|
||||
- Loops only when `stopWhen` is provided and the step finishes with `tool-calls`, appending the assistant + tool messages.
|
||||
|
||||
Handler dependencies (services, permissions, plugin hooks, abort handling) are closed over by the consumer at tool-construction time. The runtime's only environment requirement is `RequestExecutor.Service`. Build the tools record inside an `Effect.gen` once and reuse it across many runs.
|
||||
|
||||
Errors must be expressed as `ToolFailure`. The runtime catches it and emits a `tool-error` event, then a `tool-result` of `type: "error"`, so the model can self-correct on the next step. Anything that is not a `ToolFailure` is treated as a defect and fails the stream. Three recoverable error paths produce `tool-error` events:
|
||||
|
||||
- The model called an unknown tool name.
|
||||
- Input failed the `parameters` Schema.
|
||||
- The handler returned a `ToolFailure`.
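
A hedged sketch of the third path, a handler surfacing a recoverable failure. Only the `ToolFailure` name comes from this guide; its constructor shape and the `Db.find` helper are illustrative (imports as in the `tool(...)` example above):

```ts
const lookup = tool({
  description: "Look up a record by id",
  parameters: Schema.Struct({ id: Schema.String }),
  success: Schema.Struct({ value: Schema.String }),
  execute: ({ id }) =>
    Db.find(id).pipe(
      // Recoverable: the runtime emits tool-error plus an error tool-result so
      // the model can self-correct. Unmapped errors fail the stream as defects.
      Effect.mapError((cause) => new ToolFailure({ message: `lookup failed for ${id}: ${String(cause)}` })),
    ),
})
```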
|
||||
|
||||
Provider-defined / hosted tools (Anthropic `web_search` / `code_execution` / `web_fetch`, OpenAI Responses `web_search_call` / `file_search_call` / `code_interpreter_call` / `mcp_call` / `local_shell_call` / `image_generation_call` / `computer_use_call`) pass through the runtime untouched:
|
||||
|
||||
- Routes surface the model's call as a `tool-call` event with `providerExecuted: true`, and the provider's result as a matching `tool-result` event with `providerExecuted: true`.
|
||||
- The runtime detects `providerExecuted` on `tool-call` and **skips client dispatch** — no handler is invoked and no `tool-error` is raised for "unknown tool". The provider already executed it.
|
||||
- Both events are appended to the assistant message in `assistantContent` so the next round's history carries the call + result for context. Anthropic encodes them back as `server_tool_use` + `web_search_tool_result` (or `code_execution_tool_result` / `web_fetch_tool_result`) blocks; OpenAI Responses callers typically use `previous_response_id` instead of resending hosted-tool items.
|
||||
|
||||
Add provider-defined tools to `request.tools` (no runtime entry needed). The matching route must know how to lower the tool definition into the provider-native shape; right now Anthropic accepts `web_search` / `code_execution` / `web_fetch` and OpenAI Responses accepts the hosted tool names listed above.
|
||||
|
||||
## Protocol File Style
|
||||
|
||||
Protocol files should look self-similar. Provider quirks belong behind named helpers so a new route can be reviewed by comparing the same sections across files.
|
||||
|
||||
### Section order
|
||||
|
||||
Use this order for every protocol module:
|
||||
|
||||
1. Public model input
|
||||
2. Request body schema
|
||||
3. Streaming event schema
|
||||
4. Parser state
|
||||
5. Request body construction (`fromRequest`)
|
||||
6. Stream parsing (`step` and per-event handlers)
|
||||
7. Protocol and route
|
||||
8. Model helper
|
||||
|
||||
### Rules
|
||||
|
||||
- Keep protocol files focused on the protocol. Move provider-specific projection, signing, media normalization, or other bulky transformations into `src/protocols/utils/*`.
|
||||
- Use `Effect.fn("Provider.fromRequest")` for request body construction entrypoints. Use `Effect.fn(...)` for event handlers that yield effects; keep purely synchronous handlers as plain functions returning a `StepResult` that the dispatcher lifts via `Effect.succeed(...)`.
|
||||
- Parser state owns terminal information. The state machine records finish reason, usage, and pending tool calls; emit one terminal `request-finish` (or `provider-error`) when a `terminal` event arrives. If a provider splits reason and usage across events, merge them in parser state before flushing.
|
||||
- Emit exactly one terminal `request-finish` event for a completed response. Use `stream.terminal` to signal the run is over and have `step` emit the final event.
|
||||
- Use shared helpers for repeated protocol policy such as text joining, usage totals, JSON parsing, and tool-call accumulation. `ToolStream` (`protocols/utils/tool-stream.ts`) accumulates streamed tool-call arguments uniformly.
|
||||
- Make intentional provider differences explicit in helper names or comments. If two protocol files differ visually, the reason should be obvious from the names.
|
||||
- Prefer dispatched per-event handlers (`onMessageStart`, `onContentBlockDelta`, ...) called from a small top-level `step` switch over a long if-chain. The dispatcher keeps the event surface visible at a glance.
|
||||
- Keep tests in the same conceptual order as the protocol: basic prepare, tools prepare, unsupported lowering, text/usage parsing, tool streaming, finish reasons, provider errors.
|
||||
|
||||
### Review checklist
|
||||
|
||||
- Can the file be skimmed side-by-side with `openai-chat.ts` without hunting for equivalent sections?
|
||||
- Are provider quirks named, isolated, and covered by focused tests?
|
||||
- Does request body construction validate unsupported common content at the protocol boundary?
|
||||
- Does stream parsing emit stable common events without leaking provider event order to callers?
|
||||
- Does `toolChoice: "none"` behavior read as intentional?
|
||||
|
||||
## Recording Tests
|
||||
|
||||
Recorded tests use one cassette file per scenario. A cassette holds an ordered array of `{ request, response }` interactions, so multi-step flows (tool loops, retries, polling) record into a single file. Use `recordedTests({ prefix, requires })` and let the helper derive cassette names from test names:
|
||||
|
||||
```ts
|
||||
const recorded = recordedTests({ prefix: "openai-chat", requires: ["OPENAI_API_KEY"] })
|
||||
|
||||
recorded.effect("streams text", () =>
|
||||
Effect.gen(function* () {
|
||||
// test body
|
||||
}),
|
||||
)
|
||||
```
|
||||
|
||||
Replay is the default. `RECORD=true` records fresh cassettes and requires the listed env vars. Cassettes are written as pretty-printed JSON so multi-interaction diffs stay reviewable.
|
||||
|
||||
Pass `provider`, `protocol`, and optional `tags` to `recordedTests(...)` / `recorded.effect.with(...)` so cassettes carry searchable metadata. Use recorded-test filters to replay or record a narrow subset without rewriting a whole file:
|
||||
|
||||
- `RECORDED_PROVIDER=openai` matches tests tagged with `provider:openai`; comma-separated values are allowed.
|
||||
- `RECORDED_PREFIX=openai-chat` matches cassette groups by `recordedTests({ prefix })`; comma-separated values are allowed.
|
||||
- `RECORDED_TAGS=tool` requires all listed tags to be present, e.g. `RECORDED_TAGS=provider:togetherai,tool`.
|
||||
- `RECORDED_TEST="streams text"` matches by test name, kebab-case test id, or cassette path.
|
||||
|
||||
Filters apply in replay and record mode. Combine them with `RECORD=true` when refreshing only one provider or scenario.
|
||||
|
||||
**Binary response bodies.** Most providers stream text (SSE, JSON). AWS Bedrock streams binary AWS event-stream frames whose CRC32 fields would be mangled by a UTF-8 round-trip — those bodies are stored as base64 with `bodyEncoding: "base64"` on the response snapshot. Detection is by `Content-Type` in `@opencode-ai/http-recorder` (currently `application/vnd.amazon.eventstream` and `application/octet-stream`); cassettes for SSE/JSON routes omit the field and decode as text.
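
An illustrative response snapshot with the field set, written as a TS literal (values made up):

```ts
// Binary bodies round-trip as base64; text bodies omit bodyEncoding entirely.
const binaryResponse = {
  status: 200,
  headers: { "content-type": "application/vnd.amazon.eventstream" },
  bodyEncoding: "base64",
  body: "AAAAlgAAAFL...", // truncated placeholder for the encoded frames
}
```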
|
||||
|
||||
**Matching strategies.** Replay defaults to structural matching, which finds an interaction by comparing method, URL, allow-listed headers, and the canonical JSON body. This is the right choice for tool loops because each round's request differs (the message history grows). For scenarios where successive requests are byte-identical and expect different responses (retries, polling), pass `dispatch: "sequential"` in `RecordReplayOptions` — replay then walks the cassette in record order via an internal cursor. `scriptedResponses` (in `test/lib/http.ts`) is the deterministic counterpart for tests that don't need a live provider; it scripts response bodies in order without reading from disk.
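
A hedged sketch of opting into sequential dispatch when providing the recorder layer, mirroring the option exercised in `record-replay.test.ts`:

```ts
// Replays byte-identical requests in record order instead of first-match.
const layer = HttpRecorder.cassetteLayer("provider/poll", { dispatch: "sequential" })
```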
|
||||
|
||||
Do not blanket re-record an entire test file when adding one cassette. `RECORD=true` rewrites every recorded case that runs, and provider streams contain volatile IDs, timestamps, fingerprints, and obfuscation fields. Prefer deleting the one cassette you intend to refresh, or run a focused test pattern that only registers the scenario you want to record. Keep stable existing cassettes unchanged unless their request shape or expected behavior changed.
|
||||
129
packages/llm/README.md
Normal file
@@ -0,0 +1,129 @@
|
||||
# @opencode-ai/llm
|
||||
|
||||
Schema-first LLM core for opencode. One typed request, response, event, and tool language; provider quirks live in adapters, not in calling code.
|
||||
|
||||
```ts
|
||||
import { Effect } from "effect"
|
||||
import { LLM, LLMClient } from "@opencode-ai/llm"
|
||||
import { OpenAI } from "@opencode-ai/llm/providers"
|
||||
|
||||
const model = OpenAI.model("gpt-4o-mini", { apiKey: process.env.OPENAI_API_KEY })
|
||||
|
||||
const request = LLM.request({
|
||||
model,
|
||||
system: "You are concise.",
|
||||
prompt: "Say hello in one short sentence.",
|
||||
generation: { maxTokens: 40 },
|
||||
})
|
||||
|
||||
const program = Effect.gen(function* () {
|
||||
const response = yield* LLMClient.generate(request)
|
||||
console.log(response.text)
|
||||
})
|
||||
```
|
||||
|
||||
Run `LLMClient.stream(request)` instead of `generate` when you want incremental `LLMEvent`s. The event stream is provider-neutral — same shape across OpenAI Chat, OpenAI Responses, Anthropic Messages, Gemini, Bedrock Converse, and any OpenAI-compatible deployment.
|
||||
|
||||
## Public API
|
||||
|
||||
- **`LLM.request({...})`** — build a provider-neutral `LLMRequest`. Accepts ergonomic inputs (`system: string`, `prompt: string`) that normalize into the canonical Schema classes.
|
||||
- **`LLM.generate` / `LLM.stream`** — re-exported from `LLMClient` for one-import use.
|
||||
- **`LLM.user(...)` / `LLM.assistant(...)` / `LLM.toolMessage(...)`** — message constructors.
|
||||
- **`LLM.toolCall(...)` / `LLM.toolResult(...)` / `LLM.toolDefinition(...)`** — tool-related parts.
|
||||
- **`LLMClient.prepare(request)`** — compile a request through protocol body construction, validation, and HTTP preparation without sending. Useful for inspection and testing.
|
||||
- **`LLMEvent.is.*`** — typed guards (`is.text`, `is.toolCall`, `is.requestFinish`, …) for filtering streams.
|
||||
|
||||
## Caching
|
||||
|
||||
Prompt caching is **on by default**. Every `LLMRequest` resolves to `cache: "auto"` unless the caller opts out with `cache: "none"`. Each protocol translates `CacheHint`s to its wire format (`cache_control` on Anthropic, `cachePoint` on Bedrock; OpenAI and Gemini do implicit caching server-side and don't need inline markers — auto is a no-op there).
|
||||
|
||||
### Auto placement
|
||||
|
||||
`"auto"` places three breakpoints — last tool definition, last system part, latest user message. The last-user-message boundary is the load-bearing detail: in a tool-use loop, a single user turn expands into many assistant/tool round-trips, all sharing that prefix. Caching at that boundary lets every intra-turn API call hit.
|
||||
|
||||
The math justifies the default: Anthropic's 5-minute cache write is 1.25× base, read is 0.1×, so a single reuse within 5 minutes already wins. One-shot completions below the per-model minimum-cacheable-token threshold silently no-op on the wire, so the worst case is harmless.
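
A worked instance of that claim, with prices normalized to the base input rate:

```ts
// Two calls within the 5-minute TTL sharing a 1000-token prefix.
const tokens = 1000
const uncached = 2 * tokens * 1.0 // both calls pay full price: 2000
const cached = tokens * 1.25 + tokens * 0.1 // one write + one read: 1350
// 1350 < 2000, so a single reuse already wins; each further read adds only 0.1×.
```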
|
||||
|
||||
### Opting out
|
||||
|
||||
```ts
|
||||
LLM.request({
|
||||
model,
|
||||
system,
|
||||
prompt: "one-off question",
|
||||
cache: "none",
|
||||
})
|
||||
```
|
||||
|
||||
### Granular policy
|
||||
|
||||
```ts
|
||||
cache: {
|
||||
tools?: boolean,
|
||||
system?: boolean,
|
||||
messages?: "latest-user-message" | "latest-assistant" | { tail: number },
|
||||
ttlSeconds?: number, // ≥ 3600 → 1h on Anthropic/Bedrock; else 5m
|
||||
}
|
||||
```
|
||||
|
||||
### Manual hints
|
||||
|
||||
Inline `CacheHint` on any text / system / tool / tool-result part overrides automatic placement. The auto policy preserves manual hints; it only fills gaps.
|
||||
|
||||
```ts
|
||||
LLM.request({
|
||||
model,
|
||||
system: [
|
||||
{ type: "text", text: "stable system prompt", cache: { type: "ephemeral" } },
|
||||
],
|
||||
...
|
||||
})
|
||||
```
|
||||
|
||||
### Provider behavior table
|
||||
|
||||
| Protocol | `cache: "auto"` |
|
||||
| ----------------------- | ------------------------------------------------------------------------- |
|
||||
| Anthropic Messages | emits up to 3 `cache_control` markers (4-breakpoint cap enforced) |
|
||||
| Bedrock Converse | emits up to 3 `cachePoint` blocks (4-breakpoint cap enforced) |
|
||||
| OpenAI Chat / Responses | no-op (implicit caching above 1024 tokens) |
|
||||
| Gemini | no-op (implicit caching on 2.5+; explicit `CachedContent` is out-of-band) |
|
||||
|
||||
Normalized cache usage is read back into `response.usage.cacheReadInputTokens` and `cacheWriteInputTokens` across every provider.
|
||||
|
||||
## Providers
|
||||
|
||||
Each provider exports a `model(...)` helper that records identity, protocol, capabilities, auth, and defaults.
|
||||
|
||||
```ts
|
||||
import { Anthropic } from "@opencode-ai/llm/providers"
|
||||
|
||||
const model = Anthropic.model("claude-sonnet-4-6", {
|
||||
apiKey: process.env.ANTHROPIC_API_KEY,
|
||||
})
|
||||
```
|
||||
|
||||
Included providers: OpenAI, Anthropic, Google (Gemini), Amazon Bedrock, Azure OpenAI, Cloudflare, GitHub Copilot, OpenRouter, xAI, plus generic OpenAI-compatible helpers for DeepSeek, Cerebras, Groq, Fireworks, Together, etc.
|
||||
|
||||
## Provider options & HTTP overlays
|
||||
|
||||
Three escape hatches in order of stability:
|
||||
|
||||
1. **`generation`** — portable knobs (`maxTokens`, `temperature`, `topP`, `topK`, penalties, seed, stop).
|
||||
2. **`providerOptions: { <provider>: {...} }`** — typed-at-the-facade provider-specific knobs (OpenAI `promptCacheKey`, Anthropic `thinking`, Gemini `thinkingConfig`, OpenRouter routing).
|
||||
3. **`http: { body, headers, query }`** — last-resort serializable overlays merged into the final HTTP request. Reach for this only when a stable typed path doesn't yet exist.
|
||||
|
||||
Model-level defaults are overridden by request-level values for each axis.
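
A small sketch of that precedence (values illustrative):

```ts
const model = OpenAI.model("gpt-4o-mini", {
  apiKey: process.env.OPENAI_API_KEY,
  generation: { temperature: 0 }, // model-level default
})

const request = LLM.request({
  model,
  prompt: "hi",
  generation: { temperature: 0.7 }, // request-level value wins for this call
})
```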
|
||||
|
||||
## Routes
|
||||
|
||||
Adding a new model or deployment is usually 5–15 lines using `Route.make({ protocol, transport, ... })`. The four orthogonal pieces are protocol (body construction + stream parsing), transport (endpoint + auth + framing + encoding), defaults, and capabilities. See `AGENTS.md` for the architectural detail.
|
||||
|
||||
## Effect
|
||||
|
||||
This package is built on Effect. Public methods return `Effect` or `Stream`; provide `LLMClient.layer` (the default registers every shipped route) for runtime dispatch. The example at `example/tutorial.ts` is a runnable walkthrough.
|
||||
|
||||
## See also
|
||||
|
||||
- `AGENTS.md` — architecture, route construction, contributor guide
|
||||
- `example/tutorial.ts` — runnable end-to-end walkthrough
|
||||
- `test/provider/*.test.ts` — fixture-first protocol tests; `*.recorded.test.ts` files cover live cassettes
|
||||
242
packages/llm/example/tutorial.ts
Normal file
@@ -0,0 +1,242 @@
|
||||
import { Config, Effect, Formatter, Layer, Schema, Stream } from "effect"
|
||||
import { LLM, LLMClient, Provider, ProviderID, Tool, type ProviderModelOptions } from "@opencode-ai/llm"
|
||||
import { Route, Auth, Endpoint, Framing, Protocol, RequestExecutor } from "@opencode-ai/llm/route"
|
||||
import { OpenAI } from "@opencode-ai/llm/providers"
|
||||
|
||||
/**
|
||||
* A runnable walkthrough of the LLM package use-site API.
|
||||
*
|
||||
* Run from `packages/llm` with an OpenAI key in the environment:
|
||||
*
|
||||
* OPENAI_API_KEY=... bun example/tutorial.ts
|
||||
*
|
||||
* The file is intentionally written as a normal TypeScript program. You can
|
||||
* hover imports and local values to see how the public API is typed.
|
||||
*/
|
||||
|
||||
const apiKey = Config.redacted("OPENAI_API_KEY")
|
||||
|
||||
// 1. Pick a model. The provider helper records provider identity, protocol
|
||||
// choice, capabilities, deployment options, authentication, and defaults.
|
||||
const model = OpenAI.model("gpt-4o-mini", {
|
||||
apiKey,
|
||||
generation: { maxTokens: 160 },
|
||||
providerOptions: {
|
||||
openai: { store: false },
|
||||
},
|
||||
})
|
||||
|
||||
// 2. Build a provider-neutral request. This is useful when reusing one request
|
||||
// across generate and stream examples.
|
||||
//
|
||||
// Options can live on both the model and the request:
|
||||
//
|
||||
// - `generation`: common controls such as max tokens, temperature, topP/topK,
|
||||
// penalties, seed, and stop sequences.
|
||||
// - `providerOptions`: namespaced provider-native behavior. For example,
|
||||
// OpenAI cache keys and store behavior, Anthropic thinking, Gemini thinking
|
||||
// config, or OpenRouter routing/reasoning.
|
||||
// - `http`: last-resort serializable overlays for final request body, headers,
|
||||
// and query params. Prefer typed `providerOptions` when a field is stable.
|
||||
//
|
||||
// Model options are defaults. Request options override them for this call.
|
||||
const request = LLM.request({
|
||||
model,
|
||||
system: "You are concise and practical.",
|
||||
prompt: "Tell me a joke",
|
||||
generation: { maxTokens: 80, temperature: 0.7 },
|
||||
providerOptions: {
|
||||
openai: { promptCacheKey: "tutorial-joke" },
|
||||
},
|
||||
})
|
||||
|
||||
// `http` is intentionally not needed for normal calls. This shows the shape for
|
||||
// newly released provider fields before they deserve a typed provider option.
|
||||
const rawOverlayExample = LLM.request({
|
||||
model,
|
||||
prompt: "Show the final HTTP overlay shape.",
|
||||
http: {
|
||||
body: { metadata: { example: "tutorial" } },
|
||||
headers: { "x-opencode-tutorial": "1" },
|
||||
query: { debug: "1" },
|
||||
},
|
||||
})
|
||||
|
||||
// 3. `generate` sends the request and collects the event stream into one
|
||||
// response object. `response.text` is the collected text output.
|
||||
const generateOnce = Effect.gen(function* () {
|
||||
const response = yield* LLM.generate(request)
|
||||
|
||||
console.log("\n== generate ==")
|
||||
console.log("generated text:", response.text)
|
||||
console.log("usage", Formatter.formatJson(response.usage, { space: 2 }))
|
||||
})
|
||||
|
||||
// 4. `stream` exposes provider output as common `LLMEvent`s for UIs that want
|
||||
// incremental text, reasoning, tool input, usage, or finish events.
|
||||
const streamText = LLM.stream(request).pipe(
|
||||
Stream.tap((event) =>
|
||||
Effect.sync(() => {
|
||||
if (event.type === "text-delta") process.stdout.write(`\ntext: ${event.text}`)
|
||||
if (event.type === "request-finish") process.stdout.write(`\nfinish: ${event.reason}\n`)
|
||||
}),
|
||||
),
|
||||
Stream.runDrain,
|
||||
)
|
||||
|
||||
// 5. Tools are typed with Effect Schema. Passing tools to `LLMClient.stream`
|
||||
// adds their definitions to the request and dispatches matching tool calls.
|
||||
// Add `stopWhen` to opt into follow-up model rounds after tool results.
|
||||
const tools = {
|
||||
get_weather: Tool.make({
|
||||
description: "Get current weather for a city.",
|
||||
parameters: Schema.Struct({ city: Schema.String }),
|
||||
success: Schema.Struct({ forecast: Schema.String }),
|
||||
execute: (input) => Effect.succeed({ forecast: `${input.city}: sunny, 72F` }),
|
||||
}),
|
||||
}
|
||||
|
||||
const streamWithTools = LLM.stream({
|
||||
request: LLM.request({
|
||||
model,
|
||||
prompt: "Use get_weather for San Francisco, then answer in one sentence.",
|
||||
generation: { maxTokens: 80, temperature: 0 },
|
||||
}),
|
||||
tools,
|
||||
stopWhen: LLM.stepCountIs(3),
|
||||
}).pipe(
|
||||
Stream.tap((event) =>
|
||||
Effect.sync(() => {
|
||||
if (event.type === "tool-call") console.log("tool call", event.name, event.input)
|
||||
if (event.type === "tool-result") console.log("tool result", event.name, event.result)
|
||||
if (event.type === "text-delta") process.stdout.write(event.text)
|
||||
}),
|
||||
),
|
||||
Stream.runDrain,
|
||||
)
|
||||
|
||||
// 6. `generateObject` is the structured-output helper. It forces a synthetic
|
||||
// tool call internally, so the same call site works across providers instead of
|
||||
// depending on provider-specific JSON mode flags.
|
||||
const WeatherReport = Schema.Struct({
|
||||
city: Schema.String,
|
||||
forecast: Schema.String,
|
||||
highFahrenheit: Schema.Number,
|
||||
})
|
||||
|
||||
const generateStructuredObject = Effect.gen(function* () {
|
||||
const response = yield* LLM.generateObject({
|
||||
model,
|
||||
system: "Return only structured weather data.",
|
||||
prompt: "Give me today's weather for San Francisco.",
|
||||
schema: WeatherReport,
|
||||
generation: { maxTokens: 120, temperature: 0 },
|
||||
})
|
||||
|
||||
console.log("\n== generateObject ==")
|
||||
console.log(Formatter.formatJson(response.object, { space: 2 }))
|
||||
})
|
||||
|
||||
// If the shape is only known at runtime, pass raw JSON Schema instead. The
|
||||
// `.object` type is `unknown`; callers that need static types should validate it.
|
||||
const generateDynamicObject = LLM.generateObject({
|
||||
model,
|
||||
prompt: "Extract the city and forecast from: San Francisco is sunny.",
|
||||
jsonSchema: {
|
||||
type: "object",
|
||||
properties: {
|
||||
city: { type: "string" },
|
||||
forecast: { type: "string" },
|
||||
},
|
||||
required: ["city", "forecast"],
|
||||
},
|
||||
})
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Part 2: provider composition with a fake provider
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
// A protocol is the provider-native API shape: common request -> body, response
|
||||
// frames -> common events. This fake one turns text prompts into a JSON body
|
||||
// and treats every SSE frame as output text.
|
||||
const FakeBody = Schema.Struct({
|
||||
model: Schema.String,
|
||||
input: Schema.String,
|
||||
})
|
||||
type FakeBody = Schema.Schema.Type<typeof FakeBody>
|
||||
|
||||
const FakeProtocol = Protocol.make<FakeBody, string, string, void>({
|
||||
// Protocol ids are open strings, so external packages can define their own
|
||||
// protocols without changing this package.
|
||||
id: "fake-echo",
|
||||
body: {
|
||||
schema: FakeBody,
|
||||
from: (request) =>
|
||||
Effect.succeed({
|
||||
model: request.model.id,
|
||||
input: request.messages
|
||||
.flatMap((message) => message.content)
|
||||
.filter((part) => part.type === "text")
|
||||
.map((part) => part.text)
|
||||
.join("\n"),
|
||||
}),
|
||||
},
|
||||
stream: {
|
||||
event: Schema.String,
|
||||
initial: () => undefined,
|
||||
step: (_, frame) => Effect.succeed([undefined, [{ type: "text-delta", id: "text-0", text: frame }]] as const),
|
||||
onHalt: () => [{ type: "request-finish", reason: "stop" }],
|
||||
},
|
||||
})
|
||||
|
||||
// A route is the runnable binding for that protocol. It adds the deployment
|
||||
// axes that the protocol deliberately does not know: URL, auth, and framing.
|
||||
const FakeAdapter = Route.make({
|
||||
id: "fake-echo",
|
||||
protocol: FakeProtocol,
|
||||
endpoint: Endpoint.path("/v1/echo"),
|
||||
auth: Auth.passthrough,
|
||||
framing: Framing.sse,
|
||||
})
|
||||
|
||||
// A provider module exports a Provider definition. The default `model` helper
|
||||
// sets provider identity, protocol id, and the route id resolved by the registry.
|
||||
const fakeEchoModel = Route.model(FakeAdapter, { provider: "fake-echo", baseURL: "https://fake.local" })
|
||||
const FakeEcho = Provider.make({
|
||||
id: ProviderID.make("fake-echo"),
|
||||
model: (id: string, options: ProviderModelOptions = {}) => fakeEchoModel({ id, ...options }),
|
||||
})
|
||||
|
||||
// `LLMClient.prepare` is the lower-level inspection hook: it compiles through
|
||||
// body conversion, validation, endpoint, auth, and HTTP construction without
|
||||
// sending anything over the network.
|
||||
const inspectFakeProvider = Effect.gen(function* () {
|
||||
const prepared = yield* LLMClient.prepare(
|
||||
LLM.request({
|
||||
model: FakeEcho.model("tiny-echo"),
|
||||
prompt: "Show me the provider pipeline.",
|
||||
}),
|
||||
)
|
||||
|
||||
console.log("\n== fake provider prepare ==")
|
||||
console.log("route:", prepared.route)
|
||||
console.log("body:", Formatter.formatJson(prepared.body, { space: 2 }))
|
||||
})
|
||||
|
||||
// Provide the LLM runtime and the HTTP request executor once. Keep one path
|
||||
// enabled at a time so the tutorial can demonstrate generate, prepare, stream,
|
||||
// or tool-loop behavior without spending tokens on every example.
|
||||
const requestExecutorLayer = RequestExecutor.defaultLayer
|
||||
const llmClientLayer = LLMClient.layer.pipe(Layer.provide(requestExecutorLayer))
|
||||
|
||||
const program = Effect.gen(function* () {
|
||||
// yield* generateOnce
|
||||
// yield* inspectFakeProvider
|
||||
// yield* LLMClient.prepare(rawOverlayExample).pipe(Effect.andThen((prepared) => Effect.sync(() => console.log(prepared.body))))
|
||||
// yield* streamText
|
||||
// yield* generateStructuredObject
|
||||
// yield* generateDynamicObject.pipe(Effect.andThen((response) => Effect.sync(() => console.log(response.object))))
|
||||
yield* streamWithTools
|
||||
}).pipe(Effect.provide(Layer.mergeAll(requestExecutorLayer, llmClientLayer)))
|
||||
|
||||
Effect.runPromise(program)
|
||||
51
packages/llm/package.json
Normal file
@@ -0,0 +1,51 @@
|
||||
{
|
||||
"$schema": "https://json.schemastore.org/package.json",
|
||||
"version": "1.14.48",
|
||||
"name": "@opencode-ai/llm",
|
||||
"type": "module",
|
||||
"license": "MIT",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"setup:recording-env": "bun run script/setup-recording-env.ts",
|
||||
"test": "bun test --timeout 30000",
|
||||
"typecheck": "tsgo --noEmit"
|
||||
},
|
||||
"exports": {
|
||||
".": "./src/index.ts",
|
||||
"./route": "./src/route/index.ts",
|
||||
"./provider": "./src/provider.ts",
|
||||
"./providers": "./src/providers/index.ts",
|
||||
"./providers/amazon-bedrock": "./src/providers/amazon-bedrock.ts",
|
||||
"./providers/anthropic": "./src/providers/anthropic.ts",
|
||||
"./providers/azure": "./src/providers/azure.ts",
|
||||
"./providers/cloudflare": "./src/providers/cloudflare.ts",
|
||||
"./providers/github-copilot": "./src/providers/github-copilot.ts",
|
||||
"./providers/google": "./src/providers/google.ts",
|
||||
"./providers/openai": "./src/providers/openai.ts",
|
||||
"./providers/openai-compatible": "./src/providers/openai-compatible.ts",
|
||||
"./providers/openai-compatible-profile": "./src/providers/openai-compatible-profile.ts",
|
||||
"./providers/openrouter": "./src/providers/openrouter.ts",
|
||||
"./providers/xai": "./src/providers/xai.ts",
|
||||
"./protocols": "./src/protocols/index.ts",
|
||||
"./protocols/anthropic-messages": "./src/protocols/anthropic-messages.ts",
|
||||
"./protocols/bedrock-converse": "./src/protocols/bedrock-converse.ts",
|
||||
"./protocols/gemini": "./src/protocols/gemini.ts",
|
||||
"./protocols/openai-chat": "./src/protocols/openai-chat.ts",
|
||||
"./protocols/openai-compatible-chat": "./src/protocols/openai-compatible-chat.ts",
|
||||
"./protocols/openai-responses": "./src/protocols/openai-responses.ts"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@clack/prompts": "1.0.0-alpha.1",
|
||||
"@effect/platform-node": "catalog:",
|
||||
"@opencode-ai/http-recorder": "workspace:*",
|
||||
"@tsconfig/bun": "catalog:",
|
||||
"@types/bun": "catalog:",
|
||||
"@typescript/native-preview": "catalog:"
|
||||
},
|
||||
"dependencies": {
|
||||
"@smithy/eventstream-codec": "4.2.14",
|
||||
"@smithy/util-utf8": "4.2.2",
|
||||
"aws4fetch": "1.0.20",
|
||||
"effect": "catalog:"
|
||||
}
|
||||
}
|
||||
250
packages/llm/script/recording-cost-report.ts
Normal file
@@ -0,0 +1,250 @@
import * as fs from "node:fs/promises"
import * as path from "node:path"

const RECORDINGS_DIR = path.resolve(import.meta.dir, "..", "test", "fixtures", "recordings")
const MODELS_DEV_URL = "https://models.dev/api.json"

type JsonRecord = Record<string, unknown>

type Pricing = {
  readonly input?: number
  readonly output?: number
  readonly cache_read?: number
  readonly cache_write?: number
  readonly reasoning?: number
}

type Usage = {
  readonly inputTokens: number
  readonly outputTokens: number
  readonly cacheReadTokens: number
  readonly cacheWriteTokens: number
  readonly reasoningTokens: number
  readonly reportedCost: number
}

type Row = Usage & {
  readonly cassette: string
  readonly provider: string
  readonly model: string
  readonly estimatedCost: number
  readonly pricingSource: string
}

const isRecord = (value: unknown): value is JsonRecord =>
  value !== null && typeof value === "object" && !Array.isArray(value)

const asNumber = (value: unknown) => (typeof value === "number" && Number.isFinite(value) ? value : 0)

const asString = (value: unknown) => (typeof value === "string" ? value : undefined)

const readJson = async (file: string) => JSON.parse(await Bun.file(file).text()) as unknown

const walk = async (dir: string): Promise<ReadonlyArray<string>> =>
  (await fs.readdir(dir, { withFileTypes: true }))
    .flatMap((entry) => {
      const file = path.join(dir, entry.name)
      return entry.isDirectory() ? [] : [file]
    })
    .concat(
      ...(await Promise.all(
        (await fs.readdir(dir, { withFileTypes: true }))
          .filter((entry) => entry.isDirectory())
          .map((entry) => walk(path.join(dir, entry.name))),
      )),
    )

const providerFromUrl = (url: string) => {
  if (url.includes("api.openai.com")) return "openai"
  if (url.includes("api.anthropic.com")) return "anthropic"
  if (url.includes("generativelanguage.googleapis.com")) return "google"
  if (url.includes("bedrock")) return "amazon-bedrock"
  if (url.includes("openrouter.ai")) return "openrouter"
  if (url.includes("api.x.ai")) return "xai"
  if (url.includes("api.groq.com")) return "groq"
  if (url.includes("api.deepseek.com")) return "deepseek"
  if (url.includes("api.together.xyz")) return "togetherai"
  return "unknown"
}

const providerAliases: Record<string, ReadonlyArray<string>> = {
  openai: ["openai"],
  anthropic: ["anthropic"],
  google: ["google"],
  "amazon-bedrock": ["amazon-bedrock"],
  openrouter: ["openrouter", "openai", "anthropic", "google"],
  xai: ["xai"],
  groq: ["groq"],
  deepseek: ["deepseek"],
  togetherai: ["togetherai"],
}

const modelAliases = (model: string) => [
  model,
  model.replace(/^models\//, ""),
  model.replace(/-\d{8}$/, ""),
  model.replace(/-\d{4}-\d{2}-\d{2}$/, ""),
  model.replace(/-\d{4}-\d{2}-\d{2}$/, "").replace(/-\d{8}$/, ""),
  model.replace(/^openai\//, ""),
  model.replace(/^anthropic\//, ""),
  model.replace(/^google\//, ""),
]
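
// Worked example of the alias expansion above, with hypothetical ids:
// "claude-3-5-sonnet-20241022" also yields "claude-3-5-sonnet" via /-\d{8}$/,
// and "models/gemini-2.0-flash" also yields "gemini-2.0-flash" via /^models\//.
// Duplicate aliases are harmless because pricingFor returns on the first hit.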

const pricingFor = (models: JsonRecord, provider: string, model: string) => {
  for (const providerID of providerAliases[provider] ?? [provider]) {
    const providerEntry = models[providerID]
    if (!isRecord(providerEntry) || !isRecord(providerEntry.models)) continue
    for (const modelID of modelAliases(model)) {
      const modelEntry = providerEntry.models[modelID]
      if (isRecord(modelEntry) && isRecord(modelEntry.cost))
        return { pricing: modelEntry.cost as Pricing, source: `${providerID}/${modelID}` }
    }
  }
  return { pricing: undefined, source: "missing" }
}

const estimateCost = (usage: Usage, pricing: Pricing | undefined) => {
  if (!pricing) return 0
  return (
    (usage.inputTokens * (pricing.input ?? 0) +
      usage.outputTokens * (pricing.output ?? 0) +
      usage.cacheReadTokens * (pricing.cache_read ?? 0) +
      usage.cacheWriteTokens * (pricing.cache_write ?? 0) +
      usage.reasoningTokens * (pricing.reasoning ?? 0)) /
      1_000_000
  )
}
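
// Worked example of the division above: pricing is per million tokens, so at
// $3.00 input / $15.00 output, a cassette with 1,000 input and 200 output
// tokens estimates to (1000 * 3 + 200 * 15) / 1_000_000 = $0.006.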

const emptyUsage = (): Usage => ({
  inputTokens: 0,
  outputTokens: 0,
  cacheReadTokens: 0,
  cacheWriteTokens: 0,
  reasoningTokens: 0,
  reportedCost: 0,
})

const addUsage = (a: Usage, b: Usage): Usage => ({
  inputTokens: a.inputTokens + b.inputTokens,
  outputTokens: a.outputTokens + b.outputTokens,
  cacheReadTokens: a.cacheReadTokens + b.cacheReadTokens,
  cacheWriteTokens: a.cacheWriteTokens + b.cacheWriteTokens,
  reasoningTokens: a.reasoningTokens + b.reasoningTokens,
  reportedCost: a.reportedCost + b.reportedCost,
})

const usageFromObject = (usage: unknown): Usage => {
  if (!isRecord(usage)) return emptyUsage()
  const promptDetails = isRecord(usage.prompt_tokens_details) ? usage.prompt_tokens_details : {}
  const completionDetails = isRecord(usage.completion_tokens_details) ? usage.completion_tokens_details : {}
  const inputDetails = isRecord(usage.input_tokens_details) ? usage.input_tokens_details : {}
  const outputDetails = isRecord(usage.output_tokens_details) ? usage.output_tokens_details : {}
  const cacheWriteTokens = asNumber(promptDetails.cache_write_tokens) + asNumber(inputDetails.cache_write_tokens)
  return {
    inputTokens: asNumber(usage.prompt_tokens) + asNumber(usage.input_tokens),
    outputTokens: asNumber(usage.completion_tokens) + asNumber(usage.output_tokens),
    cacheReadTokens: asNumber(promptDetails.cached_tokens) + asNumber(inputDetails.cached_tokens),
    cacheWriteTokens,
    reasoningTokens: asNumber(completionDetails.reasoning_tokens) + asNumber(outputDetails.reasoning_tokens),
    reportedCost: asNumber(usage.cost),
  }
}

const jsonPayloads = (body: string) =>
  body
    .split("\n")
    .map((line) => line.trim())
    .filter((line) => line.startsWith("data:"))
    .map((line) => line.slice("data:".length).trim())
    .filter((line) => line !== "" && line !== "[DONE]")
    .flatMap((line) => {
      try {
        return [JSON.parse(line) as unknown]
      } catch {
        return []
      }
    })
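
// Example: an SSE body containing
//   data: {"usage":{"prompt_tokens":5}}
//   data: [DONE]
// yields exactly one parsed payload; the "[DONE]" sentinel, non-"data:" lines,
// and unparseable JSON are all dropped rather than thrown.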

const usageFromResponseBody = (body: string) =>
  jsonPayloads(body).reduce<Usage>((usage, payload) => {
    if (!isRecord(payload)) return usage
    return addUsage(
      usage,
      addUsage(
        usageFromObject(payload.usage),
        usageFromObject(isRecord(payload.response) ? payload.response.usage : undefined),
      ),
    )
  }, emptyUsage())

const modelFromRequest = (request: unknown) => {
  if (!isRecord(request)) return "unknown"
  const requestBody = asString(request.body)
  if (!requestBody) return "unknown"
  try {
    const body = JSON.parse(requestBody) as unknown
    if (!isRecord(body)) return "unknown"
    return asString(body.model) ?? "unknown"
  } catch {
    return "unknown"
  }
}

const rowFor = (models: JsonRecord, file: string, cassette: unknown): Row | undefined => {
  if (!isRecord(cassette) || !Array.isArray(cassette.interactions)) return undefined
  const first = cassette.interactions.find(isRecord)
  if (!first || !isRecord(first.request)) return undefined
  const provider = providerFromUrl(asString(first.request.url) ?? "")
  const model = modelFromRequest(first.request)
  const usage = cassette.interactions.filter(isRecord).reduce<Usage>((total, interaction) => {
    if (!isRecord(interaction.response)) return total
    const responseBody = asString(interaction.response.body)
    if (!responseBody) return total
    return addUsage(total, usageFromResponseBody(responseBody))
  }, emptyUsage())
  const priced = pricingFor(models, provider, model)
  return {
    cassette: path.relative(RECORDINGS_DIR, file),
    provider,
    model,
    ...usage,
    estimatedCost: estimateCost(usage, priced.pricing),
    pricingSource: priced.source,
  }
}

const money = (value: number) => (value === 0 ? "$0.000000" : `$${value.toFixed(6)}`)
const tokens = (value: number) => value.toLocaleString("en-US")

const models = (await (await fetch(MODELS_DEV_URL)).json()) as JsonRecord
const rows = (
  await Promise.all(
    (await walk(RECORDINGS_DIR))
      .filter((file) => file.endsWith(".json"))
      .map(async (file) => rowFor(models, file, await readJson(file))),
  )
).filter((row): row is Row => row !== undefined)

const totals = rows.reduce(
  (total, row) => ({
    ...addUsage(total, row),
    estimatedCost: total.estimatedCost + row.estimatedCost,
  }),
  { ...emptyUsage(), estimatedCost: 0 },
)

console.log("# Recording Cost Report")
console.log("")
console.log(`Pricing: ${MODELS_DEV_URL}`)
console.log(`Cassettes: ${rows.length}`)
console.log(`Reported cost: ${money(totals.reportedCost)}`)
console.log(`Estimated cost: ${money(totals.estimatedCost)}`)
console.log("")
console.log("| Provider | Model | Input | Output | Reasoning | Reported | Estimated | Pricing | Cassette |")
console.log("|---|---:|---:|---:|---:|---:|---:|---|---|")
for (const row of rows.toSorted((a, b) => b.reportedCost + b.estimatedCost - (a.reportedCost + a.estimatedCost))) {
  if (row.inputTokens + row.outputTokens + row.reasoningTokens + row.reportedCost + row.estimatedCost === 0) continue
  console.log(
    `| ${row.provider} | ${row.model} | ${tokens(row.inputTokens)} | ${tokens(row.outputTokens)} | ${tokens(row.reasoningTokens)} | ${money(row.reportedCost)} | ${money(row.estimatedCost)} | ${row.pricingSource} | ${row.cassette} |`,
  )
}
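
// The report can be produced directly with Bun (the script only reads local
// cassettes and fetches public pricing), e.g.:
//
//   bun packages/llm/script/recording-cost-report.ts
//
// Rows print as a Markdown table sorted by reported + estimated cost.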
542
packages/llm/script/setup-recording-env.ts
Normal file
@@ -0,0 +1,542 @@
#!/usr/bin/env bun

import { NodeFileSystem } from "@effect/platform-node"
import * as path from "node:path"
import * as prompts from "@clack/prompts"
import { AwsV4Signer } from "aws4fetch"
import { Config, ConfigProvider, Effect, FileSystem, PlatformError, Redacted } from "effect"
import { FetchHttpClient, HttpClient, HttpClientRequest, type HttpClientResponse } from "effect/unstable/http"
import * as ProviderShared from "../src/protocols/shared"
import * as Cloudflare from "../src/providers/cloudflare"

type Provider = {
  readonly id: string
  readonly label: string
  readonly tier: "core" | "canary" | "compatible" | "optional"
  readonly note: string
  readonly vars: ReadonlyArray<{
    readonly name: string
    readonly label?: string
    readonly optional?: boolean
    readonly secret?: boolean
  }>
  readonly validate?: (env: Env) => Effect.Effect<string | undefined, unknown, HttpClient.HttpClient>
}

type Env = Record<string, string>

const PROVIDERS: ReadonlyArray<Provider> = [
  {
    id: "openai",
    label: "OpenAI",
    tier: "core",
    note: "Native OpenAI Chat / Responses recorded tests",
    vars: [{ name: "OPENAI_API_KEY" }],
    validate: (env) => validateBearer("https://api.openai.com/v1/models", Redacted.make(env.OPENAI_API_KEY)),
  },
  {
    id: "anthropic",
    label: "Anthropic",
    tier: "core",
    note: "Native Anthropic Messages recorded tests",
    vars: [{ name: "ANTHROPIC_API_KEY" }],
    validate: (env) =>
      HttpClientRequest.get("https://api.anthropic.com/v1/models").pipe(
        HttpClientRequest.setHeaders({
          "anthropic-version": "2023-06-01",
          "x-api-key": Redacted.value(Redacted.make(env.ANTHROPIC_API_KEY)),
        }),
        executeRequest,
      ),
  },
  {
    id: "google",
    label: "Google Gemini",
    tier: "core",
    note: "Native Gemini recorded tests",
    vars: [{ name: "GOOGLE_GENERATIVE_AI_API_KEY" }],
    validate: (env) =>
      HttpClientRequest.get(
        `https://generativelanguage.googleapis.com/v1beta/models?key=${encodeURIComponent(env.GOOGLE_GENERATIVE_AI_API_KEY)}`,
      ).pipe(executeRequest),
  },
  {
    id: "bedrock",
    label: "Amazon Bedrock",
    tier: "core",
    note: "Native Bedrock Converse recorded tests",
    vars: [
      { name: "AWS_ACCESS_KEY_ID" },
      { name: "AWS_SECRET_ACCESS_KEY" },
      { name: "AWS_SESSION_TOKEN", optional: true },
      { name: "BEDROCK_RECORDING_REGION", optional: true },
      { name: "BEDROCK_MODEL_ID", optional: true },
    ],
    validate: (env) => validateBedrock(env),
  },
  {
    id: "groq",
    label: "Groq",
    tier: "canary",
    note: "Fast OpenAI-compatible canary for text/tool streaming",
    vars: [{ name: "GROQ_API_KEY" }],
    validate: (env) => validateBearer("https://api.groq.com/openai/v1/models", Redacted.make(env.GROQ_API_KEY)),
  },
  {
    id: "openrouter",
    label: "OpenRouter",
    tier: "canary",
    note: "Router canary for OpenAI-compatible text/tool streaming",
    vars: [{ name: "OPENROUTER_API_KEY" }],
    validate: (env) =>
      validateChat({
        url: "https://openrouter.ai/api/v1/chat/completions",
        token: Redacted.make(env.OPENROUTER_API_KEY),
        model: "openai/gpt-4o-mini",
      }),
  },
  {
    id: "xai",
    label: "xAI",
    tier: "canary",
    note: "OpenAI-compatible xAI chat endpoint",
    vars: [{ name: "XAI_API_KEY" }],
    validate: (env) => validateBearer("https://api.x.ai/v1/models", Redacted.make(env.XAI_API_KEY)),
  },
  {
    id: "cloudflare-ai-gateway",
    label: "Cloudflare AI Gateway",
    tier: "canary",
    note: "Cloudflare Unified/OpenAI-compatible gateway; supports provider/model ids like workers-ai/@cf/...",
    vars: [
      { name: "CLOUDFLARE_ACCOUNT_ID", label: "Cloudflare account ID", secret: false },
      {
        name: "CLOUDFLARE_GATEWAY_ID",
        label: "Cloudflare AI Gateway ID (defaults to default)",
        optional: true,
        secret: false,
      },
      { name: "CLOUDFLARE_API_TOKEN", label: "Cloudflare AI Gateway token" },
    ],
    validate: (env) =>
      validateChat({
        url: `${Cloudflare.aiGatewayBaseURL({
          accountId: env.CLOUDFLARE_ACCOUNT_ID,
          gatewayId: env.CLOUDFLARE_GATEWAY_ID || undefined,
        })}/chat/completions`,
        token: Redacted.make(envValue(env, Cloudflare.aiGatewayAuthEnvVars)),
        tokenHeader: "cf-aig-authorization",
        model: "workers-ai/@cf/meta/llama-3.1-8b-instruct",
      }),
  },
  {
    id: "cloudflare-workers-ai",
    label: "Cloudflare Workers AI",
    tier: "canary",
    note: "Direct Workers AI OpenAI-compatible endpoint; supports model ids like @cf/meta/...",
    vars: [
      { name: "CLOUDFLARE_ACCOUNT_ID", label: "Cloudflare account ID", secret: false },
      { name: "CLOUDFLARE_API_KEY", label: "Cloudflare Workers AI API token" },
    ],
    validate: (env) =>
      validateChat({
        url: `${Cloudflare.workersAIBaseURL({ accountId: env.CLOUDFLARE_ACCOUNT_ID })}/chat/completions`,
        token: Redacted.make(envValue(env, Cloudflare.workersAIAuthEnvVars)),
        model: "@cf/meta/llama-3.1-8b-instruct",
      }),
  },
  {
    id: "deepseek",
    label: "DeepSeek",
    tier: "compatible",
    note: "Existing OpenAI-compatible recorded tests",
    vars: [{ name: "DEEPSEEK_API_KEY" }],
    validate: (env) => validateBearer("https://api.deepseek.com/models", Redacted.make(env.DEEPSEEK_API_KEY)),
  },
  {
    id: "togetherai",
    label: "TogetherAI",
    tier: "compatible",
    note: "Existing OpenAI-compatible text/tool recorded tests",
    vars: [{ name: "TOGETHER_AI_API_KEY" }],
    validate: (env) => validateBearer("https://api.together.xyz/v1/models", Redacted.make(env.TOGETHER_AI_API_KEY)),
  },
  {
    id: "mistral",
    label: "Mistral",
    tier: "optional",
    note: "OpenAI-compatible bridge; native reasoning parity is follow-up work",
    vars: [{ name: "MISTRAL_API_KEY" }],
    validate: (env) => validateBearer("https://api.mistral.ai/v1/models", Redacted.make(env.MISTRAL_API_KEY)),
  },
  {
    id: "perplexity",
    label: "Perplexity",
    tier: "optional",
    note: "OpenAI-compatible bridge; citations/search metadata are follow-up work",
    vars: [{ name: "PERPLEXITY_API_KEY" }],
    validate: (env) => validateBearer("https://api.perplexity.ai/models", Redacted.make(env.PERPLEXITY_API_KEY)),
  },
  {
    id: "venice",
    label: "Venice",
    tier: "optional",
    note: "OpenAI-compatible bridge",
    vars: [{ name: "VENICE_API_KEY" }],
    validate: (env) => validateBearer("https://api.venice.ai/api/v1/models", Redacted.make(env.VENICE_API_KEY)),
  },
  {
    id: "cerebras",
    label: "Cerebras",
    tier: "optional",
    note: "OpenAI-compatible bridge",
    vars: [{ name: "CEREBRAS_API_KEY" }],
    validate: (env) => validateBearer("https://api.cerebras.ai/v1/models", Redacted.make(env.CEREBRAS_API_KEY)),
  },
  {
    id: "deepinfra",
    label: "DeepInfra",
    tier: "optional",
    note: "OpenAI-compatible bridge",
    vars: [{ name: "DEEPINFRA_API_KEY" }],
    validate: (env) =>
      validateBearer("https://api.deepinfra.com/v1/openai/models", Redacted.make(env.DEEPINFRA_API_KEY)),
  },
  {
    id: "fireworks",
    label: "Fireworks",
    tier: "optional",
    note: "OpenAI-compatible bridge",
    vars: [{ name: "FIREWORKS_API_KEY" }],
    validate: (env) =>
      validateBearer("https://api.fireworks.ai/inference/v1/models", Redacted.make(env.FIREWORKS_API_KEY)),
  },
  {
    id: "baseten",
    label: "Baseten",
    tier: "optional",
    note: "OpenAI-compatible bridge",
    vars: [{ name: "BASETEN_API_KEY" }],
  },
]

const args = process.argv.slice(2)
const hasFlag = (name: string) => args.includes(name)
const option = (name: string) => {
  const index = args.indexOf(name)
  if (index === -1) return undefined
  return args[index + 1]
}

const envPath = path.resolve(process.cwd(), option("--env") ?? ".env.local")
const checkOnly = hasFlag("--check")
const providerOption = option("--providers")
const interactive = Boolean(process.stdin.isTTY && process.stdout.isTTY)

const envNames = Array.from(new Set(PROVIDERS.flatMap((provider) => provider.vars.map((item) => item.name))))

const providersForOption = (value: string | undefined) => {
  if (!value || value === "recommended")
    return PROVIDERS.filter((provider) => provider.tier === "core" || provider.tier === "canary")
  if (value === "recorded") return PROVIDERS.filter((provider) => provider.tier !== "optional")
  if (value === "all") return PROVIDERS
  const ids = new Set(
    value
      .split(",")
      .map((item) => item.trim())
      .filter(Boolean),
  )
  return PROVIDERS.filter((provider) => ids.has(provider.id))
}

const chooseProviders = async () => {
  if (providerOption) return providersForOption(providerOption)
  return providersForOption("recommended")
}

const catchMissingFile = (error: PlatformError.PlatformError) => {
  if (error.reason._tag === "NotFound") return Effect.succeed("")
  return Effect.fail(error)
}

const readEnvFile = Effect.fn("RecordingEnv.readFile")(function* () {
  const fileSystem = yield* FileSystem.FileSystem
  return yield* fileSystem.readFileString(envPath).pipe(Effect.catch(catchMissingFile))
})

const readConfigString = (provider: ConfigProvider.ConfigProvider, name: string) =>
  Config.string(name)
    .parse(provider)
    .pipe(
      Effect.match({
        onFailure: () => undefined,
        onSuccess: (value) => value,
      }),
    )

const parseEnv = Effect.fn("RecordingEnv.parseEnv")(function* (contents: string) {
  const provider = ConfigProvider.fromDotEnvContents(contents)
  return Object.fromEntries(
    (yield* Effect.forEach(envNames, (name) =>
      readConfigString(provider, name).pipe(Effect.map((value) => [name, value] as const)),
    )).filter((entry): entry is readonly [string, string] => entry[1] !== undefined),
  )
})

const quote = (value: string) => JSON.stringify(value)

const status = (name: string, fileEnv: Env) => {
  if (fileEnv[name]) return "file"
  if (process.env[name]) return "shell"
  return "missing"
}

const statusLine = (provider: Provider, fileEnv: Env) =>
  [
    `${provider.label} (${provider.tier})`,
    provider.note,
    ...provider.vars.map((item) => {
      const value = status(item.name, fileEnv)
      const suffix = item.optional ? " optional" : ""
      return ` ${value === "missing" ? "missing" : "set"} ${item.name}${suffix}${value === "shell" ? " (shell only)" : ""}`
    }),
  ].join("\n")

const printStatus = (providers: ReadonlyArray<Provider>, fileEnv: Env) => {
  prompts.note(providers.map((provider) => statusLine(provider, fileEnv)).join("\n\n"), `Recording env: ${envPath}`)
}

const exitIfCancel = <A>(value: A | symbol): A => {
  if (!prompts.isCancel(value)) return value as A
  prompts.cancel("Cancelled")
  process.exit(130)
}

const upsertEnv = (contents: string, values: Env) => {
  const names = Object.keys(values)
  const seen = new Set<string>()
  const lines = contents.split(/\r?\n/).map((line) => {
    const match = line.match(/^\s*(?:export\s+)?([A-Za-z_][A-Za-z0-9_]*)\s*=/)
    if (!match || !names.includes(match[1])) return line
    seen.add(match[1])
    return `${match[1]}=${quote(values[match[1]])}`
  })
  const missing = names.filter((name) => !seen.has(name))
  if (missing.length === 0) return lines.join("\n").replace(/\n*$/, "\n")
  const prefix = lines.join("\n").trimEnd()
  const block = [
    "",
    "# Added by bun run setup:recording-env",
    ...missing.map((name) => `${name}=${quote(values[name])}`),
  ].join("\n")
  return `${prefix}${block}\n`
}
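
// Worked example: upsertEnv('FOO="old"\n', { FOO: "new", BAR: "x" }) rewrites
// the FOO line in place and appends BAR under the marker comment, returning
// 'FOO="new"\n# Added by bun run setup:recording-env\nBAR="x"\n'.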

const providerRequiredStatus = (provider: Provider, fileEnv: Env) => {
  const required = requiredVars(provider)
  if (required.some((item) => status(item.name, fileEnv) === "missing")) return "missing"
  if (required.some((item) => status(item.name, fileEnv) === "shell")) return "set in shell"
  return "already added"
}

const requiredVars = (provider: Provider) => provider.vars.filter((item) => !item.optional)

const promptVars = (provider: Provider) => provider.vars.filter((item) => !item.optional || item.secret === false)

const processEnv = (): Env =>
  Object.fromEntries(Object.entries(process.env).filter((entry): entry is [string, string] => entry[1] !== undefined))

const envValue = (env: Env, names: ReadonlyArray<string>) => names.map((name) => env[name]).find(Boolean) ?? ""

const envWithValues = (fileEnv: Env, values: Env): Env => ({
  ...processEnv(),
  ...fileEnv,
  ...values,
})

const responseError = Effect.fn("RecordingEnv.responseError")(function* (
  response: HttpClientResponse.HttpClientResponse,
) {
  if (response.status >= 200 && response.status < 300) return undefined
  const body = yield* response.text.pipe(Effect.catch(() => Effect.succeed("")))
  return `${response.status}${body ? `: ${body.slice(0, 180)}` : ""}`
})

const executeRequest = Effect.fn("RecordingEnv.executeRequest")(function* (
  request: HttpClientRequest.HttpClientRequest,
) {
  const http = yield* HttpClient.HttpClient
  return yield* http.execute(request).pipe(Effect.flatMap(responseError))
})

const validateBearer = (url: string, token: Redacted.Redacted<string>, headers: Record<string, string> = {}) =>
  HttpClientRequest.get(url).pipe(
    HttpClientRequest.setHeaders({ ...headers, authorization: `Bearer ${Redacted.value(token)}` }),
    executeRequest,
  )

const validateChat = (input: {
  readonly url: string
  readonly token: Redacted.Redacted<string>
  readonly tokenHeader?: string
  readonly model: string
  readonly headers?: Record<string, string>
}) =>
  ProviderShared.jsonPost({
    url: input.url,
    headers: { ...input.headers, [input.tokenHeader ?? "authorization"]: `Bearer ${Redacted.value(input.token)}` },
    body: ProviderShared.encodeJson({
      model: input.model,
      messages: [{ role: "user", content: "Reply with exactly: ok" }],
      max_tokens: 3,
      temperature: 0,
    }),
  }).pipe(executeRequest)

const validateBedrock = (env: Env) =>
  Effect.gen(function* () {
    const request = yield* Effect.promise(() =>
      new AwsV4Signer({
        url: `https://bedrock.${env.BEDROCK_RECORDING_REGION || "us-east-1"}.amazonaws.com/foundation-models`,
        method: "GET",
        service: "bedrock",
        region: env.BEDROCK_RECORDING_REGION || "us-east-1",
        accessKeyId: env.AWS_ACCESS_KEY_ID,
        secretAccessKey: env.AWS_SECRET_ACCESS_KEY,
        sessionToken: env.AWS_SESSION_TOKEN || undefined,
      }).sign(),
    )
    return yield* HttpClientRequest.get(request.url.toString()).pipe(
      HttpClientRequest.setHeaders(Object.fromEntries(request.headers.entries())),
      executeRequest,
    )
  })

const validateProvider = Effect.fn("RecordingEnv.validateProvider")(function* (provider: Provider, env: Env) {
  return yield* (provider.validate?.(env) ?? Effect.succeed("no lightweight validator")).pipe(
    Effect.catch((error) => {
      if (error instanceof Error) return Effect.succeed(error.message)
      return Effect.succeed(String(error))
    }),
  )
})

const validateProviders = Effect.fn("RecordingEnv.validateProviders")(function* (
  providers: ReadonlyArray<Provider>,
  env: Env,
) {
  const spinner = prompts.spinner()
  spinner.start("Validating credentials")
  const results = yield* Effect.forEach(
    providers,
    (provider) => validateProvider(provider, env).pipe(Effect.map((error) => ({ provider, error }))),
    { concurrency: 4 },
  )
  spinner.stop("Validation complete")
  prompts.note(
    results
      .map(
        (result) =>
          `${result.error ? "failed" : "ok"} ${result.provider.label}${result.error ? ` - ${result.error}` : ""}`,
      )
      .join("\n"),
    "Credential validation",
  )
})

const writeEnvFile = Effect.fn("RecordingEnv.writeFile")(function* (contents: string) {
  const fileSystem = yield* FileSystem.FileSystem
  yield* fileSystem.makeDirectory(path.dirname(envPath), { recursive: true })
  yield* fileSystem.writeFileString(envPath, contents, { mode: 0o600 })
})

const prompt = <A>(run: () => Promise<A | symbol>) => Effect.promise(run).pipe(Effect.map(exitIfCancel))

const chooseConfigurableProviders = Effect.fn("RecordingEnv.chooseConfigurableProviders")(function* (
  providers: ReadonlyArray<Provider>,
  fileEnv: Env,
) {
  const configurable = providers.filter((provider) => requiredVars(provider).length > 0)
  const selected = yield* prompt<ReadonlyArray<string>>(() =>
    prompts.multiselect({
      message: "Select provider credentials to add or override",
      options: configurable.map((provider) => ({
        value: provider.id,
        label: provider.label,
        hint: `${providerRequiredStatus(provider, fileEnv)} - ${requiredVars(provider)
          .map((item) => item.name)
          .join(", ")}`,
      })),
      initialValues: configurable
        .filter((provider) => providerRequiredStatus(provider, fileEnv) === "missing")
        .map((provider) => provider.id),
    }),
  )
  return configurable.filter((provider) => selected.includes(provider.id))
})

const promptEnvVar = (item: Provider["vars"][number]) =>
  prompt<string>(() => {
    const input = {
      message: item.label ?? item.name,
      validate: (input: string | undefined) => {
        if (item.optional) return undefined
        return !input || input.length === 0 ? "Leave blank by pressing Esc/cancel, or paste a value" : undefined
      },
    }
    return item.secret === false ? prompts.text(input) : prompts.password(input)
  })

const promptProviderValues = Effect.fn("RecordingEnv.promptProviderValues")(function* (
  providers: ReadonlyArray<Provider>,
) {
  const values: Env = {}
  for (const provider of providers) {
    prompts.log.info(`${provider.label}: ${provider.note}`)
    for (const item of promptVars(provider)) {
      if (values[item.name]) continue
      const value = yield* promptEnvVar(item)
      if (value !== "") values[item.name] = value
    }
  }
  return values
})

const main = Effect.fn("RecordingEnv.main")(function* () {
  prompts.intro("LLM recording credentials")
  const contents = yield* readEnvFile()
  const fileEnv = yield* parseEnv(contents)
  const providers = yield* Effect.promise(() => chooseProviders())
  printStatus(providers, fileEnv)
  if (checkOnly) {
    prompts.outro("Check complete")
    return
  }
  if (!interactive) {
    prompts.outro("Run this command in a terminal to enter credentials")
    return
  }

  const selectedProviders = yield* chooseConfigurableProviders(providers, fileEnv)
  const values = yield* promptProviderValues(selectedProviders)

  if (Object.keys(values).length === 0) {
    prompts.outro("No changes")
    return
  }

  if (
    interactive &&
    (yield* prompt(() => prompts.confirm({ message: "Validate credentials before saving?", initialValue: true })))
  ) {
    yield* validateProviders(selectedProviders, envWithValues(fileEnv, values))
  }

  yield* writeEnvFile(upsertEnv(contents, values))
  prompts.log.success(
    `Saved ${Object.keys(values).length} value${Object.keys(values).length === 1 ? "" : "s"} to ${envPath}`,
  )
  prompts.outro("Keep .env.local local. Store shared team credentials in a password manager or vault.")
})

await Effect.runPromise(main().pipe(Effect.provide(NodeFileSystem.layer), Effect.provide(FetchHttpClient.layer)))
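
// Usage sketches, grounded in the flags parsed above:
//
//   bun packages/llm/script/setup-recording-env.ts --check
//   bun packages/llm/script/setup-recording-env.ts --providers openai,anthropic
//   bun packages/llm/script/setup-recording-env.ts --env .env.recording --providers all
//
// --providers also accepts the shortcuts "recommended" (core + canary),
// "recorded" (everything except optional), and "all".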
111
packages/llm/src/cache-policy.ts
Normal file
@@ -0,0 +1,111 @@
// Apply an `LLMRequest.cache` policy by injecting `CacheHint`s onto the parts
// the policy designates. Runs once at compile time, before the per-protocol
// body builder, so the existing inline-hint lowering path handles the rest.
//
// The default `"auto"` shape places one breakpoint at the last tool definition,
// one at the last system part, and one at the latest user message. This
// matches what production agent harnesses (LangChain's caching middleware,
// kern-ai's 10x cost-reduction playbook) converge on for tool-use loops: the
// latest user message stays put while a single turn explodes into many
// assistant/tool round-trips, so caching at that boundary lets every
// intra-turn API call hit the prefix.
//
// Manual `cache: CacheHint` placements on individual parts are preserved —
// this function only fills gaps the caller left empty.
import { CacheHint, type CachePolicy, type CachePolicyObject } from "./schema/options"
import { LLMRequest, Message, ToolDefinition, type ContentPart } from "./schema/messages"

const AUTO: CachePolicyObject = {
  tools: true,
  system: true,
  messages: "latest-user-message",
}

const NONE: CachePolicyObject = {}

// Resolution rules:
// - undefined → "auto" — caching is on by default. The math favors it:
//   Anthropic 5m-cache write is 1.25x base, read is 0.1x,
//   so a single reuse within 5 minutes already wins.
// - "auto" → tools + system + latest user msg.
// - "none" → no auto placement; manual `CacheHint`s still flow.
// - object form → exactly what the caller asked for.
const resolve = (policy: CachePolicy | undefined): CachePolicyObject => {
  if (policy === undefined || policy === "auto") return AUTO
  if (policy === "none") return NONE
  return policy
}

// Protocols whose wire format ignores inline cache markers (OpenAI's implicit
// prefix caching, Gemini's implicit + out-of-band CachedContent). Skip the
// whole policy pass for these — emitting hints would be harmless but pointless.
const RESPECTS_INLINE_HINTS = new Set(["anthropic-messages", "bedrock-converse"])

const makeHint = (ttlSeconds: number | undefined): CacheHint =>
  ttlSeconds !== undefined ? new CacheHint({ type: "ephemeral", ttlSeconds }) : new CacheHint({ type: "ephemeral" })

const markLastTool = (tools: ReadonlyArray<ToolDefinition>, hint: CacheHint): ReadonlyArray<ToolDefinition> => {
  if (tools.length === 0) return tools
  const last = tools.length - 1
  if (tools[last]!.cache) return tools
  return tools.map((tool, i) => (i === last ? new ToolDefinition({ ...tool, cache: hint }) : tool))
}

const markLastSystem = (system: LLMRequest["system"], hint: CacheHint): LLMRequest["system"] => {
  if (system.length === 0) return system
  const last = system.length - 1
  if (system[last]!.cache) return system
  return system.map((part, i) => (i === last ? { ...part, cache: hint } : part))
}

const lastIndexOfRole = (messages: ReadonlyArray<Message>, role: Message["role"]): number =>
  messages.findLastIndex((m) => m.role === role)

// Mark the last text part of `messages[index]`. If no text part exists, mark
// the last content part regardless of type — that's the breakpoint position
// in tool-result-only messages too.
const markMessageAt = (messages: ReadonlyArray<Message>, index: number, hint: CacheHint): ReadonlyArray<Message> => {
  if (index < 0 || index >= messages.length) return messages
  const target = messages[index]!
  if (target.content.length === 0) return messages
  const lastTextIndex = target.content.findLastIndex((part) => part.type === "text")
  const markAt = lastTextIndex >= 0 ? lastTextIndex : target.content.length - 1
  const existing = target.content[markAt]!
  if ("cache" in existing && existing.cache) return messages
  const nextContent = target.content.map((part, i) => (i === markAt ? ({ ...part, cache: hint } as ContentPart) : part))
  const next = new Message({ ...target, content: nextContent })
  // Single pass over `messages`, substituting the one updated entry. Long
  // conversations call this on every request, so avoid `.map()` here — its
  // closure dispatch and identity copies show up in profiling.
  const result = messages.slice()
  result[index] = next
  return result
}

const markMessages = (
  messages: ReadonlyArray<Message>,
  strategy: NonNullable<CachePolicyObject["messages"]>,
  hint: CacheHint,
): ReadonlyArray<Message> => {
  if (messages.length === 0) return messages
  if (strategy === "latest-user-message") return markMessageAt(messages, lastIndexOfRole(messages, "user"), hint)
  if (strategy === "latest-assistant") return markMessageAt(messages, lastIndexOfRole(messages, "assistant"), hint)
  const start = Math.max(0, messages.length - strategy.tail)
  let next = messages
  for (let i = start; i < messages.length; i++) next = markMessageAt(next, i, hint)
  return next
}

export const applyCachePolicy = (request: LLMRequest): LLMRequest => {
  if (!RESPECTS_INLINE_HINTS.has(request.model.route)) return request
  const policy = resolve(request.cache)
  if (!policy.tools && !policy.system && !policy.messages) return request

  const hint = makeHint(policy.ttlSeconds)
  const tools = policy.tools ? markLastTool(request.tools, hint) : request.tools
  const system = policy.system ? markLastSystem(request.system, hint) : request.system
  const messages = policy.messages ? markMessages(request.messages, policy.messages, hint) : request.messages

  if (tools === request.tools && system === request.system && messages === request.messages) return request
  return LLMRequest.update(request, { tools, system, messages })
}
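
// Illustration of the default policy (the shapes are hypothetical): for a
// request with tools [a, b], system parts [s1, s2], and messages
// [user, assistant, user], "auto" marks b, s2, and the last text part of the
// final user message with an ephemeral CacheHint: three breakpoints in total,
// under the four-breakpoint cap the Anthropic lowering layer enforces.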
35
packages/llm/src/index.ts
Normal file
@@ -0,0 +1,35 @@
export { LLMClient, modelLimits, modelRef } from "./route/client"
export { Auth } from "./route/auth"
export { Provider } from "./provider"
export type {
  RouteModelInput,
  RouteRoutedModelInput,
  Interface as LLMClientShape,
  Service as LLMClientService,
  ModelRefInput,
} from "./route/client"
export * from "./schema"
export { Tool, ToolFailure, toDefinitions, tool } from "./tool"
export type {
  AnyExecutableTool,
  AnyTool,
  ExecutableTool,
  ExecutableTools,
  Tool as ToolShape,
  ToolExecute,
  Tools,
  ToolSchema,
} from "./tool"
export type {
  RunOptions as ToolRunOptions,
  RuntimeState as ToolRuntimeState,
  StopCondition as ToolStopCondition,
  ToolExecution,
} from "./tool-runtime"

export * as LLM from "./llm"
export type {
  Definition as ProviderDefinition,
  ModelFactory as ProviderModelFactory,
  ModelOptions as ProviderModelOptions,
} from "./provider"
219
packages/llm/src/llm.ts
Normal file
@@ -0,0 +1,219 @@
import { Effect, JsonSchema, Schema } from "effect"
import { LLMClient, modelLimits, modelRef, type ModelRefInput } from "./route/client"
import {
  GenerationOptions,
  HttpOptions,
  InvalidProviderOutputReason,
  LLMError,
  LLMEvent,
  LLMRequest,
  LLMResponse,
  Message,
  SystemPart,
  ToolChoice,
  ToolDefinition,
  type ContentPart,
  ToolCallPart,
  ToolResultPart,
} from "./schema"
import { make as makeTool, type ToolSchema } from "./tool"

export type ModelInput = ModelRefInput

export type MessageInput = Message.Input

export type ToolChoiceInput = ToolChoice.Input
export type ToolChoiceMode = ToolChoice.Mode

export type ToolResultInput = Parameters<typeof ToolResultPart.make>[0]

/** Input accepted by `LLM.request`, normalized into the canonical `LLMRequest` class. */
export type RequestInput = Omit<
  ConstructorParameters<typeof LLMRequest>[0],
  "system" | "messages" | "tools" | "toolChoice" | "generation" | "http" | "providerOptions"
> & {
  readonly system?: string | SystemPart | ReadonlyArray<SystemPart>
  readonly prompt?: string | ContentPart | ReadonlyArray<ContentPart>
  readonly messages?: ReadonlyArray<Message | MessageInput>
  readonly tools?: ReadonlyArray<ToolDefinition.Input>
  readonly toolChoice?: ToolChoiceInput
  readonly generation?: GenerationOptions.Input
  readonly providerOptions?: ConstructorParameters<typeof LLMRequest>[0]["providerOptions"]
  readonly http?: HttpOptions.Input
}

export const limits = modelLimits

export const text = Message.text

export const system = SystemPart.make

export const message = Message.make

export const user = Message.user

export const assistant = Message.assistant

export const model = modelRef

export const toolDefinition = ToolDefinition.make

export const toolCall = ToolCallPart.make

export const toolResult = ToolResultPart.make

export const toolMessage = Message.tool

export const toolChoiceName = ToolChoice.named

export const toolChoice = ToolChoice.make

export const generation = GenerationOptions.make

export const generate = LLMClient.generate

export const stream = LLMClient.stream

export const stepCountIs = LLMClient.stepCountIs

export const requestInput = (input: LLMRequest): RequestInput => ({
  ...LLMRequest.input(input),
})

export const request = (input: RequestInput) => {
  const {
    system: requestSystem,
    prompt,
    messages,
    tools,
    toolChoice: requestToolChoice,
    generation: requestGeneration,
    providerOptions: requestProviderOptions,
    http: requestHttp,
    ...rest
  } = input
  return new LLMRequest({
    ...rest,
    system: SystemPart.content(requestSystem),
    messages: [...(messages?.map(message) ?? []), ...(prompt === undefined ? [] : [user(prompt)])],
    tools: tools?.map(toolDefinition) ?? [],
    toolChoice: requestToolChoice ? toolChoice(requestToolChoice) : undefined,
    generation: requestGeneration === undefined ? undefined : generation(requestGeneration),
    providerOptions: requestProviderOptions,
    http: requestHttp === undefined ? undefined : HttpOptions.make(requestHttp),
  })
}

export const updateRequest = (input: LLMRequest, patch: Partial<RequestInput>) =>
  request({ ...requestInput(input), ...patch })

const GENERATE_OBJECT_TOOL_NAME = "generate_object"

const GENERATE_OBJECT_TOOL_DESCRIPTION = "Return the structured result by calling this tool."

type GenerateObjectBase = Omit<RequestInput, "tools" | "toolChoice" | "responseFormat">

export class GenerateObjectResponse<T> {
  constructor(
    readonly object: T,
    readonly response: LLMResponse,
  ) {}

  get events() {
    return this.response.events
  }

  get usage() {
    return this.response.usage
  }
}

export interface GenerateObjectOptions<S extends ToolSchema<any>> extends GenerateObjectBase {
  readonly schema: S
}

export interface GenerateObjectDynamicOptions extends GenerateObjectBase {
  /** Raw JSON Schema object describing the expected output shape. */
  readonly jsonSchema: JsonSchema.JsonSchema
}

const runGenerateObject = Effect.fn("LLM.generateObject")(function* (
  options: GenerateObjectBase,
  tool: ReturnType<typeof makeTool>,
) {
  const baseRequest = request(options)
  const generateRequest = LLMRequest.update(baseRequest, {
    toolChoice: ToolChoice.named(GENERATE_OBJECT_TOOL_NAME),
  })
  const response = yield* LLMClient.generate({
    request: generateRequest,
    tools: { [GENERATE_OBJECT_TOOL_NAME]: tool },
    toolExecution: "none",
  })
  const call = response.toolCalls.find(
    (event) => LLMEvent.is.toolCall(event) && event.name === GENERATE_OBJECT_TOOL_NAME,
  )
  if (!call || !LLMEvent.is.toolCall(call))
    return yield* new LLMError({
      module: "LLM",
      method: "generateObject",
      reason: new InvalidProviderOutputReason({
        message: `generateObject: model did not call the forced \`${GENERATE_OBJECT_TOOL_NAME}\` tool`,
      }),
    })
  const object = yield* tool._decode(call.input).pipe(
    Effect.mapError(
      (error) =>
        new LLMError({
          module: "LLM",
          method: "generateObject",
          reason: new InvalidProviderOutputReason({
            message: `generateObject: tool input failed schema decode: ${error.message}`,
          }),
        }),
    ),
  )
  return new GenerateObjectResponse(object, response)
})

/**
 * Run a model and decode its output against `schema`. Works on every protocol
 * because it forces a synthetic tool call internally — provider-native JSON
 * modes are intentionally avoided so behaviour is uniform.
 *
 * Two input modes:
 *
 * 1. `schema: EffectSchema<T>` — `.object` is decoded and typed as `T`.
 *    Decode failures surface as `LLMError`.
 * 2. `jsonSchema: JsonSchema.JsonSchema` — `.object` is `unknown`. Use when
 *    the schema is only available at runtime (MCP, plugin manifests). Caller validates.
 */
export function generateObject<S extends ToolSchema<any>>(
  options: GenerateObjectOptions<S>,
): Effect.Effect<GenerateObjectResponse<Schema.Schema.Type<S>>, LLMError>
export function generateObject(
  options: GenerateObjectDynamicOptions,
): Effect.Effect<GenerateObjectResponse<unknown>, LLMError>
export function generateObject(options: GenerateObjectOptions<ToolSchema<any>> | GenerateObjectDynamicOptions) {
  if ("schema" in options) {
    const { schema, ...rest } = options
    return runGenerateObject(
      rest,
      makeTool({
        description: GENERATE_OBJECT_TOOL_DESCRIPTION,
        parameters: schema,
        success: Schema.Unknown as ToolSchema<unknown>,
        execute: () => Effect.void,
      }),
    )
  }
  const { jsonSchema, ...rest } = options
  return runGenerateObject(
    rest,
    makeTool({
      description: GENERATE_OBJECT_TOOL_DESCRIPTION,
      jsonSchema,
      execute: () => Effect.void,
    }),
  )
}
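
// A minimal usage sketch. The model ref and the surrounding Effect runtime are
// assumed here, not shown in this hunk:
//
//   const Weather = Schema.Struct({ city: Schema.String, tempC: Schema.Number })
//   const program = Effect.gen(function* () {
//     const result = yield* generateObject({ model, prompt: "Weather in Paris?", schema: Weather })
//     return result.object // decoded and typed from Weather
//   })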
659
packages/llm/src/protocols/anthropic-messages.ts
Normal file
@@ -0,0 +1,659 @@
import { Effect, Schema } from "effect"
import { Route } from "../route/client"
import { Auth } from "../route/auth"
import { Endpoint } from "../route/endpoint"
import { Framing } from "../route/framing"
import { Protocol } from "../route/protocol"
import {
  LLMEvent,
  Usage,
  type CacheHint,
  type FinishReason,
  type LLMRequest,
  type ProviderMetadata,
  type ToolCallPart,
  type ToolDefinition,
  type ToolResultPart,
} from "../schema"
import { JsonObject, optionalArray, optionalNull, ProviderShared } from "./shared"
import * as Cache from "./utils/cache"
import { ToolStream } from "./utils/tool-stream"

const ADAPTER = "anthropic-messages"
export const DEFAULT_BASE_URL = "https://api.anthropic.com/v1"
export const PATH = "/messages"

// =============================================================================
// Request Body Schema
// =============================================================================
const AnthropicCacheControl = Schema.Struct({
  type: Schema.tag("ephemeral"),
  ttl: Schema.optional(Schema.Literals(["5m", "1h"])),
})

const AnthropicTextBlock = Schema.Struct({
  type: Schema.tag("text"),
  text: Schema.String,
  cache_control: Schema.optional(AnthropicCacheControl),
})
type AnthropicTextBlock = Schema.Schema.Type<typeof AnthropicTextBlock>

const AnthropicThinkingBlock = Schema.Struct({
  type: Schema.tag("thinking"),
  thinking: Schema.String,
  signature: Schema.optional(Schema.String),
  cache_control: Schema.optional(AnthropicCacheControl),
})

const AnthropicToolUseBlock = Schema.Struct({
  type: Schema.tag("tool_use"),
  id: Schema.String,
  name: Schema.String,
  input: Schema.Unknown,
  cache_control: Schema.optional(AnthropicCacheControl),
})
type AnthropicToolUseBlock = Schema.Schema.Type<typeof AnthropicToolUseBlock>

const AnthropicServerToolUseBlock = Schema.Struct({
  type: Schema.tag("server_tool_use"),
  id: Schema.String,
  name: Schema.String,
  input: Schema.Unknown,
  cache_control: Schema.optional(AnthropicCacheControl),
})
type AnthropicServerToolUseBlock = Schema.Schema.Type<typeof AnthropicServerToolUseBlock>

// Server tool result blocks: web_search_tool_result, code_execution_tool_result,
// and web_fetch_tool_result. The provider executes the tool and inlines the
// structured result into the assistant turn — there is no client tool_result
// round-trip. We round-trip the structured `content` payload as opaque JSON so
// the next request can echo it back when continuing the conversation.
const AnthropicServerToolResultType = Schema.Literals([
  "web_search_tool_result",
  "code_execution_tool_result",
  "web_fetch_tool_result",
])
type AnthropicServerToolResultType = Schema.Schema.Type<typeof AnthropicServerToolResultType>

const AnthropicServerToolResultBlock = Schema.Struct({
  type: AnthropicServerToolResultType,
  tool_use_id: Schema.String,
  content: Schema.Unknown,
  cache_control: Schema.optional(AnthropicCacheControl),
})
type AnthropicServerToolResultBlock = Schema.Schema.Type<typeof AnthropicServerToolResultBlock>

const AnthropicToolResultBlock = Schema.Struct({
  type: Schema.tag("tool_result"),
  tool_use_id: Schema.String,
  content: Schema.String,
  is_error: Schema.optional(Schema.Boolean),
  cache_control: Schema.optional(AnthropicCacheControl),
})

const AnthropicUserBlock = Schema.Union([AnthropicTextBlock, AnthropicToolResultBlock])
const AnthropicAssistantBlock = Schema.Union([
  AnthropicTextBlock,
  AnthropicThinkingBlock,
  AnthropicToolUseBlock,
  AnthropicServerToolUseBlock,
  AnthropicServerToolResultBlock,
])
type AnthropicAssistantBlock = Schema.Schema.Type<typeof AnthropicAssistantBlock>
type AnthropicToolResultBlock = Schema.Schema.Type<typeof AnthropicToolResultBlock>

const AnthropicMessage = Schema.Union([
  Schema.Struct({ role: Schema.Literal("user"), content: Schema.Array(AnthropicUserBlock) }),
  Schema.Struct({ role: Schema.Literal("assistant"), content: Schema.Array(AnthropicAssistantBlock) }),
]).pipe(Schema.toTaggedUnion("role"))
type AnthropicMessage = Schema.Schema.Type<typeof AnthropicMessage>

const AnthropicTool = Schema.Struct({
  name: Schema.String,
  description: Schema.String,
  input_schema: JsonObject,
  cache_control: Schema.optional(AnthropicCacheControl),
})
type AnthropicTool = Schema.Schema.Type<typeof AnthropicTool>

const AnthropicToolChoice = Schema.Union([
  Schema.Struct({ type: Schema.Literals(["auto", "any"]) }),
  Schema.Struct({ type: Schema.tag("tool"), name: Schema.String }),
])

const AnthropicThinking = Schema.Struct({
  type: Schema.tag("enabled"),
  budget_tokens: Schema.Number,
})

const AnthropicBodyFields = {
  model: Schema.String,
  system: optionalArray(AnthropicTextBlock),
  messages: Schema.Array(AnthropicMessage),
  tools: optionalArray(AnthropicTool),
  tool_choice: Schema.optional(AnthropicToolChoice),
  stream: Schema.Literal(true),
  max_tokens: Schema.Number,
  temperature: Schema.optional(Schema.Number),
  top_p: Schema.optional(Schema.Number),
  top_k: Schema.optional(Schema.Number),
  stop_sequences: optionalArray(Schema.String),
  thinking: Schema.optional(AnthropicThinking),
}
const AnthropicMessagesBody = Schema.Struct(AnthropicBodyFields)
export type AnthropicMessagesBody = Schema.Schema.Type<typeof AnthropicMessagesBody>

const AnthropicUsage = Schema.Struct({
  input_tokens: Schema.optional(Schema.Number),
  output_tokens: Schema.optional(Schema.Number),
  cache_creation_input_tokens: optionalNull(Schema.Number),
  cache_read_input_tokens: optionalNull(Schema.Number),
})
type AnthropicUsage = Schema.Schema.Type<typeof AnthropicUsage>

const AnthropicStreamBlock = Schema.Struct({
  type: Schema.String,
  id: Schema.optional(Schema.String),
  name: Schema.optional(Schema.String),
  text: Schema.optional(Schema.String),
  thinking: Schema.optional(Schema.String),
  signature: Schema.optional(Schema.String),
  input: Schema.optional(Schema.Unknown),
  // *_tool_result blocks arrive whole as content_block_start (no streaming
  // delta) with the structured payload in `content` and the originating
  // server_tool_use id in `tool_use_id`.
  tool_use_id: Schema.optional(Schema.String),
  content: Schema.optional(Schema.Unknown),
})

const AnthropicStreamDelta = Schema.Struct({
  type: Schema.optional(Schema.String),
  text: Schema.optional(Schema.String),
  thinking: Schema.optional(Schema.String),
  partial_json: Schema.optional(Schema.String),
  signature: Schema.optional(Schema.String),
  stop_reason: optionalNull(Schema.String),
  stop_sequence: optionalNull(Schema.String),
})

const AnthropicEvent = Schema.Struct({
  type: Schema.String,
  index: Schema.optional(Schema.Number),
  message: Schema.optional(Schema.Struct({ usage: Schema.optional(AnthropicUsage) })),
  content_block: Schema.optional(AnthropicStreamBlock),
  delta: Schema.optional(AnthropicStreamDelta),
  usage: Schema.optional(AnthropicUsage),
  error: Schema.optional(Schema.Struct({ type: Schema.String, message: Schema.String })),
})
type AnthropicEvent = Schema.Schema.Type<typeof AnthropicEvent>

interface ParserState {
  readonly tools: ToolStream.State<number>
  readonly usage?: Usage
}

const invalid = ProviderShared.invalidRequest

// =============================================================================
// Request Lowering
// =============================================================================
// Anthropic accepts at most 4 explicit cache_control breakpoints per request,
// across `tools`, `system`, and `messages`. Beyond the cap the API returns a
// 400 — so the lowering layer counts emitted markers and silently drops any
// that exceed it.
const ANTHROPIC_BREAKPOINT_CAP = 4

const EPHEMERAL_5M = { type: "ephemeral" as const }
const EPHEMERAL_1H = { type: "ephemeral" as const, ttl: "1h" as const }

const cacheControl = (breakpoints: Cache.Breakpoints, cache: CacheHint | undefined) => {
  if (cache?.type !== "ephemeral" && cache?.type !== "persistent") return undefined
  if (breakpoints.remaining <= 0) {
    breakpoints.dropped += 1
    return undefined
  }
  breakpoints.remaining -= 1
  return Cache.ttlBucket(cache.ttlSeconds) === "1h" ? EPHEMERAL_1H : EPHEMERAL_5M
}
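
// Worked example: with six hinted parts and the cap of four, the first four
// emit cache_control and `breakpoints.dropped` ends at 2. TTL bucketing is
// delegated to Cache.ttlBucket (not shown in this hunk): hints that bucket to
// "1h" get the 1h marker, everything else falls back to the 5m default.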

const anthropicMetadata = (metadata: Record<string, unknown>): ProviderMetadata => ({ anthropic: metadata })

const signatureFromMetadata = (metadata: ProviderMetadata | undefined): string | undefined => {
  const anthropic = metadata?.anthropic
  if (!ProviderShared.isRecord(anthropic)) return undefined
  return typeof anthropic.signature === "string" ? anthropic.signature : undefined
}

const lowerTool = (breakpoints: Cache.Breakpoints, tool: ToolDefinition): AnthropicTool => ({
  name: tool.name,
  description: tool.description,
  input_schema: tool.inputSchema,
  cache_control: cacheControl(breakpoints, tool.cache),
})

const lowerToolChoice = (toolChoice: NonNullable<LLMRequest["toolChoice"]>) =>
  ProviderShared.matchToolChoice("Anthropic Messages", toolChoice, {
    auto: () => ({ type: "auto" as const }),
    none: () => undefined,
    required: () => ({ type: "any" as const }),
    tool: (name) => ({ type: "tool" as const, name }),
  })

const lowerToolCall = (part: ToolCallPart): AnthropicToolUseBlock => ({
  type: "tool_use",
  id: part.id,
  name: part.name,
  input: part.input,
})

const lowerServerToolCall = (part: ToolCallPart): AnthropicServerToolUseBlock => ({
  type: "server_tool_use",
  id: part.id,
  name: part.name,
  input: part.input,
})

// Server tool result blocks are typed by name. Anthropic ships three today;
// extend this list when new server tools land. The block content is the
// structured payload returned by the provider, which we round-trip as-is.
const serverToolResultType = (name: string): AnthropicServerToolResultType | undefined => {
  if (name === "web_search") return "web_search_tool_result"
  if (name === "code_execution") return "code_execution_tool_result"
  if (name === "web_fetch") return "web_fetch_tool_result"
  return undefined
}

const lowerServerToolResult = Effect.fn("AnthropicMessages.lowerServerToolResult")(function* (part: ToolResultPart) {
  const wireType = serverToolResultType(part.name)
  if (!wireType)
    return yield* invalid(`Anthropic Messages does not know how to round-trip server tool result for ${part.name}`)
  return { type: wireType, tool_use_id: part.id, content: part.result.value } satisfies AnthropicServerToolResultBlock
})

const lowerMessages = Effect.fn("AnthropicMessages.lowerMessages")(function* (
  request: LLMRequest,
  breakpoints: Cache.Breakpoints,
) {
  const messages: AnthropicMessage[] = []

  for (const message of request.messages) {
    if (message.role === "user") {
      const content: AnthropicTextBlock[] = []
      for (const part of message.content) {
        if (!ProviderShared.supportsContent(part, ["text"]))
          return yield* ProviderShared.unsupportedContent("Anthropic Messages", "user", ["text"])
        content.push({ type: "text", text: part.text, cache_control: cacheControl(breakpoints, part.cache) })
      }
      messages.push({ role: "user", content })
      continue
    }

    if (message.role === "assistant") {
      const content: AnthropicAssistantBlock[] = []
      for (const part of message.content) {
        if (part.type === "text") {
          content.push({ type: "text", text: part.text, cache_control: cacheControl(breakpoints, part.cache) })
          continue
        }
        if (part.type === "reasoning") {
          content.push({
            type: "thinking",
            thinking: part.text,
            signature: part.encrypted ?? signatureFromMetadata(part.providerMetadata),
          })
          continue
        }
        if (part.type === "tool-call") {
          content.push(part.providerExecuted ? lowerServerToolCall(part) : lowerToolCall(part))
          continue
        }
        if (part.type === "tool-result" && part.providerExecuted) {
          content.push(yield* lowerServerToolResult(part))
          continue
        }
        return yield* invalid(
          `Anthropic Messages assistant messages only support text, reasoning, and tool-call content for now`,
        )
      }
      messages.push({ role: "assistant", content })
      continue
    }

    const content: AnthropicToolResultBlock[] = []
    for (const part of message.content) {
      if (!ProviderShared.supportsContent(part, ["tool-result"]))
        return yield* ProviderShared.unsupportedContent("Anthropic Messages", "tool", ["tool-result"])
      content.push({
        type: "tool_result",
        tool_use_id: part.id,
        content: ProviderShared.toolResultText(part),
        is_error: part.result.type === "error" ? true : undefined,
        cache_control: cacheControl(breakpoints, part.cache),
      })
    }
    messages.push({ role: "user", content })
  }

  return messages
})

const anthropicOptions = (request: LLMRequest) => request.providerOptions?.anthropic

const lowerThinking = Effect.fn("AnthropicMessages.lowerThinking")(function* (request: LLMRequest) {
  const thinking = anthropicOptions(request)?.thinking
|
||||
if (!ProviderShared.isRecord(thinking) || thinking.type !== "enabled") return undefined
|
||||
const budget =
|
||||
typeof thinking.budgetTokens === "number"
|
||||
? thinking.budgetTokens
|
||||
: typeof thinking.budget_tokens === "number"
|
||||
? thinking.budget_tokens
|
||||
: undefined
|
||||
if (budget === undefined) return yield* invalid("Anthropic thinking provider option requires budgetTokens")
|
||||
return { type: "enabled" as const, budget_tokens: budget }
|
||||
})
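
// For reference, a matching provider-options payload might look like this
// (hypothetical values): providerOptions: { anthropic: { thinking:
// { type: "enabled", budgetTokens: 2048 } } }. The snake_case
// `budget_tokens` spelling is accepted as well.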

const fromRequest = Effect.fn("AnthropicMessages.fromRequest")(function* (request: LLMRequest) {
  const toolChoice = request.toolChoice ? yield* lowerToolChoice(request.toolChoice) : undefined
  const generation = request.generation
  // Allocate the 4-breakpoint budget in invalidation order: tools → system →
  // messages. Tools live highest in the cache hierarchy, so when callers
  // over-mark we keep their tool hints and shed the message-tail ones first.
  const breakpoints = Cache.newBreakpoints(ANTHROPIC_BREAKPOINT_CAP)
  const tools =
    request.tools.length === 0 || request.toolChoice?.type === "none"
      ? undefined
      : request.tools.map((tool) => lowerTool(breakpoints, tool))
  const system =
    request.system.length === 0
      ? undefined
      : request.system.map((part) => ({
          type: "text" as const,
          text: part.text,
          cache_control: cacheControl(breakpoints, part.cache),
        }))
  const messages = yield* lowerMessages(request, breakpoints)
  if (breakpoints.dropped > 0) {
    yield* Effect.logWarning(
      `Anthropic Messages: dropped ${breakpoints.dropped} cache breakpoint(s); the API allows at most ${ANTHROPIC_BREAKPOINT_CAP} per request.`,
    )
  }
  return {
    model: request.model.id,
    system,
    messages,
    tools,
    tool_choice: toolChoice,
    stream: true as const,
    max_tokens: generation?.maxTokens ?? request.model.limits.output ?? 4096,
    temperature: generation?.temperature,
    top_p: generation?.topP,
    top_k: generation?.topK,
    stop_sequences: generation?.stop,
    thinking: yield* lowerThinking(request),
  }
})

// =============================================================================
// Stream Parsing
// =============================================================================
const mapFinishReason = (reason: string | null | undefined): FinishReason => {
  if (reason === "end_turn" || reason === "stop_sequence" || reason === "pause_turn") return "stop"
  if (reason === "max_tokens") return "length"
  if (reason === "tool_use") return "tool-calls"
  if (reason === "refusal") return "content-filter"
  return "unknown"
}

// Anthropic reports the non-overlapping breakdown natively — its
// `input_tokens` is the *non-cached* count per the Messages API docs, with
// cache reads and writes as separate fields. We sum them to derive the
// inclusive `inputTokens` the rest of the contract expects. Extended
// thinking tokens are *not* broken out by Anthropic — they're billed as
// part of `output_tokens`, so `reasoningTokens` stays `undefined` and
// `outputTokens` carries the combined total.
const mapUsage = (usage: AnthropicUsage | undefined): Usage | undefined => {
  if (!usage) return undefined
  const nonCached = usage.input_tokens
  const cacheRead = usage.cache_read_input_tokens ?? undefined
  const cacheWrite = usage.cache_creation_input_tokens ?? undefined
  const inputTokens = ProviderShared.sumTokens(nonCached, cacheRead, cacheWrite)
  return new Usage({
    inputTokens,
    outputTokens: usage.output_tokens,
    nonCachedInputTokens: nonCached,
    cacheReadInputTokens: cacheRead,
    cacheWriteInputTokens: cacheWrite,
    totalTokens: ProviderShared.totalTokens(inputTokens, usage.output_tokens, undefined),
    providerMetadata: { anthropic: usage },
  })
}
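
// Worked example (hypothetical numbers, assuming sumTokens adds the defined
// components): input_tokens = 12, cache_read_input_tokens = 900,
// cache_creation_input_tokens = 88, output_tokens = 40 gives
// inputTokens = 12 + 900 + 88 = 1000 and totalTokens = 1000 + 40 = 1040.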

// Anthropic emits usage on `message_start` and again on `message_delta` — the
// final delta carries the authoritative totals. Right-biased merge: each
// field prefers `right` when defined, falls back to `left`. `inputTokens` is
// recomputed from the merged breakdown so the inclusive total stays
// consistent with `nonCached + cacheRead + cacheWrite`.
const mergeUsage = (left: Usage | undefined, right: Usage | undefined) => {
  if (!left) return right
  if (!right) return left
  const nonCachedInputTokens = right.nonCachedInputTokens ?? left.nonCachedInputTokens
  const cacheReadInputTokens = right.cacheReadInputTokens ?? left.cacheReadInputTokens
  const cacheWriteInputTokens = right.cacheWriteInputTokens ?? left.cacheWriteInputTokens
  const inputTokens = ProviderShared.sumTokens(nonCachedInputTokens, cacheReadInputTokens, cacheWriteInputTokens)
  const outputTokens = right.outputTokens ?? left.outputTokens
  return new Usage({
    inputTokens,
    outputTokens,
    nonCachedInputTokens,
    cacheReadInputTokens,
    cacheWriteInputTokens,
    totalTokens: ProviderShared.totalTokens(inputTokens, outputTokens, undefined),
    providerMetadata: {
      anthropic: {
        ...(left.providerMetadata?.["anthropic"] ?? {}),
        ...(right.providerMetadata?.["anthropic"] ?? {}),
      },
    },
  })
}

// Server tool result blocks come whole in `content_block_start` (no streaming
// delta sequence). We convert the payload to a `tool-result` event with
// `providerExecuted: true`. The runtime appends it to the assistant message
// for round-trip; downstream consumers can inspect `result.value` for the
// structured payload.
const SERVER_TOOL_RESULT_NAMES: Record<AnthropicServerToolResultType, string> = {
  web_search_tool_result: "web_search",
  code_execution_tool_result: "code_execution",
  web_fetch_tool_result: "web_fetch",
}

const isServerToolResultType = (type: string): type is AnthropicServerToolResultType => type in SERVER_TOOL_RESULT_NAMES

const serverToolResultEvent = (block: NonNullable<AnthropicEvent["content_block"]>): LLMEvent | undefined => {
  if (!block.type || !isServerToolResultType(block.type)) return undefined
  const errorPayload =
    typeof block.content === "object" && block.content !== null && "type" in block.content
      ? String((block.content as Record<string, unknown>).type)
      : ""
  const isError = errorPayload.endsWith("_tool_result_error")
  return LLMEvent.toolResult({
    id: block.tool_use_id ?? "",
    name: SERVER_TOOL_RESULT_NAMES[block.type],
    result: isError ? { type: "error", value: block.content } : { type: "json", value: block.content },
    providerExecuted: true,
    providerMetadata: anthropicMetadata({ blockType: block.type }),
  })
}

type StepResult = readonly [ParserState, ReadonlyArray<LLMEvent>]

const NO_EVENTS: StepResult["1"] = []

const onMessageStart = (state: ParserState, event: AnthropicEvent): StepResult => {
  const usage = mapUsage(event.message?.usage)
  return [usage ? { ...state, usage: mergeUsage(state.usage, usage) } : state, NO_EVENTS]
}

const onContentBlockStart = (state: ParserState, event: AnthropicEvent): StepResult => {
  const block = event.content_block
  if (!block) return [state, NO_EVENTS]

  if ((block.type === "tool_use" || block.type === "server_tool_use") && event.index !== undefined) {
    return [
      {
        ...state,
        tools: ToolStream.start(state.tools, event.index, {
          id: block.id ?? String(event.index),
          name: block.name ?? "",
          providerExecuted: block.type === "server_tool_use",
        }),
      },
      NO_EVENTS,
    ]
  }

  if (block.type === "text" && block.text) {
    return [state, [LLMEvent.textDelta({ id: `text-${event.index ?? 0}`, text: block.text })]]
  }

  if (block.type === "thinking" && block.thinking) {
    return [
      state,
      [
        LLMEvent.reasoningDelta({
          id: `reasoning-${event.index ?? 0}`,
          text: block.thinking,
        }),
      ],
    ]
  }

  const result = serverToolResultEvent(block)
  return [state, result ? [result] : NO_EVENTS]
}

const onContentBlockDelta = Effect.fn("AnthropicMessages.onContentBlockDelta")(function* (
  state: ParserState,
  event: AnthropicEvent,
) {
  const delta = event.delta

  if (delta?.type === "text_delta" && delta.text) {
    return [state, [LLMEvent.textDelta({ id: `text-${event.index ?? 0}`, text: delta.text })]] satisfies StepResult
  }

  if (delta?.type === "thinking_delta" && delta.thinking) {
    return [
      state,
      [LLMEvent.reasoningDelta({ id: `reasoning-${event.index ?? 0}`, text: delta.thinking })],
    ] satisfies StepResult
  }

  if (delta?.type === "signature_delta" && delta.signature) {
    return [
      state,
      [
        LLMEvent.reasoningEnd({
          id: `reasoning-${event.index ?? 0}`,
          providerMetadata: anthropicMetadata({ signature: delta.signature }),
        }),
      ],
    ] satisfies StepResult
  }

  if (delta?.type === "input_json_delta" && event.index !== undefined) {
    if (!delta.partial_json) return [state, NO_EVENTS] satisfies StepResult
    const result = ToolStream.appendExisting(
      ADAPTER,
      state.tools,
      event.index,
      delta.partial_json,
      "Anthropic Messages tool argument delta is missing its tool call",
    )
    if (ToolStream.isError(result)) return yield* result
    return [{ ...state, tools: result.tools }, result.event ? [result.event] : NO_EVENTS] satisfies StepResult
  }

  return [state, NO_EVENTS] satisfies StepResult
})

const onContentBlockStop = Effect.fn("AnthropicMessages.onContentBlockStop")(function* (
  state: ParserState,
  event: AnthropicEvent,
) {
  if (event.index === undefined) return [state, NO_EVENTS] satisfies StepResult
  const result = yield* ToolStream.finish(ADAPTER, state.tools, event.index)
  return [{ ...state, tools: result.tools }, result.event ? [result.event] : NO_EVENTS] satisfies StepResult
})

const onMessageDelta = (state: ParserState, event: AnthropicEvent): StepResult => {
  const usage = mergeUsage(state.usage, mapUsage(event.usage))
  return [
    { ...state, usage },
    [
      LLMEvent.requestFinish({
        reason: mapFinishReason(event.delta?.stop_reason),
        usage,
        providerMetadata: event.delta?.stop_sequence
          ? anthropicMetadata({ stopSequence: event.delta.stop_sequence })
          : undefined,
      }),
    ],
  ]
}

const onError = (state: ParserState, event: AnthropicEvent): StepResult => [
  state,
  [LLMEvent.providerError({ message: event.error?.message ?? "Anthropic Messages stream error" })],
]

const step = (state: ParserState, event: AnthropicEvent) => {
  if (event.type === "message_start") return Effect.succeed(onMessageStart(state, event))
  if (event.type === "content_block_start") return Effect.succeed(onContentBlockStart(state, event))
  if (event.type === "content_block_delta") return onContentBlockDelta(state, event)
  if (event.type === "content_block_stop") return onContentBlockStop(state, event)
  if (event.type === "message_delta") return Effect.succeed(onMessageDelta(state, event))
  if (event.type === "error") return Effect.succeed(onError(state, event))
  return Effect.succeed<StepResult>([state, NO_EVENTS])
}

// =============================================================================
// Protocol And Anthropic Route
// =============================================================================
/**
 * The Anthropic Messages protocol — request body construction, body schema,
 * and the streaming-event state machine. Used by native Anthropic Cloud and
 * (once registered) Vertex Anthropic / Bedrock-hosted Anthropic passthrough.
 */
export const protocol = Protocol.make({
  id: ADAPTER,
  body: {
    schema: AnthropicMessagesBody,
    from: fromRequest,
  },
  stream: {
    event: Protocol.jsonEvent(AnthropicEvent),
    initial: () => ({ tools: ToolStream.empty<number>() }),
    step,
  },
})

export const route = Route.make({
  id: ADAPTER,
  protocol,
  endpoint: Endpoint.path(PATH),
  auth: Auth.apiKeyHeader("x-api-key"),
  framing: Framing.sse,
  headers: () => ({ "anthropic-version": "2023-06-01" }),
})

// =============================================================================
// Model Helper
// =============================================================================
export const model = Route.model(route, {
  provider: "anthropic",
  baseURL: DEFAULT_BASE_URL,
})

export * as AnthropicMessages from "./anthropic-messages"
580
packages/llm/src/protocols/bedrock-converse.ts
Normal file
@@ -0,0 +1,580 @@
import { Effect, Schema } from "effect"
import { Route, type RouteModelInput } from "../route/client"
import { Endpoint } from "../route/endpoint"
import { Protocol } from "../route/protocol"
import {
  LLMEvent,
  Usage,
  type CacheHint,
  type FinishReason,
  type LLMRequest,
  type ToolCallPart,
  type ToolDefinition,
  type ToolResultPart,
} from "../schema"
import { BedrockEventStream } from "./bedrock-event-stream"
import { JsonObject, optionalArray, ProviderShared } from "./shared"
import { BedrockAuth, type Credentials as BedrockCredentials } from "./utils/bedrock-auth"
import { BedrockCache } from "./utils/bedrock-cache"
import { BedrockMedia } from "./utils/bedrock-media"
import { ToolStream } from "./utils/tool-stream"

const ADAPTER = "bedrock-converse"

export type { Credentials as BedrockCredentials } from "./utils/bedrock-auth"

// =============================================================================
// Public Model Input
// =============================================================================
export type BedrockConverseModelInput = RouteModelInput & {
  /**
   * Bearer API key (Bedrock's newer API key auth). Sets the `Authorization`
   * header and bypasses SigV4 signing. Mutually exclusive with `credentials`.
   */
  readonly apiKey?: string
  /**
   * AWS credentials for SigV4 signing. The route signs each request at
   * `toHttp` time using `aws4fetch`. Mutually exclusive with `apiKey`.
   */
  readonly credentials?: BedrockCredentials
  readonly headers?: Record<string, string>
}

// =============================================================================
// Request Body Schema
// =============================================================================
const BedrockTextBlock = Schema.Struct({
  text: Schema.String,
})
type BedrockTextBlock = Schema.Schema.Type<typeof BedrockTextBlock>

const BedrockToolUseBlock = Schema.Struct({
  toolUse: Schema.Struct({
    toolUseId: Schema.String,
    name: Schema.String,
    input: Schema.Unknown,
  }),
})
type BedrockToolUseBlock = Schema.Schema.Type<typeof BedrockToolUseBlock>

const BedrockToolResultContentItem = Schema.Union([
  Schema.Struct({ text: Schema.String }),
  Schema.Struct({ json: Schema.Unknown }),
])

const BedrockToolResultBlock = Schema.Struct({
  toolResult: Schema.Struct({
    toolUseId: Schema.String,
    content: Schema.Array(BedrockToolResultContentItem),
    status: Schema.optional(Schema.Literals(["success", "error"])),
  }),
})
type BedrockToolResultBlock = Schema.Schema.Type<typeof BedrockToolResultBlock>

const BedrockReasoningBlock = Schema.Struct({
  reasoningContent: Schema.Struct({
    reasoningText: Schema.optional(
      Schema.Struct({
        text: Schema.String,
        signature: Schema.optional(Schema.String),
      }),
    ),
  }),
})

const BedrockUserBlock = Schema.Union([
  BedrockTextBlock,
  BedrockMedia.ImageBlock,
  BedrockMedia.DocumentBlock,
  BedrockToolResultBlock,
  BedrockCache.CachePointBlock,
])
type BedrockUserBlock = Schema.Schema.Type<typeof BedrockUserBlock>

const BedrockAssistantBlock = Schema.Union([
  BedrockTextBlock,
  BedrockReasoningBlock,
  BedrockToolUseBlock,
  BedrockCache.CachePointBlock,
])
type BedrockAssistantBlock = Schema.Schema.Type<typeof BedrockAssistantBlock>

const BedrockMessage = Schema.Union([
  Schema.Struct({ role: Schema.Literal("user"), content: Schema.Array(BedrockUserBlock) }),
  Schema.Struct({ role: Schema.Literal("assistant"), content: Schema.Array(BedrockAssistantBlock) }),
]).pipe(Schema.toTaggedUnion("role"))
type BedrockMessage = Schema.Schema.Type<typeof BedrockMessage>

const BedrockSystemBlock = Schema.Union([BedrockTextBlock, BedrockCache.CachePointBlock])
type BedrockSystemBlock = Schema.Schema.Type<typeof BedrockSystemBlock>

const BedrockToolSpec = Schema.Struct({
  toolSpec: Schema.Struct({
    name: Schema.String,
    description: Schema.String,
    inputSchema: Schema.Struct({
      json: JsonObject,
    }),
  }),
})
type BedrockToolSpec = Schema.Schema.Type<typeof BedrockToolSpec>

const BedrockTool = Schema.Union([BedrockToolSpec, BedrockCache.CachePointBlock])
type BedrockTool = Schema.Schema.Type<typeof BedrockTool>

const BedrockToolChoice = Schema.Union([
  Schema.Struct({ auto: Schema.Struct({}) }),
  Schema.Struct({ any: Schema.Struct({}) }),
  Schema.Struct({ tool: Schema.Struct({ name: Schema.String }) }),
])

const BedrockBodyFields = {
  modelId: Schema.String,
  messages: Schema.Array(BedrockMessage),
  system: optionalArray(BedrockSystemBlock),
  inferenceConfig: Schema.optional(
    Schema.Struct({
      maxTokens: Schema.optional(Schema.Number),
      temperature: Schema.optional(Schema.Number),
      topP: Schema.optional(Schema.Number),
      stopSequences: optionalArray(Schema.String),
    }),
  ),
  toolConfig: Schema.optional(
    Schema.Struct({
      tools: Schema.Array(BedrockTool),
      toolChoice: Schema.optional(BedrockToolChoice),
    }),
  ),
  additionalModelRequestFields: Schema.optional(JsonObject),
}
const BedrockConverseBody = Schema.Struct(BedrockBodyFields)
export type BedrockConverseBody = Schema.Schema.Type<typeof BedrockConverseBody>

const BedrockUsageSchema = Schema.Struct({
  inputTokens: Schema.optional(Schema.Number),
  outputTokens: Schema.optional(Schema.Number),
  totalTokens: Schema.optional(Schema.Number),
  cacheReadInputTokens: Schema.optional(Schema.Number),
  cacheWriteInputTokens: Schema.optional(Schema.Number),
})
type BedrockUsageSchema = Schema.Schema.Type<typeof BedrockUsageSchema>

// Streaming event shape — the AWS event stream wraps each JSON payload by its
// `:event-type` header (e.g. `messageStart`, `contentBlockDelta`). We
// reconstruct that wrapping in the `BedrockEventStream` framing so the event
// schema can stay a plain discriminated record.
const BedrockEvent = Schema.Struct({
  messageStart: Schema.optional(Schema.Struct({ role: Schema.String })),
  contentBlockStart: Schema.optional(
    Schema.Struct({
      contentBlockIndex: Schema.Number,
      start: Schema.optional(
        Schema.Struct({
          toolUse: Schema.optional(Schema.Struct({ toolUseId: Schema.String, name: Schema.String })),
        }),
      ),
    }),
  ),
  contentBlockDelta: Schema.optional(
    Schema.Struct({
      contentBlockIndex: Schema.Number,
      delta: Schema.optional(
        Schema.Struct({
          text: Schema.optional(Schema.String),
          toolUse: Schema.optional(Schema.Struct({ input: Schema.String })),
          reasoningContent: Schema.optional(
            Schema.Struct({
              text: Schema.optional(Schema.String),
              signature: Schema.optional(Schema.String),
            }),
          ),
        }),
      ),
    }),
  ),
  contentBlockStop: Schema.optional(Schema.Struct({ contentBlockIndex: Schema.Number })),
  messageStop: Schema.optional(
    Schema.Struct({
      stopReason: Schema.String,
      additionalModelResponseFields: Schema.optional(Schema.Unknown),
    }),
  ),
  metadata: Schema.optional(
    Schema.Struct({
      usage: Schema.optional(BedrockUsageSchema),
      metrics: Schema.optional(Schema.Unknown),
    }),
  ),
  internalServerException: Schema.optional(Schema.Struct({ message: Schema.String })),
  modelStreamErrorException: Schema.optional(Schema.Struct({ message: Schema.String })),
  validationException: Schema.optional(Schema.Struct({ message: Schema.String })),
  throttlingException: Schema.optional(Schema.Struct({ message: Schema.String })),
  serviceUnavailableException: Schema.optional(Schema.Struct({ message: Schema.String })),
})
type BedrockEvent = Schema.Schema.Type<typeof BedrockEvent>

// =============================================================================
// Request Lowering
// =============================================================================
const lowerToolSpec = (tool: ToolDefinition): BedrockToolSpec => ({
  toolSpec: {
    name: tool.name,
    description: tool.description,
    inputSchema: { json: tool.inputSchema },
  },
})

const lowerTools = (breakpoints: BedrockCache.Breakpoints, tools: ReadonlyArray<ToolDefinition>): BedrockTool[] => {
  const result: BedrockTool[] = []
  for (const tool of tools) {
    result.push(lowerToolSpec(tool))
    const cachePoint = BedrockCache.block(breakpoints, tool.cache)
    if (cachePoint) result.push(cachePoint)
  }
  return result
}

const textWithCache = (
  breakpoints: BedrockCache.Breakpoints,
  text: string,
  cache: CacheHint | undefined,
): Array<BedrockTextBlock | BedrockCache.CachePointBlock> => {
  const cachePoint = BedrockCache.block(breakpoints, cache)
  return cachePoint ? [{ text }, cachePoint] : [{ text }]
}

const lowerToolChoice = (toolChoice: NonNullable<LLMRequest["toolChoice"]>) =>
  ProviderShared.matchToolChoice("Bedrock Converse", toolChoice, {
    auto: () => ({ auto: {} }) as const,
    none: () => undefined,
    required: () => ({ any: {} }) as const,
    tool: (name) => ({ tool: { name } }) as const,
  })

const lowerToolCall = (part: ToolCallPart): BedrockToolUseBlock => ({
  toolUse: {
    toolUseId: part.id,
    name: part.name,
    input: part.input,
  },
})

const lowerToolResult = (part: ToolResultPart): BedrockToolResultBlock => ({
  toolResult: {
    toolUseId: part.id,
    content:
      part.result.type === "text" || part.result.type === "error"
        ? [{ text: ProviderShared.toolResultText(part) }]
        : [{ json: part.result.value }],
    status: part.result.type === "error" ? "error" : "success",
  },
})

const lowerMessages = Effect.fn("BedrockConverse.lowerMessages")(function* (
  request: LLMRequest,
  breakpoints: BedrockCache.Breakpoints,
) {
  const messages: BedrockMessage[] = []

  for (const message of request.messages) {
    if (message.role === "user") {
      const content: BedrockUserBlock[] = []
      for (const part of message.content) {
        if (!ProviderShared.supportsContent(part, ["text", "media"]))
          return yield* ProviderShared.unsupportedContent("Bedrock Converse", "user", ["text", "media"])
        if (part.type === "text") {
          content.push(...textWithCache(breakpoints, part.text, part.cache))
          continue
        }
        if (part.type === "media") {
          content.push(yield* BedrockMedia.lower(part))
          continue
        }
      }
      messages.push({ role: "user", content })
      continue
    }

    if (message.role === "assistant") {
      const content: BedrockAssistantBlock[] = []
      for (const part of message.content) {
        if (!ProviderShared.supportsContent(part, ["text", "reasoning", "tool-call"]))
          return yield* ProviderShared.unsupportedContent("Bedrock Converse", "assistant", [
            "text",
            "reasoning",
            "tool-call",
          ])
        if (part.type === "text") {
          content.push(...textWithCache(breakpoints, part.text, part.cache))
          continue
        }
        if (part.type === "reasoning") {
          content.push({
            reasoningContent: {
              reasoningText: { text: part.text, signature: part.encrypted },
            },
          })
          continue
        }
        if (part.type === "tool-call") {
          content.push(lowerToolCall(part))
          continue
        }
      }
      messages.push({ role: "assistant", content })
      continue
    }

    const content: BedrockUserBlock[] = []
    for (const part of message.content) {
      if (!ProviderShared.supportsContent(part, ["tool-result"]))
        return yield* ProviderShared.unsupportedContent("Bedrock Converse", "tool", ["tool-result"])
      content.push(lowerToolResult(part))
      const cachePoint = BedrockCache.block(breakpoints, part.cache)
      if (cachePoint) content.push(cachePoint)
    }
    messages.push({ role: "user", content })
  }

  return messages
})

// System prompts share the cache-point convention: emit the text block, then
// optionally a positional `cachePoint` marker.
const lowerSystem = (
  breakpoints: BedrockCache.Breakpoints,
  system: ReadonlyArray<LLMRequest["system"][number]>,
): BedrockSystemBlock[] => system.flatMap((part) => textWithCache(breakpoints, part.text, part.cache))

const fromRequest = Effect.fn("BedrockConverse.fromRequest")(function* (request: LLMRequest) {
  const toolChoice = request.toolChoice ? yield* lowerToolChoice(request.toolChoice) : undefined
  const generation = request.generation
  // Bedrock-Claude shares Anthropic's 4-breakpoint cap. Spend the budget in
  // tools → system → messages order to favour the highest-impact prefixes.
  const breakpoints = BedrockCache.breakpoints()
  const toolConfig =
    request.tools.length > 0 && request.toolChoice?.type !== "none"
      ? { tools: lowerTools(breakpoints, request.tools), toolChoice }
      : undefined
  const system = request.system.length === 0 ? undefined : lowerSystem(breakpoints, request.system)
  const messages = yield* lowerMessages(request, breakpoints)
  if (breakpoints.dropped > 0) {
    yield* Effect.logWarning(
      `Bedrock Converse: dropped ${breakpoints.dropped} cache breakpoint(s); the API allows at most ${BedrockCache.BEDROCK_BREAKPOINT_CAP} per request.`,
    )
  }
  return {
    modelId: request.model.id,
    messages,
    system,
    inferenceConfig:
      generation?.maxTokens === undefined &&
      generation?.temperature === undefined &&
      generation?.topP === undefined &&
      (generation?.stop === undefined || generation.stop.length === 0)
        ? undefined
        : {
            maxTokens: generation?.maxTokens,
            temperature: generation?.temperature,
            topP: generation?.topP,
            stopSequences: generation?.stop,
          },
    toolConfig,
  }
})

// =============================================================================
// Stream Parsing
// =============================================================================
const mapFinishReason = (reason: string): FinishReason => {
  if (reason === "end_turn" || reason === "stop_sequence") return "stop"
  if (reason === "max_tokens") return "length"
  if (reason === "tool_use") return "tool-calls"
  if (reason === "content_filtered" || reason === "guardrail_intervened") return "content-filter"
  return "unknown"
}

// AWS Bedrock Converse reports `inputTokens` (inclusive total) with
// `cacheReadInputTokens` and `cacheWriteInputTokens` as subsets. Pass
// the total through and derive the non-cached breakdown. Bedrock does
// not break reasoning out of `outputTokens` for any current model.
const mapUsage = (usage: BedrockUsageSchema | undefined): Usage | undefined => {
  if (!usage) return undefined
  const cacheTotal = (usage.cacheReadInputTokens ?? 0) + (usage.cacheWriteInputTokens ?? 0)
  const nonCached = ProviderShared.subtractTokens(usage.inputTokens, cacheTotal)
  return new Usage({
    inputTokens: usage.inputTokens,
    outputTokens: usage.outputTokens,
    nonCachedInputTokens: nonCached,
    cacheReadInputTokens: usage.cacheReadInputTokens,
    cacheWriteInputTokens: usage.cacheWriteInputTokens,
    totalTokens: ProviderShared.totalTokens(usage.inputTokens, usage.outputTokens, usage.totalTokens),
    providerMetadata: { bedrock: usage },
  })
}
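
// Worked example (hypothetical numbers, assuming subtractTokens returns the
// difference of its defined arguments): inputTokens = 1000 with
// cacheReadInputTokens = 900 and cacheWriteInputTokens = 88 yields
// nonCached = 1000 - 988 = 12; the inclusive inputTokens passes through as-is.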

interface ParserState {
  readonly tools: ToolStream.State<number>
  // Bedrock splits the finish into `messageStop` (carries `stopReason`) and
  // `metadata` (carries usage). Hold the terminal event in state so `onHalt`
  // can emit exactly one finish after both chunks have had a chance to arrive.
  readonly pendingFinish: { readonly reason: FinishReason; readonly usage?: Usage } | undefined
}

const step = (state: ParserState, event: BedrockEvent) =>
  Effect.gen(function* () {
    if (event.contentBlockStart?.start?.toolUse) {
      const index = event.contentBlockStart.contentBlockIndex
      return [
        {
          ...state,
          tools: ToolStream.start(state.tools, index, {
            id: event.contentBlockStart.start.toolUse.toolUseId,
            name: event.contentBlockStart.start.toolUse.name,
          }),
        },
        [],
      ] as const
    }

    if (event.contentBlockDelta?.delta?.text) {
      return [
        state,
        [
          LLMEvent.textDelta({
            id: `text-${event.contentBlockDelta.contentBlockIndex}`,
            text: event.contentBlockDelta.delta.text,
          }),
        ],
      ] as const
    }

    if (event.contentBlockDelta?.delta?.reasoningContent?.text) {
      return [
        state,
        [
          LLMEvent.reasoningDelta({
            id: `reasoning-${event.contentBlockDelta.contentBlockIndex}`,
            text: event.contentBlockDelta.delta.reasoningContent.text,
          }),
        ],
      ] as const
    }

    if (event.contentBlockDelta?.delta?.toolUse) {
      const index = event.contentBlockDelta.contentBlockIndex
      const result = ToolStream.appendExisting(
        ADAPTER,
        state.tools,
        index,
        event.contentBlockDelta.delta.toolUse.input,
        "Bedrock Converse tool delta is missing its tool call",
      )
      if (ToolStream.isError(result)) return yield* result
      return [{ ...state, tools: result.tools }, result.event ? [result.event] : []] as const
    }

    if (event.contentBlockStop) {
      const result = yield* ToolStream.finish(ADAPTER, state.tools, event.contentBlockStop.contentBlockIndex)
      return [{ ...state, tools: result.tools }, result.event ? [result.event] : []] as const
    }

    if (event.messageStop) {
      return [
        {
          ...state,
          pendingFinish: { reason: mapFinishReason(event.messageStop.stopReason), usage: state.pendingFinish?.usage },
        },
        [],
      ] as const
    }

    if (event.metadata) {
      const usage = mapUsage(event.metadata.usage)
      return [{ ...state, pendingFinish: { reason: state.pendingFinish?.reason ?? "stop", usage } }, []] as const
    }

    if (event.internalServerException || event.modelStreamErrorException || event.serviceUnavailableException) {
      const message =
        event.internalServerException?.message ??
        event.modelStreamErrorException?.message ??
        event.serviceUnavailableException?.message ??
        "Bedrock Converse stream error"
      return [state, [LLMEvent.providerError({ message, retryable: true })]] as const
    }

    if (event.validationException || event.throttlingException) {
      const message =
        event.validationException?.message ?? event.throttlingException?.message ?? "Bedrock Converse error"
      return [state, [LLMEvent.providerError({ message, retryable: event.throttlingException !== undefined })]] as const
    }

    return [state, []] as const
  })

const framing = BedrockEventStream.framing(ADAPTER)

const onHalt = (state: ParserState): ReadonlyArray<LLMEvent> =>
  state.pendingFinish
    ? [LLMEvent.requestFinish({ reason: state.pendingFinish.reason, usage: state.pendingFinish.usage })]
    : []

// =============================================================================
// Protocol And Bedrock Route
// =============================================================================
/**
 * The Bedrock Converse protocol — request body construction, body schema, and
 * the streaming-event state machine.
 */
export const protocol = Protocol.make({
  id: ADAPTER,
  body: {
    schema: BedrockConverseBody,
    from: fromRequest,
  },
  stream: {
    event: BedrockEvent,
    initial: () => ({ tools: ToolStream.empty<number>(), pendingFinish: undefined }),
    step,
    onHalt,
  },
})

export const route = Route.make({
  id: ADAPTER,
  protocol,
  // Bedrock's URL embeds the region in the host (set on `model.baseURL` by
  // the provider helper from credentials) and the validated modelId in the
  // path. We read the validated body so the URL matches the body that gets
  // signed.
  endpoint: Endpoint.path<BedrockConverseBody>(
    ({ body }) => `/model/${encodeURIComponent(body.modelId)}/converse-stream`,
  ),
  auth: BedrockAuth.auth,
  framing,
})

export const nativeCredentials = BedrockAuth.nativeCredentials

const bedrockModel = Route.model(
  route,
  {
    provider: "bedrock",
  },
  {
    mapInput: (input: BedrockConverseModelInput) => {
      const { credentials, ...rest } = input
      const region = credentials?.region ?? "us-east-1"
      return {
        ...rest,
        baseURL: rest.baseURL ?? `https://bedrock-runtime.${region}.amazonaws.com`,
        native: nativeCredentials(input.native, credentials),
      }
    },
  },
)
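
// For example (hypothetical input): passing credentials with region
// "eu-west-1" and no explicit baseURL resolves the endpoint to
// https://bedrock-runtime.eu-west-1.amazonaws.com before SigV4 signing.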

export const model = bedrockModel

export * as BedrockConverse from "./bedrock-converse"
87
packages/llm/src/protocols/bedrock-event-stream.ts
Normal file
@@ -0,0 +1,87 @@
import { EventStreamCodec } from "@smithy/eventstream-codec"
import { fromUtf8, toUtf8 } from "@smithy/util-utf8"
import { Effect, Stream } from "effect"
import type { Framing } from "../route/framing"
import { ProviderShared } from "./shared"

// Bedrock streams responses using the AWS event stream binary protocol — each
// frame is `[length:4][headers-length:4][prelude-crc:4][headers][payload][crc:4]`.
// We use `@smithy/eventstream-codec` to validate framing and CRCs, then
// reconstruct the JSON wrapping by `:event-type` so the chunk schema can match.
const eventCodec = new EventStreamCodec(toUtf8, fromUtf8)
const utf8 = new TextDecoder()

// Cursor-tracking buffer state. Bytes accumulate in `buffer`; `offset` is the
// read position. Reading by `subarray` is zero-copy. We only allocate a fresh
// buffer when a new network chunk arrives and we need to append.
interface FrameBufferState {
  readonly buffer: Uint8Array
  readonly offset: number
}

const initialFrameBuffer: FrameBufferState = { buffer: new Uint8Array(0), offset: 0 }

const appendChunk = (state: FrameBufferState, chunk: Uint8Array): FrameBufferState => {
  const remaining = state.buffer.length - state.offset
  // Compact: drop the consumed prefix and append the new chunk in one alloc.
  // This bounds buffer growth to at most one network chunk past the live
  // window, regardless of stream length.
  const next = new Uint8Array(remaining + chunk.length)
  next.set(state.buffer.subarray(state.offset), 0)
  next.set(chunk, remaining)
  return { buffer: next, offset: 0 }
}
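
// Illustrative trace (hypothetical sizes): buffer.length = 10 with offset = 6
// leaves 4 live bytes; appending an 8-byte chunk allocates a 12-byte buffer,
// copies the live tail to position 0, and resets offset to 0.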

const consumeFrames = (route: string) => (state: FrameBufferState, chunk: Uint8Array) =>
  Effect.gen(function* () {
    let cursor = appendChunk(state, chunk)
    const out: object[] = []
    while (cursor.buffer.length - cursor.offset >= 4) {
      const view = cursor.buffer.subarray(cursor.offset)
      const totalLength = new DataView(view.buffer, view.byteOffset, view.byteLength).getUint32(0, false)
      if (view.length < totalLength) break

      const decoded = yield* Effect.try({
        try: () => eventCodec.decode(view.subarray(0, totalLength)),
        catch: (error) =>
          ProviderShared.eventError(
            route,
            `Failed to decode Bedrock Converse event-stream frame: ${
              error instanceof Error ? error.message : String(error)
            }`,
          ),
      })
      cursor = { buffer: cursor.buffer, offset: cursor.offset + totalLength }

      if (decoded.headers[":message-type"]?.value !== "event") continue
      const eventType = decoded.headers[":event-type"]?.value
      if (typeof eventType !== "string") continue
      const payload = utf8.decode(decoded.body)
      if (!payload) continue
      // The AWS event stream pads short payloads with a `p` field. Drop it
      // before handing the object to the chunk schema. JSON decode goes
      // through the shared Schema-driven codec to satisfy the package rule
      // against ad-hoc `JSON.parse` calls.
      const parsed = (yield* ProviderShared.parseJson(
        route,
        payload,
        "Failed to parse Bedrock Converse event-stream payload",
      )) as Record<string, unknown>
      delete parsed.p
      out.push({ [eventType]: parsed })
    }
    return [cursor, out] as const
  })

/**
 * AWS event-stream framing for Bedrock Converse. Each frame is decoded by
 * `@smithy/eventstream-codec` (length + header + payload + CRC) and rewrapped
 * under its `:event-type` header so the chunk schema can match the JSON
 * payload directly.
 */
export const framing = (route: string): Framing<object> => ({
  id: "aws-event-stream",
  frame: (bytes) => bytes.pipe(Stream.mapAccumEffect(() => initialFrameBuffer, consumeFrames(route))),
})

export * as BedrockEventStream from "./bedrock-event-stream"
414
packages/llm/src/protocols/gemini.ts
Normal file
@@ -0,0 +1,414 @@
import { Effect, Schema } from "effect"
import { Route } from "../route/client"
import { Auth } from "../route/auth"
import { Endpoint } from "../route/endpoint"
import { Framing } from "../route/framing"
import { Protocol } from "../route/protocol"
import {
  LLMEvent,
  Usage,
  type FinishReason,
  type LLMRequest,
  type MediaPart,
  type TextPart,
  type ToolCallPart,
  type ToolDefinition,
} from "../schema"
import { JsonObject, optionalArray, ProviderShared } from "./shared"
import { GeminiToolSchema } from "./utils/gemini-tool-schema"

const ADAPTER = "gemini"
export const DEFAULT_BASE_URL = "https://generativelanguage.googleapis.com/v1beta"

// =============================================================================
// Request Body Schema
// =============================================================================
const GeminiTextPart = Schema.Struct({
  text: Schema.String,
  thought: Schema.optional(Schema.Boolean),
  thoughtSignature: Schema.optional(Schema.String),
})

const GeminiInlineDataPart = Schema.Struct({
  inlineData: Schema.Struct({
    mimeType: Schema.String,
    data: Schema.String,
  }),
})

const GeminiFunctionCallPart = Schema.Struct({
  functionCall: Schema.Struct({
    name: Schema.String,
    args: Schema.Unknown,
  }),
  thoughtSignature: Schema.optional(Schema.String),
})

const GeminiFunctionResponsePart = Schema.Struct({
  functionResponse: Schema.Struct({
    name: Schema.String,
    response: Schema.Unknown,
  }),
})

const GeminiContentPart = Schema.Union([
  GeminiTextPart,
  GeminiInlineDataPart,
  GeminiFunctionCallPart,
  GeminiFunctionResponsePart,
])

const GeminiContent = Schema.Struct({
  role: Schema.Literals(["user", "model"]),
  parts: Schema.Array(GeminiContentPart),
})
type GeminiContent = Schema.Schema.Type<typeof GeminiContent>

const GeminiSystemInstruction = Schema.Struct({
  parts: Schema.Array(Schema.Struct({ text: Schema.String })),
})

const GeminiFunctionDeclaration = Schema.Struct({
  name: Schema.String,
  description: Schema.String,
  parameters: Schema.optional(JsonObject),
})

const GeminiTool = Schema.Struct({
  functionDeclarations: Schema.Array(GeminiFunctionDeclaration),
})

const GeminiToolConfig = Schema.Struct({
  functionCallingConfig: Schema.Struct({
    mode: Schema.Literals(["AUTO", "NONE", "ANY"]),
    allowedFunctionNames: optionalArray(Schema.String),
  }),
})

const GeminiThinkingConfig = Schema.Struct({
  thinkingBudget: Schema.optional(Schema.Number),
  includeThoughts: Schema.optional(Schema.Boolean),
})

const GeminiGenerationConfig = Schema.Struct({
  maxOutputTokens: Schema.optional(Schema.Number),
  temperature: Schema.optional(Schema.Number),
  topP: Schema.optional(Schema.Number),
  topK: Schema.optional(Schema.Number),
  stopSequences: optionalArray(Schema.String),
  thinkingConfig: Schema.optional(GeminiThinkingConfig),
})

const GeminiBodyFields = {
  contents: Schema.Array(GeminiContent),
  systemInstruction: Schema.optional(GeminiSystemInstruction),
  tools: optionalArray(GeminiTool),
  toolConfig: Schema.optional(GeminiToolConfig),
  generationConfig: Schema.optional(GeminiGenerationConfig),
}
const GeminiBody = Schema.Struct(GeminiBodyFields)
export type GeminiBody = Schema.Schema.Type<typeof GeminiBody>

const GeminiUsage = Schema.Struct({
  cachedContentTokenCount: Schema.optional(Schema.Number),
  thoughtsTokenCount: Schema.optional(Schema.Number),
  promptTokenCount: Schema.optional(Schema.Number),
  candidatesTokenCount: Schema.optional(Schema.Number),
  totalTokenCount: Schema.optional(Schema.Number),
})
type GeminiUsage = Schema.Schema.Type<typeof GeminiUsage>

const GeminiCandidate = Schema.Struct({
  content: Schema.optional(GeminiContent),
  finishReason: Schema.optional(Schema.String),
})

const GeminiEvent = Schema.Struct({
  candidates: optionalArray(GeminiCandidate),
  usageMetadata: Schema.optional(GeminiUsage),
})
type GeminiEvent = Schema.Schema.Type<typeof GeminiEvent>

interface ParserState {
  readonly finishReason?: string
  readonly hasToolCalls: boolean
  readonly nextToolCallId: number
  readonly usage?: Usage
}

const invalid = ProviderShared.invalidRequest

const mediaData = ProviderShared.mediaBytes

// =============================================================================
// Tool Schema Conversion
// =============================================================================
// Tool-schema conversion has two distinct concerns:
//
// 1. Sanitize — fix common authoring mistakes Gemini rejects: integer/number
//    enums (must be strings), `required` entries that don't match a property,
//    untyped arrays (`items` must be present), and `properties`/`required`
//    keys on non-object scalars. Mirrors OpenCode's historical Gemini rules.
//
// 2. Project — lossy mapping from JSON Schema to Gemini's schema dialect:
//    drop empty objects, derive `nullable: true` from `type: [..., "null"]`,
//    coerce `const` to `[const]` enum, recurse properties/items, propagate
//    only an allowlisted set of keys (description, required, format, type,
//    properties, items, allOf, anyOf, oneOf, minLength). Anything outside the
//    allowlist (e.g. `additionalProperties`, `$ref`) is silently dropped.
//
// Sanitize runs first, then project. The implementation lives in
// `utils/gemini-tool-schema` so this protocol keeps the same shape as the other
// provider protocols.
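
// Illustrative before/after (hypothetical schema, assuming sanitize coerces
// enum members to strings per the rules above): { type: "string", enum: [1, 2] }
// sanitizes to { type: "string", enum: ["1", "2"] }, and a { const: "a" }
// member projects to { enum: ["a"] } in the Gemini dialect.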
|
||||
|
||||
// =============================================================================
|
||||
// Request Lowering
|
||||
// =============================================================================
|
||||
const lowerTool = (tool: ToolDefinition) => ({
|
||||
name: tool.name,
|
||||
description: tool.description,
|
||||
parameters: GeminiToolSchema.convert(tool.inputSchema),
|
||||
})
|
||||
|
||||
const lowerToolConfig = (toolChoice: NonNullable<LLMRequest["toolChoice"]>) =>
|
||||
ProviderShared.matchToolChoice("Gemini", toolChoice, {
|
||||
auto: () => ({ functionCallingConfig: { mode: "AUTO" as const } }),
|
||||
none: () => ({ functionCallingConfig: { mode: "NONE" as const } }),
|
||||
required: () => ({ functionCallingConfig: { mode: "ANY" as const } }),
|
||||
tool: (name) => ({ functionCallingConfig: { mode: "ANY" as const, allowedFunctionNames: [name] } }),
|
||||
})
|
||||
|
||||
const lowerUserPart = (part: TextPart | MediaPart) =>
|
||||
part.type === "text" ? { text: part.text } : { inlineData: { mimeType: part.mediaType, data: mediaData(part) } }
|
||||
|
||||
const lowerToolCall = (part: ToolCallPart) => ({
|
||||
functionCall: { name: part.name, args: part.input },
|
||||
})
|
||||
|
||||
const lowerMessages = Effect.fn("Gemini.lowerMessages")(function* (request: LLMRequest) {
|
||||
const contents: GeminiContent[] = []
|
||||
|
||||
for (const message of request.messages) {
|
||||
if (message.role === "user") {
|
||||
const parts: Array<Schema.Schema.Type<typeof GeminiContentPart>> = []
|
||||
for (const part of message.content) {
|
||||
if (!ProviderShared.supportsContent(part, ["text", "media"]))
|
||||
return yield* ProviderShared.unsupportedContent("Gemini", "user", ["text", "media"])
|
||||
parts.push(lowerUserPart(part))
|
||||
}
|
||||
contents.push({ role: "user", parts })
|
||||
continue
|
||||
}
|
||||
|
||||
if (message.role === "assistant") {
|
||||
const parts: Array<Schema.Schema.Type<typeof GeminiContentPart>> = []
|
||||
for (const part of message.content) {
|
||||
if (!ProviderShared.supportsContent(part, ["text", "reasoning", "tool-call"]))
|
||||
return yield* ProviderShared.unsupportedContent("Gemini", "assistant", ["text", "reasoning", "tool-call"])
|
||||
if (part.type === "text") {
|
||||
parts.push({ text: part.text })
|
||||
continue
|
||||
}
|
||||
if (part.type === "reasoning") {
|
||||
parts.push({ text: part.text, thought: true })
|
||||
continue
|
||||
}
|
||||
if (part.type === "tool-call") {
|
||||
parts.push(lowerToolCall(part))
|
||||
continue
|
||||
}
|
||||
}
|
||||
contents.push({ role: "model", parts })
|
||||
continue
    }

    const parts: Array<Schema.Schema.Type<typeof GeminiContentPart>> = []
    for (const part of message.content) {
      if (!ProviderShared.supportsContent(part, ["tool-result"]))
        return yield* ProviderShared.unsupportedContent("Gemini", "tool", ["tool-result"])
      parts.push({
        functionResponse: {
          name: part.name,
          response: {
            name: part.name,
            content: ProviderShared.toolResultText(part),
          },
        },
      })
    }
    contents.push({ role: "user", parts })
  }

  return contents
})

const geminiOptions = (request: LLMRequest) => request.providerOptions?.gemini

const thinkingConfig = (request: LLMRequest) => {
  const value = geminiOptions(request)?.thinkingConfig
  if (!ProviderShared.isRecord(value)) return undefined
  const result = {
    thinkingBudget: typeof value.thinkingBudget === "number" ? value.thinkingBudget : undefined,
    includeThoughts: typeof value.includeThoughts === "boolean" ? value.includeThoughts : undefined,
  }
  return Object.values(result).some((item) => item !== undefined) ? result : undefined
}

const fromRequest = Effect.fn("Gemini.fromRequest")(function* (request: LLMRequest) {
  const toolsEnabled = request.tools.length > 0 && request.toolChoice?.type !== "none"
  const generation = request.generation
  const generationConfig = {
    maxOutputTokens: generation?.maxTokens,
    temperature: generation?.temperature,
    topP: generation?.topP,
    topK: generation?.topK,
    stopSequences: generation?.stop,
    thinkingConfig: thinkingConfig(request),
  }

  return {
    contents: yield* lowerMessages(request),
    systemInstruction:
      request.system.length === 0 ? undefined : { parts: [{ text: ProviderShared.joinText(request.system) }] },
    tools: toolsEnabled ? [{ functionDeclarations: request.tools.map(lowerTool) }] : undefined,
    toolConfig: toolsEnabled && request.toolChoice ? yield* lowerToolConfig(request.toolChoice) : undefined,
    generationConfig: Object.values(generationConfig).some((value) => value !== undefined)
      ? generationConfig
      : undefined,
  }
})

// =============================================================================
// Stream Parsing
// =============================================================================
// Gemini reports `promptTokenCount` (inclusive total) with a
// `cachedContentTokenCount` subset. `candidatesTokenCount` is *exclusive*
// of `thoughtsTokenCount` — visible-only, not a total — so we sum the two
// to produce the inclusive `outputTokens` the rest of the contract expects.
const mapUsage = (usage: GeminiUsage | undefined) => {
  if (!usage) return undefined
  const cached = usage.cachedContentTokenCount
  const nonCached = ProviderShared.subtractTokens(usage.promptTokenCount, cached)
  // `candidatesTokenCount` is visible-only; sum with thoughts to produce the
  // inclusive `outputTokens` the contract expects. Only compute the total
  // when the visible component is reported — otherwise we'd fabricate an
  // inclusive number from a partial breakdown.
  const outputTokens =
    usage.candidatesTokenCount !== undefined ? usage.candidatesTokenCount + (usage.thoughtsTokenCount ?? 0) : undefined
  return new Usage({
    inputTokens: usage.promptTokenCount,
    outputTokens,
    nonCachedInputTokens: nonCached,
    cacheReadInputTokens: cached,
    reasoningTokens: usage.thoughtsTokenCount,
    totalTokens: ProviderShared.totalTokens(usage.promptTokenCount, outputTokens, usage.totalTokenCount),
    providerMetadata: { google: usage },
  })
}
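
// Worked example (illustrative values): a chunk reporting
//   { promptTokenCount: 120, cachedContentTokenCount: 20,
//     candidatesTokenCount: 80, thoughtsTokenCount: 40, totalTokenCount: 240 }
// maps to inputTokens: 120, nonCachedInputTokens: 100, cacheReadInputTokens: 20,
// outputTokens: 80 + 40 = 120, reasoningTokens: 40, and totalTokens: 240.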

const mapFinishReason = (finishReason: string | undefined, hasToolCalls: boolean): FinishReason => {
  if (finishReason === "STOP") return hasToolCalls ? "tool-calls" : "stop"
  if (finishReason === "MAX_TOKENS") return "length"
  if (
    finishReason === "IMAGE_SAFETY" ||
    finishReason === "RECITATION" ||
    finishReason === "SAFETY" ||
    finishReason === "BLOCKLIST" ||
    finishReason === "PROHIBITED_CONTENT" ||
    finishReason === "SPII"
  )
    return "content-filter"
  if (finishReason === "MALFORMED_FUNCTION_CALL") return "error"
  return "unknown"
}

const finish = (state: ParserState): ReadonlyArray<LLMEvent> =>
  state.finishReason || state.usage
    ? [LLMEvent.requestFinish({ reason: mapFinishReason(state.finishReason, state.hasToolCalls), usage: state.usage })]
    : []

const step = (state: ParserState, event: GeminiEvent) => {
  const nextState = {
    ...state,
    usage: event.usageMetadata ? (mapUsage(event.usageMetadata) ?? state.usage) : state.usage,
  }
  const candidate = event.candidates?.[0]
  if (!candidate?.content)
    return Effect.succeed([
      { ...nextState, finishReason: candidate?.finishReason ?? nextState.finishReason },
      [],
    ] as const)

  const events: LLMEvent[] = []
  let hasToolCalls = nextState.hasToolCalls
  let nextToolCallId = nextState.nextToolCallId

  for (const part of candidate.content.parts) {
    if ("text" in part && part.text.length > 0) {
      events.push(
        part.thought
          ? LLMEvent.reasoningDelta({ id: "reasoning-0", text: part.text })
          : LLMEvent.textDelta({ id: "text-0", text: part.text }),
      )
      continue
    }

    if ("functionCall" in part) {
      const input = part.functionCall.args
      const id = `tool_${nextToolCallId++}`
      events.push(LLMEvent.toolCall({ id, name: part.functionCall.name, input }))
      hasToolCalls = true
    }
  }

  return Effect.succeed([
    {
      ...nextState,
      hasToolCalls,
      nextToolCallId,
      finishReason: candidate.finishReason ?? nextState.finishReason,
    },
    events,
  ] as const)
}
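
// Sketch of one decoded chunk flowing through `step` (hypothetical payload):
//   { candidates: [{ content: { parts: [{ text: "Hi" }, { functionCall:
//     { name: "search", args: { q: "effect" } } }] }, finishReason: "STOP" }] }
// yields a `text-delta` for "Hi" and a `tool-call` with id "tool_0", and parks
// `finishReason: "STOP"` in state so `finish` emits `tool-calls` at halt.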

// =============================================================================
// Protocol And Gemini Route
// =============================================================================
/**
 * The Gemini protocol — request body construction, body schema, and the
 * streaming-event state machine. Used by Google AI Studio Gemini and (once
 * registered) Vertex Gemini.
 */
export const protocol = Protocol.make({
  id: ADAPTER,
  body: {
    schema: GeminiBody,
    from: fromRequest,
  },
  stream: {
    event: Protocol.jsonEvent(GeminiEvent),
    initial: () => ({ hasToolCalls: false, nextToolCallId: 0 }),
    step,
    onHalt: finish,
  },
})

export const route = Route.make({
  id: ADAPTER,
  protocol,
  // Gemini's path embeds the model id and pins SSE framing at the URL level.
  endpoint: Endpoint.path(({ request }) => `/models/${request.model.id}:streamGenerateContent?alt=sse`),
  auth: Auth.apiKeyHeader("x-goog-api-key"),
  framing: Framing.sse,
})

// =============================================================================
// Model Helper
// =============================================================================
export const model = Route.model(route, {
  provider: "google",
  baseURL: DEFAULT_BASE_URL,
})

export * as Gemini from "./gemini"

6
packages/llm/src/protocols/index.ts
Normal file
@@ -0,0 +1,6 @@
export * as AnthropicMessages from "./anthropic-messages"
export * as BedrockConverse from "./bedrock-converse"
export * as Gemini from "./gemini"
export * as OpenAIChat from "./openai-chat"
export * as OpenAICompatibleChat from "./openai-compatible-chat"
export * as OpenAIResponses from "./openai-responses"

410
packages/llm/src/protocols/openai-chat.ts
Normal file
@@ -0,0 +1,410 @@
import { Array as Arr, Effect, Schema } from "effect"
import { Route } from "../route/client"
import { Auth } from "../route/auth"
import { Endpoint } from "../route/endpoint"
import { Framing } from "../route/framing"
import { HttpTransport } from "../route/transport"
import { Protocol } from "../route/protocol"
import {
  LLMEvent,
  Usage,
  type FinishReason,
  type LLMRequest,
  type TextPart,
  type ToolCallPart,
  type ToolDefinition,
} from "../schema"
import { isRecord, JsonObject, optionalArray, optionalNull, ProviderShared } from "./shared"
import { OpenAIOptions } from "./utils/openai-options"
import { ToolStream } from "./utils/tool-stream"

const ADAPTER = "openai-chat"
export const DEFAULT_BASE_URL = "https://api.openai.com/v1"
export const PATH = "/chat/completions"

// =============================================================================
// Request Body Schema
// =============================================================================
// The body schema is the provider-native JSON body. `fromRequest` below builds
// this shape from the common `LLMRequest`, then `Route.make` validates and
// JSON-encodes it before transport.
const OpenAIChatFunction = Schema.Struct({
  name: Schema.String,
  description: Schema.String,
  parameters: JsonObject,
})

const OpenAIChatTool = Schema.Struct({
  type: Schema.tag("function"),
  function: OpenAIChatFunction,
})
type OpenAIChatTool = Schema.Schema.Type<typeof OpenAIChatTool>

const OpenAIChatAssistantToolCall = Schema.Struct({
  id: Schema.String,
  type: Schema.tag("function"),
  function: Schema.Struct({
    name: Schema.String,
    arguments: Schema.String,
  }),
})
type OpenAIChatAssistantToolCall = Schema.Schema.Type<typeof OpenAIChatAssistantToolCall>

const OpenAIChatMessage = Schema.Union([
  Schema.Struct({ role: Schema.Literal("system"), content: Schema.String }),
  Schema.Struct({ role: Schema.Literal("user"), content: Schema.String }),
  Schema.Struct({
    role: Schema.Literal("assistant"),
    content: Schema.NullOr(Schema.String),
    tool_calls: optionalArray(OpenAIChatAssistantToolCall),
    reasoning_content: Schema.optional(Schema.String),
  }),
  Schema.Struct({ role: Schema.Literal("tool"), tool_call_id: Schema.String, content: Schema.String }),
]).pipe(Schema.toTaggedUnion("role"))
type OpenAIChatMessage = Schema.Schema.Type<typeof OpenAIChatMessage>

const OpenAIChatToolChoice = Schema.Union([
  Schema.Literals(["auto", "none", "required"]),
  Schema.Struct({
    type: Schema.tag("function"),
    function: Schema.Struct({ name: Schema.String }),
  }),
])

export const bodyFields = {
  model: Schema.String,
  messages: Schema.Array(OpenAIChatMessage),
  tools: optionalArray(OpenAIChatTool),
  tool_choice: Schema.optional(OpenAIChatToolChoice),
  stream: Schema.Literal(true),
  stream_options: Schema.optional(Schema.Struct({ include_usage: Schema.Boolean })),
  store: Schema.optional(Schema.Boolean),
  reasoning_effort: Schema.optional(OpenAIOptions.OpenAIReasoningEffort),
  max_tokens: Schema.optional(Schema.Number),
  temperature: Schema.optional(Schema.Number),
  top_p: Schema.optional(Schema.Number),
  frequency_penalty: Schema.optional(Schema.Number),
  presence_penalty: Schema.optional(Schema.Number),
  seed: Schema.optional(Schema.Number),
  stop: optionalArray(Schema.String),
}
const OpenAIChatBody = Schema.Struct(bodyFields)
export type OpenAIChatBody = Schema.Schema.Type<typeof OpenAIChatBody>

// =============================================================================
// Streaming Event Schema
// =============================================================================
// The event schema is one decoded SSE `data:` payload. `Framing.sse` splits the
// byte stream into strings, then `Protocol.jsonEvent` decodes each string into
// this provider-native event shape.
const OpenAIChatUsage = Schema.Struct({
  prompt_tokens: Schema.optional(Schema.Number),
  completion_tokens: Schema.optional(Schema.Number),
  total_tokens: Schema.optional(Schema.Number),
  prompt_tokens_details: optionalNull(
    Schema.Struct({
      cached_tokens: Schema.optional(Schema.Number),
    }),
  ),
  completion_tokens_details: optionalNull(
    Schema.Struct({
      reasoning_tokens: Schema.optional(Schema.Number),
    }),
  ),
})

const OpenAIChatToolCallDeltaFunction = Schema.Struct({
  name: optionalNull(Schema.String),
  arguments: optionalNull(Schema.String),
})

const OpenAIChatToolCallDelta = Schema.Struct({
  index: Schema.Number,
  id: optionalNull(Schema.String),
  function: optionalNull(OpenAIChatToolCallDeltaFunction),
})
type OpenAIChatToolCallDelta = Schema.Schema.Type<typeof OpenAIChatToolCallDelta>

const OpenAIChatDelta = Schema.Struct({
  content: optionalNull(Schema.String),
  tool_calls: optionalNull(Schema.Array(OpenAIChatToolCallDelta)),
})

const OpenAIChatChoice = Schema.Struct({
  delta: optionalNull(OpenAIChatDelta),
  finish_reason: optionalNull(Schema.String),
})

const OpenAIChatEvent = Schema.Struct({
  choices: Schema.Array(OpenAIChatChoice),
  usage: optionalNull(OpenAIChatUsage),
})
type OpenAIChatEvent = Schema.Schema.Type<typeof OpenAIChatEvent>
type OpenAIChatRequestMessage = LLMRequest["messages"][number]

interface ParserState {
  readonly tools: ToolStream.State<number>
  readonly toolCallEvents: ReadonlyArray<LLMEvent>
  readonly usage?: Usage
  readonly finishReason?: FinishReason
}

const invalid = ProviderShared.invalidRequest

// =============================================================================
// Request Lowering
// =============================================================================
// Lowering is the only place that knows how common LLM messages map onto the
// OpenAI Chat wire format. Keep provider quirks here instead of leaking native
// fields into `LLMRequest`.
const lowerTool = (tool: ToolDefinition): OpenAIChatTool => ({
  type: "function",
  function: {
    name: tool.name,
    description: tool.description,
    parameters: tool.inputSchema,
  },
})

const lowerToolChoice = (toolChoice: NonNullable<LLMRequest["toolChoice"]>) =>
  ProviderShared.matchToolChoice("OpenAI Chat", toolChoice, {
    auto: () => "auto" as const,
    none: () => "none" as const,
    required: () => "required" as const,
    tool: (name) => ({ type: "function" as const, function: { name } }),
  })

const lowerToolCall = (part: ToolCallPart): OpenAIChatAssistantToolCall => ({
  id: part.id,
  type: "function",
  function: {
    name: part.name,
    arguments: ProviderShared.encodeJson(part.input),
  },
})

const openAICompatibleReasoningContent = (native: unknown) =>
  isRecord(native) && typeof native.reasoning_content === "string" ? native.reasoning_content : undefined

const lowerUserMessage = Effect.fn("OpenAIChat.lowerUserMessage")(function* (message: OpenAIChatRequestMessage) {
  const content: TextPart[] = []
  for (const part of message.content) {
    if (!ProviderShared.supportsContent(part, ["text"]))
      return yield* ProviderShared.unsupportedContent("OpenAI Chat", "user", ["text"])
    content.push(part)
  }
  return { role: "user" as const, content: ProviderShared.joinText(content) }
})

const lowerAssistantMessage = Effect.fn("OpenAIChat.lowerAssistantMessage")(function* (
  message: OpenAIChatRequestMessage,
) {
  const content: TextPart[] = []
  const toolCalls: OpenAIChatAssistantToolCall[] = []
  for (const part of message.content) {
    if (!ProviderShared.supportsContent(part, ["text", "tool-call"]))
      return yield* ProviderShared.unsupportedContent("OpenAI Chat", "assistant", ["text", "tool-call"])
    if (part.type === "text") {
      content.push(part)
      continue
    }
    if (part.type === "tool-call") {
      toolCalls.push(lowerToolCall(part))
      continue
    }
  }
  return {
    role: "assistant" as const,
    content: content.length === 0 ? null : ProviderShared.joinText(content),
    tool_calls: toolCalls.length === 0 ? undefined : toolCalls,
    reasoning_content: openAICompatibleReasoningContent(message.native?.openaiCompatible),
  }
})

const lowerToolMessages = Effect.fn("OpenAIChat.lowerToolMessages")(function* (message: OpenAIChatRequestMessage) {
  const messages: OpenAIChatMessage[] = []
  for (const part of message.content) {
    if (!ProviderShared.supportsContent(part, ["tool-result"]))
      return yield* ProviderShared.unsupportedContent("OpenAI Chat", "tool", ["tool-result"])
    messages.push({ role: "tool", tool_call_id: part.id, content: ProviderShared.toolResultText(part) })
  }
  return messages
})

const lowerMessage = Effect.fn("OpenAIChat.lowerMessage")(function* (message: OpenAIChatRequestMessage) {
  if (message.role === "user") return [yield* lowerUserMessage(message)]
  if (message.role === "assistant") return [yield* lowerAssistantMessage(message)]
  return yield* lowerToolMessages(message)
})

const lowerMessages = Effect.fn("OpenAIChat.lowerMessages")(function* (request: LLMRequest) {
  const system: OpenAIChatMessage[] =
    request.system.length === 0 ? [] : [{ role: "system", content: ProviderShared.joinText(request.system) }]
  return [...system, ...Arr.flatten(yield* Effect.forEach(request.messages, lowerMessage))]
})
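
// Sketch (hypothetical transcript): a system prompt, a user turn, an assistant
// turn that only called `get_time` (id "call_1"), and its tool result lower to:
//   { role: "system", content: "Be terse" }
//   { role: "user", content: "What time is it?" }
//   { role: "assistant", content: null, tool_calls: [{ id: "call_1",
//     type: "function", function: { name: "get_time", arguments: "{}" } }] }
//   { role: "tool", tool_call_id: "call_1", content: "09:00" }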

const lowerOptions = Effect.fn("OpenAIChat.lowerOptions")(function* (request: LLMRequest) {
  const store = OpenAIOptions.store(request)
  const reasoningEffort = OpenAIOptions.reasoningEffort(request)
  if (reasoningEffort && !OpenAIOptions.isReasoningEffort(reasoningEffort))
    return yield* invalid(`OpenAI Chat does not support reasoning effort ${reasoningEffort}`)
  return {
    ...(store !== undefined ? { store } : {}),
    ...(reasoningEffort ? { reasoning_effort: reasoningEffort } : {}),
  }
})

const fromRequest = Effect.fn("OpenAIChat.fromRequest")(function* (request: LLMRequest) {
  // `fromRequest` returns the provider body only. Endpoint, auth, framing,
  // validation, and HTTP execution are composed by `Route.make`.
  const generation = request.generation
  return {
    model: request.model.id,
    messages: yield* lowerMessages(request),
    tools: request.tools.length === 0 ? undefined : request.tools.map(lowerTool),
    tool_choice: request.toolChoice ? yield* lowerToolChoice(request.toolChoice) : undefined,
    stream: true as const,
    stream_options: { include_usage: true },
    max_tokens: generation?.maxTokens,
    temperature: generation?.temperature,
    top_p: generation?.topP,
    frequency_penalty: generation?.frequencyPenalty,
    presence_penalty: generation?.presencePenalty,
    seed: generation?.seed,
    stop: generation?.stop,
    ...(yield* lowerOptions(request)),
  }
})

// =============================================================================
// Stream Parsing
// =============================================================================
// Streaming parsers are small state machines: every event returns a new state
// plus the common `LLMEvent`s produced by that event. Tool calls are accumulated
// because OpenAI streams JSON arguments across multiple deltas.
const mapFinishReason = (reason: string | null | undefined): FinishReason => {
  if (reason === "stop") return "stop"
  if (reason === "length") return "length"
  if (reason === "content_filter") return "content-filter"
  if (reason === "function_call" || reason === "tool_calls") return "tool-calls"
  return "unknown"
}

// OpenAI Chat reports `prompt_tokens` (inclusive total) with a
// `cached_tokens` subset, and `completion_tokens` (inclusive total) with
// a `reasoning_tokens` subset. We pass the inclusive totals through and
// derive the non-cached breakdown so the `LLM.Usage` contract is
// satisfied on both sides.
const mapUsage = (usage: OpenAIChatEvent["usage"]): Usage | undefined => {
  if (!usage) return undefined
  const cached = usage.prompt_tokens_details?.cached_tokens
  const reasoning = usage.completion_tokens_details?.reasoning_tokens
  const nonCached = ProviderShared.subtractTokens(usage.prompt_tokens, cached)
  return new Usage({
    inputTokens: usage.prompt_tokens,
    outputTokens: usage.completion_tokens,
    nonCachedInputTokens: nonCached,
    cacheReadInputTokens: cached,
    reasoningTokens: reasoning,
    totalTokens: ProviderShared.totalTokens(usage.prompt_tokens, usage.completion_tokens, usage.total_tokens),
    providerMetadata: { openai: usage },
  })
}
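
// Worked example (illustrative payload):
//   { prompt_tokens: 100, completion_tokens: 50, total_tokens: 150,
//     prompt_tokens_details: { cached_tokens: 30 },
//     completion_tokens_details: { reasoning_tokens: 10 } }
// maps to inputTokens: 100, nonCachedInputTokens: 70, cacheReadInputTokens: 30,
// outputTokens: 50, reasoningTokens: 10, totalTokens: 150.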

const step = (state: ParserState, event: OpenAIChatEvent) =>
  Effect.gen(function* () {
    const events: LLMEvent[] = []
    const usage = mapUsage(event.usage) ?? state.usage
    const choice = event.choices[0]
    const finishReason = choice?.finish_reason ? mapFinishReason(choice.finish_reason) : state.finishReason
    const delta = choice?.delta
    const toolDeltas = delta?.tool_calls ?? []
    let tools = state.tools

    if (delta?.content) events.push(LLMEvent.textDelta({ id: "text-0", text: delta.content }))

    for (const tool of toolDeltas) {
      const result = ToolStream.appendOrStart(
        ADAPTER,
        tools,
        tool.index,
        { id: tool.id ?? undefined, name: tool.function?.name ?? undefined, text: tool.function?.arguments ?? "" },
        "OpenAI Chat tool call delta is missing id or name",
      )
      if (ToolStream.isError(result)) return yield* result
      tools = result.tools
      if (result.event) events.push(result.event)
    }

    // Finalize accumulated tool inputs eagerly when finish_reason arrives so
    // JSON parse failures fail the stream at the boundary rather than at halt.
    const finished =
      finishReason !== undefined && state.finishReason === undefined && Object.keys(tools).length > 0
        ? yield* ToolStream.finishAll(ADAPTER, tools)
        : undefined

    return [
      {
        tools: finished?.tools ?? tools,
        toolCallEvents: finished?.events ?? state.toolCallEvents,
        usage,
        finishReason,
      },
      events,
    ] as const
  })

const finishEvents = (state: ParserState): ReadonlyArray<LLMEvent> => {
  const hasToolCalls = state.toolCallEvents.length > 0
  const reason = state.finishReason === "stop" && hasToolCalls ? "tool-calls" : state.finishReason
  return [...state.toolCallEvents, ...(reason ? [LLMEvent.requestFinish({ reason, usage: state.usage })] : [])]
}
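
// Sketch of the accumulation (hypothetical deltas): a first delta
//   { index: 0, id: "call_1", function: { name: "get_time", arguments: "{\"tz" } }
// starts the accumulator, a second { index: 0, function: { arguments:
// "\":\"UTC\"}" } } appends to it, and the chunk carrying
// `finish_reason: "tool_calls"` triggers `ToolStream.finishAll`, which parses
// the joined `{"tz":"UTC"}` and parks the `tool-call` event on
// `toolCallEvents` for `finishEvents` to flush at halt.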

// =============================================================================
// Protocol And OpenAI Route
// =============================================================================
/**
 * The OpenAI Chat protocol — request body construction, body schema, and the
 * streaming-event state machine. Reused by every route that speaks OpenAI Chat
 * over HTTP+SSE: native OpenAI, DeepSeek, TogetherAI, Cerebras, Baseten,
 * Fireworks, DeepInfra, and (once added) Azure OpenAI Chat.
 */
export const protocol = Protocol.make({
  id: ADAPTER,
  body: {
    schema: OpenAIChatBody,
    from: fromRequest,
  },
  stream: {
    event: Protocol.jsonEvent(OpenAIChatEvent),
    initial: () => ({ tools: ToolStream.empty<number>(), toolCallEvents: [] }),
    step,
    onHalt: finishEvents,
  },
})

const encodeBody = Schema.encodeSync(Schema.fromJsonString(OpenAIChatBody))

export const httpTransport = HttpTransport.httpJson({
  endpoint: Endpoint.path(PATH),
  auth: Auth.bearer(),
  framing: Framing.sse,
  encodeBody,
})

export const route = Route.make({
  id: ADAPTER,
  provider: "openai",
  protocol,
  transport: httpTransport,
  defaults: {
    baseURL: DEFAULT_BASE_URL,
  },
})

// =============================================================================
// Model Helper
// =============================================================================
export const model = route.model

export * as OpenAIChat from "./openai-chat"

28
packages/llm/src/protocols/openai-compatible-chat.ts
Normal file
@@ -0,0 +1,28 @@
import { Route, type RouteRoutedModelInput } from "../route/client"
import { Endpoint } from "../route/endpoint"
import { Framing } from "../route/framing"
import * as OpenAIChat from "./openai-chat"

const ADAPTER = "openai-compatible-chat"

export type OpenAICompatibleChatModelInput = Omit<RouteRoutedModelInput, "baseURL"> & {
  readonly baseURL: string
}

/**
 * Route for non-OpenAI providers that expose an OpenAI Chat-compatible
 * `/chat/completions` endpoint. Reuses `OpenAIChat.protocol` end-to-end and
 * overrides only the route id so providers can be resolved per-family without
 * colliding with native OpenAI. The model carries the host on `baseURL`,
 * supplied by whichever profile/provider helper builds it.
 */
export const route = Route.make({
  id: ADAPTER,
  protocol: OpenAIChat.protocol,
  endpoint: Endpoint.path("/chat/completions"),
  framing: Framing.sse,
})

export const model = Route.model<OpenAICompatibleChatModelInput>(route)

export * as OpenAICompatibleChat from "./openai-compatible-chat"

569
packages/llm/src/protocols/openai-responses.ts
Normal file
@@ -0,0 +1,569 @@
import { Effect, Schema } from "effect"
import { Route } from "../route/client"
import { Auth } from "../route/auth"
import { Endpoint } from "../route/endpoint"
import { Framing } from "../route/framing"
import { HttpTransport, WebSocketTransport } from "../route/transport"
import { Protocol } from "../route/protocol"
import {
  LLMEvent,
  Usage,
  type FinishReason,
  type LLMRequest,
  type ProviderMetadata,
  type TextPart,
  type ToolCallPart,
  type ToolDefinition,
} from "../schema"
import { JsonObject, optionalArray, optionalNull, ProviderShared } from "./shared"
import { OpenAIOptions } from "./utils/openai-options"
import { ToolStream } from "./utils/tool-stream"

const ADAPTER = "openai-responses"
export const DEFAULT_BASE_URL = "https://api.openai.com/v1"
export const PATH = "/responses"

// =============================================================================
// Request Body Schema
// =============================================================================
const OpenAIResponsesInputText = Schema.Struct({
  type: Schema.tag("input_text"),
  text: Schema.String,
})

const OpenAIResponsesOutputText = Schema.Struct({
  type: Schema.tag("output_text"),
  text: Schema.String,
})

const OpenAIResponsesInputItem = Schema.Union([
  Schema.Struct({ role: Schema.tag("system"), content: Schema.String }),
  Schema.Struct({ role: Schema.tag("user"), content: Schema.Array(OpenAIResponsesInputText) }),
  Schema.Struct({ role: Schema.tag("assistant"), content: Schema.Array(OpenAIResponsesOutputText) }),
  Schema.Struct({
    type: Schema.tag("function_call"),
    call_id: Schema.String,
    name: Schema.String,
    arguments: Schema.String,
  }),
  Schema.Struct({
    type: Schema.tag("function_call_output"),
    call_id: Schema.String,
    output: Schema.String,
  }),
])
type OpenAIResponsesInputItem = Schema.Schema.Type<typeof OpenAIResponsesInputItem>

const OpenAIResponsesTool = Schema.Struct({
  type: Schema.tag("function"),
  name: Schema.String,
  description: Schema.String,
  parameters: JsonObject,
  strict: Schema.optional(Schema.Boolean),
})
type OpenAIResponsesTool = Schema.Schema.Type<typeof OpenAIResponsesTool>

const OpenAIResponsesToolChoice = Schema.Union([
  Schema.Literals(["auto", "none", "required"]),
  Schema.Struct({ type: Schema.tag("function"), name: Schema.String }),
])

// Fields shared between the HTTP body and the WebSocket `response.create`
// message. The HTTP body adds `stream: true`; the WebSocket message adds
// `type: "response.create"`. Defining the shared shape once keeps the two
// transports in sync without a destructure-and-strip dance.
const OpenAIResponsesCoreFields = {
  model: Schema.String,
  input: Schema.Array(OpenAIResponsesInputItem),
  tools: optionalArray(OpenAIResponsesTool),
  tool_choice: Schema.optional(OpenAIResponsesToolChoice),
  store: Schema.optional(Schema.Boolean),
  prompt_cache_key: Schema.optional(Schema.String),
  include: optionalArray(Schema.Literal("reasoning.encrypted_content")),
  reasoning: Schema.optional(
    Schema.Struct({
      effort: Schema.optional(OpenAIOptions.OpenAIReasoningEffort),
      summary: Schema.optional(Schema.Literal("auto")),
    }),
  ),
  text: Schema.optional(
    Schema.Struct({
      verbosity: Schema.optional(OpenAIOptions.OpenAITextVerbosity),
    }),
  ),
  max_output_tokens: Schema.optional(Schema.Number),
  temperature: Schema.optional(Schema.Number),
  top_p: Schema.optional(Schema.Number),
}

const OpenAIResponsesBody = Schema.Struct({
  ...OpenAIResponsesCoreFields,
  stream: Schema.Literal(true),
})
export type OpenAIResponsesBody = Schema.Schema.Type<typeof OpenAIResponsesBody>

const OpenAIResponsesWebSocketMessage = Schema.StructWithRest(
  Schema.Struct({
    type: Schema.tag("response.create"),
    ...OpenAIResponsesCoreFields,
  }),
  [Schema.Record(Schema.String, Schema.Unknown)],
)
type OpenAIResponsesWebSocketMessage = Schema.Schema.Type<typeof OpenAIResponsesWebSocketMessage>
const encodeWebSocketMessage = Schema.encodeSync(Schema.fromJsonString(OpenAIResponsesWebSocketMessage))

const OpenAIResponsesUsage = Schema.Struct({
  input_tokens: Schema.optional(Schema.Number),
  input_tokens_details: optionalNull(Schema.Struct({ cached_tokens: Schema.optional(Schema.Number) })),
  output_tokens: Schema.optional(Schema.Number),
  output_tokens_details: optionalNull(Schema.Struct({ reasoning_tokens: Schema.optional(Schema.Number) })),
  total_tokens: Schema.optional(Schema.Number),
})
type OpenAIResponsesUsage = Schema.Schema.Type<typeof OpenAIResponsesUsage>

const OpenAIResponsesStreamItem = Schema.Struct({
  type: Schema.String,
  id: Schema.optional(Schema.String),
  call_id: Schema.optional(Schema.String),
  name: Schema.optional(Schema.String),
  arguments: Schema.optional(Schema.String),
  // Hosted (provider-executed) tool fields. Each hosted tool item carries its
  // own subset of these — we capture them generically so we can surface the
  // call's typed input portion and round-trip the full result payload without
  // hand-rolling a per-tool schema.
  status: Schema.optional(Schema.String),
  action: Schema.optional(Schema.Unknown),
  queries: Schema.optional(Schema.Unknown),
  results: Schema.optional(Schema.Unknown),
  code: Schema.optional(Schema.String),
  container_id: Schema.optional(Schema.String),
  outputs: Schema.optional(Schema.Unknown),
  server_label: Schema.optional(Schema.String),
  output: Schema.optional(Schema.Unknown),
  error: Schema.optional(Schema.Unknown),
})
type OpenAIResponsesStreamItem = Schema.Schema.Type<typeof OpenAIResponsesStreamItem>

const OpenAIResponsesEvent = Schema.Struct({
  type: Schema.String,
  delta: Schema.optional(Schema.String),
  item_id: Schema.optional(Schema.String),
  item: Schema.optional(OpenAIResponsesStreamItem),
  response: Schema.optional(
    Schema.Struct({
      id: Schema.optional(Schema.String),
      service_tier: Schema.optional(Schema.String),
      incomplete_details: optionalNull(Schema.Struct({ reason: Schema.String })),
      usage: optionalNull(OpenAIResponsesUsage),
    }),
  ),
  code: Schema.optional(Schema.String),
  message: Schema.optional(Schema.String),
})
type OpenAIResponsesEvent = Schema.Schema.Type<typeof OpenAIResponsesEvent>

interface ParserState {
  readonly tools: ToolStream.State<string>
  readonly hasFunctionCall: boolean
}

const invalid = ProviderShared.invalidRequest

// =============================================================================
// Request Lowering
// =============================================================================
const lowerTool = (tool: ToolDefinition): OpenAIResponsesTool => ({
  type: "function",
  name: tool.name,
  description: tool.description,
  parameters: tool.inputSchema,
})

const lowerToolChoice = (toolChoice: NonNullable<LLMRequest["toolChoice"]>) =>
  ProviderShared.matchToolChoice("OpenAI Responses", toolChoice, {
    auto: () => "auto" as const,
    none: () => "none" as const,
    required: () => "required" as const,
    tool: (name) => ({ type: "function" as const, name }),
  })

const lowerToolCall = (part: ToolCallPart): OpenAIResponsesInputItem => ({
  type: "function_call",
  call_id: part.id,
  name: part.name,
  arguments: ProviderShared.encodeJson(part.input),
})

const lowerMessages = Effect.fn("OpenAIResponses.lowerMessages")(function* (request: LLMRequest) {
  const system: OpenAIResponsesInputItem[] =
    request.system.length === 0 ? [] : [{ role: "system", content: ProviderShared.joinText(request.system) }]
  const input: OpenAIResponsesInputItem[] = [...system]

  for (const message of request.messages) {
    if (message.role === "user") {
      const content: TextPart[] = []
      for (const part of message.content) {
        if (!ProviderShared.supportsContent(part, ["text"]))
          return yield* ProviderShared.unsupportedContent("OpenAI Responses", "user", ["text"])
        content.push(part)
      }
      input.push({ role: "user", content: content.map((part) => ({ type: "input_text", text: part.text })) })
      continue
    }

    if (message.role === "assistant") {
      const content: TextPart[] = []
      for (const part of message.content) {
        if (!ProviderShared.supportsContent(part, ["text", "tool-call"]))
          return yield* ProviderShared.unsupportedContent("OpenAI Responses", "assistant", ["text", "tool-call"])
        if (part.type === "text") {
          content.push(part)
          continue
        }
        if (part.type === "tool-call") {
          input.push(lowerToolCall(part))
          continue
        }
      }
      if (content.length > 0)
        input.push({ role: "assistant", content: content.map((part) => ({ type: "output_text", text: part.text })) })
      continue
    }

    for (const part of message.content) {
      if (!ProviderShared.supportsContent(part, ["tool-result"]))
        return yield* ProviderShared.unsupportedContent("OpenAI Responses", "tool", ["tool-result"])
      input.push({ type: "function_call_output", call_id: part.id, output: ProviderShared.toolResultText(part) })
    }
  }

  return input
})

const lowerOptions = Effect.fn("OpenAIResponses.lowerOptions")(function* (request: LLMRequest) {
  const store = OpenAIOptions.store(request)
  const promptCacheKey = OpenAIOptions.promptCacheKey(request)
  const effort = OpenAIOptions.reasoningEffort(request)
  if (effort && !OpenAIOptions.isReasoningEffort(effort))
    return yield* invalid(`OpenAI Responses does not support reasoning effort ${effort}`)
  const summary = OpenAIOptions.reasoningSummary(request)
  const encryptedState = OpenAIOptions.encryptedReasoning(request)
  const verbosity = OpenAIOptions.textVerbosity(request)
  return {
    ...(store !== undefined ? { store } : {}),
    ...(promptCacheKey ? { prompt_cache_key: promptCacheKey } : {}),
    ...(encryptedState ? { include: ["reasoning.encrypted_content"] as const } : {}),
    ...(effort || summary ? { reasoning: { effort, summary } } : {}),
    ...(verbosity ? { text: { verbosity } } : {}),
  }
})

const fromRequest = Effect.fn("OpenAIResponses.fromRequest")(function* (request: LLMRequest) {
  const generation = request.generation
  return {
    model: request.model.id,
    input: yield* lowerMessages(request),
    tools: request.tools.length === 0 ? undefined : request.tools.map(lowerTool),
    tool_choice: request.toolChoice ? yield* lowerToolChoice(request.toolChoice) : undefined,
    stream: true as const,
    max_output_tokens: generation?.maxTokens,
    temperature: generation?.temperature,
    top_p: generation?.topP,
    ...(yield* lowerOptions(request)),
  }
})

// =============================================================================
// Stream Parsing
// =============================================================================
// OpenAI Responses reports `input_tokens` (inclusive total) with a
// `cached_tokens` subset, and `output_tokens` (inclusive total) with a
// `reasoning_tokens` subset. Pass the totals through and derive the
// non-cached breakdown.
const mapUsage = (usage: OpenAIResponsesUsage | null | undefined) => {
  if (!usage) return undefined
  const cached = usage.input_tokens_details?.cached_tokens
  const reasoning = usage.output_tokens_details?.reasoning_tokens
  const nonCached = ProviderShared.subtractTokens(usage.input_tokens, cached)
  return new Usage({
    inputTokens: usage.input_tokens,
    outputTokens: usage.output_tokens,
    nonCachedInputTokens: nonCached,
    cacheReadInputTokens: cached,
    reasoningTokens: reasoning,
    totalTokens: ProviderShared.totalTokens(usage.input_tokens, usage.output_tokens, usage.total_tokens),
    providerMetadata: { openai: usage },
  })
}
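
// Worked example (illustrative payload):
//   { input_tokens: 200, output_tokens: 80, total_tokens: 280,
//     input_tokens_details: { cached_tokens: 120 },
//     output_tokens_details: { reasoning_tokens: 32 } }
// maps to inputTokens: 200, nonCachedInputTokens: 80, cacheReadInputTokens: 120,
// outputTokens: 80, reasoningTokens: 32, totalTokens: 280.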

const mapFinishReason = (event: OpenAIResponsesEvent, hasFunctionCall: boolean): FinishReason => {
  const reason = event.response?.incomplete_details?.reason
  if (reason === undefined || reason === null) return hasFunctionCall ? "tool-calls" : "stop"
  if (reason === "max_output_tokens") return "length"
  if (reason === "content_filter") return "content-filter"
  return hasFunctionCall ? "tool-calls" : "unknown"
}

const openaiMetadata = (metadata: Record<string, unknown>): ProviderMetadata => ({ openai: metadata })

// Hosted tool items (provider-executed) ship their typed input + status +
// result fields all in one item. We expose them as a `tool-call` +
// `tool-result` pair so consumers can treat them uniformly with client tools,
// only differentiated by `providerExecuted: true`.
//
// One record per OpenAI Responses item type that represents a hosted
// (provider-executed) tool call: the common name we surface, plus an `input`
// extractor that picks the fields the model actually populated for that tool.
// Falling back to `{}` when an entry isn't fully typed keeps unknown tools
// observable without rolling a per-tool schema.
const HOSTED_TOOLS = {
  web_search_call: { name: "web_search", input: (item) => item.action ?? {} },
  web_search_preview_call: { name: "web_search_preview", input: (item) => item.action ?? {} },
  file_search_call: { name: "file_search", input: (item) => ({ queries: item.queries ?? [] }) },
  code_interpreter_call: {
    name: "code_interpreter",
    input: (item) => ({ code: item.code, container_id: item.container_id }),
  },
  computer_use_call: { name: "computer_use", input: (item) => item.action ?? {} },
  image_generation_call: { name: "image_generation", input: () => ({}) },
  mcp_call: {
    name: "mcp",
    input: (item) => ({ server_label: item.server_label, name: item.name, arguments: item.arguments }),
  },
  local_shell_call: { name: "local_shell", input: (item) => item.action ?? {} },
} as const satisfies Record<
  string,
  { readonly name: string; readonly input: (item: OpenAIResponsesStreamItem) => unknown }
>

type HostedToolType = keyof typeof HOSTED_TOOLS

const isHostedToolItem = (
  item: OpenAIResponsesStreamItem,
): item is OpenAIResponsesStreamItem & { type: HostedToolType; id: string } =>
  item.type in HOSTED_TOOLS && typeof item.id === "string" && item.id.length > 0

// Round-trip the full item as the structured result so consumers can extract
// outputs / sources / status without re-decoding.
const hostedToolResult = (item: OpenAIResponsesStreamItem) => {
  const isError = typeof item.error !== "undefined" && item.error !== null
  return isError ? { type: "error" as const, value: item.error } : { type: "json" as const, value: item }
}

const hostedToolEvents = (
  item: OpenAIResponsesStreamItem & { type: HostedToolType; id: string },
): ReadonlyArray<LLMEvent> => {
  const tool = HOSTED_TOOLS[item.type]
  const providerMetadata = openaiMetadata({ itemId: item.id })
  return [
    LLMEvent.toolCall({
      id: item.id,
      name: tool.name,
      input: tool.input(item),
      providerExecuted: true,
      providerMetadata,
    }),
    LLMEvent.toolResult({
      id: item.id,
      name: tool.name,
      result: hostedToolResult(item),
      providerExecuted: true,
      providerMetadata,
    }),
  ]
}
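
// Sketch (hypothetical item): a completed hosted search item such as
//   { type: "web_search_call", id: "ws_1", status: "completed",
//     action: { query: "effect ts" } }
// becomes a `tool-call` (name "web_search", input { query: "effect ts" }) plus
// a `tool-result` whose json value round-trips the whole item, both flagged
// `providerExecuted: true` and tagged with `{ openai: { itemId: "ws_1" } }`.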

type StepResult = readonly [ParserState, ReadonlyArray<LLMEvent>]

const NO_EVENTS: StepResult["1"] = []

// `response.completed` / `response.incomplete` are clean finishes that emit a
// `request-finish` event; `response.failed` is a hard failure that emits a
// `provider-error`. All three end the stream — kept in one set so `step` and
// the protocol's `terminal` predicate stay in sync.
const TERMINAL_TYPES = new Set(["response.completed", "response.incomplete", "response.failed"])

const onOutputTextDelta = (state: ParserState, event: OpenAIResponsesEvent): StepResult => {
  if (!event.delta) return [state, NO_EVENTS]
  return [state, [LLMEvent.textDelta({ id: event.item_id ?? "text-0", text: event.delta })]]
}

const onOutputItemAdded = (state: ParserState, event: OpenAIResponsesEvent): StepResult => {
  const item = event.item
  if (item?.type !== "function_call" || !item.id) return [state, NO_EVENTS]
  return [
    {
      hasFunctionCall: state.hasFunctionCall,
      tools: ToolStream.start(state.tools, item.id, {
        id: item.call_id ?? item.id,
        name: item.name ?? "",
        input: item.arguments ?? "",
        providerMetadata: openaiMetadata({ itemId: item.id }),
      }),
    },
    NO_EVENTS,
  ]
}

const onFunctionCallArgumentsDelta = Effect.fn("OpenAIResponses.onFunctionCallArgumentsDelta")(function* (
  state: ParserState,
  event: OpenAIResponsesEvent,
) {
  if (!event.item_id || !event.delta) return [state, NO_EVENTS] satisfies StepResult
  const result = ToolStream.appendExisting(
    ADAPTER,
    state.tools,
    event.item_id,
    event.delta,
    "OpenAI Responses tool argument delta is missing its tool call",
  )
  if (ToolStream.isError(result)) return yield* result
  return [
    { hasFunctionCall: state.hasFunctionCall, tools: result.tools },
    result.event ? [result.event] : NO_EVENTS,
  ] satisfies StepResult
})

const onOutputItemDone = Effect.fn("OpenAIResponses.onOutputItemDone")(function* (
  state: ParserState,
  event: OpenAIResponsesEvent,
) {
  const item = event.item
  if (!item) return [state, NO_EVENTS] satisfies StepResult

  if (item.type === "function_call") {
    if (!item.id || !item.call_id || !item.name) return [state, NO_EVENTS] satisfies StepResult
    const tools = state.tools[item.id]
      ? state.tools
      : ToolStream.start(state.tools, item.id, { id: item.call_id, name: item.name })
    const result =
      item.arguments === undefined
        ? yield* ToolStream.finish(ADAPTER, tools, item.id)
        : yield* ToolStream.finishWithInput(ADAPTER, tools, item.id, item.arguments)
    return [
      { hasFunctionCall: result.event ? true : state.hasFunctionCall, tools: result.tools },
      result.event ? [result.event] : NO_EVENTS,
    ] satisfies StepResult
  }

  if (isHostedToolItem(item)) return [state, hostedToolEvents(item)] satisfies StepResult

  return [state, NO_EVENTS] satisfies StepResult
})

const onResponseFinish = (state: ParserState, event: OpenAIResponsesEvent): StepResult => [
  state,
  [
    LLMEvent.requestFinish({
      reason: mapFinishReason(event, state.hasFunctionCall),
      usage: mapUsage(event.response?.usage),
      providerMetadata:
        event.response?.id || event.response?.service_tier
          ? openaiMetadata({
              responseId: event.response.id,
              serviceTier: event.response.service_tier,
            })
          : undefined,
    }),
  ],
]

const onResponseFailed = (state: ParserState, event: OpenAIResponsesEvent): StepResult => [
  state,
  [LLMEvent.providerError({ message: event.message ?? event.code ?? "OpenAI Responses response failed" })],
]

const onError = (state: ParserState, event: OpenAIResponsesEvent): StepResult => [
  state,
  [LLMEvent.providerError({ message: event.message ?? event.code ?? "OpenAI Responses stream error" })],
]

const step = (state: ParserState, event: OpenAIResponsesEvent) => {
  if (event.type === "response.output_text.delta") return Effect.succeed(onOutputTextDelta(state, event))
  if (event.type === "response.output_item.added") return Effect.succeed(onOutputItemAdded(state, event))
  if (event.type === "response.function_call_arguments.delta") return onFunctionCallArgumentsDelta(state, event)
  if (event.type === "response.output_item.done") return onOutputItemDone(state, event)
  if (event.type === "response.completed" || event.type === "response.incomplete")
    return Effect.succeed(onResponseFinish(state, event))
  if (event.type === "response.failed") return Effect.succeed(onResponseFailed(state, event))
  if (event.type === "error") return Effect.succeed(onError(state, event))
  return Effect.succeed<StepResult>([state, NO_EVENTS])
}

// =============================================================================
// Protocol And OpenAI Route
// =============================================================================
/**
 * The OpenAI Responses protocol — request body construction, body schema, and
 * the streaming-event state machine. Used by native OpenAI and (once
 * registered) Azure OpenAI Responses.
 */
export const protocol = Protocol.make({
  id: ADAPTER,
  body: {
    schema: OpenAIResponsesBody,
    from: fromRequest,
  },
  stream: {
    event: Protocol.jsonEvent(OpenAIResponsesEvent),
    initial: () => ({ hasFunctionCall: false, tools: ToolStream.empty<string>() }),
    step,
    terminal: (event) => TERMINAL_TYPES.has(event.type),
  },
})

const encodeBody = Schema.encodeSync(Schema.fromJsonString(OpenAIResponsesBody))
const transportBase = {
  endpoint: Endpoint.path<OpenAIResponsesBody>(PATH),
  auth: Auth.bearer(),
  encodeBody,
}
const routeDefaults = {
  baseURL: DEFAULT_BASE_URL,
}

export const httpTransport = HttpTransport.httpJson({
  ...transportBase,
  framing: Framing.sse,
})

export const route = Route.make({
  id: ADAPTER,
  provider: "openai",
  protocol,
  transport: httpTransport,
  defaults: routeDefaults,
})

const decodeWebSocketMessage = ProviderShared.validateWith(Schema.decodeUnknownEffect(OpenAIResponsesWebSocketMessage))

const webSocketMessage = (body: OpenAIResponsesBody | Record<string, unknown>) =>
  Effect.gen(function* () {
    if (!ProviderShared.isRecord(body))
      return yield* ProviderShared.invalidRequest("OpenAI Responses WebSocket body must be a JSON object")
    const { stream: _stream, ...message } = body
    return yield* decodeWebSocketMessage({ ...message, type: "response.create" })
  })
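
// Sketch (hypothetical body): webSocketMessage strips `stream` and re-tags, so
//   { model: "gpt-x", input: [], stream: true }
// decodes to { type: "response.create", model: "gpt-x", input: [] } — the same
// core fields the HTTP body carries, minus the HTTP-only `stream` flag.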

export const webSocketTransport = WebSocketTransport.json({
  ...transportBase,
  toMessage: webSocketMessage,
  encodeMessage: encodeWebSocketMessage,
})

export const webSocketRoute = Route.make({
  id: `${ADAPTER}-websocket`,
  provider: "openai",
  protocol,
  transport: webSocketTransport,
  defaults: routeDefaults,
})

// =============================================================================
// Model Helper
// =============================================================================
export const model = route.model

export const webSocketModel = webSocketRoute.model

export * as OpenAIResponses from "./openai-responses"

239
packages/llm/src/protocols/shared.ts
Normal file
@@ -0,0 +1,239 @@
import { Buffer } from "node:buffer"
import { Effect, Schema, Stream } from "effect"
import * as Sse from "effect/unstable/encoding/Sse"
import { Headers, HttpClientRequest } from "effect/unstable/http"
import {
  InvalidProviderOutputReason,
  InvalidRequestReason,
  LLMError,
  type ContentPart,
  type LLMRequest,
  type MediaPart,
  type ToolResultPart,
} from "../schema"

export const Json = Schema.fromJsonString(Schema.Unknown)
export const decodeJson = Schema.decodeUnknownSync(Json)
export const encodeJson = Schema.encodeSync(Json)
export const JsonObject = Schema.Record(Schema.String, Schema.Unknown)
export const optionalArray = <const S extends Schema.Top>(schema: S) => Schema.optional(Schema.Array(schema))
export const optionalNull = <const S extends Schema.Top>(schema: S) => Schema.optional(Schema.NullOr(schema))

/**
 * Plain-record narrowing. Excludes arrays so routes checking nested JSON
 * Schema fragments don't accidentally treat a tuple as a key/value bag.
 */
export const isRecord = (value: unknown): value is Record<string, unknown> =>
  typeof value === "object" && value !== null && !Array.isArray(value)

/**
 * Streaming tool-call accumulator. Adapters that build a tool call across
 * multiple `tool-input-delta` chunks store the partial JSON input string here
 * and finalize it with `parseToolInput` once the call completes.
 */
export interface ToolAccumulator {
  readonly id: string
  readonly name: string
  readonly input: string
}

/**
 * `Usage.totalTokens` policy shared by every route. Honors a provider-
 * supplied total; otherwise falls back to `inputTokens + outputTokens` only
 * when at least one is defined. Returns `undefined` when neither input nor
 * output is known so routes don't publish a misleading `0`.
 *
 * Under the additive `LLM.Usage` contract, `inputTokens` and `outputTokens`
 * are the non-cached input and visible output only. The provider-supplied
 * `total` is the source of truth when present; the computed fallback
 * under-counts cache and reasoning by design and exists mainly so
 * Anthropic-style providers (which don't surface a total) still get a
 * sensible aggregate on the input + output axes.
 */
export const totalTokens = (
  inputTokens: number | undefined,
  outputTokens: number | undefined,
  total: number | undefined,
) => {
  if (total !== undefined) return total
  if (inputTokens === undefined && outputTokens === undefined) return undefined
  return (inputTokens ?? 0) + (outputTokens ?? 0)
}
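
// Worked examples:
//   totalTokens(100, 50, 175)                    // -> 175 (provider total wins)
//   totalTokens(100, undefined, undefined)       // -> 100 (fallback sum)
//   totalTokens(undefined, undefined, undefined) // -> undefined (nothing known)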

/**
 * Subtract `subtrahend` from `total`, clamping to zero if the provider
 * reports a non-sensical breakdown (e.g. `cached_tokens > prompt_tokens`).
 * Used by protocol mappers when deriving a non-overlapping breakdown field
 * from a provider's inclusive total — `nonCachedInputTokens` from
 * `inputTokens - cacheReadInputTokens - cacheWriteInputTokens`.
 *
 * If `total` is `undefined`, returns `undefined` (we don't fabricate
 * counts). If `subtrahend` is `undefined`, returns `total` unchanged. The
 * provider-native breakdown stays available on `Usage.native` for debugging.
 */
export const subtractTokens = (total: number | undefined, subtrahend: number | undefined): number | undefined => {
  if (total === undefined) return undefined
  if (subtrahend === undefined) return total
  return Math.max(0, total - subtrahend)
}
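
// Worked examples:
//   subtractTokens(100, 30)       // -> 70
//   subtractTokens(100, 130)      // -> 0 (clamped non-sensical breakdown)
//   subtractTokens(undefined, 30) // -> undefined (no total to subtract from)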

/**
 * Sum a list of optional token counts, returning `undefined` only when
 * every value is `undefined` (so we don't fabricate a `0`). Used by
 * protocol mappers to derive the inclusive `inputTokens` total from a
 * provider that natively reports a non-overlapping breakdown
 * (e.g. Anthropic, whose `input_tokens` is already non-cached only).
 */
export const sumTokens = (...values: ReadonlyArray<number | undefined>): number | undefined => {
  if (values.every((value) => value === undefined)) return undefined
  return values.reduce<number>((acc, value) => acc + (value ?? 0), 0)
}
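
// Worked examples:
//   sumTokens(80, undefined, 20)    // -> 100 (missing parts count as 0)
//   sumTokens(undefined, undefined) // -> undefined (no fabricated 0)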

export const eventError = (route: string, message: string, raw?: string) =>
  new LLMError({
    module: "ProviderShared",
    method: "stream",
    reason: new InvalidProviderOutputReason({ route, message, raw }),
  })

export const parseJson = (route: string, input: string, message: string) =>
  Effect.try({
    try: () => decodeJson(input),
    catch: () => eventError(route, message, input),
  })

/**
 * Join the `text` field of a list of parts with newlines. Used by routes
 * that flatten system / message content arrays into a single provider string
 * (OpenAI Chat `system` content, OpenAI Responses `system` content, Gemini
 * `systemInstruction.parts[].text`).
 */
export const joinText = (parts: ReadonlyArray<{ readonly text: string }>) => parts.map((part) => part.text).join("\n")

/**
 * Parse the streamed JSON input of a tool call. Treats an empty string as
 * `"{}"` — providers occasionally finish a tool call without ever emitting
 * input deltas (e.g. zero-arg tools). The error message is uniform across
 * routes: `Invalid JSON input for <route> tool call <name>`.
 */
export const parseToolInput = (route: string, name: string, raw: string) =>
  parseJson(route, raw || "{}", `Invalid JSON input for ${route} tool call ${name}`)
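
// Example: a zero-arg tool that finished without input deltas parses to `{}`:
//   parseToolInput("OpenAI Chat", "get_time", "")      // succeeds with {}
//   parseToolInput("OpenAI Chat", "get_time", `{"tz"`) // fails with
//   `Invalid JSON input for OpenAI Chat tool call get_time`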

/**
 * Encode a `MediaPart`'s raw bytes for inclusion in a JSON request body.
 * `data: string` is assumed to already be base64 (matches caller convention
 * across Gemini / Bedrock); `data: Uint8Array` is base64-encoded here. Used
 * by every route that supports image / document inputs.
 */
export const mediaBytes = (part: MediaPart) =>
  typeof part.data === "string" ? part.data : Buffer.from(part.data).toString("base64")

export const trimBaseUrl = (value: string) => value.replace(/\/+$/, "")

export const toolResultText = (part: ToolResultPart) => {
  if (part.result.type === "text" || part.result.type === "error") return String(part.result.value)
  return encodeJson(part.result.value)
}

export const errorText = (error: unknown) => {
  if (error instanceof Error) return error.message
  if (typeof error === "string") return error
  if (typeof error === "number" || typeof error === "boolean" || typeof error === "bigint") return String(error)
  if (error === null) return "null"
  if (error === undefined) return "undefined"
  return "Unknown stream error"
}

/**
 * `framing` step for Server-Sent Events. Decodes UTF-8, runs the SSE channel
 * decoder, and drops empty / `[DONE]` keep-alive events so the downstream
 * `decodeChunk` sees one JSON string per element. The SSE channel emits a
 * `Retry` control event on its error channel; we drop it here (we don't
 * implement client-driven retries) so the public error channel stays
 * `LLMError`.
 */
export const sseFraming = (bytes: Stream.Stream<Uint8Array, LLMError>): Stream.Stream<string, LLMError> =>
  bytes.pipe(
    Stream.decodeText(),
    Stream.pipeThroughChannel(Sse.decode()),
    Stream.catchTag("Retry", () => Stream.empty),
    Stream.filter((event) => event.data.length > 0 && event.data !== "[DONE]"),
    Stream.map((event) => event.data),
  )
|
||||
|
||||
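
// Framing sketch (wire bytes are illustrative): the SSE frames
//
//   data: {"id":1}\n\n
//   data: [DONE]\n\n
//
// become a single stream element `{"id":1}`; the `[DONE]` keep-alive is dropped.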

/**
 * Canonical invalid-request constructor. Lift one-line `const invalid =
 * (message) => invalidRequest(message)` aliases out of every route so the
 * error constructor lives in one place. If we ever extend
 * `InvalidRequestReason` with route context or trace metadata, the change
 * lands here.
 */
export const invalidRequest = (message: string) =>
  new LLMError({
    module: "ProviderShared",
    method: "request",
    reason: new InvalidRequestReason({ message }),
  })

export const matchToolChoice = <Auto, None, Required, Tool>(
  route: string,
  toolChoice: NonNullable<LLMRequest["toolChoice"]>,
  cases: {
    readonly auto: () => Auto
    readonly none: () => None
    readonly required: () => Required
    readonly tool: (name: string) => Tool
  },
) =>
  Effect.gen(function* () {
    if (toolChoice.type === "auto") return cases.auto()
    if (toolChoice.type === "none") return cases.none()
    if (toolChoice.type === "required") return cases.required()
    if (!toolChoice.name) return yield* invalidRequest(`${route} tool choice requires a tool name`)
    return cases.tool(toolChoice.name)
  })
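
// Usage sketch (hypothetical lowering into OpenAI Chat's wire shape):
//
//   matchToolChoice("openai-chat", toolChoice, {
//     auto: () => "auto" as const,
//     none: () => "none" as const,
//     required: () => "required" as const,
//     tool: (name) => ({ type: "function", function: { name } }) as const,
//   })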

type ContentType = ContentPart["type"]

const formatContentTypes = (types: ReadonlyArray<ContentType>) => {
  if (types.length <= 1) return types[0] ?? ""
  if (types.length === 2) return `${types[0]} and ${types[1]}`
  return `${types.slice(0, -1).join(", ")}, and ${types.at(-1)}`
}
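
// Oxford-comma joiner; example outputs (illustrative):
//
//   formatContentTypes(["text"])                  // "text"
//   formatContentTypes(["text", "media"])         // "text and media"
//   formatContentTypes(["text", "media", "tool"]) // "text, media, and tool"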

export const supportsContent = <const Type extends ContentType>(
  part: ContentPart,
  types: ReadonlyArray<Type>,
): part is Extract<ContentPart, { readonly type: Type }> => (types as ReadonlyArray<ContentType>).includes(part.type)

export const unsupportedContent = (
  route: string,
  role: LLMRequest["messages"][number]["role"],
  types: ReadonlyArray<ContentType>,
) => invalidRequest(`${route} ${role} messages only support ${formatContentTypes(types)} content for now`)

/**
 * Build a `validate` step from a Schema decoder. Replaces the per-route
 * lambda body `(payload) => decode(payload).pipe(Effect.mapError((e) =>
 * invalid(e.message)))`. Any decode error is translated into
 * `LLMError` carrying the original parse-error message.
 */
export const validateWith =
  <A, I, E extends { readonly message: string }>(decode: (input: I) => Effect.Effect<A, E>) =>
  (payload: I) =>
    decode(payload).pipe(Effect.mapError((error) => invalidRequest(error.message)))

/**
 * Build an HTTP POST with a JSON body. Sets `content-type: application/json`
 * automatically after caller-supplied headers so routes cannot accidentally
 * send JSON with a stale content type. The body is passed pre-encoded so
 * routes can choose between `Schema.encodeSync(payload)` and
 * `ProviderShared.encodeJson(payload)`.
 */
export const jsonPost = (input: { readonly url: string; readonly body: string; readonly headers?: Headers.Input }) =>
  HttpClientRequest.post(input.url).pipe(
    HttpClientRequest.setHeaders(Headers.set(Headers.fromInput(input.headers), "content-type", "application/json")),
    HttpClientRequest.bodyText(input.body, "application/json"),
  )

export * as ProviderShared from "./shared"
103
packages/llm/src/protocols/utils/bedrock-auth.ts
Normal file
@@ -0,0 +1,103 @@
import { AwsV4Signer } from "aws4fetch"
import { Effect, Option, Schema } from "effect"
import { Headers } from "effect/unstable/http"
import { Auth, type AuthInput } from "../../route/auth"
import type { LLMRequest } from "../../schema"
import { ProviderShared } from "../shared"

/**
 * AWS credentials for SigV4 signing. Bedrock also supports Bearer API key auth
 * via `model.apiKey`, which bypasses SigV4 signing. STS-vended credentials
 * should be refreshed by the consumer (rebuild the model) before they expire;
 * the route does not refresh.
 */
export interface Credentials {
  readonly region: string
  readonly accessKeyId: string
  readonly secretAccessKey: string
  readonly sessionToken?: string
}

const NativeCredentials = Schema.Struct({
  accessKeyId: Schema.String,
  secretAccessKey: Schema.String,
  region: Schema.optional(Schema.String),
  sessionToken: Schema.optional(Schema.String),
})

const decodeNativeCredentials = Schema.decodeUnknownOption(NativeCredentials)

export const region = (request: LLMRequest) => {
  const fromNative = request.model.native?.aws_region
  if (typeof fromNative === "string" && fromNative !== "") return fromNative
  return (
    decodeNativeCredentials(request.model.native?.aws_credentials).pipe(
      Option.map((credentials) => credentials.region),
      Option.getOrUndefined,
    ) ?? "us-east-1"
  )
}
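
// Resolution-order sketch (values are illustrative):
//
//   native = { aws_region: "eu-west-1" }                        // -> "eu-west-1"
//   native = { aws_credentials: { ..., region: "us-west-2" } }  // -> "us-west-2"
//   native = {}                                                 // -> "us-east-1" (default)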

const credentialsFromInput = (request: LLMRequest): Credentials | undefined =>
  decodeNativeCredentials(request.model.native?.aws_credentials).pipe(
    Option.map((creds) => ({ ...creds, region: creds.region ?? region(request) })),
    Option.getOrUndefined,
  )

const signRequest = (input: {
  readonly url: string
  readonly body: string
  readonly headers: Headers.Headers
  readonly credentials: Credentials
}) =>
  Effect.tryPromise({
    try: async () => {
      const signed = await new AwsV4Signer({
        url: input.url,
        method: "POST",
        headers: Object.entries(input.headers),
        body: input.body,
        region: input.credentials.region,
        accessKeyId: input.credentials.accessKeyId,
        secretAccessKey: input.credentials.secretAccessKey,
        sessionToken: input.credentials.sessionToken,
        service: "bedrock",
      }).sign()
      return Object.fromEntries(signed.headers.entries())
    },
    catch: (error) =>
      ProviderShared.invalidRequest(
        `Bedrock Converse SigV4 signing failed: ${error instanceof Error ? error.message : String(error)}`,
      ),
  })

/**
 * Bedrock auth. `model.apiKey` (Bedrock's newer Bearer API key auth) wins if
 * set; otherwise sign the exact JSON bytes with SigV4 using credentials from
 * `model.native.aws_credentials`.
 */
export const auth = Auth.custom((input: AuthInput) => {
  if (input.request.model.apiKey) return Auth.toEffect(Auth.bearer())(input)
  return Effect.gen(function* () {
    const credentials = credentialsFromInput(input.request)
    if (!credentials) {
      return yield* ProviderShared.invalidRequest(
        "Bedrock Converse requires either model.apiKey or AWS credentials in model.native.aws_credentials",
      )
    }
    const headersForSigning = Headers.set(input.headers, "content-type", "application/json")
    const signed = yield* signRequest({ url: input.url, body: input.body, headers: headersForSigning, credentials })
    return Headers.setAll(headersForSigning, signed)
  })
})

export const nativeCredentials = (native: Record<string, unknown> | undefined, credentials: Credentials | undefined) =>
  credentials
    ? {
        ...native,
        aws_credentials: credentials,
        aws_region: credentials.region,
      }
    : native

export * as BedrockAuth from "./bedrock-auth"
37
packages/llm/src/protocols/utils/bedrock-cache.ts
Normal file
@@ -0,0 +1,37 @@
import { Schema } from "effect"
import type { CacheHint } from "../../schema"
import { newBreakpoints, ttlBucket, type Breakpoints } from "./cache"

// Bedrock cache markers are positional: emit a `cachePoint` block immediately
// after the content the caller wants treated as a cacheable prefix. Bedrock
// accepts optional `ttl: "5m" | "1h"` on cachePoint, mirroring Anthropic.
export const CachePointBlock = Schema.Struct({
  cachePoint: Schema.Struct({
    type: Schema.tag("default"),
    ttl: Schema.optional(Schema.Literals(["5m", "1h"])),
  }),
})
export type CachePointBlock = Schema.Schema.Type<typeof CachePointBlock>

// Bedrock-Claude enforces the same 4-breakpoint cap as the Anthropic Messages
// API. Callers pass a shared counter through every `block()` call site so the
// budget is respected across `system`, `messages`, and `tools`.
export const BEDROCK_BREAKPOINT_CAP = 4

export type { Breakpoints } from "./cache"
export const breakpoints = () => newBreakpoints(BEDROCK_BREAKPOINT_CAP)

const DEFAULT_5M: CachePointBlock = { cachePoint: { type: "default" } }
const DEFAULT_1H: CachePointBlock = { cachePoint: { type: "default", ttl: "1h" } }

export const block = (breakpoints: Breakpoints, cache: CacheHint | undefined): CachePointBlock | undefined => {
  if (cache?.type !== "ephemeral" && cache?.type !== "persistent") return undefined
  if (breakpoints.remaining <= 0) {
    breakpoints.dropped += 1
    return undefined
  }
  breakpoints.remaining -= 1
  return ttlBucket(cache.ttlSeconds) === "1h" ? DEFAULT_1H : DEFAULT_5M
}
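
// Budget sketch (cache hints are illustrative): one shared counter per request.
//
//   const budget = breakpoints()                              // { remaining: 4, dropped: 0 }
//   block(budget, { type: "ephemeral" })                      // -> { cachePoint: { type: "default" } }
//   block(budget, { type: "persistent", ttlSeconds: 7200 })   // -> { cachePoint: { type: "default", ttl: "1h" } }
//   // fifth eligible hint onward returns undefined and increments budget.dropped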

export * as BedrockCache from "./bedrock-cache"
80
packages/llm/src/protocols/utils/bedrock-media.ts
Normal file
@@ -0,0 +1,80 @@
import { Effect, Schema } from "effect"
import type { MediaPart } from "../../schema"
import { ProviderShared } from "../shared"

// Bedrock Converse accepts image `format` as the file extension and
// `source.bytes` as base64 in the JSON wire format.
export const ImageFormat = Schema.Literals(["png", "jpeg", "gif", "webp"])
export type ImageFormat = Schema.Schema.Type<typeof ImageFormat>

export const ImageBlock = Schema.Struct({
  image: Schema.Struct({
    format: ImageFormat,
    source: Schema.Struct({ bytes: Schema.String }),
  }),
})
export type ImageBlock = Schema.Schema.Type<typeof ImageBlock>

// Bedrock document blocks require a user-facing name so the model can refer to
// the uploaded document.
export const DocumentFormat = Schema.Literals(["pdf", "csv", "doc", "docx", "xls", "xlsx", "html", "txt", "md"])
export type DocumentFormat = Schema.Schema.Type<typeof DocumentFormat>

export const DocumentBlock = Schema.Struct({
  document: Schema.Struct({
    format: DocumentFormat,
    name: Schema.String,
    source: Schema.Struct({ bytes: Schema.String }),
  }),
})
export type DocumentBlock = Schema.Schema.Type<typeof DocumentBlock>

const IMAGE_FORMATS = {
  "image/png": "png",
  "image/jpeg": "jpeg",
  "image/jpg": "jpeg",
  "image/gif": "gif",
  "image/webp": "webp",
} as const satisfies Record<string, ImageFormat>

const DOCUMENT_FORMATS = {
  "application/pdf": "pdf",
  "text/csv": "csv",
  "application/msword": "doc",
  "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
  "application/vnd.ms-excel": "xls",
  "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
  "text/html": "html",
  "text/plain": "txt",
  "text/markdown": "md",
} as const satisfies Record<string, DocumentFormat>

const imageBlock = (part: MediaPart, format: ImageFormat): ImageBlock => ({
  image: { format, source: { bytes: ProviderShared.mediaBytes(part) } },
})

const documentBlock = (part: MediaPart, format: DocumentFormat): DocumentBlock => ({
  document: {
    format,
    name: part.filename ?? `document.${format}`,
    source: { bytes: ProviderShared.mediaBytes(part) },
  },
})

// Route by MIME. Known image/document formats lower into a typed block; anything
// else fails with a clear error instead of silently degrading to a malformed
// document block. Image MIME types not in `IMAGE_FORMATS` (e.g. `image/svg+xml`)
// get an image-specific error so the caller knows it's a format-support issue,
// not a kind-detection issue.
export const lower = (part: MediaPart) => {
  const mime = part.mediaType.toLowerCase()
  const imageFormat = IMAGE_FORMATS[mime as keyof typeof IMAGE_FORMATS]
  if (imageFormat) return Effect.succeed(imageBlock(part, imageFormat))
  if (mime.startsWith("image/"))
    return ProviderShared.invalidRequest(`Bedrock Converse does not support image media type ${part.mediaType}`)
  const documentFormat = DOCUMENT_FORMATS[mime as keyof typeof DOCUMENT_FORMATS]
  if (documentFormat) return Effect.succeed(documentBlock(part, documentFormat))
  return ProviderShared.invalidRequest(`Bedrock Converse does not support media type ${part.mediaType}`)
}
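
// Routing sketch (media types are illustrative):
//
//   lower({ mediaType: "image/png", data, ... })      // -> ImageBlock
//   lower({ mediaType: "image/svg+xml", data, ... })  // -> image-specific invalidRequest
//   lower({ mediaType: "text/plain", data, ... })     // -> DocumentBlock named document.txt (no filename set)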

export * as BedrockMedia from "./bedrock-media"
16
packages/llm/src/protocols/utils/cache.ts
Normal file
@@ -0,0 +1,16 @@
// Shared helpers for provider cache-marker lowering. Anthropic and Bedrock
// both enforce a 4-breakpoint cap per request and accept the same `5m`/`1h`
// TTL buckets, so the counter and TTL mapping live here.

export interface Breakpoints {
  remaining: number
  dropped: number
}

export const newBreakpoints = (cap: number): Breakpoints => ({ remaining: cap, dropped: 0 })

// Returns `"1h"` for any `ttlSeconds >= 3600`, otherwise `undefined` (the
// provider default 5m). Anthropic & Bedrock both treat anything shorter than
// an hour as 5m.
export const ttlBucket = (ttlSeconds: number | undefined): "1h" | undefined =>
  ttlSeconds !== undefined && ttlSeconds >= 3600 ? "1h" : undefined
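
// Bucketing sketch (values illustrative):
//
//   ttlBucket(undefined) // -> undefined (5m default)
//   ttlBucket(300)       // -> undefined (5m default)
//   ttlBucket(3600)      // -> "1h"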
101
packages/llm/src/protocols/utils/gemini-tool-schema.ts
Normal file
@@ -0,0 +1,101 @@
import { ProviderShared } from "../shared"

// Gemini accepts a JSON Schema-like dialect for tool parameters, but rejects a
// handful of common JSON Schema shapes. Keep this projection isolated so the
// Gemini protocol file still reads like the other protocol modules.
const SCHEMA_INTENT_KEYS = [
  "type",
  "properties",
  "items",
  "prefixItems",
  "enum",
  "const",
  "$ref",
  "additionalProperties",
  "patternProperties",
  "required",
  "not",
  "if",
  "then",
  "else",
]

const isRecord = ProviderShared.isRecord

const hasCombiner = (schema: unknown) =>
  isRecord(schema) && (Array.isArray(schema.anyOf) || Array.isArray(schema.oneOf) || Array.isArray(schema.allOf))

const hasSchemaIntent = (schema: unknown) =>
  isRecord(schema) && (hasCombiner(schema) || SCHEMA_INTENT_KEYS.some((key) => key in schema))

const sanitizeNode = (schema: unknown): unknown => {
  if (!isRecord(schema)) return Array.isArray(schema) ? schema.map(sanitizeNode) : schema

  const result: Record<string, unknown> = Object.fromEntries(
    Object.entries(schema).map(([key, value]) => [
      key,
      key === "enum" && Array.isArray(value) ? value.map(String) : sanitizeNode(value),
    ]),
  )

  if (Array.isArray(result.enum) && (result.type === "integer" || result.type === "number")) result.type = "string"

  const properties = result.properties
  if (result.type === "object" && isRecord(properties) && Array.isArray(result.required)) {
    result.required = result.required.filter((field) => typeof field === "string" && field in properties)
  }

  if (result.type === "array" && !hasCombiner(result)) {
    result.items = result.items ?? {}
    if (isRecord(result.items) && !hasSchemaIntent(result.items)) result.items = { ...result.items, type: "string" }
  }

  if (typeof result.type === "string" && result.type !== "object" && !hasCombiner(result)) {
    delete result.properties
    delete result.required
  }

  return result
}

const emptyObjectSchema = (schema: Record<string, unknown>) =>
  schema.type === "object" &&
  (!isRecord(schema.properties) || Object.keys(schema.properties).length === 0) &&
  !schema.additionalProperties

const projectNode = (schema: unknown): Record<string, unknown> | undefined => {
  if (!isRecord(schema)) return undefined
  if (emptyObjectSchema(schema)) return undefined
  return Object.fromEntries(
    [
      ["description", schema.description],
      ["required", schema.required],
      ["format", schema.format],
      ["type", Array.isArray(schema.type) ? schema.type.filter((type) => type !== "null")[0] : schema.type],
      ["nullable", Array.isArray(schema.type) && schema.type.includes("null") ? true : undefined],
      ["enum", schema.const !== undefined ? [schema.const] : schema.enum],
      [
        "properties",
        isRecord(schema.properties)
          ? Object.fromEntries(Object.entries(schema.properties).map(([key, value]) => [key, projectNode(value)]))
          : undefined,
      ],
      [
        "items",
        Array.isArray(schema.items)
          ? schema.items.map(projectNode)
          : schema.items === undefined
            ? undefined
            : projectNode(schema.items),
      ],
      ["allOf", Array.isArray(schema.allOf) ? schema.allOf.map(projectNode) : undefined],
      ["anyOf", Array.isArray(schema.anyOf) ? schema.anyOf.map(projectNode) : undefined],
      ["oneOf", Array.isArray(schema.oneOf) ? schema.oneOf.map(projectNode) : undefined],
      ["minLength", schema.minLength],
    ].filter((entry) => entry[1] !== undefined),
  )
}

export const convert = (schema: unknown) => projectNode(sanitizeNode(schema))
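
// Projection sketch (input schema is illustrative):
//
//   convert({ type: ["string", "null"], enum: [1, 2] })
//   // -> { type: "string", nullable: true, enum: ["1", "2"] }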

export * as GeminiToolSchema from "./gemini-tool-schema"
55
packages/llm/src/protocols/utils/openai-options.ts
Normal file
@@ -0,0 +1,55 @@
import { Schema } from "effect"
import type { LLMRequest, ReasoningEffort, TextVerbosity as TextVerbosityValue } from "../../schema"
import { ReasoningEfforts, TextVerbosity } from "../../schema"

export const OpenAIReasoningEfforts = ReasoningEfforts.filter(
  (effort): effort is Exclude<ReasoningEffort, "max"> => effort !== "max",
)
export type OpenAIReasoningEffort = (typeof OpenAIReasoningEfforts)[number]

const REASONING_EFFORTS = new Set<string>(ReasoningEfforts)
const OPENAI_REASONING_EFFORTS = new Set<string>(OpenAIReasoningEfforts)
const TEXT_VERBOSITY = new Set<string>(["low", "medium", "high"])

export const OpenAIReasoningEffort = Schema.Literals(OpenAIReasoningEfforts)
export const OpenAITextVerbosity = TextVerbosity

const isAnyReasoningEffort = (effort: unknown): effort is ReasoningEffort =>
  typeof effort === "string" && REASONING_EFFORTS.has(effort)

export const isReasoningEffort = (effort: unknown): effort is OpenAIReasoningEffort =>
  typeof effort === "string" && OPENAI_REASONING_EFFORTS.has(effort)

const isTextVerbosity = (value: unknown): value is TextVerbosityValue =>
  typeof value === "string" && TEXT_VERBOSITY.has(value)

const options = (request: LLMRequest) => request.providerOptions?.openai

export const store = (request: LLMRequest): boolean | undefined => {
  const value = options(request)?.store
  return typeof value === "boolean" ? value : undefined
}

export const reasoningEffort = (request: LLMRequest): ReasoningEffort | undefined => {
  const value = options(request)?.reasoningEffort
  return isAnyReasoningEffort(value) ? value : undefined
}

export const reasoningSummary = (request: LLMRequest): "auto" | undefined => {
  return options(request)?.reasoningSummary === "auto" ? "auto" : undefined
}

export const encryptedReasoning = (request: LLMRequest) =>
  options(request)?.includeEncryptedReasoning === true ? true : undefined

export const promptCacheKey = (request: LLMRequest) => {
  const value = options(request)?.promptCacheKey
  return typeof value === "string" ? value : undefined
}

export const textVerbosity = (request: LLMRequest) => {
  const value = options(request)?.textVerbosity
  return isTextVerbosity(value) ? value : undefined
}

export * as OpenAIOptions from "./openai-options"
186
packages/llm/src/protocols/utils/tool-stream.ts
Normal file
@@ -0,0 +1,186 @@
import { Effect } from "effect"
import { LLMError, LLMEvent, type ProviderMetadata, type ToolCall, type ToolInputDelta } from "../../schema"
import { eventError, parseToolInput, type ToolAccumulator } from "../shared"

type StreamKey = string | number

/**
 * One pending streamed tool call. Providers emit the tool identity and JSON
 * argument text across separate chunks; `input` is the raw JSON string collected
 * so far, not the parsed object.
 */
export interface PendingTool extends ToolAccumulator {
  readonly providerExecuted?: boolean
  readonly providerMetadata?: ProviderMetadata
}

/**
 * Sparse parser state keyed by the provider's stream-local tool identifier.
 *
 * This key is not the final tool-call id (`call_...`). It is the id/index the
 * provider uses while streaming a partial call: OpenAI Chat / Anthropic /
 * Bedrock use numeric content indexes, while OpenAI Responses uses string
 * `item_id`s. The generic keeps each protocol internally consistent.
 */
export type State<K extends StreamKey> = Partial<Record<K, PendingTool>>

/**
 * Result of adding argument text to one pending tool call. It returns both the
 * next `tools` state and the updated `tool` because parsers often need the
 * current id/name immediately. `event` is present only when new text arrived;
 * metadata-only deltas update identity without emitting `tool-input-delta`.
 */
export interface AppendOutcome<K extends StreamKey> {
  readonly tools: State<K>
  readonly tool: PendingTool
  readonly event?: ToolInputDelta
}

/** Create empty accumulator state for one provider stream. */
export const empty = <K extends StreamKey>(): State<K> => ({})

const withTool = <K extends StreamKey>(tools: State<K>, key: K, tool: PendingTool): State<K> => {
  return { ...tools, [key]: tool }
}

const withoutTool = <K extends StreamKey>(tools: State<K>, key: K): State<K> => {
  const next = { ...tools }
  delete next[key]
  return next
}

const inputDelta = (tool: PendingTool, text: string): ToolInputDelta =>
  LLMEvent.toolInputDelta({
    id: tool.id,
    name: tool.name,
    text,
  })

const toolCall = (route: string, tool: PendingTool, inputOverride?: string) =>
  parseToolInput(route, tool.name, inputOverride ?? tool.input).pipe(
    Effect.map(
      (input): ToolCall =>
        LLMEvent.toolCall({
          id: tool.id,
          name: tool.name,
          input,
          providerExecuted: tool.providerExecuted ? true : undefined,
          providerMetadata: tool.providerMetadata,
        }),
    ),
  )

/** Store the updated tool and produce the optional public delta event. */
const appendTool = <K extends StreamKey>(
  tools: State<K>,
  key: K,
  tool: PendingTool,
  text: string,
): AppendOutcome<K> => ({
  tools: withTool(tools, key, tool),
  tool,
  event: text.length === 0 ? undefined : inputDelta(tool, text),
})

export const isError = <K extends StreamKey>(result: AppendOutcome<K> | LLMError): result is LLMError =>
  result instanceof LLMError

/**
 * Register a tool call whose start event arrived before any argument deltas.
 * Used by Anthropic `content_block_start`, Bedrock `contentBlockStart`, and
 * OpenAI Responses `response.output_item.added`.
 */
export const start = <K extends StreamKey>(
  tools: State<K>,
  key: K,
  tool: Omit<PendingTool, "input"> & { readonly input?: string },
) => withTool(tools, key, { ...tool, input: tool.input ?? "" })

/**
 * Append a streamed argument delta, starting the tool if this provider encodes
 * identity on the first delta instead of a separate start event. OpenAI Chat has
 * this shape: `tool_calls[].index` is the stream key, and `id` / `name` may only
 * appear on the first delta for that index.
 */
export const appendOrStart = <K extends StreamKey>(
  route: string,
  tools: State<K>,
  key: K,
  delta: { readonly id?: string; readonly name?: string; readonly text: string },
  missingToolMessage: string,
): AppendOutcome<K> | LLMError => {
  const current = tools[key]
  const id = delta.id ?? current?.id
  const name = delta.name ?? current?.name
  if (!id || !name) return eventError(route, missingToolMessage)

  const tool = {
    id,
    name,
    input: `${current?.input ?? ""}${delta.text}`,
    providerExecuted: current?.providerExecuted,
    providerMetadata: current?.providerMetadata,
  }
  if (current && delta.text.length === 0 && current.id === id && current.name === name) return { tools, tool: current }
  return appendTool(tools, key, tool, delta.text)
}
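
// Accumulation sketch (OpenAI Chat-style deltas; ids and text are illustrative):
//
//   let state = empty<number>()
//   let out = appendOrStart("openai-chat", state, 0, { id: "call_1", name: "get_time", text: '{"tz":' }, "missing tool")
//   if (!isError(out)) out = appendOrStart("openai-chat", out.tools, 0, { text: '"UTC"}' }, "missing tool")
//   // finishing key 0 then parses the accumulated '{"tz":"UTC"}'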

/**
 * Append argument text to a tool that must already have been started. This keeps
 * protocols honest when their stream grammar promises a start event before any
 * argument delta.
 */
export const appendExisting = <K extends StreamKey>(
  route: string,
  tools: State<K>,
  key: K,
  text: string,
  missingToolMessage: string,
): AppendOutcome<K> | LLMError => {
  const current = tools[key]
  if (!current) return eventError(route, missingToolMessage)
  if (text.length === 0) return { tools, tool: current }
  return appendTool(tools, key, { ...current, input: `${current.input}${text}` }, text)
}

/**
 * Finalize one pending tool call: parse the accumulated raw JSON, remove it
 * from state, and return the optional public `tool-call` event. Missing keys are
 * a no-op because some providers emit stop events for non-tool content blocks.
 */
export const finish = <K extends StreamKey>(route: string, tools: State<K>, key: K) =>
  Effect.gen(function* () {
    const tool = tools[key]
    if (!tool) return { tools }
    return { tools: withoutTool(tools, key), event: yield* toolCall(route, tool) }
  })

/**
 * Finalize one pending tool call with an authoritative final input string.
 * OpenAI Responses can send accumulated deltas and then repeat the completed
 * arguments on `response.output_item.done`; the final value wins.
 */
export const finishWithInput = <K extends StreamKey>(route: string, tools: State<K>, key: K, input: string) =>
  Effect.gen(function* () {
    const tool = tools[key]
    if (!tool) return { tools }
    return { tools: withoutTool(tools, key), event: yield* toolCall(route, tool, input) }
  })

/**
 * Finalize every pending tool call at once. OpenAI Chat has this shape: it does
 * not emit per-tool stop events, so all accumulated calls finish when the choice
 * receives a terminal `finish_reason`.
 */
export const finishAll = <K extends StreamKey>(route: string, tools: State<K>) =>
  Effect.gen(function* () {
    const pending = Object.values<PendingTool | undefined>(tools).filter(
      (tool): tool is PendingTool => tool !== undefined,
    )
    return {
      tools: empty<K>(),
      events: yield* Effect.forEach(pending, (tool) => toolCall(route, tool)),
    }
  })

export * as ToolStream from "./tool-stream"
31
packages/llm/src/provider.ts
Normal file
@@ -0,0 +1,31 @@
import type { RouteModelInput } from "./route/client"
import type { ModelID, ModelRef, ProviderID } from "./schema"

export type ModelOptions = Omit<RouteModelInput, "id">

export type ModelFactory<Options extends ModelOptions = ModelOptions> = (
  id: string | ModelID,
  options?: Options,
) => ModelRef

type AnyModelFactory = (...args: never[]) => ModelRef

export interface Definition<Factory extends AnyModelFactory = ModelFactory> {
  readonly id: ProviderID
  readonly model: Factory
  readonly apis?: Record<string, AnyModelFactory>
}

type DefinitionShape = {
  readonly id: ProviderID
  readonly model: (...args: never[]) => ModelRef
  readonly apis?: Record<string, (...args: never[]) => ModelRef>
}

type NoExtraFields<Input, Shape> = Input & Record<Exclude<keyof Input, keyof Shape>, never>

export const make = <DefinitionType extends DefinitionShape>(
  definition: NoExtraFields<DefinitionType, DefinitionShape>,
) => definition

export * as Provider from "./provider"
48
packages/llm/src/providers/amazon-bedrock.ts
Normal file
@@ -0,0 +1,48 @@
import { Route, type RouteModelInput } from "../route/client"
import { Provider } from "../provider"
import { ProviderID, type ModelID } from "../schema"
import * as BedrockConverse from "../protocols/bedrock-converse"
import type { BedrockCredentials } from "../protocols/bedrock-converse"

export const id = ProviderID.make("amazon-bedrock")

export type ModelOptions = Omit<RouteModelInput, "id" | "baseURL"> & {
  readonly apiKey?: string
  readonly headers?: Record<string, string>
  readonly credentials?: BedrockCredentials
  /** AWS region. Defaults to `us-east-1` when neither this nor `credentials.region` is set. */
  readonly region?: string
  /** Override the computed `https://bedrock-runtime.<region>.amazonaws.com` URL. */
  readonly baseURL?: string
}
type ModelInput = ModelOptions & Pick<RouteModelInput, "id">

export const routes = [BedrockConverse.route]

const bedrockBaseURL = (region: string) => `https://bedrock-runtime.${region}.amazonaws.com`

const converseModel = Route.model<ModelInput>(
  BedrockConverse.route,
  {
    provider: "amazon-bedrock",
  },
  {
    mapInput: (input) => {
      const { credentials, region, baseURL, ...rest } = input
      const resolvedRegion = region ?? credentials?.region ?? "us-east-1"
      return {
        ...rest,
        baseURL: baseURL ?? bedrockBaseURL(resolvedRegion),
        native: BedrockConverse.nativeCredentials(input.native, credentials),
      }
    },
  },
)

export const model = (modelID: string | ModelID, options: ModelOptions = {}) =>
  converseModel({ ...options, id: modelID })

export const provider = Provider.make({
  id,
  model,
})
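
// Usage sketch (model id and region are illustrative):
//
//   const ref = model("anthropic.claude-3-5-sonnet-20240620-v1:0", { region: "us-west-2" })
//   // resolves baseURL to https://bedrock-runtime.us-west-2.amazonaws.com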
18
packages/llm/src/providers/anthropic.ts
Normal file
@@ -0,0 +1,18 @@
import type { RouteModelInput } from "../route/client"
import { Provider } from "../provider"
import { ProviderID, type ModelID } from "../schema"
import * as AnthropicMessages from "../protocols/anthropic-messages"

export const id = ProviderID.make("anthropic")

export const routes = [AnthropicMessages.route]

export const model = (
  id: string | ModelID,
  options: Omit<RouteModelInput, "id" | "baseURL"> & { readonly baseURL?: string } = {},
) => AnthropicMessages.model({ ...options, id })

export const provider = Provider.make({
  id,
  model,
})
83
packages/llm/src/providers/azure.ts
Normal file
@@ -0,0 +1,83 @@
import { Auth } from "../route/auth"
import { type AtLeastOne, type ProviderAuthOption } from "../route/auth-options"
import { Route } from "../route/client"
import type { ModelInput } from "../llm"
import { Provider } from "../provider"
import { ProviderID, type ModelID } from "../schema"
import * as OpenAIChat from "../protocols/openai-chat"
import * as OpenAIResponses from "../protocols/openai-responses"
import { withOpenAIOptions, type OpenAIProviderOptionsInput } from "./openai-options"

export const id = ProviderID.make("azure")
const routeAuth = Auth.remove("authorization").andThen(Auth.apiKeyHeader("api-key"))

// Azure needs the customer's resource URL; supply either `resourceName`
// (helper builds the URL) or `baseURL` directly.
type AzureURL = AtLeastOne<{ readonly resourceName: string; readonly baseURL: string }>

export type ModelOptions = AzureURL &
  Omit<ModelInput, "id" | "provider" | "route" | "apiKey" | "auth" | "baseURL"> &
  ProviderAuthOption<"optional"> & {
    readonly apiVersion?: string
    readonly useCompletionUrls?: boolean
    readonly providerOptions?: OpenAIProviderOptionsInput
  }
type AzureModelInput = ModelOptions & Pick<ModelInput, "id">

const resourceBaseURL = (resourceName: string) => `https://${resourceName.trim()}.openai.azure.com/openai/v1`

const responsesRoute = OpenAIResponses.route.with({
  id: "azure-openai-responses",
  provider: id,
  transport: OpenAIResponses.httpTransport.with({ auth: routeAuth }),
})

const chatRoute = OpenAIChat.route.with({
  id: "azure-openai-chat",
  provider: id,
  transport: OpenAIChat.httpTransport.with({ auth: routeAuth }),
})

export const routes = [responsesRoute, chatRoute]

const mapInput = (input: AzureModelInput) => {
  const { apiKey: _, apiVersion, resourceName, useCompletionUrls, ...rest } = input
  return {
    ...withOpenAIOptions(input.id, rest),
    auth:
      "auth" in input && input.auth
        ? input.auth
        : Auth.remove("authorization").andThen(
            Auth.optional("apiKey" in input ? input.apiKey : undefined, "apiKey")
              .orElse(Auth.config("AZURE_OPENAI_API_KEY"))
              .pipe(Auth.header("api-key")),
          ),
    // AtLeastOne guarantees at least one is set; baseURL wins if both are.
    baseURL: rest.baseURL ?? resourceBaseURL(resourceName!),
    queryParams: {
      ...rest.queryParams,
      "api-version": apiVersion ?? rest.queryParams?.["api-version"] ?? "v1",
    },
  }
}

const chatModel = Route.model<AzureModelInput>(chatRoute, {}, { mapInput })
const responsesModel = Route.model<AzureModelInput>(responsesRoute, {}, { mapInput })

export const responses = (modelID: string | ModelID, options: ModelOptions) =>
  responsesModel({ ...options, id: modelID })

export const chat = (modelID: string | ModelID, options: ModelOptions) => chatModel({ ...options, id: modelID })

export const model = (modelID: string | ModelID, options: ModelOptions) => {
  if (options.useCompletionUrls === true) return chat(modelID, options)
  return responses(modelID, options)
}
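
// Usage sketch (resource and deployment names are illustrative):
//
//   model("gpt-4o-mini", { resourceName: "my-resource" })
//   // -> Responses API at https://my-resource.openai.azure.com/openai/v1 with api-version=v1
//   model("gpt-4o-mini", { resourceName: "my-resource", useCompletionUrls: true })  // -> Chat API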

export const provider = Provider.make({
  id,
  model,
  apis: { responses, chat },
})

export const apis = provider.apis
139
packages/llm/src/providers/cloudflare.ts
Normal file
@@ -0,0 +1,139 @@
import type { Config, Redacted } from "effect"
import { type ModelInput } from "../llm"
import { Provider } from "../provider"
import * as OpenAICompatibleChat from "../protocols/openai-compatible-chat"
import { Auth } from "../route/auth"
import { AuthOptions, type AtLeastOne, type ProviderAuthOption } from "../route/auth-options"
import { Route } from "../route/client"
import { ProviderID, type ModelID } from "../schema"

export const aiGatewayID = ProviderID.make("cloudflare-ai-gateway")
export const workersAIID = ProviderID.make("cloudflare-workers-ai")
export const id = aiGatewayID
export const aiGatewayAuthEnvVars = ["CLOUDFLARE_API_TOKEN", "CF_AIG_TOKEN"] as const
export const workersAIAuthEnvVars = ["CLOUDFLARE_API_KEY", "CLOUDFLARE_WORKERS_AI_TOKEN"] as const

type CloudflareSecret = string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>>

type GatewayURL = AtLeastOne<{
  readonly accountId: string
  readonly baseURL: string
}> & {
  readonly gatewayId?: string
}

export type AIGatewayOptions = GatewayURL &
  Omit<ModelInput, "id" | "provider" | "route" | "baseURL" | "apiKey" | "auth"> &
  ProviderAuthOption<"optional"> & {
    /** Cloudflare AI Gateway authentication token. Sent as `cf-aig-authorization`. */
    readonly gatewayApiKey?: CloudflareSecret
  }

type AIGatewayInput = AIGatewayOptions & Pick<ModelInput, "id">

type WorkersAIURL = AtLeastOne<{
  readonly accountId: string
  readonly baseURL: string
}>

export type WorkersAIOptions = WorkersAIURL &
  Omit<ModelInput, "id" | "provider" | "route" | "baseURL" | "apiKey" | "auth"> &
  ProviderAuthOption<"optional">

type WorkersAIInput = WorkersAIOptions & Pick<ModelInput, "id">

export const aiGatewayBaseURL = (input: GatewayURL) => {
  if (input.baseURL) return input.baseURL
  if (!input.accountId) throw new Error("Cloudflare.aiGateway requires accountId unless baseURL is supplied")
  return `https://gateway.ai.cloudflare.com/v1/${encodeURIComponent(input.accountId)}/${encodeURIComponent(input.gatewayId?.trim() || "default")}/compat`
}
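
// URL sketch (account and gateway ids are illustrative):
//
//   aiGatewayBaseURL({ accountId: "abc123" })
//   // -> "https://gateway.ai.cloudflare.com/v1/abc123/default/compat"
//   aiGatewayBaseURL({ accountId: "abc123", gatewayId: "prod" })
//   // -> "https://gateway.ai.cloudflare.com/v1/abc123/prod/compat"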

const aiGatewayAuth = (input: AIGatewayInput) => {
  if ("auth" in input && input.auth) return input.auth
  const gateway = Auth.optional(input.gatewayApiKey, "gatewayApiKey")
    .orElse(Auth.config("CLOUDFLARE_API_TOKEN"))
    .orElse(Auth.config("CF_AIG_TOKEN"))
    .pipe(Auth.bearerHeader("cf-aig-authorization"))
  if (!("apiKey" in input) || input.apiKey === undefined) return gateway
  if (input.gatewayApiKey === undefined) return Auth.bearer(input.apiKey)
  return Auth.bearerHeader("cf-aig-authorization", input.gatewayApiKey).andThen(Auth.bearer(input.apiKey))
}

export const workersAIBaseURL = (input: WorkersAIURL) => {
  if (input.baseURL) return input.baseURL
  if (!input.accountId) throw new Error("Cloudflare.workersAI requires accountId unless baseURL is supplied")
  return `https://api.cloudflare.com/client/v4/accounts/${encodeURIComponent(input.accountId)}/ai/v1`
}

const workersAIAuth = (input: WorkersAIInput) => {
  return AuthOptions.bearer(input, workersAIAuthEnvVars)
}

export const aiGatewayRoute = OpenAICompatibleChat.route.with({
  id: "cloudflare-ai-gateway",
  provider: aiGatewayID,
})

export const workersAIRoute = OpenAICompatibleChat.route.with({
  id: "cloudflare-workers-ai",
  provider: workersAIID,
})

export const routes = [aiGatewayRoute, workersAIRoute]

const aiGatewayModel = Route.model<AIGatewayInput>(
  aiGatewayRoute,
  {
    provider: id,
  },
  {
    mapInput: (input) => {
      const {
        accountId: _accountId,
        gatewayId: _gatewayId,
        apiKey: _apiKey,
        gatewayApiKey: _gatewayApiKey,
        auth: _auth,
        ...rest
      } = input
      return {
        ...rest,
        auth: aiGatewayAuth(input),
        baseURL: aiGatewayBaseURL(input),
      }
    },
  },
)

const workersAIModel = Route.model<WorkersAIInput>(
  workersAIRoute,
  {
    provider: workersAIID,
  },
  {
    mapInput: (input) => {
      const { accountId: _accountId, apiKey: _apiKey, auth: _auth, ...rest } = input
      return {
        ...rest,
        auth: workersAIAuth(input),
        baseURL: workersAIBaseURL(input),
      }
    },
  },
)

export const aiGateway = (modelID: string | ModelID, options: AIGatewayOptions) =>
  aiGatewayModel({ ...options, id: modelID })

export const workersAI = (modelID: string | ModelID, options: WorkersAIOptions) =>
  workersAIModel({ ...options, id: modelID })

export const model = aiGateway

export const provider = Provider.make({
  id,
  model,
  apis: { aiGateway, workersAI },
})

export const apis = provider.apis
48
packages/llm/src/providers/github-copilot.ts
Normal file
@@ -0,0 +1,48 @@
import { Route } from "../route/client"
import type { ModelInput } from "../llm"
import { Provider } from "../provider"
import { ProviderID, type ModelID } from "../schema"
import * as OpenAIChat from "../protocols/openai-chat"
import * as OpenAIResponses from "../protocols/openai-responses"
import { withOpenAIOptions, type OpenAIProviderOptionsInput } from "./openai-options"

export const id = ProviderID.make("github-copilot")

// GitHub Copilot has no canonical public URL — callers (opencode, etc.) must
// supply `baseURL` explicitly.
export type ModelOptions = Omit<ModelInput, "id" | "provider" | "route"> & {
  readonly providerOptions?: OpenAIProviderOptionsInput
}
type CopilotModelInput = ModelOptions & Pick<ModelInput, "id">

export const shouldUseResponsesApi = (modelID: string | ModelID) => {
  const model = String(modelID)
  const match = /^gpt-(\d+)/.exec(model)
  if (!match) return false
  return Number(match[1]) >= 5 && !model.startsWith("gpt-5-mini")
}
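
// Routing sketch (model ids illustrative):
//
//   shouldUseResponsesApi("gpt-5")          // true  -> Responses API
//   shouldUseResponsesApi("gpt-5-mini")     // false -> Chat API
//   shouldUseResponsesApi("claude-sonnet")  // false -> Chat API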

export const routes = [OpenAIResponses.route, OpenAIChat.route]

const mapInput = (input: CopilotModelInput) => withOpenAIOptions(input.id, input)

const chatModel = Route.model<CopilotModelInput>(OpenAIChat.route, { provider: id }, { mapInput })
const responsesModel = Route.model<CopilotModelInput>(OpenAIResponses.route, { provider: id }, { mapInput })

export const responses = (modelID: string | ModelID, options: ModelOptions) =>
  responsesModel({ ...options, id: modelID })

export const chat = (modelID: string | ModelID, options: ModelOptions) => chatModel({ ...options, id: modelID })

export const model = (modelID: string | ModelID, options: ModelOptions) => {
  const create = shouldUseResponsesApi(modelID) ? responsesModel : chatModel
  return create({ ...options, id: modelID })
}

export const provider = Provider.make({
  id,
  model,
  apis: { responses, chat },
})

export const apis = provider.apis
18
packages/llm/src/providers/google.ts
Normal file
@@ -0,0 +1,18 @@
import type { RouteModelInput } from "../route/client"
import { Provider } from "../provider"
import { ProviderID, type ModelID } from "../schema"
import * as Gemini from "../protocols/gemini"

export const id = ProviderID.make("google")

export const routes = [Gemini.route]

export const model = (
  id: string | ModelID,
  options: Omit<RouteModelInput, "id" | "baseURL"> & { readonly baseURL?: string } = {},
) => Gemini.model({ ...options, id })

export const provider = Provider.make({
  id,
  model,
})
10
packages/llm/src/providers/index.ts
Normal file
@@ -0,0 +1,10 @@
export * as Anthropic from "./anthropic"
export * as AmazonBedrock from "./amazon-bedrock"
export * as Azure from "./azure"
export * as Cloudflare from "./cloudflare"
export * as GitHubCopilot from "./github-copilot"
export * as Google from "./google"
export * as OpenAI from "./openai"
export * as OpenAICompatible from "./openai-compatible"
export * as OpenRouter from "./openrouter"
export * as XAI from "./xai"
20
packages/llm/src/providers/openai-compatible-profile.ts
Normal file
@@ -0,0 +1,20 @@
export interface OpenAICompatibleProfile {
  readonly provider: string
  readonly baseURL: string
}

export const profiles = {
  baseten: { provider: "baseten", baseURL: "https://inference.baseten.co/v1" },
  cerebras: { provider: "cerebras", baseURL: "https://api.cerebras.ai/v1" },
  deepinfra: { provider: "deepinfra", baseURL: "https://api.deepinfra.com/v1/openai" },
  deepseek: { provider: "deepseek", baseURL: "https://api.deepseek.com/v1" },
  fireworks: { provider: "fireworks", baseURL: "https://api.fireworks.ai/inference/v1" },
  groq: { provider: "groq", baseURL: "https://api.groq.com/openai/v1" },
  openrouter: { provider: "openrouter", baseURL: "https://openrouter.ai/api/v1" },
  togetherai: { provider: "togetherai", baseURL: "https://api.together.xyz/v1" },
  xai: { provider: "xai", baseURL: "https://api.x.ai/v1" },
} as const satisfies Record<string, OpenAICompatibleProfile>

export const byProvider: Record<string, OpenAICompatibleProfile> = Object.fromEntries(
  Object.values(profiles).map((profile) => [profile.provider, profile]),
)
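
// Lookup sketch (illustrative):
//
//   byProvider["groq"]?.baseURL // -> "https://api.groq.com/openai/v1"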
61
packages/llm/src/providers/openai-compatible.ts
Normal file
@@ -0,0 +1,61 @@
import { Provider } from "../provider"
import { ProviderID, type ModelID } from "../schema"
import * as OpenAICompatibleChat from "../protocols/openai-compatible-chat"
import type { OpenAICompatibleChatModelInput } from "../protocols/openai-compatible-chat"
import { profiles, type OpenAICompatibleProfile } from "./openai-compatible-profile"

export const id = ProviderID.make("openai-compatible")

export type ModelOptions = Omit<OpenAICompatibleChatModelInput, "id" | "provider"> & {
  readonly provider: string
}

type GenericModelOptions = Omit<ModelOptions, "provider"> & {
  readonly provider?: string
}

export type FamilyModelOptions = Omit<OpenAICompatibleChatModelInput, "id" | "provider" | "baseURL"> & {
  readonly baseURL?: string
}

export const routes = [OpenAICompatibleChat.route]

export const model = (id: string | ModelID, options: ModelOptions) => {
  return OpenAICompatibleChat.model({
    ...options,
    id,
    provider: ProviderID.make(options.provider),
  })
}

export const profileModel = (
  profile: OpenAICompatibleProfile,
  id: string | ModelID,
  options: FamilyModelOptions = {},
) =>
  OpenAICompatibleChat.model({
    ...options,
    id,
    provider: profile.provider,
    baseURL: options.baseURL ?? profile.baseURL,
  })

const define = (profile: OpenAICompatibleProfile) =>
  Provider.make({
    id: ProviderID.make(profile.provider),
    model: (id: string | ModelID, options: FamilyModelOptions = {}) => profileModel(profile, id, options),
  })

export const provider = Provider.make({
  id,
  model: (id: string | ModelID, options: GenericModelOptions) =>
    model(id, { ...options, provider: options.provider ?? "openai-compatible" }),
})

export const baseten = define(profiles.baseten)
export const cerebras = define(profiles.cerebras)
export const deepinfra = define(profiles.deepinfra)
export const deepseek = define(profiles.deepseek)
export const fireworks = define(profiles.fireworks)
export const groq = define(profiles.groq)
export const togetherai = define(profiles.togetherai)
70
packages/llm/src/providers/openai-options.ts
Normal file
@@ -0,0 +1,70 @@
import type { ProviderOptions, ReasoningEffort, TextVerbosity } from "../schema"
import { mergeProviderOptions } from "../schema"

export interface OpenAIOptionsInput {
  readonly [key: string]: unknown
  readonly store?: boolean
  readonly promptCacheKey?: string
  readonly reasoningEffort?: ReasoningEffort
  readonly reasoningSummary?: "auto"
  readonly includeEncryptedReasoning?: boolean
  readonly textVerbosity?: TextVerbosity
}

export type OpenAIProviderOptionsInput = ProviderOptions & {
  readonly openai?: OpenAIOptionsInput
}

const definedEntries = (input: Record<string, unknown>) =>
  Object.entries(input).filter((entry) => entry[1] !== undefined)

const openAIProviderOptions = (options: OpenAIOptionsInput | undefined): ProviderOptions | undefined => {
  const openai = Object.fromEntries(
    definedEntries({
      store: options?.store,
      promptCacheKey: options?.promptCacheKey,
      reasoningEffort: options?.reasoningEffort,
      reasoningSummary: options?.reasoningSummary,
      includeEncryptedReasoning: options?.includeEncryptedReasoning,
      textVerbosity: options?.textVerbosity,
    }),
  )
  if (Object.keys(openai).length === 0) return undefined
  return { openai }
}

export const gpt5DefaultOptions = (
  modelID: string,
  options: { readonly textVerbosity?: boolean } = {},
): ProviderOptions | undefined => {
  const id = modelID.toLowerCase()
  if (!id.includes("gpt-5") || id.includes("gpt-5-chat") || id.includes("gpt-5-pro")) return undefined
  return openAIProviderOptions({
    reasoningEffort: "medium",
    reasoningSummary: "auto",
    textVerbosity:
      options.textVerbosity === true && id.includes("gpt-5.") && !id.includes("codex") && !id.includes("-chat")
        ? "low"
        : undefined,
  })
}
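
// Defaulting sketch (model ids illustrative):
//
//   gpt5DefaultOptions("gpt-5")       // -> { openai: { reasoningEffort: "medium", reasoningSummary: "auto" } }
//   gpt5DefaultOptions("gpt-5-chat")  // -> undefined (chat variants keep provider defaults)
//   gpt5DefaultOptions("gpt-4o")      // -> undefined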

export const openAIDefaultOptions = (
  modelID: string,
  options: { readonly textVerbosity?: boolean } = {},
): ProviderOptions | undefined =>
  mergeProviderOptions(openAIProviderOptions({ store: false }), gpt5DefaultOptions(modelID, options))

export const withOpenAIOptions = <Options extends { readonly providerOptions?: OpenAIProviderOptionsInput }>(
  modelID: string,
  options: Options,
  defaults: { readonly textVerbosity?: boolean } = {},
): Options & { readonly id: string; readonly providerOptions?: ProviderOptions } => {
  return {
    ...options,
    id: modelID,
    providerOptions: mergeProviderOptions(openAIDefaultOptions(modelID, defaults), options.providerOptions),
  }
}

export * as OpenAIProviderOptions from "./openai-options"
53
packages/llm/src/providers/openai.ts
Normal file
@@ -0,0 +1,53 @@
import { AuthOptions, type ProviderAuthOption } from "../route/auth-options"
import type { RouteModelInput } from "../route/client"
import { Provider } from "../provider"
import { ProviderID, type ModelID } from "../schema"
import * as OpenAIChat from "../protocols/openai-chat"
import * as OpenAIResponses from "../protocols/openai-responses"
import { withOpenAIOptions, type OpenAIProviderOptionsInput } from "./openai-options"

export type { OpenAIOptionsInput } from "./openai-options"

export const id = ProviderID.make("openai")

export const routes = [OpenAIResponses.route, OpenAIResponses.webSocketRoute, OpenAIChat.route]

// This provider facade wraps the lower-level Responses and Chat model factories
// with OpenAI-specific conveniences: typed options, API-key sugar, env fallback,
// and default option normalization.
type OpenAIModelInput<ModelInput> = Omit<ModelInput, "apiKey" | "auth" | "baseURL"> &
  ProviderAuthOption<"optional"> & {
    readonly baseURL?: string
    readonly providerOptions?: OpenAIProviderOptionsInput
  }

const auth = (options: ProviderAuthOption<"optional">) => AuthOptions.bearer(options, "OPENAI_API_KEY")

export const responses = (id: string | ModelID, options: OpenAIModelInput<Omit<RouteModelInput, "id">> = {}) => {
  const { apiKey: _, ...rest } = options
  return OpenAIResponses.model(withOpenAIOptions(id, { ...rest, auth: auth(options) }, { textVerbosity: true }))
}

export const responsesWebSocket = (
  id: string | ModelID,
  options: OpenAIModelInput<Omit<RouteModelInput, "id">> = {},
) => {
  const { apiKey: _, ...rest } = options
  return OpenAIResponses.webSocketModel(
    withOpenAIOptions(id, { ...rest, auth: auth(options) }, { textVerbosity: true }),
  )
}

export const chat = (id: string | ModelID, options: OpenAIModelInput<Omit<RouteModelInput, "id">> = {}) => {
  const { apiKey: _, ...rest } = options
  return OpenAIChat.model(withOpenAIOptions(id, { ...rest, auth: auth(options) }))
}

export const provider = Provider.make({
  id,
  model: responses,
  apis: { responses, responsesWebSocket, chat },
})

export const model = provider.model
export const apis = provider.apis
88
packages/llm/src/providers/openrouter.ts
Normal file
@@ -0,0 +1,88 @@
import { Effect, Schema } from "effect"
|
||||
import { Route, type RouteModelInput } from "../route/client"
|
||||
import { Endpoint } from "../route/endpoint"
|
||||
import { Framing } from "../route/framing"
|
||||
import { Provider } from "../provider"
|
||||
import { Protocol } from "../route/protocol"
|
||||
import { ProviderID, type ModelID, type ProviderOptions } from "../schema"
|
||||
import * as OpenAICompatibleProfiles from "./openai-compatible-profile"
|
||||
import * as OpenAIChat from "../protocols/openai-chat"
|
||||
import { isRecord } from "../protocols/shared"
|
||||
|
||||
export const profile = OpenAICompatibleProfiles.profiles.openrouter
|
||||
export const id = ProviderID.make(profile.provider)
|
||||
const ADAPTER = "openrouter"
|
||||
|
||||
export interface OpenRouterOptions {
|
||||
readonly [key: string]: unknown
|
||||
readonly usage?: boolean | Record<string, unknown>
|
||||
readonly reasoning?: Record<string, unknown>
|
||||
readonly promptCacheKey?: string
|
||||
}
|
||||
|
||||
export type OpenRouterProviderOptionsInput = ProviderOptions & {
|
||||
readonly openrouter?: OpenRouterOptions
|
||||
}
|
||||
|
||||
export type ModelOptions = Omit<RouteModelInput, "id" | "baseURL" | "providerOptions"> & {
|
||||
readonly baseURL?: string
|
||||
readonly providerOptions?: OpenRouterProviderOptionsInput
|
||||
}
|
||||
type ModelInput = ModelOptions & Pick<RouteModelInput, "id">
|
||||
|
||||
const OpenRouterBody = Schema.StructWithRest(Schema.Struct(OpenAIChat.bodyFields), [
|
||||
Schema.Record(Schema.String, Schema.Any),
|
||||
])
|
||||
export type OpenRouterBody = Schema.Schema.Type<typeof OpenRouterBody>
|
||||
|
||||
export const protocol = Protocol.make({
|
||||
id: "openrouter-chat",
|
||||
body: {
|
||||
schema: OpenRouterBody,
|
||||
from: (request) =>
|
||||
OpenAIChat.protocol.body.from(request).pipe(
|
||||
Effect.map(
|
||||
(body) =>
|
||||
({
|
||||
...body,
|
||||
...bodyOptions(request.providerOptions?.openrouter),
|
||||
}) as OpenRouterBody,
|
||||
),
|
||||
),
|
||||
},
|
||||
stream: OpenAIChat.protocol.stream,
|
||||
})
|
||||
|
||||
const bodyOptions = (input: unknown) => {
|
||||
const openrouter = isRecord(input) ? input : {}
|
||||
return {
|
||||
...(openrouter.usage === true
|
||||
? { usage: { include: true } }
|
||||
: isRecord(openrouter.usage)
|
||||
? { usage: openrouter.usage }
|
||||
: {}),
|
||||
...(isRecord(openrouter.reasoning) ? { reasoning: openrouter.reasoning } : {}),
|
||||
...(typeof openrouter.promptCacheKey === "string" ? { prompt_cache_key: openrouter.promptCacheKey } : {}),
|
||||
}
|
||||
}
|
||||
|
||||
export const route = Route.make({
|
||||
id: ADAPTER,
|
||||
protocol,
|
||||
endpoint: Endpoint.path("/chat/completions"),
|
||||
framing: Framing.sse,
|
||||
})
|
||||
|
||||
export const routes = [route]
|
||||
|
||||
const modelRef = Route.model<ModelInput>(route, {
|
||||
provider: profile.provider,
|
||||
baseURL: profile.baseURL,
|
||||
})
|
||||
|
||||
export const model = (id: string | ModelID, options: ModelOptions = {}) => modelRef({ ...options, id })
|
||||
|
||||
export const provider = Provider.make({
|
||||
id,
|
||||
model,
|
||||
})
|
||||
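To make the `bodyOptions` normalization concrete, a sketch of how `providerOptions.openrouter` values land on the wire body. Note `bodyOptions` is module-private, so this only illustrates its behavior; the `reasoning` payload shape is an example, since it passes through as an opaque record:

// usage: true expands to the include form; promptCacheKey is snake_cased.
bodyOptions({ usage: true, promptCacheKey: "session-1" })
// => { usage: { include: true }, prompt_cache_key: "session-1" }

// Record-shaped usage and reasoning values pass through unchanged.
bodyOptions({ usage: { include: true }, reasoning: { effort: "high" } })
// => { usage: { include: true }, reasoning: { effort: "high" } }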
52
packages/llm/src/providers/xai.ts
Normal file
@@ -0,0 +1,52 @@
import { AuthOptions, type ProviderAuthOption } from "../route/auth-options"
import { Route } from "../route/client"
import type { RouteModelInput } from "../route/client"
import { Provider } from "../provider"
import { ProviderID, type ModelID } from "../schema"
import * as OpenAICompatibleProfiles from "./openai-compatible-profile"
import * as OpenAICompatibleChat from "../protocols/openai-compatible-chat"
import * as OpenAIResponses from "../protocols/openai-responses"

export const id = ProviderID.make("xai")

export type ModelOptions = Omit<RouteModelInput, "id" | "apiKey" | "auth" | "baseURL"> &
  ProviderAuthOption<"optional"> & {
    readonly baseURL?: string
  }

export const routes = [OpenAIResponses.route, OpenAICompatibleChat.route]

const responsesModel = Route.model(OpenAIResponses.route, { provider: id })
const chatModel = OpenAICompatibleChat.model

const auth = (options: ProviderAuthOption<"optional">) => AuthOptions.bearer(options, "XAI_API_KEY")

export const responses = (modelID: string | ModelID, options: ModelOptions = {}) => {
  const { apiKey: _, ...rest } = options
  return responsesModel({
    ...rest,
    auth: auth(options),
    id: modelID,
    baseURL: options.baseURL ?? OpenAICompatibleProfiles.profiles.xai.baseURL,
  })
}

export const chat = (modelID: string | ModelID, options: ModelOptions = {}) => {
  const { apiKey: _, ...rest } = options
  return chatModel({
    ...rest,
    auth: auth(options),
    id: modelID,
    provider: id,
    baseURL: options.baseURL ?? OpenAICompatibleProfiles.profiles.xai.baseURL,
  })
}

export const provider = Provider.make({
  id,
  model: responses,
  apis: { responses, chat },
})

export const model = provider.model
export const apis = provider.apis
57
packages/llm/src/route/auth-options.ts
Normal file
@@ -0,0 +1,57 @@
import type { Config, Redacted } from "effect"
import { Auth } from "./auth"

export type ApiKeyMode = "optional" | "required"

export type AuthOverride = {
  readonly auth: Auth
  readonly apiKey?: never
}

export type OptionalApiKeyAuth = {
  readonly apiKey?: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>>
  readonly auth?: never
}

export type RequiredApiKeyAuth = {
  readonly apiKey: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>>
  readonly auth?: never
}

export type ProviderAuthOption<Mode extends ApiKeyMode> =
  | AuthOverride
  | (Mode extends "optional" ? OptionalApiKeyAuth : RequiredApiKeyAuth)

export type ModelOptions<Base, Mode extends ApiKeyMode> = Omit<Base, "apiKey" | "auth"> & ProviderAuthOption<Mode>

export type ModelArgs<Base, Mode extends ApiKeyMode> = Mode extends "optional"
  ? readonly [options?: ModelOptions<Base, Mode>]
  : readonly [options: ModelOptions<Base, Mode>]

export type ModelFactory<Base, Mode extends ApiKeyMode, Model> = (id: string, ...args: ModelArgs<Base, Mode>) => Model

/**
 * Require at least one of the keys in `T`. Use for option shapes where any
 * subset of fields is acceptable but at least one must be present (e.g. Azure
 * accepts `resourceName` or `baseURL`).
 */
export type AtLeastOne<T> = {
  [K in keyof T]: Required<Pick<T, K>> & Partial<Omit<T, K>>
}[keyof T]

/**
 * Standard bearer-auth resolution for providers: honor an explicit `auth`
 * override, otherwise resolve `apiKey` (option > config var) and apply it as
 * a bearer token.
 */
export const bearer = (options: ProviderAuthOption<"optional">, envVar: string | ReadonlyArray<string>): Auth => {
  if ("auth" in options && options.auth) return options.auth
  return (Array.isArray(envVar) ? envVar : [envVar])
    .reduce(
      (auth, name) => auth.orElse(Auth.config(name)),
      Auth.optional("apiKey" in options ? options.apiKey : undefined, "apiKey"),
    )
    .bearer()
}

export * as AuthOptions from "./auth-options"
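A sketch of the two helpers above in use. The env var name and the Azure-style shape are illustrative; compare the OpenAI and xAI provider modules earlier in this diff, which use the same pattern:

import { AuthOptions, type AtLeastOne, type ProviderAuthOption } from "./auth-options"

// Provider-side auth resolution: an explicit `auth` override wins, then the
// `apiKey` option, then the named config variable, rendered as a bearer header.
const auth = (options: ProviderAuthOption<"optional">) => AuthOptions.bearer(options, "EXAMPLE_API_KEY")

// At least one of the two fields must be present (per the Azure note above).
type AzureTarget = AtLeastOne<{ resourceName: string; baseURL: string }>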
197
packages/llm/src/route/auth.ts
Normal file
@@ -0,0 +1,197 @@
import { Config, Effect, Redacted } from "effect"
import { Headers } from "effect/unstable/http"
import { AuthenticationReason, InvalidRequestReason, LLMError, type LLMRequest } from "../schema"

export class MissingCredentialError extends Error {
  readonly _tag = "MissingCredentialError"

  constructor(readonly source: string) {
    super(`Missing auth credential: ${source}`)
  }
}

export type CredentialError = MissingCredentialError | Config.ConfigError
export type AuthError = CredentialError | LLMError

export interface AuthInput {
  readonly request: LLMRequest
  readonly method: "POST" | "GET"
  readonly url: string
  readonly body: string
  readonly headers: Headers.Headers
}

export interface Credential {
  readonly load: Effect.Effect<Redacted.Redacted<string>, CredentialError>
  readonly orElse: (that: Credential) => Credential
  readonly bearer: () => Auth
  readonly header: (name: string) => Auth
  readonly pipe: <A>(f: (self: Credential) => A) => A
}

export interface Auth {
  readonly apply: (input: AuthInput) => Effect.Effect<Headers.Headers, AuthError>
  readonly andThen: (that: Auth) => Auth
  readonly orElse: (that: Auth) => Auth
  readonly pipe: <A>(f: (self: Auth) => A) => A
}

export const isAuth = (input: unknown): input is Auth =>
  typeof input === "object" && input !== null && "apply" in input && typeof input.apply === "function"

const credential = (load: Effect.Effect<Redacted.Redacted<string>, CredentialError>): Credential => {
  const self: Credential = {
    load,
    orElse: (that) => credential(load.pipe(Effect.catch(() => that.load))),
    bearer: () => fromCredential(self, (secret) => ({ authorization: `Bearer ${secret}` })),
    header: (name) => fromCredential(self, (secret) => ({ [name]: secret })),
    pipe: (f) => f(self),
  }
  return self
}

const auth = (apply: Auth["apply"]): Auth => {
  const self: Auth = {
    apply,
    andThen: (that) =>
      auth((input) => apply(input).pipe(Effect.flatMap((headers) => that.apply({ ...input, headers })))),
    orElse: (that) => auth((input) => apply(input).pipe(Effect.catch(() => that.apply(input)))),
    pipe: (f) => f(self),
  }
  return self
}

const fromCredential = (source: Credential, render: (secret: string) => Headers.Input) =>
  auth((input) =>
    source.load.pipe(Effect.map((secret) => Headers.setAll(input.headers, render(Redacted.value(secret))))),
  )

const secretEffect = (secret: string | Redacted.Redacted<string>, source: string) => {
  const redacted = typeof secret === "string" ? Redacted.make(secret) : secret
  if (Redacted.value(redacted) === "") return Effect.fail(new MissingCredentialError(source))
  return Effect.succeed(redacted)
}

const credentialFromSecret = (
  secret: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>>,
  source: string,
) => {
  if (typeof secret === "string" || Redacted.isRedacted(secret)) return credential(secretEffect(secret, source))
  return credential(
    Effect.gen(function* () {
      return yield* secretEffect(yield* secret, source)
    }),
  )
}

export const value = (secret: string, source = "value") => credentialFromSecret(secret, source)

export const optional = (
  secret: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>> | undefined,
  source = "optional value",
) =>
  secret === undefined
    ? credential(Effect.fail(new MissingCredentialError(source)))
    : credentialFromSecret(secret, source)

export const config = (name: string) => credentialFromSecret(Config.redacted(name), name)

export const effect = (load: Effect.Effect<Redacted.Redacted<string>, CredentialError>) => credential(load)

export const none = auth((input) => Effect.succeed(input.headers))

export const headers = (input: Headers.Input) =>
  auth((inputAuth) => Effect.succeed(Headers.setAll(inputAuth.headers, input)))

export const remove = (name: string) => auth((input) => Effect.succeed(Headers.remove(input.headers, name)))

export const custom = (apply: (input: AuthInput) => Effect.Effect<Headers.Headers, LLMError>) => auth(apply)

export const passthrough = none

const fromModelApiKey = (from: (apiKey: string) => Headers.Input) =>
  auth(({ request, headers }) => {
    const key = request.model.apiKey
    if (!key) return Effect.succeed(headers)
    return Effect.succeed(Headers.setAll(headers, from(key)))
  })

const credentialInput = (
  source: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>> | Credential,
) =>
  typeof source === "string" || Redacted.isRedacted(source) || Config.isConfig(source)
    ? credentialFromSecret(source, "value")
    : source

export function bearer(): Auth
export function bearer(
  source: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>> | Credential,
): Auth
export function bearer(
  source?: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>> | Credential,
) {
  if (source === undefined) return fromModelApiKey((key) => ({ authorization: `Bearer ${key}` }))
  return credentialInput(source).bearer()
}

export const apiKey = bearer

export const apiKeyHeader = (name: string) => fromModelApiKey((key) => ({ [name]: key }))

export function header(
  name: string,
): (source: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>> | Credential) => Auth
export function header(
  name: string,
  source: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>> | Credential,
): Auth
export function header(
  name: string,
  source?: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>> | Credential,
) {
  if (source === undefined) {
    return (
      next: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>> | Credential,
    ) => credentialInput(next).header(name)
  }
  return credentialInput(source).header(name)
}

export function bearerHeader(
  name: string,
): (source: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>> | Credential) => Auth
export function bearerHeader(
  name: string,
  source: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>> | Credential,
): Auth
export function bearerHeader(
  name: string,
  source?: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>> | Credential,
) {
  const render = (
    input: string | Redacted.Redacted<string> | Config.Config<string | Redacted.Redacted<string>> | Credential,
  ) => fromCredential(credentialInput(input), (secret) => ({ [name]: `Bearer ${secret}` }))
  if (source === undefined) return render
  return render(source)
}

const toLLMError = (error: AuthError): LLMError => {
  if (error instanceof MissingCredentialError || error instanceof Config.ConfigError) {
    return new LLMError({
      module: "Auth",
      method: "apply",
      reason:
        error instanceof MissingCredentialError
          ? new AuthenticationReason({ message: error.message, kind: "missing" })
          : new InvalidRequestReason({ message: `Failed to resolve auth config: ${error.message}` }),
    })
  }
  return error
}

export const toEffect =
  (input: Auth) =>
  (authInput: AuthInput): Effect.Effect<Headers.Headers, LLMError> =>
    input.apply(authInput).pipe(Effect.mapError(toLLMError))

export * as Auth from "./auth"
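A sketch composing the credential and auth combinators above; the secret literal, config name, and header name are illustrative:

import { Auth } from "./auth"

// Credential chain: explicit value first, falling back to a config variable,
// rendered as a standard `authorization: Bearer ...` header.
const bearerAuth = Auth.value("sk-example").orElse(Auth.config("EXAMPLE_API_KEY")).bearer()

// Auth chain: set a static header first, then apply the bearer credential.
const combined = Auth.headers({ "x-example-version": "2024-01-01" }).andThen(bearerAuth)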
527
packages/llm/src/route/client.ts
Normal file
@@ -0,0 +1,527 @@
import { Cause, Context, Effect, Layer, Schema, Stream } from "effect"
import type { Auth as AuthDef } from "./auth"
import type { Endpoint } from "./endpoint"
import { RequestExecutor } from "./executor"
import type { Framing } from "./framing"
import { HttpTransport } from "./transport"
import type { Transport, TransportRuntime } from "./transport"
import { WebSocketExecutor } from "./transport"
import type { Service as WebSocketExecutorService } from "./transport/websocket"
import type { Protocol } from "./protocol"
import { applyCachePolicy } from "../cache-policy"
import * as ProviderShared from "../protocols/shared"
import * as ToolRuntime from "../tool-runtime"
import type { Tools } from "../tool"
import type { LLMError, LLMEvent, PreparedRequestOf, ProtocolID } from "../schema"
import {
  GenerationOptions,
  HttpOptions,
  LLMRequest,
  LLMResponse,
  ModelID,
  ModelLimits,
  ModelRef,
  LLMError as LLMErrorClass,
  NoRouteReason,
  PreparedRequest,
  ProviderID,
  RouteID,
  mergeGenerationOptions,
  mergeHttpOptions,
  mergeProviderOptions,
} from "../schema"

export interface RouteBody<Body> {
  /** Schema for the validated provider-native body sent as the JSON request. */
  readonly schema: Schema.Codec<Body, unknown>
  /** Build the provider-native body from a common `LLMRequest`. */
  readonly from: (request: LLMRequest) => Effect.Effect<Body, LLMError>
}

export interface Route<Body, Prepared = unknown> {
  readonly id: string
  readonly provider?: ProviderID
  readonly protocol: ProtocolID
  readonly transport: Transport<Body, Prepared, unknown>
  readonly defaults: RouteDefaults
  readonly body: RouteBody<Body>
  readonly with: (patch: RoutePatch<Body, Prepared>) => Route<Body, Prepared>
  readonly model: <Input extends RouteModelInput = RouteModelInput>(input: Input) => ModelRef
  readonly prepareTransport: (body: Body, request: LLMRequest) => Effect.Effect<Prepared, LLMError>
  readonly streamPrepared: (
    prepared: Prepared,
    request: LLMRequest,
    runtime: TransportRuntime,
  ) => Stream.Stream<LLMEvent, LLMError>
}

// Route registries intentionally erase body generics after construction.
// Normal call sites use `OpenAIChat.route`; callers only need body types
// when preparing a request with a protocol-specific type assertion.
// oxlint-disable-next-line typescript-eslint/no-explicit-any
export type AnyRoute = Route<any, any>

const routeRegistry = new Map<string, AnyRoute>()

// Route lookup is intentionally global: model refs name a route id, and
// importing the provider/protocol/custom-route module registers the runnable
// implementation. Duplicate ids are bugs because model refs cannot disambiguate
// them.
const register = <R extends AnyRoute>(route: R): R => {
  const existing = routeRegistry.get(route.id)
  if (existing && existing !== route) throw new Error(`Duplicate LLM route id "${route.id}"`)
  routeRegistry.set(route.id, route)
  return route
}

const registeredRoute = (id: string) => routeRegistry.get(id)

export type HttpOptionsInput = HttpOptions.Input

export type ModelRefInput = Omit<
  ConstructorParameters<typeof ModelRef>[0],
  "id" | "provider" | "route" | "limits" | "generation" | "http" | "auth"
> & {
  readonly id: string | ModelID
  readonly provider: string | ProviderID
  readonly route: string | RouteID
  readonly auth?: AuthDef
  readonly limits?: ModelLimits.Input
  readonly generation?: GenerationOptions.Input
  readonly http?: HttpOptionsInput
}

// `baseURL` is required on `ModelRefInput` (every materialized `ModelRef` has
// a host) but optional at the route-input layers below. The route's `defaults`
// can supply a canonical URL (e.g. OpenAI/Anthropic) so the user's input may
// omit it. Routes without a canonical URL (OpenAI-compatible, GitHub Copilot)
// re-tighten this in their own input type.
export type RouteModelInput = Omit<ModelRefInput, "provider" | "route" | "baseURL"> & {
  readonly baseURL?: string
}

export type RouteModelDefaults = Omit<ModelRefInput, "id" | "route" | "baseURL"> & {
  readonly baseURL?: string
}

export type RouteRoutedModelInput = Omit<ModelRefInput, "route" | "baseURL"> & {
  readonly baseURL?: string
}

export type RouteRoutedModelDefaults = Partial<Omit<ModelRefInput, "id" | "provider" | "route">>

export type RouteDefaults = Partial<Omit<ModelRefInput, "id" | "provider" | "route">>

export interface RoutePatch<Body, Prepared> extends RouteDefaults {
  readonly id: string
  readonly provider?: string | ProviderID
  readonly transport?: Transport<Body, Prepared, unknown>
}

type RouteMappedModelInput = RouteModelInput | RouteRoutedModelInput

export interface RouteModelOptions<
  Input extends RouteMappedModelInput,
  Output extends RouteMappedModelInput = RouteMappedModelInput,
> {
  readonly mapInput?: (input: Input) => Output
}

export interface RouteMappedModelOptions<Input, Output extends RouteMappedModelInput = RouteMappedModelInput> {
  readonly mapInput: (input: Input) => Output
}

const modelWithDefaults =
  <Input>(
    route: AnyRoute,
    defaults: Partial<Omit<ModelRefInput, "id" | "route">>,
    options: { readonly mapInput?: (input: Input) => RouteMappedModelInput },
  ) =>
  (input: Input) => {
    const mapped = options.mapInput === undefined ? (input as RouteMappedModelInput) : options.mapInput(input)
    const provider = defaults.provider ?? route.provider ?? ("provider" in mapped ? mapped.provider : undefined)
    if (!provider) throw new Error(`Route.model(${route.id}) requires a provider`)
    const baseURL = mapped.baseURL ?? defaults.baseURL ?? route.defaults.baseURL
    if (!baseURL)
      throw new Error(`Route.model(${route.id}) requires a baseURL — supply it via input, defaults, or route defaults`)
    const generation = mergeGenerationOptions(route.defaults.generation, defaults.generation)
    const providerOptions = mergeProviderOptions(route.defaults.providerOptions, defaults.providerOptions)
    const http = mergeHttpOptions(httpOptions(route.defaults.http), httpOptions(defaults.http))
    return modelRef({
      ...route.defaults,
      ...defaults,
      ...mapped,
      baseURL,
      provider,
      route: route.id,
      limits: mapped.limits ?? defaults.limits ?? route.defaults.limits,
      generation: mergeGenerationOptions(generation, mapped.generation),
      providerOptions: mergeProviderOptions(providerOptions, mapped.providerOptions),
      http: mergeHttpOptions(http, httpOptions(mapped.http)),
    })
  }

const mergeRouteDefaults = (base: RouteDefaults | undefined, patch: RouteDefaults): RouteDefaults => ({
  ...base,
  ...patch,
  limits: patch.limits ?? base?.limits,
  generation: mergeGenerationOptions(generationOptions(base?.generation), generationOptions(patch.generation)),
  providerOptions: mergeProviderOptions(base?.providerOptions, patch.providerOptions),
  http: mergeHttpOptions(httpOptions(base?.http), httpOptions(patch.http)),
})

export const modelLimits = ModelLimits.make

export const generationOptions = (input: GenerationOptions.Input | undefined) =>
  input === undefined ? undefined : GenerationOptions.make(input)

export const httpOptions = (input: HttpOptionsInput | undefined) => {
  if (input === undefined) return input
  return HttpOptions.make(input)
}

export const modelRef = (input: ModelRefInput) =>
  new ModelRef({
    ...input,
    id: ModelID.make(input.id),
    provider: ProviderID.make(input.provider),
    route: RouteID.make(input.route),
    limits: modelLimits(input.limits),
    generation: generationOptions(input.generation),
    http: httpOptions(input.http),
  })

function model<Input extends RouteModelInput = RouteModelInput>(
  route: AnyRoute,
  defaults: RouteModelDefaults,
  options?: RouteModelOptions<Input, RouteModelInput>,
): (input: Input) => ModelRef
function model<Input extends RouteRoutedModelInput = RouteRoutedModelInput>(
  route: AnyRoute,
  defaults?: RouteRoutedModelDefaults,
  options?: RouteModelOptions<Input, RouteRoutedModelInput>,
): (input: Input) => ModelRef
function model<Input, Output extends RouteMappedModelInput = RouteMappedModelInput>(
  route: AnyRoute,
  defaults: Partial<Omit<ModelRefInput, "id" | "route">>,
  options: RouteMappedModelOptions<Input, Output>,
): (input: Input) => ModelRef
function model<Input>(
  route: AnyRoute,
  defaults: Partial<Omit<ModelRefInput, "id" | "route">> = {},
  options: { readonly mapInput?: (input: Input) => RouteMappedModelInput } = {},
) {
  return modelWithDefaults(route, defaults, options)
}

export interface Interface {
  /**
   * Compile a request through protocol body construction, validation, and HTTP
   * preparation without sending it. Returns the prepared request including the
   * provider-native body.
   *
   * Pass a `Body` type argument to statically expose the route's body
   * shape (e.g. `prepare<OpenAIChatBody>(...)`) — the runtime body is
   * identical, so this is a type-level assertion the caller makes about which
   * route the request will resolve to.
   */
  readonly prepare: <Body = unknown>(request: LLMRequest) => Effect.Effect<PreparedRequestOf<Body>, LLMError>
  readonly stream: StreamMethod
  readonly generate: GenerateMethod
}

export interface StreamMethod {
  (request: LLMRequest): Stream.Stream<LLMEvent, LLMError>
  <T extends Tools>(options: ToolRuntime.RunOptions<T>): Stream.Stream<LLMEvent, LLMError>
}

export interface GenerateMethod {
  (request: LLMRequest): Effect.Effect<LLMResponse, LLMError>
  <T extends Tools>(options: ToolRuntime.RunOptions<T>): Effect.Effect<LLMResponse, LLMError>
}

export class Service extends Context.Service<Service, Interface>()("@opencode/LLMClient") {}

const noRoute = (model: ModelRef) =>
  new LLMErrorClass({
    module: "LLMClient",
    method: "resolveRoute",
    reason: new NoRouteReason({ route: model.route, provider: model.provider, model: model.id }),
  })

const resolveRequestOptions = (request: LLMRequest) =>
  LLMRequest.update(request, {
    generation: mergeGenerationOptions(request.model.generation, request.generation) ?? new GenerationOptions({}),
    providerOptions: mergeProviderOptions(request.model.providerOptions, request.providerOptions),
    http: mergeHttpOptions(request.model.http, request.http),
  })

export interface MakeInput<Body, Frame, Event, State> {
  /** Route id used in registry lookup and error messages. */
  readonly id: string
  /** Provider identity for route-owned model construction. */
  readonly provider?: string | ProviderID
  /** Semantic API contract — owns body construction, body schema, and parsing. */
  readonly protocol: Protocol<Body, Frame, Event, State>
  /** Where the request is sent. */
  readonly endpoint: Endpoint<Body>
  /** Per-request transport auth. Model-level `Auth` overrides this. */
  readonly auth?: AuthDef
  /** Stream framing — bytes -> frames before `protocol.stream.event` decoding. */
  readonly framing: Framing<Frame>
  /** Static / per-request headers added before `auth` runs. */
  readonly headers?: (input: { readonly request: LLMRequest }) => Record<string, string>
  /** Model defaults used by the route's `.model(...)` helper. */
  readonly defaults?: RouteDefaults
}

export interface MakeTransportInput<Body, Prepared, Frame, Event, State> {
  /** Route id used in registry lookup and error messages. */
  readonly id: string
  /** Provider identity for route-owned model construction. */
  readonly provider?: string | ProviderID
  /** Semantic API contract — owns body construction, body schema, and parsing. */
  readonly protocol: Protocol<Body, Frame, Event, State>
  /** Runnable transport route. */
  readonly transport: Transport<Body, Prepared, Frame>
  /** Provider/model defaults used by the route's `.model(...)` helper. */
  readonly defaults?: RouteDefaults
}

const streamError = (route: string, message: string, cause: Cause.Cause<unknown>) => {
  const failed = cause.reasons.find(Cause.isFailReason)?.error
  if (failed instanceof LLMErrorClass) return failed
  return ProviderShared.eventError(route, message, Cause.pretty(cause))
}

function makeFromTransport<Body, Prepared, Frame, Event, State>(
  input: MakeTransportInput<Body, Prepared, Frame, Event, State>,
): Route<Body, Prepared> {
  const protocol = input.protocol
  const decodeEventEffect = Schema.decodeUnknownEffect(protocol.stream.event)
  const decodeEvent = (route: string) => (frame: Frame) =>
    decodeEventEffect(frame).pipe(
      Effect.mapError(() =>
        ProviderShared.eventError(
          input.id,
          `Invalid ${route} stream event`,
          typeof frame === "string" ? frame : ProviderShared.encodeJson(frame),
        ),
      ),
    )

  const build = (routeInput: MakeTransportInput<Body, Prepared, Frame, Event, State>): Route<Body, Prepared> => {
    const route: Route<Body, Prepared> = {
      id: routeInput.id,
      provider: routeInput.provider === undefined ? undefined : ProviderID.make(routeInput.provider),
      protocol: protocol.id,
      transport: routeInput.transport,
      defaults: routeInput.defaults ?? {},
      body: protocol.body,
      with: (patch: RoutePatch<Body, Prepared>) => {
        const { id, provider, transport, ...defaults } = patch
        if (!id || id === routeInput.id) throw new Error(`Route.with(${routeInput.id}) requires a new route id`)
        return build({
          ...routeInput,
          id,
          provider: provider ?? routeInput.provider,
          transport: (transport as Transport<Body, Prepared, Frame> | undefined) ?? routeInput.transport,
          defaults: mergeRouteDefaults(routeInput.defaults, defaults),
        })
      },
      model: (input: RouteModelInput): ModelRef => modelWithDefaults<RouteModelInput>(route, {}, {})(input),
      prepareTransport: routeInput.transport.prepare,
      streamPrepared: (prepared: Prepared, request: LLMRequest, runtime: TransportRuntime) => {
        const route = `${request.model.provider}/${request.model.route}`
        const events = routeInput.transport
          .frames(prepared, request, runtime)
          .pipe(
            Stream.mapEffect(decodeEvent(route)),
            protocol.stream.terminal ? Stream.takeUntil(protocol.stream.terminal) : (stream) => stream,
          )
        return events.pipe(
          Stream.mapAccumEffect(
            protocol.stream.initial,
            protocol.stream.step,
            protocol.stream.onHalt ? { onHalt: protocol.stream.onHalt } : undefined,
          ),
          Stream.catchCause((cause) => Stream.fail(streamError(route, `Failed to read ${route} stream`, cause))),
        )
      },
    } satisfies Route<Body, Prepared>
    return register(route)
  }

  return build(input)
}

export function make<Body, Prepared, Frame, Event, State>(
  input: MakeTransportInput<Body, Prepared, Frame, Event, State>,
): Route<Body, Prepared>
/**
 * Build a `Route` by composing the four orthogonal pieces of a deployment:
 *
 * - `Protocol` — what is the API I'm speaking?
 * - `Endpoint` — where do I send the request?
 * - `Auth` — how do I authenticate it?
 * - `Framing` — how do I cut the response stream into protocol frames?
 *
 * Plus optional `headers` for cross-cutting deployment concerns (provider
 * version pins, per-deployment quirks).
 *
 * This is the canonical route constructor. If a new route does not fit
 * this four-axis model, add a purpose-built constructor rather than widening
 * the public surface preemptively.
 */
export function make<Body, Frame, Event, State>(
  input: MakeInput<Body, Frame, Event, State>,
): Route<Body, HttpTransport.HttpPrepared<Frame>>
export function make<Body, Prepared, Frame, Event, State>(
  input: MakeInput<Body, Frame, Event, State> | MakeTransportInput<Body, Prepared, Frame, Event, State>,
): Route<Body, Prepared> | Route<Body, HttpTransport.HttpPrepared<Frame>> {
  if ("transport" in input) return makeFromTransport(input)
  const protocol = input.protocol
  const encodeBody = Schema.encodeSync(Schema.fromJsonString(protocol.body.schema))
  return makeFromTransport({
    id: input.id,
    provider: input.provider,
    protocol,
    transport: HttpTransport.httpJson({
      endpoint: input.endpoint,
      auth: input.auth,
      framing: input.framing,
      encodeBody,
      headers: input.headers,
    }),
    defaults: input.defaults,
  })
}

// `compile` is the important boundary: it turns a common `LLMRequest` into a
// validated provider body plus transport-private prepared data, but does not
// execute transport.
const compile = Effect.fn("LLM.compile")(function* (request: LLMRequest) {
  const resolved = applyCachePolicy(resolveRequestOptions(request))
  const route = registeredRoute(resolved.model.route)
  if (!route) return yield* noRoute(resolved.model)

  const body = yield* route.body
    .from(resolved)
    .pipe(Effect.flatMap(ProviderShared.validateWith(Schema.decodeUnknownEffect(route.body.schema))))
  const prepared = yield* route.prepareTransport(body, resolved)

  return {
    request: resolved,
    route,
    body,
    prepared,
  }
})

const prepareWith = Effect.fn("LLMClient.prepare")(function* (request: LLMRequest) {
  const compiled = yield* compile(request)

  return new PreparedRequest({
    id: compiled.request.id ?? "request",
    route: compiled.route.id,
    protocol: compiled.route.protocol,
    model: compiled.request.model,
    body: compiled.body,
    metadata: { transport: compiled.route.transport.id },
  })
})

const streamRequestWith = (runtime: TransportRuntime) => (request: LLMRequest) =>
  Stream.unwrap(
    Effect.gen(function* () {
      const compiled = yield* compile(request)
      return compiled.route.streamPrepared(compiled.prepared, compiled.request, runtime)
    }),
  )

const isToolRunOptions = (input: LLMRequest | ToolRuntime.RunOptions<Tools>): input is ToolRuntime.RunOptions<Tools> =>
  "request" in input && "tools" in input

const streamWith = (streamRequest: (request: LLMRequest) => Stream.Stream<LLMEvent, LLMError>): StreamMethod =>
  ((input: LLMRequest | ToolRuntime.RunOptions<Tools>) => {
    if (isToolRunOptions(input)) return ToolRuntime.stream({ ...input, stream: streamRequest })
    return streamRequest(input)
  }) as StreamMethod

const generateWith = (stream: Interface["stream"]) =>
  Effect.fn("LLM.generate")(function* (input: LLMRequest | ToolRuntime.RunOptions<Tools>) {
    return new LLMResponse(
      yield* stream(input as never).pipe(
        Stream.runFold(
          () => ({ events: [] as LLMEvent[], usage: undefined as LLMResponse["usage"] }),
          (acc, event) => {
            acc.events.push(event)
            if ("usage" in event && event.usage !== undefined) acc.usage = event.usage
            return acc
          },
        ),
      ),
    )
  })

export const prepare = <Body = unknown>(request: LLMRequest) =>
  prepareWith(request) as Effect.Effect<PreparedRequestOf<Body>, LLMError>

export function stream(request: LLMRequest): Stream.Stream<LLMEvent, LLMError>
export function stream<T extends Tools>(options: ToolRuntime.RunOptions<T>): Stream.Stream<LLMEvent, LLMError>
export function stream(input: LLMRequest | ToolRuntime.RunOptions<Tools>) {
  return Stream.unwrap(
    Effect.gen(function* () {
      return (yield* Service).stream(input as never)
    }),
  )
}

export function generate(request: LLMRequest): Effect.Effect<LLMResponse, LLMError>
export function generate<T extends Tools>(options: ToolRuntime.RunOptions<T>): Effect.Effect<LLMResponse, LLMError>
export function generate(input: LLMRequest | ToolRuntime.RunOptions<Tools>) {
  return Effect.gen(function* () {
    return yield* (yield* Service).generate(input as never)
  })
}

export const streamRequest = (request: LLMRequest) =>
  Stream.unwrap(
    Effect.gen(function* () {
      return (yield* Service).stream(request)
    }),
  )

export const layer: Layer.Layer<Service, never, RequestExecutor.Service> = Layer.effect(
  Service,
  Effect.gen(function* () {
    const stream = streamWith(streamRequestWith({ http: yield* RequestExecutor.Service }))
    return Service.of({ prepare: prepareWith as Interface["prepare"], stream, generate: generateWith(stream) })
  }),
)

export const layerWithWebSocket: Layer.Layer<Service, never, RequestExecutor.Service | WebSocketExecutorService> =
  Layer.effect(
    Service,
    Effect.gen(function* () {
      const stream = streamWith(
        streamRequestWith({
          http: yield* RequestExecutor.Service,
          webSocket: yield* WebSocketExecutor.Service,
        }),
      )
      return Service.of({ prepare: prepareWith as Interface["prepare"], stream, generate: generateWith(stream) })
    }),
  )

export const Route = { make, model } as const

export const LLMClient = {
  Service,
  layer,
  layerWithWebSocket,
  prepare,
  stream,
  generate,
  stepCountIs: ToolRuntime.stepCountIs,
} as const
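The four-axis `make` contract documented above is exactly the shape the OpenRouter route earlier in this diff uses; a distilled sketch (the route id and path here are examples):

import { Route } from "./client"
import { Endpoint } from "./endpoint"
import { Framing } from "./framing"
import * as OpenAIChat from "../protocols/openai-chat"

// Protocol says what API is spoken; Endpoint says where; Framing cuts the
// byte stream into frames. Auth is omitted, so model-level auth applies.
const exampleRoute = Route.make({
  id: "example-chat",
  protocol: OpenAIChat.protocol,
  endpoint: Endpoint.path("/chat/completions"),
  framing: Framing.sse,
})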
39
packages/llm/src/route/endpoint.ts
Normal file
@@ -0,0 +1,39 @@
import type { LLMRequest } from "../schema"
import * as ProviderShared from "../protocols/shared"

export interface EndpointInput<Body> {
  readonly request: LLMRequest
  readonly body: Body
}

export type EndpointPart<Body> = string | ((input: EndpointInput<Body>) => string)

/**
 * Declarative URL construction for one route.
 *
 * `Endpoint` carries only the path. The host always lives on `model.baseURL`,
 * supplied by the provider helper that constructs the model. `render(...)`
 * just appends the path (and any `model.queryParams`) to that host.
 *
 * `path` may be a string or a function of `EndpointInput`, for routes whose
 * URL embeds the model id, region, or another body field (e.g. Bedrock,
 * Gemini).
 */
export interface Endpoint<Body> {
  readonly path: EndpointPart<Body>
}

/** Construct an `Endpoint` from a path string or path function. */
export const path = <Body>(value: EndpointPart<Body>): Endpoint<Body> => ({ path: value })

const renderPart = <Body>(part: EndpointPart<Body>, input: EndpointInput<Body>) =>
  typeof part === "function" ? part(input) : part

export const render = <Body>(endpoint: Endpoint<Body>, input: EndpointInput<Body>) => {
  const url = new URL(`${ProviderShared.trimBaseUrl(input.request.model.baseURL)}${renderPart(endpoint.path, input)}`)
  const params = input.request.model.queryParams
  if (params) for (const [key, value] of Object.entries(params)) url.searchParams.set(key, value)
  return url
}

export * as Endpoint from "./endpoint"
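A sketch of the two `path` forms described above; the Gemini-style path and body type are illustrative, not taken from this diff:

import { Endpoint } from "./endpoint"

// Static path: the host is taken from `model.baseURL` at render time.
const chatEndpoint = Endpoint.path("/chat/completions")

// Function path: embed a body field in the URL, as Gemini-style routes do.
const generateEndpoint = Endpoint.path<{ readonly model: string }>(
  ({ body }) => `/models/${body.model}:streamGenerateContent`,
)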
374
packages/llm/src/route/executor.ts
Normal file
@@ -0,0 +1,374 @@
import { Cause, Context, Effect, Layer, Random } from "effect"
import {
  FetchHttpClient,
  Headers,
  HttpClient,
  HttpClientError,
  HttpClientRequest,
  HttpClientResponse,
} from "effect/unstable/http"
import {
  AuthenticationReason,
  ContentPolicyReason,
  HttpContext,
  HttpRateLimitDetails,
  HttpRequestDetails,
  HttpResponseDetails,
  InvalidRequestReason,
  LLMError,
  ProviderInternalReason,
  QuotaExceededReason,
  RateLimitReason,
  TransportReason,
  UnknownProviderReason,
} from "../schema"

export interface Interface {
  readonly execute: (
    request: HttpClientRequest.HttpClientRequest,
  ) => Effect.Effect<HttpClientResponse.HttpClientResponse, LLMError>
}

export class Service extends Context.Service<Service, Interface>()("@opencode/LLM/RequestExecutor") {}

const BODY_LIMIT = 16_384
const MAX_RETRIES = 2
const BASE_DELAY_MS = 500
const MAX_DELAY_MS = 10_000
const REDACTED = "<redacted>"

// One source of truth for what counts as a sensitive name across headers,
// URL query keys, and field names embedded inside request/response bodies.
//
// `SENSITIVE_NAME` is used as both a substring matcher (for free-form header
// names like `Authorization` / `X-API-Key`) and as the body-field alternation
// list. `SHORT_QUERY_NAME` covers anchored short keys like `?key=…` / `?sig=…`
// that are too generic to redact substring-style without false positives.
const SENSITIVE_NAME_SOURCE =
  "authorization|api[-_]?key|access[-_]?token|refresh[-_]?token|id[-_]?token|token|secret|credential|signature|x-amz-signature"
const SENSITIVE_NAME = new RegExp(SENSITIVE_NAME_SOURCE, "i")
const SHORT_QUERY_NAME = /^(key|sig)$/i
const SENSITIVE_BODY_FIELD = new RegExp(`(?:${SENSITIVE_NAME_SOURCE}|key)`, "i")
const REDACT_JSON_FIELD = new RegExp(`("(?:${SENSITIVE_BODY_FIELD.source})"\\s*:\\s*)"[^"]*"`, "gi")
const REDACT_QUERY_FIELD = new RegExp(`((?:${SENSITIVE_BODY_FIELD.source})=)[^&\\s"]+`, "gi")

const isSensitiveHeaderName = (name: string) => SENSITIVE_NAME.test(name)

const isSensitiveQueryName = (name: string) => isSensitiveHeaderName(name) || SHORT_QUERY_NAME.test(name)

const redactHeaders = (headers: Headers.Headers, redactedNames: ReadonlyArray<string | RegExp>) =>
  Object.fromEntries(
    Object.entries(Headers.redact(headers, [...redactedNames, SENSITIVE_NAME])).map(([name, value]) => [
      name,
      String(value),
    ]),
  )

const redactUrl = (value: string) => {
  if (!URL.canParse(value)) return REDACTED
  const url = new URL(value)
  url.searchParams.forEach((_, key) => {
    if (isSensitiveQueryName(key)) url.searchParams.set(key, REDACTED)
  })
  return url.toString()
}

const normalizedHeaders = (headers: Headers.Headers) =>
  Object.fromEntries(Object.entries(headers).map(([key, value]) => [key.toLowerCase(), value]))

const requestId = (headers: Record<string, string>) => {
  return (
    headers["x-request-id"] ??
    headers["request-id"] ??
    headers["x-amzn-requestid"] ??
    headers["x-amz-request-id"] ??
    headers["x-goog-request-id"] ??
    headers["cf-ray"]
  )
}

const retryableStatus = (status: number) => status === 429 || status === 503 || status === 504 || status === 529

const retryAfterMs = (headers: Record<string, string>) => {
  const millis = Number(headers["retry-after-ms"])
  if (Number.isFinite(millis)) return Math.max(0, millis)

  const value = headers["retry-after"]
  if (!value) return undefined

  const seconds = Number(value)
  if (Number.isFinite(seconds)) return Math.max(0, seconds * 1000)

  const date = Date.parse(value)
  if (!Number.isNaN(date)) return Math.max(0, date - Date.now())
  return undefined
}

const addRateLimitValue = (target: Record<string, string>, key: string, value: string) => {
  if (key.length > 0) target[key] = value
}

const rateLimitDetails = (headers: Record<string, string>, retryAfter: number | undefined) => {
  const limit: Record<string, string> = {}
  const remaining: Record<string, string> = {}
  const reset: Record<string, string> = {}

  Object.entries(headers).forEach(([name, value]) => {
    const openaiLimit = /^x-ratelimit-limit-(.+)$/.exec(name)?.[1]
    if (openaiLimit) return addRateLimitValue(limit, openaiLimit, value)

    const openaiRemaining = /^x-ratelimit-remaining-(.+)$/.exec(name)?.[1]
    if (openaiRemaining) return addRateLimitValue(remaining, openaiRemaining, value)

    const openaiReset = /^x-ratelimit-reset-(.+)$/.exec(name)?.[1]
    if (openaiReset) return addRateLimitValue(reset, openaiReset, value)

    const anthropic = /^anthropic-ratelimit-(.+)-(limit|remaining|reset)$/.exec(name)
    if (!anthropic) return
    if (anthropic[2] === "limit") return addRateLimitValue(limit, anthropic[1], value)
    if (anthropic[2] === "remaining") return addRateLimitValue(remaining, anthropic[1], value)
    return addRateLimitValue(reset, anthropic[1], value)
  })

  if (
    retryAfter === undefined &&
    Object.keys(limit).length === 0 &&
    Object.keys(remaining).length === 0 &&
    Object.keys(reset).length === 0
  )
    return undefined

  return new HttpRateLimitDetails({
    retryAfterMs: retryAfter,
    limit: Object.keys(limit).length === 0 ? undefined : limit,
    remaining: Object.keys(remaining).length === 0 ? undefined : remaining,
    reset: Object.keys(reset).length === 0 ? undefined : reset,
  })
}

const requestDetails = (request: HttpClientRequest.HttpClientRequest, redactedNames: ReadonlyArray<string | RegExp>) =>
  new HttpRequestDetails({
    method: request.method,
    url: redactUrl(request.url),
    headers: redactHeaders(request.headers, redactedNames),
  })

const responseDetails = (
  response: HttpClientResponse.HttpClientResponse,
  redactedNames: ReadonlyArray<string | RegExp>,
) =>
  new HttpResponseDetails({
    status: response.status,
    headers: redactHeaders(response.headers, redactedNames),
  })

const secretValues = (request: HttpClientRequest.HttpClientRequest) => {
  const values = new Set<string>()
  const add = (value: string) => {
    if (value.length < 4) return
    values.add(value)
    values.add(encodeURIComponent(value))
  }

  Object.entries(request.headers).forEach(([name, value]) => {
    if (!isSensitiveHeaderName(name)) return
    add(value)
    const bearer = /^Bearer\s+(.+)$/i.exec(value)?.[1]
    if (bearer) add(bearer)
  })

  if (!URL.canParse(request.url)) return values
  new URL(request.url).searchParams.forEach((value, key) => {
    if (isSensitiveQueryName(key)) add(value)
  })
  return values
}

// Two passes: structural (redact `"name": "value"` and `name=value` patterns
// for any field name that looks sensitive) plus literal (replace any actual
// secret values we sent in the request, in case the response echoes one back).
const redactBody = (body: string, request: HttpClientRequest.HttpClientRequest) =>
  Array.from(secretValues(request)).reduce(
    (text, secret) => text.split(secret).join(REDACTED),
    body.replace(REDACT_JSON_FIELD, `$1"${REDACTED}"`).replace(REDACT_QUERY_FIELD, `$1${REDACTED}`),
  )

const responseBody = (body: string | void, request: HttpClientRequest.HttpClientRequest) => {
  if (body === undefined) return {}
  const redacted = redactBody(body, request)
  if (redacted.length <= BODY_LIMIT) return { body: redacted }
  return { body: redacted.slice(0, BODY_LIMIT), bodyTruncated: true }
}

const providerMessage = (status: number, body: { readonly body?: string }) => {
  if (body.body && body.body.length <= 500) return `Provider request failed with HTTP ${status}: ${body.body}`
  return `Provider request failed with HTTP ${status}`
}

const responseHttp = (input: {
  readonly request: HttpClientRequest.HttpClientRequest
  readonly response: HttpClientResponse.HttpClientResponse
  readonly redactedNames: ReadonlyArray<string | RegExp>
  readonly body: ReturnType<typeof responseBody>
  readonly requestId?: string | undefined
  readonly rateLimit?: HttpRateLimitDetails | undefined
}) =>
  new HttpContext({
    request: requestDetails(input.request, input.redactedNames),
    response: responseDetails(input.response, input.redactedNames),
    ...input.body,
    requestId: input.requestId,
    rateLimit: input.rateLimit,
  })

const statusReason = (input: {
  readonly status: number
  readonly message: string
  readonly retryAfterMs?: number | undefined
  readonly rateLimit?: HttpRateLimitDetails | undefined
  readonly http: HttpContext
}) => {
  const body = input.http.body ?? ""
  if (/content[-_\s]?policy|content_filter|safety/i.test(body)) {
    return new ContentPolicyReason({ message: input.message, http: input.http })
  }
  if (input.status === 401) {
    return new AuthenticationReason({ message: input.message, kind: "invalid", http: input.http })
  }
  if (input.status === 403) {
    return new AuthenticationReason({ message: input.message, kind: "insufficient-permissions", http: input.http })
  }
  if (input.status === 429) {
    if (/insufficient[-_\s]?quota|quota[-_\s]?exceeded/i.test(body)) {
      return new QuotaExceededReason({ message: input.message, http: input.http })
    }
    return new RateLimitReason({
      message: input.message,
      retryAfterMs: input.retryAfterMs,
      rateLimit: input.rateLimit,
      http: input.http,
    })
  }
  if (input.status === 400 || input.status === 404 || input.status === 409 || input.status === 422) {
    return new InvalidRequestReason({ message: input.message, http: input.http })
  }
  if (input.status >= 500 || retryableStatus(input.status)) {
    return new ProviderInternalReason({
      message: input.message,
      status: input.status,
      retryAfterMs: input.retryAfterMs,
      http: input.http,
    })
  }
  return new UnknownProviderReason({ message: input.message, status: input.status, http: input.http })
}

const statusError =
  (request: HttpClientRequest.HttpClientRequest, redactedNames: ReadonlyArray<string | RegExp>) =>
  (response: HttpClientResponse.HttpClientResponse) =>
    Effect.gen(function* () {
      if (response.status < 400) return response
      const body = yield* response.text.pipe(Effect.catch(() => Effect.void))
      const headers = normalizedHeaders(response.headers)
      const retryAfter = retryAfterMs(headers)
      const rateLimit = rateLimitDetails(headers, retryAfter)
      const details = responseBody(body, request)
      return yield* new LLMError({
        module: "RequestExecutor",
        method: "execute",
        reason: statusReason({
          status: response.status,
          message: providerMessage(response.status, details),
          retryAfterMs: retryAfter,
          rateLimit,
          http: responseHttp({
            request,
            response,
            redactedNames,
            body: details,
            requestId: requestId(headers),
            rateLimit,
          }),
        }),
      })
    })

const toHttpError = (redactedNames: ReadonlyArray<string | RegExp>) => (error: unknown) => {
  const transportError = (input: {
    readonly message: string
    readonly kind?: string | undefined
    readonly request?: HttpClientRequest.HttpClientRequest | undefined
  }) =>
    new LLMError({
      module: "RequestExecutor",
      method: "execute",
      reason: new TransportReason({
        message: input.message,
        kind: input.kind,
        url: input.request ? redactUrl(input.request.url) : undefined,
        http: input.request ? new HttpContext({ request: requestDetails(input.request, redactedNames) }) : undefined,
      }),
    })

  if (Cause.isTimeoutError(error)) {
    return transportError({ message: error.message, kind: "Timeout" })
  }
  if (!HttpClientError.isHttpClientError(error)) {
    return transportError({ message: "HTTP transport failed" })
  }
  const request = "request" in error ? error.request : undefined
  if (error.reason._tag === "TransportError") {
    return transportError({
      message: error.reason.description ?? "HTTP transport failed",
      kind: error.reason._tag,
      request,
    })
  }
  return transportError({
    message: `HTTP transport failed: ${error.reason._tag}`,
    kind: error.reason._tag,
    request,
  })
}

const retryDelay = (error: LLMError, attempt: number) => {
  if (error.retryAfterMs !== undefined) return Effect.succeed(Math.min(error.retryAfterMs, MAX_DELAY_MS))
  return Random.nextBetween(
    Math.min(BASE_DELAY_MS * 2 ** attempt * 0.8, MAX_DELAY_MS),
    Math.min(BASE_DELAY_MS * 2 ** attempt * 1.2, MAX_DELAY_MS),
  ).pipe(Effect.map((delay) => Math.round(delay)))
}

const retryStatusFailures = <A, R>(
  effect: Effect.Effect<A, LLMError, R>,
  retries = MAX_RETRIES,
  attempt = 0,
): Effect.Effect<A, LLMError, R> =>
  Effect.catchTag(effect, "LLM.Error", (error): Effect.Effect<A, LLMError, R> => {
    if (!error.retryable || retries <= 0) return Effect.fail(error)
    return retryDelay(error, attempt).pipe(
      Effect.flatMap((delay) => Effect.sleep(delay)),
      Effect.flatMap(() => retryStatusFailures(effect, retries - 1, attempt + 1)),
    )
  })

export const layer: Layer.Layer<Service, never, HttpClient.HttpClient> = Layer.effect(
  Service,
  Effect.gen(function* () {
    const http = yield* HttpClient.HttpClient
    const executeOnce = (request: HttpClientRequest.HttpClientRequest) =>
      Effect.gen(function* () {
        const redactedNames = yield* Headers.CurrentRedactedNames
        return yield* http
          .execute(request)
          .pipe(Effect.mapError(toHttpError(redactedNames)), Effect.flatMap(statusError(request, redactedNames)))
      })
    return Service.of({
      execute: (request) => retryStatusFailures(executeOnce(request)),
    })
  }),
)

export const defaultLayer = layer.pipe(Layer.provide(FetchHttpClient.layer))

export * as RequestExecutor from "./executor"
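For a feel of the retry schedule above: before jitter, attempt 0 waits 500 ms, attempt 1 waits 1000 ms, and attempt 2 waits 2000 ms; jitter scales each delay by 0.8 to 1.2, every delay is capped at 10 s, and a provider-supplied retry-after(-ms) value short-circuits the computed delay (also capped at 10 s). A sketch of wiring the executor, equivalent to the exported `defaultLayer`:

import { Layer } from "effect"
import { FetchHttpClient } from "effect/unstable/http"
import { RequestExecutor } from "./executor"

// Retrying, redacting executor backed by the fetch-based HttpClient.
const executorLayer = RequestExecutor.layer.pipe(Layer.provide(FetchHttpClient.layer))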
27
packages/llm/src/route/framing.ts
Normal file
@@ -0,0 +1,27 @@
import type { Stream } from "effect"
import * as ProviderShared from "../protocols/shared"
import type { LLMError } from "../schema"

/**
 * Decode a streaming HTTP response body into provider-protocol frames.
 *
 * `Framing` is the byte-stream-shaped seam between transport and protocol:
 *
 * - SSE (`Framing.sse`) — UTF-8 decode the body, run the SSE channel decoder,
 *   drop empty / `[DONE]` keep-alives. Each emitted frame is the JSON `data:`
 *   payload of one event.
 * - AWS event stream — length-prefixed binary frames with CRC checksums.
 *   Each emitted frame is one parsed binary event record.
 *
 * The frame type is opaque to this layer; the protocol's `decode` step turns
 * a frame into a typed chunk.
 */
export interface Framing<Frame> {
  readonly id: string
  readonly frame: (bytes: Stream.Stream<Uint8Array, LLMError>) => Stream.Stream<Frame, LLMError>
}

/** Server-Sent Events framing. Used by every JSON-streaming HTTP provider. */
export const sse: Framing<string> = { id: "sse", frame: ProviderShared.sseFraming }

export * as Framing from "./framing"
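
// A minimal custom framing sketch under assumptions (not part of this package):
// newline-delimited JSON over the same byte-stream seam. Assumes effect's
// Stream.decodeText / Stream.splitLines combinators; each emitted frame is one
// non-empty JSON line for the protocol's event schema to decode.
//
//   import { Stream } from "effect" // value import; the module above only imports the type
//
//   const ndjson: Framing<string> = {
//     id: "ndjson",
//     frame: (bytes) =>
//       bytes.pipe(
//         Stream.decodeText(), // Uint8Array -> UTF-8 string chunks
//         Stream.splitLines, // re-chunk on newlines, one line per element
//         Stream.filter((line) => line.trim().length > 0), // drop blank keep-alives
//       ),
//   }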
26
packages/llm/src/route/index.ts
Normal file
@@ -0,0 +1,26 @@
export { Route, LLMClient, modelLimits, modelRef } from "./client"
export type {
  Route as RouteShape,
  RouteModelDefaults,
  RouteModelInput,
  RouteRoutedModelDefaults,
  RouteRoutedModelInput,
  AnyRoute,
  Interface as LLMClientShape,
  Service as LLMClientService,
  ModelRefInput,
} from "./client"
export * from "./executor"
export { Auth } from "./auth"
export { AuthOptions } from "./auth-options"
export { Endpoint } from "./endpoint"
export { Framing } from "./framing"
export { Protocol } from "./protocol"
export { HttpTransport, WebSocketExecutor, WebSocketTransport } from "./transport"
export * as Transport from "./transport"
export type { Auth as AuthShape, AuthInput, Credential, CredentialError } from "./auth"
export type { ApiKeyMode, AuthOverride, ProviderAuthOption } from "./auth-options"
export type { Endpoint as EndpointFn, EndpointInput } from "./endpoint"
export type { Framing as FramingDef } from "./framing"
export type { Protocol as ProtocolDef } from "./protocol"
export type { Transport as TransportDef, TransportRuntime } from "./transport"
84
packages/llm/src/route/protocol.ts
Normal file
@@ -0,0 +1,84 @@
import { Schema, type Effect } from "effect"
import type { LLMError, LLMEvent, LLMRequest, ProtocolID } from "../schema"

/**
 * The semantic API contract of one model server family.
 *
 * A `Protocol` owns the parts of a route that are intrinsic to "what does
 * this API look like": how a common `LLMRequest` becomes a provider-native
 * body, what schema that body must satisfy before it is JSON-encoded, and
 * how the streaming response decodes back into common `LLMEvent`s.
 *
 * Examples:
 *
 * - `OpenAIChat.protocol` — chat completions style
 * - `OpenAIResponses.protocol` — responses API
 * - `AnthropicMessages.protocol` — messages API with content blocks
 * - `Gemini.protocol` — generateContent
 * - `BedrockConverse.protocol` — Converse with binary event-stream framing
 *
 * A `Protocol` is **not** a deployment. It does not know which URL, which
 * headers, or which auth scheme to use. Those are deployment concerns owned
 * by `Route.make(...)` along with the chosen `Endpoint`, `Auth`,
 * and `Framing`. This separation is what lets DeepSeek, TogetherAI, Cerebras,
 * etc. all reuse `OpenAIChat.protocol` without forking 300 lines per provider.
 *
 * The four type parameters reflect the pipeline:
 *
 * - `Body` — provider-native request body candidate. `Route.make(...)`
 *   validates and JSON-encodes it with `body.schema`.
 * - `Frame` — one unit of the framed response stream. SSE: a JSON data
 *   string. AWS event stream: a parsed binary frame.
 * - `Event` — schema-decoded provider event produced from one frame.
 * - `State` — accumulator threaded through `stream.step` to translate event
 *   sequences into `LLMEvent` sequences.
 */
export interface Protocol<Body, Frame, Event, State> {
  /** Stable id for the wire protocol implementation. */
  readonly id: ProtocolID
  /** Request side: schema for the provider-native body and how to build it. */
  readonly body: ProtocolBody<Body>
  /** Response side: streaming state machine. */
  readonly stream: ProtocolStream<Frame, Event, State>
}

export interface ProtocolBody<Body> {
  /** Schema for the validated provider-native body sent as the JSON request. */
  readonly schema: Schema.Codec<Body, unknown>
  /** Build the provider-native body from a common `LLMRequest`. */
  readonly from: (request: LLMRequest) => Effect.Effect<Body, LLMError>
}

export interface ProtocolStream<Frame, Event, State> {
  /** Schema for one decoded streaming event, decoded from a transport frame. */
  readonly event: Schema.Codec<Event, Frame>
  /** Initial parser state. Called once per response. */
  readonly initial: () => State
  /** Translate one event into emitted `LLMEvent`s plus the next state. */
  readonly step: (state: State, event: Event) => Effect.Effect<readonly [State, ReadonlyArray<LLMEvent>], LLMError>
  /** Optional request-completion signal for transports that do not end naturally. */
  readonly terminal?: (event: Event) => boolean
  /** Optional flush emitted when the framed stream ends. */
  readonly onHalt?: (state: State) => ReadonlyArray<LLMEvent>
}

/**
 * Construct a `Protocol` from its body and stream pieces:
 *
 * - `body.schema` infers the provider-native request body shape.
 * - `body.from` ties the common `LLMRequest` to the provider body.
 * - `stream.event` infers the decoded streaming event and the wire frame.
 * - `stream.initial`, `stream.step`, and `stream.onHalt` infer the parser state.
 *
 * Provider implementations should usually call `Protocol.make({ ... })`
 * without explicit type arguments; the schemas and parser functions are the
 * source of truth. The constructor remains as the public seam for future
 * cross-cutting concerns such as tracing or instrumentation.
 */
export const make = <Body, Frame, Event, State>(
  input: Protocol<Body, Frame, Event, State>,
): Protocol<Body, Frame, Event, State> => input

export const jsonEvent = <const S extends Schema.Top>(schema: S) => Schema.fromJsonString(schema)

export * as Protocol from "./protocol"
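
// A toy end-to-end sketch under assumptions: the LLMRequest/LLMEvent helpers are
// hypothetical (stubbed with `declare`), and "toy" is not a real ProtocolID. The
// point is how `make` infers Body/Frame/Event/State from the pieces alone.
//
//   import { Effect, Schema } from "effect"
//
//   declare const renderPrompt: (request: LLMRequest) => string // assumed helper
//   declare const textDelta: (text: string) => LLMEvent // assumed event constructor
//
//   const ToyEvent = Schema.Struct({ text: Schema.String })
//
//   const toy = make({
//     id: "toy" as ProtocolID, // illustrative cast; real ids come from ../schema
//     body: {
//       schema: Schema.Struct({ prompt: Schema.String }),
//       from: (request) => Effect.succeed({ prompt: renderPrompt(request) }),
//     },
//     stream: {
//       event: jsonEvent(ToyEvent), // Frame = JSON string, Event = ToyEvent
//       initial: () => ({ emitted: 0 }),
//       step: (state, event) =>
//         Effect.succeed([{ emitted: state.emitted + 1 }, [textDelta(event.text)]] as const),
//     },
//   })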
Some files were not shown because too many files have changed in this diff